% dblp publication search: exported results for the query "policy parameter"
% (downloaded as a .bib file).

% Journal article: Neural Process. Lett. 56(2), 2024.
@article{DBLP:journals/npl/SaglamMCK24,
  author     = {Saglam, Baturay and
                Mutlu, Furkan B. and
                Cicek, Dogan C. and
                Kozat, Suleyman Serdar},
  title      = {Parameter-Free Reduction of the Estimation Bias in Deep Reinforcement Learning for Deterministic Policy Gradients},
  journal    = {Neural Process. Lett.},
  volume     = {56},
  number     = {2},
  pages      = {80},
  year       = {2024},
  url        = {https://doi.org/10.1007/s11063-024-11461-y},
  doi        = {10.1007/S11063-024-11461-Y},
  timestamp  = {Sat, 04 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/npl/SaglamMCK24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Swarm Evol. Comput. 85, 2024.
% The title is stored in sentence case; the algorithm names iSOMA and
% Proximal Policy Optimization are brace-protected so that styles which
% re-case titles do not flatten them to "isoma" / "proximal policy optimization".
@article{DBLP:journals/swevo/KleinZS24,
  author     = {Klein, Luk{\'{a}}s and
                Zelinka, Ivan and
                Seidl, David},
  title      = {Optimizing parameters in swarm intelligence using reinforcement learning: An application of {Proximal Policy Optimization} to the {iSOMA} algorithm},
  journal    = {Swarm Evol. Comput.},
  volume     = {85},
  pages      = {101487},
  year       = {2024},
  url        = {https://doi.org/10.1016/j.swevo.2024.101487},
  doi        = {10.1016/J.SWEVO.2024.101487},
  timestamp  = {Tue, 21 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/swevo/KleinZS24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% AISTATS 2024 paper (PMLR vol. 238). DBLP:journals/corr/abs-2310-11677 in this
% file has the same authors and title (presumably the preprint of this paper).
% "Markov" is brace-protected so sentence-casing styles keep the proper noun.
@inproceedings{DBLP:conf/aistats/MondalA24,
  author     = {Mondal, Washim Uddin and
                Aggarwal, Vaneet},
  editor     = {Dasgupta, Sanjoy and
                Mandt, Stephan and
                Li, Yingzhen},
  title      = {Improved Sample Complexity Analysis of Natural Policy Gradient Algorithm with General Parameterization for Infinite Horizon Discounted Reward {Markov} Decision Processes},
  booktitle  = {International Conference on Artificial Intelligence and Statistics, 2-4 May 2024, Palau de Congressos, Valencia, Spain},
  series     = {Proceedings of Machine Learning Research},
  volume     = {238},
  pages      = {3097--3105},
  publisher  = {{PMLR}},
  year       = {2024},
  url        = {https://proceedings.mlr.press/v238/u-mondal24a.html},
  timestamp  = {Mon, 13 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/MondalA24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2402.02042).
% The acronym "MDPs" is brace-protected so sentence-casing styles do not
% render it as "mdps".
@article{DBLP:journals/corr/abs-2402-02042,
  author     = {Bai, Qinbo and
                Mondal, Washim Uddin and
                Aggarwal, Vaneet},
  title      = {Learning General Parameterized Policies for Infinite Horizon Average Reward Constrained {MDPs} via Primal-Dual Policy Gradient Algorithm},
  journal    = {CoRR},
  volume     = {abs/2402.02042},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.02042},
  doi        = {10.48550/ARXIV.2402.02042},
  eprinttype = {arXiv},
  eprint     = {2402.02042},
  timestamp  = {Fri, 09 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-02042.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2403.11062).
% The mixed-case acronym "CVaR" is brace-protected so sentence-casing styles
% do not render it as "cvar".
@article{DBLP:journals/corr/abs-2403-11062,
  author     = {Luo, Yudong and
                Pan, Yangchen and
                Wang, Han and
                Torr, Philip H. S. and
                Poupart, Pascal},
  title      = {A Simple Mixture Policy Parameterization for Improving Sample Efficiency of {CVaR} Optimization},
  journal    = {CoRR},
  volume     = {abs/2403.11062},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2403.11062},
  doi        = {10.48550/ARXIV.2403.11062},
  eprinttype = {arXiv},
  eprint     = {2403.11062},
  timestamp  = {Mon, 08 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-11062.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Comput. Oper. Res. 158, 2023.
% In the policy name (R,s,S) the letter case distinguishes the symbols, so the
% whole group is brace-protected; a sentence-casing style would otherwise
% print "(r,s,s)".
@article{DBLP:journals/cor/VisentinPRT23,
  author     = {Visentin, Andrea and
                Prestwich, Steven D. and
                Rossi, Roberto and
                Tarim, S. Armagan},
  title      = {Stochastic dynamic programming heuristic for the {(R,s,S)} policy parameters computation},
  journal    = {Comput. Oper. Res.},
  volume     = {158},
  pages      = {106289},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.cor.2023.106289},
  doi        = {10.1016/J.COR.2023.106289},
  timestamp  = {Tue, 12 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/cor/VisentinPRT23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Expert Syst. Appl. 231, 2023.
@article{DBLP:journals/eswa/ShenZHGL23,
  author     = {Shen, Chun and
                Zhu, Sheng and
                Han, Shuai and
                Gong, Xiaoyu and
                L{\"{u}}, Shuai},
  title      = {Guided deterministic policy optimization with gradient-free policy parameters information},
  journal    = {Expert Syst. Appl.},
  volume     = {231},
  pages      = {120693},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.eswa.2023.120693},
  doi        = {10.1016/J.ESWA.2023.120693},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/eswa/ShenZHGL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Int. J. Control 96(4), 2023.
% Title fixes: the H-infinity symbol is written as one braced math group
% instead of hard-coded \emph / \mbox markup, and the acronym "SOS" is
% brace-protected so sentence-casing styles do not turn it into "Sos".
@article{DBLP:journals/ijcon/PakkhesalS23,
  author     = {Pakkhesal, Sajjad and
                Shamaghdari, Saeed},
  title      = {{SOS}-based policy iteration for {$H_{\infty}$} control of polynomial systems with uncertain parameters},
  journal    = {Int. J. Control},
  volume     = {96},
  number     = {4},
  pages      = {1053--1066},
  year       = {2023},
  url        = {https://doi.org/10.1080/00207179.2022.2027523},
  doi        = {10.1080/00207179.2022.2027523},
  timestamp  = {Fri, 28 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijcon/PakkhesalS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Soft Comput. 27(10), 2023.
% The names "Karaj" and "AmirKabir" in the title are brace-protected so
% sentence-casing styles keep their capitals.
@article{DBLP:journals/soco/TabariS23,
  author     = {Tabari, Mahmoud Mohammad Rezapour and
                Safari, Reihaneh},
  title      = {Development of water re-allocation policy under uncertainty conditions in the inflow to reservoir and demands parameters: a case study of {Karaj} {AmirKabir} dam},
  journal    = {Soft Comput.},
  volume     = {27},
  number     = {10},
  pages      = {6521--6547},
  year       = {2023},
  url        = {https://doi.org/10.1007/s00500-023-07885-8},
  doi        = {10.1007/S00500-023-07885-8},
  timestamp  = {Wed, 17 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/soco/TabariS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% AISTATS 2023 paper (PMLR vol. 206). DBLP:journals/corr/abs-2210-12812 in this
% file has the same authors and a near-identical title (presumably its preprint).
@inproceedings{DBLP:conf/aistats/PattathilZO23,
  author     = {Pattathil, Sarath and
                Zhang, Kaiqing and
                Ozdaglar, Asuman E.},
  editor     = {Ruiz, Francisco J. R. and
                Dy, Jennifer G. and
                van de Meent, Jan{-}Willem},
  title      = {Symmetric (Optimistic) Natural Policy Gradient for Multi-Agent Learning with Parameter Convergence},
  booktitle  = {International Conference on Artificial Intelligence and Statistics, 25-27 April 2023, Palau de Congressos, Valencia, Spain},
  series     = {Proceedings of Machine Learning Research},
  volume     = {206},
  pages      = {5641--5685},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v206/pattathil23a.html},
  timestamp  = {Mon, 19 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/PattathilZO23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICLR 2023 paper (OpenReview); no pages or doi are recorded.
% The acronym "MDPs" is brace-protected so sentence-casing styles do not
% render it as "mdps".
@inproceedings{DBLP:conf/iclr/FuYGD023,
  author     = {Fu, Haotian and
                Yao, Jiayu and
                Gottesman, Omer and
                Doshi{-}Velez, Finale and
                Konidaris, George},
  title      = {Performance Bounds for Model and Policy Transfer in Hidden-parameter {MDPs}},
  booktitle  = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=20gBzEzgtiI},
  timestamp  = {Fri, 30 Jun 2023 14:38:38 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/FuYGD023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2023 paper.
% The url is stored raw ("paper_files", not "paper\_files"): url-aware styles
% and biblatex treat the field verbatim, so the escaped form can leave a
% literal backslash in the link.
% NOTE(review): a style that prints url without the url/hyperref package may
% need the escape back -- confirm against the toolchain in use.
@inproceedings{DBLP:conf/nips/AlfanoYR23,
  author     = {Alfano, Carlo and
                Yuan, Rui and
                Rebeschini, Patrick},
  editor     = {Oh, Alice and
                Naumann, Tristan and
                Globerson, Amir and
                Saenko, Kate and
                Hardt, Moritz and
                Levine, Sergey},
  title      = {A Novel Framework for Policy Mirror Descent with General Parameterization and Linear Convergence},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper_files/paper/2023/hash/61a9278dfef5f871b5e472389f8d6fa1-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/AlfanoYR23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2303.16548), single author.
@article{DBLP:journals/corr/abs-2303-16548,
  author     = {Li, Deyue},
  title      = {Policy Gradient Methods for Discrete Time Linear Quadratic Regulator With Random Parameters},
  journal    = {CoRR},
  volume     = {abs/2303.16548},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.16548},
  doi        = {10.48550/ARXIV.2303.16548},
  eprinttype = {arXiv},
  eprint     = {2303.16548},
  timestamp  = {Thu, 13 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-16548.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2305.19575).
% The proper noun "Hadamard" is brace-protected so sentence-casing styles
% keep its capital.
@article{DBLP:journals/corr/abs-2305-19575,
  author     = {Liu, Jiacai and
                Chen, Jinchi and
                Wei, Ke},
  title      = {On the Linear Convergence of Policy Gradient under {Hadamard} Parameterization},
  journal    = {CoRR},
  volume     = {abs/2305.19575},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.19575},
  doi        = {10.48550/ARXIV.2305.19575},
  eprinttype = {arXiv},
  eprint     = {2305.19575},
  timestamp  = {Tue, 06 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-19575.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2306.09872).
% The coined name "GenORM" is brace-protected; sentence-casing styles keep
% only the first character of a title and would print "Genorm". The colon
% stays outside the braces so the subtitle start is still recognised.
@article{DBLP:journals/corr/abs-2306-09872,
  author     = {Kuroki, So and
                Guo, Jiaxian and
                Matsushima, Tatsuya and
                Okubo, Takuya and
                Kobayashi, Masato and
                Ikeda, Yuya and
                Takanami, Ryosuke and
                Yoo, Paul and
                Matsuo, Yutaka and
                Iwasawa, Yusuke},
  title      = {{GenORM}: Generalizable One-shot Rope Manipulation with Parameter-Aware Policy},
  journal    = {CoRR},
  volume     = {abs/2306.09872},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.09872},
  doi        = {10.48550/ARXIV.2306.09872},
  eprinttype = {arXiv},
  eprint     = {2306.09872},
  timestamp  = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-09872.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2309.09051).
% The coined name "GenDOM" is brace-protected; sentence-casing styles keep
% only the first character of a title and would print "Gendom". The colon
% stays outside the braces so the subtitle start is still recognised.
@article{DBLP:journals/corr/abs-2309-09051,
  author     = {Kuroki, So and
                Guo, Jiaxian and
                Matsushima, Tatsuya and
                Okubo, Takuya and
                Kobayashi, Masato and
                Ikeda, Yuya and
                Takanami, Ryosuke and
                Yoo, Paul and
                Matsuo, Yutaka and
                Iwasawa, Yusuke},
  title      = {{GenDOM}: Generalizable One-shot Deformable Object Manipulation with Parameter-Aware Policy},
  journal    = {CoRR},
  volume     = {abs/2309.09051},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.09051},
  doi        = {10.48550/ARXIV.2309.09051},
  eprinttype = {arXiv},
  eprint     = {2309.09051},
  timestamp  = {Fri, 22 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-09051.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2310.05053).
% Only the acronym is braced ({FP3O}:, not {FP3O:}). BibTeX detects a
% subtitle colon only outside braces, so with the colon inside the group a
% sentence-casing style lowercases the first subtitle word ("enabling").
@article{DBLP:journals/corr/abs-2310-05053,
  author     = {Feng, Lang and
                Xing, Dong and
                Zhang, Junru and
                Pan, Gang},
  title      = {{FP3O}: Enabling Proximal Policy Optimization in Multi-Agent Cooperation with Parameter-Sharing Versatility},
  journal    = {CoRR},
  volume     = {abs/2310.05053},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.05053},
  doi        = {10.48550/ARXIV.2310.05053},
  eprinttype = {arXiv},
  eprint     = {2310.05053},
  timestamp  = {Fri, 20 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-05053.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2310.11677). DBLP:conf/aistats/MondalA24 in this
% file has the same authors and title (presumably the published version).
% "Markov" is brace-protected so sentence-casing styles keep the proper noun.
@article{DBLP:journals/corr/abs-2310-11677,
  author     = {Mondal, Washim Uddin and
                Aggarwal, Vaneet},
  title      = {Improved Sample Complexity Analysis of Natural Policy Gradient Algorithm with General Parameterization for Infinite Horizon Discounted Reward {Markov} Decision Processes},
  journal    = {CoRR},
  volume     = {abs/2310.11677},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.11677},
  doi        = {10.48550/ARXIV.2310.11677},
  eprinttype = {arXiv},
  eprint     = {2310.11677},
  timestamp  = {Fri, 27 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-11677.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% PhD thesis, Pompeu Fabra University, 2022.
% NOTE(review): the author is split as surname "Daniel", given names
% "Beatriz Cabrero", which is how BibTeX parses the original
% "Beatriz Cabrero Daniel"; confirm whether "Cabrero Daniel" is a compound surname.
@phdthesis{DBLP:phd/es/Daniel22,
  author     = {Daniel, Beatriz Cabrero},
  title      = {Automating crowd simulation: from parameter tuning to dynamic context-to-policy adaptation},
  school     = {Pompeu Fabra University, Spain},
  year       = {2022},
  url        = {http://hdl.handle.net/10803/673251},
  timestamp  = {Tue, 27 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/phd/es/Daniel22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Syst. 10(6), 2022.
% "Monte Carlo" is brace-protected so sentence-casing styles keep the
% proper noun capitalised.
@article{DBLP:journals/systems/WakelandH22,
  author     = {Wakeland, Wayne W. and
                Homer, Jack B.},
  title      = {Addressing Parameter Uncertainty in a Health Policy Simulation Model Using {Monte Carlo} Sensitivity Methods},
  journal    = {Syst.},
  volume     = {10},
  number     = {6},
  pages      = {225},
  year       = {2022},
  url        = {https://doi.org/10.3390/systems10060225},
  doi        = {10.3390/SYSTEMS10060225},
  timestamp  = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/systems/WakelandH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Trans. Mach. Learn. Res., 2022. DBLP:journals/corr/abs-2109-05765
% in this file has the same authors and title (presumably the preprint).
% Only the acronym is braced ({DHA}:, not {DHA:}). BibTeX detects a subtitle
% colon only outside braces, so with the colon inside the group a
% sentence-casing style lowercases the first subtitle word.
@article{DBLP:journals/tmlr/ZhouHHZRFL22,
  author     = {Zhou, Kaichen and
                Hong, Lanqing and
                Hu, Shoukang and
                Zhou, Fengwei and
                Ru, Binxin and
                Feng, Jiashi and
                Li, Zhenguo},
  title      = {{DHA}: End-to-End Joint Optimization of Data Augmentation Policy, Hyper-parameter and Architecture},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2022},
  year       = {2022},
  url        = {https://openreview.net/forum?id=MHOAEiTlen},
  timestamp  = {Fri, 19 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/ZhouHHZRFL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: IEEE Trans. Smart Grid 13(6), 2022.
@article{DBLP:journals/tsg/SepulvedaAMS22,
  author     = {Sep{\'{u}}lveda, Juan and
                Angulo, Alejandro and
                Mancilla{-}David, Fernando and
                Street, Alexandre},
  title      = {Robust Co-Optimization of Droop and Affine Policy Parameters in Active Distribution Systems With High Penetration of Photovoltaic Generation},
  journal    = {{IEEE} Trans. Smart Grid},
  volume     = {13},
  number     = {6},
  pages      = {4355--4366},
  year       = {2022},
  url        = {https://doi.org/10.1109/TSG.2022.3177947},
  doi        = {10.1109/TSG.2022.3177947},
  timestamp  = {Mon, 05 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tsg/SepulvedaAMS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: IEEE Trans. Wirel. Commun. 21(7), 2022.
@article{DBLP:journals/twc/FlaniganL22,
  author     = {Flanigan, Katherine A. and
                Lynch, Jerome P.},
  title      = {Optimal Event-Based Policy for Remote Parameter Estimation in Wireless Sensing Architectures Under Resource Constraints},
  journal    = {{IEEE} Trans. Wirel. Commun.},
  volume     = {21},
  number     = {7},
  pages      = {5293--5304},
  year       = {2022},
  url        = {https://doi.org/10.1109/TWC.2021.3139289},
  doi        = {10.1109/TWC.2021.3139289},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/twc/FlaniganL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2201.08832); no doi field, the url is the arXiv
% abstract page.
@article{DBLP:journals/corr/abs-2201-08832,
  author     = {Suttle, Wesley A. and
                Koppel, Alec and
                Liu, Ji},
  title      = {Occupancy Information Ratio: Infinite-Horizon, Information-Directed, Parameterized Policy Search},
  journal    = {CoRR},
  volume     = {abs/2201.08832},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.08832},
  eprinttype = {arXiv},
  eprint     = {2201.08832},
  timestamp  = {Tue, 21 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-08832.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2202.03957); no doi field, the url is the arXiv
% abstract page.
% "3D" and the proper noun "Bingham" are brace-protected; a sentence-casing
% style would otherwise print "3d".
@article{DBLP:journals/corr/abs-2202-03957,
  author     = {James, Stephen and
                Abbeel, Pieter},
  title      = {{Bingham} Policy Parameterization for {3D} Rotations in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2202.03957},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.03957},
  eprinttype = {arXiv},
  eprint     = {2202.03957},
  timestamp  = {Thu, 10 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-03957.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2206.10073).
@article{DBLP:journals/corr/abs-2206-10073,
  author     = {Tran, Trang H. and
                Nguyen, Lam M. and
                Scheinberg, Katya},
  title      = {Finding Optimal Policy for Queueing Models: New Parameterization},
  journal    = {CoRR},
  volume     = {abs/2206.10073},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.10073},
  doi        = {10.48550/ARXIV.2206.10073},
  eprinttype = {arXiv},
  eprint     = {2206.10073},
  timestamp  = {Tue, 28 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-10073.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2210.12812). DBLP:conf/aistats/PattathilZO23 in this
% file has the same authors and a near-identical title (presumably the
% published version).
@article{DBLP:journals/corr/abs-2210-12812,
  author     = {Pattathil, Sarath and
                Zhang, Kaiqing and
                Ozdaglar, Asuman E.},
  title      = {Symmetric (Optimistic) Natural Policy Gradient for Multi-agent Learning with Parameter Convergence},
  journal    = {CoRR},
  volume     = {abs/2210.12812},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.12812},
  doi        = {10.48550/ARXIV.2210.12812},
  eprinttype = {arXiv},
  eprint     = {2210.12812},
  timestamp  = {Fri, 28 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-12812.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Comput. Ind. Eng. 157, 2021.
@article{DBLP:journals/candie/HaghighiCM21,
  author     = {Haghighi, Firoozeh and
                Castanier, Bruno and
                Misaii, Hasan},
  title      = {Rolling horizon optimal maintenance policy for a system subject to shocks and degradation under uncertain parameters},
  journal    = {Comput. Ind. Eng.},
  volume     = {157},
  pages      = {107298},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.cie.2021.107298},
  doi        = {10.1016/J.CIE.2021.107298},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/candie/HaghighiCM21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: IEEE Control. Syst. Lett. 5(1), 2021.
@article{DBLP:journals/csysl/GravellGS21,
  author     = {Gravell, Benjamin and
                Ganapathy, Karthik and
                Summers, Tyler H.},
  title      = {Policy Iteration for Linear Quadratic Games With Stochastic Parameters},
  journal    = {{IEEE} Control. Syst. Lett.},
  volume     = {5},
  number     = {1},
  pages      = {307--312},
  year       = {2021},
  url        = {https://doi.org/10.1109/LCSYS.2020.3001883},
  doi        = {10.1109/LCSYS.2020.3001883},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/csysl/GravellGS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Eur. J. Oper. Res. 294(1), 2021.
% The whole policy name (R, s, S) is brace-protected. The export braced only
% "S)", so a sentence-casing style would print "(r, s, S)" and lose the
% upper-case R.
@article{DBLP:journals/eor/VisentinPRT21,
  author     = {Visentin, Andrea and
                Prestwich, Steve and
                Rossi, Roberto and
                Tarim, S. Armagan},
  title      = {Computing optimal {(R, s, S)} policy parameters by a hybrid of branch-and-bound and stochastic dynamic programming},
  journal    = {Eur. J. Oper. Res.},
  volume     = {294},
  number     = {1},
  pages      = {91--99},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.ejor.2021.01.012},
  doi        = {10.1016/J.EJOR.2021.01.012},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/eor/VisentinPRT21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: ACM Trans. Intell. Syst. Technol. 12(3), 2021.
% Only the acronym is braced ({PP-PG}:, not {PP-PG:}). BibTeX detects a
% subtitle colon only outside braces, so with the colon inside the group a
% sentence-casing style lowercases the first subtitle word.
@article{DBLP:journals/tist/LiLSCYY21,
  author     = {Li, Shilei and
                Li, Meng and
                Su, Jiongming and
                Chen, Shaofei and
                Yuan, Zhimin and
                Ye, Qing},
  title      = {{PP-PG}: Combining Parameter Perturbation with Policy Gradient Methods for Effective and Efficient Explorations in Deep Reinforcement Learning},
  journal    = {{ACM} Trans. Intell. Syst. Technol.},
  volume     = {12},
  number     = {3},
  pages      = {35:1--35:21},
  year       = {2021},
  url        = {https://doi.org/10.1145/3452008},
  doi        = {10.1145/3452008},
  timestamp  = {Wed, 01 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tist/LiLSCYY21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% IEEE CDC 2021 conference paper.
@inproceedings{DBLP:conf/cdc/SunF21,
  author     = {Sun, Yue and
                Fazel, Maryam},
  title      = {Learning Optimal Controllers by Policy Gradient: Global Optimality via Convex Parameterization},
  booktitle  = {2021 60th {IEEE} Conference on Decision and Control (CDC), Austin, TX, USA, December 14-17, 2021},
  pages      = {4576--4581},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/CDC45484.2021.9682821},
  doi        = {10.1109/CDC45484.2021.9682821},
  timestamp  = {Tue, 17 May 2022 15:53:17 +0200},
  biburl     = {https://dblp.org/rec/conf/cdc/SunF21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% IECON 2021 conference paper.
@inproceedings{DBLP:conf/iecon/ArditiKUBO21,
  author     = {Arditi, Emir and
                Kunavar, Tjasa and
                Ugur, Emre and
                Babic, Jan and
                {\"{O}}ztop, Erhan},
  title      = {Inferring Cost Functions Using Reward Parameter Search and Policy Gradient Reinforcement Learning},
  booktitle  = {{IECON} 2021 - 47th Annual Conference of the {IEEE} Industrial Electronics Society, Toronto, ON, Canada, October 13-16, 2021},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/IECON48115.2021.9589967},
  doi        = {10.1109/IECON48115.2021.9589967},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iecon/ArditiKUBO21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% IJCCI 2021 conference paper.
% The first author is written "de Miguel Gomez, Alejandro": this is the same
% split BibTeX derives from "Alejandro de Miguel Gomez" (lower-case "de"
% starts the von part, "Miguel Gomez" is the last name).
@inproceedings{DBLP:conf/ijcci/GomezT21,
  author     = {de Miguel Gomez, Alejandro and
                Toosi, Farshad Ghassemi},
  editor     = {B{\"{a}}ck, Thomas and
                Wagner, Christian and
                Garibaldi, Jonathan M. and
                Lam, H. K. and
                Cottrell, Marie and
                Merelo, Juan Juli{\'{a}}n and
                Warwick, Kevin},
  title      = {Continuous Parameter Control in Genetic Algorithms using Policy Gradient Reinforcement Learning},
  booktitle  = {Proceedings of the 13th International Joint Conference on Computational Intelligence, {IJCCI} 2021, Online Streaming, October 25-27, 2021},
  pages      = {115--122},
  publisher  = {{SCITEPRESS}},
  year       = {2021},
  url        = {https://doi.org/10.5220/0010643500003063},
  doi        = {10.5220/0010643500003063},
  timestamp  = {Tue, 06 Jun 2023 14:58:01 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcci/GomezT21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2105.13986), single author; no doi field.
% "Mountain Car" is brace-protected so sentence-casing styles keep it
% capitalised (presumably the name of the benchmark task -- confirm).
@article{DBLP:journals/corr/abs-2105-13986,
  author     = {Bowyer, Caleb M.},
  title      = {Improving Generalization in {Mountain Car} Through the Partitioned Parameterized Policy Approach via Quasi-Stochastic Gradient Descent},
  journal    = {CoRR},
  volume     = {abs/2105.13986},
  year       = {2021},
  url        = {https://arxiv.org/abs/2105.13986},
  eprinttype = {arXiv},
  eprint     = {2105.13986},
  timestamp  = {Tue, 01 Jun 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-13986.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2109.05765). DBLP:journals/tmlr/ZhouHHZRFL22 in this
% file has the same authors and title (presumably the published version).
% Only the acronym is braced ({DHA}:, not {DHA:}). BibTeX detects a subtitle
% colon only outside braces, so with the colon inside the group a
% sentence-casing style lowercases the first subtitle word.
@article{DBLP:journals/corr/abs-2109-05765,
  author     = {Zhou, Kaichen and
                Hong, Lanqing and
                Hu, Shoukang and
                Zhou, Fengwei and
                Ru, Binxin and
                Feng, Jiashi and
                Li, Zhenguo},
  title      = {{DHA}: End-to-End Joint Optimization of Data Augmentation Policy, Hyper-parameter and Architecture},
  journal    = {CoRR},
  volume     = {abs/2109.05765},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.05765},
  eprinttype = {arXiv},
  eprint     = {2109.05765},
  timestamp  = {Tue, 21 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-05765.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (CoRR abs/2110.15799); no doi field, the url is the arXiv
% abstract page.
@article{DBLP:journals/corr/abs-2110-15799,
  author     = {Spiegel, Benjamin A. and
                Konidaris, George},
  title      = {Guided Policy Search for Parameterized Skills using Adverbs},
  journal    = {CoRR},
  volume     = {abs/2110.15799},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.15799},
  eprinttype = {arXiv},
  eprint     = {2110.15799},
  timestamp  = {Tue, 02 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-15799.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% RFC 9029, exported by dblp as an article with journal "RFC" and the RFC
% number in the volume field (entry type left unchanged).
% The protocol name and its acronym are brace-protected so sentence-casing
% styles do not print "border gateway protocol - link state".
@article{DBLP:journals/rfc/rfc9029,
  author     = {Farrel, Adrian},
  title      = {Updates to the Allocation Policy for the {Border Gateway Protocol} - {Link State} ({BGP-LS}) Parameters Registries},
  journal    = {{RFC}},
  volume     = {9029},
  pages      = {1--5},
  year       = {2021},
  url        = {https://doi.org/10.17487/RFC9029},
  doi        = {10.17487/RFC9029},
  timestamp  = {Wed, 09 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/rfc/rfc9029.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: IEEE Access 8, 2020.
@article{DBLP:journals/access/AhmedAMJH20,
  author     = {Ahmed, Bilal and
                Ahmed, Nadeem and
                Malik, Asad Waqar and
                Jafri, Mohsin and
                Hafeez, Taimur},
  title      = {Fingerprinting {SDN} Policy Parameters: An Empirical Study},
  journal    = {{IEEE} Access},
  volume     = {8},
  pages      = {142379--142392},
  year       = {2020},
  url        = {https://doi.org/10.1109/ACCESS.2020.3012176},
  doi        = {10.1109/ACCESS.2020.3012176},
  timestamp  = {Sat, 05 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/access/AhmedAMJH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Eng. Appl. Artif. Intell. 90, 2020.
% The acronym "PSO" is brace-protected so sentence-casing styles do not
% print "pso-based".
@article{DBLP:journals/eaai/LiuLSWH20,
  author     = {Liu, Tundong and
                Li, Liduan and
                Shao, Guifang and
                Wu, Xiaomin and
                Huang, Meng},
  title      = {A novel policy gradient algorithm with {PSO}-based parameter exploration for continuous control},
  journal    = {Eng. Appl. Artif. Intell.},
  volume     = {90},
  pages      = {103525},
  year       = {2020},
  url        = {https://doi.org/10.1016/j.engappai.2020.103525},
  doi        = {10.1016/J.ENGAPPAI.2020.103525},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/eaai/LiuLSWH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Eur. J. Oper. Res. 282(1), 2020.
@article{DBLP:journals/eor/OmshiGS20,
  author     = {Omshi, E. Mosayebi and
                Grall, Antoine and
                Shemehsavar, Soudabeh},
  title      = {A dynamic auto-adaptive predictive maintenance policy for degradation with unknown parameters},
  journal    = {Eur. J. Oper. Res.},
  volume     = {282},
  number     = {1},
  pages      = {81--92},
  year       = {2020},
  url        = {https://doi.org/10.1016/j.ejor.2019.08.050},
  doi        = {10.1016/J.EJOR.2019.08.050},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/eor/OmshiGS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ijpr/SgarbossaZFC20,
  author       = {Fabio Sgarbossa and
                  Ilenia Zennaro and
                  Eleonora Florian and
                  Martina Calzavara},
  title        = {Age replacement policy in the case of no data: the effect of {Weibull}
                  parameter estimation},
  journal      = {Int. J. Prod. Res.},
  volume       = {58},
  number       = {19},
  pages        = {5851--5869},
  year         = {2020},
  url          = {https://doi.org/10.1080/00207543.2019.1660824},
  doi          = {10.1080/00207543.2019.1660824},
  timestamp    = {Wed, 07 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijpr/SgarbossaZFC20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijsysc/HanF20,
  author       = {Han, Kezhen and
                  Feng, Jian},
  title        = {Fault tolerant tracking control for a class of linear parameter varying systems using reduced-order simultaneous estimator and optimal preview policy},
  journal      = {Int. J. Syst. Sci.},
  volume       = {51},
  number       = {2},
  pages        = {313--333},
  year         = {2020},
  url          = {https://doi.org/10.1080/00207721.2019.1704096},
  doi          = {10.1080/00207721.2019.1704096},
  timestamp    = {Fri, 08 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijsysc/HanF20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tinstmc/RenDZ020,
  author       = {Ren, He and
                  Dai, Jing and
                  Zhang, Huaguang and
                  Zhang, Kun},
  title        = {Off-policy integral reinforcement learning algorithm in dealing with nonzero sum game for nonlinear distributed parameter systems},
  journal      = {Trans. Inst. Meas. Control},
  volume       = {42},
  number       = {15},
  pages        = {2919--2928},
  year         = {2020},
  url          = {https://doi.org/10.1177/0142331220932634},
  doi          = {10.1177/0142331220932634},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tinstmc/RenDZ020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fuzzIEEE/SachanD20,
  author       = {Swati Sachan and
                  Nishant Donchak},
  title        = {Generalized Stochastic {Petri}-Net Algorithm with Fuzzy Parameters to
                  Evaluate Infrastructure Asset Management Policy},
  booktitle    = {29th {IEEE} International Conference on Fuzzy Systems, {FUZZ-IEEE}
                  2020, Glasgow, UK, July 19-24, 2020},
  pages        = {1--8},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/FUZZ48607.2020.9177562},
  doi          = {10.1109/FUZZ48607.2020.9177562},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/fuzzIEEE/SachanD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcnn/Hosino20,
  author       = {Tikara Hosino},
  title        = {Variational {Bayesian} Parameter-Based Policy Exploration},
  booktitle    = {2020 International Joint Conference on Neural Networks, {IJCNN} 2020,
                  Glasgow, United Kingdom, July 19-24, 2020},
  pages        = {1--7},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/IJCNN48605.2020.9207091},
  doi          = {10.1109/IJCNN48605.2020.9207091},
  timestamp    = {Tue, 06 Oct 2020 15:44:04 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcnn/Hosino20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/PossasB0FR20,
  author       = {Rafael Possas and
                  Lucas Barcelos and
                  Rafael Oliveira and
                  Dieter Fox and
                  Fabio Ramos},
  title        = {Online {BayesSim} for Combined Simulator Parameter Inference and Policy
                  Improvement},
  booktitle    = {{IEEE/RSJ} International Conference on Intelligent Robots and Systems,
                  {IROS} 2020, Las Vegas, NV, USA, October 24, 2020 - January 24, 2021},
  pages        = {5445--5452},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/IROS45743.2020.9341401},
  doi          = {10.1109/IROS45743.2020.9341401},
  timestamp    = {Tue, 18 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/PossasB0FR20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/wpes/SomeR20,
  author       = {Som{\'{e}}, Doli{\`{e}}re Francis and
                  Rezk, Tamara},
  editor       = {Ligatti, Jay and
                  Ou, Xinming and
                  Lueks, Wouter and
                  Syverson, Paul},
  title        = {Strenghtening Content Security Policy via Monitoring and {URL} Parameters Filtering},
  booktitle    = {WPES'20: Proceedings of the 19th Workshop on Privacy in the Electronic Society, Virtual Event, USA, November 9, 2020},
  pages        = {1--13},
  publisher    = {{ACM}},
  year         = {2020},
  url          = {https://doi.org/10.1145/3411497.3420222},
  doi          = {10.1145/3411497.3420222},
  timestamp    = {Wed, 16 Dec 2020 15:24:47 +0100},
  biburl       = {https://dblp.org/rec/conf/wpes/SomeR20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-07554,
  author       = {Tang, Yunhao and
                  Choromanski, Krzysztof},
  title        = {Online Hyper-parameter Tuning in Off-policy Learning via Evolutionary Strategies},
  journal      = {CoRR},
  volume       = {abs/2006.07554},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.07554},
  eprinttype   = {arXiv},
  eprint       = {2006.07554},
  timestamp    = {Wed, 17 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-07554.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-03532,
  author       = {Sestini, Alessandro and
                  Kuhnle, Alexander and
                  Bagdanov, Andrew D.},
  title        = {Deep Policy Networks for {NPC} Behaviors that Adapt to Changing Design Parameters in Roguelike Games},
  journal      = {CoRR},
  volume       = {abs/2012.03532},
  year         = {2020},
  url          = {https://arxiv.org/abs/2012.03532},
  eprinttype   = {arXiv},
  eprint       = {2012.03532},
  timestamp    = {Wed, 09 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2012-03532.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/infor/ChangCYZ19,
  author       = {Chang, Chin{-}Chih and
                  Chen, Yen{-}Luan and
                  Yin, Xiaoling and
                  Zhang, Zhe George},
  title        = {Generalized multi-parameter preventive replacement policy for systems with random processing times},
  journal      = {{INFOR} Inf. Syst. Oper. Res.},
  volume       = {57},
  number       = {2},
  pages        = {187--203},
  year         = {2019},
  url          = {https://doi.org/10.1080/03155986.2018.1533210},
  doi          = {10.1080/03155986.2018.1533210},
  timestamp    = {Thu, 16 Apr 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/infor/ChangCYZ19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/puc/ValmassoiGSKP19,
  author       = {Arianna Valmassoi and
                  Salem Gharbia and
                  Di Sabatino, Silvana and
                  Prashant Kumar and
                  Francesco Pilla},
  title        = {Future impacts of the reforestation policy on the atmospheric parameters
                  in {Ireland}: a sensitivity study including heat discomfort impacts
                  on humans and livestock},
  journal      = {Pers. Ubiquitous Comput.},
  volume       = {23},
  number       = {5-6},
  pages        = {707--721},
  year         = {2019},
  url          = {https://doi.org/10.1007/s00779-018-1160-3},
  doi          = {10.1007/S00779-018-1160-3},
  timestamp    = {Thu, 09 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/puc/ValmassoiGSKP19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ccta/OhKHKLKLLC19,
  author       = {Oh, Tae{-}Ho and
                  Kim, Tae{-}Il and
                  Han, Ji{-}Seok and
                  Kim, Young{-}Seok and
                  Lee, Ji{-}Hyung and
                  Kim, Sang{-}Oh and
                  Lee, Sang{-}Sub and
                  Lee, Sang{-}Hoon and
                  Cho, Dong{-}Il Dan},
  title        = {Deep Deterministic Policy Gradient-based Parameter Selection Method of Notch Filters for Suppressing Mechanical Resonance in Industrial Servo Systems},
  booktitle    = {2019 {IEEE} Conference on Control Technology and Applications, {CCTA} 2019, Hong Kong, SAR, China, August 19-21, 2019},
  pages        = {320--324},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/CCTA.2019.8920682},
  doi          = {10.1109/CCTA.2019.8920682},
  timestamp    = {Tue, 14 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ccta/OhKHKLKLLC19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icra/LiGWSH19,
  author       = {Li, Minhan and
                  Gao, Xiang and
                  Wen, Yue and
                  Si, Jennie and
                  Huang, He Helen},
  title        = {Offline Policy Iteration Based Reinforcement Learning Controller for Online Robotic Knee Prosthesis Parameter Tuning},
  booktitle    = {International Conference on Robotics and Automation, {ICRA} 2019, Montreal, QC, Canada, May 20-24, 2019},
  pages        = {2831--2837},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ICRA.2019.8794212},
  doi          = {10.1109/ICRA.2019.8794212},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icra/LiGWSH19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ictc/JungBLKP19,
  author       = {Jung, Joonyoung and
                  Bae, Jang Won and
                  Lee, Chun{-}Hee and
                  Kang, Dong{-}oh and
                  Paik, Euihyun},
  title        = {{GBM} based Policy Influence Analysis of Agent Simulation Parameters},
  booktitle    = {2019 International Conference on Information and Communication Technology Convergence, {ICTC} 2019, Jeju Island, Korea (South), October 16-18, 2019},
  pages        = {1324--1326},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ICTC46691.2019.8939694},
  doi          = {10.1109/ICTC46691.2019.8939694},
  timestamp    = {Tue, 07 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ictc/JungBLKP19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ro-man/PrakashVB19,
  author       = {Prakash, Ravi and
                  Vohra, Mohit and
                  Behera, Laxmidhar},
  title        = {Learning Optimal Parameterized Policy for High Level Strategies in a Game Setting},
  booktitle    = {28th {IEEE} International Conference on Robot and Human Interactive Communication, {RO-MAN} 2019, New Delhi, India, October 14-18, 2019},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/RO-MAN46459.2019.8956383},
  doi          = {10.1109/RO-MAN46459.2019.8956383},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ro-man/PrakashVB19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/firai/QueisserS18,
  author       = {Queisser, Jeffrey F. and
                  Steil, Jochen J.},
  title        = {Bootstrapping of Parameterized Skills Through Hybrid Optimization in Task and Policy Spaces},
  journal      = {Frontiers Robotics {AI}},
  volume       = {5},
  pages        = {49},
  year         = {2018},
  url          = {https://doi.org/10.3389/frobt.2018.00049},
  doi          = {10.3389/FROBT.2018.00049},
  timestamp    = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/firai/QueisserS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijrsda/SinghPNS18,
  author       = {Trailokyanath Singh and
                  Hadibandhu Pattanayak and
                  Ameeya Kumar Nayak and
                  Nirakar Niranjan Sethy},
  title        = {An Optimal Policy with Three-Parameter {Weibull} Distribution Deterioration,
                  Quadratic Demand, and Salvage Value Under Partial Backlogging},
  journal      = {Int. J. Rough Sets Data Anal.},
  volume       = {5},
  number       = {1},
  pages        = {79--98},
  year         = {2018},
  url          = {https://doi.org/10.4018/IJRSDA.2018010106},
  doi          = {10.4018/IJRSDA.2018010106},
  timestamp    = {Thu, 16 Apr 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijrsda/SinghPNS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ior/VercraeneGK18,
  author       = {Vercraene, Samuel and
                  Gayon, Jean{-}Philippe and
                  Karaesmen, Fikri},
  title        = {Effects of System Parameters on the Optimal Cost and Policy in a Class of Multidimensional Queueing Control Problems},
  journal      = {Oper. Res.},
  volume       = {66},
  number       = {1},
  pages        = {150--162},
  year         = {2018},
  url          = {https://doi.org/10.1287/opre.2017.1600},
  doi          = {10.1287/OPRE.2017.1600},
  timestamp    = {Tue, 01 Jun 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ior/VercraeneGK18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ccgrid/ChunduriGLSN18,
  author       = {Chunduri, Sudheer and
                  Ghaffari, Meysam and
                  Lahijani, Mehran Sadeghi and
                  Srinivasan, Ashok and
                  Namilae, Sirish},
  editor       = {El{-}Araby, Esam and
                  Panda, Dhabaleswar K. and
                  Gesing, Sandra and
                  Apon, Amy W. and
                  Kindratenko, Volodymyr V. and
                  Cafaro, Massimo and
                  Cuzzocrea, Alfredo},
  title        = {Parallel Low Discrepancy Parameter Sweep for Public Health Policy},
  booktitle    = {18th {IEEE/ACM} International Symposium on Cluster, Cloud and Grid Computing, {CCGRID} 2018, Washington, DC, USA, May 1-4, 2018},
  pages        = {291--300},
  publisher    = {{IEEE} Computer Society},
  year         = {2018},
  url          = {https://doi.org/10.1109/CCGRID.2018.00044},
  doi          = {10.1109/CCGRID.2018.00044},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ccgrid/ChunduriGLSN18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/gecco/OhashiFSA18,
  author       = {Ohashi, Kyotaro and
                  Fujiyoshi, Natsuki and
                  Sakamoto, Naoki and
                  Akimoto, Youhei},
  editor       = {Aguirre, Hern{\'{a}}n E. and
                  Takadama, Keiki},
  title        = {Model parameter adaptive instance-based policy optimization for episodic control tasks of nonholonomic systems},
  booktitle    = {Proceedings of the Genetic and Evolutionary Computation Conference Companion, {GECCO} 2018, Kyoto, Japan, July 15-19, 2018},
  pages        = {1426--1433},
  publisher    = {{ACM}},
  year         = {2018},
  url          = {https://doi.org/10.1145/3205651.3208295},
  doi          = {10.1145/3205651.3208295},
  timestamp    = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/gecco/OhashiFSA18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iccsip/GaoWLSH18,
  author       = {Xiang Gao and
                  Yue Wen and
                  Minhan Li and
                  Jennie Si and
                  He Helen Huang},
  editor       = {Fuchun Sun and
                  Huaping Liu and
                  Dewen Hu},
  title        = {Robotic Knee Parameter Tuning Using Approximate Policy Iteration},
  booktitle    = {Cognitive Systems and Signal Processing - 4th International Conference,
                  {ICCSIP} 2018, Beijing, China, November 29 - December 1, 2018, Revised
                  Selected Papers, Part {I}},
  series       = {Communications in Computer and Information Science},
  volume       = {1005},
  pages        = {554--563},
  publisher    = {Springer},
  year         = {2018},
  url          = {https://doi.org/10.1007/978-981-13-7983-3\_49},
  doi          = {10.1007/978-981-13-7983-3\_49},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iccsip/GaoWLSH18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icra/Chatzilygeroudis18,
  author       = {Chatzilygeroudis, Konstantinos I. and
                  Mouret, Jean{-}Baptiste},
  title        = {Using Parameterized Black-Box Priors to Scale Up Model-Based Policy Search for Robotics},
  booktitle    = {2018 {IEEE} International Conference on Robotics and Automation, {ICRA} 2018, Brisbane, Australia, May 21-25, 2018},
  pages        = {1--9},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICRA.2018.8461083},
  doi          = {10.1109/ICRA.2018.8461083},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/Chatzilygeroudis18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/IshigeUTK18,
  author       = {Ishige, Matthew and
                  Umedachi, Takuya and
                  Taniguchi, Tadahiro and
                  Kawahara, Yoshihiro},
  title        = {Learning Oscillator-Based Gait Controller for String-Form Soft Robots Using Parameter-Exploring Policy Gradients},
  booktitle    = {2018 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2018, Madrid, Spain, October 1-5, 2018},
  pages        = {6445--6452},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/IROS.2018.8594338},
  doi          = {10.1109/IROS.2018.8594338},
  timestamp    = {Wed, 17 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iros/IshigeUTK18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/safecomp/BehzadanM18,
  author       = {Vahid Behzadan and
                  Arslan Munir},
  editor       = {Barbara Gallina and
                  Amund Skavhaug and
                  Erwin Schoitsch and
                  Friedemann Bitsch},
  title        = {Mitigation of Policy Manipulation Attacks on Deep {Q}-Networks with
                  Parameter-Space Noise},
  booktitle    = {Computer Safety, Reliability, and Security - {SAFECOMP} 2018 Workshops,
                  ASSURE, DECSoS, SASSUR, STRIVE, and WAISE, V{\"{a}}ster{\aa}s,
                  Sweden, September 18, 2018, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {11094},
  pages        = {406--417},
  publisher    = {Springer},
  year         = {2018},
  url          = {https://doi.org/10.1007/978-3-319-99229-7\_34},
  doi          = {10.1007/978-3-319-99229-7\_34},
  timestamp    = {Tue, 14 May 2019 10:00:44 +0200},
  biburl       = {https://dblp.org/rec/conf/safecomp/BehzadanM18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-02190,
  author       = {Vahid Behzadan and
                  Arslan Munir},
  title        = {Mitigation of Policy Manipulation Attacks on Deep {Q}-Networks with
                  Parameter-Space Noise},
  journal      = {CoRR},
  volume       = {abs/1806.02190},
  year         = {2018},
  url          = {https://arxiv.org/abs/1806.02190},
  eprinttype   = {arXiv},
  eprint       = {1806.02190},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-02190.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ant/ValmassoiGSSP17,
  author       = {Arianna Valmassoi and
                  Salem Gharbia and
                  Santa Stibe and
                  Di Sabatino, Silvana and
                  Francesco Pilla},
  editor       = {Elhadi M. Shakshuki},
  title        = {Future Impacts of the Reforestation Policy on the atmospheric parameters:
                  a sensitivity study over {Ireland}},
  booktitle    = {The 8th International Conference on Ambient Systems, Networks and
                  Technologies {(ANT} 2017) / The 7th International Conference on Sustainable
                  Energy Information Technology {(SEIT} 2017), 16-19 May 2017, Madeira,
                  Portugal},
  series       = {Procedia Computer Science},
  volume       = {109},
  pages        = {367--375},
  publisher    = {Elsevier},
  year         = {2017},
  url          = {https://doi.org/10.1016/j.procs.2017.05.403},
  doi          = {10.1016/J.PROCS.2017.05.403},
  timestamp    = {Thu, 08 Jul 2021 16:04:01 +0200},
  biburl       = {https://dblp.org/rec/conf/ant/ValmassoiGSSP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1709-06917,
  author       = {Konstantinos I. Chatzilygeroudis and
                  Jean{-}Baptiste Mouret},
  title        = {Using Parameterized Black-Box Priors to Scale Up Model-Based Policy
                  Search for Robotics},
  journal      = {CoRR},
  volume       = {abs/1709.06917},
  year         = {2017},
  url          = {https://arxiv.org/abs/1709.06917},
  eprinttype   = {arXiv},
  eprint       = {1709.06917},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1709-06917.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1710-00336,
  author       = {Xiangxiang Chu and
                  Hangjun Ye},
  title        = {Parameter Sharing Deep Deterministic Policy Gradient for Cooperative
                  Multi-agent Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1710.00336},
  year         = {2017},
  url          = {https://arxiv.org/abs/1710.00336},
  eprinttype   = {arXiv},
  eprint       = {1710.00336},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-00336.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/alr/WangUD16,
  author       = {Jiexin Wang and
                  Eiji Uchibe and
                  Kenji Doya},
  title        = {{EM}-based policy hyper parameter exploration: application to standing
                  and balancing of a two-wheeled smartphone robot},
  journal      = {Artif. Life Robotics},
  volume       = {21},
  number       = {1},
  pages        = {125--131},
  year         = {2016},
  url          = {https://doi.org/10.1007/s10015-015-0260-7},
  doi          = {10.1007/S10015-015-0260-7},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/alr/WangUD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jota/ChengZFW16,
  author       = {Cheng, Kang and
                  Zhang, Kanjian and
                  Fei, Shumin and
                  Wei, Haikun},
  title        = {Potential-Based Least-Squares Policy Iteration for a Parameterized Feedback Control System},
  journal      = {J. Optim. Theory Appl.},
  volume       = {169},
  number       = {2},
  pages        = {692--704},
  year         = {2016},
  url          = {https://doi.org/10.1007/s10957-015-0809-6},
  doi          = {10.1007/S10957-015-0809-6},
  timestamp    = {Tue, 07 Apr 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jota/ChengZFW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/sttt/RaniseTT16,
  author       = {Ranise, Silvio and
                  Truong, Anh Tuan and
                  Traverso, Riccardo},
  title        = {Parameterized model checking for security policy analysis},
  journal      = {Int. J. Softw. Tools Technol. Transf.},
  volume       = {18},
  number       = {5},
  pages        = {559--573},
  year         = {2016},
  url          = {https://doi.org/10.1007/s10009-015-0410-1},
  doi          = {10.1007/S10009-015-0410-1},
  timestamp    = {Tue, 16 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/sttt/RaniseTT16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/arobots/ReinhartS15,
  author       = {Reinhart, Ren{\'{e}} Felix and
                  Steil, Jochen Jakob},
  title        = {Efficient policy search in low-dimensional embedding spaces by generalizing motion primitives with a parameterized skill memory},
  journal      = {Auton. Robots},
  volume       = {38},
  number       = {4},
  pages        = {331--348},
  year         = {2015},
  url          = {https://doi.org/10.1007/s10514-014-9417-9},
  doi          = {10.1007/S10514-014-9417-9},
  timestamp    = {Thu, 18 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/arobots/ReinhartS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijsysc/MovahedZ15,
  author       = {Movahed, Kamran Karimi and
                  Zhang, Zhi{-}Hai},
  title        = {Robust design of (\emph{s}, \emph{S}) inventory policy parameters in supply chains with demand and lead time uncertainties},
  journal      = {Int. J. Syst. Sci.},
  volume       = {46},
  number       = {12},
  pages        = {2258--2268},
  year         = {2015},
  url          = {https://doi.org/10.1080/00207721.2013.860637},
  doi          = {10.1080/00207721.2013.860637},
  timestamp    = {Wed, 22 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijsysc/MovahedZ15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijsysc/YangC15,
  author       = {Yang, Dong{-}Yuh and
                  Chang, Po{-}Kai},
  title        = {A parametric programming solution to the \emph{F}-policy queue with fuzzy parameters},
  journal      = {Int. J. Syst. Sci.},
  volume       = {46},
  number       = {4},
  pages        = {590--598},
  year         = {2015},
  url          = {https://doi.org/10.1080/00207721.2013.792975},
  doi          = {10.1080/00207721.2013.792975},
  timestamp    = {Wed, 22 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijsysc/YangC15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icarsc/AbdolmalekiLR0N15,
  author       = {Abdolmaleki, Abbas and
                  Lau, Nuno and
                  Reis, Lu{\'{\i}}s Paulo and
                  Peters, Jan and
                  Neumann, Gerhard},
  title        = {Contextual Policy Search for Generalizing a Parameterized Biped Walking Controller},
  booktitle    = {2015 {IEEE} International Conference on Autonomous Robot Systems and Competitions, {ICARSC} 2015, Vila Real, Portugal, April 8-10, 2015},
  pages        = {17--22},
  publisher    = {{IEEE}},
  year         = {2015},
  url          = {https://doi.org/10.1109/ICARSC.2015.43},
  doi          = {10.1109/ICARSC.2015.43},
  timestamp    = {Wed, 16 Oct 2019 14:14:57 +0200},
  biburl       = {https://dblp.org/rec/conf/icarsc/AbdolmalekiLR0N15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcnn/ManganiniPRB15,
  author       = {Giorgio Manganini and
                  Matteo Pirotta and
                  Marcello Restelli and
                  Luca Bascetta},
  title        = {Following {Newton} direction in Policy Gradient with parameter exploration},
  booktitle    = {2015 International Joint Conference on Neural Networks, {IJCNN} 2015,
                  Killarney, Ireland, July 12-17, 2015},
  pages        = {1--8},
  publisher    = {{IEEE}},
  year         = {2015},
  url          = {https://doi.org/10.1109/IJCNN.2015.7280673},
  doi          = {10.1109/IJCNN.2015.7280673},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcnn/ManganiniPRB15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ssci/VaerenberghHDMN15,
  author       = {Van Vaerenbergh, Kevin and
                  De Hauwere, Yann{-}Micha{\"{e}}l and
                  Bruno Depraetere and
                  Van Moffaert, Kristof and
                  Ann Now{\'{e}}},
  title        = {A Policy Gradient with Parameter-Based Exploration Approach for Zone-Heating},
  booktitle    = {{IEEE} Symposium Series on Computational Intelligence, {SSCI} 2015,
                  Cape Town, South Africa, December 7-10, 2015},
  pages        = {556--563},
  publisher    = {{IEEE}},
  year         = {2015},
  url          = {https://doi.org/10.1109/SSCI.2015.88},
  doi          = {10.1109/SSCI.2015.88},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ssci/VaerenberghHDMN15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nn/TangkarattMZMS14,
  author       = {Tangkaratt, Voot and
                  Mori, Syogo and
                  Zhao, Tingting and
                  Morimoto, Jun and
                  Sugiyama, Masashi},
  title        = {Model-based policy gradients with parameter-based exploration by least-squares conditional density estimation},
  journal      = {Neural Networks},
  volume       = {57},
  pages        = {128--140},
  year         = {2014},
  url          = {https://doi.org/10.1016/j.neunet.2014.06.006},
  doi          = {10.1016/J.NEUNET.2014.06.006},
  timestamp    = {Mon, 07 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nn/TangkarattMZMS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/ReinhartS14,
  author       = {Reinhart, Ren{\'{e}} Felix and
                  Steil, Jochen Jakob},
  title        = {Efficient policy search with a parameterized skill memory},
  booktitle    = {2014 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2014, Chicago, IL, USA, September 14-18, 2014},
  pages        = {1400--1407},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/IROS.2014.6942740},
  doi          = {10.1109/IROS.2014.6942740},
  timestamp    = {Tue, 05 Sep 2023 15:07:47 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/ReinhartS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/eor/InderfurthKK13,
  author       = {Karl Inderfurth and
                  Peter Kelle and
                  Rainer Kleber},
  title        = {Dual sourcing using capacity reservation and spot market: Optimal
                  procurement policy and heuristic parameter determination},
  journal      = {Eur. J. Oper. Res.},
  volume       = {225},
  number       = {2},
  pages        = {298--309},
  year         = {2013},
  url          = {https://doi.org/10.1016/j.ejor.2012.08.025},
  doi          = {10.1016/J.EJOR.2012.08.025},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/eor/InderfurthKK13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/neco/ZhaoHTMS13,
  author       = {Tingting Zhao and
                  Hirotaka Hachiya and
                  Voot Tangkaratt and
                  Jun Morimoto and
                  Masashi Sugiyama},
  title        = {Efficient Sample Reuse in Policy Gradients with Parameter-Based Exploration},
  journal      = {Neural Comput.},
  volume       = {25},
  number       = {6},
  pages        = {1512--1547},
  year         = {2013},
  url          = {https://doi.org/10.1162/NECO\_a\_00452},
  doi          = {10.1162/NECO\_A\_00452},
  timestamp    = {Mon, 07 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/neco/ZhaoHTMS13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/soco/NiL13,
  author       = {Yaodong Ni and
                  Zhi{-}Qiang Liu},
  title        = {Policy iteration for bounded-parameter {POMDPs}},
  journal      = {Soft Comput.},
  volume       = {17},
  number       = {4},
  pages        = {537--548},
  year         = {2013},
  url          = {https://doi.org/10.1007/s00500-012-0932-3},
  doi          = {10.1007/S00500-012-0932-3},
  timestamp    = {Sat, 20 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/soco/NiL13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/IEEEcloud/KikuchiUKM13,
  author       = {Shinji Kikuchi and
                  Tetsuya Uchiumi and
                  Shinya Kitajima and
                  Yasuhide Matsumoto},
  title        = {Configuration Policy Extraction for Parameter Settings in Cloud Infrastructure
                  Using {UML/OCL} Verification},
  booktitle    = {2013 {IEEE} Sixth International Conference on Cloud Computing, Santa
                  Clara, CA, USA, June 28 - July 3, 2013},
  pages        = {67--74},
  publisher    = {{IEEE} Computer Society},
  year         = {2013},
  url          = {https://doi.org/10.1109/CLOUD.2013.51},
  doi          = {10.1109/CLOUD.2013.51},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/IEEEcloud/KikuchiUKM13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/apnoms/KimPJ13,
  author       = {Hyeonwoo Kim and
                  Wooguil Pak and
                  Hongtaek Ju},
  title        = {Correlation analysis between inference accuracy and inference parameters
                  for stateless firewall policy},
  booktitle    = {15th Asia-Pacific Network Operations and Management Symposium, {APNOMS}
                  2013, Hiroshima, Japan, September 25-27, 2013},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://ieeexplore.ieee.org/document/6665274/},
  timestamp    = {Sun, 08 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/apnoms/KimPJ13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ascc/XiaJ13,
  author       = {Li Xia and
                  Qing{-}Shan Jia},
  title        = {Policy iteration for parameterized {Markov} decision processes and its
                  application},
  booktitle    = {9th Asian Control Conference, {ASCC} 2013, Istanbul, Turkey, June
                  23-26, 2013},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/ASCC.2013.6606023},
  doi          = {10.1109/ASCC.2013.6606023},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ascc/XiaJ13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icann/Sehnke13,
  author       = {Frank Sehnke},
  editor       = {Valeri M. Mladenov and
                  Petia D. Koprinkova{-}Hristova and
                  G{\"{u}}nther Palm and
                  Alessandro E. P. Villa and
                  Bruno Appollini and
                  Nikola K. Kasabov},
  title        = {Efficient Baseline-Free Sampling in Parameter Exploring Policy Gradients:
                  Super Symmetric {PGPE}},
  booktitle    = {Artificial Neural Networks and Machine Learning - {ICANN} 2013 - 23rd
                  International Conference on Artificial Neural Networks, Sofia, Bulgaria,
                  September 10-13, 2013. Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {8131},
  pages        = {130--137},
  publisher    = {Springer},
  year         = {2013},
  url          = {https://doi.org/10.1007/978-3-642-40728-4\_17},
  doi          = {10.1007/978-3-642-40728-4\_17},
  timestamp    = {Thu, 04 Jun 2020 19:07:57 +0200},
  biburl       = {https://dblp.org/rec/conf/icann/Sehnke13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MoriTZMS13,
  author       = {Syogo Mori and
                  Voot Tangkaratt and
                  Tingting Zhao and
                  Jun Morimoto and
                  Masashi Sugiyama},
  title        = {Model-Based Policy Gradients with Parameter-Based Exploration by Least-Squares
                  Conditional Density Estimation},
  journal      = {CoRR},
  volume       = {abs/1307.5118},
  year         = {2013},
  url          = {https://arxiv.org/abs/1307.5118},
  eprinttype   = {arXiv},
  eprint       = {1307.5118},
  timestamp    = {Mon, 07 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MoriTZMS13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/Sehnke13,
  author       = {Frank Sehnke},
  title        = {Efficient Baseline-free Sampling in Parameter Exploring Policy Gradients:
                  Super Symmetric {PGPE}},
  journal      = {CoRR},
  volume       = {abs/1312.3811},
  year         = {2013},
  url          = {https://arxiv.org/abs/1312.3811},
  eprinttype   = {arXiv},
  eprint       = {1312.3811},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/Sehnke13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1301-3966,
  author       = {Tingting Zhao and
                  Hirotaka Hachiya and
                  Voot Tangkaratt and
                  Jun Morimoto and
                  Masashi Sugiyama},
  title        = {Efficient Sample Reuse in Policy Gradients with Parameter-based Exploration},
  journal      = {CoRR},
  volume       = {abs/1301.3966},
  year         = {2013},
  url          = {https://arxiv.org/abs/1301.3966},
  eprinttype   = {arXiv},
  eprint       = {1301.3966},
  timestamp    = {Mon, 07 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1301-3966.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/dnb/Sehnke12,
  author       = {Frank Sehnke},
  title        = {Parameter exploring policy gradients and their implications},
  school       = {Technical University Munich},
  year         = {2012},
  url          = {https://nbn-resolving.org/urn:nbn:de:bvb:91-diss-20121030-1099128-0-7},
  urn          = {urn:nbn:de:bvb:91-diss-20121030-1099128-0-7},
  timestamp    = {Sat, 17 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/phd/dnb/Sehnke12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tac/JettoO12,
  author       = {Leopoldo Jetto and
                  Valentina Orsini},
  title        = {A Supervised Switching Control Policy for {LPV} Systems With Inaccurate
                  Parameter Knowledge},
  journal      = {{IEEE} Trans. Autom. Control.},
  volume       = {57},
  number       = {6},
  pages        = {1527--1532},
  year         = {2012},
  url          = {https://doi.org/10.1109/TAC.2011.2174690},
  doi          = {10.1109/TAC.2011.2174690},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/JettoO12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tnsm/SamakA12,
  author       = {Taghrid Samak and
                  Ehab Al{-}Shaer},
  title        = {Fuzzy Conflict Analysis for {QoS} Policy Parameters in {DiffServ} Networks},
  journal      = {{IEEE} Trans. Netw. Serv. Manag.},
  volume       = {9},
  number       = {4},
  pages        = {459--472},
  year         = {2012},
  url          = {https://doi.org/10.1109/TNSM.2012.062512.120308},
  doi          = {10.1109/TNSM.2012.062512.120308},
  timestamp    = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tnsm/SamakA12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/MisuK12,
  author       = {Teruhisa Misu and
                  Hideki Kashioka},
  title        = {Simultaneous feature selection and parameter optimization for training
                  of dialog policy by reinforcement learning},
  booktitle    = {2012 {IEEE} Spoken Language Technology Workshop ({SLT}), Miami, FL,
                  USA, December 2-5, 2012},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/SLT.2012.6424160},
  doi          = {10.1109/SLT.2012.6424160},
  timestamp    = {Wed, 16 Oct 2019 14:14:53 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/MisuK12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/candie/NevesSM11,
  author       = {Maxstaley L. Neves and
                  Leonardo P. Santiago and
                  Carlos A. Maia},
  title        = {A condition-based maintenance policy and input parameters estimation
                  for deteriorating systems under periodic inspection},
  journal      = {Comput. Ind. Eng.},
  volume       = {61},
  number       = {3},
  pages        = {503--511},
  year         = {2011},
  url          = {https://doi.org/10.1016/j.cie.2011.04.005},
  doi          = {10.1016/J.CIE.2011.04.005},
  timestamp    = {Thu, 20 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/candie/NevesSM11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cor/PaulR11,
  author       = {Brijesh Paul and
                  Chandrasekharan Rajendran},
  title        = {Rationing mechanisms and inventory control-policy parameters for a
                  divergent supply chain operating with lost sales and costs of review},
  journal      = {Comput. Oper. Res.},
  volume       = {38},
  number       = {8},
  pages        = {1117--1130},
  year         = {2011},
  url          = {https://doi.org/10.1016/j.cor.2010.11.002},
  doi          = {10.1016/J.COR.2010.11.002},
  timestamp    = {Tue, 18 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/cor/PaulR11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/infocom/WeinsbergSM11,
  author       = {Udi Weinsberg and
                  Augustin Soule and
                  Laurent Massouli{\'{e}}},
  title        = {Inferring traffic shaping and policy parameters using end host measurements},
  booktitle    = {{INFOCOM} 2011. 30th {IEEE} International Conference on Computer Communications,
                  Joint Conference of the {IEEE} Computer and Communications Societies,
                  10-15 April 2011, Shanghai, China},
  pages        = {151--155},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/INFCOM.2011.5934941},
  doi          = {10.1109/INFCOM.2011.5934941},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/infocom/WeinsbergSM11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/KormushevUCTC11,
  author       = {Petar Kormushev and
                  Barkan Ugurlu and
                  Sylvain Calinon and
                  Nikolaos G. Tsagarakis and
                  Darwin G. Caldwell},
  title        = {Bipedal walking energy minimization by reinforcement learning with
                  evolving policy parameterization},
  booktitle    = {2011 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, {IROS} 2011, San Francisco, CA, USA, September 25-30, 2011},
  pages        = {318--324},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/IROS.2011.6094427},
  doi          = {10.1109/IROS.2011.6094427},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/KormushevUCTC11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/automatica/MarkouP10,
  author       = {Michael M. Markou and
                  Christos G. Panayiotou},
  title        = {On-line control of the threshold policy parameter for multiclass systems},
  journal      = {Autom.},
  volume       = {46},
  number       = {3},
  pages        = {528--536},
  year         = {2010},
  url          = {https://doi.org/10.1016/j.automatica.2009.12.002},
  doi          = {10.1016/J.AUTOMATICA.2009.12.002},
  timestamp    = {Thu, 20 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/automatica/MarkouP10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nn/SehnkeORGPS10,
  author       = {Frank Sehnke and
                  Christian Osendorfer and
                  Thomas R{\"{u}}ckstie{\ss} and
                  Alex Graves and
                  Jan Peters and
                  J{\"{u}}rgen Schmidhuber},
  title        = {Parameter-exploring policy gradients},
  journal      = {Neural Networks},
  volume       = {23},
  number       = {4},
  pages        = {551--559},
  year         = {2010},
  url          = {https://doi.org/10.1016/j.neunet.2009.12.004},
  doi          = {10.1016/J.NEUNET.2009.12.004},
  timestamp    = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/nn/SehnkeORGPS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icann/GruttnerSSS10,
  author       = {Mandy Gr{\"{u}}ttner and
                  Frank Sehnke and
                  Tom Schaul and
                  J{\"{u}}rgen Schmidhuber},
  editor       = {Konstantinos I. Diamantaras and
                  Wlodek Duch and
                  Lazaros S. Iliadis},
  title        = {Multi-Dimensional Deep Memory {Atari-Go} Players for Parameter Exploring
                  Policy Gradients},
  booktitle    = {Artificial Neural Networks - {ICANN} 2010, 20th International Conference,
                  Thessaloniki, Greece, September 15-18, 2010, Proceedings, Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {6353},
  pages        = {114--123},
  publisher    = {Springer},
  year         = {2010},
  url          = {https://doi.org/10.1007/978-3-642-15822-3\_14},
  doi          = {10.1007/978-3-642-15822-3\_14},
  timestamp    = {Tue, 14 May 2019 10:00:49 +0200},
  biburl       = {https://dblp.org/rec/conf/icann/GruttnerSSS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icmla/SehnkeGOS10,
  author       = {Frank Sehnke and
                  Alex Graves and
                  Christian Osendorfer and
                  J{\"{u}}rgen Schmidhuber},
  editor       = {Sorin Draghici and
                  Taghi M. Khoshgoftaar and
                  Vasile Palade and
                  Witold Pedrycz and
                  M. Arif Wani and
                  Xingquan Zhu},
  title        = {Multimodal Parameter-exploring Policy Gradients},
  booktitle    = {The Ninth International Conference on Machine Learning and Applications,
                  {ICMLA} 2010, Washington, DC, USA, 12-14 December 2010},
  pages        = {113--118},
  publisher    = {{IEEE} Computer Society},
  year         = {2010},
  url          = {https://doi.org/10.1109/ICMLA.2010.24},
  doi          = {10.1109/ICMLA.2010.24},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icmla/SehnkeGOS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/MiyamaeNOK10,
  author       = {Atsushi Miyamae and
                  Yuichi Nagata and
                  Isao Ono and
                  Shigenobu Kobayashi},
  editor       = {John D. Lafferty and
                  Christopher K. I. Williams and
                  John Shawe{-}Taylor and
                  Richard S. Zemel and
                  Aron Culotta},
  title        = {Natural Policy Gradient Methods with Parameter-based Exploration for
                  Control Tasks},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {1660--1668},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/44c4c17332cace2124a1a836d9fc4b6f-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/MiyamaeNOK10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/questa/CilOK09,
  author       = {Eren Basar {\c{C}}il and
                  E. Lerzan {\"{O}}rmeci and
                  Fikri Karaesmen},
  title        = {Effects of system parameters on the optimal policy structure in a
                  class of queueing control problems},
  journal      = {Queueing Syst. Theory Appl.},
  volume       = {61},
  number       = {4},
  pages        = {273--304},
  year         = {2009},
  url          = {https://doi.org/10.1007/s11134-009-9109-x},
  doi          = {10.1007/S11134-009-9109-X},
  timestamp    = {Tue, 01 Jun 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/questa/CilOK09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/candie/LauXZ08,
  author       = {R. S. M. Lau and
                  Jinxing Xie and
                  Xiande Zhao},
  title        = {Effects of inventory policy on supply chain performance: {A} simulation
                  study of critical decision parameters},
  journal      = {Comput. Ind. Eng.},
  volume       = {55},
  number       = {3},
  pages        = {620--633},
  year         = {2008},
  url          = {https://doi.org/10.1016/j.cie.2008.02.002},
  doi          = {10.1016/J.CIE.2008.02.002},
  timestamp    = {Thu, 20 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/candie/LauXZ08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icann/SehnkeORGPS08,
  author       = {Frank Sehnke and
                  Christian Osendorfer and
                  Thomas R{\"{u}}ckstie{\ss} and
                  Alex Graves and
                  Jan Peters and
                  J{\"{u}}rgen Schmidhuber},
  editor       = {Vera Kurkov{\'{a}} and
                  Roman Neruda and
                  Jan Koutn{\'{\i}}k},
  title        = {Policy Gradients with Parameter-Based Exploration for Control},
  booktitle    = {Artificial Neural Networks - {ICANN} 2008, 18th International Conference,
                  Prague, Czech Republic, September 3-6, 2008, Proceedings, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {5163},
  pages        = {387--396},
  publisher    = {Springer},
  year         = {2008},
  url          = {https://doi.org/10.1007/978-3-540-87536-9\_40},
  doi          = {10.1007/978-3-540-87536-9\_40},
  timestamp    = {Tue, 14 May 2019 10:00:49 +0200},
  biburl       = {https://dblp.org/rec/conf/icann/SehnkeORGPS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icuimc/GuptaGG08,
  author       = {Shyam K. Gupta and
                  Vikram Goyal and
                  Anand Gupta},
  editor       = {Won Kim and
                  Hyung{-}Jin Choi},
  title        = {Precomputation of privacy policy parameters for auditing {SQL} queries},
  booktitle    = {Proceedings of the 2nd International Conference on Ubiquitous Information
                  Management and Communication, {ICUIMC} 2008, Suwon, Korea, January
                  31 - February 01, 2008},
  pages        = {87--93},
  publisher    = {{ACM}},
  year         = {2008},
  url          = {https://doi.org/10.1145/1352793.1352812},
  doi          = {10.1145/1352793.1352812},
  timestamp    = {Fri, 23 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icuimc/GuptaGG08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jors/Garcia-FloresWB03,
  author       = {Rodolfo Garc{\'{\i}}a{-}Flores and
                  X. Z. Wang and
                  Thomas F. Burgess},
  title        = {Tuning inventory policy parameters in a small chemical company},
  journal      = {J. Oper. Res. Soc.},
  volume       = {54},
  number       = {4},
  pages        = {350--361},
  year         = {2003},
  url          = {https://doi.org/10.1057/palgrave.jors.2601530},
  doi          = {10.1057/PALGRAVE.JORS.2601530},
  timestamp    = {Fri, 22 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jors/Garcia-FloresWB03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/policy/BelokosztolszkiEM03,
  author       = {Andr{\'{a}}s Belokosztolszki and
                  David M. Eyers and
                  Ken Moody},
  title        = {Policy Contexts: Controlling Information Flow in Parameterised {RBAC}},
  booktitle    = {4th {IEEE} International Workshop on Policies for Distributed Systems
                  and Networks {(POLICY} 2003), 4-6 June 2003, Lake Como, Italy},
  pages        = {99--110},
  publisher    = {{IEEE} Computer Society},
  year         = {2003},
  url          = {https://doi.org/10.1109/POLICY.2003.1206964},
  doi          = {10.1109/POLICY.2003.1206964},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/policy/BelokosztolszkiEM03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icc/AbdallahEE99,
  author       = {Mohamed M. Abdallah and
                  Mahmoud T. El{-}Hadidi and
                  Khaled El{-}Sayed},
  title        = {Performance analysis and estimation of call admission policy parameters
                  for multiple traffic classes in wireless {ATM} networks},
  booktitle    = {1999 {IEEE} International Conference on Communications: Global Convergence
                  Through Communications, {ICC} 1999, Vancouver, BC, Canada, June 18-22,
                  1999},
  pages        = {404--410},
  publisher    = {{IEEE}},
  year         = {1999},
  url          = {https://doi.org/10.1109/ICC.1999.767972},
  doi          = {10.1109/ICC.1999.767972},
  timestamp    = {Mon, 23 Nov 2020 08:36:56 +0100},
  biburl       = {https://dblp.org/rec/conf/icc/AbdallahEE99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iscc/AbdallahEE99,
  author       = {Mohamed M. Abdallah and
                  Mahmoud T. El{-}Hadidi and
                  Khaled M. F. Elsayed},
  title        = {Effect of User Mobility on the {QoS} Parameters for the Guard Channel
                  Policy},
  booktitle    = {Proceedings of the Fourth {IEEE} Symposium on Computers and Communications
                  {(ISCC} 1999), 6-8 July 1999, Sharm El Sheikh, Red Sea, Egypt},
  pages        = {409--415},
  publisher    = {{IEEE} Computer Society},
  year         = {1999},
  url          = {https://doi.org/10.1109/ISCC.1999.780933},
  doi          = {10.1109/ISCC.1999.780933},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iscc/AbdallahEE99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/wcnc/AbdallahEE99,
  author       = {Mohamed M. Abdallah and
                  Khaled M. F. Elsayed and
                  Mahmoud T. El{-}Hadidi},
  title        = {Effect of user mobility on the {QoS} parameters for the guard channel
                  policy},
  booktitle    = {1999 {IEEE} Wireless Communications and Networking Conference, {WCNC}
                  1999, September 21-24, 1999, New Orleans, Louisiana, {USA}},
  pages        = {1503--1507},
  publisher    = {{IEEE}},
  year         = {1999},
  url          = {https://doi.org/10.1109/WCNC.1999.796989},
  doi          = {10.1109/WCNC.1999.796989},
  timestamp    = {Wed, 16 Oct 2019 14:14:50 +0200},
  biburl       = {https://dblp.org/rec/conf/wcnc/AbdallahEE99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/mmor/Eijs94,
  author       = {Marc J. G. van Eijs},
  title        = {On the determination of the control parameters of the optimal can-order
                  policy},
  journal      = {Math. Methods Oper. Res.},
  volume       = {39},
  number       = {3},
  pages        = {289--304},
  year         = {1994},
  url          = {https://doi.org/10.1007/BF01435459},
  doi          = {10.1007/BF01435459},
  timestamp    = {Tue, 03 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/mmor/Eijs94.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics