% Source: dblp computer science bibliography (https://dblp.org), publication search.
% Exported BibTeX results for the query "policy difference".
% The quoted phrase in the title uses LaTeX quote ligatures instead of straight
% ASCII double quotes, and is braced so sentence-casing styles keep "Real".
@article{DBLP:journals/ijinfoman/DwivediJHDAAAAAAAABBBBBBBCCCC24,
  author     = {Yogesh K. Dwivedi and
                Anand Jeyaraj and
                Laurie Hughes and
                Gareth H. Davies and
                Manju Ahuja and
                Mousa Ahmed Albashrawi and
                Adil S. Al{-}Busaidi and
                Salah A. Al{-}Sharhan and
                Khalid Ibrahim Al{-}Sulaiti and
                Levent Altinay and
                Shem Amalaya and
                Sunil Archak and
                Mar{\'{\i}}a Teresa Ballestar and
                Shonil A. Bhagwat and
                Anandhi Bharadwaj and
                Amit Bhushan and
                Indranil Bose and
                Pawan Budhwar and
                Deborah Bunker and
                Alexandru Capatina and
                Lemuria D. Carter and
                Ioanna D. Constantiou and
                Crispin R. Coombs and
                Tom Crick and
                Csaba Cs{\'{a}}ki and
                Yves Darnige and
                Rahul D{\'{e}} and
                Rick Delbridge and
                Rameshwar Dubey and
                Robin Gauld and
                Ravikumar Gutti and
                Mari{\'{e}} Hattingh and
                Arve Haug and
                Leeya Hendricks and
                Airo Hino and
                Cathy H. C. Hsu and
                Netta Iivari and
                Marijn Janssen and
                Ikram Jebabli and
                Paul Jones and
                Iris A. Junglas and
                Abhishek Kaushik and
                Deepak Khazanchi and
                Mitsuru Kodama and
                Sascha Kraus and
                Vikram Kumar and
                Christian Maier and
                F. Tegwen Malik and
                Machdel Matthee and
                Ian P. McCarthy and
                Marco Meier and
                Bhimaraya A. Metri and
                Adrian Micu and
                Angela{-}Eliza Micu and
                Santosh K. Misra and
                Anubhav Mishra and
                Tonja Molin{-}Juustila and
                Leif Oppermann and
                Nicholas O'Regan and
                Abhipsa Pal and
                Neeraj Pandey and
                Ilias O. Pappas and
                Andrew Parker and
                Kavita Pathak and
                Daniel A. Pienta and
                Ariana Polyviou and
                Ramakrishnan Raman and
                Samuel Ribeiro{-}Navarrete and
                Paavo Ritala and
                Michael Rosemann and
                Suprateek Sarker and
                Pallavi Saxena and
                Daniel Schlagwein and
                Hergen Schultze and
                Chitra Sharma and
                Sujeet Kumar Sharma and
                Antonis C. Simintiras and
                Vinay Kumar Singh and
                Hanlie Smuts and
                John Soldatos and
                Manoj Kumar Tiwari and
                Jason Bennett Thatcher and
                Cristina Vanberghen and
                {\'{A}}kos Varga and
                Polyxeni Vassilakopoulou and
                Viswanath Venkatesh and
                Giampaolo Viglia and
                Tim Vorley and
                Michael Wade and
                Paul Walton},
  title      = {{``Real impact''}: Challenges and opportunities in bridging the gap between research and practice - Making a difference in industry, policy, and society},
  journal    = {Int. J. Inf. Manag.},
  volume     = {78},
  pages      = {102750},
  year       = {2024},
  url        = {https://doi.org/10.1016/j.ijinfomgt.2023.102750},
  doi        = {10.1016/J.IJINFOMGT.2023.102750},
  timestamp  = {Tue, 09 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijinfoman/DwivediJHDAAAAAAAABBBBBBBCCCC24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/Kumar24,
  author     = {Akshat Kumar},
  editor     = {Mehdi Dastani and
                Jaime Sim{\~{a}}o Sichman and
                Natasha Alechina and
                Virginia Dignum},
  title      = {Difference of Convex Functions Programming for Policy Optimization in Reinforcement Learning},
  booktitle  = {Proceedings of the 23rd International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} 2024, Auckland, New Zealand, May 6-10, 2024},
  pages      = {2339--2341},
  publisher  = {International Foundation for Autonomous Agents and Multiagent Systems / {ACM}},
  year       = {2024},
  url        = {https://dl.acm.org/doi/10.5555/3635637.3663153},
  doi        = {10.5555/3635637.3663153},
  timestamp  = {Wed, 26 Jun 2024 14:06:50 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/Kumar24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-06856,
  author     = {Adhyyan Narang and
                Andrew Wagenmaker and
                Lillian J. Ratliff and
                Kevin G. Jamieson},
  title      = {Sample Complexity Reduction via Policy Difference Estimation in Tabular Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2406.06856},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.06856},
  doi        = {10.48550/ARXIV.2406.06856},
  eprinttype = {arXiv},
  eprint     = {2406.06856},
  timestamp  = {Mon, 08 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-06856.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/apin/CaoLWFZ23,
  author     = {Jiaqing Cao and
                Quan Liu and
                Lan Wu and
                Qiming Fu and
                Shan Zhong},
  title      = {Temporal-difference emphasis learning with regularized correction for off-policy evaluation and control},
  journal    = {Appl. Intell.},
  volume     = {53},
  number     = {18},
  pages      = {20917--20937},
  year       = {2023},
  url        = {https://doi.org/10.1007/s10489-023-04579-4},
  doi        = {10.1007/S10489-023-04579-4},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/apin/CaoLWFZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icic/ParkKMNH23,
  author     = {Bumgeun Park and
                Taeyoung Kim and
                Woohyeon Moon and
                Sarvar Hussain Nengroo and
                Dongsoo Har},
  editor     = {De{-}Shuang Huang and
                Prashan Premaratne and
                Baohua Jin and
                Boyang Qu and
                Kang{-}Hyun Jo and
                Abir Hussain},
  title      = {Off-Policy Reinforcement Learning with Loss Function Weighted by Temporal Difference Error},
  booktitle  = {Advanced Intelligent Computing Technology and Applications - 19th International Conference, {ICIC} 2023, Zhengzhou, China, August 10-13, 2023, Proceedings, Part {V}},
  series     = {Lecture Notes in Computer Science},
  volume     = {14090},
  pages      = {600--613},
  publisher  = {Springer},
  year       = {2023},
  url        = {https://doi.org/10.1007/978-981-99-4761-4\_51},
  doi        = {10.1007/978-981-99-4761-4\_51},
  timestamp  = {Tue, 08 Aug 2023 13:15:21 +0200},
  biburl     = {https://dblp.org/rec/conf/icic/ParkKMNH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/ChenMLYYG23,
  author     = {Xingguo Chen and
                Xingzhou Ma and
                Yang Li and
                Guang Yang and
                Shangdong Yang and
                Yang Gao},
  editor     = {Robin J. Evans and
                Ilya Shpitser},
  title      = {Modified Retrace for Off-Policy Temporal Difference Learning},
  booktitle  = {Uncertainty in Artificial Intelligence, {UAI} 2023, July 31 - 4 August 2023, Pittsburgh, PA, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {216},
  pages      = {303--312},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v216/chen23a.html},
  timestamp  = {Wed, 01 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/uai/ChenMLYYG23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/access/LeeKH22,
  author     = {Donghwan Lee and
                Do Wan Kim and
                Jianghai Hu},
  title      = {Distributed Off-Policy Temporal Difference Learning Using Primal-Dual Method},
  journal    = {{IEEE} Access},
  volume     = {10},
  pages      = {107077--107094},
  year       = {2022},
  url        = {https://doi.org/10.1109/ACCESS.2022.3211395},
  doi        = {10.1109/ACCESS.2022.3211395},
  timestamp  = {Thu, 09 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/access/LeeKH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/JiaZ22,
  author     = {Yanwei Jia and
                Xun Yu Zhou},
  title      = {Policy Evaluation and Temporal-Difference Learning in Continuous Time and Space: {A} Martingale Approach},
  journal    = {J. Mach. Learn. Res.},
  volume     = {23},
  pages      = {154:1--154:55},
  year       = {2022},
  url        = {https://jmlr.org/papers/v23/21-0947.html},
  timestamp  = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/JiaZ22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/LiXL22,
  author     = {Yueheng Li and
                Guangming Xie and
                Zongqing Lu},
  editor     = {Kamalika Chaudhuri and
                Stefanie Jegelka and
                Le Song and
                Csaba Szepesv{\'{a}}ri and
                Gang Niu and
                Sivan Sabato},
  title      = {Difference Advantage Estimation for Multi-Agent Policy Gradients},
  booktitle  = {International Conference on Machine Learning, {ICML} 2022, 17-23 July 2022, Baltimore, Maryland, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {13066--13085},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v162/li22w.html},
  timestamp  = {Tue, 12 Jul 2022 17:36:52 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/LiXL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/TosattoPWM22,
  author     = {Samuele Tosatto and
                Andrew Patterson and
                Martha White and
                Rupam Mahmood},
  editor     = {Kamalika Chaudhuri and
                Stefanie Jegelka and
                Le Song and
                Csaba Szepesv{\'{a}}ri and
                Gang Niu and
                Sivan Sabato},
  title      = {A Temporal-Difference Approach to Policy Gradient Estimation},
  booktitle  = {International Conference on Machine Learning, {ICML} 2022, 17-23 July 2022, Baltimore, Maryland, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {21609--21632},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v162/tosatto22a.html},
  timestamp  = {Tue, 12 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/TosattoPWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/Thomas22,
  author     = {Valentin Thomas},
  editor     = {Sanmi Koyejo and
                S. Mohamed and
                A. Agarwal and
                Danielle Belgrave and
                K. Cho and
                A. Oh},
  title      = {On the role of overparameterization in off-policy Temporal Difference learning with linear function approximation},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper\_files/paper/2022/hash/f115f619b62833aadc5acb058975b0e6-Abstract-Conference.html},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/Thomas22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ssci/SaglamMK22,
  author     = {Baturay Saglam and
                Furkan B. Mutlu and
                Suleyman S. Kozat},
  editor     = {Hisao Ishibuchi and
                Chee{-}Keong Kwoh and
                Ah{-}Hwee Tan and
                Dipti Srinivasan and
                Chunyan Miao and
                Anupam Trivedi and
                Keeley A. Crockett},
  title      = {An Optimistic Approach to the Temporal Difference Error in Off-Policy Actor-Critic Algorithms},
  booktitle  = {{IEEE} Symposium Series on Computational Intelligence, {SSCI} 2022, Singapore, December 4-7, 2022},
  pages      = {875--883},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/SSCI51031.2022.10022298},
  doi        = {10.1109/SSCI51031.2022.10022298},
  timestamp  = {Wed, 08 Feb 2023 22:09:16 +0100},
  biburl     = {https://dblp.org/rec/conf/ssci/SaglamMK22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-02396,
  author     = {Samuele Tosatto and
                Andrew Patterson and
                Martha White and
                A. Rupam Mahmood},
  title      = {A Temporal-Difference Approach to Policy Gradient Estimation},
  journal    = {CoRR},
  volume     = {abs/2202.02396},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.02396},
  eprinttype = {arXiv},
  eprint     = {2202.02396},
  timestamp  = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-02396.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-07960,
  author     = {Ziad Kobeissi and
                Francis R. Bach},
  title      = {On a Variance Reduction Correction of the Temporal Difference for Policy Evaluation in the Stochastic Continuous Setting},
  journal    = {CoRR},
  volume     = {abs/2202.07960},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.07960},
  eprinttype = {arXiv},
  eprint     = {2202.07960},
  timestamp  = {Sun, 03 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-07960.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-10172,
  author     = {Eric Graves and
                Sina Ghiassian},
  title      = {Importance Sampling Placement in Off-Policy Temporal-Difference Methods},
  journal    = {CoRR},
  volume     = {abs/2203.10172},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.10172},
  doi        = {10.48550/ARXIV.2203.10172},
  eprinttype = {arXiv},
  eprint     = {2203.10172},
  timestamp  = {Mon, 28 Mar 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-10172.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-13175,
  author     = {Bumgeun Park and
                Taeyoung Kim and
                Woohyeon Moon and
                Luiz Felipe Vecchietti and
                Dongsoo Har},
  title      = {Off-Policy Reinforcement Learning with Loss Function Weighted by Temporal Difference Error},
  journal    = {CoRR},
  volume     = {abs/2212.13175},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2212.13175},
  doi        = {10.48550/ARXIV.2212.13175},
  eprinttype = {arXiv},
  eprint     = {2212.13175},
  timestamp  = {Wed, 04 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-13175.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/algorithms/WangLYP21,
  author     = {Huimu Wang and
                Zhen Liu and
                Jianqiang Yi and
                Zhiqiang Pu},
  title      = {Multiagent Hierarchical Cognition Difference Policy for Multiagent Cooperation},
  journal    = {Algorithms},
  volume     = {14},
  number     = {3},
  pages      = {98},
  year       = {2021},
  url        = {https://doi.org/10.3390/a14030098},
  doi        = {10.3390/A14030098},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/algorithms/WangLYP21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/isci/CaoLZFZ21,
  author     = {Jiaqing Cao and
                Quan Liu and
                Fei Zhu and
                Qiming Fu and
                Shan Zhong},
  title      = {Gradient temporal-difference learning for off-policy evaluation using emphatic weightings},
  journal    = {Inf. Sci.},
  volume     = {580},
  pages      = {311--330},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.ins.2021.08.082},
  doi        = {10.1016/J.INS.2021.08.082},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/isci/CaoLZFZ21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The acronym "MDPs" is braced so sentence-casing bibliography styles do not
% lowercase it.
@article{DBLP:journals/tac/KoppelWSSR21,
  author     = {Alec Koppel and
                Garrett Warnell and
                Ethan Stump and
                Peter Stone and
                Alejandro Ribeiro},
  title      = {Policy Evaluation in Continuous {MDPs} With Efficient Kernelized Gradient Temporal Difference},
  journal    = {{IEEE} Trans. Autom. Control.},
  volume     = {66},
  number     = {4},
  pages      = {1856--1863},
  year       = {2021},
  url        = {https://doi.org/10.1109/TAC.2020.3029315},
  doi        = {10.1109/TAC.2020.3029315},
  timestamp  = {Wed, 07 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tac/KoppelWSSR21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/CastelliniDOS21,
  author     = {Jacopo Castellini and
                Sam Devlin and
                Frans A. Oliehoek and
                Rahul Savani},
  editor     = {Frank Dignum and
                Alessio Lomuscio and
                Ulle Endriss and
                Ann Now{\'{e}}},
  title      = {Difference Rewards Policy Gradients},
  booktitle  = {{AAMAS} '21: 20th International Conference on Autonomous Agents and Multiagent Systems, Virtual Event, United Kingdom, May 3-7, 2021},
  pages      = {1475--1477},
  publisher  = {{ACM}},
  year       = {2021},
  url        = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1475.pdf},
  doi        = {10.5555/3463952.3464130},
  timestamp  = {Wed, 20 Jul 2022 17:03:47 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/CastelliniDOS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cdc/StankovicBS21,
  author     = {Milos S. Stankovic and
                Marko Beko and
                Srdjan S. Stankovic},
  title      = {Distributed Consensus-Based Multi-Agent Off-Policy Temporal-Difference Learning},
  booktitle  = {2021 60th {IEEE} Conference on Decision and Control (CDC), Austin, TX, USA, December 14-17, 2021},
  pages      = {5976--5981},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/CDC45484.2021.9683607},
  doi        = {10.1109/CDC45484.2021.9683607},
  timestamp  = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cdc/StankovicBS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2108-06655,
  author     = {Yanwei Jia and
                Xun Yu Zhou},
  title      = {Policy Evaluation and Temporal-Difference Learning in Continuous Time and Space: {A} Martingale Approach},
  journal    = {CoRR},
  volume     = {abs/2108.06655},
  year       = {2021},
  url        = {https://arxiv.org/abs/2108.06655},
  eprinttype = {arXiv},
  eprint     = {2108.06655},
  timestamp  = {Wed, 18 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2108-06655.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-12002,
  author     = {Yaqi Duan and
                Mengdi Wang and
                Martin J. Wainwright},
  title      = {Optimal policy evaluation using kernel-based temporal difference methods},
  journal    = {CoRR},
  volume     = {abs/2109.12002},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.12002},
  eprinttype = {arXiv},
  eprint     = {2109.12002},
  timestamp  = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-12002.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jamia/TurveyKNHBHD20,
  author     = {Carolyn L. Turvey and
                Dawn M. Klein and
                Kim M. Nazi and
                Susan T. Haidary and
                Omar Bouhaddou and
                Nelson Hsing and
                Margaret Donahue},
  title      = {Racial differences in patient consent policy preferences for electronic health information exchange},
  journal    = {J. Am. Medical Informatics Assoc.},
  volume     = {27},
  number     = {5},
  pages      = {717--725},
  year       = {2020},
  url        = {https://doi.org/10.1093/jamia/ocaa012},
  doi        = {10.1093/JAMIA/OCAA012},
  timestamp  = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jamia/TurveyKNHBHD20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cdc/HerediaM20,
  author     = {Paulo Heredia and
                Shaoshuai Mou},
  title      = {Finite-Sample Analysis of Multi-Agent Policy Evaluation with Kernelized Gradient Temporal Difference},
  booktitle  = {59th {IEEE} Conference on Decision and Control, {CDC} 2020, Jeju Island, South Korea, December 14-18, 2020},
  pages      = {5647--5652},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/CDC42340.2020.9303966},
  doi        = {10.1109/CDC42340.2020.9303966},
  timestamp  = {Fri, 04 Mar 2022 13:31:02 +0100},
  biburl     = {https://dblp.org/rec/conf/cdc/HerediaM20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ecai/DiddigiKB20,
  author     = {Raghuram Bharadwaj Diddigi and
                Chandramouli Kamanchi and
                Shalabh Bhatnagar},
  editor     = {Giuseppe De Giacomo and
                Alejandro Catal{\'{a}} and
                Bistra Dilkina and
                Michela Milano and
                Sen{\'{e}}n Barro and
                Alberto Bugar{\'{\i}}n and
                J{\'{e}}r{\^{o}}me Lang},
  title      = {A Convergent Off-Policy Temporal Difference Algorithm},
  booktitle  = {{ECAI} 2020 - 24th European Conference on Artificial Intelligence, 29 August-8 September 2020, Santiago de Compostela, Spain, August 29 - September 8, 2020 - Including 10th Conference on Prestigious Applications of Artificial Intelligence {(PAIS} 2020)},
  series     = {Frontiers in Artificial Intelligence and Applications},
  volume     = {325},
  pages      = {1103--1110},
  publisher  = {{IOS} Press},
  year       = {2020},
  url        = {https://doi.org/10.3233/FAIA200207},
  doi        = {10.3233/FAIA200207},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ecai/DiddigiKB20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/paams/Algarvio0S20,
  author     = {Hugo Algarvio and
                Fernando Lopes and
                Jo{\~{a}}o Santana},
  editor     = {Fernando de la Prieta and
                Philippe Mathieu and
                Jaime Andr{\'{e}}s Rinc{\'{o}}n Arango and
                Alia El Bolock and
                Elena del Val and
                Jaume Jord{\'{a}}n Prunera and
                Jo{\~{a}}o Carneiro and
                Rub{\'{e}}n Fuentes and
                Fernando Lopes and
                Vicente Juli{\'{a}}n},
  title      = {Renewable Energy Support Policy Based on Contracts for Difference and Bilateral Negotiation},
  booktitle  = {Highlights in Practical Applications of Agents, Multi-Agent Systems, and Trust-worthiness. The {PAAMS} Collection - International Workshops of {PAAMS} 2020, L'Aquila, Italy, October 7-9, 2020, Proceedings},
  series     = {Communications in Computer and Information Science},
  volume     = {1233},
  pages      = {293--301},
  publisher  = {Springer},
  year       = {2020},
  url        = {https://doi.org/10.1007/978-3-030-51999-5\_24},
  doi        = {10.1007/978-3-030-51999-5\_24},
  timestamp  = {Thu, 04 Jul 2024 21:34:17 +0200},
  biburl     = {https://dblp.org/rec/conf/paams/Algarvio0S20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sac/LeeHM20,
  author     = {Sangyeop Lee and
                Myoung Hoon Ha and
                Byung Ro Moon},
  editor     = {Chih{-}Cheng Hung and
                Tom{\'{a}}s Cern{\'{y}} and
                Dongwan Shin and
                Alessio Bechini},
  title      = {Understanding features on evolutionary policy optimizations: feature learning difference between gradient-based and evolutionary policy optimizations},
  booktitle  = {{SAC} '20: The 35th {ACM/SIGAPP} Symposium on Applied Computing, online event, [Brno, Czech Republic], March 30 - April 3, 2020},
  pages      = {1112--1118},
  publisher  = {{ACM}},
  year       = {2020},
  url        = {https://doi.org/10.1145/3341105.3373966},
  doi        = {10.1145/3341105.3373966},
  timestamp  = {Sun, 06 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sac/LeeHM20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-11258,
  author     = {Jacopo Castellini and
                Sam Devlin and
                Frans A. Oliehoek and
                Rahul Savani},
  title      = {Difference Rewards Policy Gradients},
  journal    = {CoRR},
  volume     = {abs/2012.11258},
  year       = {2020},
  url        = {https://arxiv.org/abs/2012.11258},
  eprinttype = {arXiv},
  eprint     = {2012.11258},
  timestamp  = {Mon, 04 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-11258.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Twitter" is braced so sentence-casing styles keep its capital; the issue
% range uses "--" so it is typeset as an en dash, like the page range.
@article{DBLP:journals/cscw/GroverBMR19,
  author     = {Ted Grover and
                Ayse Elvan Bayraktaroglu and
                Gloria Mark and
                Eugenia Ha Rim Rho},
  title      = {Moral and Affective Differences in {U.S.} Immigration Policy Debate on {Twitter}},
  journal    = {Comput. Support. Cooperative Work.},
  volume     = {28},
  number     = {3--4},
  pages      = {317--355},
  year       = {2019},
  url        = {https://doi.org/10.1007/s10606-019-09357-w},
  doi        = {10.1007/S10606-019-09357-W},
  timestamp  = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/cscw/GroverBMR19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/RiquelmePVMGMBN19,
  author     = {Carlos Riquelme and
                Hugo Penedones and
                Damien Vincent and
                Hartmut Maennel and
                Sylvain Gelly and
                Timothy A. Mann and
                Andr{\'{e}} Barreto and
                Gergely Neu},
  editor     = {Hanna M. Wallach and
                Hugo Larochelle and
                Alina Beygelzimer and
                Florence d'Alch{\'{e}}{-}Buc and
                Emily B. Fox and
                Roman Garnett},
  title      = {Adaptive Temporal-Difference Learning for Policy Evaluation with Per-State Uncertainty Estimates},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {11872--11882},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/816a6db41f0e44644bc65808b6db5ca4-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/RiquelmePVMGMBN19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-07987,
  author     = {Hugo Penedones and
                Carlos Riquelme and
                Damien Vincent and
                Hartmut Maennel and
                Timothy A. Mann and
                Andr{\'{e}} Barreto and
                Sylvain Gelly and
                Gergely Neu},
  title      = {Adaptive Temporal-Difference Learning for Policy Evaluation with Per-State Uncertainty Estimates},
  journal    = {CoRR},
  volume     = {abs/1906.07987},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.07987},
  eprinttype = {arXiv},
  eprint     = {1906.07987},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-07987.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-05697,
  author     = {Raghuram Bharadwaj Diddigi and
                Chandramouli Kamanchi and
                Shalabh Bhatnagar},
  title      = {A Convergent Off-Policy Temporal Difference Algorithm},
  journal    = {CoRR},
  volume     = {abs/1911.05697},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.05697},
  eprinttype = {arXiv},
  eprint     = {1911.05697},
  timestamp  = {Mon, 02 Dec 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-05697.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% This record carries no author field (its note marks it as withdrawn), so a
% "key" field is supplied as the sort/label fallback for author-less entries.
% "FiDi-RL" is braced so sentence-casing styles do not turn it into "Fidi-rl".
@article{DBLP:journals/corr/abs-1907-00526,
  key        = {FiDi-RL},
  title      = {{FiDi-RL}: Incorporating Deep Reinforcement Learning with Finite-Difference Policy Search for Efficient Learning of Continuous Control},
  journal    = {CoRR},
  volume     = {abs/1907.00526},
  year       = {2019},
  note       = {Withdrawn.},
  url        = {http://arxiv.org/abs/1907.00526},
  eprinttype = {arXiv},
  eprint     = {1907.00526},
  timestamp  = {Wed, 05 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-00526.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/access/LiWSJ18,
  author     = {Dazi Li and
                Yuting Wang and
                Tianheng Song and
                Qibing Jin},
  title      = {An Adaptive Policy Evaluation Network Based on Recursive Least Squares Temporal Difference With Gradient Correction},
  journal    = {{IEEE} Access},
  volume     = {6},
  pages      = {7515--7525},
  year       = {2018},
  url        = {https://doi.org/10.1109/ACCESS.2018.2805298},
  doi        = {10.1109/ACCESS.2018.2805298},
  timestamp  = {Wed, 04 Jul 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/access/LiWSJ18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun "Markov" is braced so sentence-casing bibliography styles do
% not lowercase it.
@article{DBLP:journals/mor/KarmakarB18,
  author     = {Prasenjit Karmakar and
                Shalabh Bhatnagar},
  title      = {Two Time-Scale Stochastic Approximation with Controlled {Markov} Noise and Off-Policy Temporal-Difference Learning},
  journal    = {Math. Oper. Res.},
  volume     = {43},
  number     = {1},
  pages      = {130--151},
  year       = {2018},
  url        = {https://doi.org/10.1287/moor.2017.0855},
  doi        = {10.1287/MOOR.2017.0855},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/mor/KarmakarB18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/soco/GivchiP18,
  author     = {Arash Givchi and
                Maziar Palhang},
  title      = {Off-policy temporal difference learning with distribution adaptation in fast mixing chains},
  journal    = {Soft Comput.},
  volume     = {22},
  number     = {3},
  pages      = {737--750},
  year       = {2018},
  url        = {https://doi.org/10.1007/s00500-017-2490-1},
  doi        = {10.1007/S00500-017-2490-1},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/soco/GivchiP18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1712-09652,
  author     = {Huizhen Yu},
  title      = {On Convergence of some Gradient-based Temporal-Differences Algorithms for Off-Policy Learning},
  journal    = {CoRR},
  volume     = {abs/1712.09652},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.09652},
  eprinttype = {arXiv},
  eprint     = {1712.09652},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-09652.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/SuttonMW16,
  author     = {Richard S. Sutton and
                Ashique Rupam Mahmood and
                Martha White},
  title      = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference Learning},
  journal    = {J. Mach. Learn. Res.},
  volume     = {17},
  pages      = {73:1--73:29},
  year       = {2016},
  url        = {https://jmlr.org/papers/v17/14-488.html},
  timestamp  = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/SuttonMW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The title's "off -policy" (stray space before the hyphen) is corrected to
% "off-policy".
@article{DBLP:journals/corr/KarmakarMB16,
  author     = {Prasenjit Karmakar and
                Raj Kumar Maity and
                Shalabh Bhatnagar},
  title      = {On a convergent off-policy temporal difference learning algorithm in on-line learning environment},
  journal    = {CoRR},
  volume     = {abs/1605.06076},
  year       = {2016},
  url        = {http://arxiv.org/abs/1605.06076},
  eprinttype = {arXiv},
  eprint     = {1605.06076},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KarmakarMB16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "3G" is braced, like "LTE", so sentence-casing styles do not render it "3g".
@article{DBLP:journals/tele/JungK15a,
  author     = {Wonsuk Jung and
                Youngsun Kwon},
  title      = {Differences between {LTE} and {3G} service customers: Business and policy implications},
  journal    = {Telematics Informatics},
  volume     = {32},
  number     = {4},
  pages      = {667--680},
  year       = {2015},
  url        = {https://doi.org/10.1016/j.tele.2015.03.001},
  doi        = {10.1016/J.TELE.2015.03.001},
  timestamp  = {Mon, 24 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tele/JungK15a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aips/DannN015,
  author     = {Christoph Dann and
                Gerhard Neumann and
                Jan Peters},
  editor     = {Ronen I. Brafman and
                Carmel Domshlak and
                Patrik Haslum and
                Shlomo Zilberstein},
  title      = {Policy Evaluation with Temporal Differences: {A} Survey and Comparison (Extended Abstract)},
  booktitle  = {Proceedings of the Twenty-Fifth International Conference on Automated Planning and Scheduling, {ICAPS} 2015, Jerusalem, Israel, June 7-11, 2015},
  pages      = {359--360},
  publisher  = {{AAAI} Press},
  year       = {2015},
  url        = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS15/paper/view/10635},
  timestamp  = {Tue, 26 May 2015 21:03:02 +0200},
  biburl     = {https://dblp.org/rec/conf/aips/DannN015.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SuttonMW15,
  author     = {Richard S. Sutton and
                Ashique Rupam Mahmood and
                Martha White},
  title      = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1503.04269},
  year       = {2015},
  url        = {http://arxiv.org/abs/1503.04269},
  eprinttype = {arXiv},
  eprint     = {1503.04269},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonMW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/DannNP14,
  author     = {Christoph Dann and
                Gerhard Neumann and
                Jan Peters},
  title      = {Policy evaluation with temporal differences: a survey and comparison},
  journal    = {J. Mach. Learn. Res.},
  volume     = {15},
  number     = {1},
  pages      = {809--883},
  year       = {2014},
  url        = {https://dl.acm.org/doi/10.5555/2627435.2638563},
  doi        = {10.5555/2627435.2638563},
  timestamp  = {Thu, 02 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/DannNP14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jota/ChengFZLW14,
  author     = {Kang Cheng and
                Shumin Fei and
                Kanjian Zhang and
                Xiaomei Liu and
                Haikun Wei},
  title      = {Temporal Difference-Based Policy Iteration for Optimal Control of Stochastic Systems},
  journal    = {J. Optim. Theory Appl.},
  volume     = {163},
  number     = {1},
  pages      = {165--180},
  year       = {2014},
  url        = {https://doi.org/10.1007/s10957-013-0418-1},
  doi        = {10.1007/S10957-013-0418-1},
  timestamp  = {Tue, 07 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jota/ChengFZLW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icis/KamKG13,
  author     = {Hwee{-}Joo Kam and
                Pairin Katerattanakul and
                Greg Gogolin},
  editor     = {Richard L. Baskerville and
                Michael Chau},
  title      = {A Cross Industry Study: Differences in Information Security Policy Compliance between the Banking Industry and Higher Education},
  booktitle  = {Proceedings of the International Conference on Information Systems, {ICIS} 2013, Milano, Italy, December 15-18, 2013},
  publisher  = {Association for Information Systems},
  year       = {2013},
  url        = {http://aisel.aisnet.org/icis2013/proceedings/SecurityOfIS/4},
  timestamp  = {Wed, 30 Oct 2019 17:01:36 +0100},
  biburl     = {https://dblp.org/rec/conf/icis/KamKG13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/db/KuhnJ09,
  author     = {Kristine M. Kuhn and
                K. D. Joshi},
  title      = {The reported and revealed importance of job attributes to aspiring information technology: a policy-capturing study of gender differences},
  journal    = {Data Base},
  volume     = {40},
  number     = {3},
  pages      = {40--60},
  year       = {2009},
  url        = {https://doi.org/10.1145/1592401.1592406},
  doi        = {10.1145/1592401.1592406},
  timestamp  = {Wed, 20 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/db/KuhnJ09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Heidrich-Meisner and Igel, ESANN 2008.
% The record carries no doi or publisher field; the url points at the proceedings PDF.
@inproceedings{DBLP:conf/esann/Heidrich-MeisnerI08,
  author    = {Verena Heidrich{-}Meisner and Christian Igel},
  title     = {Similarities and differences between policy gradient methods and evolution strategies},
  booktitle = {16th European Symposium on Artificial Neural Networks, {ESANN} 2008, Bruges, Belgium, April 23-25, 2008, Proceedings},
  pages     = {149--154},
  year      = {2008},
  url       = {https://www.esann.org/sites/default/files/proceedings/legacy/es2008-47.pdf},
  timestamp = {Tue, 02 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/esann/Heidrich-MeisnerI08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: Sutton, Szepesvari and Maei, NIPS 2008.
% The complexity bound in the title is wrapped as braced math so that
% bibliography styles applying sentence casing cannot lowercase the
% capital O (which would turn the big-O bound into a little-o one).
@inproceedings{DBLP:conf/nips/SuttonSM08,
  author    = {Richard S. Sutton and Csaba Szepesv{\'{a}}ri and Hamid Reza Maei},
  editor    = {Daphne Koller and Dale Schuurmans and Yoshua Bengio and L{\'{e}}on Bottou},
  title     = {A Convergent {$O(n)$} Temporal-difference Algorithm for Off-policy Learning with Linear Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 21, Proceedings of the Twenty-Second Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages     = {1609--1616},
  publisher = {Curran Associates, Inc.},
  year      = {2008},
  url       = {https://proceedings.neurips.cc/paper/2008/hash/e0c641195b27425bb056ac56f8953d24-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonSM08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: Taylor, Whiteson and Stone, AAAI 2007.
@inproceedings{DBLP:conf/aaai/TaylorWS07,
  author    = {Matthew E. Taylor and Shimon Whiteson and Peter Stone},
  title     = {Temporal Difference and Policy Search Methods for Reinforcement Learning: An Empirical Comparison},
  booktitle = {Proceedings of the Twenty-Second {AAAI} Conference on Artificial Intelligence, July 22-26, 2007, Vancouver, British Columbia, Canada},
  pages     = {1675--1678},
  publisher = {{AAAI} Press},
  year      = {2007},
  url       = {http://www.aaai.org/Library/AAAI/2007/aaai07-271.php},
  timestamp = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/TaylorWS07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Gerber and Upmann, Math. Soc. Sci. 52(2), 2006.
@article{DBLP:journals/mss/GerberU06,
  author    = {Anke Gerber and Thorsten Upmann},
  title     = {Bargaining solutions at work: Qualitative differences in policy implications},
  journal   = {Math. Soc. Sci.},
  volume    = {52},
  number    = {2},
  pages     = {162--175},
  year      = {2006},
  url       = {https://doi.org/10.1016/j.mathsocsci.2006.06.002},
  doi       = {10.1016/J.MATHSOCSCI.2006.06.002},
  timestamp = {Mon, 24 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/mss/GerberU06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Wilkinson, Inf. Soc. 20(3), 2004.
@article{DBLP:journals/tis/Wilkinson04,
  author    = {Kenton T. Wilkinson},
  title     = {Language Difference and Communication Policy in the Information Age},
  journal   = {Inf. Soc.},
  volume    = {20},
  number    = {3},
  pages     = {217--229},
  year      = {2004},
  url       = {https://doi.org/10.1080/01972240490456890},
  doi       = {10.1080/01972240490456890},
  timestamp = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/tis/Wilkinson04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Martins, Inf. Econ. Policy 15(3), 2003.
@article{DBLP:journals/iepol/Martins03,
  author    = {Maria Lurdes Castro Martins},
  title     = {International differences in telecommunications demand},
  journal   = {Inf. Econ. Policy},
  volume    = {15},
  number    = {3},
  pages     = {291--303},
  year      = {2003},
  url       = {https://doi.org/10.1016/S0167-6245(02)00111-7},
  doi       = {10.1016/S0167-6245(02)00111-7},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/iepol/Martins03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper: Precup, Sutton and Dasgupta, ICML 2001.
% The case-protecting braces in booktitle wrap only the acronym, so the
% surrounding parentheses pair up outside the brace group.
% The record carries no url or doi field.
@inproceedings{DBLP:conf/icml/PrecupSD01,
  author    = {Doina Precup and Richard S. Sutton and Sanjoy Dasgupta},
  editor    = {Carla E. Brodley and Andrea Pohoreckyj Danyluk},
  title     = {Off-Policy Temporal Difference Learning with Function Approximation},
  booktitle = {Proceedings of the Eighteenth International Conference on Machine Learning ({ICML} 2001), Williams College, Williamstown, MA, USA, June 28 - July 1, 2001},
  pages     = {417--424},
  publisher = {Morgan Kaufmann},
  year      = {2001},
  timestamp = {Wed, 27 Nov 2002 10:53:35 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/PrecupSD01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article: Ishikawa, Int. J. Medical Informatics 60(2), 2000.
@article{DBLP:journals/ijmi/Ishikawa00,
  author    = {Kiyomu Ishikawa},
  title     = {Health data use and protection policy; based on differences by cultural and social environment},
  journal   = {Int. J. Medical Informatics},
  volume    = {60},
  number    = {2},
  pages     = {119--125},
  year      = {2000},
  url       = {https://doi.org/10.1016/S1386-5056(00)00111-8},
  doi       = {10.1016/S1386-5056(00)00111-8},
  timestamp = {Fri, 21 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ijmi/Ishikawa00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information, see our F.A.Q.