default search action

combined dblp search
author search
venue search
publication search

ask others

Search dblp for Publications

Name: dblp XML data dump
Creator: Schloss Dagstuhl - Leibniz Center for Informatics
Published: 1993
License: https://creativecommons.org/publicdomain/zero/1.0/
Keywords: dblp, XML, computer science, scholarly publications, metadata

export results for "policy difference"

> Home > Search

download as .bib file

@article{DBLP:journals/ijinfoman/DwivediJHDAAAAAAAABBBBBBBCCCC24,
  author       = {Yogesh K. Dwivedi and
                  Anand Jeyaraj and
                  Laurie Hughes and
                  Gareth H. Davies and
                  Manju Ahuja and
                  Mousa Ahmed Albashrawi and
                  Adil S. Al{-}Busaidi and
                  Salah A. Al{-}Sharhan and
                  Khalid Ibrahim Al{-}Sulaiti and
                  Levent Altinay and
                  Shem Amalaya and
                  Sunil Archak and
                  Mar{\'{\i}}a Teresa Ballestar and
                  Shonil A. Bhagwat and
                  Anandhi Bharadwaj and
                  Amit Bhushan and
                  Indranil Bose and
                  Pawan Budhwar and
                  Deborah Bunker and
                  Alexandru Capatina and
                  Lemuria D. Carter and
                  Ioanna D. Constantiou and
                  Crispin R. Coombs and
                  Tom Crick and
                  Csaba Cs{\'{a}}ki and
                  Yves Darnige and
                  Rahul D{\'{e}} and
                  Rick Delbridge and
                  Rameshwar Dubey and
                  Robin Gauld and
                  Ravikumar Gutti and
                  Mari{\'{e}} Hattingh and
                  Arve Haug and
                  Leeya Hendricks and
                  Airo Hino and
                  Cathy H. C. Hsu and
                  Netta Iivari and
                  Marijn Janssen and
                  Ikram Jebabli and
                  Paul Jones and
                  Iris A. Junglas and
                  Abhishek Kaushik and
                  Deepak Khazanchi and
                  Mitsuru Kodama and
                  Sascha Kraus and
                  Vikram Kumar and
                  Christian Maier and
                  F. Tegwen Malik and
                  Machdel Matthee and
                  Ian P. McCarthy and
                  Marco Meier and
                  Bhimaraya A. Metri and
                  Adrian Micu and
                  Angela{-}Eliza Micu and
                  Santosh K. Misra and
                  Anubhav Mishra and
                  Tonja Molin{-}Juustila and
                  Leif Oppermann and
                  Nicholas O'Regan and
                  Abhipsa Pal and
                  Neeraj Pandey and
                  Ilias O. Pappas and
                  Andrew Parker and
                  Kavita Pathak and
                  Daniel A. Pienta and
                  Ariana Polyviou and
                  Ramakrishnan Raman and
                  Samuel Ribeiro{-}Navarrete and
                  Paavo Ritala and
                  Michael Rosemann and
                  Suprateek Sarker and
                  Pallavi Saxena and
                  Daniel Schlagwein and
                  Hergen Schultze and
                  Chitra Sharma and
                  Sujeet Kumar Sharma and
                  Antonis C. Simintiras and
                  Vinay Kumar Singh and
                  Hanlie Smuts and
                  John Soldatos and
                  Manoj Kumar Tiwari and
                  Jason Bennett Thatcher and
                  Cristina Vanberghen and
                  {\'{A}}kos Varga and
                  Polyxeni Vassilakopoulou and
                  Viswanath Venkatesh and
                  Giampaolo Viglia and
                  Tim Vorley and
                  Michael Wade and
                  Paul Walton},
  title        = {{``Real impact''}: Challenges and opportunities in bridging the gap between
                  research and practice - Making a difference in industry, policy, and
                  society},
  journal      = {Int. J. Inf. Manag.},
  volume       = {78},
  pages        = {102750},
  year         = {2024},
  url          = {https://doi.org/10.1016/j.ijinfomgt.2023.102750},
  doi          = {10.1016/J.IJINFOMGT.2023.102750},
  timestamp    = {Tue, 09 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijinfoman/DwivediJHDAAAAAAAABBBBBBBCCCC24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/atal/Kumar24,
  author       = {Akshat Kumar},
  editor       = {Mehdi Dastani and Jaime Sim{\~{a}}o Sichman and
                  Natasha Alechina and Virginia Dignum},
  title        = {Difference of Convex Functions Programming
                  for Policy Optimization in Reinforcement Learning},
  booktitle    = {Proceedings of the 23rd International Conference on
                  Autonomous Agents and Multiagent Systems, {AAMAS} 2024,
                  Auckland, New Zealand, May 6-10, 2024},
  pages        = {2339--2341},
  publisher    = {International Foundation for Autonomous Agents
                  and Multiagent Systems / {ACM}},
  year         = {2024},
  url          = {https://dl.acm.org/doi/10.5555/3635637.3663153},
  doi          = {10.5555/3635637.3663153},
  timestamp    = {Wed, 26 Jun 2024 14:06:50 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/Kumar24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2406-06856,
  author       = {Adhyyan Narang and Andrew Wagenmaker and
                  Lillian J. Ratliff and Kevin G. Jamieson},
  title        = {Sample Complexity Reduction via Policy Difference Estimation
                  in Tabular Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2406.06856},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.06856},
  doi          = {10.48550/ARXIV.2406.06856},
  eprinttype   = {arXiv},
  eprint       = {2406.06856},
  timestamp    = {Mon, 08 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-06856.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/apin/CaoLWFZ23,
  author       = {Jiaqing Cao and Quan Liu and Lan Wu and
                  Qiming Fu and Shan Zhong},
  title        = {Temporal-difference emphasis learning with regularized
                  correction for off-policy evaluation and control},
  journal      = {Appl. Intell.},
  volume       = {53},
  number       = {18},
  pages        = {20917--20937},
  year         = {2023},
  url          = {https://doi.org/10.1007/s10489-023-04579-4},
  doi          = {10.1007/S10489-023-04579-4},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/apin/CaoLWFZ23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icic/ParkKMNH23,
  author       = {Bumgeun Park and
                  Taeyoung Kim and
                  Woohyeon Moon and
                  Sarvar Hussain Nengroo and
                  Dongsoo Har},
  editor       = {De{-}Shuang Huang and
                  Prashan Premaratne and
                  Baohua Jin and
                  Boyang Qu and
                  Kang{-}Hyun Jo and
                  Abir Hussain},
  title        = {Off-Policy Reinforcement Learning with Loss Function Weighted by Temporal
                  Difference Error},
  booktitle    = {Advanced Intelligent Computing Technology and Applications - 19th
                  International Conference, {ICIC} 2023, Zhengzhou, China, August 10-13,
                  2023, Proceedings, Part {V}},
  series       = {Lecture Notes in Computer Science},
  volume       = {14090},
  pages        = {600--613},
  publisher    = {Springer},
  year         = {2023},
  url          = {https://doi.org/10.1007/978-981-99-4761-4_51},
  doi          = {10.1007/978-981-99-4761-4_51},
  timestamp    = {Tue, 08 Aug 2023 13:15:21 +0200},
  biburl       = {https://dblp.org/rec/conf/icic/ParkKMNH23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/uai/ChenMLYYG23,
  author       = {Xingguo Chen and Xingzhou Ma and Yang Li and
                  Guang Yang and Shangdong Yang and Yang Gao},
  editor       = {Robin J. Evans and Ilya Shpitser},
  title        = {Modified Retrace for Off-Policy
                  Temporal Difference Learning},
  booktitle    = {Uncertainty in Artificial Intelligence, {UAI} 2023,
                  July 31 - 4 August 2023, Pittsburgh, PA, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {216},
  pages        = {303--312},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v216/chen23a.html},
  timestamp    = {Wed, 01 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/ChenMLYYG23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/access/LeeKH22,
  author       = {Donghwan Lee and Do Wan Kim and Jianghai Hu},
  title        = {Distributed Off-Policy Temporal Difference Learning
                  Using Primal-Dual Method},
  journal      = {{IEEE} Access},
  volume       = {10},
  pages        = {107077--107094},
  year         = {2022},
  url          = {https://doi.org/10.1109/ACCESS.2022.3211395},
  doi          = {10.1109/ACCESS.2022.3211395},
  timestamp    = {Thu, 09 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/access/LeeKH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jmlr/JiaZ22,
  author       = {Yanwei Jia and Xun Yu Zhou},
  title        = {Policy Evaluation and Temporal-Difference Learning
                  in Continuous Time and Space: {A} Martingale Approach},
  journal      = {J. Mach. Learn. Res.},
  volume       = {23},
  pages        = {154:1--154:55},
  year         = {2022},
  url          = {https://jmlr.org/papers/v23/21-0947.html},
  timestamp    = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/JiaZ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icml/LiXL22,
  author       = {Yueheng Li and Guangming Xie and Zongqing Lu},
  editor       = {Kamalika Chaudhuri and Stefanie Jegelka and Le Song and
                  Csaba Szepesv{\'{a}}ri and Gang Niu and Sivan Sabato},
  title        = {Difference Advantage Estimation
                  for Multi-Agent Policy Gradients},
  booktitle    = {International Conference on Machine Learning, {ICML} 2022,
                  17-23 July 2022, Baltimore, Maryland, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {162},
  pages        = {13066--13085},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v162/li22w.html},
  timestamp    = {Tue, 12 Jul 2022 17:36:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/LiXL22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icml/TosattoPWM22,
  author       = {Samuele Tosatto and Andrew Patterson and
                  Martha White and Rupam Mahmood},
  editor       = {Kamalika Chaudhuri and Stefanie Jegelka and Le Song and
                  Csaba Szepesv{\'{a}}ri and Gang Niu and Sivan Sabato},
  title        = {A Temporal-Difference Approach
                  to Policy Gradient Estimation},
  booktitle    = {International Conference on Machine Learning, {ICML} 2022,
                  17-23 July 2022, Baltimore, Maryland, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {162},
  pages        = {21609--21632},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v162/tosatto22a.html},
  timestamp    = {Tue, 12 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/TosattoPWM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/Thomas22,
  author       = {Valentin Thomas},
  editor       = {Sanmi Koyejo and
                  S. Mohamed and
                  A. Agarwal and
                  Danielle Belgrave and
                  K. Cho and
                  A. Oh},
  title        = {On the role of overparameterization in off-policy Temporal Difference
                  learning with linear function approximation},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/f115f619b62833aadc5acb058975b0e6-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Thomas22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ssci/SaglamMK22,
  author       = {Baturay Saglam and Furkan B. Mutlu and Suleyman S. Kozat},
  editor       = {Hisao Ishibuchi and Chee{-}Keong Kwoh and Ah{-}Hwee Tan and
                  Dipti Srinivasan and Chunyan Miao and Anupam Trivedi and
                  Keeley A. Crockett},
  title        = {An Optimistic Approach to the Temporal Difference Error
                  in Off-Policy Actor-Critic Algorithms},
  booktitle    = {{IEEE} Symposium Series on Computational Intelligence,
                  {SSCI} 2022, Singapore, December 4-7, 2022},
  pages        = {875--883},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/SSCI51031.2022.10022298},
  doi          = {10.1109/SSCI51031.2022.10022298},
  timestamp    = {Wed, 08 Feb 2023 22:09:16 +0100},
  biburl       = {https://dblp.org/rec/conf/ssci/SaglamMK22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2202-02396,
  author       = {Samuele Tosatto and Andrew Patterson and
                  Martha White and A. Rupam Mahmood},
  title        = {A Temporal-Difference Approach
                  to Policy Gradient Estimation},
  journal      = {CoRR},
  volume       = {abs/2202.02396},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.02396},
  eprinttype   = {arXiv},
  eprint       = {2202.02396},
  timestamp    = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-02396.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2202-07960,
  author       = {Ziad Kobeissi and Francis R. Bach},
  title        = {On a Variance Reduction Correction of the Temporal Difference
                  for Policy Evaluation in the Stochastic Continuous Setting},
  journal      = {CoRR},
  volume       = {abs/2202.07960},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.07960},
  eprinttype   = {arXiv},
  eprint       = {2202.07960},
  timestamp    = {Sun, 03 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-07960.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2203-10172,
  author       = {Eric Graves and Sina Ghiassian},
  title        = {Importance Sampling Placement
                  in Off-Policy Temporal-Difference Methods},
  journal      = {CoRR},
  volume       = {abs/2203.10172},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2203.10172},
  doi          = {10.48550/ARXIV.2203.10172},
  eprinttype   = {arXiv},
  eprint       = {2203.10172},
  timestamp    = {Mon, 28 Mar 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2203-10172.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2212-13175,
  author       = {Bumgeun Park and Taeyoung Kim and Woohyeon Moon and
                  Luiz Felipe Vecchietti and Dongsoo Har},
  title        = {Off-Policy Reinforcement Learning with Loss Function
                  Weighted by Temporal Difference Error},
  journal      = {CoRR},
  volume       = {abs/2212.13175},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.13175},
  doi          = {10.48550/ARXIV.2212.13175},
  eprinttype   = {arXiv},
  eprint       = {2212.13175},
  timestamp    = {Wed, 04 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-13175.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/algorithms/WangLYP21,
  author       = {Huimu Wang and Zhen Liu and
                  Jianqiang Yi and Zhiqiang Pu},
  title        = {Multiagent Hierarchical Cognition Difference Policy
                  for Multiagent Cooperation},
  journal      = {Algorithms},
  volume       = {14},
  number       = {3},
  pages        = {98},
  year         = {2021},
  url          = {https://doi.org/10.3390/a14030098},
  doi          = {10.3390/A14030098},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/algorithms/WangLYP21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/isci/CaoLZFZ21,
  author       = {Jiaqing Cao and Quan Liu and Fei Zhu and
                  Qiming Fu and Shan Zhong},
  title        = {Gradient temporal-difference learning for off-policy
                  evaluation using emphatic weightings},
  journal      = {Inf. Sci.},
  volume       = {580},
  pages        = {311--330},
  year         = {2021},
  url          = {https://doi.org/10.1016/j.ins.2021.08.082},
  doi          = {10.1016/J.INS.2021.08.082},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/isci/CaoLZFZ21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/tac/KoppelWSSR21,
  author       = {Alec Koppel and
                  Garrett Warnell and
                  Ethan Stump and
                  Peter Stone and
                  Alejandro Ribeiro},
  title        = {Policy Evaluation in Continuous {MDPs} With Efficient Kernelized Gradient
                  Temporal Difference},
  journal      = {{IEEE} Trans. Autom. Control.},
  volume       = {66},
  number       = {4},
  pages        = {1856--1863},
  year         = {2021},
  url          = {https://doi.org/10.1109/TAC.2020.3029315},
  doi          = {10.1109/TAC.2020.3029315},
  timestamp    = {Wed, 07 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/KoppelWSSR21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/atal/CastelliniDOS21,
  author       = {Jacopo Castellini and Sam Devlin and
                  Frans A. Oliehoek and Rahul Savani},
  editor       = {Frank Dignum and Alessio Lomuscio and
                  Ulle Endriss and Ann Now{\'{e}}},
  title        = {Difference Rewards Policy Gradients},
  booktitle    = {{AAMAS} '21: 20th International Conference on
                  Autonomous Agents and Multiagent Systems,
                  Virtual Event, United Kingdom, May 3-7, 2021},
  pages        = {1475--1477},
  publisher    = {{ACM}},
  year         = {2021},
  url          = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1475.pdf},
  doi          = {10.5555/3463952.3464130},
  timestamp    = {Wed, 20 Jul 2022 17:03:47 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/CastelliniDOS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/cdc/StankovicBS21,
  author       = {Milos S. Stankovic and Marko Beko and Srdjan S. Stankovic},
  title        = {Distributed Consensus-Based Multi-Agent
                  Off-Policy Temporal-Difference Learning},
  booktitle    = {2021 60th {IEEE} Conference on Decision and Control (CDC),
                  Austin, TX, USA, December 14-17, 2021},
  pages        = {5976--5981},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/CDC45484.2021.9683607},
  doi          = {10.1109/CDC45484.2021.9683607},
  timestamp    = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cdc/StankovicBS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2108-06655,
  author       = {Yanwei Jia and Xun Yu Zhou},
  title        = {Policy Evaluation and Temporal-Difference Learning
                  in Continuous Time and Space: {A} Martingale Approach},
  journal      = {CoRR},
  volume       = {abs/2108.06655},
  year         = {2021},
  url          = {https://arxiv.org/abs/2108.06655},
  eprinttype   = {arXiv},
  eprint       = {2108.06655},
  timestamp    = {Wed, 18 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2108-06655.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2109-12002,
  author       = {Yaqi Duan and Mengdi Wang and Martin J. Wainwright},
  title        = {Optimal policy evaluation using kernel-based
                  temporal difference methods},
  journal      = {CoRR},
  volume       = {abs/2109.12002},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.12002},
  eprinttype   = {arXiv},
  eprint       = {2109.12002},
  timestamp    = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-12002.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jamia/TurveyKNHBHD20,
  author       = {Carolyn L. Turvey and Dawn M. Klein and Kim M. Nazi and
                  Susan T. Haidary and Omar Bouhaddou and Nelson Hsing and
                  Margaret Donahue},
  title        = {Racial differences in patient consent policy preferences
                  for electronic health information exchange},
  journal      = {J. Am. Medical Informatics Assoc.},
  volume       = {27},
  number       = {5},
  pages        = {717--725},
  year         = {2020},
  url          = {https://doi.org/10.1093/jamia/ocaa012},
  doi          = {10.1093/JAMIA/OCAA012},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jamia/TurveyKNHBHD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/cdc/HerediaM20,
  author       = {Paulo Heredia and Shaoshuai Mou},
  title        = {Finite-Sample Analysis of Multi-Agent Policy Evaluation
                  with Kernelized Gradient Temporal Difference},
  booktitle    = {59th {IEEE} Conference on Decision and Control, {CDC} 2020,
                  Jeju Island, South Korea, December 14-18, 2020},
  pages        = {5647--5652},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/CDC42340.2020.9303966},
  doi          = {10.1109/CDC42340.2020.9303966},
  timestamp    = {Fri, 04 Mar 2022 13:31:02 +0100},
  biburl       = {https://dblp.org/rec/conf/cdc/HerediaM20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ecai/DiddigiKB20,
  author       = {Raghuram Bharadwaj Diddigi and
                  Chandramouli Kamanchi and
                  Shalabh Bhatnagar},
  editor       = {De Giacomo, Giuseppe and
                  Alejandro Catal{\'{a}} and
                  Bistra Dilkina and
                  Michela Milano and
                  Sen{\'{e}}n Barro and
                  Alberto Bugar{\'{\i}}n and
                  J{\'{e}}r{\^{o}}me Lang},
  title        = {A Convergent Off-Policy Temporal Difference Algorithm},
  booktitle    = {{ECAI} 2020 - 24th European Conference on Artificial Intelligence,
                  29 August-8 September 2020, Santiago de Compostela, Spain, August
                  29 - September 8, 2020 - Including 10th Conference on Prestigious
                  Applications of Artificial Intelligence {(PAIS} 2020)},
  series       = {Frontiers in Artificial Intelligence and Applications},
  volume       = {325},
  pages        = {1103--1110},
  publisher    = {{IOS} Press},
  year         = {2020},
  url          = {https://doi.org/10.3233/FAIA200207},
  doi          = {10.3233/FAIA200207},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ecai/DiddigiKB20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/paams/Algarvio0S20,
  author       = {Hugo Algarvio and
                  Fernando Lopes and
                  Jo{\~{a}}o Santana},
  editor       = {Fernando de la Prieta and
                  Philippe Mathieu and
                  Jaime Andr{\'{e}}s Rinc{\'{o}}n Arango and
                  El Bolock, Alia and
                  Elena del Val and
                  Jaume Jord{\'{a}}n Prunera and
                  Jo{\~{a}}o Carneiro and
                  Rub{\'{e}}n Fuentes and
                  Fernando Lopes and
                  Vicente Juli{\'{a}}n},
  title        = {Renewable Energy Support Policy Based on Contracts for Difference
                  and Bilateral Negotiation},
  booktitle    = {Highlights in Practical Applications of Agents, Multi-Agent Systems,
                  and Trust-worthiness. The {PAAMS} Collection - International Workshops
                  of {PAAMS} 2020, L'Aquila, Italy, October 7-9, 2020, Proceedings},
  series       = {Communications in Computer and Information Science},
  volume       = {1233},
  pages        = {293--301},
  publisher    = {Springer},
  year         = {2020},
  url          = {https://doi.org/10.1007/978-3-030-51999-5_24},
  doi          = {10.1007/978-3-030-51999-5_24},
  timestamp    = {Thu, 04 Jul 2024 21:34:17 +0200},
  biburl       = {https://dblp.org/rec/conf/paams/Algarvio0S20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/sac/LeeHM20,
  author       = {Sangyeop Lee and Myoung Hoon Ha and Byung Ro Moon},
  editor       = {Chih{-}Cheng Hung and Tom{\'{a}}s Cern{\'{y}} and
                  Dongwan Shin and Alessio Bechini},
  title        = {Understanding features on evolutionary policy optimizations:
                  feature learning difference between gradient-based
                  and evolutionary policy optimizations},
  booktitle    = {{SAC} '20: The 35th {ACM/SIGAPP} Symposium on Applied Computing,
                  online event, [Brno, Czech Republic], March 30 - April 3, 2020},
  pages        = {1112--1118},
  publisher    = {{ACM}},
  year         = {2020},
  url          = {https://doi.org/10.1145/3341105.3373966},
  doi          = {10.1145/3341105.3373966},
  timestamp    = {Sun, 06 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sac/LeeHM20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2012-11258,
  author       = {Jacopo Castellini and Sam Devlin and
                  Frans A. Oliehoek and Rahul Savani},
  title        = {Difference Rewards
                  Policy Gradients},
  journal      = {CoRR},
  volume       = {abs/2012.11258},
  year         = {2020},
  url          = {https://arxiv.org/abs/2012.11258},
  eprinttype   = {arXiv},
  eprint       = {2012.11258},
  timestamp    = {Mon, 04 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2012-11258.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/cscw/GroverBMR19,
  author       = {Ted Grover and
                  Ayse Elvan Bayraktaroglu and
                  Gloria Mark and
                  Eugenia Ha Rim Rho},
  title        = {Moral and Affective Differences in {U.S.} Immigration Policy Debate
                  on {Twitter}},
  journal      = {Comput. Support. Cooperative Work.},
  volume       = {28},
  number       = {3-4},
  pages        = {317--355},
  year         = {2019},
  url          = {https://doi.org/10.1007/s10606-019-09357-w},
  doi          = {10.1007/S10606-019-09357-W},
  timestamp    = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/cscw/GroverBMR19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/RiquelmePVMGMBN19,
  author       = {Carlos Riquelme and Hugo Penedones and Damien Vincent and
                  Hartmut Maennel and Sylvain Gelly and Timothy A. Mann and
                  Andr{\'{e}} Barreto and Gergely Neu},
  editor       = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and
                  Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title        = {Adaptive Temporal-Difference Learning for Policy Evaluation
                  with Per-State Uncertainty Estimates},
  booktitle    = {Advances in Neural Information Processing Systems 32:
                  Annual Conference on Neural Information Processing Systems 2019,
                  NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages        = {11872--11882},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/816a6db41f0e44644bc65808b6db5ca4-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/RiquelmePVMGMBN19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-1906-07987,
  author       = {Hugo Penedones and
                  Carlos Riquelme and
                  Damien Vincent and
                  Hartmut Maennel and
                  Timothy A. Mann and
                  Andr{\'{e}} Barreto and
                  Sylvain Gelly and
                  Gergely Neu},
  title        = {Adaptive Temporal-Difference Learning for Policy Evaluation with Per-State
                  Uncertainty Estimates},
  journal      = {CoRR},
  volume       = {abs/1906.07987},
  year         = {2019},
  url          = {https://arxiv.org/abs/1906.07987},
  eprinttype   = {arXiv},
  eprint       = {1906.07987},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-07987.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-1911-05697,
  author       = {Diddigi, Raghuram Bharadwaj and
                  Kamanchi, Chandramouli and
                  Bhatnagar, Shalabh},
  title        = {A Convergent Off-Policy Temporal Difference Algorithm},
  journal      = {CoRR},
  volume       = {abs/1911.05697},
  year         = {2019},
  eprinttype   = {arXiv},
  eprint       = {1911.05697},
  url          = {http://arxiv.org/abs/1911.05697},
  timestamp    = {Mon, 02 Dec 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-05697.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NOTE(review): this record carries no author field (it is marked "Withdrawn.").
% Standard styles warn when an article entry lacks an author; confirm the
% author list against the arXiv record before citing, or cite with care.
@article{DBLP:journals/corr/abs-1907-00526,
  title        = {{FiDi-RL}: Incorporating Deep Reinforcement Learning with Finite-Difference
                  Policy Search for Efficient Learning of Continuous Control},
  journal      = {CoRR},
  volume       = {abs/1907.00526},
  year         = {2019},
  note         = {Withdrawn.},
  eprinttype   = {arXiv},
  eprint       = {1907.00526},
  url          = {http://arxiv.org/abs/1907.00526},
  timestamp    = {Wed, 05 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-00526.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/access/LiWSJ18,
  author       = {Li, Dazi and
                  Wang, Yuting and
                  Song, Tianheng and
                  Jin, Qibing},
  title        = {An Adaptive Policy Evaluation Network Based on Recursive Least Squares
                  Temporal Difference With Gradient Correction},
  journal      = {{IEEE} Access},
  volume       = {6},
  pages        = {7515--7525},
  year         = {2018},
  doi          = {10.1109/ACCESS.2018.2805298},
  url          = {https://doi.org/10.1109/ACCESS.2018.2805298},
  timestamp    = {Wed, 04 Jul 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/access/LiWSJ18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/mor/KarmakarB18,
  author       = {Karmakar, Prasenjit and
                  Bhatnagar, Shalabh},
  title        = {Two Time-Scale Stochastic Approximation with Controlled {Markov} Noise
                  and Off-Policy Temporal-Difference Learning},
  journal      = {Math. Oper. Res.},
  volume       = {43},
  number       = {1},
  pages        = {130--151},
  year         = {2018},
  doi          = {10.1287/MOOR.2017.0855},
  url          = {https://doi.org/10.1287/moor.2017.0855},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/mor/KarmakarB18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/soco/GivchiP18,
  author       = {Givchi, Arash and
                  Palhang, Maziar},
  title        = {Off-policy temporal difference learning with distribution adaptation
                  in fast mixing chains},
  journal      = {Soft Comput.},
  volume       = {22},
  number       = {3},
  pages        = {737--750},
  year         = {2018},
  doi          = {10.1007/S00500-017-2490-1},
  url          = {https://doi.org/10.1007/s00500-017-2490-1},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/soco/GivchiP18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-1712-09652,
  author       = {Yu, Huizhen},
  title        = {On Convergence of some Gradient-based Temporal-Differences Algorithms
                  for Off-Policy Learning},
  journal      = {CoRR},
  volume       = {abs/1712.09652},
  year         = {2017},
  eprinttype   = {arXiv},
  eprint       = {1712.09652},
  url          = {http://arxiv.org/abs/1712.09652},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1712-09652.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jmlr/SuttonMW16,
  author       = {Sutton, Richard S. and
                  Mahmood, Ashique Rupam and
                  White, Martha},
  title        = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference
                  Learning},
  journal      = {J. Mach. Learn. Res.},
  volume       = {17},
  pages        = {73:1--73:29},
  year         = {2016},
  url          = {https://jmlr.org/papers/v17/14-488.html},
  timestamp    = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/SuttonMW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/KarmakarMB16,
  author       = {Karmakar, Prasenjit and
                  Maity, Raj Kumar and
                  Bhatnagar, Shalabh},
  title        = {On a convergent off-policy temporal difference learning algorithm
                  in on-line learning environment},
  journal      = {CoRR},
  volume       = {abs/1605.06076},
  year         = {2016},
  eprinttype   = {arXiv},
  eprint       = {1605.06076},
  url          = {http://arxiv.org/abs/1605.06076},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/KarmakarMB16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/tele/JungK15a,
  author       = {Jung, Wonsuk and
                  Kwon, Youngsun},
  title        = {Differences between {LTE} and {3G} service customers: Business and policy
                  implications},
  journal      = {Telematics Informatics},
  volume       = {32},
  number       = {4},
  pages        = {667--680},
  year         = {2015},
  doi          = {10.1016/J.TELE.2015.03.001},
  url          = {https://doi.org/10.1016/j.tele.2015.03.001},
  timestamp    = {Mon, 24 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tele/JungK15a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/aips/DannN015,
  author       = {Dann, Christoph and
                  Neumann, Gerhard and
                  Peters, Jan},
  editor       = {Brafman, Ronen I. and
                  Domshlak, Carmel and
                  Haslum, Patrik and
                  Zilberstein, Shlomo},
  title        = {Policy Evaluation with Temporal Differences: {A} Survey and Comparison
                  (Extended Abstract)},
  booktitle    = {Proceedings of the Twenty-Fifth International Conference on Automated
                  Planning and Scheduling, {ICAPS} 2015, Jerusalem, Israel, June 7-11,
                  2015},
  pages        = {359--360},
  publisher    = {{AAAI} Press},
  year         = {2015},
  url          = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS15/paper/view/10635},
  timestamp    = {Tue, 26 May 2015 21:03:02 +0200},
  biburl       = {https://dblp.org/rec/conf/aips/DannN015.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/SuttonMW15,
  author       = {Sutton, Richard S. and
                  Mahmood, Ashique Rupam and
                  White, Martha},
  title        = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1503.04269},
  year         = {2015},
  eprinttype   = {arXiv},
  eprint       = {1503.04269},
  url          = {http://arxiv.org/abs/1503.04269},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SuttonMW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jmlr/DannNP14,
  author       = {Dann, Christoph and
                  Neumann, Gerhard and
                  Peters, Jan},
  title        = {Policy evaluation with temporal differences: a survey and comparison},
  journal      = {J. Mach. Learn. Res.},
  volume       = {15},
  number       = {1},
  pages        = {809--883},
  year         = {2014},
  doi          = {10.5555/2627435.2638563},
  url          = {https://dl.acm.org/doi/10.5555/2627435.2638563},
  timestamp    = {Thu, 02 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/DannNP14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jota/ChengFZLW14,
  author       = {Cheng, Kang and
                  Fei, Shumin and
                  Zhang, Kanjian and
                  Liu, Xiaomei and
                  Wei, Haikun},
  title        = {Temporal Difference-Based Policy Iteration for Optimal Control of
                  Stochastic Systems},
  journal      = {J. Optim. Theory Appl.},
  volume       = {163},
  number       = {1},
  pages        = {165--180},
  year         = {2014},
  doi          = {10.1007/S10957-013-0418-1},
  url          = {https://doi.org/10.1007/s10957-013-0418-1},
  timestamp    = {Tue, 07 Apr 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jota/ChengFZLW14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icis/KamKG13,
  author       = {Kam, Hwee{-}Joo and
                  Katerattanakul, Pairin and
                  Gogolin, Greg},
  editor       = {Baskerville, Richard L. and
                  Chau, Michael},
  title        = {A Cross Industry Study: Differences in Information Security Policy
                  Compliance between the Banking Industry and Higher Education},
  booktitle    = {Proceedings of the International Conference on Information Systems,
                  {ICIS} 2013, Milano, Italy, December 15-18, 2013},
  publisher    = {Association for Information Systems},
  year         = {2013},
  url          = {http://aisel.aisnet.org/icis2013/proceedings/SecurityOfIS/4},
  timestamp    = {Wed, 30 Oct 2019 17:01:36 +0100},
  biburl       = {https://dblp.org/rec/conf/icis/KamKG13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/db/KuhnJ09,
  author       = {Kuhn, Kristine M. and
                  Joshi, K. D.},
  title        = {The reported and revealed importance of job attributes to aspiring
                  information technology: a policy-capturing study of gender differences},
  journal      = {Data Base},
  volume       = {40},
  number       = {3},
  pages        = {40--60},
  year         = {2009},
  doi          = {10.1145/1592401.1592406},
  url          = {https://doi.org/10.1145/1592401.1592406},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/db/KuhnJ09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/esann/Heidrich-MeisnerI08,
  author       = {Heidrich{-}Meisner, Verena and
                  Igel, Christian},
  title        = {Similarities and differences between policy gradient methods and evolution
                  strategies},
  booktitle    = {16th European Symposium on Artificial Neural Networks, {ESANN} 2008,
                  Bruges, Belgium, April 23-25, 2008, Proceedings},
  pages        = {149--154},
  year         = {2008},
  url          = {https://www.esann.org/sites/default/files/proceedings/legacy/es2008-47.pdf},
  timestamp    = {Tue, 02 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/esann/Heidrich-MeisnerI08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/SuttonSM08,
  author       = {Sutton, Richard S. and
                  Szepesv{\'{a}}ri, Csaba and
                  Maei, Hamid Reza},
  editor       = {Koller, Daphne and
                  Schuurmans, Dale and
                  Bengio, Yoshua and
                  Bottou, L{\'{e}}on},
  title        = {A Convergent {$O(n)$} Temporal-difference Algorithm for Off-policy Learning
                  with Linear Function Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 21, Proceedings
                  of the Twenty-Second Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages        = {1609--1616},
  publisher    = {Curran Associates, Inc.},
  year         = {2008},
  url          = {https://proceedings.neurips.cc/paper/2008/hash/e0c641195b27425bb056ac56f8953d24-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SuttonSM08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/aaai/TaylorWS07,
  author       = {Taylor, Matthew E. and
                  Whiteson, Shimon and
                  Stone, Peter},
  title        = {Temporal Difference and Policy Search Methods for Reinforcement Learning:
                  An Empirical Comparison},
  booktitle    = {Proceedings of the Twenty-Second {AAAI} Conference on Artificial Intelligence,
                  July 22-26, 2007, Vancouver, British Columbia, Canada},
  pages        = {1675--1678},
  publisher    = {{AAAI} Press},
  year         = {2007},
  url          = {http://www.aaai.org/Library/AAAI/2007/aaai07-271.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/TaylorWS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/mss/GerberU06,
  author       = {Gerber, Anke and
                  Upmann, Thorsten},
  title        = {Bargaining solutions at work: Qualitative differences in policy implications},
  journal      = {Math. Soc. Sci.},
  volume       = {52},
  number       = {2},
  pages        = {162--175},
  year         = {2006},
  doi          = {10.1016/J.MATHSOCSCI.2006.06.002},
  url          = {https://doi.org/10.1016/j.mathsocsci.2006.06.002},
  timestamp    = {Mon, 24 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/mss/GerberU06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/tis/Wilkinson04,
  author       = {Wilkinson, Kenton T.},
  title        = {Language Difference and Communication Policy in the Information Age},
  journal      = {Inf. Soc.},
  volume       = {20},
  number       = {3},
  pages        = {217--229},
  year         = {2004},
  doi          = {10.1080/01972240490456890},
  url          = {https://doi.org/10.1080/01972240490456890},
  timestamp    = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tis/Wilkinson04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/iepol/Martins03,
  author       = {Martins, Maria Lurdes Castro},
  title        = {International differences in telecommunications demand},
  journal      = {Inf. Econ. Policy},
  volume       = {15},
  number       = {3},
  pages        = {291--303},
  year         = {2003},
  doi          = {10.1016/S0167-6245(02)00111-7},
  url          = {https://doi.org/10.1016/S0167-6245(02)00111-7},
  timestamp    = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/iepol/Martins03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icml/PrecupSD01,
  author       = {Precup, Doina and
                  Sutton, Richard S. and
                  Dasgupta, Sanjoy},
  editor       = {Brodley, Carla E. and
                  Danyluk, Andrea Pohoreckyj},
  title        = {Off-Policy Temporal Difference Learning with Function Approximation},
  booktitle    = {Proceedings of the Eighteenth International Conference on Machine
                  Learning {(ICML} 2001), Williams College, Williamstown, MA, USA, June
                  28 - July 1, 2001},
  pages        = {417--424},
  publisher    = {Morgan Kaufmann},
  year         = {2001},
  timestamp    = {Wed, 27 Nov 2002 10:53:35 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/PrecupSD01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijmi/Ishikawa00,
  author       = {Ishikawa, Kiyomu},
  title        = {Health data use and protection policy; based on differences by cultural
                  and social environment},
  journal      = {Int. J. Medical Informatics},
  volume       = {60},
  number       = {2},
  pages        = {119--125},
  year         = {2000},
  doi          = {10.1016/S1386-5056(00)00111-8},
  url          = {https://doi.org/10.1016/S1386-5056(00)00111-8},
  timestamp    = {Fri, 21 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijmi/Ishikawa00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

manage site settings

To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.