BibTeX records: David Silver

download as .bib file

@article{DBLP:journals/nature/MankowitzMZGSPL23,
  author       = {Mankowitz, Daniel J. and
                  Michi, Andrea and
                  Zhernov, Anton and
                  Gelmi, Marco and
                  Selvi, Marco and
                  Paduraru, Cosmin and
                  Leurent, Edouard and
                  Iqbal, Shariq and
                  Lespiau, Jean{-}Baptiste and
                  Ahern, Alex and
                  K{\"{o}}ppe, Thomas and
                  Millikin, Kevin and
                  Gaffney, Stephen and
                  Elster, Sophie and
                  Broshear, Jackson and
                  Gamble, Chris and
                  Milan, Kieran and
                  Tung, Robert and
                  Hwang, Minjae and
                  Cemgil, A. Taylan and
                  Barekatain, Mohammadamin and
                  Li, Yujia and
                  Mandhane, Amol and
                  Hubert, Thomas and
                  Schrittwieser, Julian and
                  Hassabis, Demis and
                  Kohli, Pushmeet and
                  Riedmiller, Martin A. and
                  Vinyals, Oriol and
                  Silver, David},
  title        = {Faster sorting algorithms discovered using deep reinforcement learning},
  journal      = {Nature},
  volume       = {618},
  number       = {7964},
  pages        = {257--263},
  year         = {2023},
  url          = {https://doi.org/10.1038/s41586-023-06004-9},
  doi          = {10.1038/S41586-023-06004-9},
  timestamp    = {Wed, 24 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/nature/MankowitzMZGSPL23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2312-11805,
  author       = {Anil, Rohan and
                  Borgeaud, Sebastian and
                  Wu, Yonghui and
                  Alayrac, Jean{-}Baptiste and
                  Yu, Jiahui and
                  Soricut, Radu and
                  Schalkwyk, Johan and
                  Dai, Andrew M. and
                  Hauth, Anja and
                  Millican, Katie and
                  Silver, David and
                  Petrov, Slav and
                  Johnson, Melvin and
                  Antonoglou, Ioannis and
                  Schrittwieser, Julian and
                  Glaese, Amelia and
                  Chen, Jilin and
                  Pitler, Emily and
                  Lillicrap, Timothy P. and
                  Lazaridou, Angeliki and
                  Firat, Orhan and
                  Molloy, James and
                  Isard, Michael and
                  Barham, Paul Ronald and
                  Hennigan, Tom and
                  Lee, Benjamin and
                  Viola, Fabio and
                  Reynolds, Malcolm and
                  Xu, Yuanzhong and
                  Doherty, Ryan and
                  Collins, Eli and
                  Meyer, Clemens and
                  Rutherford, Eliza and
                  Moreira, Erica and
                  Ayoub, Kareem and
                  Goel, Megha and
                  Tucker, George and
                  Piqueras, Enrique and
                  Krikun, Maxim and
                  Barr, Iain and
                  Savinov, Nikolay and
                  Danihelka, Ivo and
                  Roelofs, Becca and
                  White, Ana{\"{\i}}s and
                  Andreassen, Anders and
                  von Glehn, Tamara and
                  Yagati, Lakshman and
                  Kazemi, Mehran and
                  Gonzalez, Lucas and
                  Khalman, Misha and
                  Sygnowski, Jakub and
                  others},
  title        = {Gemini: {A} Family of Highly Capable Multimodal Models},
  journal      = {CoRR},
  volume       = {abs/2312.11805},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.11805},
  doi          = {10.48550/ARXIV.2312.11805},
  eprinttype   = {arXiv},
  eprint       = {2312.11805},
  timestamp    = {Tue, 16 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-11805.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nature/FawziBHHRB0RSSS22,
  author       = {Fawzi, Alhussein and
                  Balog, Matej and
                  Huang, Aja and
                  Hubert, Thomas and
                  Romera{-}Paredes, Bernardino and
                  Barekatain, Mohammadamin and
                  Novikov, Alexander and
                  Ruiz, Francisco J. R. and
                  Schrittwieser, Julian and
                  Swirszcz, Grzegorz and
                  Silver, David and
                  Hassabis, Demis and
                  Kohli, Pushmeet},
  title        = {Discovering faster matrix multiplication algorithms with reinforcement
                  learning},
  journal      = {Nature},
  volume       = {610},
  number       = {7930},
  pages        = {47--53},
  year         = {2022},
  url          = {https://doi.org/10.1038/s41586-022-05172-4},
  doi          = {10.1038/S41586-022-05172-4},
  timestamp    = {Fri, 22 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/nature/FawziBHHRB0RSSS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nn/MatsuoLSPSSUM22,
  author       = {Matsuo, Yutaka and
                  LeCun, Yann and
                  Sahani, Maneesh and
                  Precup, Doina and
                  Silver, David and
                  Sugiyama, Masashi and
                  Uchibe, Eiji and
                  Morimoto, Jun},
  title        = {Deep learning, reinforcement learning, and world models},
  journal      = {Neural Networks},
  volume       = {152},
  pages        = {267--275},
  year         = {2022},
  url          = {https://doi.org/10.1016/j.neunet.2022.03.037},
  doi          = {10.1016/J.NEUNET.2022.03.037},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nn/MatsuoLSPSSUM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/AntonoglouSOHS22,
  author       = {Antonoglou, Ioannis and
                  Schrittwieser, Julian and
                  Ozair, Sherjil and
                  Hubert, Thomas K. and
                  Silver, David},
  title        = {Planning in Stochastic Environments with a Learned Model},
  booktitle    = {The Tenth International Conference on Learning Representations, {ICLR}
                  2022, Virtual Event, April 25-29, 2022},
  publisher    = {OpenReview.net},
  year         = {2022},
  url          = {https://openreview.net/forum?id=X6D9bAHhBQ1},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/AntonoglouSOHS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/DanihelkaGSS22,
  author       = {Danihelka, Ivo and
                  Guez, Arthur and
                  Schrittwieser, Julian and
                  Silver, David},
  title        = {Policy improvement by planning with {Gumbel}},
  booktitle    = {The Tenth International Conference on Learning Representations, {ICLR}
                  2022, Virtual Event, April 25-29, 2022},
  publisher    = {OpenReview.net},
  year         = {2022},
  url          = {https://openreview.net/forum?id=bERaNdoegnO},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/DanihelkaGSS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/FlennerhagSZHS022,
  author       = {Flennerhag, Sebastian and
                  Schroecker, Yannick and
                  Zahavy, Tom and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {Bootstrapped Meta-Learning},
  booktitle    = {The Tenth International Conference on Learning Representations, {ICLR}
                  2022, Virtual Event, April 25-29, 2022},
  publisher    = {OpenReview.net},
  year         = {2022},
  url          = {https://openreview.net/forum?id=b-ny3x071E5},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/FlennerhagSZHS022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/SilverGDHH22,
  author       = {Silver, David and
                  Goyal, Anirudh and
                  Danihelka, Ivo and
                  Hessel, Matteo and
                  van Hasselt, Hado},
  title        = {Learning by Directional Gradient Descent},
  booktitle    = {The Tenth International Conference on Learning Representations, {ICLR}
                  2022, Virtual Event, April 25-29, 2022},
  publisher    = {OpenReview.net},
  year         = {2022},
  url          = {https://openreview.net/forum?id=5i7lJLuhTm},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/SilverGDHH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2206-15378,
  author       = {P{\'{e}}rolat, Julien and
                  De Vylder, Bart and
                  Hennes, Daniel and
                  Tarassov, Eugene and
                  Strub, Florian and
                  de Boer, Vincent and
                  Muller, Paul and
                  Connor, Jerome T. and
                  Burch, Neil and
                  Anthony, Thomas W. and
                  McAleer, Stephen and
                  Elie, Romuald and
                  Cen, Sarah H. and
                  Wang, Zhe and
                  Gruslys, Audrunas and
                  Malysheva, Aleksandra and
                  Khan, Mina and
                  Ozair, Sherjil and
                  Timbers, Finbarr and
                  Pohlen, Toby and
                  Eccles, Tom and
                  Rowland, Mark and
                  Lanctot, Marc and
                  Lespiau, Jean{-}Baptiste and
                  Piot, Bilal and
                  Omidshafiei, Shayegan and
                  Lockhart, Edward and
                  Sifre, Laurent and
                  Beauguerlange, Nathalie and
                  Munos, R{\'{e}}mi and
                  Silver, David and
                  Singh, Satinder and
                  Hassabis, Demis and
                  Tuyls, Karl},
  title        = {Mastering the Game of {Stratego} with Model-Free Multiagent Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2206.15378},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.15378},
  doi          = {10.48550/ARXIV.2206.15378},
  eprinttype   = {arXiv},
  eprint       = {2206.15378},
  timestamp    = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-15378.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SilverSPS21,
  author       = {Silver, David and
                  Singh, Satinder and
                  Precup, Doina and
                  Sutton, Richard S.},
  title        = {Reward is enough},
  journal      = {Artificial Intelligence},
  volume       = {299},
  pages        = {103535},
  year         = {2021},
  url          = {https://doi.org/10.1016/j.artint.2021.103535},
  doi          = {10.1016/J.ARTINT.2021.103535},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ai/SilverSPS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/DabneyBRDQBS21,
  author       = {Dabney, Will and
                  Barreto, Andr{\'{e}} and
                  Rowland, Mark and
                  Dadashi, Robert and
                  Quan, John and
                  Bellemare, Marc G. and
                  Silver, David},
  title        = {The Value-Improvement Path: Towards Better Representations for Reinforcement
                  Learning},
  booktitle    = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2021, Thirty-Third Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                  2021},
  pages        = {7160--7168},
  publisher    = {{AAAI} Press},
  year         = {2021},
  url          = {https://doi.org/10.1609/aaai.v35i8.16880},
  doi          = {10.1609/AAAI.V35I8.16880},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/DabneyBRDQBS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/HasseltMHSBB21,
  author       = {van Hasselt, Hado and
                  Madjiheurem, Sephora and
                  Hessel, Matteo and
                  Silver, David and
                  Barreto, Andr{\'{e}} and
                  Borsa, Diana},
  title        = {Expected Eligibility Traces},
  booktitle    = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2021, Thirty-Third Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                  2021},
  pages        = {9997--10005},
  publisher    = {{AAAI} Press},
  year         = {2021},
  url          = {https://doi.org/10.1609/aaai.v35i11.17200},
  doi          = {10.1609/AAAI.V35I11.17200},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HasseltMHSBB21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/HesselDVGSSWSH21,
  author       = {Hessel, Matteo and
                  Danihelka, Ivo and
                  Viola, Fabio and
                  Guez, Arthur and
                  Schmitt, Simon and
                  Sifre, Laurent and
                  Weber, Theophane and
                  Silver, David and
                  van Hasselt, Hado},
  editor       = {Meila, Marina and
                  Zhang, Tong},
  title        = {Muesli: Combining Improvements in Policy Optimization},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {4214--4226},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v139/hessel21a.html},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/HesselDVGSSWSH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/HubertSABSS21,
  author       = {Hubert, Thomas and
                  Schrittwieser, Julian and
                  Antonoglou, Ioannis and
                  Barekatain, Mohammadamin and
                  Schmitt, Simon and
                  Silver, David},
  editor       = {Meila, Marina and
                  Zhang, Tong},
  title        = {Learning and Planning in Complex Action Spaces},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {4476--4486},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v139/hubert21a.html},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/HubertSABSS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/FarquharBMFHHS21,
  author       = {Farquhar, Gregory and
                  Baumli, Kate and
                  Marinho, Zita and
                  Filos, Angelos and
                  Hessel, Matteo and
                  van Hasselt, Hado Philip and
                  Silver, David},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {Self-Consistent Models and Values},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {1111--1125},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/08f0efebb1c51aada9430a089a2050cc-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/FarquharBMFHHS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GrimmBFSS21,
  author       = {Grimm, Christopher and
                  Barreto, Andr{\'{e}} and
                  Farquhar, Gregory and
                  Silver, David and
                  Singh, Satinder},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {Proper Value Equivalence},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {7773--7786},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/400e5e6a7ce0c754f281525fae75a873-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GrimmBFSS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SchrittwieserHM21,
  author       = {Schrittwieser, Julian and
                  Hubert, Thomas and
                  Mandhane, Amol and
                  Barekatain, Mohammadamin and
                  Antonoglou, Ioannis and
                  Silver, David},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {Online and Offline Reinforcement Learning by Planning with a Learned
                  Model},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {27580--27591},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/e8258e5140317ff36c7f8225a3bf9590-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SchrittwieserHM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahZHXOKHSS21,
  author       = {Veeriah, Vivek and
                  Zahavy, Tom and
                  Hessel, Matteo and
                  Xu, Zhongwen and
                  Oh, Junhyuk and
                  Kemaev, Iurii and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {Discovery of Options via Meta-Learned Subgoals},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {29861--29873},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/fa246d0262c3925617b0c72bb20eeb1d-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/VeeriahZHXOKHSS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-06741,
  author       = {Veeriah, Vivek and
                  Zahavy, Tom and
                  Hessel, Matteo and
                  Xu, Zhongwen and
                  Oh, Junhyuk and
                  Kemaev, Iurii and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {Discovery of Options via Meta-Learned Subgoals},
  journal      = {CoRR},
  volume       = {abs/2102.06741},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.06741},
  eprinttype   = {arXiv},
  eprint       = {2102.06741},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-06741.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-06159,
  author       = {Hessel, Matteo and
                  Danihelka, Ivo and
                  Viola, Fabio and
                  Guez, Arthur and
                  Schmitt, Simon and
                  Sifre, Laurent and
                  Weber, Theophane and
                  Silver, David and
                  van Hasselt, Hado},
  title        = {Muesli: Combining Improvements in Policy Optimization},
  journal      = {CoRR},
  volume       = {abs/2104.06159},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.06159},
  eprinttype   = {arXiv},
  eprint       = {2104.06159},
  timestamp    = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-06159.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-06294,
  author       = {Schrittwieser, Julian and
                  Hubert, Thomas and
                  Mandhane, Amol and
                  Barekatain, Mohammadamin and
                  Antonoglou, Ioannis and
                  Silver, David},
  title        = {Online and Offline Reinforcement Learning by Planning with a Learned
                  Model},
  journal      = {CoRR},
  volume       = {abs/2104.06294},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.06294},
  eprinttype   = {arXiv},
  eprint       = {2104.06294},
  timestamp    = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-06294.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-06303,
  author       = {Hubert, Thomas and
                  Schrittwieser, Julian and
                  Antonoglou, Ioannis and
                  Barekatain, Mohammadamin and
                  Schmitt, Simon and
                  Silver, David},
  title        = {Learning and Planning in Complex Action Spaces},
  journal      = {CoRR},
  volume       = {abs/2104.06303},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.06303},
  eprinttype   = {arXiv},
  eprint       = {2104.06303},
  timestamp    = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-06303.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-10316,
  author       = {Grimm, Christopher and
                  Barreto, Andr{\'{e}} and
                  Farquhar, Gregory and
                  Silver, David and
                  Singh, Satinder},
  title        = {Proper Value Equivalence},
  journal      = {CoRR},
  volume       = {abs/2106.10316},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.10316},
  eprinttype   = {arXiv},
  eprint       = {2106.10316},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-10316.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-13105,
  author       = {Barreto, Andr{\'{e}} and
                  Borsa, Diana and
                  Hou, Shaobo and
                  Comanici, Gheorghe and
                  Ayg{\"{u}}n, Eser and
                  Hamel, Philippe and
                  Toyama, Daniel and
                  Hunt, Jonathan J. and
                  Mourad, Shibl and
                  Silver, David and
                  Precup, Doina},
  title        = {The Option Keyboard: Combining Skills in Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2106.13105},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.13105},
  eprinttype   = {arXiv},
  eprint       = {2106.13105},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-13105.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-04504,
  author       = {Flennerhag, Sebastian and
                  Schroecker, Yannick and
                  Zahavy, Tom and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {Bootstrapped Meta-Learning},
  journal      = {CoRR},
  volume       = {abs/2109.04504},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.04504},
  eprinttype   = {arXiv},
  eprint       = {2109.04504},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-04504.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2110-12840,
  author       = {Farquhar, Gregory and
                  Baumli, Kate and
                  Marinho, Zita and
                  Filos, Angelos and
                  Hessel, Matteo and
                  van Hasselt, Hado and
                  Silver, David},
  title        = {Self-Consistent Models and Values},
  journal      = {CoRR},
  volume       = {abs/2110.12840},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.12840},
  eprinttype   = {arXiv},
  eprint       = {2110.12840},
  timestamp    = {Thu, 28 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-12840.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nature/Senior0JKSGQZNB20,
  author       = {Senior, Andrew W. and
                  Evans, Richard and
                  Jumper, John and
                  Kirkpatrick, James and
                  Sifre, Laurent and
                  Green, Tim and
                  Qin, Chongli and
                  Z{\'{\i}}dek, Augustin and
                  Nelson, Alexander W. R. and
                  Bridgland, Alex and
                  Penedones, Hugo and
                  Petersen, Stig and
                  Simonyan, Karen and
                  Crossan, Steve and
                  Kohli, Pushmeet and
                  Jones, David T. and
                  Silver, David and
                  Kavukcuoglu, Koray and
                  Hassabis, Demis},
  title        = {Improved protein structure prediction using potentials from deep learning},
  journal      = {Nature},
  volume       = {577},
  number       = {7792},
  pages        = {706--710},
  year         = {2020},
  url          = {https://doi.org/10.1038/s41586-019-1923-7},
  doi          = {10.1038/S41586-019-1923-7},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nature/Senior0JKSGQZNB20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nature/SchrittwieserAH20,
  author       = {Schrittwieser, Julian and
                  Antonoglou, Ioannis and
                  Hubert, Thomas and
                  Simonyan, Karen and
                  Sifre, Laurent and
                  Schmitt, Simon and
                  Guez, Arthur and
                  Lockhart, Edward and
                  Hassabis, Demis and
                  Graepel, Thore and
                  Lillicrap, Timothy P. and
                  Silver, David},
  title        = {Mastering {Atari}, {Go}, chess and shogi by planning with a learned model},
  journal      = {Nature},
  volume       = {588},
  number       = {7839},
  pages        = {604--609},
  year         = {2020},
  url          = {https://doi.org/10.1038/s41586-020-03051-4},
  doi          = {10.1038/S41586-020-03051-4},
  timestamp    = {Fri, 02 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nature/SchrittwieserAH20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/pnas/BarretoHBSP20,
  author       = {Barreto, Andr{\'{e}} and
                  Hou, Shaobo and
                  Borsa, Diana and
                  Silver, David and
                  Precup, Doina},
  title        = {Fast reinforcement learning with generalized policy updates},
  journal      = {Proceedings of the National Academy of Sciences of the United States
                  of America},
  volume       = {117},
  number       = {48},
  pages        = {30079--30087},
  year         = {2020},
  url          = {https://doi.org/10.1073/pnas.1907370117},
  doi          = {10.1073/PNAS.1907370117},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/pnas/BarretoHBSP20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author       = {Osband, Ian and
                  Doron, Yotam and
                  Hessel, Matteo and
                  Aslanides, John and
                  Sezener, Eren and
                  Saraiva, Andre and
                  McKinney, Katrina and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba and
                  Singh, Satinder and
                  Van Roy, Benjamin and
                  Sutton, Richard S. and
                  Silver, David and
                  van Hasselt, Hado},
  title        = {Behaviour Suite for Reinforcement Learning},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/ZhengOHXKHSS20,
  author       = {Zheng, Zeyu and
                  Oh, Junhyuk and
                  Hessel, Matteo and
                  Xu, Zhongwen and
                  Kroiss, Manuel and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {What Can Learned Intrinsic Rewards Capture?},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {11436--11446},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/zheng20b.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ZhengOHXKHSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GrimmBSS20,
  author       = {Christopher Grimm and
                  Andr{\'{e}} Barreto and
                  Satinder Singh and
                  David Silver},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/3bb585ea00014b0e3ebe4c6dd165a358-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GrimmBSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GuezVWBKPSH20,
  author       = {Arthur Guez and
                  Fabio Viola and
                  Th{\'{e}}ophane Weber and
                  Lars Buesing and
                  Steven Kapturowski and
                  Doina Precup and
                  David Silver and
                  Nicolas Heess},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {Value-driven Hindsight Modelling},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/9381fc93ad66f9ec4b2eef71147a6665-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/GuezVWBKPSH20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/OhHCXHSS20,
  author       = {Junhyuk Oh and
                  Matteo Hessel and
                  Wojciech M. Czarnecki and
                  Zhongwen Xu and
                  Hado van Hasselt and
                  Satinder Singh and
                  David Silver},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {Discovering Reinforcement Learning Algorithms},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/0b96d81f0494fde5428c7aea243c9157-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/OhHCXHSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/XuHHOSS20,
  author       = {Zhongwen Xu and
                  Hado Philip van Hasselt and
                  Matteo Hessel and
                  Junhyuk Oh and
                  Satinder Singh and
                  David Silver},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {Meta-Gradient Reinforcement Learning with an Objective Discovered
                  Online},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/ae3d525daf92cee0003a7f2d92c34ea3-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/XuHHOSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZahavyXVHOHSS20,
  author       = {Tom Zahavy and
                  Zhongwen Xu and
                  Vivek Veeriah and
                  Matteo Hessel and
                  Junhyuk Oh and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {A Self-Tuning Actor-Critic Algorithm},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/f02208a057804ee16ac72ff4d3cec53b-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ZahavyXVHOHSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-08329,
  author       = {Arthur Guez and
                  Fabio Viola and
                  Th{\'{e}}ophane Weber and
                  Lars Buesing and
                  Steven Kapturowski and
                  Doina Precup and
                  David Silver and
                  Nicolas Heess},
  title        = {Value-driven Hindsight Modelling},
  journal      = {CoRR},
  volume       = {abs/2002.08329},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.08329},
  eprinttype    = {arXiv},
  eprint       = {2002.08329},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-08329.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-12928,
  author       = {Tom Zahavy and
                  Zhongwen Xu and
                  Vivek Veeriah and
                  Matteo Hessel and
                  Junhyuk Oh and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  title        = {Self-Tuning Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2002.12928},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.12928},
  eprinttype    = {arXiv},
  eprint       = {2002.12928},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-12928.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-02243,
  author       = {Will Dabney and
                  Andr{\'{e}} Barreto and
                  Mark Rowland and
                  Robert Dadashi and
                  John Quan and
                  Marc G. Bellemare and
                  David Silver},
  title        = {The Value-Improvement Path: Towards Better Representations for Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2006.02243},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.02243},
  eprinttype    = {arXiv},
  eprint       = {2006.02243},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-02243.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-01839,
  author       = {Hado van Hasselt and
                  Sephora Madjiheurem and
                  Matteo Hessel and
                  David Silver and
                  Andr{\'{e}} Barreto and
                  Diana Borsa},
  title        = {Expected Eligibility Traces},
  journal      = {CoRR},
  volume       = {abs/2007.01839},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.01839},
  eprinttype    = {arXiv},
  eprint       = {2007.01839},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-01839.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08433,
  author       = {Zhongwen Xu and
                  Hado van Hasselt and
                  Matteo Hessel and
                  Junhyuk Oh and
                  Satinder Singh and
                  David Silver},
  title        = {Meta-Gradient Reinforcement Learning with an Objective Discovered
                  Online},
  journal      = {CoRR},
  volume       = {abs/2007.08433},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.08433},
  eprinttype    = {arXiv},
  eprint       = {2007.08433},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-08433.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08794,
  author       = {Junhyuk Oh and
                  Matteo Hessel and
                  Wojciech M. Czarnecki and
                  Zhongwen Xu and
                  Hado van Hasselt and
                  Satinder Singh and
                  David Silver},
  title        = {Discovering Reinforcement Learning Algorithms},
  journal      = {CoRR},
  volume       = {abs/2007.08794},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.08794},
  eprinttype    = {arXiv},
  eprint       = {2007.08794},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-08794.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-03506,
  author       = {Christopher Grimm and
                  Andr{\'{e}} Barreto and
                  Satinder Singh and
                  David Silver},
  title        = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2011.03506},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.03506},
  eprinttype    = {arXiv},
  eprint       = {2011.03506},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-03506.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nature/VinyalsBCMDCCPE19,
  author       = {Oriol Vinyals and
                  Igor Babuschkin and
                  Wojciech M. Czarnecki and
                  Micha{\"{e}}l Mathieu and
                  Andrew Dudzik and
                  Junyoung Chung and
                  David H. Choi and
                  Richard Powell and
                  Timo Ewalds and
                  Petko Georgiev and
                  Junhyuk Oh and
                  Dan Horgan and
                  Manuel Kroiss and
                  Ivo Danihelka and
                  Aja Huang and
                  Laurent Sifre and
                  Trevor Cai and
                  John P. Agapiou and
                  Max Jaderberg and
                  Alexander Sasha Vezhnevets and
                  R{\'{e}}mi Leblond and
                  Tobias Pohlen and
                  Valentin Dalibard and
                  David Budden and
                  Yury Sulsky and
                  James Molloy and
                  Tom Le Paine and
                  {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
                  Ziyu Wang and
                  Tobias Pfaff and
                  Yuhuai Wu and
                  Roman Ring and
                  Dani Yogatama and
                  Dario W{\"{u}}nsch and
                  Katrina McKinney and
                  Oliver Smith and
                  Tom Schaul and
                  Timothy P. Lillicrap and
                  Koray Kavukcuoglu and
                  Demis Hassabis and
                  Chris Apps and
                  David Silver},
  title        = {Grandmaster level in {StarCraft} {II} using multi-agent reinforcement
                  learning},
  journal      = {Nature},
  volume       = {575},
  number       = {7782},
  pages        = {350--354},
  year         = {2019},
  url          = {https://doi.org/10.1038/s41586-019-1724-z},
  doi          = {10.1038/S41586-019-1724-Z},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nature/VinyalsBCMDCCPE19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/WeberHBS19,
  author       = {Th{\'{e}}ophane Weber and
                  Nicolas Heess and
                  Lars Buesing and
                  David Silver},
  editor       = {Kamalika Chaudhuri and
                  Masashi Sugiyama},
  title        = {Credit Assignment Techniques in Stochastic Computation Graphs},
  booktitle    = {The 22nd International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2019, 16-18 April 2019, Naha, Okinawa, Japan},
  series       = {Proceedings of Machine Learning Research},
  volume       = {89},
  pages        = {2650--2660},
  publisher    = {{PMLR}},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v89/weber19a.html},
  timestamp    = {Fri, 07 Jun 2019 09:03:47 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/WeberHBS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/BorsaBQMHMSS19,
  author       = {Diana Borsa and
                  Andr{\'{e}} Barreto and
                  John Quan and
                  Daniel J. Mankowitz and
                  Hado van Hasselt and
                  R{\'{e}}mi Munos and
                  David Silver and
                  Tom Schaul},
  title        = {Universal Successor Features Approximators},
  booktitle    = {7th International Conference on Learning Representations, {ICLR} 2019,
                  New Orleans, LA, USA, May 6-9, 2019},
  publisher    = {OpenReview.net},
  year         = {2019},
  url          = {https://openreview.net/forum?id=S1VWjiRcKX},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iclr/BorsaBQMHMSS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/GuezMGKRWRSOEWS19,
  author       = {Arthur Guez and
                  Mehdi Mirza and
                  Karol Gregor and
                  Rishabh Kabra and
                  S{\'{e}}bastien Racani{\`{e}}re and
                  Th{\'{e}}ophane Weber and
                  David Raposo and
                  Adam Santoro and
                  Laurent Orseau and
                  Tom Eccles and
                  Greg Wayne and
                  David Silver and
                  Timothy P. Lillicrap},
  editor       = {Kamalika Chaudhuri and
                  Ruslan Salakhutdinov},
  title        = {An Investigation of Model-Free Planning},
  booktitle    = {Proceedings of the 36th International Conference on Machine Learning,
                  {ICML} 2019, 9-15 June 2019, Long Beach, California, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {97},
  pages        = {2464--2473},
  publisher    = {{PMLR}},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v97/guez19a.html},
  timestamp    = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/GuezMGKRWRSOEWS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahHXRLOHSS19,
  author       = {Vivek Veeriah and
                  Matteo Hessel and
                  Zhongwen Xu and
                  Janarthanan Rajendran and
                  Richard L. Lewis and
                  Junhyuk Oh and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  editor       = {Hanna M. Wallach and
                  Hugo Larochelle and
                  Alina Beygelzimer and
                  Florence d'Alch{\'{e}}{-}Buc and
                  Emily B. Fox and
                  Roman Garnett},
  title        = {Discovery of Useful Questions as Auxiliary Tasks},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {9306--9317},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/10ff0b5e85e5b85cc3095d431d8c08b4-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/VeeriahHXRLOHSS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BarretoBHCAHTHM19,
  author       = {Andr{\'{e}} Barreto and
                  Diana Borsa and
                  Shaobo Hou and
                  Gheorghe Comanici and
                  Eser Ayg{\"{u}}n and
                  Philippe Hamel and
                  Daniel Toyama and
                  Jonathan J. Hunt and
                  Shibl Mourad and
                  David Silver and
                  Doina Precup},
  editor       = {Hanna M. Wallach and
                  Hugo Larochelle and
                  Alina Beygelzimer and
                  Florence d'Alch{\'{e}}{-}Buc and
                  Emily B. Fox and
                  Roman Garnett},
  title        = {The Option Keyboard: Combining Skills in Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {13031--13041},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/251c5ffd6b62cc21c446c963c76cf214-Abstract.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/BarretoBHCAHTHM19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-01761,
  author       = {Th{\'{e}}ophane Weber and
                  Nicolas Heess and
                  Lars Buesing and
                  David Silver},
  title        = {Credit Assignment Techniques in Stochastic Computation Graphs},
  journal      = {CoRR},
  volume       = {abs/1901.01761},
  year         = {2019},
  url          = {http://arxiv.org/abs/1901.01761},
  eprinttype    = {arXiv},
  eprint       = {1901.01761},
  timestamp    = {Thu, 31 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-01761.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-03559,
  author       = {Arthur Guez and
                  Mehdi Mirza and
                  Karol Gregor and
                  Rishabh Kabra and
                  S{\'{e}}bastien Racani{\`{e}}re and
                  Th{\'{e}}ophane Weber and
                  David Raposo and
                  Adam Santoro and
                  Laurent Orseau and
                  Tom Eccles and
                  Greg Wayne and
                  David Silver and
                  Timothy P. Lillicrap},
  title        = {An investigation of model-free planning},
  journal      = {CoRR},
  volume       = {abs/1901.03559},
  year         = {2019},
  url          = {http://arxiv.org/abs/1901.03559},
  eprinttype    = {arXiv},
  eprint       = {1901.03559},
  timestamp    = {Fri, 01 Feb 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-03559.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-10964,
  author       = {Andr{\'{e}} Barreto and
                  Diana Borsa and
                  John Quan and
                  Tom Schaul and
                  David Silver and
                  Matteo Hessel and
                  Daniel J. Mankowitz and
                  Augustin Z{\'{\i}}dek and
                  R{\'{e}}mi Munos},
  title        = {Transfer in Deep Reinforcement Learning Using Successor Features and
                  Generalised Policy Improvement},
  journal      = {CoRR},
  volume       = {abs/1901.10964},
  year         = {2019},
  url          = {http://arxiv.org/abs/1901.10964},
  eprinttype    = {arXiv},
  eprint       = {1901.10964},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-10964.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1907-02908,
  author       = {Matteo Hessel and
                  Hado van Hasselt and
                  Joseph Modayil and
                  David Silver},
  title        = {On Inductive Biases in Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1907.02908},
  year         = {2019},
  url          = {http://arxiv.org/abs/1907.02908},
  eprinttype    = {arXiv},
  eprint       = {1907.02908},
  timestamp    = {Mon, 08 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-02908.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1908-03568,
  author       = {Ian Osband and
                  Yotam Doron and
                  Matteo Hessel and
                  John Aslanides and
                  Eren Sezener and
                  Andre Saraiva and
                  Katrina McKinney and
                  Tor Lattimore and
                  Csaba Szepesv{\'{a}}ri and
                  Satinder Singh and
                  Benjamin Van Roy and
                  Richard S. Sutton and
                  David Silver and
                  Hado van Hasselt},
  title        = {Behaviour Suite for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1908.03568},
  year         = {2019},
  url          = {http://arxiv.org/abs/1908.03568},
  eprinttype    = {arXiv},
  eprint       = {1908.03568},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-04607,
  author       = {Vivek Veeriah and
                  Matteo Hessel and
                  Zhongwen Xu and
                  Richard L. Lewis and
                  Janarthanan Rajendran and
                  Junhyuk Oh and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  title        = {Discovery of Useful Questions as Auxiliary Tasks},
  journal      = {CoRR},
  volume       = {abs/1909.04607},
  year         = {2019},
  url          = {http://arxiv.org/abs/1909.04607},
  eprinttype    = {arXiv},
  eprint       = {1909.04607},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1909-04607.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-08265,
  author       = {Julian Schrittwieser and
                  Ioannis Antonoglou and
                  Thomas Hubert and
                  Karen Simonyan and
                  Laurent Sifre and
                  Simon Schmitt and
                  Arthur Guez and
                  Edward Lockhart and
                  Demis Hassabis and
                  Thore Graepel and
                  Timothy P. Lillicrap and
                  David Silver},
  title        = {Mastering {Atari}, {Go}, Chess and Shogi by Planning with a Learned Model},
  journal      = {CoRR},
  volume       = {abs/1911.08265},
  year         = {2019},
  url          = {http://arxiv.org/abs/1911.08265},
  eprinttype    = {arXiv},
  eprint       = {1911.08265},
  timestamp    = {Mon, 02 Dec 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-08265.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-05500,
  author       = {Zeyu Zheng and
                  Junhyuk Oh and
                  Matteo Hessel and
                  Zhongwen Xu and
                  Manuel Kroiss and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  title        = {What Can Learned Intrinsic Rewards Capture?},
  journal      = {CoRR},
  volume       = {abs/1912.05500},
  year         = {2019},
  url          = {http://arxiv.org/abs/1912.05500},
  eprinttype    = {arXiv},
  eprint       = {1912.05500},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1912-05500.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nn/SunSTH18,
  author       = {Ron Sun and
                  David Silver and
                  Gerald Tesauro and
                  Guang{-}Bin Huang},
  title        = {Introduction to the special issue on deep reinforcement learning:
                  An editorial},
  journal      = {Neural Networks},
  volume       = {107},
  pages        = {1--2},
  year         = {2018},
  url          = {https://doi.org/10.1016/j.neunet.2018.08.001},
  doi          = {10.1016/J.NEUNET.2018.08.001},
  timestamp    = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/nn/SunSTH18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/HesselMHSODHPAS18,
  author       = {Matteo Hessel and
                  Joseph Modayil and
                  Hado van Hasselt and
                  Tom Schaul and
                  Georg Ostrovski and
                  Will Dabney and
                  Dan Horgan and
                  Bilal Piot and
                  Mohammad Gheshlaghi Azar and
                  David Silver},
  editor       = {Sheila A. McIlraith and
                  Kilian Q. Weinberger},
  title        = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle    = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
                  (AAAI-18), the 30th Innovative Applications of Artificial Intelligence
                  (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
                  Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
                  2-7, 2018},
  pages        = {3215--3222},
  publisher    = {{AAAI} Press},
  year         = {2018},
  url          = {https://doi.org/10.1609/aaai.v32i1.11796},
  doi          = {10.1609/AAAI.V32I1.11796},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/HorganQBBHHS18,
  author       = {Dan Horgan and
                  John Quan and
                  David Budden and
                  Gabriel Barth{-}Maron and
                  Matteo Hessel and
                  Hado van Hasselt and
                  David Silver},
  title        = {Distributed Prioritized Experience Replay},
  booktitle    = {6th International Conference on Learning Representations, {ICLR} 2018,
                  Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher    = {OpenReview.net},
  year         = {2018},
  url          = {https://openreview.net/forum?id=H1Dy---0Z},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/HorganQBBHHS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/BarretoBQSSHMZM18,
  author       = {Andr{\'{e}} Barreto and
                  Diana Borsa and
                  John Quan and
                  Tom Schaul and
                  David Silver and
                  Matteo Hessel and
                  Daniel J. Mankowitz and
                  Augustin Z{\'{\i}}dek and
                  R{\'{e}}mi Munos},
  editor       = {Jennifer G. Dy and
                  Andreas Krause},
  title        = {Transfer in Deep Reinforcement Learning Using Successor Features and
                  Generalised Policy Improvement},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {510--519},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/barreto18a.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/BarretoBQSSHMZM18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/DabneyOSM18,
  author       = {Will Dabney and
                  Georg Ostrovski and
                  David Silver and
                  R{\'{e}}mi Munos},
  editor       = {Jennifer G. Dy and
                  Andreas Krause},
  title        = {Implicit Quantile Networks for Distributional Reinforcement Learning},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {1104--1113},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/dabney18a.html},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/DabneyOSM18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/GuezWASVWMS18,
  author       = {Arthur Guez and
                  Th{\'{e}}ophane Weber and
                  Ioannis Antonoglou and
                  Karen Simonyan and
                  Oriol Vinyals and
                  Daan Wierstra and
                  R{\'{e}}mi Munos and
                  David Silver},
  editor       = {Jennifer G. Dy and
                  Andreas Krause},
  title        = {Learning to Search with {MCTSnets}},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {1817--1826},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/guez18a.html},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/GuezWASVWMS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2018 proceedings paper. The arXiv record with the same title is
% DBLP:journals/corr/abs-1805-09801.
@inproceedings{DBLP:conf/nips/XuHS18,
  author       = {Xu, Zhongwen and
                  van Hasselt, Hado and
                  Silver, David},
  editor       = {Bengio, Samy and
                  Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Grauman, Kristen and
                  Cesa{-}Bianchi, Nicol{\`{o}} and
                  Garnett, Roman},
  title        = {Meta-Gradient Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {2402--2413},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/2715518c875999308842e3455eda2fe3-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/XuHS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1802.04697 (CoRR). The ICML 2018 record with the same title
% is DBLP:conf/icml/GuezWASVWMS18.
@article{DBLP:journals/corr/abs-1802-04697,
  author       = {Arthur Guez and
                  Th{\'{e}}ophane Weber and
                  Ioannis Antonoglou and
                  Karen Simonyan and
                  Oriol Vinyals and
                  Daan Wierstra and
                  R{\'{e}}mi Munos and
                  David Silver},
  title        = {Learning to Search with {MCTSnets}},
  journal      = {CoRR},
  volume       = {abs/1802.04697},
  year         = {2018},
  url          = {http://arxiv.org/abs/1802.04697},
  eprinttype   = {arXiv},
  eprint       = {1802.04697},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1802-04697.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1802.08294 (CoRR).
@article{DBLP:journals/corr/abs-1802-08294,
  author       = {Mankowitz, Daniel J. and
                  Z{\'{\i}}dek, Augustin and
                  Barreto, Andr{\'{e}} and
                  Horgan, Dan and
                  Hessel, Matteo and
                  Quan, John and
                  Oh, Junhyuk and
                  van Hasselt, Hado and
                  Silver, David and
                  Schaul, Tom},
  title        = {Unicorn: Continual Learning with a Universal, Off-policy Agent},
  journal      = {CoRR},
  volume       = {abs/1802.08294},
  year         = {2018},
  url          = {http://arxiv.org/abs/1802.08294},
  eprinttype   = {arXiv},
  eprint       = {1802.08294},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1802-08294.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1803.00933 (CoRR).
@article{DBLP:journals/corr/abs-1803-00933,
  author       = {Horgan, Dan and
                  Quan, John and
                  Budden, David and
                  Barth{-}Maron, Gabriel and
                  Hessel, Matteo and
                  van Hasselt, Hado and
                  Silver, David},
  title        = {Distributed Prioritized Experience Replay},
  journal      = {CoRR},
  volume       = {abs/1803.00933},
  year         = {2018},
  url          = {http://arxiv.org/abs/1803.00933},
  eprinttype   = {arXiv},
  eprint       = {1803.00933},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-00933.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1803.10760 (CoRR).
@article{DBLP:journals/corr/abs-1803-10760,
  author       = {Wayne, Greg and
                  Hung, Chia{-}Chun and
                  Amos, David and
                  Mirza, Mehdi and
                  Ahuja, Arun and
                  Grabska{-}Barwinska, Agnieszka and
                  Rae, Jack W. and
                  Mirowski, Piotr and
                  Leibo, Joel Z. and
                  Santoro, Adam and
                  Gemici, Mevlana and
                  Reynolds, Malcolm and
                  Harley, Tim and
                  Abramson, Josh and
                  Mohamed, Shakir and
                  Rezende, Danilo Jimenez and
                  Saxton, David and
                  Cain, Adam and
                  Hillier, Chloe and
                  Silver, David and
                  Kavukcuoglu, Koray and
                  Botvinick, Matthew M. and
                  Hassabis, Demis and
                  Lillicrap, Timothy P.},
  title        = {Unsupervised Predictive Memory in a Goal-Directed Agent},
  journal      = {CoRR},
  volume       = {abs/1803.10760},
  year         = {2018},
  url          = {http://arxiv.org/abs/1803.10760},
  eprinttype   = {arXiv},
  eprint       = {1803.10760},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-10760.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1805.09801 (CoRR). The NeurIPS 2018 record with the same
% title is DBLP:conf/nips/XuHS18.
@article{DBLP:journals/corr/abs-1805-09801,
  author       = {Xu, Zhongwen and
                  van Hasselt, Hado and
                  Silver, David},
  title        = {Meta-Gradient Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1805.09801},
  year         = {2018},
  url          = {http://arxiv.org/abs/1805.09801},
  eprinttype   = {arXiv},
  eprint       = {1805.09801},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1805-09801.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1806.06923 (CoRR).
@article{DBLP:journals/corr/abs-1806-06923,
  author       = {Dabney, Will and
                  Ostrovski, Georg and
                  Silver, David and
                  Munos, R{\'{e}}mi},
  title        = {Implicit Quantile Networks for Distributional Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1806.06923},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.06923},
  eprinttype   = {arXiv},
  eprint       = {1806.06923},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-06923.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1807.01281 (CoRR).
@article{DBLP:journals/corr/abs-1807-01281,
  author       = {Jaderberg, Max and
                  Czarnecki, Wojciech M. and
                  Dunning, Iain and
                  Marris, Luke and
                  Lever, Guy and
                  Casta{\~{n}}eda, Antonio Garc{\'{\i}}a and
                  Beattie, Charles and
                  Rabinowitz, Neil C. and
                  Morcos, Ari S. and
                  Ruderman, Avraham and
                  Sonnerat, Nicolas and
                  Green, Tim and
                  Deason, Louise and
                  Leibo, Joel Z. and
                  Silver, David and
                  Hassabis, Demis and
                  Kavukcuoglu, Koray and
                  Graepel, Thore},
  title        = {Human-level performance in first-person multiplayer games with population-based
                  deep reinforcement learning},
  journal      = {CoRR},
  volume       = {abs/1807.01281},
  year         = {2018},
  url          = {http://arxiv.org/abs/1807.01281},
  eprinttype   = {arXiv},
  eprint       = {1807.01281},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1807-01281.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1812.06855 (CoRR).
@article{DBLP:journals/corr/abs-1812-06855,
  author       = {Yutian Chen and
                  Aja Huang and
                  Ziyu Wang and
                  Ioannis Antonoglou and
                  Julian Schrittwieser and
                  David Silver and
                  Nando de Freitas},
  title        = {Bayesian Optimization in {AlphaGo}},
  journal      = {CoRR},
  volume       = {abs/1812.06855},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.06855},
  eprinttype   = {arXiv},
  eprint       = {1812.06855},
  timestamp    = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-06855.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1812.07626 (CoRR).
@article{DBLP:journals/corr/abs-1812-07626,
  author       = {Borsa, Diana and
                  Barreto, Andr{\'{e}} and
                  Quan, John and
                  Mankowitz, Daniel J. and
                  Munos, R{\'{e}}mi and
                  van Hasselt, Hado and
                  Silver, David and
                  Schaul, Tom},
  title        = {Universal Successor Features Approximators},
  journal      = {CoRR},
  volume       = {abs/1812.07626},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.07626},
  eprinttype   = {arXiv},
  eprint       = {1812.07626},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-07626.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Single-page journal article, vol. 60 no. 11 (2017). The journal name is
% stored in full so that the bibliography style decides any abbreviation.
@article{DBLP:journals/cacm/Silver17,
  author       = {David Silver},
  title        = {Technical perspective: Solving imperfect information games},
  journal      = {Communications of the {ACM}},
  volume       = {60},
  number       = {11},
  pages        = {80},
  year         = {2017},
  url          = {https://doi.org/10.1145/3131286},
  doi          = {10.1145/3131286},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/cacm/Silver17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, vol. 550 no. 7676 (2017). The journal name is stored in
% full, and the game name Go is brace-protected against title down-casing.
@article{DBLP:journals/nature/SilverSSAHGHBLB17,
  author       = {David Silver and
                  Julian Schrittwieser and
                  Karen Simonyan and
                  Ioannis Antonoglou and
                  Aja Huang and
                  Arthur Guez and
                  Thomas Hubert and
                  Lucas Baker and
                  Matthew Lai and
                  Adrian Bolton and
                  Yutian Chen and
                  Timothy P. Lillicrap and
                  Fan Hui and
                  Laurent Sifre and
                  George van den Driessche and
                  Thore Graepel and
                  Demis Hassabis},
  title        = {Mastering the game of {Go} without human knowledge},
  journal      = {Nature},
  volume       = {550},
  number       = {7676},
  pages        = {354--359},
  year         = {2017},
  url          = {https://doi.org/10.1038/nature24270},
  doi          = {10.1038/NATURE24270},
  timestamp    = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nature/SilverSSAHGHBLB17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2017 conference-track paper (OpenReview).
@inproceedings{DBLP:conf/iclr/JaderbergMCSLSK17,
  author       = {Jaderberg, Max and
                  Mnih, Volodymyr and
                  Czarnecki, Wojciech Marian and
                  Schaul, Tom and
                  Leibo, Joel Z. and
                  Silver, David and
                  Kavukcuoglu, Koray},
  title        = {Reinforcement Learning with Unsupervised Auxiliary Tasks},
  booktitle    = {5th International Conference on Learning Representations, {ICLR} 2017,
                  Toulon, France, April 24-26, 2017, Conference Track Proceedings},
  publisher    = {OpenReview.net},
  year         = {2017},
  url          = {https://openreview.net/forum?id=SJ6yPD5xg},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/JaderbergMCSLSK17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2017 conference paper (PMLR vol. 70).
@inproceedings{DBLP:conf/icml/JaderbergCOVGSK17,
  author       = {Jaderberg, Max and
                  Czarnecki, Wojciech Marian and
                  Osindero, Simon and
                  Vinyals, Oriol and
                  Graves, Alex and
                  Silver, David and
                  Kavukcuoglu, Koray},
  editor       = {Precup, Doina and
                  Teh, Yee Whye},
  title        = {Decoupled Neural Interfaces using Synthetic Gradients},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {1627--1635},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v70/jaderberg17a.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/JaderbergCOVGSK17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2017 conference paper (PMLR vol. 70). The coined name Predictron is
% brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icml/SilverHHSGHDRRB17,
  author       = {David Silver and
                  Hado van Hasselt and
                  Matteo Hessel and
                  Tom Schaul and
                  Arthur Guez and
                  Tim Harley and
                  Gabriel Dulac{-}Arnold and
                  David P. Reichert and
                  Neil C. Rabinowitz and
                  Andr{\'{e}} Barreto and
                  Thomas Degris},
  editor       = {Doina Precup and
                  Yee Whye Teh},
  title        = {The {Predictron}: End-To-End Learning and Planning},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {3191--3199},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v70/silver17a.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/SilverHHSGHDRRB17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2017 conference paper (PMLR vol. 70). The arXiv record with the same
% title is DBLP:journals/corr/VezhnevetsOSHJS17. The stylised word FeUdal is
% brace-protected so its inner capital survives sentence casing.
@inproceedings{DBLP:conf/icml/VezhnevetsOSHJS17,
  author       = {Alexander Sasha Vezhnevets and
                  Simon Osindero and
                  Tom Schaul and
                  Nicolas Heess and
                  Max Jaderberg and
                  David Silver and
                  Koray Kavukcuoglu},
  editor       = {Doina Precup and
                  Yee Whye Teh},
  title        = {{FeUdal} Networks for Hierarchical Reinforcement Learning},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {3540--3549},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v70/vezhnevets17a.html},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/VezhnevetsOSHJS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2017 proceedings paper (Advances in Neural Information Processing Systems 30).
@inproceedings{DBLP:conf/nips/XuMHBSS17,
  author       = {Xu, Zhongwen and
                  Modayil, Joseph and
                  van Hasselt, Hado and
                  Barreto, Andr{\'{e}} and
                  Silver, David and
                  Schaul, Tom},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {Natural Value Approximators: Learning when to Trust Past Estimates},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {2120--2128},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/fb60d411a5c5b72b2e7d3527cfc84fd0-Abstract.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/XuMHBSS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2017 proceedings paper (Advances in Neural Information Processing Systems 30).
@inproceedings{DBLP:conf/nips/BarretoDMHSSH17,
  author       = {Barreto, Andr{\'{e}} and
                  Dabney, Will and
                  Munos, R{\'{e}}mi and
                  Hunt, Jonathan J. and
                  Schaul, Tom and
                  Silver, David and
                  van Hasselt, Hado},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {Successor Features for Transfer in Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {4055--4065},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/350db081a661525235354dd3e19b8c05-Abstract.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/BarretoDMHSSH17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2017 proceedings paper. The arXiv record with the same title is
% DBLP:journals/corr/abs-1711-00832.
@inproceedings{DBLP:conf/nips/LanctotZGLTPSG17,
  author       = {Lanctot, Marc and
                  Zambaldi, Vin{\'{\i}}cius Flores and
                  Gruslys, Audrunas and
                  Lazaridou, Angeliki and
                  Tuyls, Karl and
                  P{\'{e}}rolat, Julien and
                  Silver, David and
                  Graepel, Thore},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {A Unified Game-Theoretic Approach to Multiagent Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {4190--4203},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/3323fe11e9595c09af38fe67567a9394-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/LanctotZGLTPSG17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2017 proceedings paper. The arXiv record with the same title is
% DBLP:journals/corr/WeberRRBGRBVHLP17.
@inproceedings{DBLP:conf/nips/RacaniereWRBGRB17,
  author       = {Racani{\`{e}}re, S{\'{e}}bastien and
                  Weber, Theophane and
                  Reichert, David P. and
                  Buesing, Lars and
                  Guez, Arthur and
                  Rezende, Danilo Jimenez and
                  Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                  Vinyals, Oriol and
                  Heess, Nicolas and
                  Li, Yujia and
                  Pascanu, Razvan and
                  Battaglia, Peter W. and
                  Hassabis, Demis and
                  Silver, David and
                  Wierstra, Daan},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {5690--5701},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/9e82757e9a1c12cb710ad680db11f6f1-Abstract.html},
  timestamp    = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/RacaniereWRBGRB17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1703.01161 (CoRR). The ICML 2017 record with the same title
% is DBLP:conf/icml/VezhnevetsOSHJS17. The stylised word FeUdal is
% brace-protected so its inner capital survives sentence casing.
@article{DBLP:journals/corr/VezhnevetsOSHJS17,
  author       = {Alexander Sasha Vezhnevets and
                  Simon Osindero and
                  Tom Schaul and
                  Nicolas Heess and
                  Max Jaderberg and
                  David Silver and
                  Koray Kavukcuoglu},
  title        = {{FeUdal} Networks for Hierarchical Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1703.01161},
  year         = {2017},
  url          = {http://arxiv.org/abs/1703.01161},
  eprinttype   = {arXiv},
  eprint       = {1703.01161},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/VezhnevetsOSHJS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1707.02286 (CoRR).
@article{DBLP:journals/corr/HeessTSLMWTEWER17,
  author       = {Heess, Nicolas and
                  TB, Dhruva and
                  Sriram, Srinivasan and
                  Lemmon, Jay and
                  Merel, Josh and
                  Wayne, Greg and
                  Tassa, Yuval and
                  Erez, Tom and
                  Wang, Ziyu and
                  Eslami, S. M. Ali and
                  Riedmiller, Martin A. and
                  Silver, David},
  title        = {Emergence of Locomotion Behaviours in Rich Environments},
  journal      = {CoRR},
  volume       = {abs/1707.02286},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.02286},
  eprinttype   = {arXiv},
  eprint       = {1707.02286},
  timestamp    = {Mon, 22 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HeessTSLMWTEWER17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1707.06203 (CoRR). The NIPS 2017 record with the same title
% is DBLP:conf/nips/RacaniereWRBGRB17.
@article{DBLP:journals/corr/WeberRRBGRBVHLP17,
  author       = {Weber, Theophane and
                  Racani{\`{e}}re, S{\'{e}}bastien and
                  Reichert, David P. and
                  Buesing, Lars and
                  Guez, Arthur and
                  Rezende, Danilo Jimenez and
                  Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                  Vinyals, Oriol and
                  Heess, Nicolas and
                  Li, Yujia and
                  Pascanu, Razvan and
                  Battaglia, Peter W. and
                  Silver, David and
                  Wierstra, Daan},
  title        = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1707.06203},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.06203},
  eprinttype   = {arXiv},
  eprint       = {1707.06203},
  timestamp    = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/WeberRRBGRBVHLP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1708.04782 (CoRR). The product name StarCraft is
% brace-protected so its inner capital survives sentence casing.
@article{DBLP:journals/corr/abs-1708-04782,
  author       = {Oriol Vinyals and
                  Timo Ewalds and
                  Sergey Bartunov and
                  Petko Georgiev and
                  Alexander Sasha Vezhnevets and
                  Michelle Yeo and
                  Alireza Makhzani and
                  Heinrich K{\"{u}}ttler and
                  John P. Agapiou and
                  Julian Schrittwieser and
                  John Quan and
                  Stephen Gaffney and
                  Stig Petersen and
                  Karen Simonyan and
                  Tom Schaul and
                  Hado van Hasselt and
                  David Silver and
                  Timothy P. Lillicrap and
                  Kevin Calderone and
                  Paul Keet and
                  Anthony Brunasso and
                  David Lawrence and
                  Anders Ekermo and
                  Jacob Repp and
                  Rodney Tsing},
  title        = {{StarCraft} {II:} {A} New Challenge for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1708.04782},
  year         = {2017},
  url          = {http://arxiv.org/abs/1708.04782},
  eprinttype   = {arXiv},
  eprint       = {1708.04782},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-04782.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1710.02298 (CoRR).
@article{DBLP:journals/corr/abs-1710-02298,
  author       = {Hessel, Matteo and
                  Modayil, Joseph and
                  van Hasselt, Hado and
                  Schaul, Tom and
                  Ostrovski, Georg and
                  Dabney, Will and
                  Horgan, Daniel and
                  Piot, Bilal and
                  Azar, Mohammad Gheshlaghi and
                  Silver, David},
  title        = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1710.02298},
  year         = {2017},
  url          = {http://arxiv.org/abs/1710.02298},
  eprinttype   = {arXiv},
  eprint       = {1710.02298},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-02298.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1711.00832 (CoRR). The NIPS 2017 record with the same title
% is DBLP:conf/nips/LanctotZGLTPSG17.
@article{DBLP:journals/corr/abs-1711-00832,
  author       = {Lanctot, Marc and
                  Zambaldi, Vin{\'{\i}}cius Flores and
                  Gruslys, Audrunas and
                  Lazaridou, Angeliki and
                  Tuyls, Karl and
                  P{\'{e}}rolat, Julien and
                  Silver, David and
                  Graepel, Thore},
  title        = {A Unified Game-Theoretic Approach to Multiagent Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1711.00832},
  year         = {2017},
  url          = {http://arxiv.org/abs/1711.00832},
  eprinttype   = {arXiv},
  eprint       = {1711.00832},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1711-00832.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1712.01815 (CoRR).
@article{DBLP:journals/corr/abs-1712-01815,
  author       = {Silver, David and
                  Hubert, Thomas and
                  Schrittwieser, Julian and
                  Antonoglou, Ioannis and
                  Lai, Matthew and
                  Guez, Arthur and
                  Lanctot, Marc and
                  Sifre, Laurent and
                  Kumaran, Dharshan and
                  Graepel, Thore and
                  Lillicrap, Timothy P. and
                  Simonyan, Karen and
                  Hassabis, Demis},
  title        = {Mastering Chess and Shogi by Self-Play with a General Reinforcement
                  Learning Algorithm},
  journal      = {CoRR},
  volume       = {abs/1712.01815},
  year         = {2017},
  url          = {http://arxiv.org/abs/1712.01815},
  eprinttype   = {arXiv},
  eprint       = {1712.01815},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1712-01815.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, vol. 529 no. 7587 (2016). The journal name is stored in
% full, and the game name Go is brace-protected against title down-casing.
@article{DBLP:journals/nature/SilverHMGSDSAPL16,
  author       = {David Silver and
                  Aja Huang and
                  Chris J. Maddison and
                  Arthur Guez and
                  Laurent Sifre and
                  George van den Driessche and
                  Julian Schrittwieser and
                  Ioannis Antonoglou and
                  Vedavyas Panneershelvam and
                  Marc Lanctot and
                  Sander Dieleman and
                  Dominik Grewe and
                  John Nham and
                  Nal Kalchbrenner and
                  Ilya Sutskever and
                  Timothy P. Lillicrap and
                  Madeleine Leach and
                  Koray Kavukcuoglu and
                  Thore Graepel and
                  Demis Hassabis},
  title        = {Mastering the game of {Go} with deep neural networks and tree search},
  journal      = {Nature},
  volume       = {529},
  number       = {7587},
  pages        = {484--489},
  year         = {2016},
  url          = {https://doi.org/10.1038/nature16961},
  doi          = {10.1038/NATURE16961},
  timestamp    = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nature/SilverHMGSDSAPL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2016 conference paper. The term Q-Learning is brace-protected so the
% capital Q is not down-cased by sentence-casing styles.
@inproceedings{DBLP:conf/aaai/HasseltGS16,
  author       = {Hado van Hasselt and
                  Arthur Guez and
                  David Silver},
  editor       = {Dale Schuurmans and
                  Michael P. Wellman},
  title        = {Deep Reinforcement Learning with Double {Q-Learning}},
  booktitle    = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence,
                  February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages        = {2094--2100},
  publisher    = {{AAAI} Press},
  year         = {2016},
  url          = {https://doi.org/10.1609/aaai.v30i1.10295},
  doi          = {10.1609/AAAI.V30I1.10295},
  timestamp    = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2016 conference paper (JMLR W&CP vol. 48). The arXiv record with the
% same title is DBLP:journals/corr/MnihBMGLHSK16.
@inproceedings{DBLP:conf/icml/MnihBMGLHSK16,
  author       = {Volodymyr Mnih and
                  Adri{\`{a}} Puigdom{\`{e}}nech Badia and
                  Mehdi Mirza and
                  Alex Graves and
                  Timothy P. Lillicrap and
                  Tim Harley and
                  David Silver and
                  Koray Kavukcuoglu},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Asynchronous Methods for Deep Reinforcement Learning},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, {USA}, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1928--1937},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/mniha16.html},
  timestamp    = {Wed, 29 May 2019 08:41:46 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/MnihBMGLHSK16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2016 proceedings paper (Advances in Neural Information Processing Systems 29).
@inproceedings{DBLP:conf/nips/HasseltGHMS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Hessel, Matteo and
                  Mnih, Volodymyr and
                  Silver, David},
  editor       = {Lee, Daniel D. and
                  Sugiyama, Masashi and
                  von Luxburg, Ulrike and
                  Guyon, Isabelle and
                  Garnett, Roman},
  title        = {Learning values across many orders of magnitude},
  booktitle    = {Advances in Neural Information Processing Systems 29: Annual Conference
                  on Neural Information Processing Systems 2016, December 5-10, 2016,
                  Barcelona, Spain},
  pages        = {4287--4295},
  year         = {2016},
  url          = {https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/HasseltGHMS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2016 conference-track paper whose only link is arXiv 1509.02971.
% The eprint fields carry the arXiv identifier taken from the url.
@inproceedings{DBLP:journals/corr/LillicrapHPHETS15,
  author       = {Timothy P. Lillicrap and
                  Jonathan J. Hunt and
                  Alexander Pritzel and
                  Nicolas Heess and
                  Tom Erez and
                  Yuval Tassa and
                  David Silver and
                  Daan Wierstra},
  editor       = {Yoshua Bengio and
                  Yann LeCun},
  title        = {Continuous control with deep reinforcement learning},
  booktitle    = {4th International Conference on Learning Representations, {ICLR} 2016,
                  San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year         = {2016},
  url          = {http://arxiv.org/abs/1509.02971},
  eprinttype   = {arXiv},
  eprint       = {1509.02971},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2016 conference paper hosted on arXiv. The arXiv id from the url is also
% recorded in eprint/eprinttype so eprint-aware styles can print and link it.
@inproceedings{DBLP:journals/corr/SchaulQAS15,
  author       = {Tom Schaul and
                  John Quan and
                  Ioannis Antonoglou and
                  David Silver},
  editor       = {Yoshua Bengio and
                  Yann LeCun},
  title        = {Prioritized Experience Replay},
  booktitle    = {4th International Conference on Learning Representations, {ICLR} 2016,
                  San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year         = {2016},
  url          = {http://arxiv.org/abs/1511.05952},
  eprinttype   = {arXiv},
  eprint       = {1511.05952},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SchaulQAS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1602.01783), 2016. Names use the "Last, First" form.
@article{DBLP:journals/corr/MnihBMGLHSK16,
  author       = {Mnih, Volodymyr and
                  Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                  Mirza, Mehdi and
                  Graves, Alex and
                  Lillicrap, Timothy P. and
                  Harley, Tim and
                  Silver, David and
                  Kavukcuoglu, Koray},
  title        = {Asynchronous Methods for Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1602.01783},
  year         = {2016},
  url          = {http://arxiv.org/abs/1602.01783},
  eprinttype   = {arXiv},
  eprint       = {1602.01783},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MnihBMGLHSK16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1602.07714), 2016. Names use the "von Last, First" form.
@article{DBLP:journals/corr/HasseltGHS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Hessel, Matteo and
                  Silver, David},
  title        = {Learning functions across many orders of magnitudes},
  journal      = {CoRR},
  volume       = {abs/1602.07714},
  year         = {2016},
  url          = {http://arxiv.org/abs/1602.07714},
  eprinttype   = {arXiv},
  eprint       = {1602.07714},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HasseltGHS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1603.01121), 2016. Names use the "Last, First" form.
@article{DBLP:journals/corr/HeinrichS16,
  author       = {Heinrich, Johannes and
                  Silver, David},
  title        = {Deep Reinforcement Learning from Self-Play in Imperfect-Information
                  Games},
  journal      = {CoRR},
  volume       = {abs/1603.01121},
  year         = {2016},
  url          = {http://arxiv.org/abs/1603.01121},
  eprinttype   = {arXiv},
  eprint       = {1603.01121},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HeinrichS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1606.05312), 2016. Names use the "Last, First" form.
@article{DBLP:journals/corr/BarretoMSS16,
  author       = {Barreto, Andr{\'{e}} and
                  Munos, R{\'{e}}mi and
                  Schaul, Tom and
                  Silver, David},
  title        = {Successor Features for Transfer in Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1606.05312},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.05312},
  eprinttype   = {arXiv},
  eprint       = {1606.05312},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/BarretoMSS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1610.05182), 2016. Names use the "Last, First" form.
@article{DBLP:journals/corr/HeessWTLRS16,
  author       = {Heess, Nicolas and
                  Wayne, Gregory and
                  Tassa, Yuval and
                  Lillicrap, Timothy P. and
                  Riedmiller, Martin A. and
                  Silver, David},
  title        = {Learning and Transfer of Modulated Locomotor Controllers},
  journal      = {CoRR},
  volume       = {abs/1610.05182},
  year         = {2016},
  url          = {http://arxiv.org/abs/1610.05182},
  eprinttype   = {arXiv},
  eprint       = {1610.05182},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HeessWTLRS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1611.05397), 2016. Names use the "Last, First" form.
@article{DBLP:journals/corr/JaderbergMCSLSK16,
  author       = {Jaderberg, Max and
                  Mnih, Volodymyr and
                  Czarnecki, Wojciech Marian and
                  Schaul, Tom and
                  Leibo, Joel Z. and
                  Silver, David and
                  Kavukcuoglu, Koray},
  title        = {Reinforcement Learning with Unsupervised Auxiliary Tasks},
  journal      = {CoRR},
  volume       = {abs/1611.05397},
  year         = {2016},
  url          = {http://arxiv.org/abs/1611.05397},
  eprinttype   = {arXiv},
  eprint       = {1611.05397},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/JaderbergMCSLSK16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1612.08810), 2016. The coined name "Predictron" is
% brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/SilverHHSGHDRRB16,
  author       = {David Silver and
                  Hado van Hasselt and
                  Matteo Hessel and
                  Tom Schaul and
                  Arthur Guez and
                  Tim Harley and
                  Gabriel Dulac{-}Arnold and
                  David P. Reichert and
                  Neil C. Rabinowitz and
                  Andr{\'{e}} Barreto and
                  Thomas Degris},
  title        = {The {Predictron}: End-To-End Learning and Planning},
  journal      = {CoRR},
  volume       = {abs/1612.08810},
  year         = {2016},
  url          = {http://arxiv.org/abs/1612.08810},
  eprinttype   = {arXiv},
  eprint       = {1612.08810},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/SilverHHSGHDRRB16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, volume 518, number 7540 (2015). The journal is stored under its
% full name; abbreviating it is left to the bibliography style.
@article{DBLP:journals/nature/MnihKSRVBGRFOPB15,
  author       = {Volodymyr Mnih and
                  Koray Kavukcuoglu and
                  David Silver and
                  Andrei A. Rusu and
                  Joel Veness and
                  Marc G. Bellemare and
                  Alex Graves and
                  Martin A. Riedmiller and
                  Andreas Fidjeland and
                  Georg Ostrovski and
                  Stig Petersen and
                  Charles Beattie and
                  Amir Sadik and
                  Ioannis Antonoglou and
                  Helen King and
                  Dharshan Kumaran and
                  Daan Wierstra and
                  Shane Legg and
                  Demis Hassabis},
  title        = {Human-level control through deep reinforcement learning},
  journal      = {Nature},
  volume       = {518},
  number       = {7540},
  pages        = {529--533},
  year         = {2015},
  url          = {https://doi.org/10.1038/nature14236},
  doi          = {10.1038/NATURE14236},
  timestamp    = {Mon, 08 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CSCW 2015 conference paper (pages 82--94). Names use the "Last, First" form.
@inproceedings{DBLP:conf/cscw/VinesWSWO15,
  author       = {Vines, John and
                  Wright, Peter C. and
                  Silver, David and
                  Winchcombe, Maggie and
                  Olivier, Patrick},
  editor       = {Cosley, Dan and
                  Forte, Andrea and
                  Ciolfi, Luigina and
                  McDonald, David},
  title        = {Authenticity, Relatability and Collaborative Approaches to Sharing
                  Knowledge about Assistive Living Technology},
  booktitle    = {Proceedings of the 18th {ACM} Conference on Computer Supported Cooperative
                  Work {\&} Social Computing, {CSCW} 2015, Vancouver, BC, Canada,
                  March 14 - 18, 2015},
  pages        = {82--94},
  publisher    = {{ACM}},
  year         = {2015},
  url          = {https://doi.org/10.1145/2675133.2675222},
  doi          = {10.1145/2675133.2675222},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/cscw/VinesWSWO15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2015 conference paper (JMLR W&CP volume 37, pages 805--813). Names use the "Last, First" form.
@inproceedings{DBLP:conf/icml/HeinrichLS15,
  author       = {Heinrich, Johannes and
                  Lanctot, Marc and
                  Silver, David},
  editor       = {Bach, Francis R. and
                  Blei, David M.},
  title        = {Fictitious Self-Play in Extensive-Form Games},
  booktitle    = {Proceedings of the 32nd International Conference on Machine Learning,
                  {ICML} 2015, Lille, France, 6-11 July 2015},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {37},
  pages        = {805--813},
  publisher    = {JMLR.org},
  year         = {2015},
  url          = {http://proceedings.mlr.press/v37/heinrich15.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/HeinrichLS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2015 conference paper (JMLR W&CP volume 37, pages 1312--1320). Names use the "Last, First" form.
@inproceedings{DBLP:conf/icml/SchaulHGS15,
  author       = {Schaul, Tom and
                  Horgan, Daniel and
                  Gregor, Karol and
                  Silver, David},
  editor       = {Bach, Francis R. and
                  Blei, David M.},
  title        = {Universal Value Function Approximators},
  booktitle    = {Proceedings of the 32nd International Conference on Machine Learning,
                  {ICML} 2015, Lille, France, 6-11 July 2015},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {37},
  pages        = {1312--1320},
  publisher    = {JMLR.org},
  year         = {2015},
  url          = {http://proceedings.mlr.press/v37/schaul15.html},
  timestamp    = {Wed, 29 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SchaulHGS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2015 conference paper (pages 554--560). Names use the "Last, First" form.
@inproceedings{DBLP:conf/ijcai/HeinrichS15,
  author       = {Heinrich, Johannes and
                  Silver, David},
  editor       = {Yang, Qiang and
                  Wooldridge, Michael J.},
  title        = {Smooth {UCT} Search in Computer Poker},
  booktitle    = {Proceedings of the Twenty-Fourth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2015, Buenos Aires, Argentina, July
                  25-31, 2015},
  pages        = {554--560},
  publisher    = {{AAAI} Press},
  year         = {2015},
  url          = {http://ijcai.org/Abstract/15/084},
  timestamp    = {Tue, 20 Aug 2019 16:16:43 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/HeinrichS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IROS 2015 conference paper (pages 1144--1151). Names use the "Last, First" form.
@inproceedings{DBLP:conf/iros/BradleyCSPHRS15,
  author       = {Bradley, David M. and
                  Chang, Jonathan K. and
                  Silver, David and
                  Powers, Matthew and
                  Herman, Herman and
                  Rander, Peter and
                  Stentz, Anthony},
  title        = {Scene understanding for a high-mobility walking robot},
  booktitle    = {2015 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, {IROS} 2015, Hamburg, Germany, September 28 - October 2,
                  2015},
  pages        = {1144--1151},
  publisher    = {{IEEE}},
  year         = {2015},
  url          = {https://doi.org/10.1109/IROS.2015.7353514},
  doi          = {10.1109/IROS.2015.7353514},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/BradleyCSPHRS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2015 conference paper (pages 2944--2952). Names use the "Last, First" form.
@inproceedings{DBLP:conf/nips/HeessWSLET15,
  author       = {Heess, Nicolas and
                  Wayne, Gregory and
                  Silver, David and
                  Lillicrap, Timothy P. and
                  Erez, Tom and
                  Tassa, Yuval},
  editor       = {Cortes, Corinna and
                  Lawrence, Neil D. and
                  Lee, Daniel D. and
                  Sugiyama, Masashi and
                  Garnett, Roman},
  title        = {Learning Continuous Control Policies by Stochastic Value Gradients},
  booktitle    = {Advances in Neural Information Processing Systems 28: Annual Conference
                  on Neural Information Processing Systems 2015, December 7-12, 2015,
                  Montreal, Quebec, Canada},
  pages        = {2944--2952},
  year         = {2015},
  url          = {https://proceedings.neurips.cc/paper/2015/hash/148510031349642de5ca0c544f31b2ef-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/HeessWSLET15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2015 conference paper hosted on arXiv. "Go" (the game) is brace-protected so
% sentence-casing styles keep its capital; the arXiv id from the url is also
% recorded in eprint/eprinttype.
@inproceedings{DBLP:journals/corr/MaddisonHSS14,
  author       = {Chris J. Maddison and
                  Aja Huang and
                  Ilya Sutskever and
                  David Silver},
  editor       = {Yoshua Bengio and
                  Yann LeCun},
  title        = {Move Evaluation in {Go} Using Deep Convolutional Neural Networks},
  booktitle    = {3rd International Conference on Learning Representations, {ICLR} 2015,
                  San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  year         = {2015},
  url          = {http://arxiv.org/abs/1412.6564},
  eprinttype   = {arXiv},
  eprint       = {1412.6564},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MaddisonHSS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1501.03959), 2015. Names use the "Last, First" form.
@article{DBLP:journals/corr/CiosekS15,
  author       = {Ciosek, Kamil and
                  Silver, David},
  title        = {Value Iteration with Options and State Aggregation},
  journal      = {CoRR},
  volume       = {abs/1501.03959},
  year         = {2015},
  url          = {http://arxiv.org/abs/1501.03959},
  eprinttype   = {arXiv},
  eprint       = {1501.03959},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/CiosekS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1507.04296), 2015.
% NOTE(review): "De Maria" is written in "Last, First" form on the assumption that it
% is a compound surname; in "First Last" order BibTeX would take only "Maria" as the
% surname. Confirm against the paper's author list.
@article{DBLP:journals/corr/NairSBAFMPSBPLM15,
  author       = {Arun Nair and
                  Praveen Srinivasan and
                  Sam Blackwell and
                  Cagdas Alcicek and
                  Rory Fearon and
                  De Maria, Alessandro and
                  Vedavyas Panneershelvam and
                  Mustafa Suleyman and
                  Charles Beattie and
                  Stig Petersen and
                  Shane Legg and
                  Volodymyr Mnih and
                  Koray Kavukcuoglu and
                  David Silver},
  title        = {Massively Parallel Methods for Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1507.04296},
  year         = {2015},
  url          = {http://arxiv.org/abs/1507.04296},
  eprinttype   = {arXiv},
  eprint       = {1507.04296},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/NairSBAFMPSBPLM15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1509.06461), 2015. "Q-learning" is brace-protected so
% sentence-casing styles do not lowercase the Q.
@article{DBLP:journals/corr/HasseltGS15,
  author       = {Hado van Hasselt and
                  Arthur Guez and
                  David Silver},
  title        = {Deep Reinforcement Learning with Double {Q-learning}},
  journal      = {CoRR},
  volume       = {abs/1509.06461},
  year         = {2015},
  url          = {http://arxiv.org/abs/1509.06461},
  eprinttype   = {arXiv},
  eprint       = {1509.06461},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HasseltGS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1510.09142), 2015. Names use the "Last, First" form.
@article{DBLP:journals/corr/HeessWSLTE15,
  author       = {Heess, Nicolas and
                  Wayne, Greg and
                  Silver, David and
                  Lillicrap, Timothy P. and
                  Tassa, Yuval and
                  Erez, Tom},
  title        = {Learning Continuous Control Policies by Stochastic Value Gradients},
  journal      = {CoRR},
  volume       = {abs/1510.09142},
  year         = {2015},
  url          = {http://arxiv.org/abs/1510.09142},
  eprinttype   = {arXiv},
  eprint       = {1510.09142},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HeessWSLTE15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1512.04455), 2015. Names use the "Last, First" form.
@article{DBLP:journals/corr/HeessHLS15,
  author       = {Heess, Nicolas and
                  Hunt, Jonathan J. and
                  Lillicrap, Timothy P. and
                  Silver, David},
  title        = {Memory-based control with recurrent neural networks},
  journal      = {CoRR},
  volume       = {abs/1512.04455},
  year         = {2015},
  url          = {http://arxiv.org/abs/1512.04455},
  eprinttype   = {arXiv},
  eprint       = {1512.04455},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HeessHLS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2014 conference paper (JMLR W&CP volume 32, pages 387--395).
% The booktitle ordinal reads "31st" (corrected from the ungrammatical "31th").
@inproceedings{DBLP:conf/icml/SilverLHDWR14,
  author       = {David Silver and
                  Guy Lever and
                  Nicolas Heess and
                  Thomas Degris and
                  Daan Wierstra and
                  Martin A. Riedmiller},
  title        = {Deterministic Policy Gradient Algorithms},
  booktitle    = {Proceedings of the 31st International Conference on Machine Learning,
                  {ICML} 2014, Beijing, China, 21-26 June 2014},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {32},
  pages        = {387--395},
  publisher    = {JMLR.org},
  year         = {2014},
  url          = {http://proceedings.mlr.press/v32/silver14.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SilverLHDWR14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2014 conference paper (pages 451--459). "Bayes" is brace-protected so
% sentence-casing styles keep the proper noun capitalised.
@inproceedings{DBLP:conf/nips/GuezHSD14,
  author       = {Arthur Guez and
                  Nicolas Heess and
                  David Silver and
                  Peter Dayan},
  editor       = {Zoubin Ghahramani and
                  Max Welling and
                  Corinna Cortes and
                  Neil D. Lawrence and
                  Kilian Q. Weinberger},
  title        = {{Bayes}-Adaptive Simulation-based Search with Value Function Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 27: Annual Conference
                  on Neural Information Processing Systems 2014, December 8-13 2014,
                  Montreal, Quebec, Canada},
  pages        = {451--459},
  year         = {2014},
  url          = {https://proceedings.neurips.cc/paper/2014/hash/839ab46820b524afda05122893c2fe8e-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GuezHSD14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% USENIX Security 2014 conference paper (pages 449--464). Names use the "Last, First" form.
@inproceedings{DBLP:conf/uss/SilverJBCJ14,
  author       = {Silver, David and
                  Jana, Suman and
                  Boneh, Dan and
                  Chen, Eric Yawei and
                  Jackson, Collin},
  editor       = {Fu, Kevin and
                  Jung, Jaeyeon},
  title        = {Password Managers: Attacks and Defenses},
  booktitle    = {Proceedings of the 23rd {USENIX} Security Symposium, San Diego, CA,
                  USA, August 20-22, 2014},
  pages        = {449--464},
  publisher    = {{USENIX} Association},
  year         = {2014},
  url          = {https://www.usenix.org/conference/usenixsecurity14/technical-sessions/presentation/silver},
  timestamp    = {Mon, 01 Feb 2021 08:43:04 +0100},
  biburl       = {https://dblp.org/rec/conf/uss/SilverJBCJ14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2014 conference paper hosted on arXiv. The arXiv id from the url is also
% recorded in eprint/eprinttype so eprint-aware styles can print and link it.
@inproceedings{DBLP:journals/corr/SchaulAS13,
  author       = {Tom Schaul and
                  Ioannis Antonoglou and
                  David Silver},
  editor       = {Yoshua Bengio and
                  Yann LeCun},
  title        = {Unit Tests for Stochastic Optimization},
  booktitle    = {2nd International Conference on Learning Representations, {ICLR} 2014,
                  Banff, AB, Canada, April 14-16, 2014, Conference Track Proceedings},
  year         = {2014},
  url          = {http://arxiv.org/abs/1312.6055},
  eprinttype   = {arXiv},
  eprint       = {1312.6055},
  timestamp    = {Thu, 04 Apr 2019 13:20:07 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SchaulAS13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1401.5390), 2014. "Monte-Carlo" is brace-protected so
% sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/BranavanSB14,
  author       = {S. R. K. Branavan and
                  David Silver and
                  Regina Barzilay},
  title        = {Learning to Win by Reading Manuals in a {Monte-Carlo} Framework},
  journal      = {CoRR},
  volume       = {abs/1401.5390},
  year         = {2014},
  url          = {http://arxiv.org/abs/1401.5390},
  eprinttype   = {arXiv},
  eprint       = {1401.5390},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/BranavanSB14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1402.1958), 2014. "Bayes" is brace-protected so
% sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/GuezSD14,
  author       = {Arthur Guez and
                  David Silver and
                  Peter Dayan},
  title        = {Better Optimism By {Bayes}: Adaptive Planning with Rich Models},
  journal      = {CoRR},
  volume       = {abs/1402.1958},
  year         = {2014},
  url          = {http://arxiv.org/abs/1402.1958},
  eprinttype   = {arXiv},
  eprint       = {1402.1958},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/GuezSD14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, volume 48 (2013). The journal is stored under its full name, and
% the proper nouns "Bayes" and "Monte-Carlo" are brace-protected against
% sentence-casing styles.
@article{DBLP:journals/jair/GuezSD13,
  author       = {Arthur Guez and
                  David Silver and
                  Peter Dayan},
  title        = {Scalable and Efficient {Bayes}-Adaptive Reinforcement Learning Based
                  on {Monte-Carlo} Tree Search},
  journal      = {Journal of Artificial Intelligence Research},
  volume       = {48},
  pages        = {841--883},
  year         = {2013},
  url          = {https://doi.org/10.1613/jair.4117},
  doi          = {10.1613/JAIR.4117},
  timestamp    = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jair/GuezSD13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICAPS 2013 conference paper. "Go" (the game) is brace-protected so sentence-casing
% styles keep its capital.
@inproceedings{DBLP:conf/aips/SilverSM13,
  author       = {David Silver and
                  Richard S. Sutton and
                  Martin M{\"{u}}ller},
  editor       = {Daniel Borrajo and
                  Subbarao Kambhampati and
                  Angelo Oddi and
                  Simone Fratini},
  title        = {Temporal-Difference Search in Computer {Go}},
  booktitle    = {Proceedings of the Twenty-Third International Conference on Automated
                  Planning and Scheduling, {ICAPS} 2013, Rome, Italy, June 10-14, 2013},
  publisher    = {{AAAI}},
  year         = {2013},
  url          = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS13/paper/view/6037},
  timestamp    = {Wed, 29 Mar 2017 16:45:27 +0200},
  biburl       = {https://dblp.org/rec/conf/aips/SilverSM13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2013 conference paper (JMLR W&CP volume 28, pages 924--932). Names use the "Last, First" form.
@inproceedings{DBLP:conf/icml/SilverNBWM13,
  author       = {Silver, David and
                  Newnham, Leonard and
                  Barker, David and
                  Weller, Suzanne and
                  McFall, Jason},
  title        = {Concurrent Reinforcement Learning from Customer Interactions},
  booktitle    = {Proceedings of the 30th International Conference on Machine Learning,
                  {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {28},
  pages        = {924--932},
  publisher    = {JMLR.org},
  year         = {2013},
  url          = {http://proceedings.mlr.press/v28/silver13.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SilverNBWM13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1312.5602), 2013. "Atari" is brace-protected so
% sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/MnihKSGAWR13,
  author       = {Volodymyr Mnih and
                  Koray Kavukcuoglu and
                  David Silver and
                  Alex Graves and
                  Ioannis Antonoglou and
                  Daan Wierstra and
                  Martin A. Riedmiller},
  title        = {Playing {Atari} with Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1312.5602},
  year         = {2013},
  url          = {http://arxiv.org/abs/1312.5602},
  eprinttype   = {arXiv},
  eprint       = {1312.5602},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MnihKSGAWR13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, volume 55, number 3 (2012). The journal is stored under its full
% name, and the proper nouns "Go" and "Monte Carlo" are brace-protected against
% sentence-casing styles.
@article{DBLP:journals/cacm/GellyKSSSST12,
  author       = {Sylvain Gelly and
                  Levente Kocsis and
                  Marc Schoenauer and
                  Mich{\`{e}}le Sebag and
                  David Silver and
                  Csaba Szepesv{\'{a}}ri and
                  Olivier Teytaud},
  title        = {The grand challenge of computer {Go}: {Monte Carlo} tree search and extensions},
  journal      = {Communications of the {ACM}},
  volume       = {55},
  number       = {3},
  pages        = {106--113},
  year         = {2012},
  url          = {https://doi.org/10.1145/2093548.2093574},
  doi          = {10.1145/2093548.2093574},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/cacm/GellyKSSSST12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, volume 32, number 3-4 (2012). The journal is stored under its
% full name; abbreviating it is left to the bibliography style.
@article{DBLP:journals/isu/Silver12,
  author       = {David Silver},
  title        = {Digital natives on a media fast},
  journal      = {Information Services {\&} Use},
  volume       = {32},
  number       = {3-4},
  pages        = {137--139},
  year         = {2012},
  url          = {https://doi.org/10.3233/ISU-2012-0681},
  doi          = {10.3233/ISU-2012-0681},
  timestamp    = {Mon, 11 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/isu/Silver12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, volume 43 (2012). The journal is stored under its full name, and
% "Monte-Carlo" is brace-protected against sentence-casing styles.
@article{DBLP:journals/jair/BranavanSB12,
  author       = {S. R. K. Branavan and
                  David Silver and
                  Regina Barzilay},
  title        = {Learning to Win by Reading Manuals in a {Monte-Carlo} Framework},
  journal      = {Journal of Artificial Intelligence Research},
  volume       = {43},
  pages        = {661--704},
  year         = {2012},
  url          = {https://doi.org/10.1613/jair.3484},
  doi          = {10.1613/JAIR.3484},
  timestamp    = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jair/BranavanSB12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, volume 87, number 2 (2012). The journal is stored under its full
% name, and "Go" (the game) is brace-protected against sentence-casing styles.
@article{DBLP:journals/ml/SilverSM12,
  author       = {David Silver and
                  Richard S. Sutton and
                  Martin M{\"{u}}ller},
  title        = {Temporal-difference search in computer {Go}},
  journal      = {Machine Learning},
  volume       = {87},
  number       = {2},
  pages        = {183--219},
  year         = {2012},
  url          = {https://doi.org/10.1007/s10994-012-5280-0},
  doi          = {10.1007/S10994-012-5280-0},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/SilverSM12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% EWRL 2012 workshop paper (JMLR Proceedings volume 24, pages 43--58). Names use the "Last, First" form.
@inproceedings{DBLP:conf/ewrl/HeessST12,
  author       = {Heess, Nicolas and
                  Silver, David and
                  Teh, Yee Whye},
  editor       = {Deisenroth, Marc Peter and
                  Szepesv{\'{a}}ri, Csaba and
                  Peters, Jan},
  title        = {Actor-Critic Reinforcement Learning with Energy-Based Policies},
  booktitle    = {Proceedings of the Tenth European Workshop on Reinforcement Learning,
                  {EWRL} 2012, Edinburgh, Scotland, UK, June, 2012},
  series       = {{JMLR} Proceedings},
  volume       = {24},
  pages        = {43--58},
  publisher    = {JMLR.org},
  year         = {2012},
  url          = {http://proceedings.mlr.press/v24/heess12a.html},
  timestamp    = {Wed, 29 May 2019 08:41:44 +0200},
  biburl       = {https://dblp.org/rec/conf/ewrl/HeessST12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% EWRL 2012 workshop paper (JMLR Proceedings volume 24, pages 117--130). Names use the "Last, First" form.
@inproceedings{DBLP:conf/ewrl/Silver12,
  author       = {Silver, David},
  editor       = {Deisenroth, Marc Peter and
                  Szepesv{\'{a}}ri, Csaba and
                  Peters, Jan},
  title        = {Gradient Temporal Difference Networks},
  booktitle    = {Proceedings of the Tenth European Workshop on Reinforcement Learning,
                  {EWRL} 2012, Edinburgh, Scotland, UK, June, 2012},
  series       = {{JMLR} Proceedings},
  volume       = {24},
  pages        = {117--130},
  publisher    = {JMLR.org},
  year         = {2012},
  url          = {http://proceedings.mlr.press/v24/silver12a.html},
  timestamp    = {Wed, 29 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ewrl/Silver12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2012 conference paper. Names use the "Last, First" form.
@inproceedings{DBLP:conf/icml/SilverC12,
  author       = {Silver, David and
                  Ciosek, Kamil},
  title        = {Compositional Planning Using Optimal Option Models},
  booktitle    = {Proceedings of the 29th International Conference on Machine Learning,
                  {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher    = {icml.cc / Omnipress},
  year         = {2012},
  url          = {http://icml.cc/2012/papers/564.pdf},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SilverC12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICRA 2012 conference paper (pages 200--207). Names use the "Last, First" form.
@inproceedings{DBLP:conf/icra/SilverBS12,
  author       = {Silver, David and
                  Bagnell, J. Andrew and
                  Stentz, Anthony},
  title        = {Active learning from demonstration for robust autonomous navigation},
  booktitle    = {{IEEE} International Conference on Robotics and Automation, {ICRA}
                  2012, 14-18 May, 2012, St. Paul, Minnesota, {USA}},
  pages        = {200--207},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/ICRA.2012.6224757},
  doi          = {10.1109/ICRA.2012.6224757},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/SilverBS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ISER 2012 symposium paper (Springer Tracts in Advanced Robotics volume 88, pages 371--386).
% Names use the "Last, First" form; the escaped underscore in url/doi is kept as exported.
@inproceedings{DBLP:conf/iser/SilverBS12,
  author       = {Silver, David and
                  Bagnell, J. Andrew and
                  Stentz, Anthony},
  editor       = {Desai, Jaydev P. and
                  Dudek, Gregory and
                  Khatib, Oussama and
                  Kumar, Vijay},
  title        = {Learning Autonomous Driving Styles and Maneuvers from Expert Demonstration},
  booktitle    = {Experimental Robotics - The 13th International Symposium on Experimental
                  Robotics, {ISER} 2012, June 18-21, 2012, Qu{\'{e}}bec City, Canada},
  series       = {Springer Tracts in Advanced Robotics},
  volume       = {88},
  pages        = {371--386},
  publisher    = {Springer},
  year         = {2012},
  url          = {https://doi.org/10.1007/978-3-319-00065-7\_26},
  doi          = {10.1007/978-3-319-00065-7\_26},
  timestamp    = {Fri, 26 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iser/SilverBS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2012 paper. "Bayes" is braced in the title so that styles which
% sentence-case titles keep the proper noun capitalised.
@inproceedings{DBLP:conf/nips/GuezSD12,
  author       = {Guez, Arthur and
                  Silver, David and
                  Dayan, Peter},
  editor       = {Bartlett, Peter L. and
                  Pereira, Fernando C. N. and
                  Burges, Christopher J. C. and
                  Bottou, L{\'{e}}on and
                  Weinberger, Kilian Q.},
  title        = {Efficient {Bayes}-Adaptive Reinforcement Learning using Sample-Based
                  Search},
  booktitle    = {Advances in Neural Information Processing Systems 25: 26th Annual
                  Conference on Neural Information Processing Systems 2012. Proceedings
                  of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages        = {1034--1042},
  year         = {2012},
  url          = {https://proceedings.neurips.cc/paper/2012/hash/35051070e572e47d2c26c241ab88307f-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GuezSD12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 1205.3109. Same title and authors as DBLP:conf/nips/GuezSD12;
% presumably the preprint of that paper (verify before merging the two).
% "Bayes" is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/abs-1205-3109,
  author       = {Guez, Arthur and
                  Silver, David and
                  Dayan, Peter},
  title        = {Efficient {Bayes}-Adaptive Reinforcement Learning using Sample-Based
                  Search},
  journal      = {CoRR},
  volume       = {abs/1205.3109},
  year         = {2012},
  url          = {http://arxiv.org/abs/1205.3109},
  eprinttype   = {arXiv},
  eprint       = {1205.3109},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1205-3109.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Artificial Intelligence journal article, vol. 175(11), 2011.
% "Monte-Carlo" and "Go" are braced: both are proper nouns that sentence-casing
% styles would otherwise lowercase.
@article{DBLP:journals/ai/GellyS11,
  author       = {Gelly, Sylvain and
                  Silver, David},
  title        = {{Monte-Carlo} tree search and rapid action value estimation in computer
                  {Go}},
  journal      = {Artif. Intell.},
  volume       = {175},
  number       = {11},
  pages        = {1856--1875},
  year         = {2011},
  url          = {https://doi.org/10.1016/j.artint.2011.03.007},
  doi          = {10.1016/J.ARTINT.2011.03.007},
  timestamp    = {Sat, 27 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ai/GellyS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% JAIR article, vol. 40, 2011. "Monte-Carlo" is braced like the already-protected
% {AIXI} so sentence-casing styles do not lowercase it.
@article{DBLP:journals/jair/VenessNHUS11,
  author       = {Veness, Joel and
                  Ng, Kee Siong and
                  Hutter, Marcus and
                  Uther, William T. B. and
                  Silver, David},
  title        = {A {Monte-Carlo} {AIXI} Approximation},
  journal      = {J. Artif. Intell. Res.},
  volume       = {40},
  pages        = {95--142},
  year         = {2011},
  url          = {https://doi.org/10.1613/jair.3125},
  doi          = {10.1613/JAIR.3125},
  timestamp    = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jair/VenessNHUS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ACL 2011 paper. Publisher corrected to "Association for Computational
% Linguistics" (was "Computer Linguistics"); "Monte-Carlo" braced against
% title downcasing.
@inproceedings{DBLP:conf/acl/BranavanSB11,
  author       = {Branavan, S. R. K. and
                  Silver, David and
                  Barzilay, Regina},
  editor       = {Lin, Dekang and
                  Matsumoto, Yuji and
                  Mihalcea, Rada},
  title        = {Learning to Win by Reading Manuals in a {Monte-Carlo} Framework},
  booktitle    = {The 49th Annual Meeting of the Association for Computational Linguistics:
                  Human Language Technologies, Proceedings of the Conference, 19-24
                  June, 2011, Portland, Oregon, {USA}},
  pages        = {268--277},
  publisher    = {The Association for Computational Linguistics},
  year         = {2011},
  url          = {https://aclanthology.org/P11-1028/},
  timestamp    = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/acl/BranavanSB11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2011 paper. "Monte-Carlo" and the game title "Civilization II" are braced
% so sentence-casing styles keep their capitals.
@inproceedings{DBLP:conf/ijcai/BranavanSB11,
  author       = {Branavan, S. R. K. and
                  Silver, David and
                  Barzilay, Regina},
  editor       = {Walsh, Toby},
  title        = {Non-Linear {Monte-Carlo} Search in {Civilization II}},
  booktitle    = {{IJCAI} 2011, Proceedings of the 22nd International Joint Conference
                  on Artificial Intelligence, Barcelona, Catalonia, Spain, July 16-22,
                  2011},
  pages        = {2404--2410},
  publisher    = {{IJCAI/AAAI}},
  year         = {2011},
  url          = {https://doi.org/10.5591/978-1-57735-516-8/IJCAI11-401},
  doi          = {10.5591/978-1-57735-516-8/IJCAI11-401},
  timestamp    = {Tue, 20 Aug 2019 16:16:04 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/BranavanSB11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IROS 2011 paper. "Monte Carlo" is braced so sentence-casing styles do not
% turn it into "Monte carlo".
@inproceedings{DBLP:conf/iros/SilverS11,
  author       = {Silver, David and
                  Stentz, Anthony},
  title        = {{Monte Carlo} Localization and registration to prior data for outdoor
                  navigation},
  booktitle    = {2011 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, {IROS} 2011, San Francisco, CA, USA, September 25-30, 2011},
  pages        = {510--517},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/IROS.2011.6094532},
  doi          = {10.1109/IROS.2011.6094532},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/SilverS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PhD thesis, Carnegie Mellon University.
% NOTE(review): the key ends in 18 while the year field is 2010; the key is kept
% unchanged so existing citations keep resolving.
@phdthesis{DBLP:phd/us/Silver18,
  author       = {Silver, David},
  title        = {Learning Preference Models for Autonomous Mobile Robots in Complex
                  Domains},
  school       = {Carnegie Mellon University, {USA}},
  year         = {2010},
  url          = {https://doi.org/10.1184/r1/6720380.v1},
  doi          = {10.1184/R1/6720380.V1},
  timestamp    = {Wed, 27 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/phd/us/Silver18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Int. J. Robotics Res. article, vol. 29(12), 2010; names in "Last, First" form.
@article{DBLP:journals/ijrr/SilverBS10,
  author       = {Silver, David and
                  Bagnell, J. Andrew and
                  Stentz, Anthony},
  title        = {Learning from Demonstration for Autonomous Navigation in Complex Unstructured
                  Terrain},
  journal      = {Int. J. Robotics Res.},
  volume       = {29},
  number       = {12},
  pages        = {1565--1592},
  year         = {2010},
  url          = {https://doi.org/10.1177/0278364910369715},
  doi          = {10.1177/0278364910369715},
  timestamp    = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijrr/SilverBS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE Robotics Autom. Mag. article, vol. 17(2), 2010; names in "Last, First" form.
@article{DBLP:journals/ram/BagnellBSSS10,
  author       = {Bagnell, J. Andrew and
                  Bradley, David M. and
                  Silver, David and
                  Sofman, Boris and
                  Stentz, Anthony},
  title        = {Learning for Autonomous Navigation},
  journal      = {{IEEE} Robotics Autom. Mag.},
  volume       = {17},
  number       = {2},
  pages        = {74--84},
  year         = {2010},
  url          = {https://doi.org/10.1109/MRA.2010.936946},
  doi          = {10.1109/MRA.2010.936946},
  timestamp    = {Mon, 18 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ram/BagnellBSSS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2010 conference paper; names in "Last, First" form.
@inproceedings{DBLP:conf/aaai/VenessNHS10,
  author       = {Veness, Joel and
                  Ng, Kee Siong and
                  Hutter, Marcus and
                  Silver, David},
  editor       = {Fox, Maria and
                  Poole, David},
  title        = {Reinforcement Learning via {AIXI} Approximation},
  booktitle    = {Proceedings of the Twenty-Fourth {AAAI} Conference on Artificial Intelligence,
                  {AAAI} 2010, Atlanta, Georgia, USA, July 11-15, 2010},
  pages        = {605--611},
  publisher    = {{AAAI} Press},
  year         = {2010},
  url          = {https://doi.org/10.1609/aaai.v24i1.7667},
  doi          = {10.1609/AAAI.V24I1.7667},
  timestamp    = {Mon, 04 Sep 2023 16:23:45 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/VenessNHS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2010 paper. The acronym "POMDPs" and the proper noun "Monte-Carlo" are
% braced; without braces, sentence-casing styles print "pomdps" / "Monte-carlo".
@inproceedings{DBLP:conf/nips/SilverV10,
  author       = {Silver, David and
                  Veness, Joel},
  editor       = {Lafferty, John D. and
                  Williams, Christopher K. I. and
                  Shawe{-}Taylor, John and
                  Zemel, Richard S. and
                  Culotta, Aron},
  title        = {{Monte-Carlo} Planning in Large {POMDPs}},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {2164--2172},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/edfbe1afcf9246bb0d40eb4d8027d90f-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SilverV10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 1007.2049. Same title and authors as DBLP:conf/aaai/VenessNHS10;
% presumably the preprint of that paper (verify before merging the two).
% The eprinttype field is re-aligned with the other fields.
@article{DBLP:journals/corr/abs-1007-2049,
  author       = {Veness, Joel and
                  Ng, Kee Siong and
                  Hutter, Marcus and
                  Silver, David},
  title        = {Reinforcement Learning via {AIXI} Approximation},
  journal      = {CoRR},
  volume       = {abs/1007.2049},
  year         = {2010},
  url          = {http://arxiv.org/abs/1007.2049},
  eprinttype   = {arXiv},
  eprint       = {1007.2049},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1007-2049.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Autonomous Robots article, vol. 27(1), 2009; names in "Last, First" form.
@article{DBLP:journals/arobots/RatliffSB09,
  author       = {Ratliff, Nathan D. and
                  Silver, David and
                  Bagnell, J. Andrew},
  title        = {Learning to search: Functional gradient techniques for imitation learning},
  journal      = {Auton. Robots},
  volume       = {27},
  number       = {1},
  pages        = {25--53},
  year         = {2009},
  url          = {https://doi.org/10.1007/s10514-009-9121-3},
  doi          = {10.1007/S10514-009-9121-3},
  timestamp    = {Thu, 18 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/arobots/RatliffSB09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% FSR 2009 paper (Springer Tracts in Advanced Robotics, vol. 62); names in "Last, First" form.
@inproceedings{DBLP:conf/fsr/SilverBS09,
  author       = {Silver, David and
                  Bagnell, J. Andrew and
                  Stentz, Anthony},
  editor       = {Howard, Andrew and
                  Iagnemma, Karl and
                  Kelly, Alonzo},
  title        = {Applied Imitation Learning for Autonomous Navigation in Complex Natural
                  Terrain},
  booktitle    = {Field and Service Robotics, Results of the 7th International Conference,
                  {FSR} 2009, Cambridge, Massachusetts, USA, 14-16 July 2009},
  series       = {Springer Tracts in Advanced Robotics},
  volume       = {62},
  pages        = {249--259},
  publisher    = {Springer},
  year         = {2009},
  url          = {https://doi.org/10.1007/978-3-642-13408-1\_23},
  doi          = {10.1007/978-3-642-13408-1\_23},
  timestamp    = {Mon, 22 May 2017 17:10:59 +0200},
  biburl       = {https://dblp.org/rec/conf/fsr/SilverBS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2009 paper. "Monte-Carlo" is braced so sentence-casing styles do not
% render it as "Monte-carlo".
@inproceedings{DBLP:conf/icml/SilverT09,
  author       = {Silver, David and
                  Tesauro, Gerald},
  editor       = {Danyluk, Andrea Pohoreckyj and
                  Bottou, L{\'{e}}on and
                  Littman, Michael L.},
  title        = {{Monte-Carlo} simulation balancing},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
                  Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {382},
  pages        = {945--952},
  publisher    = {{ACM}},
  year         = {2009},
  url          = {https://doi.org/10.1145/1553374.1553495},
  doi          = {10.1145/1553374.1553495},
  timestamp    = {Tue, 06 Nov 2018 16:58:29 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/SilverT09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2009 paper; names in "Last, First" form.
@inproceedings{DBLP:conf/icml/SuttonMPBSSW09,
  author       = {Sutton, Richard S. and
                  Maei, Hamid Reza and
                  Precup, Doina and
                  Bhatnagar, Shalabh and
                  Silver, David and
                  Szepesv{\'{a}}ri, Csaba and
                  Wiewiora, Eric},
  editor       = {Danyluk, Andrea Pohoreckyj and
                  Bottou, L{\'{e}}on and
                  Littman, Michael L.},
  title        = {Fast gradient-descent methods for temporal-difference learning with
                  linear function approximation},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
                  Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {382},
  pages        = {993--1000},
  publisher    = {{ACM}},
  year         = {2009},
  url          = {https://doi.org/10.1145/1553374.1553501},
  doi          = {10.1145/1553374.1553501},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SuttonMPBSSW09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ISRR 2009 paper (Springer Tracts in Advanced Robotics, vol. 70); names in "Last, First" form.
@inproceedings{DBLP:conf/isrr/SilverBS09,
  author       = {Silver, David and
                  Bagnell, J. Andrew and
                  Stentz, Anthony},
  editor       = {Pradalier, C{\'{e}}dric and
                  Siegwart, Roland and
                  Hirzinger, Gerhard},
  title        = {Perceptual Interpretation for Autonomous Navigation through Dynamic
                  Imitation Learning},
  booktitle    = {Robotics Research - The 14th International Symposium, {ISRR} 2009,
                  August 31 - September 3, 2009, Lucerne, Switzerland},
  series       = {Springer Tracts in Advanced Robotics},
  volume       = {70},
  pages        = {433--449},
  publisher    = {Springer},
  year         = {2009},
  url          = {https://doi.org/10.1007/978-3-642-19457-3\_26},
  doi          = {10.1007/978-3-642-19457-3\_26},
  timestamp    = {Sun, 02 Jun 2019 21:26:25 +0200},
  biburl       = {https://dblp.org/rec/conf/isrr/SilverBS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2009 paper; names in "Last, First" form.
@inproceedings{DBLP:conf/nips/MaeiSBPSS09,
  author       = {Maei, Hamid Reza and
                  Szepesv{\'{a}}ri, Csaba and
                  Bhatnagar, Shalabh and
                  Precup, Doina and
                  Silver, David and
                  Sutton, Richard S.},
  editor       = {Bengio, Yoshua and
                  Schuurmans, Dale and
                  Lafferty, John D. and
                  Williams, Christopher K. I. and
                  Culotta, Aron},
  title        = {Convergent Temporal-Difference Learning with Arbitrary Smooth Function
                  Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 22: 23rd Annual
                  Conference on Neural Information Processing Systems 2009. Proceedings
                  of a meeting held 7-10 December 2009, Vancouver, British Columbia,
                  Canada},
  pages        = {1204--1212},
  publisher    = {Curran Associates, Inc.},
  year         = {2009},
  url          = {https://proceedings.neurips.cc/paper/2009/hash/3a15c7d0bbe60300a39f76f8a5ba6896-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/MaeiSBPSS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2009 paper; names in "Last, First" form.
@inproceedings{DBLP:conf/nips/VenessSUB09,
  author       = {Veness, Joel and
                  Silver, David and
                  Uther, William T. B. and
                  Blair, Alan},
  editor       = {Bengio, Yoshua and
                  Schuurmans, Dale and
                  Lafferty, John D. and
                  Williams, Christopher K. I. and
                  Culotta, Aron},
  title        = {Bootstrapping from Game Tree Search},
  booktitle    = {Advances in Neural Information Processing Systems 22: 23rd Annual
                  Conference on Neural Information Processing Systems 2009. Proceedings
                  of a meeting held 7-10 December 2009, Vancouver, British Columbia,
                  Canada},
  pages        = {1937--1945},
  publisher    = {Curran Associates, Inc.},
  year         = {2009},
  url          = {https://proceedings.neurips.cc/paper/2009/hash/389bc7bb1e1c2a5e7e147703232a88f6-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/VenessSUB09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 0909.0801. "Monte Carlo" is braced like the already-protected
% {AIXI} so sentence-casing styles keep its capitals; the eprinttype field is
% re-aligned with the other fields.
@article{DBLP:journals/corr/abs-0909-0801,
  author       = {Veness, Joel and
                  Ng, Kee Siong and
                  Hutter, Marcus and
                  Silver, David},
  title        = {A {Monte Carlo} {AIXI} Approximation},
  journal      = {CoRR},
  volume       = {abs/0909.0801},
  year         = {2009},
  url          = {http://arxiv.org/abs/0909.0801},
  eprinttype   = {arXiv},
  eprint       = {0909.0801},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-0909-0801.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% First Monday article, vol. 13(3), 2008; no pages or DOI are recorded for it.
@article{DBLP:journals/firstmonday/Silver08,
  author       = {Silver, David},
  title        = {History, Hype, and Hope: An Afterward},
  journal      = {First Monday},
  volume       = {13},
  number       = {3},
  year         = {2008},
  url          = {http://www.uic.edu/htbin/cgiwrap/bin/ojs/index.php/fm/article/view/2143/1950},
  timestamp    = {Thu, 26 Jun 2008 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/firstmonday/Silver08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2008 paper. "Go" (the game) is braced so sentence-casing styles do not
% lowercase it to the verb "go".
@inproceedings{DBLP:conf/aaai/GellyS08,
  author       = {Gelly, Sylvain and
                  Silver, David},
  editor       = {Fox, Dieter and
                  Gomes, Carla P.},
  title        = {Achieving Master Level Play in 9 x 9 Computer {Go}},
  booktitle    = {Proceedings of the Twenty-Third {AAAI} Conference on Artificial Intelligence,
                  {AAAI} 2008, Chicago, Illinois, USA, July 13-17, 2008},
  pages        = {1537--1540},
  publisher    = {{AAAI} Press},
  year         = {2008},
  url          = {http://www.aaai.org/Library/AAAI/2008/aaai08-257.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/GellyS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2008 paper; names in "Last, First" form.
@inproceedings{DBLP:conf/icml/SilverSM08,
  author       = {Silver, David and
                  Sutton, Richard S. and
                  M{\"{u}}ller, Martin},
  editor       = {Cohen, William W. and
                  McCallum, Andrew and
                  Roweis, Sam T.},
  title        = {Sample-based learning and search with permanent and transient memories},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Fifth International Conference
                  {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {307},
  pages        = {968--975},
  publisher    = {{ACM}},
  year         = {2008},
  url          = {https://doi.org/10.1145/1390156.1390278},
  doi          = {10.1145/1390156.1390278},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/SilverSM08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% RSS 2008 (Robotics: Science and Systems IV) paper; no page range is recorded.
% The second author was written "James A. Bagnell" here; it is unified with the
% "J. Andrew Bagnell" spelling used by the other entries with the same
% co-authors so the author is not indexed twice.
% NOTE(review): assumes both spellings denote the same person - confirm.
@inproceedings{DBLP:conf/rss/SilverBS08,
  author       = {Silver, David and
                  Bagnell, J. Andrew and
                  Stentz, Anthony},
  editor       = {Brock, Oliver and
                  Trinkle, Jeff and
                  Ramos, Fabio},
  title        = {High Performance Outdoor Navigation from Overhead Data using Imitation
                  Learning},
  booktitle    = {Robotics: Science and Systems IV, Eidgen{\"{o}}ssische Technische
                  Hochschule Z{\"{u}}rich, Zurich, Switzerland, June 25-28, 2008},
  publisher    = {The {MIT} Press},
  year         = {2008},
  url          = {http://www.roboticsproceedings.org/rss04/p34.html},
  doi          = {10.15607/RSS.2008.IV.034},
  timestamp    = {Tue, 18 Oct 2022 08:35:38 +0200},
  biburl       = {https://dblp.org/rec/conf/rss/SilverBS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2007 paper; names in "Last, First" form.
@inproceedings{DBLP:conf/icml/GellyS07,
  author       = {Gelly, Sylvain and
                  Silver, David},
  editor       = {Ghahramani, Zoubin},
  title        = {Combining online and offline knowledge in {UCT}},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Fourth International Conference
                  {(ICML} 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {227},
  pages        = {273--280},
  publisher    = {{ACM}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1273496.1273531},
  doi          = {10.1145/1273496.1273531},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/GellyS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2007 paper; names in "Last, First" form.
@inproceedings{DBLP:conf/icml/SuttonKS07,
  author       = {Sutton, Richard S. and
                  Koop, Anna and
                  Silver, David},
  editor       = {Ghahramani, Zoubin},
  title        = {On the role of tracking in stationary environments},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Fourth International Conference
                  {(ICML} 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {227},
  pages        = {871--878},
  publisher    = {{ACM}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1273496.1273606},
  doi          = {10.1145/1273496.1273606},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/SuttonKS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2007 paper. "Go" (the game) is braced so sentence-casing styles do not
% lowercase it to the verb "go".
@inproceedings{DBLP:conf/ijcai/SilverSM07,
  author       = {Silver, David and
                  Sutton, Richard S. and
                  M{\"{u}}ller, Martin},
  editor       = {Veloso, Manuela M.},
  title        = {Reinforcement Learning of Local Shape in the Game of {Go}},
  booktitle    = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference
                  on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages        = {1053--1058},
  year         = {2007},
  url          = {http://ijcai.org/Proceedings/07/Papers/170.pdf},
  timestamp    = {Tue, 20 Aug 2019 16:17:11 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/SilverSM07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% J. Field Robotics article, vol. 23(1), 2006; names in "Last, First" form.
@article{DBLP:journals/jfr/MorrisFOBSBTWW06,
  author       = {Morris, Aaron and
                  Ferguson, Dave and
                  Omohundro, Zachary and
                  Bradley, David M. and
                  Silver, David and
                  Baker, Christopher R. and
                  Thayer, Scott and
                  Whittaker, Chuck and
                  Whittaker, William},
  title        = {Recent developments in subterranean robotics},
  journal      = {J. Field Robotics},
  volume       = {23},
  number       = {1},
  pages        = {35--57},
  year         = {2006},
  url          = {https://doi.org/10.1002/rob.20106},
  doi          = {10.1002/ROB.20106},
  timestamp    = {Mon, 23 Oct 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jfr/MorrisFOBSBTWW06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% J. Field Robotics article, vol. 23, double issue 6-7, 2006; names in "Last, First" form.
@article{DBLP:journals/jfr/SilverFMT06,
  author       = {Silver, David and
                  Ferguson, Dave and
                  Morris, Aaron and
                  Thayer, Scott},
  title        = {Topological exploration of subterranean environments},
  journal      = {J. Field Robotics},
  volume       = {23},
  number       = {6-7},
  pages        = {395--415},
  year         = {2006},
  url          = {https://doi.org/10.1002/rob.20130},
  doi          = {10.1002/ROB.20130},
  timestamp    = {Mon, 23 Oct 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jfr/SilverFMT06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IROS 2006 conference paper; names in "Last, First" form.
@inproceedings{DBLP:conf/iros/SilverSVBS06,
  author       = {Silver, David and
                  Sofman, Boris and
                  Vandapel, Nicolas and
                  Bagnell, J. Andrew and
                  Stentz, Anthony},
  title        = {Experimental Analysis of Overhead Data Processing To Support Long
                  Range Navigation},
  booktitle    = {2006 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, {IROS} 2006, October 9-15, 2006, Beijing, China},
  pages        = {2443--2450},
  publisher    = {{IEEE}},
  year         = {2006},
  url          = {https://doi.org/10.1109/IROS.2006.281686},
  doi          = {10.1109/IROS.2006.281686},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/SilverSVBS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE Trans. Robotics article, vol. 21(3), 2005; names in "Last, First" form.
@article{DBLP:journals/trob/LisienMSKRC05,
  author       = {Lisien, Brad and
                  Morales, Deryck and
                  Silver, David and
                  Kantor, George and
                  Rekleitis, Ioannis M. and
                  Choset, Howie},
  title        = {The hierarchical atlas},
  journal      = {{IEEE} Trans. Robotics},
  volume       = {21},
  number       = {3},
  pages        = {473--481},
  year         = {2005},
  url          = {https://doi.org/10.1109/TRO.2004.837237},
  doi          = {10.1109/TRO.2004.837237},
  timestamp    = {Mon, 15 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/trob/LisienMSKRC05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AIIDE 2005 conference paper; this record carries no url or doi field.
@inproceedings{DBLP:conf/aiide/Silver05,
  author       = {Silver, David},
  editor       = {Young, R. Michael and
                  Laird, John E.},
  title        = {Cooperative Pathfinding},
  booktitle    = {Proceedings of the First Artificial Intelligence and Interactive Digital
                  Entertainment Conference, June 1-5, 2005, Marina del Rey, California,
                  {USA}},
  pages        = {117--122},
  publisher    = {{AAAI} Press},
  year         = {2005},
  timestamp    = {Mon, 26 Feb 2007 09:13:55 +0100},
  biburl       = {https://dblp.org/rec/conf/aiide/Silver05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% FSR 2005 paper (Springer Tracts in Advanced Robotics, vol. 25); names in "Last, First" form.
@inproceedings{DBLP:conf/fsr/SilverCT05,
  author       = {Silver, David and
                  Carsten, Joseph and
                  Thayer, Scott},
  editor       = {Corke, Peter I. and
                  Sukkarieh, Salah},
  title        = {Topological Global Localization for Subterranean Voids},
  booktitle    = {Field and Service Robotics, Results of the 5th International Conference,
                  {FSR} 2005, July 29-31, 2005, Port Douglas, QLD, Australia},
  series       = {Springer Tracts in Advanced Robotics},
  volume       = {25},
  pages        = {117--128},
  publisher    = {Springer},
  year         = {2005},
  url          = {https://doi.org/10.1007/978-3-540-33453-8\_11},
  doi          = {10.1007/978-3-540-33453-8\_11},
  timestamp    = {Mon, 22 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/fsr/SilverCT05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICRA 2005 conference paper; names in "Last, First" form.
@inproceedings{DBLP:conf/icra/MorrisSFT05,
  author       = {Morris, Aaron and
                  Silver, David and
                  Ferguson, David I. and
                  Thayer, Scott},
  title        = {Towards Topological Exploration of Abandoned Mines},
  booktitle    = {Proceedings of the 2005 {IEEE} International Conference on Robotics
                  and Automation, {ICRA} 2005, April 18-22, 2005, Barcelona, Spain},
  pages        = {2117--2123},
  publisher    = {{IEEE}},
  year         = {2005},
  url          = {https://doi.org/10.1109/ROBOT.2005.1570426},
  doi          = {10.1109/ROBOT.2005.1570426},
  timestamp    = {Mon, 22 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/MorrisSFT05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% New Media Soc. article, vol. 6(1), 2004. Stray spaces after two of the
% slashes in the title are removed so the slash-separated list prints uniformly.
@article{DBLP:journals/nms/Silver04,
  author       = {Silver, David},
  title        = {Internet/Cyberculture/Digital Culture/New Media/Fill-in-the-Blank
                  Studies},
  journal      = {New Media Soc.},
  volume       = {6},
  number       = {1},
  pages        = {55--64},
  year         = {2004},
  url          = {https://doi.org/10.1177/1461444804039915},
  doi          = {10.1177/1461444804039915},
  timestamp    = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nms/Silver04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICRA 2004 conference paper; names in "Last, First" form.
@inproceedings{DBLP:conf/icra/SilverMRLC04,
  author       = {Silver, David and
                  Morales, Deryck and
                  Rekleitis, Ioannis M. and
                  Lisien, Brad and
                  Choset, Howie},
  title        = {Arc Carving: Obtaining Accurate, Low Latency Maps from Ultrasonic
                  Range Sensors},
  booktitle    = {Proceedings of the 2004 {IEEE} International Conference on Robotics
                  and Automation, {ICRA} 2004, April 26 - May 1, 2004, New Orleans,
                  LA, {USA}},
  pages        = {1554--1561},
  publisher    = {{IEEE}},
  year         = {2004},
  url          = {https://doi.org/10.1109/ROBOT.2004.1308045},
  doi          = {10.1109/ROBOT.2004.1308045},
  timestamp    = {Mon, 15 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/SilverMRLC04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IROS 2004 conference paper; names in "Last, First" form.
@inproceedings{DBLP:conf/iros/SilverFMT04,
  author       = {Silver, David and
                  Ferguson, Dave and
                  Morris, Aaron and
                  Thayer, Scott},
  title        = {Feature extraction for topological mine maps},
  booktitle    = {2004 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, Sendai, Japan, September 28 - October 2, 2004},
  pages        = {773--779},
  publisher    = {{IEEE}},
  year         = {2004},
  url          = {https://doi.org/10.1109/IROS.2004.1389446},
  doi          = {10.1109/IROS.2004.1389446},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/SilverFMT04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% RAM 2004 conference paper; record exported from DBLP (see biburl).
% The day range in booktitle uses an en dash (--), the same convention as the pages field.
@inproceedings{DBLP:conf/ram/SilverBT04,
  author       = {David Silver and
                  David M. Bradley and
                  Scott Thayer},
  title        = {Scan matching for flooded subterranean voids},
  booktitle    = {2004 {IEEE} Conference on Robotics, Automation and Mechatronics, {RAM}
                  2004, December 1--3, 2004, Singapore},
  pages        = {422--427},
  publisher    = {{IEEE}},
  year         = {2004},
  url          = {https://doi.org/10.1109/RAMECH.2004.1438957},
  doi          = {10.1109/RAMECH.2004.1438957},
  timestamp    = {Thu, 12 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ram/SilverBT04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% RAM 2004 conference paper; record exported from DBLP (see biburl).
% The day range in booktitle uses an en dash (--), the same convention as the pages field.
@inproceedings{DBLP:conf/ram/BradleyST04,
  author       = {David M. Bradley and
                  David Silver and
                  Scott Thayer},
  title        = {A regional point descriptor for global topological localization in
                  flooded subterranean environments},
  booktitle    = {2004 {IEEE} Conference on Robotics, Automation and Mechatronics, {RAM}
                  2004, December 1--3, 2004, Singapore},
  pages        = {440--445},
  publisher    = {{IEEE}},
  year         = {2004},
  url          = {https://doi.org/10.1109/RAMECH.2004.1438960},
  doi          = {10.1109/RAMECH.2004.1438960},
  timestamp    = {Thu, 12 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ram/BradleyST04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IROS 2003 conference paper; record exported from DBLP (see biburl).
% In booktitle the acronym USA is brace-protected against style recasing, and the
% multi-day conference date range is written with an en dash (--).
@inproceedings{DBLP:conf/iros/LisienMSKRC03,
  author       = {Brad Lisien and
                  Deryck Morales and
                  David Silver and
                  George Kantor and
                  Ioannis M. Rekleitis and
                  Howie Choset},
  title        = {Hierarchical simultaneous localization and mapping},
  booktitle    = {2003 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, Las Vegas, Nevada, {USA}, October 27 -- November 1, 2003},
  pages        = {448--453},
  publisher    = {{IEEE}},
  year         = {2003},
  url          = {https://doi.org/10.1109/IROS.2003.1250670},
  doi          = {10.1109/IROS.2003.1250670},
  timestamp    = {Mon, 15 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/LisienMSKRC03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Single-author book review, New Media Soc. volume 2, number 2 (2000).
% Record exported from DBLP (see biburl); identifiers and provenance fields are grouped last.
@article{DBLP:journals/nms/Silver00,
  author    = {David Silver},
  title     = {Book Review: Life Online: Researching Real Experience in Virtual Space},
  journal   = {New Media Soc.},
  year      = {2000},
  volume    = {2},
  number    = {2},
  pages     = {251--255},
  doi       = {10.1177/1461444800002002008},
  url       = {https://doi.org/10.1177/1461444800002002008},
  biburl    = {https://dblp.org/rec/journals/nms/Silver00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 17 Sep 2020 01:00:00 +0200}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics