Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: David Silver
@article{DBLP:journals/nature/MankowitzMZGSPL23,
  author    = {Daniel J. Mankowitz and Andrea Michi and Anton Zhernov and Marco Gelmi and Marco Selvi and Cosmin Paduraru and Edouard Leurent and Shariq Iqbal and Jean{-}Baptiste Lespiau and Alex Ahern and Thomas K{\"{o}}ppe and Kevin Millikin and Stephen Gaffney and Sophie Elster and Jackson Broshear and Chris Gamble and Kieran Milan and Robert Tung and Minjae Hwang and A. Taylan Cemgil and Mohammadamin Barekatain and Yujia Li and Amol Mandhane and Thomas Hubert and Julian Schrittwieser and Demis Hassabis and Pushmeet Kohli and Martin A. Riedmiller and Oriol Vinyals and David Silver},
  title     = {Faster sorting algorithms discovered using deep reinforcement learning},
  journal   = {Nature},
  volume    = {618},
  number    = {7964},
  pages     = {257--263},
  year      = {2023},
  url       = {https://doi.org/10.1038/s41586-023-06004-9},
  doi       = {10.1038/S41586-023-06004-9},
  timestamp = {Wed, 24 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/MankowitzMZGSPL23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2312-11805,
  author     = {Rohan Anil and Sebastian Borgeaud and Yonghui Wu and Jean{-}Baptiste Alayrac and Jiahui Yu and Radu Soricut and Johan Schalkwyk and Andrew M. Dai and Anja Hauth and Katie Millican and David Silver and Slav Petrov and Melvin Johnson and Ioannis Antonoglou and Julian Schrittwieser and Amelia Glaese and Jilin Chen and Emily Pitler and Timothy P. Lillicrap and Angeliki Lazaridou and Orhan Firat and James Molloy and Michael Isard and Paul Ronald Barham and Tom Hennigan and Benjamin Lee and Fabio Viola and Malcolm Reynolds and Yuanzhong Xu and Ryan Doherty and Eli Collins and Clemens Meyer and Eliza Rutherford and Erica Moreira and Kareem Ayoub and Megha Goel and George Tucker and Enrique Piqueras and Maxim Krikun and Iain Barr and Nikolay Savinov and Ivo Danihelka and Becca Roelofs and Ana{\"{\i}}s White and Anders Andreassen and Tamara von Glehn and Lakshman Yagati and Mehran Kazemi and Lucas Gonzalez and Misha Khalman and Jakub Sygnowski and others},
  title      = {Gemini: {A} Family of Highly Capable Multimodal Models},
  journal    = {CoRR},
  volume     = {abs/2312.11805},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.11805},
  doi        = {10.48550/ARXIV.2312.11805},
  eprinttype = {arXiv},
  eprint     = {2312.11805},
  timestamp  = {Tue, 16 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-11805.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nature/FawziBHHRB0RSSS22,
  author    = {Alhussein Fawzi and Matej Balog and Aja Huang and Thomas Hubert and Bernardino Romera{-}Paredes and Mohammadamin Barekatain and Alexander Novikov and Francisco J. R. Ruiz and Julian Schrittwieser and Grzegorz Swirszcz and David Silver and Demis Hassabis and Pushmeet Kohli},
  title     = {Discovering faster matrix multiplication algorithms with reinforcement learning},
  journal   = {Nature},
  volume    = {610},
  number    = {7930},
  pages     = {47--53},
  year      = {2022},
  url       = {https://doi.org/10.1038/s41586-022-05172-4},
  doi       = {10.1038/S41586-022-05172-4},
  timestamp = {Fri, 22 Mar 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/FawziBHHRB0RSSS22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nn/MatsuoLSPSSUM22,
  author    = {Yutaka Matsuo and Yann LeCun and Maneesh Sahani and Doina Precup and David Silver and Masashi Sugiyama and Eiji Uchibe and Jun Morimoto},
  title     = {Deep learning, reinforcement learning, and world models},
  journal   = {Neural Networks},
  volume    = {152},
  pages     = {267--275},
  year      = {2022},
  url       = {https://doi.org/10.1016/j.neunet.2022.03.037},
  doi       = {10.1016/J.NEUNET.2022.03.037},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/nn/MatsuoLSPSSUM22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/AntonoglouSOHS22,
  author    = {Ioannis Antonoglou and Julian Schrittwieser and Sherjil Ozair and Thomas K. Hubert and David Silver},
  title     = {Planning in Stochastic Environments with a Learned Model},
  booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher = {OpenReview.net},
  year      = {2022},
  url       = {https://openreview.net/forum?id=X6D9bAHhBQ1},
  timestamp = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/AntonoglouSOHS22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/DanihelkaGSS22,
  author    = {Ivo Danihelka and Arthur Guez and Julian Schrittwieser and David Silver},
  title     = {Policy improvement by planning with {Gumbel}},
  booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher = {OpenReview.net},
  year      = {2022},
  url       = {https://openreview.net/forum?id=bERaNdoegnO},
  timestamp = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/DanihelkaGSS22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/FlennerhagSZHS022,
  author    = {Sebastian Flennerhag and Yannick Schroecker and Tom Zahavy and Hado van Hasselt and David Silver and Satinder Singh},
  title     = {Bootstrapped Meta-Learning},
  booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher = {OpenReview.net},
  year      = {2022},
  url       = {https://openreview.net/forum?id=b-ny3x071E5},
  timestamp = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/FlennerhagSZHS022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/SilverGDHH22,
  author    = {David Silver and Anirudh Goyal and Ivo Danihelka and Matteo Hessel and Hado van Hasselt},
  title     = {Learning by Directional Gradient Descent},
  booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher = {OpenReview.net},
  year      = {2022},
  url       = {https://openreview.net/forum?id=5i7lJLuhTm},
  timestamp = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/SilverGDHH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2206-15378,
  author     = {Julien P{\'{e}}rolat and De Vylder, Bart and Daniel Hennes and Eugene Tarassov and Florian Strub and Vincent de Boer and Paul Muller and Jerome T. Connor and Neil Burch and Thomas W. Anthony and Stephen McAleer and Romuald Elie and Sarah H. Cen and Zhe Wang and Audrunas Gruslys and Aleksandra Malysheva and Mina Khan and Sherjil Ozair and Finbarr Timbers and Toby Pohlen and Tom Eccles and Mark Rowland and Marc Lanctot and Jean{-}Baptiste Lespiau and Bilal Piot and Shayegan Omidshafiei and Edward Lockhart and Laurent Sifre and Nathalie Beauguerlange and R{\'{e}}mi Munos and David Silver and Satinder Singh and Demis Hassabis and Karl Tuyls},
  title      = {Mastering the Game of {Stratego} with Model-Free Multiagent Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2206.15378},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.15378},
  doi        = {10.48550/ARXIV.2206.15378},
  eprinttype = {arXiv},
  eprint     = {2206.15378},
  timestamp  = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-15378.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ai/SilverSPS21,
  author    = {David Silver and Satinder Singh and Doina Precup and Richard S. Sutton},
  title     = {Reward is enough},
  journal   = {Artificial Intelligence},
  volume    = {299},
  pages     = {103535},
  year      = {2021},
  url       = {https://doi.org/10.1016/j.artint.2021.103535},
  doi       = {10.1016/J.ARTINT.2021.103535},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ai/SilverSPS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/DabneyBRDQBS21,
  author    = {Will Dabney and Andr{\'{e}} Barreto and Mark Rowland and Robert Dadashi and John Quan and Marc G. Bellemare and David Silver},
  title     = {The Value-Improvement Path: Towards Better Representations for Reinforcement Learning},
  booktitle = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9, 2021},
  pages     = {7160--7168},
  publisher = {{AAAI} Press},
  year      = {2021},
  url       = {https://doi.org/10.1609/aaai.v35i8.16880},
  doi       = {10.1609/AAAI.V35I8.16880},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/DabneyBRDQBS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/HasseltMHSBB21,
  author    = {Hado van Hasselt and Sephora Madjiheurem and Matteo Hessel and David Silver and Andr{\'{e}} Barreto and Diana Borsa},
  title     = {Expected Eligibility Traces},
  booktitle = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9, 2021},
  pages     = {9997--10005},
  publisher = {{AAAI} Press},
  year      = {2021},
  url       = {https://doi.org/10.1609/aaai.v35i11.17200},
  doi       = {10.1609/AAAI.V35I11.17200},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HasseltMHSBB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/HesselDVGSSWSH21,
  author    = {Matteo Hessel and Ivo Danihelka and Fabio Viola and Arthur Guez and Simon Schmitt and Laurent Sifre and Theophane Weber and David Silver and Hado van Hasselt},
  editor    = {Marina Meila and Tong Zhang},
  title     = {Muesli: Combining Improvements in Policy Optimization},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {4214--4226},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/hessel21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/HesselDVGSSWSH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/HubertSABSS21,
  author    = {Thomas Hubert and Julian Schrittwieser and Ioannis Antonoglou and Mohammadamin Barekatain and Simon Schmitt and David Silver},
  editor    = {Marina Meila and Tong Zhang},
  title     = {Learning and Planning in Complex Action Spaces},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {4476--4486},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/hubert21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/HubertSABSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/FarquharBMFHHS21,
  author    = {Gregory Farquhar and Kate Baumli and Zita Marinho and Angelos Filos and Matteo Hessel and Hado Philip van Hasselt and David Silver},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Wortman Vaughan, Jennifer},
  title     = {Self-Consistent Models and Values},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {1111--1125},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/08f0efebb1c51aada9430a089a2050cc-Abstract.html},
  timestamp = {Tue, 03 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/FarquharBMFHHS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/GrimmBFSS21,
  author    = {Christopher Grimm and Andr{\'{e}} Barreto and Gregory Farquhar and David Silver and Satinder Singh},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Wortman Vaughan, Jennifer},
  title     = {Proper Value Equivalence},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {7773--7786},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/400e5e6a7ce0c754f281525fae75a873-Abstract.html},
  timestamp = {Tue, 03 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/GrimmBFSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/SchrittwieserHM21,
  author    = {Julian Schrittwieser and Thomas Hubert and Amol Mandhane and Mohammadamin Barekatain and Ioannis Antonoglou and David Silver},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Wortman Vaughan, Jennifer},
  title     = {Online and Offline Reinforcement Learning by Planning with a Learned Model},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {27580--27591},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/e8258e5140317ff36c7f8225a3bf9590-Abstract.html},
  timestamp = {Tue, 03 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SchrittwieserHM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/VeeriahZHXOKHSS21,
  author    = {Vivek Veeriah and Tom Zahavy and Matteo Hessel and Zhongwen Xu and Junhyuk Oh and Iurii Kemaev and Hado van Hasselt and David Silver and Satinder Singh},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Wortman Vaughan, Jennifer},
  title     = {Discovery of Options via Meta-Learned Subgoals},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {29861--29873},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/fa246d0262c3925617b0c72bb20eeb1d-Abstract.html},
  timestamp = {Tue, 03 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/VeeriahZHXOKHSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2102-06741,
  author     = {Vivek Veeriah and Tom Zahavy and Matteo Hessel and Zhongwen Xu and Junhyuk Oh and Iurii Kemaev and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Discovery of Options via Meta-Learned Subgoals},
  journal    = {CoRR},
  volume     = {abs/2102.06741},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.06741},
  eprinttype = {arXiv},
  eprint     = {2102.06741},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-06741.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2104-06159,
  author     = {Matteo Hessel and Ivo Danihelka and Fabio Viola and Arthur Guez and Simon Schmitt and Laurent Sifre and Theophane Weber and David Silver and Hado van Hasselt},
  title      = {Muesli: Combining Improvements in Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2104.06159},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06159},
  eprinttype = {arXiv},
  eprint     = {2104.06159},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06159.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2104-06294,
  author     = {Julian Schrittwieser and Thomas Hubert and Amol Mandhane and Mohammadamin Barekatain and Ioannis Antonoglou and David Silver},
  title      = {Online and Offline Reinforcement Learning by Planning with a Learned Model},
  journal    = {CoRR},
  volume     = {abs/2104.06294},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06294},
  eprinttype = {arXiv},
  eprint     = {2104.06294},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06294.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2104-06303,
  author     = {Thomas Hubert and Julian Schrittwieser and Ioannis Antonoglou and Mohammadamin Barekatain and Simon Schmitt and David Silver},
  title      = {Learning and Planning in Complex Action Spaces},
  journal    = {CoRR},
  volume     = {abs/2104.06303},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06303},
  eprinttype = {arXiv},
  eprint     = {2104.06303},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06303.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2106-10316,
  author     = {Christopher Grimm and Andr{\'{e}} Barreto and Gregory Farquhar and David Silver and Satinder Singh},
  title      = {Proper Value Equivalence},
  journal    = {CoRR},
  volume     = {abs/2106.10316},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.10316},
  eprinttype = {arXiv},
  eprint     = {2106.10316},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-10316.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2106-13105,
  author     = {Andr{\'{e}} Barreto and Diana Borsa and Shaobo Hou and Gheorghe Comanici and Eser Ayg{\"{u}}n and Philippe Hamel and Daniel Toyama and Jonathan J. Hunt and Shibl Mourad and David Silver and Doina Precup},
  title      = {The Option Keyboard: Combining Skills in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2106.13105},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.13105},
  eprinttype = {arXiv},
  eprint     = {2106.13105},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-13105.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2109-04504,
  author     = {Sebastian Flennerhag and Yannick Schroecker and Tom Zahavy and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Bootstrapped Meta-Learning},
  journal    = {CoRR},
  volume     = {abs/2109.04504},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.04504},
  eprinttype = {arXiv},
  eprint     = {2109.04504},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-04504.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2110-12840,
  author     = {Gregory Farquhar and Kate Baumli and Zita Marinho and Angelos Filos and Matteo Hessel and Hado van Hasselt and David Silver},
  title      = {Self-Consistent Models and Values},
  journal    = {CoRR},
  volume     = {abs/2110.12840},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.12840},
  eprinttype = {arXiv},
  eprint     = {2110.12840},
  timestamp  = {Thu, 28 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-12840.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nature/Senior0JKSGQZNB20,
  author    = {Andrew W. Senior and Richard Evans and John Jumper and James Kirkpatrick and Laurent Sifre and Tim Green and Chongli Qin and Augustin Z{\'{\i}}dek and Alexander W. R. Nelson and Alex Bridgland and Hugo Penedones and Stig Petersen and Karen Simonyan and Steve Crossan and Pushmeet Kohli and David T. Jones and David Silver and Koray Kavukcuoglu and Demis Hassabis},
  title     = {Improved protein structure prediction using potentials from deep learning},
  journal   = {Nature},
  volume    = {577},
  number    = {7792},
  pages     = {706--710},
  year      = {2020},
  url       = {https://doi.org/10.1038/s41586-019-1923-7},
  doi       = {10.1038/S41586-019-1923-7},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/nature/Senior0JKSGQZNB20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nature/SchrittwieserAH20,
  author    = {Julian Schrittwieser and Ioannis Antonoglou and Thomas Hubert and Karen Simonyan and Laurent Sifre and Simon Schmitt and Arthur Guez and Edward Lockhart and Demis Hassabis and Thore Graepel and Timothy P. Lillicrap and David Silver},
  title     = {Mastering {Atari}, {Go}, chess and shogi by planning with a learned model},
  journal   = {Nature},
  volume    = {588},
  number    = {7839},
  pages     = {604--609},
  year      = {2020},
  url       = {https://doi.org/10.1038/s41586-020-03051-4},
  doi       = {10.1038/S41586-020-03051-4},
  timestamp = {Fri, 02 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/nature/SchrittwieserAH20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/pnas/BarretoHBSP20,
  author    = {Andr{\'{e}} Barreto and Shaobo Hou and Diana Borsa and David Silver and Doina Precup},
  title     = {Fast reinforcement learning with generalized policy updates},
  journal   = {Proceedings of the National Academy of Sciences of the United States of America},
  volume    = {117},
  number    = {48},
  pages     = {30079--30087},
  year      = {2020},
  url       = {https://doi.org/10.1073/pnas.1907370117},
  doi       = {10.1073/PNAS.1907370117},
  timestamp = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/pnas/BarretoHBSP20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author    = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Van Roy, Benjamin and Richard S. Sutton and David Silver and Hado van Hasselt},
  title     = {Behaviour Suite for Reinforcement Learning},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp = {Mon, 15 May 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhengOHXKHSS20,
  author    = {Zeyu Zheng and Junhyuk Oh and Matteo Hessel and Zhongwen Xu and Manuel Kroiss and Hado van Hasselt and David Silver and Satinder Singh},
  title     = {What Can Learned Intrinsic Rewards Capture?},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {11436--11446},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {http://proceedings.mlr.press/v119/zheng20b.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/ZhengOHXKHSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/GrimmBSS20,
  author    = {Christopher Grimm and Andr{\'{e}} Barreto and Satinder Singh and David Silver},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/3bb585ea00014b0e3ebe4c6dd165a358-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/GrimmBSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/GuezVWBKPSH20,
  author    = {Arthur Guez and Fabio Viola and Theophane Weber and Lars Buesing and Steven Kapturowski and Doina Precup and David Silver and Nicolas Heess},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Value-driven Hindsight Modelling},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/9381fc93ad66f9ec4b2eef71147a6665-Abstract.html},
  timestamp = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/GuezVWBKPSH20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/OhHCXHSS20,
  author    = {Junhyuk Oh and Matteo Hessel and Wojciech M. Czarnecki and Zhongwen Xu and Hado van Hasselt and Satinder Singh and David Silver},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Discovering Reinforcement Learning Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/0b96d81f0494fde5428c7aea243c9157-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/OhHCXHSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/XuHHOSS20,
  author    = {Zhongwen Xu and Hado Philip van Hasselt and Matteo Hessel and Junhyuk Oh and Satinder Singh and David Silver},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Meta-Gradient Reinforcement Learning with an Objective Discovered Online},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/ae3d525daf92cee0003a7f2d92c34ea3-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/XuHHOSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZahavyXVHOHSS20,
  author    = {Tom Zahavy and Zhongwen Xu and Vivek Veeriah and Matteo Hessel and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {A Self-Tuning Actor-Critic Algorithm},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/f02208a057804ee16ac72ff4d3cec53b-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/ZahavyXVHOHSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2002-08329,
  author     = {Arthur Guez and Fabio Viola and Th{\'{e}}ophane Weber and Lars Buesing and Steven Kapturowski and Doina Precup and David Silver and Nicolas Heess},
  title      = {Value-driven Hindsight Modelling},
  journal    = {CoRR},
  volume     = {abs/2002.08329},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.08329},
  eprinttype = {arXiv},
  eprint     = {2002.08329},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-08329.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2002-12928,
  author     = {Tom Zahavy and Zhongwen Xu and Vivek Veeriah and Matteo Hessel and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Self-Tuning Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2002.12928},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.12928},
  eprinttype = {arXiv},
  eprint     = {2002.12928},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-12928.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2006-02243,
  author     = {Will Dabney and Andr{\'{e}} Barreto and Mark Rowland and Robert Dadashi and John Quan and Marc G. Bellemare and David Silver},
  title      = {The Value-Improvement Path: Towards Better Representations for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2006.02243},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.02243},
  eprinttype = {arXiv},
  eprint     = {2006.02243},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-02243.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-01839,
  author     = {Hado van Hasselt and Sephora Madjiheurem and Matteo Hessel and David Silver and Andr{\'{e}} Barreto and Diana Borsa},
  title      = {Expected Eligibility Traces},
  journal    = {CoRR},
  volume     = {abs/2007.01839},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.01839},
  eprinttype = {arXiv},
  eprint     = {2007.01839},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-01839.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-08433,
  author     = {Zhongwen Xu and Hado van Hasselt and Matteo Hessel and Junhyuk Oh and Satinder Singh and David Silver},
  title      = {Meta-Gradient Reinforcement Learning with an Objective Discovered Online},
  journal    = {CoRR},
  volume     = {abs/2007.08433},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08433},
  eprinttype = {arXiv},
  eprint     = {2007.08433},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08433.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-08794,
  author     = {Junhyuk Oh and Matteo Hessel and Wojciech M. Czarnecki and Zhongwen Xu and Hado van Hasselt and Satinder Singh and David Silver},
  title      = {Discovering Reinforcement Learning Algorithms},
  journal    = {CoRR},
  volume     = {abs/2007.08794},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08794},
  eprinttype = {arXiv},
  eprint     = {2007.08794},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08794.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2011-03506,
  author     = {Christopher Grimm and Andr{\'{e}} Barreto and Satinder Singh and David Silver},
  title      = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2011.03506},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.03506},
  eprinttype = {arXiv},
  eprint     = {2011.03506},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-03506.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nature/VinyalsBCMDCCPE19,
  author    = {Oriol Vinyals and Igor Babuschkin and Wojciech M. Czarnecki and Micha{\"{e}}l Mathieu and Andrew Dudzik and Junyoung Chung and David H. Choi and Richard Powell and Timo Ewalds and Petko Georgiev and Junhyuk Oh and Dan Horgan and Manuel Kroiss and Ivo Danihelka and Aja Huang and Laurent Sifre and Trevor Cai and John P. Agapiou and Max Jaderberg and Alexander Sasha Vezhnevets and R{\'{e}}mi Leblond and Tobias Pohlen and Valentin Dalibard and David Budden and Yury Sulsky and James Molloy and Tom Le Paine and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Ziyu Wang and Tobias Pfaff and Yuhuai Wu and Roman Ring and Dani Yogatama and Dario W{\"{u}}nsch and Katrina McKinney and Oliver Smith and Tom Schaul and Timothy P. Lillicrap and Koray Kavukcuoglu and Demis Hassabis and Chris Apps and David Silver},
  title     = {Grandmaster level in {StarCraft} {II} using multi-agent reinforcement learning},
  journal   = {Nature},
  volume    = {575},
  number    = {7782},
  pages     = {350--354},
  year      = {2019},
  url       = {https://doi.org/10.1038/s41586-019-1724-z},
  doi       = {10.1038/S41586-019-1724-Z},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/nature/VinyalsBCMDCCPE19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/WeberHBS19,
  author     = {Th{\'{e}}ophane Weber and Nicolas Heess and Lars Buesing and David Silver},
  editor     = {Kamalika Chaudhuri and Masashi Sugiyama},
  title      = {Credit Assignment Techniques in Stochastic Computation Graphs},
  booktitle  = {The 22nd International Conference on Artificial Intelligence and Statistics, {AISTATS} 2019, 16-18 April 2019, Naha, Okinawa, Japan},
  series     = {Proceedings of Machine Learning Research},
  volume     = {89},
  pages      = {2650--2660},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v89/weber19a.html},
  timestamp  = {Fri, 07 Jun 2019 09:03:47 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/WeberHBS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/BorsaBQMHMSS19,
  author     = {Diana Borsa and Andr{\'{e}} Barreto and John Quan and Daniel J. Mankowitz and Hado van Hasselt and R{\'{e}}mi Munos and David Silver and Tom Schaul},
  title      = {Universal Successor Features Approximators},
  booktitle  = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher  = {OpenReview.net},
  year       = {2019},
  url        = {https://openreview.net/forum?id=S1VWjiRcKX},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iclr/BorsaBQMHMSS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Author Weber's given name carries its accent, matching the spelling used for him in other entries of this file.
@inproceedings{DBLP:conf/icml/GuezMGKRWRSOEWS19,
  author     = {Arthur Guez and Mehdi Mirza and Karol Gregor and Rishabh Kabra and S{\'{e}}bastien Racani{\`{e}}re and Th{\'{e}}ophane Weber and David Raposo and Adam Santoro and Laurent Orseau and Tom Eccles and Greg Wayne and David Silver and Timothy P. Lillicrap},
  editor     = {Kamalika Chaudhuri and Ruslan Salakhutdinov},
  title      = {An Investigation of Model-Free Planning},
  booktitle  = {Proceedings of the 36th International Conference on Machine Learning, {ICML} 2019, 9-15 June 2019, Long Beach, California, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {97},
  pages      = {2464--2473},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v97/guez19a.html},
  timestamp  = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GuezMGKRWRSOEWS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/VeeriahHXRLOHSS19,
  author     = {Vivek Veeriah and Matteo Hessel and Zhongwen Xu and Janarthanan Rajendran and Richard L. Lewis and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  editor     = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title      = {Discovery of Useful Questions as Auxiliary Tasks},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {9306--9317},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/10ff0b5e85e5b85cc3095d431d8c08b4-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/VeeriahHXRLOHSS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/BarretoBHCAHTHM19,
  author     = {Andr{\'{e}} Barreto and Diana Borsa and Shaobo Hou and Gheorghe Comanici and Eser Ayg{\"{u}}n and Philippe Hamel and Daniel Toyama and Jonathan J. Hunt and Shibl Mourad and David Silver and Doina Precup},
  editor     = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title      = {The Option Keyboard: Combining Skills in Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {13031--13041},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/251c5ffd6b62cc21c446c963c76cf214-Abstract.html},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/BarretoBHCAHTHM19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1901-01761,
  author     = {Th{\'{e}}ophane Weber and Nicolas Heess and Lars Buesing and David Silver},
  title      = {Credit Assignment Techniques in Stochastic Computation Graphs},
  journal    = {CoRR},
  volume     = {abs/1901.01761},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.01761},
  eprinttype = {arXiv},
  eprint     = {1901.01761},
  timestamp  = {Thu, 31 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-01761.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title stored in Title Case, like the other titles in this file; styles can downcase it but cannot restore capitals.
@article{DBLP:journals/corr/abs-1901-03559,
  author     = {Arthur Guez and Mehdi Mirza and Karol Gregor and Rishabh Kabra and S{\'{e}}bastien Racani{\`{e}}re and Th{\'{e}}ophane Weber and David Raposo and Adam Santoro and Laurent Orseau and Tom Eccles and Greg Wayne and David Silver and Timothy P. Lillicrap},
  title      = {An Investigation of Model-Free Planning},
  journal    = {CoRR},
  volume     = {abs/1901.03559},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.03559},
  eprinttype = {arXiv},
  eprint     = {1901.03559},
  timestamp  = {Fri, 01 Feb 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-03559.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1901-10964,
  author     = {Andr{\'{e}} Barreto and Diana Borsa and John Quan and Tom Schaul and David Silver and Matteo Hessel and Daniel J. Mankowitz and Augustin Z{\'{\i}}dek and R{\'{e}}mi Munos},
  title      = {Transfer in Deep Reinforcement Learning Using Successor Features and Generalised Policy Improvement},
  journal    = {CoRR},
  volume     = {abs/1901.10964},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.10964},
  eprinttype = {arXiv},
  eprint     = {1901.10964},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-10964.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1907-02908,
  author     = {Matteo Hessel and Hado van Hasselt and Joseph Modayil and David Silver},
  title      = {On Inductive Biases in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1907.02908},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.02908},
  eprinttype = {arXiv},
  eprint     = {1907.02908},
  timestamp  = {Mon, 08 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-02908.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Van Roy is given in comma form: in First-Last order the capitalised Van would be parsed as part of the given name.
@article{DBLP:journals/corr/abs-1908-03568,
  author     = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Van Roy, Benjamin and Richard S. Sutton and David Silver and Hado van Hasselt},
  title      = {Behaviour Suite for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1908.03568},
  year       = {2019},
  url        = {http://arxiv.org/abs/1908.03568},
  eprinttype = {arXiv},
  eprint     = {1908.03568},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1909-04607,
  author     = {Vivek Veeriah and Matteo Hessel and Zhongwen Xu and Richard L. Lewis and Janarthanan Rajendran and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Discovery of Useful Questions as Auxiliary Tasks},
  journal    = {CoRR},
  volume     = {abs/1909.04607},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.04607},
  eprinttype = {arXiv},
  eprint     = {1909.04607},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-04607.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: the proper nouns Atari and Go are brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-1911-08265,
  author     = {Julian Schrittwieser and Ioannis Antonoglou and Thomas Hubert and Karen Simonyan and Laurent Sifre and Simon Schmitt and Arthur Guez and Edward Lockhart and Demis Hassabis and Thore Graepel and Timothy P. Lillicrap and David Silver},
  title      = {Mastering {Atari}, {Go}, Chess and Shogi by Planning with a Learned Model},
  journal    = {CoRR},
  volume     = {abs/1911.08265},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.08265},
  eprinttype = {arXiv},
  eprint     = {1911.08265},
  timestamp  = {Mon, 02 Dec 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-08265.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1912-05500,
  author     = {Zeyu Zheng and Junhyuk Oh and Matteo Hessel and Zhongwen Xu and Manuel Kroiss and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {What Can Learned Intrinsic Rewards Capture?},
  journal    = {CoRR},
  volume     = {abs/1912.05500},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.05500},
  eprinttype = {arXiv},
  eprint     = {1912.05500},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-05500.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nn/SunSTH18,
  author     = {Ron Sun and David Silver and Gerald Tesauro and Guang{-}Bin Huang},
  title      = {Introduction to the special issue on deep reinforcement learning: An editorial},
  journal    = {Neural Networks},
  volume     = {107},
  pages      = {1--2},
  year       = {2018},
  url        = {https://doi.org/10.1016/j.neunet.2018.08.001},
  doi        = {10.1016/J.NEUNET.2018.08.001},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/nn/SunSTH18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Booktitle: Innovative is capitalised because it is part of the IAAI conference name.
@inproceedings{DBLP:conf/aaai/HesselMHSODHPAS18,
  author     = {Matteo Hessel and Joseph Modayil and Hado van Hasselt and Tom Schaul and Georg Ostrovski and Will Dabney and Dan Horgan and Bilal Piot and Mohammad Gheshlaghi Azar and David Silver},
  editor     = {Sheila A. McIlraith and Kilian Q. Weinberger},
  title      = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle  = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence, (AAAI-18), the 30th Innovative Applications of Artificial Intelligence (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2-7, 2018},
  pages      = {3215--3222},
  publisher  = {{AAAI} Press},
  year       = {2018},
  url        = {https://doi.org/10.1609/aaai.v32i1.11796},
  doi        = {10.1609/AAAI.V32I1.11796},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/HorganQBBHHS18,
  author     = {Dan Horgan and John Quan and David Budden and Gabriel Barth{-}Maron and Matteo Hessel and Hado van Hasselt and David Silver},
  title      = {Distributed Prioritized Experience Replay},
  booktitle  = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2018},
  url        = {https://openreview.net/forum?id=H1Dy---0Z},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/HorganQBBHHS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/BarretoBQSSHMZM18,
  author     = {Andr{\'{e}} Barreto and Diana Borsa and John Quan and Tom Schaul and David Silver and Matteo Hessel and Daniel J. Mankowitz and Augustin Z{\'{\i}}dek and R{\'{e}}mi Munos},
  editor     = {Jennifer G. Dy and Andreas Krause},
  title      = {Transfer in Deep Reinforcement Learning Using Successor Features and Generalised Policy Improvement},
  booktitle  = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series     = {Proceedings of Machine Learning Research},
  volume     = {80},
  pages      = {510--519},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v80/barreto18a.html},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/BarretoBQSSHMZM18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/DabneyOSM18,
  author     = {Will Dabney and Georg Ostrovski and David Silver and R{\'{e}}mi Munos},
  editor     = {Jennifer G. Dy and Andreas Krause},
  title      = {Implicit Quantile Networks for Distributional Reinforcement Learning},
  booktitle  = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series     = {Proceedings of Machine Learning Research},
  volume     = {80},
  pages      = {1104--1113},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v80/dabney18a.html},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/DabneyOSM18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: MCTSnets is brace-protected against sentence-casing; author Weber's given name carries its accent as in his other entries in this file.
@inproceedings{DBLP:conf/icml/GuezWASVWMS18,
  author     = {Arthur Guez and Th{\'{e}}ophane Weber and Ioannis Antonoglou and Karen Simonyan and Oriol Vinyals and Daan Wierstra and R{\'{e}}mi Munos and David Silver},
  editor     = {Jennifer G. Dy and Andreas Krause},
  title      = {Learning to Search with {MCTSnets}},
  booktitle  = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series     = {Proceedings of Machine Learning Research},
  volume     = {80},
  pages      = {1817--1826},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v80/guez18a.html},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GuezWASVWMS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/XuHS18,
  author     = {Zhongwen Xu and Hado van Hasselt and David Silver},
  editor     = {Samy Bengio and Hanna M. Wallach and Hugo Larochelle and Kristen Grauman and Nicol{\`{o}} Cesa{-}Bianchi and Roman Garnett},
  title      = {Meta-Gradient Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages      = {2402--2413},
  year       = {2018},
  url        = {https://proceedings.neurips.cc/paper/2018/hash/2715518c875999308842e3455eda2fe3-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/XuHS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: MCTSnets is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-1802-04697,
  author     = {Arthur Guez and Th{\'{e}}ophane Weber and Ioannis Antonoglou and Karen Simonyan and Oriol Vinyals and Daan Wierstra and R{\'{e}}mi Munos and David Silver},
  title      = {Learning to Search with {MCTSnets}},
  journal    = {CoRR},
  volume     = {abs/1802.04697},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.04697},
  eprinttype = {arXiv},
  eprint     = {1802.04697},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-04697.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1802-08294,
  author     = {Daniel J. Mankowitz and Augustin Z{\'{\i}}dek and Andr{\'{e}} Barreto and Dan Horgan and Matteo Hessel and John Quan and Junhyuk Oh and Hado van Hasselt and David Silver and Tom Schaul},
  title      = {Unicorn: Continual Learning with a Universal, Off-policy Agent},
  journal    = {CoRR},
  volume     = {abs/1802.08294},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.08294},
  eprinttype = {arXiv},
  eprint     = {1802.08294},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-08294.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1803-00933,
  author     = {Dan Horgan and John Quan and David Budden and Gabriel Barth{-}Maron and Matteo Hessel and Hado van Hasselt and David Silver},
  title      = {Distributed Prioritized Experience Replay},
  journal    = {CoRR},
  volume     = {abs/1803.00933},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.00933},
  eprinttype = {arXiv},
  eprint     = {1803.00933},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-00933.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1803-10760,
  author     = {Greg Wayne and Chia{-}Chun Hung and David Amos and Mehdi Mirza and Arun Ahuja and Agnieszka Grabska{-}Barwinska and Jack W. Rae and Piotr Mirowski and Joel Z. Leibo and Adam Santoro and Mevlana Gemici and Malcolm Reynolds and Tim Harley and Josh Abramson and Shakir Mohamed and Danilo Jimenez Rezende and David Saxton and Adam Cain and Chloe Hillier and David Silver and Koray Kavukcuoglu and Matthew M. Botvinick and Demis Hassabis and Timothy P. Lillicrap},
  title      = {Unsupervised Predictive Memory in a Goal-Directed Agent},
  journal    = {CoRR},
  volume     = {abs/1803.10760},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.10760},
  eprinttype = {arXiv},
  eprint     = {1803.10760},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-10760.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1805-09801,
  author     = {Zhongwen Xu and Hado van Hasselt and David Silver},
  title      = {Meta-Gradient Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1805.09801},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.09801},
  eprinttype = {arXiv},
  eprint     = {1805.09801},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-09801.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1806-06923,
  author     = {Will Dabney and Georg Ostrovski and David Silver and R{\'{e}}mi Munos},
  title      = {Implicit Quantile Networks for Distributional Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1806.06923},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.06923},
  eprinttype = {arXiv},
  eprint     = {1806.06923},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-06923.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1807-01281,
  author     = {Max Jaderberg and Wojciech M. Czarnecki and Iain Dunning and Luke Marris and Guy Lever and Antonio Garc{\'{\i}}a Casta{\~{n}}eda and Charles Beattie and Neil C. Rabinowitz and Ari S. Morcos and Avraham Ruderman and Nicolas Sonnerat and Tim Green and Louise Deason and Joel Z. Leibo and David Silver and Demis Hassabis and Koray Kavukcuoglu and Thore Graepel},
  title      = {Human-level performance in first-person multiplayer games with population-based deep reinforcement learning},
  journal    = {CoRR},
  volume     = {abs/1807.01281},
  year       = {2018},
  url        = {http://arxiv.org/abs/1807.01281},
  eprinttype = {arXiv},
  eprint     = {1807.01281},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1807-01281.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: Bayesian and AlphaGo are brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-1812-06855,
  author     = {Yutian Chen and Aja Huang and Ziyu Wang and Ioannis Antonoglou and Julian Schrittwieser and David Silver and Nando de Freitas},
  title      = {{Bayesian} Optimization in {AlphaGo}},
  journal    = {CoRR},
  volume     = {abs/1812.06855},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.06855},
  eprinttype = {arXiv},
  eprint     = {1812.06855},
  timestamp  = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-06855.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1812-07626,
  author     = {Diana Borsa and Andr{\'{e}} Barreto and John Quan and Daniel J. Mankowitz and R{\'{e}}mi Munos and Hado van Hasselt and David Silver and Tom Schaul},
  title      = {Universal Successor Features Approximators},
  journal    = {CoRR},
  volume     = {abs/1812.07626},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.07626},
  eprinttype = {arXiv},
  eprint     = {1812.07626},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-07626.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/cacm/Silver17,
  author     = {David Silver},
  title      = {Technical perspective: Solving imperfect information games},
  journal    = {Commun. {ACM}},
  volume     = {60},
  number     = {11},
  pages      = {80},
  year       = {2017},
  url        = {https://doi.org/10.1145/3131286},
  doi        = {10.1145/3131286},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/cacm/Silver17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: Go, the game, is brace-protected so sentence-casing styles do not render it as the verb.
@article{DBLP:journals/nature/SilverSSAHGHBLB17,
  author     = {David Silver and Julian Schrittwieser and Karen Simonyan and Ioannis Antonoglou and Aja Huang and Arthur Guez and Thomas Hubert and Lucas Baker and Matthew Lai and Adrian Bolton and Yutian Chen and Timothy P. Lillicrap and Fan Hui and Laurent Sifre and George van den Driessche and Thore Graepel and Demis Hassabis},
  title      = {Mastering the game of {Go} without human knowledge},
  journal    = {Nat.},
  volume     = {550},
  number     = {7676},
  pages      = {354--359},
  year       = {2017},
  url        = {https://doi.org/10.1038/nature24270},
  doi        = {10.1038/NATURE24270},
  timestamp  = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/SilverSSAHGHBLB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/JaderbergMCSLSK17,
  author     = {Max Jaderberg and Volodymyr Mnih and Wojciech Marian Czarnecki and Tom Schaul and Joel Z. Leibo and David Silver and Koray Kavukcuoglu},
  title      = {Reinforcement Learning with Unsupervised Auxiliary Tasks},
  booktitle  = {5th International Conference on Learning Representations, {ICLR} 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2017},
  url        = {https://openreview.net/forum?id=SJ6yPD5xg},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/JaderbergMCSLSK17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/JaderbergCOVGSK17,
  author     = {Max Jaderberg and Wojciech Marian Czarnecki and Simon Osindero and Oriol Vinyals and Alex Graves and David Silver and Koray Kavukcuoglu},
  editor     = {Doina Precup and Yee Whye Teh},
  title      = {Decoupled Neural Interfaces using Synthetic Gradients},
  booktitle  = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series     = {Proceedings of Machine Learning Research},
  volume     = {70},
  pages      = {1627--1635},
  publisher  = {{PMLR}},
  year       = {2017},
  url        = {http://proceedings.mlr.press/v70/jaderberg17a.html},
  timestamp  = {Wed, 29 May 2019 08:41:45 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/JaderbergCOVGSK17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: the coined name Predictron is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icml/SilverHHSGHDRRB17,
  author     = {David Silver and Hado van Hasselt and Matteo Hessel and Tom Schaul and Arthur Guez and Tim Harley and Gabriel Dulac{-}Arnold and David P. Reichert and Neil C. Rabinowitz and Andr{\'{e}} Barreto and Thomas Degris},
  editor     = {Doina Precup and Yee Whye Teh},
  title      = {The {Predictron}: End-To-End Learning and Planning},
  booktitle  = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series     = {Proceedings of Machine Learning Research},
  volume     = {70},
  pages      = {3191--3199},
  publisher  = {{PMLR}},
  year       = {2017},
  url        = {http://proceedings.mlr.press/v70/silver17a.html},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SilverHHSGHDRRB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: FeUdal is brace-protected; sentence-casing styles would otherwise flatten its mixed capitalisation.
@inproceedings{DBLP:conf/icml/VezhnevetsOSHJS17,
  author     = {Alexander Sasha Vezhnevets and Simon Osindero and Tom Schaul and Nicolas Heess and Max Jaderberg and David Silver and Koray Kavukcuoglu},
  editor     = {Doina Precup and Yee Whye Teh},
  title      = {{FeUdal} Networks for Hierarchical Reinforcement Learning},
  booktitle  = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series     = {Proceedings of Machine Learning Research},
  volume     = {70},
  pages      = {3540--3549},
  publisher  = {{PMLR}},
  year       = {2017},
  url        = {http://proceedings.mlr.press/v70/vezhnevets17a.html},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/VezhnevetsOSHJS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/XuMHBSS17,
  author     = {Zhongwen Xu and Joseph Modayil and Hado van Hasselt and Andr{\'{e}} Barreto and David Silver and Tom Schaul},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title      = {Natural Value Approximators: Learning when to Trust Past Estimates},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {2120--2128},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/fb60d411a5c5b72b2e7d3527cfc84fd0-Abstract.html},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/XuMHBSS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/BarretoDMHSSH17,
  author     = {Andr{\'{e}} Barreto and Will Dabney and R{\'{e}}mi Munos and Jonathan J. Hunt and Tom Schaul and David Silver and Hado van Hasselt},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title      = {Successor Features for Transfer in Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {4055--4065},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/350db081a661525235354dd3e19b8c05-Abstract.html},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/BarretoDMHSSH17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/LanctotZGLTPSG17,
  author     = {Marc Lanctot and Vin{\'{\i}}cius Flores Zambaldi and Audrunas Gruslys and Angeliki Lazaridou and Karl Tuyls and Julien P{\'{e}}rolat and David Silver and Thore Graepel},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title      = {A Unified Game-Theoretic Approach to Multiagent Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {4190--4203},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/3323fe11e9595c09af38fe67567a9394-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/LanctotZGLTPSG17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Author Weber's given name carries its accent, matching the spelling used for him in other entries of this file.
@inproceedings{DBLP:conf/nips/RacaniereWRBGRB17,
  author     = {S{\'{e}}bastien Racani{\`{e}}re and Th{\'{e}}ophane Weber and David P. Reichert and Lars Buesing and Arthur Guez and Danilo Jimenez Rezende and Adri{\`{a}} Puigdom{\`{e}}nech Badia and Oriol Vinyals and Nicolas Heess and Yujia Li and Razvan Pascanu and Peter W. Battaglia and Demis Hassabis and David Silver and Daan Wierstra},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title      = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {5690--5701},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/9e82757e9a1c12cb710ad680db11f6f1-Abstract.html},
  timestamp  = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/RacaniereWRBGRB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: FeUdal is brace-protected; sentence-casing styles would otherwise flatten its mixed capitalisation.
@article{DBLP:journals/corr/VezhnevetsOSHJS17,
  author     = {Alexander Sasha Vezhnevets and Simon Osindero and Tom Schaul and Nicolas Heess and Max Jaderberg and David Silver and Koray Kavukcuoglu},
  title      = {{FeUdal} Networks for Hierarchical Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1703.01161},
  year       = {2017},
  url        = {http://arxiv.org/abs/1703.01161},
  eprinttype = {arXiv},
  eprint     = {1703.01161},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/VezhnevetsOSHJS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/HeessTSLMWTEWER17,
  author     = {Nicolas Heess and Dhruva TB and Srinivasan Sriram and Jay Lemmon and Josh Merel and Greg Wayne and Yuval Tassa and Tom Erez and Ziyu Wang and S. M. Ali Eslami and Martin A. Riedmiller and David Silver},
  title      = {Emergence of Locomotion Behaviours in Rich Environments},
  journal    = {CoRR},
  volume     = {abs/1707.02286},
  year       = {2017},
  url        = {http://arxiv.org/abs/1707.02286},
  eprinttype = {arXiv},
  eprint     = {1707.02286},
  timestamp  = {Mon, 22 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HeessTSLMWTEWER17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Author Weber's given name carries its accent, matching the spelling used for him in other entries of this file.
@article{DBLP:journals/corr/WeberRRBGRBVHLP17,
  author     = {Th{\'{e}}ophane Weber and S{\'{e}}bastien Racani{\`{e}}re and David P. Reichert and Lars Buesing and Arthur Guez and Danilo Jimenez Rezende and Adri{\`{a}} Puigdom{\`{e}}nech Badia and Oriol Vinyals and Nicolas Heess and Yujia Li and Razvan Pascanu and Peter W. Battaglia and David Silver and Daan Wierstra},
  title      = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1707.06203},
  year       = {2017},
  url        = {http://arxiv.org/abs/1707.06203},
  eprinttype = {arXiv},
  eprint     = {1707.06203},
  timestamp  = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/WeberRRBGRBVHLP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: StarCraft is brace-protected so sentence-casing styles keep its internal capital.
@article{DBLP:journals/corr/abs-1708-04782,
  author     = {Oriol Vinyals and Timo Ewalds and Sergey Bartunov and Petko Georgiev and Alexander Sasha Vezhnevets and Michelle Yeo and Alireza Makhzani and Heinrich K{\"{u}}ttler and John P. Agapiou and Julian Schrittwieser and John Quan and Stephen Gaffney and Stig Petersen and Karen Simonyan and Tom Schaul and Hado van Hasselt and David Silver and Timothy P. Lillicrap and Kevin Calderone and Paul Keet and Anthony Brunasso and David Lawrence and Anders Ekermo and Jacob Repp and Rodney Tsing},
  title      = {{StarCraft} {II:} {A} New Challenge for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1708.04782},
  year       = {2017},
  url        = {http://arxiv.org/abs/1708.04782},
  eprinttype = {arXiv},
  eprint     = {1708.04782},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1708-04782.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Author Horgan is written Dan Horgan, the form used for him in the other entries of this file, so author indexes do not split him in two.
@article{DBLP:journals/corr/abs-1710-02298,
  author     = {Matteo Hessel and Joseph Modayil and Hado van Hasselt and Tom Schaul and Georg Ostrovski and Will Dabney and Dan Horgan and Bilal Piot and Mohammad Gheshlaghi Azar and David Silver},
  title      = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1710.02298},
  year       = {2017},
  url        = {http://arxiv.org/abs/1710.02298},
  eprinttype = {arXiv},
  eprint     = {1710.02298},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1710-02298.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1711.00832 (CoRR), 2017.
@article{DBLP:journals/corr/abs-1711-00832,
  author     = {Marc Lanctot and Vin{\'{\i}}cius Flores Zambaldi and Audrunas Gruslys and Angeliki Lazaridou and Karl Tuyls and Julien P{\'{e}}rolat and David Silver and Thore Graepel},
  title      = {A Unified Game-Theoretic Approach to Multiagent Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1711.00832},
  year       = {2017},
  url        = {http://arxiv.org/abs/1711.00832},
  eprinttype = {arXiv},
  eprint     = {1711.00832},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1711-00832.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1712.01815 (CoRR), 2017.
@article{DBLP:journals/corr/abs-1712-01815,
  author     = {David Silver and Thomas Hubert and Julian Schrittwieser and Ioannis Antonoglou and Matthew Lai and Arthur Guez and Marc Lanctot and Laurent Sifre and Dharshan Kumaran and Thore Graepel and Timothy P. Lillicrap and Karen Simonyan and Demis Hassabis},
  title      = {Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm},
  journal    = {CoRR},
  volume     = {abs/1712.01815},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.01815},
  eprinttype = {arXiv},
  eprint     = {1712.01815},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-01815.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Nature 529(7587), 2016.
% Deviates from the DBLP export: (1) the journal is given as "Nature" rather
% than the non-standard abbreviation "Nat."; (2) Go is brace-protected in the
% title so sentence-casing styles do not print the game's name as "go".
@article{DBLP:journals/nature/SilverHMGSDSAPL16,
  author     = {David Silver and Aja Huang and Chris J. Maddison and Arthur Guez and Laurent Sifre and George van den Driessche and Julian Schrittwieser and Ioannis Antonoglou and Vedavyas Panneershelvam and Marc Lanctot and Sander Dieleman and Dominik Grewe and John Nham and Nal Kalchbrenner and Ilya Sutskever and Timothy P. Lillicrap and Madeleine Leach and Koray Kavukcuoglu and Thore Graepel and Demis Hassabis},
  title      = {Mastering the game of {Go} with deep neural networks and tree search},
  journal    = {Nature},
  volume     = {529},
  number     = {7587},
  pages      = {484--489},
  year       = {2016},
  url        = {https://doi.org/10.1038/nature16961},
  doi        = {10.1038/NATURE16961},
  timestamp  = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/SilverHMGSDSAPL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% AAAI 2016 conference paper.
% Deviates from the DBLP export: the Q in "Q-Learning" is brace-protected so
% sentence-casing styles do not print "q-learning".
@inproceedings{DBLP:conf/aaai/HasseltGS16,
  author     = {Hado van Hasselt and Arthur Guez and David Silver},
  editor     = {Dale Schuurmans and Michael P. Wellman},
  title      = {Deep Reinforcement Learning with Double {Q}-Learning},
  booktitle  = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence, February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages      = {2094--2100},
  publisher  = {{AAAI} Press},
  year       = {2016},
  url        = {https://doi.org/10.1609/aaai.v30i1.10295},
  doi        = {10.1609/AAAI.V30I1.10295},
  timestamp  = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2016 conference paper (JMLR W&CP vol. 48).
% Deviates from the DBLP export: the booktitle ordinal "33nd" is corrected to "33rd".
@inproceedings{DBLP:conf/icml/MnihBMGLHSK16,
  author     = {Volodymyr Mnih and Adri{\`{a}} Puigdom{\`{e}}nech Badia and Mehdi Mirza and Alex Graves and Timothy P. Lillicrap and Tim Harley and David Silver and Koray Kavukcuoglu},
  editor     = {Maria{-}Florina Balcan and Kilian Q. Weinberger},
  title      = {Asynchronous Methods for Deep Reinforcement Learning},
  booktitle  = {Proceedings of the 33rd International Conference on Machine Learning, {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {48},
  pages      = {1928--1937},
  publisher  = {JMLR.org},
  year       = {2016},
  url        = {http://proceedings.mlr.press/v48/mniha16.html},
  timestamp  = {Wed, 29 May 2019 08:41:46 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/MnihBMGLHSK16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NIPS 2016 (Advances in Neural Information Processing Systems 29) conference paper.
@inproceedings{DBLP:conf/nips/HasseltGHMS16,
  author     = {Hado van Hasselt and Arthur Guez and Matteo Hessel and Volodymyr Mnih and David Silver},
  editor     = {Daniel D. Lee and Masashi Sugiyama and Ulrike von Luxburg and Isabelle Guyon and Roman Garnett},
  title      = {Learning values across many orders of magnitude},
  booktitle  = {Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5-10, 2016, Barcelona, Spain},
  pages      = {4287--4295},
  year       = {2016},
  url        = {https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/HasseltGHMS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICLR 2016 conference-track paper; the url points at arXiv 1509.02971.
@inproceedings{DBLP:journals/corr/LillicrapHPHETS15,
  author     = {Timothy P. Lillicrap and Jonathan J. Hunt and Alexander Pritzel and Nicolas Heess and Tom Erez and Yuval Tassa and David Silver and Daan Wierstra},
  editor     = {Yoshua Bengio and Yann LeCun},
  title      = {Continuous control with deep reinforcement learning},
  booktitle  = {4th International Conference on Learning Representations, {ICLR} 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year       = {2016},
  url        = {http://arxiv.org/abs/1509.02971},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICLR 2016 conference-track paper; the url points at arXiv 1511.05952.
@inproceedings{DBLP:journals/corr/SchaulQAS15,
  author     = {Tom Schaul and John Quan and Ioannis Antonoglou and David Silver},
  editor     = {Yoshua Bengio and Yann LeCun},
  title      = {Prioritized Experience Replay},
  booktitle  = {4th International Conference on Learning Representations, {ICLR} 2016, San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year       = {2016},
  url        = {http://arxiv.org/abs/1511.05952},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SchaulQAS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1602.01783 (CoRR), 2016.
@article{DBLP:journals/corr/MnihBMGLHSK16,
  author     = {Volodymyr Mnih and Adri{\`{a}} Puigdom{\`{e}}nech Badia and Mehdi Mirza and Alex Graves and Timothy P. Lillicrap and Tim Harley and David Silver and Koray Kavukcuoglu},
  title      = {Asynchronous Methods for Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1602.01783},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.01783},
  eprinttype = {arXiv},
  eprint     = {1602.01783},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MnihBMGLHSK16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1602.07714 (CoRR), 2016.
@article{DBLP:journals/corr/HasseltGHS16,
  author     = {Hado van Hasselt and Arthur Guez and Matteo Hessel and David Silver},
  title      = {Learning functions across many orders of magnitudes},
  journal    = {CoRR},
  volume     = {abs/1602.07714},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.07714},
  eprinttype = {arXiv},
  eprint     = {1602.07714},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltGHS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1603.01121 (CoRR), 2016.
@article{DBLP:journals/corr/HeinrichS16,
  author     = {Johannes Heinrich and David Silver},
  title      = {Deep Reinforcement Learning from Self-Play in Imperfect-Information Games},
  journal    = {CoRR},
  volume     = {abs/1603.01121},
  year       = {2016},
  url        = {http://arxiv.org/abs/1603.01121},
  eprinttype = {arXiv},
  eprint     = {1603.01121},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HeinrichS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1606.05312 (CoRR), 2016.
@article{DBLP:journals/corr/BarretoMSS16,
  author     = {Andr{\'{e}} Barreto and R{\'{e}}mi Munos and Tom Schaul and David Silver},
  title      = {Successor Features for Transfer in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1606.05312},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.05312},
  eprinttype = {arXiv},
  eprint     = {1606.05312},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/BarretoMSS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1610.05182 (CoRR), 2016.
@article{DBLP:journals/corr/HeessWTLRS16,
  author     = {Nicolas Heess and Gregory Wayne and Yuval Tassa and Timothy P. Lillicrap and Martin A. Riedmiller and David Silver},
  title      = {Learning and Transfer of Modulated Locomotor Controllers},
  journal    = {CoRR},
  volume     = {abs/1610.05182},
  year       = {2016},
  url        = {http://arxiv.org/abs/1610.05182},
  eprinttype = {arXiv},
  eprint     = {1610.05182},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HeessWTLRS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1611.05397 (CoRR), 2016.
@article{DBLP:journals/corr/JaderbergMCSLSK16,
  author     = {Max Jaderberg and Volodymyr Mnih and Wojciech Marian Czarnecki and Tom Schaul and Joel Z. Leibo and David Silver and Koray Kavukcuoglu},
  title      = {Reinforcement Learning with Unsupervised Auxiliary Tasks},
  journal    = {CoRR},
  volume     = {abs/1611.05397},
  year       = {2016},
  url        = {http://arxiv.org/abs/1611.05397},
  eprinttype = {arXiv},
  eprint     = {1611.05397},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/JaderbergMCSLSK16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1612.08810 (CoRR), 2016.
@article{DBLP:journals/corr/SilverHHSGHDRRB16,
  author     = {David Silver and Hado van Hasselt and Matteo Hessel and Tom Schaul and Arthur Guez and Tim Harley and Gabriel Dulac{-}Arnold and David P. Reichert and Neil C. Rabinowitz and Andr{\'{e}} Barreto and Thomas Degris},
  title      = {The Predictron: End-To-End Learning and Planning},
  journal    = {CoRR},
  volume     = {abs/1612.08810},
  year       = {2016},
  url        = {http://arxiv.org/abs/1612.08810},
  eprinttype = {arXiv},
  eprint     = {1612.08810},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/SilverHHSGHDRRB16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Nature 518(7540), 2015.
% Deviates from the DBLP export: the journal is given as "Nature" rather than
% the non-standard abbreviation "Nat.".
@article{DBLP:journals/nature/MnihKSRVBGRFOPB15,
  author     = {Volodymyr Mnih and Koray Kavukcuoglu and David Silver and Andrei A. Rusu and Joel Veness and Marc G. Bellemare and Alex Graves and Martin A. Riedmiller and Andreas Fidjeland and Georg Ostrovski and Stig Petersen and Charles Beattie and Amir Sadik and Ioannis Antonoglou and Helen King and Dharshan Kumaran and Daan Wierstra and Shane Legg and Demis Hassabis},
  title      = {Human-level control through deep reinforcement learning},
  journal    = {Nature},
  volume     = {518},
  number     = {7540},
  pages      = {529--533},
  year       = {2015},
  url        = {https://doi.org/10.1038/nature14236},
  doi        = {10.1038/NATURE14236},
  timestamp  = {Mon, 08 Jun 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CSCW 2015 conference paper (ACM).
@inproceedings{DBLP:conf/cscw/VinesWSWO15,
  author     = {John Vines and Peter C. Wright and David Silver and Maggie Winchcombe and Patrick Olivier},
  editor     = {Dan Cosley and Andrea Forte and Luigina Ciolfi and David McDonald},
  title      = {Authenticity, Relatability and Collaborative Approaches to Sharing Knowledge about Assistive Living Technology},
  booktitle  = {Proceedings of the 18th {ACM} Conference on Computer Supported Cooperative Work {\&} Social Computing, {CSCW} 2015, Vancouver, BC, Canada, March 14 - 18, 2015},
  pages      = {82--94},
  publisher  = {{ACM}},
  year       = {2015},
  url        = {https://doi.org/10.1145/2675133.2675222},
  doi        = {10.1145/2675133.2675222},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/cscw/VinesWSWO15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2015 conference paper (JMLR W&CP vol. 37).
@inproceedings{DBLP:conf/icml/HeinrichLS15,
  author     = {Johannes Heinrich and Marc Lanctot and David Silver},
  editor     = {Francis R. Bach and David M. Blei},
  title      = {Fictitious Self-Play in Extensive-Form Games},
  booktitle  = {Proceedings of the 32nd International Conference on Machine Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {37},
  pages      = {805--813},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v37/heinrich15.html},
  timestamp  = {Wed, 29 May 2019 08:41:45 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/HeinrichLS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2015 conference paper (JMLR W&CP vol. 37).
@inproceedings{DBLP:conf/icml/SchaulHGS15,
  author     = {Tom Schaul and Daniel Horgan and Karol Gregor and David Silver},
  editor     = {Francis R. Bach and David M. Blei},
  title      = {Universal Value Function Approximators},
  booktitle  = {Proceedings of the 32nd International Conference on Machine Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {37},
  pages      = {1312--1320},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v37/schaul15.html},
  timestamp  = {Wed, 29 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SchaulHGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% IJCAI 2015 conference paper (AAAI Press).
@inproceedings{DBLP:conf/ijcai/HeinrichS15,
  author     = {Johannes Heinrich and David Silver},
  editor     = {Qiang Yang and Michael J. Wooldridge},
  title      = {Smooth {UCT} Search in Computer Poker},
  booktitle  = {Proceedings of the Twenty-Fourth International Joint Conference on Artificial Intelligence, {IJCAI} 2015, Buenos Aires, Argentina, July 25-31, 2015},
  pages      = {554--560},
  publisher  = {{AAAI} Press},
  year       = {2015},
  url        = {http://ijcai.org/Abstract/15/084},
  timestamp  = {Tue, 20 Aug 2019 16:16:43 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/HeinrichS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% IROS 2015 conference paper (IEEE).
@inproceedings{DBLP:conf/iros/BradleyCSPHRS15,
  author     = {David M. Bradley and Jonathan K. Chang and David Silver and Matthew Powers and Herman Herman and Peter Rander and Anthony Stentz},
  title      = {Scene understanding for a high-mobility walking robot},
  booktitle  = {2015 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2015, Hamburg, Germany, September 28 - October 2, 2015},
  pages      = {1144--1151},
  publisher  = {{IEEE}},
  year       = {2015},
  url        = {https://doi.org/10.1109/IROS.2015.7353514},
  doi        = {10.1109/IROS.2015.7353514},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/BradleyCSPHRS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NIPS 2015 (Advances in Neural Information Processing Systems 28) conference paper.
@inproceedings{DBLP:conf/nips/HeessWSLET15,
  author     = {Nicolas Heess and Gregory Wayne and David Silver and Timothy P. Lillicrap and Tom Erez and Yuval Tassa},
  editor     = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  title      = {Learning Continuous Control Policies by Stochastic Value Gradients},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  pages      = {2944--2952},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/148510031349642de5ca0c544f31b2ef-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/HeessWSLET15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICLR 2015 conference-track paper; the url points at arXiv 1412.6564.
% Deviates from the DBLP export: Go is brace-protected in the title so
% sentence-casing styles do not print the game's name as "go".
@inproceedings{DBLP:journals/corr/MaddisonHSS14,
  author     = {Chris J. Maddison and Aja Huang and Ilya Sutskever and David Silver},
  editor     = {Yoshua Bengio and Yann LeCun},
  title      = {Move Evaluation in {Go} Using Deep Convolutional Neural Networks},
  booktitle  = {3rd International Conference on Learning Representations, {ICLR} 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  year       = {2015},
  url        = {http://arxiv.org/abs/1412.6564},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MaddisonHSS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1501.03959 (CoRR), 2015.
@article{DBLP:journals/corr/CiosekS15,
  author     = {Kamil Ciosek and David Silver},
  title      = {Value Iteration with Options and State Aggregation},
  journal    = {CoRR},
  volume     = {abs/1501.03959},
  year       = {2015},
  url        = {http://arxiv.org/abs/1501.03959},
  eprinttype = {arXiv},
  eprint     = {1501.03959},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/CiosekS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1507.04296 (CoRR), 2015.
@article{DBLP:journals/corr/NairSBAFMPSBPLM15,
  author     = {Arun Nair and Praveen Srinivasan and Sam Blackwell and Cagdas Alcicek and Rory Fearon and Alessandro De Maria and Vedavyas Panneershelvam and Mustafa Suleyman and Charles Beattie and Stig Petersen and Shane Legg and Volodymyr Mnih and Koray Kavukcuoglu and David Silver},
  title      = {Massively Parallel Methods for Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1507.04296},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.04296},
  eprinttype = {arXiv},
  eprint     = {1507.04296},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/NairSBAFMPSBPLM15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1509.06461 (CoRR), 2015.
% Deviates from the DBLP export: the Q in "Q-learning" is brace-protected so
% sentence-casing styles do not print "q-learning".
@article{DBLP:journals/corr/HasseltGS15,
  author     = {Hado van Hasselt and Arthur Guez and David Silver},
  title      = {Deep Reinforcement Learning with Double {Q}-learning},
  journal    = {CoRR},
  volume     = {abs/1509.06461},
  year       = {2015},
  url        = {http://arxiv.org/abs/1509.06461},
  eprinttype = {arXiv},
  eprint     = {1509.06461},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1510.09142 (CoRR), 2015.
@article{DBLP:journals/corr/HeessWSLTE15,
  author     = {Nicolas Heess and Greg Wayne and David Silver and Timothy P. Lillicrap and Yuval Tassa and Tom Erez},
  title      = {Learning Continuous Control Policies by Stochastic Value Gradients},
  journal    = {CoRR},
  volume     = {abs/1510.09142},
  year       = {2015},
  url        = {http://arxiv.org/abs/1510.09142},
  eprinttype = {arXiv},
  eprint     = {1510.09142},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HeessWSLTE15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1512.04455 (CoRR), 2015.
@article{DBLP:journals/corr/HeessHLS15,
  author     = {Nicolas Heess and Jonathan J. Hunt and Timothy P. Lillicrap and David Silver},
  title      = {Memory-based control with recurrent neural networks},
  journal    = {CoRR},
  volume     = {abs/1512.04455},
  year       = {2015},
  url        = {http://arxiv.org/abs/1512.04455},
  eprinttype = {arXiv},
  eprint     = {1512.04455},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HeessHLS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2014 conference paper (JMLR W&CP vol. 32).
% Deviates from the DBLP export: the booktitle ordinal "31th" is corrected to "31st".
@inproceedings{DBLP:conf/icml/SilverLHDWR14,
  author     = {David Silver and Guy Lever and Nicolas Heess and Thomas Degris and Daan Wierstra and Martin A. Riedmiller},
  title      = {Deterministic Policy Gradient Algorithms},
  booktitle  = {Proceedings of the 31st International Conference on Machine Learning, {ICML} 2014, Beijing, China, 21-26 June 2014},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {32},
  pages      = {387--395},
  publisher  = {JMLR.org},
  year       = {2014},
  url        = {http://proceedings.mlr.press/v32/silver14.html},
  timestamp  = {Wed, 29 May 2019 08:41:45 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SilverLHDWR14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NIPS 2014 (Advances in Neural Information Processing Systems 27) conference paper.
@inproceedings{DBLP:conf/nips/GuezHSD14,
  author     = {Arthur Guez and Nicolas Heess and David Silver and Peter Dayan},
  editor     = {Zoubin Ghahramani and Max Welling and Corinna Cortes and Neil D. Lawrence and Kilian Q. Weinberger},
  title      = {Bayes-Adaptive Simulation-based Search with Value Function Approximation},
  booktitle  = {Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages      = {451--459},
  year       = {2014},
  url        = {https://proceedings.neurips.cc/paper/2014/hash/839ab46820b524afda05122893c2fe8e-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GuezHSD14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% USENIX Security Symposium 2014 conference paper.
@inproceedings{DBLP:conf/uss/SilverJBCJ14,
  author     = {David Silver and Suman Jana and Dan Boneh and Eric Yawei Chen and Collin Jackson},
  editor     = {Kevin Fu and Jaeyeon Jung},
  title      = {Password Managers: Attacks and Defenses},
  booktitle  = {Proceedings of the 23rd {USENIX} Security Symposium, San Diego, CA, USA, August 20-22, 2014},
  pages      = {449--464},
  publisher  = {{USENIX} Association},
  year       = {2014},
  url        = {https://www.usenix.org/conference/usenixsecurity14/technical-sessions/presentation/silver},
  timestamp  = {Mon, 01 Feb 2021 08:43:04 +0100},
  biburl     = {https://dblp.org/rec/conf/uss/SilverJBCJ14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICLR 2014 conference-track paper; the url points at arXiv 1312.6055.
@inproceedings{DBLP:journals/corr/SchaulAS13,
  author     = {Tom Schaul and Ioannis Antonoglou and David Silver},
  editor     = {Yoshua Bengio and Yann LeCun},
  title      = {Unit Tests for Stochastic Optimization},
  booktitle  = {2nd International Conference on Learning Representations, {ICLR} 2014, Banff, AB, Canada, April 14-16, 2014, Conference Track Proceedings},
  year       = {2014},
  url        = {http://arxiv.org/abs/1312.6055},
  timestamp  = {Thu, 04 Apr 2019 13:20:07 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SchaulAS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1401.5390 (CoRR), 2014.
% Deviates from the DBLP export: Monte-Carlo is brace-protected in the title so
% sentence-casing styles do not print "monte-carlo".
@article{DBLP:journals/corr/BranavanSB14,
  author     = {S. R. K. Branavan and David Silver and Regina Barzilay},
  title      = {Learning to Win by Reading Manuals in a {Monte-Carlo} Framework},
  journal    = {CoRR},
  volume     = {abs/1401.5390},
  year       = {2014},
  url        = {http://arxiv.org/abs/1401.5390},
  eprinttype = {arXiv},
  eprint     = {1401.5390},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/BranavanSB14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1402.1958 (CoRR), 2014.
% Deviates from the DBLP export: Bayes is brace-protected in the title so
% sentence-casing styles do not print "bayes".
@article{DBLP:journals/corr/GuezSD14,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  title      = {Better Optimism By {Bayes}: Adaptive Planning with Rich Models},
  journal    = {CoRR},
  volume     = {abs/1402.1958},
  year       = {2014},
  url        = {http://arxiv.org/abs/1402.1958},
  eprinttype = {arXiv},
  eprint     = {1402.1958},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GuezSD14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% J. Artif. Intell. Res. vol. 48, 2013.
% Deviates from the DBLP export: Bayes and Monte-Carlo are brace-protected in
% the title so sentence-casing styles do not lowercase them.
@article{DBLP:journals/jair/GuezSD13,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  title      = {Scalable and Efficient {Bayes}-Adaptive Reinforcement Learning Based on {Monte-Carlo} Tree Search},
  journal    = {J. Artif. Intell. Res.},
  volume     = {48},
  pages      = {841--883},
  year       = {2013},
  url        = {https://doi.org/10.1613/jair.4117},
  doi        = {10.1613/JAIR.4117},
  timestamp  = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jair/GuezSD13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICAPS 2013 conference paper; the export carries no pages field.
% Deviates from the DBLP export: Go is brace-protected in the title so
% sentence-casing styles do not print the game's name as "go".
@inproceedings{DBLP:conf/aips/SilverSM13,
  author     = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  editor     = {Daniel Borrajo and Subbarao Kambhampati and Angelo Oddi and Simone Fratini},
  title      = {Temporal-Difference Search in Computer {Go}},
  booktitle  = {Proceedings of the Twenty-Third International Conference on Automated Planning and Scheduling, {ICAPS} 2013, Rome, Italy, June 10-14, 2013},
  publisher  = {{AAAI}},
  year       = {2013},
  url        = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS13/paper/view/6037},
  timestamp  = {Wed, 29 Mar 2017 16:45:27 +0200},
  biburl     = {https://dblp.org/rec/conf/aips/SilverSM13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2013 conference paper (JMLR W&CP vol. 28).
@inproceedings{DBLP:conf/icml/SilverNBWM13,
  author     = {David Silver and Leonard Newnham and David Barker and Suzanne Weller and Jason McFall},
  title      = {Concurrent Reinforcement Learning from Customer Interactions},
  booktitle  = {Proceedings of the 30th International Conference on Machine Learning, {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {28},
  pages      = {924--932},
  publisher  = {JMLR.org},
  year       = {2013},
  url        = {http://proceedings.mlr.press/v28/silver13.html},
  timestamp  = {Wed, 29 May 2019 08:41:45 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SilverNBWM13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1312.5602 (CoRR), 2013.
% Deviates from the DBLP export: Atari is brace-protected in the title so
% sentence-casing styles do not print "atari".
@article{DBLP:journals/corr/MnihKSGAWR13,
  author     = {Volodymyr Mnih and Koray Kavukcuoglu and David Silver and Alex Graves and Ioannis Antonoglou and Daan Wierstra and Martin A. Riedmiller},
  title      = {Playing {Atari} with Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1312.5602},
  year       = {2013},
  url        = {http://arxiv.org/abs/1312.5602},
  eprinttype = {arXiv},
  eprint     = {1312.5602},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MnihKSGAWR13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Commun. ACM 55(3), 2012.
% Deviates from the DBLP export: Go and Monte Carlo are brace-protected in the
% title so sentence-casing styles do not print "go" / "Monte carlo".
@article{DBLP:journals/cacm/GellyKSSSST12,
  author     = {Sylvain Gelly and Levente Kocsis and Marc Schoenauer and Mich{\`{e}}le Sebag and David Silver and Csaba Szepesv{\'{a}}ri and Olivier Teytaud},
  title      = {The grand challenge of computer {Go}: {Monte Carlo} tree search and extensions},
  journal    = {Commun. {ACM}},
  volume     = {55},
  number     = {3},
  pages      = {106--113},
  year       = {2012},
  url        = {https://doi.org/10.1145/2093548.2093574},
  doi        = {10.1145/2093548.2093574},
  timestamp  = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/cacm/GellyKSSSST12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Inf. Serv. Use 32(3-4), 2012.
@article{DBLP:journals/isu/Silver12,
  author     = {David Silver},
  title      = {Digital natives on a media fast},
  journal    = {Inf. Serv. Use},
  volume     = {32},
  number     = {3-4},
  pages      = {137--139},
  year       = {2012},
  url        = {https://doi.org/10.3233/ISU-2012-0681},
  doi        = {10.3233/ISU-2012-0681},
  timestamp  = {Mon, 11 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/isu/Silver12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% J. Artif. Intell. Res. vol. 43, 2012.
% Deviates from the DBLP export: Monte-Carlo is brace-protected in the title so
% sentence-casing styles do not print "monte-carlo".
@article{DBLP:journals/jair/BranavanSB12,
  author     = {S. R. K. Branavan and David Silver and Regina Barzilay},
  title      = {Learning to Win by Reading Manuals in a {Monte-Carlo} Framework},
  journal    = {J. Artif. Intell. Res.},
  volume     = {43},
  pages      = {661--704},
  year       = {2012},
  url        = {https://doi.org/10.1613/jair.3484},
  doi        = {10.1613/JAIR.3484},
  timestamp  = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jair/BranavanSB12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Mach. Learn. 87(2), 2012.
% Deviates from the DBLP export: Go is brace-protected in the title so
% sentence-casing styles do not print the game's name as "go".
@article{DBLP:journals/ml/SilverSM12,
  author     = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  title      = {Temporal-difference search in computer {Go}},
  journal    = {Mach. Learn.},
  volume     = {87},
  number     = {2},
  pages      = {183--219},
  year       = {2012},
  url        = {https://doi.org/10.1007/s10994-012-5280-0},
  doi        = {10.1007/S10994-012-5280-0},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ml/SilverSM12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% EWRL 2012 workshop paper (JMLR Proceedings vol. 24).
@inproceedings{DBLP:conf/ewrl/HeessST12,
  author     = {Nicolas Heess and David Silver and Yee Whye Teh},
  editor     = {Marc Peter Deisenroth and Csaba Szepesv{\'{a}}ri and Jan Peters},
  title      = {Actor-Critic Reinforcement Learning with Energy-Based Policies},
  booktitle  = {Proceedings of the Tenth European Workshop on Reinforcement Learning, {EWRL} 2012, Edinburgh, Scotland, UK, June, 2012},
  series     = {{JMLR} Proceedings},
  volume     = {24},
  pages      = {43--58},
  publisher  = {JMLR.org},
  year       = {2012},
  url        = {http://proceedings.mlr.press/v24/heess12a.html},
  timestamp  = {Wed, 29 May 2019 08:41:44 +0200},
  biburl     = {https://dblp.org/rec/conf/ewrl/HeessST12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% EWRL 2012 workshop paper (JMLR Proceedings vol. 24).
@inproceedings{DBLP:conf/ewrl/Silver12,
  author     = {David Silver},
  editor     = {Marc Peter Deisenroth and Csaba Szepesv{\'{a}}ri and Jan Peters},
  title      = {Gradient Temporal Difference Networks},
  booktitle  = {Proceedings of the Tenth European Workshop on Reinforcement Learning, {EWRL} 2012, Edinburgh, Scotland, UK, June, 2012},
  series     = {{JMLR} Proceedings},
  volume     = {24},
  pages      = {117--130},
  publisher  = {JMLR.org},
  year       = {2012},
  url        = {http://proceedings.mlr.press/v24/silver12a.html},
  timestamp  = {Wed, 29 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ewrl/Silver12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2012 conference paper; the export carries no pages field.
@inproceedings{DBLP:conf/icml/SilverC12,
  author     = {David Silver and Kamil Ciosek},
  title      = {Compositional Planning Using Optimal Option Models},
  booktitle  = {Proceedings of the 29th International Conference on Machine Learning, {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher  = {icml.cc / Omnipress},
  year       = {2012},
  url        = {http://icml.cc/2012/papers/564.pdf},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SilverC12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICRA 2012 conference paper (IEEE).
@inproceedings{DBLP:conf/icra/SilverBS12,
  author     = {David Silver and J. Andrew Bagnell and Anthony Stentz},
  title      = {Active learning from demonstration for robust autonomous navigation},
  booktitle  = {{IEEE} International Conference on Robotics and Automation, {ICRA} 2012, 14-18 May, 2012, St. Paul, Minnesota, {USA}},
  pages      = {200--207},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ICRA.2012.6224757},
  doi        = {10.1109/ICRA.2012.6224757},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/icra/SilverBS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ISER 2012 symposium paper (Springer Tracts in Advanced Robotics vol. 88).
% NOTE(review): url and doi carry a LaTeX-escaped underscore (\_); confirm the
% bibliography style in use resolves it to a plain underscore in the link.
@inproceedings{DBLP:conf/iser/SilverBS12,
  author     = {David Silver and J. Andrew Bagnell and Anthony Stentz},
  editor     = {Jaydev P. Desai and Gregory Dudek and Oussama Khatib and Vijay Kumar},
  title      = {Learning Autonomous Driving Styles and Maneuvers from Expert Demonstration},
  booktitle  = {Experimental Robotics - The 13th International Symposium on Experimental Robotics, {ISER} 2012, June 18-21, 2012, Qu{\'{e}}bec City, Canada},
  series     = {Springer Tracts in Advanced Robotics},
  volume     = {88},
  pages      = {371--386},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-319-00065-7\_26},
  doi        = {10.1007/978-3-319-00065-7\_26},
  timestamp  = {Fri, 26 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iser/SilverBS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NIPS 2012 (Advances in Neural Information Processing Systems 25) conference paper.
% Deviates from the DBLP export: Bayes is brace-protected in the title so
% sentence-casing styles do not print "bayes-adaptive".
@inproceedings{DBLP:conf/nips/GuezSD12,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  editor     = {Peter L. Bartlett and Fernando C. N. Pereira and Christopher J. C. Burges and L{\'{e}}on Bottou and Kilian Q. Weinberger},
  title      = {Efficient {Bayes}-Adaptive Reinforcement Learning using Sample-Based Search},
  booktitle  = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages      = {1034--1042},
  year       = {2012},
  url        = {https://proceedings.neurips.cc/paper/2012/hash/35051070e572e47d2c26c241ab88307f-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GuezSD12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1205.3109 (CoRR), 2012.
% Deviates from the DBLP export: Bayes is brace-protected in the title so
% sentence-casing styles do not print "bayes-adaptive".
@article{DBLP:journals/corr/abs-1205-3109,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  title      = {Efficient {Bayes}-Adaptive Reinforcement Learning using Sample-Based Search},
  journal    = {CoRR},
  volume     = {abs/1205.3109},
  year       = {2012},
  url        = {http://arxiv.org/abs/1205.3109},
  eprinttype = {arXiv},
  eprint     = {1205.3109},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1205-3109.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Artif. Intell. 175(11), 2011.
% Deviates from the DBLP export: Monte-Carlo and Go are brace-protected in the
% title so sentence-casing styles do not print "Monte-carlo" / "go".
@article{DBLP:journals/ai/GellyS11,
  author     = {Sylvain Gelly and David Silver},
  title      = {{Monte-Carlo} tree search and rapid action value estimation in computer {Go}},
  journal    = {Artif. Intell.},
  volume     = {175},
  number     = {11},
  pages      = {1856--1875},
  year       = {2011},
  url        = {https://doi.org/10.1016/j.artint.2011.03.007},
  doi        = {10.1016/J.ARTINT.2011.03.007},
  timestamp  = {Sat, 27 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ai/GellyS11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jair/VenessNHUS11,
  author     = {Joel Veness and Kee Siong Ng and Marcus Hutter and William T. B. Uther and David Silver},
  title      = {A {Monte-Carlo} {AIXI} Approximation},
  journal    = {J. Artif. Intell. Res.},
  volume     = {40},
  pages      = {95--142},
  year       = {2011},
  url        = {https://doi.org/10.1613/jair.3125},
  doi        = {10.1613/JAIR.3125},
  timestamp  = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jair/VenessNHUS11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/BranavanSB11,
  author     = {S. R. K. Branavan and David Silver and Regina Barzilay},
  editor     = {Dekang Lin and Yuji Matsumoto and Rada Mihalcea},
  title      = {Learning to Win by Reading Manuals in a {Monte-Carlo} Framework},
  booktitle  = {The 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, Proceedings of the Conference, 19-24 June, 2011, Portland, Oregon, {USA}},
  pages      = {268--277},
  publisher  = {The Association for Computational Linguistics},
  year       = {2011},
  url        = {https://aclanthology.org/P11-1028/},
  timestamp  = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/BranavanSB11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/BranavanSB11,
  author     = {S. R. K. Branavan and David Silver and Regina Barzilay},
  editor     = {Toby Walsh},
  title      = {Non-Linear {Monte-Carlo} Search in {Civilization} {II}},
  booktitle  = {{IJCAI} 2011, Proceedings of the 22nd International Joint Conference on Artificial Intelligence, Barcelona, Catalonia, Spain, July 16-22, 2011},
  pages      = {2404--2410},
  publisher  = {{IJCAI/AAAI}},
  year       = {2011},
  url        = {https://doi.org/10.5591/978-1-57735-516-8/IJCAI11-401},
  doi        = {10.5591/978-1-57735-516-8/IJCAI11-401},
  timestamp  = {Tue, 20 Aug 2019 16:16:04 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/BranavanSB11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/SilverS11,
  author     = {David Silver and Anthony Stentz},
  title      = {{Monte Carlo} Localization and registration to prior data for outdoor navigation},
  booktitle  = {2011 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2011, San Francisco, CA, USA, September 25-30, 2011},
  pages      = {510--517},
  publisher  = {{IEEE}},
  year       = {2011},
  url        = {https://doi.org/10.1109/IROS.2011.6094532},
  doi        = {10.1109/IROS.2011.6094532},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/SilverS11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/us/Silver18,
  author     = {David Silver},
  title      = {Learning Preference Models for Autonomous Mobile Robots in Complex Domains},
  school     = {Carnegie Mellon University, {USA}},
  year       = {2010},
  url        = {https://doi.org/10.1184/r1/6720380.v1},
  doi        = {10.1184/R1/6720380.V1},
  timestamp  = {Wed, 27 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/phd/us/Silver18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijrr/SilverBS10,
  author     = {David Silver and J. Andrew Bagnell and Anthony Stentz},
  title      = {Learning from Demonstration for Autonomous Navigation in Complex Unstructured Terrain},
  journal    = {Int. J. Robotics Res.},
  volume     = {29},
  number     = {12},
  pages      = {1565--1592},
  year       = {2010},
  url        = {https://doi.org/10.1177/0278364910369715},
  doi        = {10.1177/0278364910369715},
  timestamp  = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijrr/SilverBS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ram/BagnellBSSS10,
  author     = {J. Andrew Bagnell and David M. Bradley and David Silver and Boris Sofman and Anthony Stentz},
  title      = {Learning for Autonomous Navigation},
  journal    = {{IEEE} Robotics Autom. Mag.},
  volume     = {17},
  number     = {2},
  pages      = {74--84},
  year       = {2010},
  url        = {https://doi.org/10.1109/MRA.2010.936946},
  doi        = {10.1109/MRA.2010.936946},
  timestamp  = {Mon, 18 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ram/BagnellBSSS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/VenessNHS10,
  author     = {Joel Veness and Kee Siong Ng and Marcus Hutter and David Silver},
  editor     = {Maria Fox and David Poole},
  title      = {Reinforcement Learning via {AIXI} Approximation},
  booktitle  = {Proceedings of the Twenty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2010, Atlanta, Georgia, USA, July 11-15, 2010},
  pages      = {605--611},
  publisher  = {{AAAI} Press},
  year       = {2010},
  url        = {https://doi.org/10.1609/aaai.v24i1.7667},
  doi        = {10.1609/AAAI.V24I1.7667},
  timestamp  = {Mon, 04 Sep 2023 16:23:45 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/VenessNHS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SilverV10,
  author     = {David Silver and Joel Veness},
  editor     = {John D. Lafferty and Christopher K. I. Williams and John Shawe{-}Taylor and Richard S. Zemel and Aron Culotta},
  title      = {{Monte-Carlo} Planning in Large {POMDPs}},
  booktitle  = {Advances in Neural Information Processing Systems 23: 24th Annual Conference on Neural Information Processing Systems 2010. Proceedings of a meeting held 6-9 December 2010, Vancouver, British Columbia, Canada},
  pages      = {2164--2172},
  publisher  = {Curran Associates, Inc.},
  year       = {2010},
  url        = {https://proceedings.neurips.cc/paper/2010/hash/edfbe1afcf9246bb0d40eb4d8027d90f-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/SilverV10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1007-2049,
  author     = {Joel Veness and Kee Siong Ng and Marcus Hutter and David Silver},
  title      = {Reinforcement Learning via {AIXI} Approximation},
  journal    = {CoRR},
  volume     = {abs/1007.2049},
  year       = {2010},
  url        = {http://arxiv.org/abs/1007.2049},
  eprinttype = {arXiv},
  eprint     = {1007.2049},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1007-2049.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/arobots/RatliffSB09,
  author     = {Nathan D. Ratliff and David Silver and J. Andrew Bagnell},
  title      = {Learning to search: Functional gradient techniques for imitation learning},
  journal    = {Auton. Robots},
  volume     = {27},
  number     = {1},
  pages      = {25--53},
  year       = {2009},
  url        = {https://doi.org/10.1007/s10514-009-9121-3},
  doi        = {10.1007/S10514-009-9121-3},
  timestamp  = {Thu, 18 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/arobots/RatliffSB09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fsr/SilverBS09,
  author     = {David Silver and J. Andrew Bagnell and Anthony Stentz},
  editor     = {Andrew Howard and Karl Iagnemma and Alonzo Kelly},
  title      = {Applied Imitation Learning for Autonomous Navigation in Complex Natural Terrain},
  booktitle  = {Field and Service Robotics, Results of the 7th International Conference, {FSR} 2009, Cambridge, Massachusetts, USA, 14-16 July 2009},
  series     = {Springer Tracts in Advanced Robotics},
  volume     = {62},
  pages      = {249--259},
  publisher  = {Springer},
  year       = {2009},
  url        = {https://doi.org/10.1007/978-3-642-13408-1\_23},
  doi        = {10.1007/978-3-642-13408-1\_23},
  timestamp  = {Mon, 22 May 2017 17:10:59 +0200},
  biburl     = {https://dblp.org/rec/conf/fsr/SilverBS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SilverT09,
  author     = {David Silver and Gerald Tesauro},
  editor     = {Andrea Pohoreckyj Danyluk and L{\'{e}}on Bottou and Michael L. Littman},
  title      = {{Monte-Carlo} simulation balancing},
  booktitle  = {Proceedings of the 26th Annual International Conference on Machine Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {382},
  pages      = {945--952},
  publisher  = {{ACM}},
  year       = {2009},
  url        = {https://doi.org/10.1145/1553374.1553495},
  doi        = {10.1145/1553374.1553495},
  timestamp  = {Tue, 06 Nov 2018 16:58:29 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SilverT09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonMPBSSW09,
  author     = {Richard S. Sutton and Hamid Reza Maei and Doina Precup and Shalabh Bhatnagar and David Silver and Csaba Szepesv{\'{a}}ri and Eric Wiewiora},
  editor     = {Andrea Pohoreckyj Danyluk and L{\'{e}}on Bottou and Michael L. Littman},
  title      = {Fast gradient-descent methods for temporal-difference learning with linear function approximation},
  booktitle  = {Proceedings of the 26th Annual International Conference on Machine Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {382},
  pages      = {993--1000},
  publisher  = {{ACM}},
  year       = {2009},
  url        = {https://doi.org/10.1145/1553374.1553501},
  doi        = {10.1145/1553374.1553501},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SuttonMPBSSW09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/isrr/SilverBS09,
  author     = {David Silver and J. Andrew Bagnell and Anthony Stentz},
  editor     = {C{\'{e}}dric Pradalier and Roland Siegwart and Gerhard Hirzinger},
  title      = {Perceptual Interpretation for Autonomous Navigation through Dynamic Imitation Learning},
  booktitle  = {Robotics Research - The 14th International Symposium, {ISRR} 2009, August 31 - September 3, 2009, Lucerne, Switzerland},
  series     = {Springer Tracts in Advanced Robotics},
  volume     = {70},
  pages      = {433--449},
  publisher  = {Springer},
  year       = {2009},
  url        = {https://doi.org/10.1007/978-3-642-19457-3\_26},
  doi        = {10.1007/978-3-642-19457-3\_26},
  timestamp  = {Sun, 02 Jun 2019 21:26:25 +0200},
  biburl     = {https://dblp.org/rec/conf/isrr/SilverBS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/MaeiSBPSS09,
  author     = {Hamid Reza Maei and Csaba Szepesv{\'{a}}ri and Shalabh Bhatnagar and Doina Precup and David Silver and Richard S. Sutton},
  editor     = {Yoshua Bengio and Dale Schuurmans and John D. Lafferty and Christopher K. I. Williams and Aron Culotta},
  title      = {Convergent Temporal-Difference Learning with Arbitrary Smooth Function Approximation},
  booktitle  = {Advances in Neural Information Processing Systems 22: 23rd Annual Conference on Neural Information Processing Systems 2009. Proceedings of a meeting held 7-10 December 2009, Vancouver, British Columbia, Canada},
  pages      = {1204--1212},
  publisher  = {Curran Associates, Inc.},
  year       = {2009},
  url        = {https://proceedings.neurips.cc/paper/2009/hash/3a15c7d0bbe60300a39f76f8a5ba6896-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/MaeiSBPSS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VenessSUB09,
  author     = {Joel Veness and David Silver and William T. B. Uther and Alan Blair},
  editor     = {Yoshua Bengio and Dale Schuurmans and John D. Lafferty and Christopher K. I. Williams and Aron Culotta},
  title      = {Bootstrapping from Game Tree Search},
  booktitle  = {Advances in Neural Information Processing Systems 22: 23rd Annual Conference on Neural Information Processing Systems 2009. Proceedings of a meeting held 7-10 December 2009, Vancouver, British Columbia, Canada},
  pages      = {1937--1945},
  publisher  = {Curran Associates, Inc.},
  year       = {2009},
  url        = {https://proceedings.neurips.cc/paper/2009/hash/389bc7bb1e1c2a5e7e147703232a88f6-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/VenessSUB09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-0909-0801,
  author     = {Joel Veness and Kee Siong Ng and Marcus Hutter and David Silver},
  title      = {A {Monte Carlo} {AIXI} Approximation},
  journal    = {CoRR},
  volume     = {abs/0909.0801},
  year       = {2009},
  url        = {http://arxiv.org/abs/0909.0801},
  eprinttype = {arXiv},
  eprint     = {0909.0801},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-0909-0801.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/firstmonday/Silver08,
  author     = {David Silver},
  title      = {History, Hype, and Hope: An Afterward},
  journal    = {First Monday},
  volume     = {13},
  number     = {3},
  year       = {2008},
  url        = {http://www.uic.edu/htbin/cgiwrap/bin/ojs/index.php/fm/article/view/2143/1950},
  timestamp  = {Thu, 26 Jun 2008 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/firstmonday/Silver08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/GellyS08,
  author     = {Sylvain Gelly and David Silver},
  editor     = {Dieter Fox and Carla P. Gomes},
  title      = {Achieving Master Level Play in 9 x 9 Computer {Go}},
  booktitle  = {Proceedings of the Twenty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2008, Chicago, Illinois, USA, July 13-17, 2008},
  pages      = {1537--1540},
  publisher  = {{AAAI} Press},
  year       = {2008},
  url        = {http://www.aaai.org/Library/AAAI/2008/aaai08-257.php},
  timestamp  = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/GellyS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SilverSM08,
  author     = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  editor     = {William W. Cohen and Andrew McCallum and Sam T. Roweis},
  title      = {Sample-based learning and search with permanent and transient memories},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Fifth International Conference {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {307},
  pages      = {968--975},
  publisher  = {{ACM}},
  year       = {2008},
  url        = {https://doi.org/10.1145/1390156.1390278},
  doi        = {10.1145/1390156.1390278},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SilverSM08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/rss/SilverBS08,
  author     = {David Silver and J. Andrew Bagnell and Anthony Stentz},
  editor     = {Oliver Brock and Jeff Trinkle and Fabio Ramos},
  title      = {High Performance Outdoor Navigation from Overhead Data using Imitation Learning},
  booktitle  = {Robotics: Science and Systems IV, Eidgen{\"{o}}ssische Technische Hochschule Z{\"{u}}rich, Zurich, Switzerland, June 25-28, 2008},
  publisher  = {The {MIT} Press},
  year       = {2008},
  url        = {http://www.roboticsproceedings.org/rss04/p34.html},
  doi        = {10.15607/RSS.2008.IV.034},
  timestamp  = {Tue, 18 Oct 2022 08:35:38 +0200},
  biburl     = {https://dblp.org/rec/conf/rss/SilverBS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/GellyS07,
  author     = {Sylvain Gelly and David Silver},
  editor     = {Zoubin Ghahramani},
  title      = {Combining online and offline knowledge in {UCT}},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Fourth International Conference {(ICML} 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {227},
  pages      = {273--280},
  publisher  = {{ACM}},
  year       = {2007},
  url        = {https://doi.org/10.1145/1273496.1273531},
  doi        = {10.1145/1273496.1273531},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/GellyS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonKS07,
  author     = {Richard S. Sutton and Anna Koop and David Silver},
  editor     = {Zoubin Ghahramani},
  title      = {On the role of tracking in stationary environments},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Fourth International Conference {(ICML} 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {227},
  pages      = {871--878},
  publisher  = {{ACM}},
  year       = {2007},
  url        = {https://doi.org/10.1145/1273496.1273606},
  doi        = {10.1145/1273496.1273606},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SuttonKS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/SilverSM07,
  author     = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  editor     = {Manuela M. Veloso},
  title      = {Reinforcement Learning of Local Shape in the Game of {Go}},
  booktitle  = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages      = {1053--1058},
  year       = {2007},
  url        = {http://ijcai.org/Proceedings/07/Papers/170.pdf},
  timestamp  = {Tue, 20 Aug 2019 16:17:11 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/SilverSM07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jfr/MorrisFOBSBTWW06,
  author     = {Aaron Morris and Dave Ferguson and Zachary Omohundro and David M. Bradley and David Silver and Christopher R. Baker and Scott Thayer and Chuck Whittaker and William Whittaker},
  title      = {Recent developments in subterranean robotics},
  journal    = {J. Field Robotics},
  volume     = {23},
  number     = {1},
  pages      = {35--57},
  year       = {2006},
  url        = {https://doi.org/10.1002/rob.20106},
  doi        = {10.1002/ROB.20106},
  timestamp  = {Mon, 23 Oct 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jfr/MorrisFOBSBTWW06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jfr/SilverFMT06,
  author     = {David Silver and Dave Ferguson and Aaron Morris and Scott Thayer},
  title      = {Topological exploration of subterranean environments},
  journal    = {J. Field Robotics},
  volume     = {23},
  number     = {6-7},
  pages      = {395--415},
  year       = {2006},
  url        = {https://doi.org/10.1002/rob.20130},
  doi        = {10.1002/ROB.20130},
  timestamp  = {Mon, 23 Oct 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jfr/SilverFMT06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/SilverSVBS06,
  author     = {David Silver and Boris Sofman and Nicolas Vandapel and J. Andrew Bagnell and Anthony Stentz},
  title      = {Experimental Analysis of Overhead Data Processing To Support Long Range Navigation},
  booktitle  = {2006 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2006, October 9-15, 2006, Beijing, China},
  pages      = {2443--2450},
  publisher  = {{IEEE}},
  year       = {2006},
  url        = {https://doi.org/10.1109/IROS.2006.281686},
  doi        = {10.1109/IROS.2006.281686},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/SilverSVBS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/trob/LisienMSKRC05,
  author     = {Brad Lisien and Deryck Morales and David Silver and George Kantor and Ioannis M. Rekleitis and Howie Choset},
  title      = {The hierarchical atlas},
  journal    = {{IEEE} Trans. Robotics},
  volume     = {21},
  number     = {3},
  pages      = {473--481},
  year       = {2005},
  url        = {https://doi.org/10.1109/TRO.2004.837237},
  doi        = {10.1109/TRO.2004.837237},
  timestamp  = {Mon, 15 Jun 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/trob/LisienMSKRC05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aiide/Silver05,
  author     = {David Silver},
  editor     = {R. Michael Young and John E. Laird},
  title      = {Cooperative Pathfinding},
  booktitle  = {Proceedings of the First Artificial Intelligence and Interactive Digital Entertainment Conference, June 1-5, 2005, Marina del Rey, California, {USA}},
  pages      = {117--122},
  publisher  = {{AAAI} Press},
  year       = {2005},
  timestamp  = {Mon, 26 Feb 2007 09:13:55 +0100},
  biburl     = {https://dblp.org/rec/conf/aiide/Silver05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fsr/SilverCT05,
  author     = {David Silver and Joseph Carsten and Scott Thayer},
  editor     = {Peter I. Corke and Salah Sukkarieh},
  title      = {Topological Global Localization for Subterranean Voids},
  booktitle  = {Field and Service Robotics, Results of the 5th International Conference, {FSR} 2005, July 29-31, 2005, Port Douglas, QLD, Australia},
  series     = {Springer Tracts in Advanced Robotics},
  volume     = {25},
  pages      = {117--128},
  publisher  = {Springer},
  year       = {2005},
  url        = {https://doi.org/10.1007/978-3-540-33453-8\_11},
  doi        = {10.1007/978-3-540-33453-8\_11},
  timestamp  = {Mon, 22 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/fsr/SilverCT05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icra/MorrisSFT05,
  author     = {Aaron Morris and David Silver and Dave Ferguson and Scott Thayer},
  title      = {Towards Topological Exploration of Abandoned Mines},
  booktitle  = {Proceedings of the 2005 {IEEE} International Conference on Robotics and Automation, {ICRA} 2005, April 18-22, 2005, Barcelona, Spain},
  pages      = {2117--2123},
  publisher  = {{IEEE}},
  year       = {2005},
  url        = {https://doi.org/10.1109/ROBOT.2005.1570426},
  doi        = {10.1109/ROBOT.2005.1570426},
  timestamp  = {Mon, 22 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icra/MorrisSFT05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nms/Silver04,
  author     = {David Silver},
  title      = {Internet/Cyberculture/Digital Culture/New Media/Fill-in-the-Blank Studies},
  journal    = {New Media Soc.},
  volume     = {6},
  number     = {1},
  pages      = {55--64},
  year       = {2004},
  url        = {https://doi.org/10.1177/1461444804039915},
  doi        = {10.1177/1461444804039915},
  timestamp  = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nms/Silver04.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icra/SilverMRLC04,
  author     = {David Silver and Deryck Morales and Ioannis M. Rekleitis and Brad Lisien and Howie Choset},
  title      = {Arc Carving: Obtaining Accurate, Low Latency Maps from Ultrasonic Range Sensors},
  booktitle  = {Proceedings of the 2004 {IEEE} International Conference on Robotics and Automation, {ICRA} 2004, April 26 - May 1, 2004, New Orleans, LA, {USA}},
  pages      = {1554--1561},
  publisher  = {{IEEE}},
  year       = {2004},
  url        = {https://doi.org/10.1109/ROBOT.2004.1308045},
  doi        = {10.1109/ROBOT.2004.1308045},
  timestamp  = {Mon, 15 Jun 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icra/SilverMRLC04.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/SilverFMT04,
  author     = {David Silver and Dave Ferguson and Aaron Morris and Scott Thayer},
  title      = {Feature extraction for topological mine maps},
  booktitle  = {2004 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, Sendai, Japan, September 28 - October 2, 2004},
  pages      = {773--779},
  publisher  = {{IEEE}},
  year       = {2004},
  url        = {https://doi.org/10.1109/IROS.2004.1389446},
  doi        = {10.1109/IROS.2004.1389446},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/SilverFMT04.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ram/SilverBT04,
  author     = {David Silver and David M. Bradley and Scott Thayer},
  title      = {Scan matching for flooded subterranean voids},
  booktitle  = {2004 {IEEE} Conference on Robotics, Automation and Mechatronics, {RAM} 2004, December 1-3, 2004, Singapore},
  pages      = {422--427},
  publisher  = {{IEEE}},
  year       = {2004},
  url        = {https://doi.org/10.1109/RAMECH.2004.1438957},
  doi        = {10.1109/RAMECH.2004.1438957},
  timestamp  = {Thu, 12 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ram/SilverBT04.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ram/BradleyST04,
  author     = {David M. Bradley and David Silver and Scott Thayer},
  title      = {A regional point descriptor for global topological localization in flooded subterranean environments},
  booktitle  = {2004 {IEEE} Conference on Robotics, Automation and Mechatronics, {RAM} 2004, December 1-3, 2004, Singapore},
  pages      = {440--445},
  publisher  = {{IEEE}},
  year       = {2004},
  url        = {https://doi.org/10.1109/RAMECH.2004.1438960},
  doi        = {10.1109/RAMECH.2004.1438960},
  timestamp  = {Thu, 12 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ram/BradleyST04.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/LisienMSKRC03,
  author     = {Brad Lisien and Deryck Morales and David Silver and George Kantor and Ioannis M. Rekleitis and Howie Choset},
  title      = {Hierarchical simultaneous localization and mapping},
  booktitle  = {2003 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, Las Vegas, Nevada, USA, October 27 - November 1, 2003},
  pages      = {448--453},
  publisher  = {{IEEE}},
  year       = {2003},
  url        = {https://doi.org/10.1109/IROS.2003.1250670},
  doi        = {10.1109/IROS.2003.1250670},
  timestamp  = {Mon, 15 Jun 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/LisienMSKRC03.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nms/Silver00,
  author     = {David Silver},
  title      = {Book Review: Life Online: Researching Real Experience in Virtual Space},
  journal    = {New Media Soc.},
  volume     = {2},
  number     = {2},
  pages      = {251--255},
  year       = {2000},
  url        = {https://doi.org/10.1177/1461444800002002008},
  doi        = {10.1177/1461444800002002008},
  timestamp  = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nms/Silver00.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information see our F.A.Q.