default search action
BibTeX records: Arthur Guez
% arXiv preprint (CoRR abs/2406.02035), 2024.
@article{DBLP:journals/corr/abs-2406-02035,
  author     = {Khimya Khetarpal and Zhaohan Daniel Guo and
                Bernardo {\'{A}}vila Pires and Yunhao Tang and
                Clare Lyle and Mark Rowland and Nicolas Heess and
                Diana Borsa and Arthur Guez and Will Dabney},
  title      = {A Unifying Framework for Action-Conditional Self-Predictive
                Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2406.02035},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.02035},
  doi        = {10.48550/ARXIV.2406.02035},
  eprinttype = {arXiv},
  eprint     = {2406.02035},
  timestamp  = {Thu, 04 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-02035.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2306.10587), 2023.
@article{DBLP:journals/corr/abs-2306-10587,
  author     = {Veronica Chelu and Tom Zahavy and Arthur Guez and
                Doina Precup and Sebastian Flennerhag},
  title      = {Optimism and Adaptivity in Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2306.10587},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.10587},
  doi        = {10.48550/ARXIV.2306.10587},
  eprinttype = {arXiv},
  eprint     = {2306.10587},
  timestamp  = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-10587.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2022 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/abs-2204-08957.
% The coined name COptiDICE is brace-protected so that sentence-casing
% bibliography styles keep its mixed capitalisation.
@inproceedings{DBLP:conf/iclr/0004PMHPKG22,
  author     = {Jongmin Lee and Cosmin Paduraru and Daniel J. Mankowitz and
                Nicolas Heess and Doina Precup and Kee{-}Eung Kim and
                Arthur Guez},
  title      = {{COptiDICE}: Offline Constrained Reinforcement Learning via
                Stationary Distribution Correction Estimation},
  booktitle  = {The Tenth International Conference on Learning Representations,
                {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=FLA55mBee6Q},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/0004PMHPKG22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2022 conference paper.
% "Gumbel" is a proper noun; it is brace-protected so that sentence-casing
% bibliography styles do not lowercase it.
@inproceedings{DBLP:conf/iclr/DanihelkaGSS22,
  author     = {Ivo Danihelka and Arthur Guez and Julian Schrittwieser and
                David Silver},
  title      = {Policy improvement by planning with {Gumbel}},
  booktitle  = {The Tenth International Conference on Learning Representations,
                {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=bERaNdoegnO},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/DanihelkaGSS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2022 conference version (PMLR 162); the arXiv preprint of the same
% work is DBLP:journals/corr/abs-2202-08417.
% The author name is spelled with its accent, matching the spelling used by
% other entries in this file, so the same person is not listed under two forms.
@inproceedings{DBLP:conf/icml/GoyalFBWKBGMHKV22,
  author     = {Anirudh Goyal and Abram L. Friesen and Andrea Banino and
                Th{\'{e}}ophane Weber and Nan Rosemary Ke and
                Adri{\`{a}} Puigdom{\`{e}}nech Badia and Arthur Guez and
                Mehdi Mirza and Peter Conway Humphreys and
                Ksenia Konyushkova and Michal Valko and Simon Osindero and
                Timothy P. Lillicrap and Nicolas Heess and Charles Blundell},
  editor     = {Kamalika Chaudhuri and Stefanie Jegelka and Le Song and
                Csaba Szepesv{\'{a}}ri and Gang Niu and Sivan Sabato},
  title      = {Retrieval-Augmented Reinforcement Learning},
  booktitle  = {International Conference on Machine Learning, {ICML} 2022,
                17-23 July 2022, Baltimore, Maryland, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {7740--7765},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v162/goyal22a.html},
  timestamp  = {Sun, 12 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GoyalFBWKBGMHKV22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/abs-2206-05314.
% Editors are stored with full given names (the style abbreviates if needed),
% the author name carries its accent as in the preprint entry, and the URL is
% stored raw (no backslash before the underscore) because url-aware styles
% treat the field verbatim.
@inproceedings{DBLP:conf/nips/HumphreysGTSWL22,
  author     = {Peter Conway Humphreys and Arthur Guez and Olivier Tieleman and
                Laurent Sifre and Th{\'{e}}ophane Weber and
                Timothy P. Lillicrap},
  editor     = {Sanmi Koyejo and Shakir Mohamed and Alekh Agarwal and
                Danielle Belgrave and Kyunghyun Cho and Alice Oh},
  title      = {Large-Scale Retrieval for Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual
                Conference on Neural Information Processing Systems 2022,
                NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9,
                2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper_files/paper/2022/hash/7eca17ef54789b0663cab421f2e9dbf5-Abstract-Conference.html},
  timestamp  = {Sun, 12 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/HumphreysGTSWL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2202.08417); the ICML 2022 version of the same
% work is DBLP:conf/icml/GoyalFBWKBGMHKV22.
% The author name is spelled with its accent, matching the spelling used by
% other entries in this file.
@article{DBLP:journals/corr/abs-2202-08417,
  author     = {Anirudh Goyal and Abram L. Friesen and Andrea Banino and
                Th{\'{e}}ophane Weber and Nan Rosemary Ke and
                Adri{\`{a}} Puigdom{\`{e}}nech Badia and Arthur Guez and
                Mehdi Mirza and Ksenia Konyushkova and Michal Valko and
                Simon Osindero and Timothy P. Lillicrap and Nicolas Heess and
                Charles Blundell},
  title      = {Retrieval-Augmented Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2202.08417},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.08417},
  eprinttype = {arXiv},
  eprint     = {2202.08417},
  timestamp  = {Tue, 01 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-08417.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2204.08957); the ICLR 2022 version of the same
% work is DBLP:conf/iclr/0004PMHPKG22.
% The coined name COptiDICE is brace-protected so that sentence-casing
% bibliography styles keep its mixed capitalisation.
@article{DBLP:journals/corr/abs-2204-08957,
  author     = {Jongmin Lee and Cosmin Paduraru and Daniel J. Mankowitz and
                Nicolas Heess and Doina Precup and Kee{-}Eung Kim and
                Arthur Guez},
  title      = {{COptiDICE}: Offline Constrained Reinforcement Learning via
                Stationary Distribution Correction Estimation},
  journal    = {CoRR},
  volume     = {abs/2204.08957},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.08957},
  doi        = {10.48550/ARXIV.2204.08957},
  eprinttype = {arXiv},
  eprint     = {2204.08957},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-08957.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2206.05314); the NeurIPS 2022 version of the same
% work is DBLP:conf/nips/HumphreysGTSWL22.
@article{DBLP:journals/corr/abs-2206-05314,
  author     = {Peter Conway Humphreys and Arthur Guez and Olivier Tieleman and
                Laurent Sifre and Th{\'{e}}ophane Weber and
                Timothy P. Lillicrap},
  title      = {Large-Scale Retrieval for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2206.05314},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.05314},
  doi        = {10.48550/ARXIV.2206.05314},
  eprinttype = {arXiv},
  eprint     = {2206.05314},
  timestamp  = {Sun, 12 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-05314.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2021 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/abs-2011-04021.
% The last author's name is spelled with its accent, matching the preprint
% entry and other entries in this file.
@inproceedings{DBLP:conf/iclr/HamrickFBGVWABV21,
  author     = {Jessica B. Hamrick and Abram L. Friesen and
                Feryal M. P. Behbahani and Arthur Guez and Fabio Viola and
                Sims Witherspoon and Thomas Anthony and
                Lars Holger Buesing and Petar Velickovic and
                Th{\'{e}}ophane Weber},
  title      = {On the role of planning in model-based deep reinforcement
                learning},
  booktitle  = {9th International Conference on Learning Representations,
                {ICLR} 2021, Virtual Event, Austria, May 3-7, 2021},
  publisher  = {OpenReview.net},
  year       = {2021},
  url        = {https://openreview.net/forum?id=IrM64DGB21},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iclr/HamrickFBGVWABV21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2021 conference version (PMLR 139); the arXiv preprint of the same
% work is DBLP:journals/corr/abs-2104-06159.
% The author name carries its accent as in other entries of this file, and
% the algorithm name Muesli is brace-protected as a proper name.
@inproceedings{DBLP:conf/icml/HesselDVGSSWSH21,
  author     = {Matteo Hessel and Ivo Danihelka and Fabio Viola and
                Arthur Guez and Simon Schmitt and Laurent Sifre and
                Th{\'{e}}ophane Weber and David Silver and Hado van Hasselt},
  editor     = {Marina Meila and Tong Zhang},
  title      = {{Muesli}: Combining Improvements in Policy Optimization},
  booktitle  = {Proceedings of the 38th International Conference on Machine
                Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {4214--4226},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/hessel21a.html},
  timestamp  = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/HesselDVGSSWSH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2021 conference version (PMLR 139); the arXiv preprint of the same
% work is DBLP:journals/corr/abs-2011-09464.
% The second author's name is spelled with its accent, matching the preprint
% entry and other entries in this file.
@inproceedings{DBLP:conf/icml/MesnardWVTSHDSH21,
  author     = {Thomas Mesnard and Th{\'{e}}ophane Weber and Fabio Viola and
                Shantanu Thakoor and Alaa Saade and Anna Harutyunyan and
                Will Dabney and Thomas S. Stepleton and Nicolas Heess and
                Arthur Guez and Eric Moulines and Marcus Hutter and
                Lars Buesing and R{\'{e}}mi Munos},
  editor     = {Marina Meila and Tong Zhang},
  title      = {Counterfactual Credit Assignment in Model-Free Reinforcement
                Learning},
  booktitle  = {Proceedings of the 38th International Conference on Machine
                Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {7654--7664},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/mesnard21a.html},
  timestamp  = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/MesnardWVTSHDSH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2104.06159); the ICML 2021 version of the same
% work is DBLP:conf/icml/HesselDVGSSWSH21.
% The author name carries its accent as in other entries of this file, and
% the algorithm name Muesli is brace-protected as a proper name.
@article{DBLP:journals/corr/abs-2104-06159,
  author     = {Matteo Hessel and Ivo Danihelka and Fabio Viola and
                Arthur Guez and Simon Schmitt and Laurent Sifre and
                Th{\'{e}}ophane Weber and David Silver and Hado van Hasselt},
  title      = {{Muesli}: Combining Improvements in Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2104.06159},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06159},
  eprinttype = {arXiv},
  eprint     = {2104.06159},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06159.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Nature journal article (vol. 588, 2020); the arXiv preprint of the same
% work is DBLP:journals/corr/abs-1911-08265.
% The journal is stored under its full name, and the proper nouns Atari and
% Go are brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/nature/SchrittwieserAH20,
  author     = {Julian Schrittwieser and Ioannis Antonoglou and
                Thomas Hubert and Karen Simonyan and Laurent Sifre and
                Simon Schmitt and Arthur Guez and Edward Lockhart and
                Demis Hassabis and Thore Graepel and Timothy P. Lillicrap and
                David Silver},
  title      = {Mastering {Atari}, {Go}, chess and shogi by planning with a
                learned model},
  journal    = {Nature},
  volume     = {588},
  number     = {7839},
  pages      = {604--609},
  year       = {2020},
  url        = {https://doi.org/10.1038/s41586-020-03051-4},
  doi        = {10.1038/S41586-020-03051-4},
  timestamp  = {Fri, 02 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/SchrittwieserAH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2020 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/abs-2002-08329.
% The third author's name is spelled with its accent, matching the preprint
% entry.
@inproceedings{DBLP:conf/nips/GuezVWBKPSH20,
  author     = {Arthur Guez and Fabio Viola and Th{\'{e}}ophane Weber and
                Lars Buesing and Steven Kapturowski and Doina Precup and
                David Silver and Nicolas Heess},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and
                Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {Value-driven Hindsight Modelling},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual
                Conference on Neural Information Processing Systems 2020,
                NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/9381fc93ad66f9ec4b2eef71147a6665-Abstract.html},
  timestamp  = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/GuezVWBKPSH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2002.08329); the NeurIPS 2020 version of the same
% work is DBLP:conf/nips/GuezVWBKPSH20.
@article{DBLP:journals/corr/abs-2002-08329,
  author     = {Arthur Guez and Fabio Viola and Th{\'{e}}ophane Weber and
                Lars Buesing and Steven Kapturowski and Doina Precup and
                David Silver and Nicolas Heess},
  title      = {Value-driven Hindsight Modelling},
  journal    = {CoRR},
  volume     = {abs/2002.08329},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.08329},
  eprinttype = {arXiv},
  eprint     = {2002.08329},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-08329.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2009.05524), 2020.
@article{DBLP:journals/corr/abs-2009-05524,
  author     = {Mehdi Mirza and Andrew Jaegle and Jonathan J. Hunt and
                Arthur Guez and Saran Tunyasuvunakool and Alistair Muldal and
                Th{\'{e}}ophane Weber and P{\'{e}}ter Karkus and
                S{\'{e}}bastien Racani{\`{e}}re and Lars Buesing and
                Timothy P. Lillicrap and Nicolas Heess},
  title      = {Physically Embedded Planning Problems: New Challenges for
                Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2009.05524},
  year       = {2020},
  url        = {https://arxiv.org/abs/2009.05524},
  eprinttype = {arXiv},
  eprint     = {2009.05524},
  timestamp  = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2009-05524.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2010.01298), 2020.
% The last author's name carries its accent as in other entries of this file;
% "3D" and the game name "Sokoban" are brace-protected so sentence-casing
% styles keep their capitals.
@article{DBLP:journals/corr/abs-2010-01298,
  author     = {P{\'{e}}ter Karkus and Mehdi Mirza and Arthur Guez and
                Andrew Jaegle and Timothy P. Lillicrap and Lars Buesing and
                Nicolas Heess and Th{\'{e}}ophane Weber},
  title      = {Beyond Tabula-Rasa: a Modular Reinforcement Learning Approach
                for Physically Embedded {3D} {Sokoban}},
  journal    = {CoRR},
  volume     = {abs/2010.01298},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.01298},
  eprinttype = {arXiv},
  eprint     = {2010.01298},
  timestamp  = {Mon, 12 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-01298.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2011.04021); the ICLR 2021 version of the same
% work is DBLP:conf/iclr/HamrickFBGVWABV21.
@article{DBLP:journals/corr/abs-2011-04021,
  author     = {Jessica B. Hamrick and Abram L. Friesen and
                Feryal M. P. Behbahani and Arthur Guez and Fabio Viola and
                Sims Witherspoon and Thomas Anthony and Lars Buesing and
                Petar Velickovic and Th{\'{e}}ophane Weber},
  title      = {On the role of planning in model-based deep reinforcement
                learning},
  journal    = {CoRR},
  volume     = {abs/2011.04021},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.04021},
  eprinttype = {arXiv},
  eprint     = {2011.04021},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-04021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2011.09464); the ICML 2021 version of the same
% work is DBLP:conf/icml/MesnardWVTSHDSH21.
@article{DBLP:journals/corr/abs-2011-09464,
  author     = {Thomas Mesnard and Th{\'{e}}ophane Weber and Fabio Viola and
                Shantanu Thakoor and Alaa Saade and Anna Harutyunyan and
                Will Dabney and Tom Stepleton and Nicolas Heess and
                Arthur Guez and Marcus Hutter and Lars Buesing and
                R{\'{e}}mi Munos},
  title      = {Counterfactual Credit Assignment in Model-Free Reinforcement
                Learning},
  journal    = {CoRR},
  volume     = {abs/2011.09464},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.09464},
  eprinttype = {arXiv},
  eprint     = {2011.09464},
  timestamp  = {Wed, 25 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-09464.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2019 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/abs-1811-06272.
% The second author's name is spelled with its accent, matching the spelling
% used by other entries in this file.
@inproceedings{DBLP:conf/iclr/BuesingWZHRGL19,
  author     = {Lars Buesing and Th{\'{e}}ophane Weber and Yori Zwols and
                Nicolas Heess and S{\'{e}}bastien Racani{\`{e}}re and
                Arthur Guez and Jean{-}Baptiste Lespiau},
  title      = {Woulda, Coulda, Shoulda: Counterfactually-Guided Policy Search},
  booktitle  = {7th International Conference on Learning Representations,
                {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher  = {OpenReview.net},
  year       = {2019},
  url        = {https://openreview.net/forum?id=BJG0voC9YQ},
  timestamp  = {Thu, 25 Jul 2019 13:03:15 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/BuesingWZHRGL19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2019 conference version (PMLR 97); the arXiv preprint of the same
% work is DBLP:journals/corr/abs-1901-03559.
% The author name is spelled with its accent, matching the preprint entry.
@inproceedings{DBLP:conf/icml/GuezMGKRWRSOEWS19,
  author     = {Arthur Guez and Mehdi Mirza and Karol Gregor and
                Rishabh Kabra and S{\'{e}}bastien Racani{\`{e}}re and
                Th{\'{e}}ophane Weber and David Raposo and Adam Santoro and
                Laurent Orseau and Tom Eccles and Greg Wayne and
                David Silver and Timothy P. Lillicrap},
  editor     = {Kamalika Chaudhuri and Ruslan Salakhutdinov},
  title      = {An Investigation of Model-Free Planning},
  booktitle  = {Proceedings of the 36th International Conference on Machine
                Learning, {ICML} 2019, 9-15 June 2019, Long Beach, California,
                {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {97},
  pages      = {2464--2473},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v97/guez19a.html},
  timestamp  = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GuezMGKRWRSOEWS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1901.03559); the ICML 2019 version of the same
% work is DBLP:conf/icml/GuezMGKRWRSOEWS19.
@article{DBLP:journals/corr/abs-1901-03559,
  author     = {Arthur Guez and Mehdi Mirza and Karol Gregor and
                Rishabh Kabra and S{\'{e}}bastien Racani{\`{e}}re and
                Th{\'{e}}ophane Weber and David Raposo and Adam Santoro and
                Laurent Orseau and Tom Eccles and Greg Wayne and
                David Silver and Timothy P. Lillicrap},
  title      = {An investigation of model-free planning},
  journal    = {CoRR},
  volume     = {abs/1901.03559},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.03559},
  eprinttype = {arXiv},
  eprint     = {1901.03559},
  timestamp  = {Fri, 01 Feb 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-03559.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1910.00528), 2019.
@article{DBLP:journals/corr/abs-1910-00528,
  author     = {Shruti Mishra and Abbas Abdolmaleki and Arthur Guez and
                Piotr Trochim and Doina Precup},
  title      = {Augmenting learning using symmetry in a biologically-inspired
                domain},
  journal    = {CoRR},
  volume     = {abs/1910.00528},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.00528},
  eprinttype = {arXiv},
  eprint     = {1910.00528},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-00528.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1911.08265); the Nature version of the same work
% is DBLP:journals/nature/SchrittwieserAH20.
% The proper nouns Atari and Go are brace-protected so sentence-casing styles
% keep their capitals.
@article{DBLP:journals/corr/abs-1911-08265,
  author     = {Julian Schrittwieser and Ioannis Antonoglou and
                Thomas Hubert and Karen Simonyan and Laurent Sifre and
                Simon Schmitt and Arthur Guez and Edward Lockhart and
                Demis Hassabis and Thore Graepel and Timothy P. Lillicrap and
                David Silver},
  title      = {Mastering {Atari}, {Go}, Chess and Shogi by Planning with a
                Learned Model},
  journal    = {CoRR},
  volume     = {abs/1911.08265},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.08265},
  eprinttype = {arXiv},
  eprint     = {1911.08265},
  timestamp  = {Mon, 02 Dec 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-08265.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CogSci 2018 conference paper.
@inproceedings{DBLP:conf/cogsci/KruscheSGS18,
  author     = {Moritz Krusche and Eric Schulz and Arthur Guez and
                Maarten Speekenbrink},
  editor     = {Chuck Kalish and Martina A. Rau and Xiaojin (Jerry) Zhu and
                Timothy T. Rogers},
  title      = {Adaptive planning in human search},
  booktitle  = {Proceedings of the 40th Annual Meeting of the Cognitive Science
                Society, CogSci 2018, Madison, WI, USA, July 25-28, 2018},
  publisher  = {cognitivesciencesociety.org},
  year       = {2018},
  url        = {https://mindmodeling.org/cogsci2018/papers/0379/index.html},
  timestamp  = {Wed, 17 Apr 2024 12:43:20 +0200},
  biburl     = {https://dblp.org/rec/conf/cogsci/KruscheSGS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2018 conference version (PMLR 80); the arXiv preprint of the same
% work is DBLP:journals/corr/abs-1802-04697.
% The author name carries its accent as in the preprint entry, and the coined
% name MCTSnets is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/icml/GuezWASVWMS18,
  author     = {Arthur Guez and Th{\'{e}}ophane Weber and
                Ioannis Antonoglou and Karen Simonyan and Oriol Vinyals and
                Daan Wierstra and R{\'{e}}mi Munos and David Silver},
  editor     = {Jennifer G. Dy and Andreas Krause},
  title      = {Learning to Search with {MCTSnets}},
  booktitle  = {Proceedings of the 35th International Conference on Machine
                Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm,
                Sweden, July 10-15, 2018},
  series     = {Proceedings of Machine Learning Research},
  volume     = {80},
  pages      = {1817--1826},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v80/guez18a.html},
  timestamp  = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GuezWASVWMS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1802.04697); the ICML 2018 version of the same
% work is DBLP:conf/icml/GuezWASVWMS18.
% The coined name MCTSnets is brace-protected so sentence-casing styles keep
% its capitals.
@article{DBLP:journals/corr/abs-1802-04697,
  author     = {Arthur Guez and Th{\'{e}}ophane Weber and
                Ioannis Antonoglou and Karen Simonyan and Oriol Vinyals and
                Daan Wierstra and R{\'{e}}mi Munos and David Silver},
  title      = {Learning to Search with {MCTSnets}},
  journal    = {CoRR},
  volume     = {abs/1802.04697},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.04697},
  eprinttype = {arXiv},
  eprint     = {1802.04697},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-04697.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1811.06272); the ICLR 2019 version of the same
% work is DBLP:conf/iclr/BuesingWZHRGL19.
% The second author's name is spelled with its accent, matching the spelling
% used by other entries in this file.
@article{DBLP:journals/corr/abs-1811-06272,
  author     = {Lars Buesing and Th{\'{e}}ophane Weber and Yori Zwols and
                S{\'{e}}bastien Racani{\`{e}}re and Arthur Guez and
                Jean{-}Baptiste Lespiau and Nicolas Heess},
  title      = {Woulda, Coulda, Shoulda: Counterfactually-Guided Policy Search},
  journal    = {CoRR},
  volume     = {abs/1811.06272},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.06272},
  eprinttype = {arXiv},
  eprint     = {1811.06272},
  timestamp  = {Sun, 25 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-06272.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Nature journal article (vol. 550, 2017).
% The journal is stored under its full name, and the game name Go is
% brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/nature/SilverSSAHGHBLB17,
  author     = {David Silver and Julian Schrittwieser and Karen Simonyan and
                Ioannis Antonoglou and Aja Huang and Arthur Guez and
                Thomas Hubert and Lucas Baker and Matthew Lai and
                Adrian Bolton and Yutian Chen and Timothy P. Lillicrap and
                Fan Hui and Laurent Sifre and George van den Driessche and
                Thore Graepel and Demis Hassabis},
  title      = {Mastering the game of {Go} without human knowledge},
  journal    = {Nature},
  volume     = {550},
  number     = {7676},
  pages      = {354--359},
  year       = {2017},
  url        = {https://doi.org/10.1038/nature24270},
  doi        = {10.1038/NATURE24270},
  timestamp  = {Mon, 22 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/SilverSSAHGHBLB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2017 conference version (PMLR 70); the arXiv preprint of the same
% work is DBLP:journals/corr/SilverHHSGHDRRB16.
% The coined name Predictron is brace-protected so sentence-casing styles
% keep its capital.
@inproceedings{DBLP:conf/icml/SilverHHSGHDRRB17,
  author     = {David Silver and Hado van Hasselt and Matteo Hessel and
                Tom Schaul and Arthur Guez and Tim Harley and
                Gabriel Dulac{-}Arnold and David P. Reichert and
                Neil C. Rabinowitz and Andr{\'{e}} Barreto and Thomas Degris},
  editor     = {Doina Precup and Yee Whye Teh},
  title      = {The {Predictron}: End-To-End Learning and Planning},
  booktitle  = {Proceedings of the 34th International Conference on Machine
                Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August
                2017},
  series     = {Proceedings of Machine Learning Research},
  volume     = {70},
  pages      = {3191--3199},
  publisher  = {{PMLR}},
  year       = {2017},
  url        = {http://proceedings.mlr.press/v70/silver17a.html},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SilverHHSGHDRRB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2017 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/WeberRRBGRBVHLP17.
% The second author's name is spelled with its accent, matching the spelling
% used by other entries in this file.
@inproceedings{DBLP:conf/nips/RacaniereWRBGRB17,
  author     = {S{\'{e}}bastien Racani{\`{e}}re and Th{\'{e}}ophane Weber and
                David P. Reichert and Lars Buesing and Arthur Guez and
                Danilo Jimenez Rezende and
                Adri{\`{a}} Puigdom{\`{e}}nech Badia and Oriol Vinyals and
                Nicolas Heess and Yujia Li and Razvan Pascanu and
                Peter W. Battaglia and Demis Hassabis and David Silver and
                Daan Wierstra},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and
                Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and
                Roman Garnett},
  title      = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual
                Conference on Neural Information Processing Systems 2017,
                December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {5690--5701},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/9e82757e9a1c12cb710ad680db11f6f1-Abstract.html},
  timestamp  = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/RacaniereWRBGRB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1707.06203); the NIPS 2017 version of the same
% work is DBLP:conf/nips/RacaniereWRBGRB17.
% The first author's name is spelled with its accent, matching the spelling
% used by other entries in this file.
@article{DBLP:journals/corr/WeberRRBGRBVHLP17,
  author     = {Th{\'{e}}ophane Weber and S{\'{e}}bastien Racani{\`{e}}re and
                David P. Reichert and Lars Buesing and Arthur Guez and
                Danilo Jimenez Rezende and
                Adri{\`{a}} Puigdom{\`{e}}nech Badia and Oriol Vinyals and
                Nicolas Heess and Yujia Li and Razvan Pascanu and
                Peter W. Battaglia and David Silver and Daan Wierstra},
  title      = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1707.06203},
  year       = {2017},
  url        = {http://arxiv.org/abs/1707.06203},
  eprinttype = {arXiv},
  eprint     = {1707.06203},
  timestamp  = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/WeberRRBGRBVHLP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1712.01815), 2017.
@article{DBLP:journals/corr/abs-1712-01815,
  author     = {David Silver and Thomas Hubert and Julian Schrittwieser and
                Ioannis Antonoglou and Matthew Lai and Arthur Guez and
                Marc Lanctot and Laurent Sifre and Dharshan Kumaran and
                Thore Graepel and Timothy P. Lillicrap and Karen Simonyan and
                Demis Hassabis},
  title      = {Mastering Chess and Shogi by Self-Play with a General
                Reinforcement Learning Algorithm},
  journal    = {CoRR},
  volume     = {abs/1712.01815},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.01815},
  eprinttype = {arXiv},
  eprint     = {1712.01815},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-01815.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Nature journal article (vol. 529, 2016).
% The journal is stored under its full name, and the game name Go is
% brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/nature/SilverHMGSDSAPL16,
  author     = {David Silver and Aja Huang and Chris J. Maddison and
                Arthur Guez and Laurent Sifre and George van den Driessche and
                Julian Schrittwieser and Ioannis Antonoglou and
                Vedavyas Panneershelvam and Marc Lanctot and
                Sander Dieleman and Dominik Grewe and John Nham and
                Nal Kalchbrenner and Ilya Sutskever and
                Timothy P. Lillicrap and Madeleine Leach and
                Koray Kavukcuoglu and Thore Graepel and Demis Hassabis},
  title      = {Mastering the game of {Go} with deep neural networks and tree
                search},
  journal    = {Nature},
  volume     = {529},
  number     = {7587},
  pages      = {484--489},
  year       = {2016},
  url        = {https://doi.org/10.1038/nature16961},
  doi        = {10.1038/NATURE16961},
  timestamp  = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/SilverHMGSDSAPL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2016 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/BellemareOGTM15.
@inproceedings{DBLP:conf/aaai/BellemareOGTM16,
  author     = {Marc G. Bellemare and Georg Ostrovski and Arthur Guez and
                Philip S. Thomas and R{\'{e}}mi Munos},
  editor     = {Dale Schuurmans and Michael P. Wellman},
  title      = {Increasing the Action Gap: New Operators for Reinforcement
                Learning},
  booktitle  = {Proceedings of the Thirtieth {AAAI} Conference on Artificial
                Intelligence, February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages      = {1476--1483},
  publisher  = {{AAAI} Press},
  year       = {2016},
  url        = {https://doi.org/10.1609/aaai.v30i1.10303},
  doi        = {10.1609/AAAI.V30I1.10303},
  timestamp  = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/BellemareOGTM16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2016 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/HasseltGS15.
% The symbol Q in "Q-Learning" is brace-protected so sentence-casing styles
% do not turn it into a lowercase q.
@inproceedings{DBLP:conf/aaai/HasseltGS16,
  author     = {Hado van Hasselt and Arthur Guez and David Silver},
  editor     = {Dale Schuurmans and Michael P. Wellman},
  title      = {Deep Reinforcement Learning with Double {Q}-Learning},
  booktitle  = {Proceedings of the Thirtieth {AAAI} Conference on Artificial
                Intelligence, February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages      = {2094--2100},
  publisher  = {{AAAI} Press},
  year       = {2016},
  url        = {https://doi.org/10.1609/aaai.v30i1.10295},
  doi        = {10.1609/AAAI.V30I1.10295},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2016 conference paper (Advances in Neural Information Processing
% Systems 29).
@inproceedings{DBLP:conf/nips/HasseltGHMS16,
  author     = {Hado van Hasselt and Arthur Guez and Matteo Hessel and
                Volodymyr Mnih and David Silver},
  editor     = {Daniel D. Lee and Masashi Sugiyama and Ulrike von Luxburg and
                Isabelle Guyon and Roman Garnett},
  title      = {Learning values across many orders of magnitude},
  booktitle  = {Advances in Neural Information Processing Systems 29: Annual
                Conference on Neural Information Processing Systems 2016,
                December 5-10, 2016, Barcelona, Spain},
  pages      = {4287--4295},
  year       = {2016},
  url        = {https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/HasseltGHMS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1602.07714), 2016.
@article{DBLP:journals/corr/HasseltGHS16,
  author     = {Hado van Hasselt and Arthur Guez and Matteo Hessel and
                David Silver},
  title      = {Learning functions across many orders of magnitudes},
  journal    = {CoRR},
  volume     = {abs/1602.07714},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.07714},
  eprinttype = {arXiv},
  eprint     = {1602.07714},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltGHS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1612.08810); the ICML 2017 version of the same
% work is DBLP:conf/icml/SilverHHSGHDRRB17.
% The coined name Predictron is brace-protected so sentence-casing styles
% keep its capital.
@article{DBLP:journals/corr/SilverHHSGHDRRB16,
  author     = {David Silver and Hado van Hasselt and Matteo Hessel and
                Tom Schaul and Arthur Guez and Tim Harley and
                Gabriel Dulac{-}Arnold and David P. Reichert and
                Neil C. Rabinowitz and Andr{\'{e}} Barreto and Thomas Degris},
  title      = {The {Predictron}: End-To-End Learning and Planning},
  journal    = {CoRR},
  volume     = {abs/1612.08810},
  year       = {2016},
  url        = {http://arxiv.org/abs/1612.08810},
  eprinttype = {arXiv},
  eprint     = {1612.08810},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/SilverHHSGHDRRB16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1509.06461); the AAAI 2016 version of the same
% work is DBLP:conf/aaai/HasseltGS16.
% The symbol Q in "Q-learning" is brace-protected so sentence-casing styles
% do not turn it into a lowercase q.
@article{DBLP:journals/corr/HasseltGS15,
  author     = {Hado van Hasselt and Arthur Guez and David Silver},
  title      = {Deep Reinforcement Learning with Double {Q}-learning},
  journal    = {CoRR},
  volume     = {abs/1509.06461},
  year       = {2015},
  url        = {http://arxiv.org/abs/1509.06461},
  eprinttype = {arXiv},
  eprint     = {1509.06461},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1512.04860); the AAAI 2016 version of the same
% work is DBLP:conf/aaai/BellemareOGTM16.
@article{DBLP:journals/corr/BellemareOGTM15,
  author     = {Marc G. Bellemare and Georg Ostrovski and Arthur Guez and
                Philip S. Thomas and R{\'{e}}mi Munos},
  title      = {Increasing the Action Gap: New Operators for Reinforcement
                Learning},
  journal    = {CoRR},
  volume     = {abs/1512.04860},
  year       = {2015},
  url        = {http://arxiv.org/abs/1512.04860},
  eprinttype = {arXiv},
  eprint     = {1512.04860},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/BellemareOGTM15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2014 conference paper (Advances in Neural Information Processing
% Systems 27).
@inproceedings{DBLP:conf/nips/GuezHSD14,
  author     = {Arthur Guez and Nicolas Heess and David Silver and
                Peter Dayan},
  editor     = {Zoubin Ghahramani and Max Welling and Corinna Cortes and
                Neil D. Lawrence and Kilian Q. Weinberger},
  title      = {Bayes-Adaptive Simulation-based Search with Value Function
                Approximation},
  booktitle  = {Advances in Neural Information Processing Systems 27: Annual
                Conference on Neural Information Processing Systems 2014,
                December 8-13 2014, Montreal, Quebec, Canada},
  pages      = {451--459},
  year       = {2014},
  url        = {https://proceedings.neurips.cc/paper/2014/hash/839ab46820b524afda05122893c2fe8e-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GuezHSD14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1402.1958), 2014.
% The proper noun Bayes is brace-protected so sentence-casing styles keep its
% capital.
@article{DBLP:journals/corr/GuezSD14,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  title      = {Better Optimism By {Bayes}: Adaptive Planning with Rich Models},
  journal    = {CoRR},
  volume     = {abs/1402.1958},
  year       = {2014},
  url        = {http://arxiv.org/abs/1402.1958},
  eprinttype = {arXiv},
  eprint     = {1402.1958},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GuezSD14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% JAIR journal article (vol. 48, 2013).
% The journal is stored under its full name; "Bayes" and "Monte-Carlo" are
% brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/jair/GuezSD13,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  title      = {Scalable and Efficient {Bayes}-Adaptive Reinforcement Learning
                Based on {Monte-Carlo} Tree Search},
  journal    = {Journal of Artificial Intelligence Research},
  volume     = {48},
  pages      = {841--883},
  year       = {2013},
  url        = {https://doi.org/10.1613/jair.4117},
  doi        = {10.1613/JAIR.4117},
  timestamp  = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jair/GuezSD13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2012 conference version; the arXiv preprint of the same work is
% DBLP:journals/corr/abs-1205-3109.
% The proper noun Bayes is brace-protected so sentence-casing styles keep its
% capital.
@inproceedings{DBLP:conf/nips/GuezSD12,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  editor     = {Peter L. Bartlett and Fernando C. N. Pereira and
                Christopher J. C. Burges and L{\'{e}}on Bottou and
                Kilian Q. Weinberger},
  title      = {Efficient {Bayes}-Adaptive Reinforcement Learning using
                Sample-Based Search},
  booktitle  = {Advances in Neural Information Processing Systems 25: 26th
                Annual Conference on Neural Information Processing Systems
                2012. Proceedings of a meeting held December 3-6, 2012, Lake
                Tahoe, Nevada, United States},
  pages      = {1034--1042},
  year       = {2012},
  url        = {https://proceedings.neurips.cc/paper/2012/hash/35051070e572e47d2c26c241ab88307f-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GuezSD12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1205.3109); the NIPS 2012 version of the same
% work is DBLP:conf/nips/GuezSD12.
% The proper noun Bayes is brace-protected so sentence-casing styles keep its
% capital.
@article{DBLP:journals/corr/abs-1205-3109,
  author     = {Arthur Guez and David Silver and Peter Dayan},
  title      = {Efficient {Bayes}-Adaptive Reinforcement Learning using
                Sample-Based Search},
  journal    = {CoRR},
  volume     = {abs/1205.3109},
  year       = {2012},
  url        = {http://arxiv.org/abs/1205.3109},
  eprinttype = {arXiv},
  eprint     = {1205.3109},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1205-3109.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICRA 2010 conference paper.
@inproceedings{DBLP:conf/icra/GuezP10,
  author     = {Arthur Guez and Joelle Pineau},
  title      = {Multi-tasking {SLAM}},
  booktitle  = {{IEEE} International Conference on Robotics and Automation,
                {ICRA} 2010, Anchorage, Alaska, USA, 3-7 May 2010},
  pages      = {377--384},
  publisher  = {{IEEE}},
  year       = {2010},
  url        = {https://doi.org/10.1109/ROBOT.2010.5509969},
  doi        = {10.1109/ROBOT.2010.5509969},
  timestamp  = {Mon, 22 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icra/GuezP10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (vol. 19, no. 4, 2009).
% The journal is stored under its full name rather than an abbreviation, so
% the bibliography style decides whether and how to abbreviate it.
@article{DBLP:journals/ijns/PineauGVPA09,
  author     = {Joelle Pineau and Arthur Guez and Robert D. Vincent and
                Gabriella Panuccio and Massimo Avoli},
  title      = {Treating Epilepsy via Adaptive Neurostimulation: a
                Reinforcement Learning Approach},
  journal    = {International Journal of Neural Systems},
  volume     = {19},
  number     = {4},
  pages      = {227--240},
  year       = {2009},
  url        = {https://doi.org/10.1142/S0129065709001987},
  doi        = {10.1142/S0129065709001987},
  timestamp  = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ijns/PineauGVPA09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2008 proceedings paper.
% NOTE(review): the URL path points at the IAAI 2008 library section while the
% booktitle names the AAAI 2008 proceedings - confirm the intended venue label.
@inproceedings{DBLP:conf/aaai/GuezVAP08,
  author     = {Arthur Guez and Robert D. Vincent and Massimo Avoli and
                Joelle Pineau},
  editor     = {Dieter Fox and Carla P. Gomes},
  title      = {Adaptive Treatment of Epilepsy via Batch-mode Reinforcement
                Learning},
  booktitle  = {Proceedings of the Twenty-Third {AAAI} Conference on Artificial
                Intelligence, {AAAI} 2008, Chicago, Illinois, USA, July 13-17,
                2008},
  pages      = {1671--1678},
  publisher  = {{AAAI} Press},
  year       = {2008},
  url        = {http://www.aaai.org/Library/IAAI/2008/iaai08-008.php},
  timestamp  = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/GuezVAP08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.