BibTeX records: Arthur Guez

download as .bib file

@article{DBLP:journals/corr/abs-2406-02035,
  author     = {Khetarpal, Khimya and
                Guo, Zhaohan Daniel and
                Pires, Bernardo {\'{A}}vila and
                Tang, Yunhao and
                Lyle, Clare and
                Rowland, Mark and
                Heess, Nicolas and
                Borsa, Diana and
                Guez, Arthur and
                Dabney, Will},
  title      = {A Unifying Framework for Action-Conditional Self-Predictive Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2406.02035},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.02035},
  doi        = {10.48550/ARXIV.2406.02035},
  eprinttype = {arXiv},
  eprint     = {2406.02035},
  timestamp  = {Thu, 04 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-02035.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2306-10587,
  author     = {Chelu, Veronica and
                Zahavy, Tom and
                Guez, Arthur and
                Precup, Doina and
                Flennerhag, Sebastian},
  title      = {Optimism and Adaptivity in Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2306.10587},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.10587},
  doi        = {10.48550/ARXIV.2306.10587},
  eprinttype = {arXiv},
  eprint     = {2306.10587},
  timestamp  = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-10587.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/0004PMHPKG22,
  author     = {Lee, Jongmin and
                Paduraru, Cosmin and
                Mankowitz, Daniel J. and
                Heess, Nicolas and
                Precup, Doina and
                Kim, Kee{-}Eung and
                Guez, Arthur},
  title      = {{COptiDICE}: Offline Constrained Reinforcement Learning via Stationary Distribution Correction Estimation},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=FLA55mBee6Q},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/0004PMHPKG22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/DanihelkaGSS22,
  author     = {Danihelka, Ivo and
                Guez, Arthur and
                Schrittwieser, Julian and
                Silver, David},
  title      = {Policy improvement by planning with {Gumbel}},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=bERaNdoegnO},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/DanihelkaGSS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/GoyalFBWKBGMHKV22,
  author     = {Goyal, Anirudh and
                Friesen, Abram L. and
                Banino, Andrea and
                Weber, Th{\'{e}}ophane and
                Ke, Nan Rosemary and
                Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                Guez, Arthur and
                Mirza, Mehdi and
                Humphreys, Peter Conway and
                Konyushkova, Ksenia and
                Valko, Michal and
                Osindero, Simon and
                Lillicrap, Timothy P. and
                Heess, Nicolas and
                Blundell, Charles},
  editor     = {Chaudhuri, Kamalika and
                Jegelka, Stefanie and
                Song, Le and
                Szepesv{\'{a}}ri, Csaba and
                Niu, Gang and
                Sabato, Sivan},
  title      = {Retrieval-Augmented Reinforcement Learning},
  booktitle  = {International Conference on Machine Learning, {ICML} 2022, 17-23 July 2022, Baltimore, Maryland, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {7740--7765},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v162/goyal22a.html},
  timestamp  = {Sun, 12 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GoyalFBWKBGMHKV22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/HumphreysGTSWL22,
  author     = {Humphreys, Peter Conway and
                Guez, Arthur and
                Tieleman, Olivier and
                Sifre, Laurent and
                Weber, Th{\'{e}}ophane and
                Lillicrap, Timothy P.},
  editor     = {Koyejo, Sanmi and
                Mohamed, Shakir and
                Agarwal, Alekh and
                Belgrave, Danielle and
                Cho, Kyunghyun and
                Oh, Alice},
  title      = {Large-Scale Retrieval for Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper_files/paper/2022/hash/7eca17ef54789b0663cab421f2e9dbf5-Abstract-Conference.html},
  timestamp  = {Sun, 12 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/HumphreysGTSWL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2202-08417,
  author     = {Goyal, Anirudh and
                Friesen, Abram L. and
                Banino, Andrea and
                Weber, Th{\'{e}}ophane and
                Ke, Nan Rosemary and
                Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                Guez, Arthur and
                Mirza, Mehdi and
                Konyushkova, Ksenia and
                Valko, Michal and
                Osindero, Simon and
                Lillicrap, Timothy P. and
                Heess, Nicolas and
                Blundell, Charles},
  title      = {Retrieval-Augmented Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2202.08417},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.08417},
  eprinttype = {arXiv},
  eprint     = {2202.08417},
  timestamp  = {Tue, 01 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-08417.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2204-08957,
  author     = {Lee, Jongmin and
                Paduraru, Cosmin and
                Mankowitz, Daniel J. and
                Heess, Nicolas and
                Precup, Doina and
                Kim, Kee{-}Eung and
                Guez, Arthur},
  title      = {{COptiDICE}: Offline Constrained Reinforcement Learning via Stationary Distribution Correction Estimation},
  journal    = {CoRR},
  volume     = {abs/2204.08957},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.08957},
  doi        = {10.48550/ARXIV.2204.08957},
  eprinttype = {arXiv},
  eprint     = {2204.08957},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-08957.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2206-05314,
  author     = {Humphreys, Peter Conway and
                Guez, Arthur and
                Tieleman, Olivier and
                Sifre, Laurent and
                Weber, Th{\'{e}}ophane and
                Lillicrap, Timothy P.},
  title      = {Large-Scale Retrieval for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2206.05314},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.05314},
  doi        = {10.48550/ARXIV.2206.05314},
  eprinttype = {arXiv},
  eprint     = {2206.05314},
  timestamp  = {Sun, 12 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-05314.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/HamrickFBGVWABV21,
  author     = {Hamrick, Jessica B. and
                Friesen, Abram L. and
                Behbahani, Feryal M. P. and
                Guez, Arthur and
                Viola, Fabio and
                Witherspoon, Sims and
                Anthony, Thomas and
                Buesing, Lars Holger and
                Veli{\v{c}}kovi{\'{c}}, Petar and
                Weber, Th{\'{e}}ophane},
  title      = {On the role of planning in model-based deep reinforcement learning},
  booktitle  = {9th International Conference on Learning Representations, {ICLR} 2021, Virtual Event, Austria, May 3-7, 2021},
  publisher  = {OpenReview.net},
  year       = {2021},
  url        = {https://openreview.net/forum?id=IrM64DGB21},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iclr/HamrickFBGVWABV21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/HesselDVGSSWSH21,
  author     = {Hessel, Matteo and
                Danihelka, Ivo and
                Viola, Fabio and
                Guez, Arthur and
                Schmitt, Simon and
                Sifre, Laurent and
                Weber, Th{\'{e}}ophane and
                Silver, David and
                van Hasselt, Hado},
  editor     = {Meila, Marina and
                Zhang, Tong},
  title      = {Muesli: Combining Improvements in Policy Optimization},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {4214--4226},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/hessel21a.html},
  timestamp  = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/HesselDVGSSWSH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/MesnardWVTSHDSH21,
  author     = {Mesnard, Thomas and
                Weber, Th{\'{e}}ophane and
                Viola, Fabio and
                Thakoor, Shantanu and
                Saade, Alaa and
                Harutyunyan, Anna and
                Dabney, Will and
                Stepleton, Thomas S. and
                Heess, Nicolas and
                Guez, Arthur and
                Moulines, Eric and
                Hutter, Marcus and
                Buesing, Lars and
                Munos, R{\'{e}}mi},
  editor     = {Meila, Marina and
                Zhang, Tong},
  title      = {Counterfactual Credit Assignment in Model-Free Reinforcement Learning},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {7654--7664},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/mesnard21a.html},
  timestamp  = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/MesnardWVTSHDSH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2104-06159,
  author     = {Hessel, Matteo and
                Danihelka, Ivo and
                Viola, Fabio and
                Guez, Arthur and
                Schmitt, Simon and
                Sifre, Laurent and
                Weber, Th{\'{e}}ophane and
                Silver, David and
                van Hasselt, Hado},
  title      = {Muesli: Combining Improvements in Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2104.06159},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06159},
  eprinttype = {arXiv},
  eprint     = {2104.06159},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06159.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nature/SchrittwieserAH20,
  author     = {Schrittwieser, Julian and
                Antonoglou, Ioannis and
                Hubert, Thomas and
                Simonyan, Karen and
                Sifre, Laurent and
                Schmitt, Simon and
                Guez, Arthur and
                Lockhart, Edward and
                Hassabis, Demis and
                Graepel, Thore and
                Lillicrap, Timothy P. and
                Silver, David},
  title      = {Mastering {Atari}, {Go}, chess and shogi by planning with a learned model},
  journal    = {Nature},
  volume     = {588},
  number     = {7839},
  pages      = {604--609},
  year       = {2020},
  url        = {https://doi.org/10.1038/s41586-020-03051-4},
  doi        = {10.1038/S41586-020-03051-4},
  timestamp  = {Fri, 02 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/SchrittwieserAH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/GuezVWBKPSH20,
  author     = {Guez, Arthur and
                Viola, Fabio and
                Weber, Th{\'{e}}ophane and
                Buesing, Lars and
                Kapturowski, Steven and
                Precup, Doina and
                Silver, David and
                Heess, Nicolas},
  editor     = {Larochelle, Hugo and
                Ranzato, Marc'Aurelio and
                Hadsell, Raia and
                Balcan, Maria{-}Florina and
                Lin, Hsuan{-}Tien},
  title      = {Value-driven Hindsight Modelling},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/9381fc93ad66f9ec4b2eef71147a6665-Abstract.html},
  timestamp  = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/GuezVWBKPSH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2002-08329,
  author     = {Guez, Arthur and
                Viola, Fabio and
                Weber, Th{\'{e}}ophane and
                Buesing, Lars and
                Kapturowski, Steven and
                Precup, Doina and
                Silver, David and
                Heess, Nicolas},
  title      = {Value-driven Hindsight Modelling},
  journal    = {CoRR},
  volume     = {abs/2002.08329},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.08329},
  eprinttype = {arXiv},
  eprint     = {2002.08329},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-08329.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2009-05524,
  author     = {Mirza, Mehdi and
                Jaegle, Andrew and
                Hunt, Jonathan J. and
                Guez, Arthur and
                Tunyasuvunakool, Saran and
                Muldal, Alistair and
                Weber, Th{\'{e}}ophane and
                Karkus, P{\'{e}}ter and
                Racani{\`{e}}re, S{\'{e}}bastien and
                Buesing, Lars and
                Lillicrap, Timothy P. and
                Heess, Nicolas},
  title      = {Physically Embedded Planning Problems: New Challenges for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2009.05524},
  year       = {2020},
  url        = {https://arxiv.org/abs/2009.05524},
  eprinttype = {arXiv},
  eprint     = {2009.05524},
  timestamp  = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2009-05524.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2010-01298,
  author     = {Karkus, P{\'{e}}ter and
                Mirza, Mehdi and
                Guez, Arthur and
                Jaegle, Andrew and
                Lillicrap, Timothy P. and
                Buesing, Lars and
                Heess, Nicolas and
                Weber, Th{\'{e}}ophane},
  title      = {Beyond Tabula-Rasa: a Modular Reinforcement Learning Approach for Physically Embedded {3D} {Sokoban}},
  journal    = {CoRR},
  volume     = {abs/2010.01298},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.01298},
  eprinttype = {arXiv},
  eprint     = {2010.01298},
  timestamp  = {Mon, 12 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-01298.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2011-04021,
  author     = {Hamrick, Jessica B. and
                Friesen, Abram L. and
                Behbahani, Feryal M. P. and
                Guez, Arthur and
                Viola, Fabio and
                Witherspoon, Sims and
                Anthony, Thomas and
                Buesing, Lars and
                Veli{\v{c}}kovi{\'{c}}, Petar and
                Weber, Th{\'{e}}ophane},
  title      = {On the role of planning in model-based deep reinforcement learning},
  journal    = {CoRR},
  volume     = {abs/2011.04021},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.04021},
  eprinttype = {arXiv},
  eprint     = {2011.04021},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-04021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2011-09464,
  author     = {Mesnard, Thomas and
                Weber, Th{\'{e}}ophane and
                Viola, Fabio and
                Thakoor, Shantanu and
                Saade, Alaa and
                Harutyunyan, Anna and
                Dabney, Will and
                Stepleton, Tom and
                Heess, Nicolas and
                Guez, Arthur and
                Hutter, Marcus and
                Buesing, Lars and
                Munos, R{\'{e}}mi},
  title      = {Counterfactual Credit Assignment in Model-Free Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2011.09464},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.09464},
  eprinttype = {arXiv},
  eprint     = {2011.09464},
  timestamp  = {Wed, 25 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-09464.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/BuesingWZHRGL19,
  author     = {Buesing, Lars and
                Weber, Th{\'{e}}ophane and
                Zwols, Yori and
                Heess, Nicolas and
                Racani{\`{e}}re, S{\'{e}}bastien and
                Guez, Arthur and
                Lespiau, Jean{-}Baptiste},
  title      = {Woulda, Coulda, Shoulda: Counterfactually-Guided Policy Search},
  booktitle  = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher  = {OpenReview.net},
  year       = {2019},
  url        = {https://openreview.net/forum?id=BJG0voC9YQ},
  timestamp  = {Thu, 25 Jul 2019 13:03:15 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/BuesingWZHRGL19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/GuezMGKRWRSOEWS19,
  author     = {Guez, Arthur and
                Mirza, Mehdi and
                Gregor, Karol and
                Kabra, Rishabh and
                Racani{\`{e}}re, S{\'{e}}bastien and
                Weber, Th{\'{e}}ophane and
                Raposo, David and
                Santoro, Adam and
                Orseau, Laurent and
                Eccles, Tom and
                Wayne, Greg and
                Silver, David and
                Lillicrap, Timothy P.},
  editor     = {Chaudhuri, Kamalika and
                Salakhutdinov, Ruslan},
  title      = {An Investigation of Model-Free Planning},
  booktitle  = {Proceedings of the 36th International Conference on Machine Learning, {ICML} 2019, 9-15 June 2019, Long Beach, California, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {97},
  pages      = {2464--2473},
  publisher  = {{PMLR}},
  year       = {2019},
  url        = {http://proceedings.mlr.press/v97/guez19a.html},
  timestamp  = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GuezMGKRWRSOEWS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1901-03559,
  author     = {Guez, Arthur and
                Mirza, Mehdi and
                Gregor, Karol and
                Kabra, Rishabh and
                Racani{\`{e}}re, S{\'{e}}bastien and
                Weber, Th{\'{e}}ophane and
                Raposo, David and
                Santoro, Adam and
                Orseau, Laurent and
                Eccles, Tom and
                Wayne, Greg and
                Silver, David and
                Lillicrap, Timothy P.},
  title      = {An investigation of model-free planning},
  journal    = {CoRR},
  volume     = {abs/1901.03559},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.03559},
  eprinttype = {arXiv},
  eprint     = {1901.03559},
  timestamp  = {Fri, 01 Feb 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-03559.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1910-00528,
  author     = {Mishra, Shruti and
                Abdolmaleki, Abbas and
                Guez, Arthur and
                Trochim, Piotr and
                Precup, Doina},
  title      = {Augmenting learning using symmetry in a biologically-inspired domain},
  journal    = {CoRR},
  volume     = {abs/1910.00528},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.00528},
  eprinttype = {arXiv},
  eprint     = {1910.00528},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-00528.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1911-08265,
  author     = {Schrittwieser, Julian and
                Antonoglou, Ioannis and
                Hubert, Thomas and
                Simonyan, Karen and
                Sifre, Laurent and
                Schmitt, Simon and
                Guez, Arthur and
                Lockhart, Edward and
                Hassabis, Demis and
                Graepel, Thore and
                Lillicrap, Timothy P. and
                Silver, David},
  title      = {Mastering {Atari}, {Go}, Chess and Shogi by Planning with a Learned Model},
  journal    = {CoRR},
  volume     = {abs/1911.08265},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.08265},
  eprinttype = {arXiv},
  eprint     = {1911.08265},
  timestamp  = {Mon, 02 Dec 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-08265.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/cogsci/KruscheSGS18,
  author     = {Krusche, Moritz and
                Schulz, Eric and
                Guez, Arthur and
                Speekenbrink, Maarten},
  editor     = {Kalish, Chuck and
                Rau, Martina A. and
                Zhu, Xiaojin (Jerry) and
                Rogers, Timothy T.},
  title      = {Adaptive planning in human search},
  booktitle  = {Proceedings of the 40th Annual Meeting of the Cognitive Science Society, CogSci 2018, Madison, WI, USA, July 25-28, 2018},
  publisher  = {cognitivesciencesociety.org},
  year       = {2018},
  url        = {https://mindmodeling.org/cogsci2018/papers/0379/index.html},
  timestamp  = {Wed, 17 Apr 2024 12:43:20 +0200},
  biburl     = {https://dblp.org/rec/conf/cogsci/KruscheSGS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/GuezWASVWMS18,
  author     = {Guez, Arthur and
                Weber, Th{\'{e}}ophane and
                Antonoglou, Ioannis and
                Simonyan, Karen and
                Vinyals, Oriol and
                Wierstra, Daan and
                Munos, R{\'{e}}mi and
                Silver, David},
  editor     = {Dy, Jennifer G. and
                Krause, Andreas},
  title      = {Learning to Search with {MCTSnets}},
  booktitle  = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series     = {Proceedings of Machine Learning Research},
  volume     = {80},
  pages      = {1817--1826},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v80/guez18a.html},
  timestamp  = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/GuezWASVWMS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1802-04697,
  author     = {Guez, Arthur and
                Weber, Th{\'{e}}ophane and
                Antonoglou, Ioannis and
                Simonyan, Karen and
                Vinyals, Oriol and
                Wierstra, Daan and
                Munos, R{\'{e}}mi and
                Silver, David},
  title      = {Learning to Search with {MCTSnets}},
  journal    = {CoRR},
  volume     = {abs/1802.04697},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.04697},
  eprinttype = {arXiv},
  eprint     = {1802.04697},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-04697.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1811-06272,
  author     = {Buesing, Lars and
                Weber, Th{\'{e}}ophane and
                Zwols, Yori and
                Racani{\`{e}}re, S{\'{e}}bastien and
                Guez, Arthur and
                Lespiau, Jean{-}Baptiste and
                Heess, Nicolas},
  title      = {Woulda, Coulda, Shoulda: Counterfactually-Guided Policy Search},
  journal    = {CoRR},
  volume     = {abs/1811.06272},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.06272},
  eprinttype = {arXiv},
  eprint     = {1811.06272},
  timestamp  = {Sun, 25 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-06272.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/nature/SilverSSAHGHBLB17,
  author     = {Silver, David and
                Schrittwieser, Julian and
                Simonyan, Karen and
                Antonoglou, Ioannis and
                Huang, Aja and
                Guez, Arthur and
                Hubert, Thomas and
                Baker, Lucas and
                Lai, Matthew and
                Bolton, Adrian and
                Chen, Yutian and
                Lillicrap, Timothy P. and
                Hui, Fan and
                Sifre, Laurent and
                van den Driessche, George and
                Graepel, Thore and
                Hassabis, Demis},
  title      = {Mastering the game of {Go} without human knowledge},
  journal    = {Nature},
  volume     = {550},
  number     = {7676},
  pages      = {354--359},
  year       = {2017},
  url        = {https://doi.org/10.1038/nature24270},
  doi        = {10.1038/NATURE24270},
  timestamp  = {Mon, 22 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nature/SilverSSAHGHBLB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/SilverHHSGHDRRB17,
  author     = {Silver, David and
                van Hasselt, Hado and
                Hessel, Matteo and
                Schaul, Tom and
                Guez, Arthur and
                Harley, Tim and
                Dulac{-}Arnold, Gabriel and
                Reichert, David P. and
                Rabinowitz, Neil C. and
                Barreto, Andr{\'{e}} and
                Degris, Thomas},
  editor     = {Precup, Doina and
                Teh, Yee Whye},
  title      = {The {Predictron}: End-To-End Learning and Planning},
  booktitle  = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series     = {Proceedings of Machine Learning Research},
  volume     = {70},
  pages      = {3191--3199},
  publisher  = {{PMLR}},
  year       = {2017},
  url        = {http://proceedings.mlr.press/v70/silver17a.html},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SilverHHSGHDRRB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/RacaniereWRBGRB17,
  author     = {Racani{\`{e}}re, S{\'{e}}bastien and
                Weber, Th{\'{e}}ophane and
                Reichert, David P. and
                Buesing, Lars and
                Guez, Arthur and
                Rezende, Danilo Jimenez and
                Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                Vinyals, Oriol and
                Heess, Nicolas and
                Li, Yujia and
                Pascanu, Razvan and
                Battaglia, Peter W. and
                Hassabis, Demis and
                Silver, David and
                Wierstra, Daan},
  editor     = {Guyon, Isabelle and
                von Luxburg, Ulrike and
                Bengio, Samy and
                Wallach, Hanna M. and
                Fergus, Rob and
                Vishwanathan, S. V. N. and
                Garnett, Roman},
  title      = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {5690--5701},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/9e82757e9a1c12cb710ad680db11f6f1-Abstract.html},
  timestamp  = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/RacaniereWRBGRB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint (eprint 1707.06203) filed under the CoRR journal name. Same
% title as DBLP:conf/nips/RacaniereWRBGRB17; this record lists one author fewer
% (Demis Hassabis does not appear here).
@article{DBLP:journals/corr/WeberRRBGRBVHLP17,
  author       = {Weber, Theophane and
                  Racani{\`{e}}re, S{\'{e}}bastien and
                  Reichert, David P. and
                  Buesing, Lars and
                  Guez, Arthur and
                  Rezende, Danilo Jimenez and
                  Badia, Adri{\`{a}} Puigdom{\`{e}}nech and
                  Vinyals, Oriol and
                  Heess, Nicolas and
                  Li, Yujia and
                  Pascanu, Razvan and
                  Battaglia, Peter W. and
                  Silver, David and
                  Wierstra, Daan},
  title        = {Imagination-Augmented Agents for Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1707.06203},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.06203},
  eprinttype   = {arXiv},
  eprint       = {1707.06203},
  timestamp    = {Sat, 02 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/WeberRRBGRBVHLP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (eprint 1712.01815) filed under the CoRR journal name.
@article{DBLP:journals/corr/abs-1712-01815,
  author       = {Silver, David and
                  Hubert, Thomas and
                  Schrittwieser, Julian and
                  Antonoglou, Ioannis and
                  Lai, Matthew and
                  Guez, Arthur and
                  Lanctot, Marc and
                  Sifre, Laurent and
                  Kumaran, Dharshan and
                  Graepel, Thore and
                  Lillicrap, Timothy P. and
                  Simonyan, Karen and
                  Hassabis, Demis},
  title        = {Mastering Chess and Shogi by Self-Play with a General Reinforcement
                  Learning Algorithm},
  journal      = {CoRR},
  volume       = {abs/1712.01815},
  year         = {2017},
  url          = {http://arxiv.org/abs/1712.01815},
  eprinttype   = {arXiv},
  eprint       = {1712.01815},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1712-01815.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (volume 529, issue 7587). The game name "Go" is brace-protected
% so that sentence-casing styles do not lowercase it, and the journal is stored
% under its full one-word title rather than the abbreviation "Nat.".
@article{DBLP:journals/nature/SilverHMGSDSAPL16,
  author       = {Silver, David and
                  Huang, Aja and
                  Maddison, Chris J. and
                  Guez, Arthur and
                  Sifre, Laurent and
                  van den Driessche, George and
                  Schrittwieser, Julian and
                  Antonoglou, Ioannis and
                  Panneershelvam, Vedavyas and
                  Lanctot, Marc and
                  Dieleman, Sander and
                  Grewe, Dominik and
                  Nham, John and
                  Kalchbrenner, Nal and
                  Sutskever, Ilya and
                  Lillicrap, Timothy P. and
                  Leach, Madeleine and
                  Kavukcuoglu, Koray and
                  Graepel, Thore and
                  Hassabis, Demis},
  title        = {Mastering the game of {Go} with deep neural networks and tree search},
  journal      = {Nature},
  volume       = {529},
  number       = {7587},
  pages        = {484--489},
  year         = {2016},
  url          = {https://doi.org/10.1038/nature16961},
  doi          = {10.1038/NATURE16961},
  timestamp    = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nature/SilverHMGSDSAPL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the Thirtieth AAAI Conference proceedings (2016). The CoRR
% record DBLP:journals/corr/BellemareOGTM15 has the same title and author list.
@inproceedings{DBLP:conf/aaai/BellemareOGTM16,
  author       = {Bellemare, Marc G. and
                  Ostrovski, Georg and
                  Guez, Arthur and
                  Thomas, Philip S. and
                  Munos, R{\'{e}}mi},
  editor       = {Schuurmans, Dale and
                  Wellman, Michael P.},
  title        = {Increasing the Action Gap: New Operators for Reinforcement Learning},
  booktitle    = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence,
                  February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages        = {1476--1483},
  publisher    = {{AAAI} Press},
  year         = {2016},
  url          = {https://doi.org/10.1609/aaai.v30i1.10303},
  doi          = {10.1609/AAAI.V30I1.10303},
  timestamp    = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/BellemareOGTM16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the Thirtieth AAAI Conference proceedings (2016). The "Q"
% of "Q-Learning" is brace-protected so sentence-casing styles keep it uppercase.
% The CoRR record DBLP:journals/corr/HasseltGS15 has the same authors and the
% same title up to capitalisation.
@inproceedings{DBLP:conf/aaai/HasseltGS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Silver, David},
  editor       = {Schuurmans, Dale and
                  Wellman, Michael P.},
  title        = {Deep Reinforcement Learning with Double {Q}-Learning},
  booktitle    = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence,
                  February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages        = {2094--2100},
  publisher    = {{AAAI} Press},
  year         = {2016},
  url          = {https://doi.org/10.1609/aaai.v30i1.10295},
  doi          = {10.1609/AAAI.V30I1.10295},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the NeurIPS 2016 proceedings (volume 29). The CoRR record
% DBLP:journals/corr/HasseltGHS16 has a similar title and an overlapping author
% list -- presumably an earlier preprint of this work (TODO confirm).
@inproceedings{DBLP:conf/nips/HasseltGHMS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Hessel, Matteo and
                  Mnih, Volodymyr and
                  Silver, David},
  editor       = {Lee, Daniel D. and
                  Sugiyama, Masashi and
                  von Luxburg, Ulrike and
                  Guyon, Isabelle and
                  Garnett, Roman},
  title        = {Learning values across many orders of magnitude},
  booktitle    = {Advances in Neural Information Processing Systems 29: Annual Conference
                  on Neural Information Processing Systems 2016, December 5-10, 2016,
                  Barcelona, Spain},
  pages        = {4287--4295},
  year         = {2016},
  url          = {https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/HasseltGHMS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (eprint 1602.07714) filed under the CoRR journal name. Its title
% differs from the NeurIPS record DBLP:conf/nips/HasseltGHMS16 ("functions" and
% "magnitudes" here), so the two must not be merged blindly.
@article{DBLP:journals/corr/HasseltGHS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Hessel, Matteo and
                  Silver, David},
  title        = {Learning functions across many orders of magnitudes},
  journal      = {CoRR},
  volume       = {abs/1602.07714},
  year         = {2016},
  url          = {http://arxiv.org/abs/1602.07714},
  eprinttype   = {arXiv},
  eprint       = {1602.07714},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HasseltGHS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (eprint 1612.08810) filed under the CoRR journal name.
@article{DBLP:journals/corr/SilverHHSGHDRRB16,
  author       = {Silver, David and
                  van Hasselt, Hado and
                  Hessel, Matteo and
                  Schaul, Tom and
                  Guez, Arthur and
                  Harley, Tim and
                  Dulac{-}Arnold, Gabriel and
                  Reichert, David P. and
                  Rabinowitz, Neil C. and
                  Barreto, Andr{\'{e}} and
                  Degris, Thomas},
  title        = {The Predictron: End-To-End Learning and Planning},
  journal      = {CoRR},
  volume       = {abs/1612.08810},
  year         = {2016},
  url          = {http://arxiv.org/abs/1612.08810},
  eprinttype   = {arXiv},
  eprint       = {1612.08810},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/SilverHHSGHDRRB16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (eprint 1509.06461) filed under the CoRR journal name. The "Q"
% of "Q-learning" is brace-protected so sentence-casing styles keep it uppercase.
% The AAAI record DBLP:conf/aaai/HasseltGS16 has the same authors and the same
% title up to capitalisation.
@article{DBLP:journals/corr/HasseltGS15,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Silver, David},
  title        = {Deep Reinforcement Learning with Double {Q}-learning},
  journal      = {CoRR},
  volume       = {abs/1509.06461},
  year         = {2015},
  url          = {http://arxiv.org/abs/1509.06461},
  eprinttype   = {arXiv},
  eprint       = {1509.06461},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HasseltGS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (eprint 1512.04860) filed under the CoRR journal name. Same
% title and author list as the AAAI record DBLP:conf/aaai/BellemareOGTM16.
@article{DBLP:journals/corr/BellemareOGTM15,
  author       = {Bellemare, Marc G. and
                  Ostrovski, Georg and
                  Guez, Arthur and
                  Thomas, Philip S. and
                  Munos, R{\'{e}}mi},
  title        = {Increasing the Action Gap: New Operators for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1512.04860},
  year         = {2015},
  url          = {http://arxiv.org/abs/1512.04860},
  eprinttype   = {arXiv},
  eprint       = {1512.04860},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/BellemareOGTM15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the NeurIPS 2014 proceedings (volume 27). "Bayes" opens the
% title, so sentence-casing styles already keep its capital letter.
@inproceedings{DBLP:conf/nips/GuezHSD14,
  author       = {Guez, Arthur and
                  Heess, Nicolas and
                  Silver, David and
                  Dayan, Peter},
  editor       = {Ghahramani, Zoubin and
                  Welling, Max and
                  Cortes, Corinna and
                  Lawrence, Neil D. and
                  Weinberger, Kilian Q.},
  title        = {Bayes-Adaptive Simulation-based Search with Value Function Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 27: Annual Conference
                  on Neural Information Processing Systems 2014, December 8-13 2014,
                  Montreal, Quebec, Canada},
  pages        = {451--459},
  year         = {2014},
  url          = {https://proceedings.neurips.cc/paper/2014/hash/839ab46820b524afda05122893c2fe8e-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GuezHSD14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (eprint 1402.1958) filed under the CoRR journal name. "Bayes" is
% brace-protected so sentence-casing styles do not lowercase the proper noun.
@article{DBLP:journals/corr/GuezSD14,
  author       = {Guez, Arthur and
                  Silver, David and
                  Dayan, Peter},
  title        = {Better Optimism By {Bayes}: Adaptive Planning with Rich Models},
  journal      = {CoRR},
  volume       = {abs/1402.1958},
  year         = {2014},
  url          = {http://arxiv.org/abs/1402.1958},
  eprinttype   = {arXiv},
  eprint       = {1402.1958},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/GuezSD14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (volume 48, no issue number recorded). The proper nouns "Bayes"
% and "Monte-Carlo" are brace-protected so sentence-casing styles do not
% lowercase them.
@article{DBLP:journals/jair/GuezSD13,
  author       = {Guez, Arthur and
                  Silver, David and
                  Dayan, Peter},
  title        = {Scalable and Efficient {Bayes}-Adaptive Reinforcement Learning Based
                  on {Monte-Carlo} Tree Search},
  journal      = {J. Artif. Intell. Res.},
  volume       = {48},
  pages        = {841--883},
  year         = {2013},
  url          = {https://doi.org/10.1613/jair.4117},
  doi          = {10.1613/JAIR.4117},
  timestamp    = {Mon, 21 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jair/GuezSD13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the NeurIPS 2012 proceedings (volume 25). "Bayes" is
% brace-protected so sentence-casing styles do not lowercase the proper noun.
% The CoRR record DBLP:journals/corr/abs-1205-3109 has the same title and authors.
@inproceedings{DBLP:conf/nips/GuezSD12,
  author       = {Guez, Arthur and
                  Silver, David and
                  Dayan, Peter},
  editor       = {Bartlett, Peter L. and
                  Pereira, Fernando C. N. and
                  Burges, Christopher J. C. and
                  Bottou, L{\'{e}}on and
                  Weinberger, Kilian Q.},
  title        = {Efficient {Bayes}-Adaptive Reinforcement Learning using Sample-Based
                  Search},
  booktitle    = {Advances in Neural Information Processing Systems 25: 26th Annual
                  Conference on Neural Information Processing Systems 2012. Proceedings
                  of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages        = {1034--1042},
  year         = {2012},
  url          = {https://proceedings.neurips.cc/paper/2012/hash/35051070e572e47d2c26c241ab88307f-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GuezSD12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (eprint 1205.3109) filed under the CoRR journal name. "Bayes" is
% brace-protected so sentence-casing styles do not lowercase the proper noun.
% Same title and authors as the NeurIPS record DBLP:conf/nips/GuezSD12.
@article{DBLP:journals/corr/abs-1205-3109,
  author       = {Guez, Arthur and
                  Silver, David and
                  Dayan, Peter},
  title        = {Efficient {Bayes}-Adaptive Reinforcement Learning using Sample-Based
                  Search},
  journal      = {CoRR},
  volume       = {abs/1205.3109},
  year         = {2012},
  url          = {http://arxiv.org/abs/1205.3109},
  eprinttype   = {arXiv},
  eprint       = {1205.3109},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1205-3109.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the ICRA 2010 proceedings. No editor field is recorded.
% "USA" in the booktitle is brace-protected like the other acronyms in this
% entry, so case-changing styles leave it intact.
@inproceedings{DBLP:conf/icra/GuezP10,
  author       = {Guez, Arthur and
                  Pineau, Joelle},
  title        = {Multi-tasking {SLAM}},
  booktitle    = {{IEEE} International Conference on Robotics and Automation, {ICRA}
                  2010, Anchorage, Alaska, {USA}, 3-7 May 2010},
  pages        = {377--384},
  publisher    = {{IEEE}},
  year         = {2010},
  url          = {https://doi.org/10.1109/ROBOT.2010.5509969},
  doi          = {10.1109/ROBOT.2010.5509969},
  timestamp    = {Mon, 22 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/GuezP10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (volume 19, issue 4). The subtitle now starts with a capital
% "A" so the whole title is stored in Title Case; sentence-casing styles keep the
% first letter after a colon as entered.
@article{DBLP:journals/ijns/PineauGVPA09,
  author       = {Pineau, Joelle and
                  Guez, Arthur and
                  Vincent, Robert D. and
                  Panuccio, Gabriella and
                  Avoli, Massimo},
  title        = {Treating Epilepsy via Adaptive Neurostimulation: A Reinforcement Learning
                  Approach},
  journal      = {Int. J. Neural Syst.},
  volume       = {19},
  number       = {4},
  pages        = {227--240},
  year         = {2009},
  url          = {https://doi.org/10.1142/S0129065709001987},
  doi          = {10.1142/S0129065709001987},
  timestamp    = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ijns/PineauGVPA09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper in the Twenty-Third AAAI Conference proceedings (2008). "USA"
% in the booktitle is brace-protected like the other acronyms in this entry.
% NOTE(review): no DOI is recorded and the URL path points at the IAAI 2008
% section of the AAAI library -- presumably the paper belongs to the co-located
% IAAI track; confirm before citing the venue.
@inproceedings{DBLP:conf/aaai/GuezVAP08,
  author       = {Guez, Arthur and
                  Vincent, Robert D. and
                  Avoli, Massimo and
                  Pineau, Joelle},
  editor       = {Fox, Dieter and
                  Gomes, Carla P.},
  title        = {Adaptive Treatment of Epilepsy via Batch-mode Reinforcement Learning},
  booktitle    = {Proceedings of the Twenty-Third {AAAI} Conference on Artificial Intelligence,
                  {AAAI} 2008, Chicago, Illinois, {USA}, July 13-17, 2008},
  pages        = {1671--1678},
  publisher    = {{AAAI} Press},
  year         = {2008},
  url          = {http://www.aaai.org/Library/IAAI/2008/iaai08-008.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/GuezVAP08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}