default search action

combined dblp search
author search
venue search
publication search

ask others

BibTeX records: Bilal Piot

Name: dblp XML data dump
Creator: Schloss Dagstuhl - Leibniz Center for Informatics
Published: 1993
License: https://creativecommons.org/publicdomain/zero/1.0/
Keywords: dblp, XML, computer science, scholarly publications, metadata

> Home > Persons > Bilal Piot

download as .bib file

% AISTATS 2024 proceedings paper (PMLR volume 238). The CoRR record
% DBLP:journals/corr/abs-2310-12036 in this file carries the same title.
@inproceedings{DBLP:conf/aistats/AzarGPMRVC24,
  author     = {Mohammad Gheshlaghi Azar
                and Zhaohan Daniel Guo
                and Bilal Piot
                and R{\'{e}}mi Munos
                and Mark Rowland
                and Michal Valko
                and Daniele Calandriello},
  title      = {A General Theoretical Paradigm to Understand Learning from Human Preferences},
  booktitle  = {International Conference on Artificial Intelligence and Statistics,
                2-4 May 2024, Palau de Congressos, Valencia, Spain},
  editor     = {Sanjoy Dasgupta
                and Stephan Mandt
                and Yingzhen Li},
  series     = {Proceedings of Machine Learning Research},
  volume     = {238},
  pages      = {4447--4455},
  publisher  = {{PMLR}},
  year       = {2024},
  url        = {https://proceedings.mlr.press/v238/gheshlaghi-azar24a.html},
  biburl     = {https://dblp.org/rec/conf/aistats/AzarGPMRVC24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 13 May 2024 01:00:00 +0200},
}

% ICLR 2024 paper. The CoRR record DBLP:journals/corr/abs-2305-01521 in this
% file has the same title and author list.
@inproceedings{DBLP:conf/iclr/SaadeKCBSSGVP24,
  author     = {Alaa Saade
                and Steven Kapturowski
                and Daniele Calandriello
                and Charles Blundell
                and Pablo Sprechmann
                and Leopoldo Sarra
                and Oliver Groth
                and Michal Valko
                and Bilal Piot},
  title      = {Unlocking the Power of Representations in Long-term Novelty-based Exploration},
  booktitle  = {The Twelfth International Conference on Learning Representations,
                {ICLR} 2024, Vienna, Austria, May 7-11, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=OwtMhMSybu},
  biburl     = {https://dblp.org/rec/conf/iclr/SaadeKCBSSGVP24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 29 Jul 2024 16:19:40 +0200},
}

% ICML 2024 paper. The CoRR record DBLP:journals/corr/abs-2403-08635 in this
% file carries the same title.
@inproceedings{DBLP:conf/icml/CalandrielloGMR24,
  author     = {Daniele Calandriello
                and Zhaohan Daniel Guo
                and R{\'{e}}mi Munos
                and Mark Rowland
                and Yunhao Tang
                and Bernardo {\'{A}}vila Pires
                and Pierre Harvey Richemond
                and Charline Le Lan
                and Michal Valko
                and Tianqi Liu
                and Rishabh Joshi
                and Zeyu Zheng
                and Bilal Piot},
  title      = {Human Alignment of Large Language Models through Online Preference Optimisation},
  booktitle  = {Forty-first International Conference on Machine Learning,
                {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=2RQqg2Y7Y6},
  biburl     = {https://dblp.org/rec/conf/icml/CalandrielloGMR24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 02 Sep 2024 16:45:29 +0200},
}

% ICML 2024 paper. {Nash} is brace-protected in the title so that styles which
% sentence-case titles keep the proper noun capitalised.
@inproceedings{DBLP:conf/icml/MunosVCARGTGMFM24,
  author     = {R{\'{e}}mi Munos
                and Michal Valko
                and Daniele Calandriello
                and Mohammad Gheshlaghi Azar
                and Mark Rowland
                and Zhaohan Daniel Guo
                and Yunhao Tang
                and Matthieu Geist
                and Thomas Mesnard
                and C{\^{o}}me Fiegel
                and Andrea Michi
                and Marco Selvi
                and Sertan Girgin
                and Nikola Momchev
                and Olivier Bachem
                and Daniel J. Mankowitz
                and Doina Precup
                and Bilal Piot},
  title      = {{Nash} Learning from Human Feedback},
  booktitle  = {Forty-first International Conference on Machine Learning,
                {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=Y5AmNYiyCQ},
  biburl     = {https://dblp.org/rec/conf/icml/MunosVCARGTGMFM24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 02 Sep 2024 01:00:00 +0200},
}

% ICML 2024 paper. The CoRR record DBLP:journals/corr/abs-2402-05749 in this
% file has the same title and author list.
@inproceedings{DBLP:conf/icml/TangGZCMRRVPP24,
  author     = {Yunhao Tang
                and Zhaohan Daniel Guo
                and Zeyu Zheng
                and Daniele Calandriello
                and R{\'{e}}mi Munos
                and Mark Rowland
                and Pierre Harvey Richemond
                and Michal Valko
                and Bernardo {\'{A}}vila Pires
                and Bilal Piot},
  title      = {Generalized Preference Optimization: {A} Unified Approach to Offline Alignment},
  booktitle  = {Forty-first International Conference on Machine Learning,
                {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=gu3nacA9AH},
  biburl     = {https://dblp.org/rec/conf/icml/TangGZCMRRVPP24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 02 Sep 2024 01:00:00 +0200},
}

% arXiv preprint 2402.04792, recorded by dblp as a CoRR article.
@article{DBLP:journals/corr/abs-2402-04792,
  author     = {Shangmin Guo
                and Biao Zhang
                and Tianlin Liu
                and Tianqi Liu
                and Misha Khalman
                and Felipe Llinares
                and Alexandre Ram{\'{e}}
                and Thomas Mesnard
                and Yao Zhao
                and Bilal Piot
                and Johan Ferret
                and Mathieu Blondel},
  title      = {Direct Language Model Alignment from Online {AI} Feedback},
  journal    = {CoRR},
  volume     = {abs/2402.04792},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2402.04792},
  doi        = {10.48550/ARXIV.2402.04792},
  url        = {https://doi.org/10.48550/arXiv.2402.04792},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-04792.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Tue, 04 Jun 2024 01:00:00 +0200},
}

% arXiv preprint 2402.05749 (CoRR). The ICML record
% DBLP:conf/icml/TangGZCMRRVPP24 in this file has the same title and authors.
@article{DBLP:journals/corr/abs-2402-05749,
  author     = {Yunhao Tang
                and Zhaohan Daniel Guo
                and Zeyu Zheng
                and Daniele Calandriello
                and R{\'{e}}mi Munos
                and Mark Rowland
                and Pierre Harvey Richemond
                and Michal Valko
                and Bernardo {\'{A}}vila Pires
                and Bilal Piot},
  title      = {Generalized Preference Optimization: {A} Unified Approach to Offline Alignment},
  journal    = {CoRR},
  volume     = {abs/2402.05749},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2402.05749},
  doi        = {10.48550/ARXIV.2402.05749},
  url        = {https://doi.org/10.48550/arXiv.2402.05749},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-05749.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Wed, 14 Feb 2024 00:00:00 +0100},
}

% arXiv preprint 2403.08635 (CoRR). The second author is spelled
% "Zhaohan Daniel Guo" to agree with the ICML record of the same paper
% (DBLP:conf/icml/CalandrielloGMR24), so both records render one person alike.
@article{DBLP:journals/corr/abs-2403-08635,
  author     = {Daniele Calandriello
                and Zhaohan Daniel Guo
                and R{\'{e}}mi Munos
                and Mark Rowland
                and Yunhao Tang
                and Bernardo {\'{A}}vila Pires
                and Pierre Harvey Richemond
                and Charline Le Lan
                and Michal Valko
                and Tianqi Liu
                and Rishabh Joshi
                and Zeyu Zheng
                and Bilal Piot},
  title      = {Human Alignment of Large Language Models through Online Preference Optimisation},
  journal    = {CoRR},
  volume     = {abs/2403.08635},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2403.08635},
  doi        = {10.48550/ARXIV.2403.08635},
  url        = {https://doi.org/10.48550/arXiv.2403.08635},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-08635.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Fri, 21 Jun 2024 01:00:00 +0200},
}

% arXiv preprint 2405.14655, recorded by dblp as a CoRR article.
@article{DBLP:journals/corr/abs-2405-14655,
  author     = {Lior Shani
                and Aviv Rosenberg
                and Asaf B. Cassel
                and Oran Lang
                and Daniele Calandriello
                and Avital Zipori
                and Hila Noga
                and Orgad Keller
                and Bilal Piot
                and Idan Szpektor
                and Avinatan Hassidim
                and Yossi Matias
                and R{\'{e}}mi Munos},
  title      = {Multi-turn Reinforcement Learning from Preference Human Feedback},
  journal    = {CoRR},
  volume     = {abs/2405.14655},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2405.14655},
  doi        = {10.48550/ARXIV.2405.14655},
  url        = {https://doi.org/10.48550/arXiv.2405.14655},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2405-14655.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 02 Sep 2024 01:00:00 +0200},
}

% arXiv preprint 2405.19107 (CoRR). The third author is spelled
% "Zhaohan Daniel Guo", the form used by the other records in this file.
% NOTE(review): presumed to be the same person as in those records - confirm.
@article{DBLP:journals/corr/abs-2405-19107,
  author     = {Pierre Harvey Richemond
                and Yunhao Tang
                and Zhaohan Daniel Guo
                and Daniele Calandriello
                and Mohammad Gheshlaghi Azar
                and Rafael Rafailov
                and Bernardo {\'{A}}vila Pires
                and Eugene Tarassov
                and Lucas Spangher
                and Will Ellsworth
                and Aliaksei Severyn
                and Jonathan Mallinson
                and Lior Shani
                and Gil Shamir
                and Rishabh Joshi
                and Tianqi Liu
                and R{\'{e}}mi Munos
                and Bilal Piot},
  title      = {Offline Regularised Reinforcement Learning for Large Language Models Alignment},
  journal    = {CoRR},
  volume     = {abs/2405.19107},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2405.19107},
  doi        = {10.48550/ARXIV.2405.19107},
  url        = {https://doi.org/10.48550/arXiv.2405.19107},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2405-19107.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Fri, 21 Jun 2024 01:00:00 +0200},
}

% arXiv preprint 2408.00118 (CoRR). The author field lists 99 names and stops
% partway through its alphabetical run (at "Lilly McNealus"), so it ends with
% `and others` to mark the list as incomplete instead of presenting it as the
% full author list. NOTE(review): confirm the truncation against the arXiv record.
@article{DBLP:journals/corr/abs-2408-00118,
  author     = {Morgane Rivi{\`{e}}re
                and Shreya Pathak
                and Pier Giuseppe Sessa
                and Cassidy Hardin
                and Surya Bhupatiraju
                and L{\'{e}}onard Hussenot
                and Thomas Mesnard
                and Bobak Shahriari
                and Alexandre Ram{\'{e}}
                and Johan Ferret
                and Peter Liu
                and Pouya Tafti
                and Abe Friesen
                and Michelle Casbon
                and Sabela Ramos
                and Ravin Kumar
                and Charline Le Lan
                and Sammy Jerome
                and Anton Tsitsulin
                and Nino Vieillard
                and Piotr Stanczyk
                and Sertan Girgin
                and Nikola Momchev
                and Matt Hoffman
                and Shantanu Thakoor
                and Jean{-}Bastien Grill
                and Behnam Neyshabur
                and Olivier Bachem
                and Alanna Walton
                and Aliaksei Severyn
                and Alicia Parrish
                and Aliya Ahmad
                and Allen Hutchison
                and Alvin Abdagic
                and Amanda Carl
                and Amy Shen
                and Andy Brock
                and Andy Coenen
                and Anthony Laforge
                and Antonia Paterson
                and Ben Bastian
                and Bilal Piot
                and Bo Wu
                and Brandon Royal
                and Charlie Chen
                and Chintu Kumar
                and Chris Perry
                and Chris Welty
                and Christopher A. Choquette{-}Choo
                and Danila Sinopalnikov
                and David Weinberger
                and Dimple Vijaykumar
                and Dominika Rogozinska
                and Dustin Herbison
                and Elisa Bandy
                and Emma Wang
                and Eric Noland
                and Erica Moreira
                and Evan Senter
                and Evgenii Eltyshev
                and Francesco Visin
                and Gabriel Rasskin
                and Gary Wei
                and Glenn Cameron
                and Gus Martins
                and Hadi Hashemi
                and Hanna Klimczak{-}Plucinska
                and Harleen Batra
                and Harsh Dhand
                and Ivan Nardini
                and Jacinda Mein
                and Jack Zhou
                and James Svensson
                and Jeff Stanway
                and Jetha Chan
                and Jin Peng Zhou
                and Joana Carrasqueira
                and Joana Iljazi
                and Jocelyn Becker
                and Joe Fernandez
                and Joost van Amersfoort
                and Josh Gordon
                and Josh Lipschultz
                and Josh Newlan
                and Ju{-}yeong Ji
                and Kareem Mohamed
                and Kartikeya Badola
                and Kat Black
                and Katie Millican
                and Keelin McDonell
                and Kelvin Nguyen
                and Kiranbir Sodhia
                and Kish Greene
                and Lars Lowe Sj{\"{o}}sund
                and Lauren Usui
                and Laurent Sifre
                and Lena Heuermann
                and Leticia Lago
                and Lilly McNealus
                and others},
  title      = {Gemma 2: Improving Open Language Models at a Practical Size},
  journal    = {CoRR},
  volume     = {abs/2408.00118},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2408.00118},
  doi        = {10.48550/ARXIV.2408.00118},
  url        = {https://doi.org/10.48550/arXiv.2408.00118},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2408-00118.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Fri, 06 Sep 2024 01:00:00 +0200},
}

% ICML 2023 paper (PMLR volume 202). The CoRR record
% DBLP:journals/corr/abs-2302-04817 in this file carries the same title.
@inproceedings{DBLP:conf/icml/RichemondTTSPH23,
  author     = {Pierre Harvey Richemond
                and Allison C. Tam
                and Yunhao Tang
                and Florian Strub
                and Bilal Piot
                and Felix Hill},
  title      = {The Edge of Orthogonality: {A} Simple View of What Makes {BYOL} Tick},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023,
                23-29 July 2023, Honolulu, Hawaii, {USA}},
  editor     = {Andreas Krause
                and Emma Brunskill
                and Kyunghyun Cho
                and Barbara Engelhardt
                and Sivan Sabato
                and Jonathan Scarlett},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {29063--29081},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/richemond23a.html},
  biburl     = {https://dblp.org/rec/conf/icml/RichemondTTSPH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
}

% ICML 2023 paper (PMLR volume 202). The CoRR record
% DBLP:journals/corr/abs-2212-03319 in this file has the same title and authors.
@inproceedings{DBLP:conf/icml/TangGRPCMRALL0T23,
  author     = {Yunhao Tang
                and Zhaohan Daniel Guo
                and Pierre Harvey Richemond
                and Bernardo {\'{A}}vila Pires
                and Yash Chandak
                and R{\'{e}}mi Munos
                and Mark Rowland
                and Mohammad Gheshlaghi Azar
                and Charline Le Lan
                and Clare Lyle
                and Andr{\'{a}}s Gy{\"{o}}rgy
                and Shantanu Thakoor
                and Will Dabney
                and Bilal Piot
                and Daniele Calandriello
                and Michal Valko},
  title      = {Understanding Self-Predictive Learning for Reinforcement Learning},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023,
                23-29 July 2023, Honolulu, Hawaii, {USA}},
  editor     = {Andreas Krause
                and Emma Brunskill
                and Kyunghyun Cho
                and Barbara Engelhardt
                and Sivan Sabato
                and Jonathan Scarlett},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {33632--33656},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/tang23d.html},
  biburl     = {https://dblp.org/rec/conf/icml/TangGRPCMRALL0T23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
}

% arXiv preprint 2302.04817 (CoRR). The first author's given names are written
% in full ("Pierre Harvey Richemond") to agree with the ICML record of the same
% paper (DBLP:conf/icml/RichemondTTSPH23); the style can abbreviate if needed.
@article{DBLP:journals/corr/abs-2302-04817,
  author     = {Pierre Harvey Richemond
                and Allison C. Tam
                and Yunhao Tang
                and Florian Strub
                and Bilal Piot
                and Felix Hill},
  title      = {The Edge of Orthogonality: {A} Simple View of What Makes {BYOL} Tick},
  journal    = {CoRR},
  volume     = {abs/2302.04817},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2302.04817},
  doi        = {10.48550/ARXIV.2302.04817},
  url        = {https://doi.org/10.48550/arXiv.2302.04817},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-04817.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 13 Feb 2023 00:00:00 +0100},
}

% arXiv preprint 2305.01521 (CoRR). The ICLR record
% DBLP:conf/iclr/SaadeKCBSSGVP24 in this file has the same title and authors.
@article{DBLP:journals/corr/abs-2305-01521,
  author     = {Alaa Saade
                and Steven Kapturowski
                and Daniele Calandriello
                and Charles Blundell
                and Pablo Sprechmann
                and Leopoldo Sarra
                and Oliver Groth
                and Michal Valko
                and Bilal Piot},
  title      = {Unlocking the Power of Representations in Long-term Novelty-based Exploration},
  journal    = {CoRR},
  volume     = {abs/2305.01521},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2305.01521},
  doi        = {10.48550/ARXIV.2305.01521},
  url        = {https://doi.org/10.48550/arXiv.2305.01521},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-01521.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Fri, 05 May 2023 01:00:00 +0200},
}

% arXiv preprint 2310.12036 (CoRR). The fourth author is spelled
% "Zhaohan Daniel Guo" to agree with the AISTATS record of the same paper
% (DBLP:conf/aistats/AzarGPMRVC24). Author order is kept as recorded here.
@article{DBLP:journals/corr/abs-2310-12036,
  author     = {Mohammad Gheshlaghi Azar
                and Mark Rowland
                and Bilal Piot
                and Zhaohan Daniel Guo
                and Daniele Calandriello
                and Michal Valko
                and R{\'{e}}mi Munos},
  title      = {A General Theoretical Paradigm to Understand Learning from Human Preferences},
  journal    = {CoRR},
  volume     = {abs/2310.12036},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2310.12036},
  doi        = {10.48550/ARXIV.2310.12036},
  url        = {https://doi.org/10.48550/arXiv.2310.12036},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-12036.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Fri, 27 Oct 2023 01:00:00 +0200},
}

% arXiv preprint 2312.00886 (CoRR). {Nash} is brace-protected in the title so
% that styles which sentence-case titles keep the proper noun capitalised.
@article{DBLP:journals/corr/abs-2312-00886,
  author     = {R{\'{e}}mi Munos
                and Michal Valko
                and Daniele Calandriello
                and Mohammad Gheshlaghi Azar
                and Mark Rowland
                and Zhaohan Daniel Guo
                and Yunhao Tang
                and Matthieu Geist
                and Thomas Mesnard
                and Andrea Michi
                and Marco Selvi
                and Sertan Girgin
                and Nikola Momchev
                and Olivier Bachem
                and Daniel J. Mankowitz
                and Doina Precup
                and Bilal Piot},
  title      = {{Nash} Learning from Human Feedback},
  journal    = {CoRR},
  volume     = {abs/2312.00886},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2312.00886},
  doi        = {10.48550/ARXIV.2312.00886},
  url        = {https://doi.org/10.48550/arXiv.2312.00886},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-00886.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Tue, 02 Jan 2024 00:00:00 +0100},
}

% ICLR 2022 paper, published on OpenReview.net.
@inproceedings{DBLP:conf/iclr/ChaabouniSATTDM22,
  author     = {Rahma Chaabouni
                and Florian Strub
                and Florent Altch{\'{e}}
                and Eugene Tarassov
                and Corentin Tallec
                and Elnaz Davoodi
                and Kory Wallace Mathewson
                and Olivier Tieleman
                and Angeliki Lazaridou
                and Bilal Piot},
  title      = {Emergent Communication at Scale},
  booktitle  = {The Tenth International Conference on Learning Representations,
                {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=AUGBfDIV9rL},
  biburl     = {https://dblp.org/rec/conf/iclr/ChaabouniSATTDM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Thu, 13 Jul 2023 01:00:00 +0200},
}

% NeurIPS 2022 paper. Changes relative to the raw dblp export:
%  - {BYOL-Explore} is brace-protected so sentence-casing styles keep its case.
%  - First author spelled "Zhaohan Daniel Guo", matching the CoRR record of the
%    same paper (DBLP:journals/corr/abs-2206-08332).
%  - The url is stored raw (no LaTeX escape before the underscore) and uses https.
%  - Acronyms in booktitle are braced like {ICML} and {USA} elsewhere in this file.
@inproceedings{DBLP:conf/nips/GuoTPPATSCGTVMA22,
  author     = {Zhaohan Daniel Guo
                and Shantanu Thakoor
                and Miruna Pislar
                and Bernardo {\'{A}}vila Pires
                and Florent Altch{\'{e}}
                and Corentin Tallec
                and Alaa Saade
                and Daniele Calandriello
                and Jean{-}Bastien Grill
                and Yunhao Tang
                and Michal Valko
                and R{\'{e}}mi Munos
                and Mohammad Gheshlaghi Azar
                and Bilal Piot},
  title      = {{BYOL-Explore}: Exploration by Bootstrapped Prediction},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference
                on Neural Information Processing Systems 2022, {NeurIPS} 2022,
                New Orleans, {LA}, {USA}, November 28 - December 9, 2022},
  editor     = {Sanmi Koyejo
                and S. Mohamed
                and A. Agarwal
                and Danielle Belgrave
                and K. Cho
                and A. Oh},
  year       = {2022},
  url        = {https://papers.nips.cc/paper_files/paper/2022/hash/ced0d3b92bb83b15c43ee32c7f57d867-Abstract-Conference.html},
  biburl     = {https://dblp.org/rec/conf/nips/GuoTPPATSCGTVMA22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
}

% arXiv preprint 2206.08332 (CoRR). {BYOL-Explore} is brace-protected in the
% title so that styles which sentence-case titles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2206-08332,
  author     = {Zhaohan Daniel Guo
                and Shantanu Thakoor
                and Miruna Pislar
                and Bernardo {\'{A}}vila Pires
                and Florent Altch{\'{e}}
                and Corentin Tallec
                and Alaa Saade
                and Daniele Calandriello
                and Jean{-}Bastien Grill
                and Yunhao Tang
                and Michal Valko
                and R{\'{e}}mi Munos
                and Mohammad Gheshlaghi Azar
                and Bilal Piot},
  title      = {{BYOL-Explore}: Exploration by Bootstrapped Prediction},
  journal    = {CoRR},
  volume     = {abs/2206.08332},
  year       = {2022},
  eprinttype = {arXiv},
  eprint     = {2206.08332},
  doi        = {10.48550/ARXIV.2206.08332},
  url        = {https://doi.org/10.48550/arXiv.2206.08332},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-08332.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Tue, 21 Jun 2022 01:00:00 +0200},
}

% arXiv preprint 2206.15378 (CoRR). {Stratego} is brace-protected in the title
% so that styles which sentence-case titles keep the game's name capitalised.
@article{DBLP:journals/corr/abs-2206-15378,
  author     = {Julien P{\'{e}}rolat
                and Bart De Vylder
                and Daniel Hennes
                and Eugene Tarassov
                and Florian Strub
                and Vincent de Boer
                and Paul Muller
                and Jerome T. Connor
                and Neil Burch
                and Thomas W. Anthony
                and Stephen McAleer
                and Romuald Elie
                and Sarah H. Cen
                and Zhe Wang
                and Audrunas Gruslys
                and Aleksandra Malysheva
                and Mina Khan
                and Sherjil Ozair
                and Finbarr Timbers
                and Toby Pohlen
                and Tom Eccles
                and Mark Rowland
                and Marc Lanctot
                and Jean{-}Baptiste Lespiau
                and Bilal Piot
                and Shayegan Omidshafiei
                and Edward Lockhart
                and Laurent Sifre
                and Nathalie Beauguerlange
                and R{\'{e}}mi Munos
                and David Silver
                and Satinder Singh
                and Demis Hassabis
                and Karl Tuyls},
  title      = {Mastering the Game of {Stratego} with Model-Free Multiagent Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2206.15378},
  year       = {2022},
  eprinttype = {arXiv},
  eprint     = {2206.15378},
  doi        = {10.48550/ARXIV.2206.15378},
  url        = {https://doi.org/10.48550/arXiv.2206.15378},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-15378.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Wed, 28 Sep 2022 01:00:00 +0200},
}

% arXiv preprint 2212.03319 (CoRR). The ICML record
% DBLP:conf/icml/TangGRPCMRALL0T23 in this file has the same title and authors.
@article{DBLP:journals/corr/abs-2212-03319,
  author     = {Yunhao Tang
                and Zhaohan Daniel Guo
                and Pierre Harvey Richemond
                and Bernardo {\'{A}}vila Pires
                and Yash Chandak
                and R{\'{e}}mi Munos
                and Mark Rowland
                and Mohammad Gheshlaghi Azar
                and Charline Le Lan
                and Clare Lyle
                and Andr{\'{a}}s Gy{\"{o}}rgy
                and Shantanu Thakoor
                and Will Dabney
                and Bilal Piot
                and Daniele Calandriello
                and Michal Valko},
  title      = {Understanding Self-Predictive Learning for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2212.03319},
  year       = {2022},
  eprinttype = {arXiv},
  eprint     = {2212.03319},
  doi        = {10.48550/ARXIV.2212.03319},
  url        = {https://doi.org/10.48550/arXiv.2212.03319},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-03319.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
}

% arXiv preprint 2101.02055 (CoRR). This record has no doi field; the url
% points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2101-02055,
  author     = {Zhaohan Daniel Guo
                and Mohammad Gheshlaghi Azar
                and Alaa Saade
                and Shantanu Thakoor
                and Bilal Piot
                and Bernardo {\'{A}}vila Pires
                and Michal Valko
                and Thomas Mesnard
                and Tor Lattimore
                and R{\'{e}}mi Munos},
  title      = {Geometric Entropic Exploration},
  journal    = {CoRR},
  volume     = {abs/2101.02055},
  year       = {2021},
  eprinttype = {arXiv},
  eprint     = {2101.02055},
  url        = {https://arxiv.org/abs/2101.02055},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-02055.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
}

% arXiv preprint 2110.10819 (CoRR). This record has no doi field; the url
% points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2110-10819,
  author     = {Pedro A. Ortega
                and Markus Kunesch
                and Gr{\'{e}}goire Del{\'{e}}tang
                and Tim Genewein
                and Jordi Grau{-}Moya
                and Joel Veness
                and Jonas Buchli
                and Jonas Degrave
                and Bilal Piot
                and Julien P{\'{e}}rolat
                and Tom Everitt
                and Corentin Tallec
                and Emilio Parisotto
                and Tom Erez
                and Yutian Chen
                and Scott E. Reed
                and Marcus Hutter
                and Nando de Freitas
                and Shane Legg},
  title      = {Shaking the foundations: delusions in sequence models for interaction and control},
  journal    = {CoRR},
  volume     = {abs/2110.10819},
  year       = {2021},
  eprinttype = {arXiv},
  eprint     = {2110.10819},
  url        = {https://arxiv.org/abs/2110.10819},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-10819.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Fri, 26 Jul 2024 01:00:00 +0200},
}

% ICLR 2020 paper, published on OpenReview.net.
@inproceedings{DBLP:conf/iclr/BadiaSVGPKTAPBB20,
  author     = {Adri{\`{a}} Puigdom{\`{e}}nech Badia
                and Pablo Sprechmann
                and Alex Vitvitskyi
                and Zhaohan Daniel Guo
                and Bilal Piot
                and Steven Kapturowski
                and Olivier Tieleman
                and Mart{\'{\i}}n Arjovsky
                and Alexander Pritzel
                and Andrew Bolt
                and Charles Blundell},
  title      = {Never Give Up: Learning Directed Exploration Strategies},
  booktitle  = {8th International Conference on Learning Representations,
                {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=Sye57xStvB},
  biburl     = {https://dblp.org/rec/conf/iclr/BadiaSVGPKTAPBB20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  timestamp  = {Wed, 03 Feb 2021 00:00:00 +0100},
}

% ICML 2020 paper (PMLR v119). Shares its title and author list with the CoRR
% entry DBLP:journals/corr/abs-2003-13350. "Atari" is braced so that
% sentence-casing bibliography styles keep its capital letter.
@inproceedings{DBLP:conf/icml/BadiaPKSVGB20,
  author       = {Adri{\`{a}} Puigdom{\`{e}}nech Badia and
                  Bilal Piot and
                  Steven Kapturowski and
                  Pablo Sprechmann and
                  Alex Vitvitskyi and
                  Zhaohan Daniel Guo and
                  Charles Blundell},
  title        = {Agent57: Outperforming the {Atari} Human Benchmark},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {507--517},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {https://proceedings.mlr.press/v119/badia20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/BadiaPKSVGB20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICML 2020 paper (PMLR v119). Shares its title and author list with the CoRR
% entry DBLP:journals/corr/abs-2004-14646.
@inproceedings{DBLP:conf/icml/GuoPPGAMA20,
  author       = {Zhaohan Daniel Guo and
                  Bernardo {\'{A}}vila Pires and
                  Bilal Piot and
                  Jean{-}Bastien Grill and
                  Florent Altch{\'{e}} and
                  R{\'{e}}mi Munos and
                  Mohammad Gheshlaghi Azar},
  title        = {Bootstrap Latent-Predictive Representations for Multitask Reinforcement
                  Learning},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {3875--3886},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {https://proceedings.mlr.press/v119/guo20g.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/GuoPPGAMA20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2020 paper. See also the CoRR entry DBLP:journals/corr/abs-2006-07733,
% which has a near-identical title and author list. The venue acronym is braced
% like the acronyms in the other booktitle fields of this file.
@inproceedings{DBLP:conf/nips/GrillSATRBDPGAP20,
  author       = {Jean{-}Bastien Grill and
                  Florian Strub and
                  Florent Altch{\'{e}} and
                  Corentin Tallec and
                  Pierre H. Richemond and
                  Elena Buchatskaya and
                  Carl Doersch and
                  Bernardo {\'{A}}vila Pires and
                  Zhaohan Guo and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Koray Kavukcuoglu and
                  R{\'{e}}mi Munos and
                  Michal Valko},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {Bootstrap Your Own Latent - {A} New Approach to Self-Supervised Learning},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/f3ada80d5c4ee70142b17b8192b2958e-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/GrillSATRBDPGAP20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2002.06038 (CoRR). Shares its title and author list with the
% ICLR entry DBLP:conf/iclr/BadiaSVGPKTAPBB20.
@article{DBLP:journals/corr/abs-2002-06038,
  author       = {Adri{\`{a}} Puigdom{\`{e}}nech Badia and
                  Pablo Sprechmann and
                  Alex Vitvitskyi and
                  Zhaohan Daniel Guo and
                  Bilal Piot and
                  Steven Kapturowski and
                  Olivier Tieleman and
                  Mart{\'{\i}}n Arjovsky and
                  Alexander Pritzel and
                  Andrew Bolt and
                  Charles Blundell},
  title        = {Never Give Up: Learning Directed Exploration Strategies},
  journal      = {CoRR},
  volume       = {abs/2002.06038},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.06038},
  eprinttype   = {arXiv},
  eprint       = {2002.06038},
  timestamp    = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-06038.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2003.13350 (CoRR). Shares its title and author list with the
% ICML entry DBLP:conf/icml/BadiaPKSVGB20. "Atari" is braced so that
% sentence-casing bibliography styles keep its capital letter.
@article{DBLP:journals/corr/abs-2003-13350,
  author       = {Adri{\`{a}} Puigdom{\`{e}}nech Badia and
                  Bilal Piot and
                  Steven Kapturowski and
                  Pablo Sprechmann and
                  Alex Vitvitskyi and
                  Zhaohan Daniel Guo and
                  Charles Blundell},
  title        = {Agent57: Outperforming the {Atari} Human Benchmark},
  journal      = {CoRR},
  volume       = {abs/2003.13350},
  year         = {2020},
  url          = {https://arxiv.org/abs/2003.13350},
  eprinttype   = {arXiv},
  eprint       = {2003.13350},
  timestamp    = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2003-13350.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2004.14646 (CoRR). Shares its title and author list with the
% ICML entry DBLP:conf/icml/GuoPPGAMA20.
@article{DBLP:journals/corr/abs-2004-14646,
  author       = {Zhaohan Daniel Guo and
                  Bernardo {\'{A}}vila Pires and
                  Bilal Piot and
                  Jean{-}Bastien Grill and
                  Florent Altch{\'{e}} and
                  R{\'{e}}mi Munos and
                  Mohammad Gheshlaghi Azar},
  title        = {Bootstrap Latent-Predictive Representations for Multitask Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2004.14646},
  year         = {2020},
  url          = {https://arxiv.org/abs/2004.14646},
  eprinttype   = {arXiv},
  eprint       = {2004.14646},
  timestamp    = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2004-14646.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2006.00979 (CoRR).
@article{DBLP:journals/corr/abs-2006-00979,
  author       = {Matt Hoffman and
                  Bobak Shahriari and
                  John Aslanides and
                  Gabriel Barth{-}Maron and
                  Feryal M. P. Behbahani and
                  Tamara Norman and
                  Abbas Abdolmaleki and
                  Albin Cassirer and
                  Fan Yang and
                  Kate Baumli and
                  Sarah Henderson and
                  Alexander Novikov and
                  Sergio G{\'{o}}mez Colmenarejo and
                  Serkan Cabi and
                  {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
                  Tom Le Paine and
                  Andrew Cowie and
                  Ziyu Wang and
                  Bilal Piot and
                  Nando de Freitas},
  title        = {Acme: {A} Research Framework for Distributed Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2006.00979},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.00979},
  eprinttype   = {arXiv},
  eprint       = {2006.00979},
  timestamp    = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-00979.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2006.07733 (CoRR). See also the NeurIPS entry
% DBLP:conf/nips/GrillSATRBDPGAP20, which has a near-identical title and
% author list.
@article{DBLP:journals/corr/abs-2006-07733,
  author       = {Jean{-}Bastien Grill and
                  Florian Strub and
                  Florent Altch{\'{e}} and
                  Corentin Tallec and
                  Pierre H. Richemond and
                  Elena Buchatskaya and
                  Carl Doersch and
                  Bernardo {\'{A}}vila Pires and
                  Zhaohan Daniel Guo and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Koray Kavukcuoglu and
                  R{\'{e}}mi Munos and
                  Michal Valko},
  title        = {Bootstrap Your Own Latent: {A} New Approach to Self-Supervised Learning},
  journal      = {CoRR},
  volume       = {abs/2006.07733},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.07733},
  eprinttype   = {arXiv},
  eprint       = {2006.07733},
  timestamp    = {Wed, 17 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-07733.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2010.10241 (CoRR).
@article{DBLP:journals/corr/abs-2010-10241,
  author       = {Pierre H. Richemond and
                  Jean{-}Bastien Grill and
                  Florent Altch{\'{e}} and
                  Corentin Tallec and
                  Florian Strub and
                  Andrew Brock and
                  Samuel L. Smith and
                  Soham De and
                  Razvan Pascanu and
                  Bilal Piot and
                  Michal Valko},
  title        = {{BYOL} works even without batch statistics},
  journal      = {CoRR},
  volume       = {abs/2010.10241},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.10241},
  eprinttype   = {arXiv},
  eprint       = {2010.10241},
  timestamp    = {Tue, 27 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-10241.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AAMAS 2019 paper. Has the same title as the CoRR entry
% DBLP:journals/corr/BorsaPMP17, which lists a shorter author list.
@inproceedings{DBLP:conf/atal/BorsaHPLHMP19,
  author       = {Diana Borsa and
                  Nicolas Heess and
                  Bilal Piot and
                  Siqi Liu and
                  Leonard Hasenclever and
                  R{\'{e}}mi Munos and
                  Olivier Pietquin},
  editor       = {Edith Elkind and
                  Manuela Veloso and
                  Noa Agmon and
                  Matthew E. Taylor},
  title        = {Observational Learning by Reinforcement Learning},
  booktitle    = {Proceedings of the 18th International Conference on Autonomous Agents
                  and MultiAgent Systems, {AAMAS} '19, Montreal, QC, Canada, May 13-17,
                  2019},
  pages        = {1117--1124},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems},
  year         = {2019},
  url          = {http://dl.acm.org/citation.cfm?id=3331811},
  timestamp    = {Thu, 11 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/BorsaHPLHMP19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2019 paper. Has the same title as the CoRR entry
% DBLP:journals/corr/abs-1912-02503. The venue acronym is braced like the
% acronyms in the other booktitle fields of this file.
@inproceedings{DBLP:conf/nips/HarutyunyanDMAP19,
  author       = {Anna Harutyunyan and
                  Will Dabney and
                  Thomas Mesnard and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Nicolas Heess and
                  Hado van Hasselt and
                  Gregory Wayne and
                  Satinder Singh and
                  Doina Precup and
                  R{\'{e}}mi Munos},
  editor       = {Hanna M. Wallach and
                  Hugo Larochelle and
                  Alina Beygelzimer and
                  Florence d'Alch{\'{e}}{-}Buc and
                  Emily B. Fox and
                  Roman Garnett},
  title        = {Hindsight Credit Assignment},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, {NeurIPS} 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {12467--12476},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/195f15384c2a79cedf293e4a847ce85c-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/HarutyunyanDMAP19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1902.07685 (CoRR).
@article{DBLP:journals/corr/abs-1902-07685,
  author       = {Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Bernardo A. Pires and
                  Jean{-}Bastien Grill and
                  Florent Altch{\'{e}} and
                  R{\'{e}}mi Munos},
  title        = {World Discovery Models},
  journal      = {CoRR},
  volume       = {abs/1902.07685},
  year         = {2019},
  url          = {https://arxiv.org/abs/1902.07685},
  eprinttype   = {arXiv},
  eprint       = {1902.07685},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-07685.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1912.02503 (CoRR). Has the same title as the NeurIPS entry
% DBLP:conf/nips/HarutyunyanDMAP19.
@article{DBLP:journals/corr/abs-1912-02503,
  author       = {Anna Harutyunyan and
                  Will Dabney and
                  Thomas Mesnard and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Nicolas Heess and
                  Hado van Hasselt and
                  Greg Wayne and
                  Satinder Singh and
                  Doina Precup and
                  R{\'{e}}mi Munos},
  title        = {Hindsight Credit Assignment},
  journal      = {CoRR},
  volume       = {abs/1912.02503},
  year         = {2019},
  url          = {https://arxiv.org/abs/1912.02503},
  eprinttype   = {arXiv},
  eprint       = {1912.02503},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1912-02503.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AAAI-18 paper. Has the same title as the CoRR entry
% DBLP:journals/corr/abs-1710-02298.
@inproceedings{DBLP:conf/aaai/HesselMHSODHPAS18,
  author       = {Matteo Hessel and
                  Joseph Modayil and
                  Hado van Hasselt and
                  Tom Schaul and
                  Georg Ostrovski and
                  Will Dabney and
                  Dan Horgan and
                  Bilal Piot and
                  Mohammad Gheshlaghi Azar and
                  David Silver},
  editor       = {Sheila A. McIlraith and
                  Kilian Q. Weinberger},
  title        = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle    = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
                  (AAAI-18), the 30th Innovative Applications of Artificial Intelligence
                  (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
                  Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
                  2-7, 2018},
  pages        = {3215--3222},
  publisher    = {{AAAI} Press},
  year         = {2018},
  url          = {https://doi.org/10.1609/aaai.v32i1.11796},
  doi          = {10.1609/AAAI.V32I1.11796},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AAAI-18 paper. "Q-learning" is braced so that sentence-casing bibliography
% styles keep the capital Q.
@inproceedings{DBLP:conf/aaai/HesterVPLSPHQSO18,
  author       = {Todd Hester and
                  Matej Vecer{\'{\i}}k and
                  Olivier Pietquin and
                  Marc Lanctot and
                  Tom Schaul and
                  Bilal Piot and
                  Dan Horgan and
                  John Quan and
                  Andrew Sendonaris and
                  Ian Osband and
                  Gabriel Dulac{-}Arnold and
                  John P. Agapiou and
                  Joel Z. Leibo and
                  Audrunas Gruslys},
  editor       = {Sheila A. McIlraith and
                  Kilian Q. Weinberger},
  title        = {Deep {Q-learning} From Demonstrations},
  booktitle    = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
                  (AAAI-18), the 30th Innovative Applications of Artificial Intelligence
                  (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
                  Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
                  2-7, 2018},
  pages        = {3223--3230},
  publisher    = {{AAAI} Press},
  year         = {2018},
  url          = {https://doi.org/10.1609/aaai.v32i1.11757},
  doi          = {10.1609/AAAI.V32I1.11757},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HesterVPLSPHQSO18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AISTATS 2018 paper (PMLR v84).
@inproceedings{DBLP:conf/aistats/PerolatPP18,
  author       = {Julien P{\'{e}}rolat and
                  Bilal Piot and
                  Olivier Pietquin},
  editor       = {Amos J. Storkey and
                  Fernando P{\'{e}}rez{-}Cruz},
  title        = {Actor-Critic Fictitious Play in Simultaneous Move Multistage Games},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2018, 9-11 April 2018, Playa Blanca, Lanzarote, Canary Islands,
                  Spain},
  series       = {Proceedings of Machine Learning Research},
  volume       = {84},
  pages        = {919--928},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {https://proceedings.mlr.press/v84/perolat18a.html},
  timestamp    = {Wed, 03 Apr 2019 18:17:22 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/PerolatPP18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2018 conference-track paper. See also the CoRR entry
% DBLP:journals/corr/FortunatoAPMOGM17, which has the same title apart from
% capitalisation and a slightly different author list.
@inproceedings{DBLP:conf/iclr/FortunatoAPMHOG18,
  author       = {Meire Fortunato and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Jacob Menick and
                  Matteo Hessel and
                  Ian Osband and
                  Alex Graves and
                  Volodymyr Mnih and
                  R{\'{e}}mi Munos and
                  Demis Hassabis and
                  Olivier Pietquin and
                  Charles Blundell and
                  Shane Legg},
  title        = {Noisy Networks For Exploration},
  booktitle    = {6th International Conference on Learning Representations, {ICLR} 2018,
                  Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher    = {OpenReview.net},
  year         = {2018},
  url          = {https://openreview.net/forum?id=rywHCPkAW},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/FortunatoAPMHOG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2018 conference-track paper. "Reactor" is braced so that sentence-casing
% bibliography styles keep its capital letter.
@inproceedings{DBLP:conf/iclr/GruslysDAPBM18,
  author       = {Audrunas Gruslys and
                  Will Dabney and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Marc G. Bellemare and
                  R{\'{e}}mi Munos},
  title        = {The {Reactor}: {A} fast and sample-efficient Actor-Critic agent for
                  Reinforcement Learning},
  booktitle    = {6th International Conference on Learning Representations, {ICLR} 2018,
                  Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher    = {OpenReview.net},
  year         = {2018},
  url          = {https://openreview.net/forum?id=rkHVZWZAZ},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/GruslysDAPBM18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1805.11593 (CoRR). "Atari" is braced so that sentence-casing
% bibliography styles keep its capital letter.
@article{DBLP:journals/corr/abs-1805-11593,
  author       = {Tobias Pohlen and
                  Bilal Piot and
                  Todd Hester and
                  Mohammad Gheshlaghi Azar and
                  Dan Horgan and
                  David Budden and
                  Gabriel Barth{-}Maron and
                  Hado van Hasselt and
                  John Quan and
                  Mel Vecer{\'{\i}}k and
                  Matteo Hessel and
                  R{\'{e}}mi Munos and
                  Olivier Pietquin},
  title        = {Observe and Look Further: Achieving Consistent Performance on {Atari}},
  journal      = {CoRR},
  volume       = {abs/1805.11593},
  year         = {2018},
  url          = {https://arxiv.org/abs/1805.11593},
  eprinttype   = {arXiv},
  eprint       = {1805.11593},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1805-11593.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1809.07802 (CoRR).
@article{DBLP:journals/corr/abs-1809-07802,
  author       = {Julien P{\'{e}}rolat and
                  Mateusz Malinowski and
                  Bilal Piot and
                  Olivier Pietquin},
  title        = {Playing the Game of Universal Adversarial Perturbations},
  journal      = {CoRR},
  volume       = {abs/1809.07802},
  year         = {2018},
  url          = {https://arxiv.org/abs/1809.07802},
  eprinttype   = {arXiv},
  eprint       = {1809.07802},
  timestamp    = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1809-07802.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1811.06407 (CoRR).
@article{DBLP:journals/corr/abs-1811-06407,
  author       = {Zhaohan Daniel Guo and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Bernardo A. Pires and
                  Toby Pohlen and
                  R{\'{e}}mi Munos},
  title        = {Neural Predictive Belief Representations},
  journal      = {CoRR},
  volume       = {abs/1811.06407},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.06407},
  eprinttype   = {arXiv},
  eprint       = {1811.06407},
  timestamp    = {Sun, 25 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-06407.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article in IEEE Trans. Neural Networks Learn. Syst., vol. 28, no. 8 (2017).
@article{DBLP:journals/tnn/PiotGP17,
  author       = {Bilal Piot and
                  Matthieu Geist and
                  Olivier Pietquin},
  title        = {Bridging the Gap Between Imitation Learning and Inverse Reinforcement
                  Learning},
  journal      = {{IEEE} Trans. Neural Networks Learn. Syst.},
  volume       = {28},
  number       = {8},
  pages        = {1814--1826},
  year         = {2017},
  url          = {https://doi.org/10.1109/TNNLS.2016.2543000},
  doi          = {10.1109/TNNLS.2016.2543000},
  timestamp    = {Mon, 09 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tnn/PiotGP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AISTATS 2017 paper (PMLR v54). The eponyms "Nash" and "Markov" are braced so
% that sentence-casing bibliography styles keep their capital letters.
@inproceedings{DBLP:conf/aistats/PerolatSPP17,
  author       = {Julien P{\'{e}}rolat and
                  Florian Strub and
                  Bilal Piot and
                  Olivier Pietquin},
  editor       = {Aarti Singh and
                  Xiaojin (Jerry) Zhu},
  title        = {Learning {Nash} Equilibrium for General-Sum {Markov} Games from Batch
                  Data},
  booktitle    = {Proceedings of the 20th International Conference on Artificial Intelligence
                  and Statistics, {AISTATS} 2017, 20-22 April 2017, Fort Lauderdale,
                  FL, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {54},
  pages        = {232--241},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {https://proceedings.mlr.press/v54/perolat17a.html},
  timestamp    = {Wed, 29 May 2019 08:41:44 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/PerolatSPP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% IJCAI 2017 paper. Shares its title and author list with the CoRR entry
% DBLP:journals/corr/StrubVMPCP17 (arXiv 1703.05423).
@inproceedings{DBLP:conf/ijcai/StrubVMPCP17,
  author       = {Florian Strub and
                  Harm de Vries and
                  J{\'{e}}r{\'{e}}mie Mary and
                  Bilal Piot and
                  Aaron C. Courville and
                  Olivier Pietquin},
  editor       = {Carles Sierra},
  title        = {End-to-end optimization of goal-driven and visually grounded dialogue
                  systems},
  booktitle    = {Proceedings of the Twenty-Sixth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
                  19-25, 2017},
  pages        = {2765--2771},
  publisher    = {ijcai.org},
  year         = {2017},
  url          = {https://doi.org/10.24963/ijcai.2017/385},
  doi          = {10.24963/IJCAI.2017/385},
  timestamp    = {Tue, 20 Aug 2019 16:16:54 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/StrubVMPCP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NIPS 2017 paper. The eponym "Bellman" is braced so that sentence-casing
% bibliography styles keep its capital letter.
@inproceedings{DBLP:conf/nips/GeistPP17,
  author       = {Matthieu Geist and
                  Bilal Piot and
                  Olivier Pietquin},
  editor       = {Isabelle Guyon and
                  Ulrike von Luxburg and
                  Samy Bengio and
                  Hanna M. Wallach and
                  Rob Fergus and
                  S. V. N. Vishwanathan and
                  Roman Garnett},
  title        = {Is the {Bellman} residual a bad proxy?},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {3205--3214},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/e0ab531ec312161511493b002f9be2ee-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/GeistPP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1703.05423 (CoRR). Shares its title and author list with the
% IJCAI entry DBLP:conf/ijcai/StrubVMPCP17.
@article{DBLP:journals/corr/StrubVMPCP17,
  author       = {Florian Strub and
                  Harm de Vries and
                  J{\'{e}}r{\'{e}}mie Mary and
                  Bilal Piot and
                  Aaron C. Courville and
                  Olivier Pietquin},
  title        = {End-to-end optimization of goal-driven and visually grounded dialogue
                  systems},
  journal      = {CoRR},
  volume       = {abs/1703.05423},
  year         = {2017},
  url          = {https://arxiv.org/abs/1703.05423},
  eprinttype   = {arXiv},
  eprint       = {1703.05423},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/StrubVMPCP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1704.03732 (CoRR).
@article{DBLP:journals/corr/HesterVPLSPSDOA17,
  author       = {Todd Hester and
                  Matej Vecer{\'{\i}}k and
                  Olivier Pietquin and
                  Marc Lanctot and
                  Tom Schaul and
                  Bilal Piot and
                  Andrew Sendonaris and
                  Gabriel Dulac{-}Arnold and
                  Ian Osband and
                  John P. Agapiou and
                  Joel Z. Leibo and
                  Audrunas Gruslys},
  title        = {Learning from Demonstrations for Real World Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1704.03732},
  year         = {2017},
  url          = {https://arxiv.org/abs/1704.03732},
  eprinttype   = {arXiv},
  eprint       = {1704.03732},
  timestamp    = {Tue, 28 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HesterVPLSPSDOA17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1706.06617 (CoRR). Has the same title as the AAMAS entry
% DBLP:conf/atal/BorsaHPLHMP19, which lists a longer author list.
@article{DBLP:journals/corr/BorsaPMP17,
  author       = {Diana Borsa and
                  Bilal Piot and
                  R{\'{e}}mi Munos and
                  Olivier Pietquin},
  title        = {Observational Learning by Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1706.06617},
  year         = {2017},
  url          = {https://arxiv.org/abs/1706.06617},
  eprinttype   = {arXiv},
  eprint       = {1706.06617},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/BorsaPMP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1706.10295 (CoRR). See also the ICLR entry
% DBLP:conf/iclr/FortunatoAPMHOG18, which has the same title apart from
% capitalisation and a slightly different author list.
@article{DBLP:journals/corr/FortunatoAPMOGM17,
  author       = {Meire Fortunato and
                  Mohammad Gheshlaghi Azar and
                  Bilal Piot and
                  Jacob Menick and
                  Ian Osband and
                  Alex Graves and
                  Vlad Mnih and
                  R{\'{e}}mi Munos and
                  Demis Hassabis and
                  Olivier Pietquin and
                  Charles Blundell and
                  Shane Legg},
  title        = {Noisy Networks for Exploration},
  journal      = {CoRR},
  volume       = {abs/1706.10295},
  year         = {2017},
  url          = {https://arxiv.org/abs/1706.10295},
  eprinttype   = {arXiv},
  eprint       = {1706.10295},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/FortunatoAPMOGM17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1707.08817 (CoRR).
@article{DBLP:journals/corr/VecerikHSWPPHRL17,
  author       = {Matej Vecer{\'{\i}}k and
                  Todd Hester and
                  Jonathan Scholz and
                  Fumin Wang and
                  Olivier Pietquin and
                  Bilal Piot and
                  Nicolas Heess and
                  Thomas Roth{\"{o}}rl and
                  Thomas Lampe and
                  Martin A. Riedmiller},
  title        = {Leveraging Demonstrations for Deep Reinforcement Learning on Robotics
                  Problems with Sparse Rewards},
  journal      = {CoRR},
  volume       = {abs/1707.08817},
  year         = {2017},
  url          = {https://arxiv.org/abs/1707.08817},
  eprinttype   = {arXiv},
  eprint       = {1707.08817},
  timestamp    = {Wed, 24 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/VecerikHSWPPHRL17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 1710.02298 (CoRR). Has the same title as the AAAI entry
% DBLP:conf/aaai/HesselMHSODHPAS18.
@article{DBLP:journals/corr/abs-1710-02298,
  author       = {Matteo Hessel and
                  Joseph Modayil and
                  Hado van Hasselt and
                  Tom Schaul and
                  Georg Ostrovski and
                  Will Dabney and
                  Daniel Horgan and
                  Bilal Piot and
                  Mohammad Gheshlaghi Azar and
                  David Silver},
  title        = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1710.02298},
  year         = {2017},
  url          = {https://arxiv.org/abs/1710.02298},
  eprinttype   = {arXiv},
  eprint       = {1710.02298},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-02298.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AISTATS 2016 paper (JMLR W&CP v51). The eponym "Markov" is braced so that
% sentence-casing bibliography styles keep its capital letter.
@inproceedings{DBLP:conf/aistats/PerolatPSP16,
  author       = {Julien P{\'{e}}rolat and
                  Bilal Piot and
                  Bruno Scherrer and
                  Olivier Pietquin},
  editor       = {Arthur Gretton and
                  Christian C. Robert},
  title        = {On the Use of Non-Stationary Strategies for Solving Two-Player Zero-Sum
                  {Markov} Games},
  booktitle    = {Proceedings of the 19th International Conference on Artificial Intelligence
                  and Statistics, {AISTATS} 2016, Cadiz, Spain, May 9-11, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {51},
  pages        = {893--901},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {https://proceedings.mlr.press/v51/perolat16.html},
  timestamp    = {Wed, 29 May 2019 08:41:44 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/PerolatPSP16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% The first author's family name is "El Asri"; the comma form makes BibTeX
% treat both words as the surname instead of reading "El" as a given name.
@inproceedings{DBLP:conf/atal/AsriPGLP16,
  author       = {El Asri, Layla and
                  Bilal Piot and
                  Matthieu Geist and
                  Romain Laroche and
                  Olivier Pietquin},
  editor       = {Catholijn M. Jonker and
                  Stacy Marsella and
                  John Thangarajah and
                  Karl Tuyls},
  title        = {Score-based Inverse Reinforcement Learning},
  booktitle    = {Proceedings of the 2016 International Conference on Autonomous Agents
                  {\&} Multiagent Systems, Singapore, May 9-13, 2016},
  pages        = {457--465},
  publisher    = {{ACM}},
  year         = {2016},
  url          = {http://dl.acm.org/citation.cfm?id=2936991},
  timestamp    = {Fri, 20 May 2016 20:33:29 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/AsriPGLP16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% "Markov" is braced in the title so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icml/PerolatPGSP16,
  author       = {Julien P{\'{e}}rolat and
                  Bilal Piot and
                  Matthieu Geist and
                  Bruno Scherrer and
                  Olivier Pietquin},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Softened Approximate Policy Iteration for {Markov} Games},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1860--1868},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/perolat16.html},
  timestamp    = {Wed, 29 May 2019 08:41:46 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/PerolatPGSP16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/PiotGP16,
  author     = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
  title      = {Difference of Convex Functions Programming Applied to Control
                with Expert Data},
  journal    = {CoRR},
  volume     = {abs/1606.01128},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.01128},
  eprinttype = {arXiv},
  eprint     = {1606.01128},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PiotGP16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% "Bellman" is braced in the title so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/GeistPP16,
  author       = {Matthieu Geist and
                  Bilal Piot and
                  Olivier Pietquin},
  title        = {Should one minimize the expected {Bellman} residual or maximize the
                  mean value?},
  journal      = {CoRR},
  volume       = {abs/1606.07636},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.07636},
  eprinttype   = {arXiv},
  eprint       = {1606.07636},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/GeistPP16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% "Nash" and "Markov" are braced in the title so sentence-casing styles keep
% their capitals.
@article{DBLP:journals/corr/PerolatSPP16,
  author       = {Julien P{\'{e}}rolat and
                  Florian Strub and
                  Bilal Piot and
                  Olivier Pietquin},
  title        = {Learning {Nash} Equilibrium for General-Sum {Markov} Games from Batch
                  Data},
  journal      = {CoRR},
  volume       = {abs/1606.08718},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.08718},
  eprinttype   = {arXiv},
  eprint       = {1606.08718},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/PerolatSPP16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper; in the booktitle only the acronyms are brace-protected.
@inproceedings{DBLP:conf/icml/PiotPG15,
  author       = {Bilal Piot and
                  Olivier Pietquin and
                  Matthieu Geist},
  title        = {Imitation Learning Applied to Embodied Conversational Agents},
  booktitle    = {Proceedings of the 4th Workshop on Machine Learning for Interactive
                  Systems, {MLIS} 2015, co-located with the 32nd International Conference
                  on Machine Learning ({ICML} 2015), Lille, France, July 11th, 2015},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {43},
  pages        = {1--5},
  publisher    = {JMLR.org},
  year         = {2015},
  url          = {http://proceedings.mlr.press/v43/piot15.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/PiotPG15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% "Markov" is braced in the title so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icml/PerolatSPP15,
  author       = {Julien P{\'{e}}rolat and
                  Bruno Scherrer and
                  Bilal Piot and
                  Olivier Pietquin},
  editor       = {Francis R. Bach and
                  David M. Blei},
  title        = {Approximate Dynamic Programming for Two-Player Zero-Sum {Markov} Games},
  booktitle    = {Proceedings of the 32nd International Conference on Machine Learning,
                  {ICML} 2015, Lille, France, 6-11 July 2015},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {37},
  pages        = {1321--1329},
  publisher    = {JMLR.org},
  year         = {2015},
  url          = {http://proceedings.mlr.press/v37/perolat15.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/PerolatSPP15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ijcai/MunzerPGPL15,
  author    = {Munzer, Thibaut and Piot, Bilal and Geist, Matthieu and
               Pietquin, Olivier and Lopes, Manuel},
  editor    = {Yang, Qiang and Wooldridge, Michael J.},
  title     = {Inverse Reinforcement Learning in Relational Domains},
  booktitle = {Proceedings of the Twenty-Fourth International Joint Conference
               on Artificial Intelligence, {IJCAI} 2015, Buenos Aires,
               Argentina, July 25-31, 2015},
  pages     = {3735--3741},
  publisher = {{AAAI} Press},
  year      = {2015},
  url       = {http://ijcai.org/Abstract/15/525},
  timestamp = {Tue, 20 Aug 2019 16:16:43 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/MunzerPGPL15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/atal/PiotGP14,
  author    = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
  editor    = {Bazzan, Ana L. C. and Huhns, Michael N. and
               Lomuscio, Alessio and Scerri, Paul},
  title     = {Boosted and reward-regularized classification for
               apprenticeship learning},
  booktitle = {International conference on Autonomous Agents and Multi-Agent
               Systems, {AAMAS} '14, Paris, France, May 5-9, 2014},
  pages     = {1249--1256},
  publisher = {{IFAAMAS/ACM}},
  year      = {2014},
  url       = {http://dl.acm.org/citation.cfm?id=2617447},
  timestamp = {Thu, 25 Sep 2014 07:46:15 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/PiotGP14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/PiotPG14,
  author    = {Piot, Bilal and Pietquin, Olivier and Geist, Matthieu},
  editor    = {Li, Haizhou and Meng, Helen M. and Ma, Bin and
               Chng, Engsiong and Xie, Lei},
  title     = {Predicting when to laugh with structured classification},
  booktitle = {15th Annual Conference of the International Speech
               Communication Association, {INTERSPEECH} 2014, Singapore,
               September 14-18, 2014},
  pages     = {1786--1790},
  publisher = {{ISCA}},
  year      = {2014},
  url       = {https://doi.org/10.21437/Interspeech.2014-407},
  doi       = {10.21437/INTERSPEECH.2014-407},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/PiotPG14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/PiotGP14,
  author    = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
  editor    = {Ghahramani, Zoubin and Welling, Max and Cortes, Corinna and
               Lawrence, Neil D. and Weinberger, Kilian Q.},
  title     = {Difference of Convex Functions Programming for Reinforcement
               Learning},
  booktitle = {Advances in Neural Information Processing Systems 27: Annual
               Conference on Neural Information Processing Systems 2014,
               December 8-13 2014, Montreal, Quebec, Canada},
  pages     = {2519--2527},
  year      = {2014},
  url       = {https://proceedings.neurips.cc/paper/2014/hash/50905d7b2216bfeccb5b41016357176b-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/PiotGP14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% "Bellman" is braced in the title so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/pkdd/PiotGP14,
  author       = {Bilal Piot and
                  Matthieu Geist and
                  Olivier Pietquin},
  editor       = {Toon Calders and
                  Floriana Esposito and
                  Eyke H{\"{u}}llermeier and
                  Rosa Meo},
  title        = {Boosted {Bellman} Residual Minimization Handling Expert Demonstrations},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases - European Conference,
                  {ECML} {PKDD} 2014, Nancy, France, September 15-19, 2014. Proceedings,
                  Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {8725},
  pages        = {549--564},
  publisher    = {Springer},
  year         = {2014},
  url          = {https://doi.org/10.1007/978-3-662-44851-9\_35},
  doi          = {10.1007/978-3-662-44851-9\_35},
  timestamp    = {Thu, 31 Oct 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/pkdd/PiotGP14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ria/KleinPGP13,
  author    = {Klein, Edouard and Piot, Bilal and Geist, Matthieu and
               Pietquin, Olivier},
  title     = {Classification structur{\'{e}}e pour l'apprentissage par
               renforcement inverse},
  journal   = {Rev. d'Intelligence Artif.},
  volume    = {27},
  number    = {2},
  pages     = {155--169},
  year      = {2013},
  url       = {https://doi.org/10.3166/ria.27.155-169},
  doi       = {10.3166/RIA.27.155-169},
  timestamp = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ria/KleinPGP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/atal/NiewiadomskiHUPWPCPBDGLMPR13,
  author    = {Niewiadomski, Radoslaw and Hofmann, Jennifer and
               Urbain, J{\'{e}}r{\^{o}}me and Platt, Tracey and
               Wagner, Johannes and Piot, Bilal and
               {\c{C}}akmak, H{\"{u}}seyin and Pammi, Sathish and
               Baur, Tobias and Dupont, St{\'{e}}phane and
               Geist, Matthieu and Lingenfelser, Florian and
               McKeown, Gary and Pietquin, Olivier and Ruch, Willibald},
  editor    = {Gini, Maria L. and Shehory, Onn and Ito, Takayuki and
               Jonker, Catholijn M.},
  title     = {Laugh-aware virtual agent and its impact on user amusement},
  booktitle = {International conference on Autonomous Agents and Multi-Agent
               Systems, {AAMAS} '13, Saint Paul, MN, USA, May 6-10, 2013},
  pages     = {619--626},
  publisher = {{IFAAMAS}},
  year      = {2013},
  url       = {http://dl.acm.org/citation.cfm?id=2485018},
  timestamp = {Thu, 09 Sep 2021 16:09:54 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/NiewiadomskiHUPWPCPBDGLMPR13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ifip5-5/ManciniABBBDDDGLNPPPUVW13,
  author    = {Mancini, Maurizio and Ach, Laurent and Bantegnie, Emeline and
               Baur, Tobias and Berthouze, Nadia and Datta, Debajyoti and
               Ding, Yu and Dupont, St{\'{e}}phane and Griffin, Harry J. and
               Lingenfelser, Florian and Niewiadomski, Radoslaw and
               Pelachaud, Catherine and Pietquin, Olivier and Piot, Bilal and
               Urbain, J{\'{e}}r{\^{o}}me and Volpe, Gualtiero and
               Wagner, Johannes},
  editor    = {Rybarczyk, Yves and Cardoso, Tiago and Rosas, Jo{\~{a}}o and
               Camarinha{-}Matos, Luis M.},
  title     = {Laugh When You're Winning},
  booktitle = {Innovative and Creative Developments in Multimodal Interaction
               Systems - 9th {IFIP} {WG} 5.5 International Summer Workshop on
               Multimodal Interfaces, eNTERFACE 2013, Lisbon, Portugal,
               July 15 - August 9, 2013. Proceedings},
  series    = {{IFIP} Advances in Information and Communication Technology},
  volume    = {425},
  pages     = {50--79},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-55143-7\_3},
  doi       = {10.1007/978-3-642-55143-7\_3},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ifip5-5/ManciniABBBDDDGLNPPPUVW13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/pkdd/KleinPGP13,
  author    = {Klein, Edouard and Piot, Bilal and Geist, Matthieu and
               Pietquin, Olivier},
  editor    = {Blockeel, Hendrik and Kersting, Kristian and
               Nijssen, Siegfried and Zelezn{\'{y}}, Filip},
  title     = {A Cascaded Supervised Learning Approach to Inverse
               Reinforcement Learning},
  booktitle = {Machine Learning and Knowledge Discovery in Databases -
               European Conference, {ECML} {PKDD} 2013, Prague, Czech
               Republic, September 23-27, 2013, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {8188},
  pages     = {1--16},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-40988-2\_1},
  doi       = {10.1007/978-3-642-40988-2\_1},
  timestamp = {Tue, 21 Mar 2023 21:00:11 +0100},
  biburl    = {https://dblp.org/rec/conf/pkdd/KleinPGP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/pkdd/PiotGP13,
  author    = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
  editor    = {Blockeel, Hendrik and Kersting, Kristian and
               Nijssen, Siegfried and Zelezn{\'{y}}, Filip},
  title     = {Learning from Demonstrations: Is It Worth Estimating a Reward
               Function?},
  booktitle = {Machine Learning and Knowledge Discovery in Databases -
               European Conference, {ECML} {PKDD} 2013, Prague, Czech
               Republic, September 23-27, 2013, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {8188},
  pages     = {17--32},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-40988-2\_2},
  doi       = {10.1007/978-3-642-40988-2\_2},
  timestamp = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/pkdd/PiotGP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/KleinGPP12,
  author    = {Klein, Edouard and Geist, Matthieu and Piot, Bilal and
               Pietquin, Olivier},
  editor    = {Bartlett, Peter L. and Pereira, Fernando C. N. and
               Burges, Christopher J. C. and Bottou, L{\'{e}}on and
               Weinberger, Kilian Q.},
  title     = {Inverse Reinforcement Learning through Structured
               Classification},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th
               Annual Conference on Neural Information Processing Systems
               2012. Proceedings of a meeting held December 3-6, 2012, Lake
               Tahoe, Nevada, United States},
  pages     = {1016--1024},
  year      = {2012},
  url       = {https://proceedings.neurips.cc/paper/2012/hash/559cb990c9dffd8675f6bc2186971dc2-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/KleinGPP12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

manage site settings

To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.