default search action
BibTeX records: Bilal Piot
% AISTATS 2024 paper (PMLR vol. 238); an arXiv record with the same title is DBLP:journals/corr/abs-2310-12036.
@inproceedings{DBLP:conf/aistats/AzarGPMRVC24,
  author    = {Mohammad Gheshlaghi Azar and Zhaohan Daniel Guo and Bilal Piot and
               R{\'{e}}mi Munos and Mark Rowland and Michal Valko and
               Daniele Calandriello},
  editor    = {Sanjoy Dasgupta and Stephan Mandt and Yingzhen Li},
  title     = {A General Theoretical Paradigm to Understand Learning from Human Preferences},
  booktitle = {International Conference on Artificial Intelligence and Statistics, 2-4 May 2024, Palau de Congressos, Valencia, Spain},
  series    = {Proceedings of Machine Learning Research},
  volume    = {238},
  pages     = {4447--4455},
  publisher = {{PMLR}},
  year      = {2024},
  url       = {https://proceedings.mlr.press/v238/gheshlaghi-azar24a.html},
  timestamp = {Mon, 13 May 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/AzarGPMRVC24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2024 paper; an arXiv record with the same title is DBLP:journals/corr/abs-2305-01521.
@inproceedings{DBLP:conf/iclr/SaadeKCBSSGVP24,
  author    = {Alaa Saade and Steven Kapturowski and Daniele Calandriello and
               Charles Blundell and Pablo Sprechmann and Leopoldo Sarra and
               Oliver Groth and Michal Valko and Bilal Piot},
  title     = {Unlocking the Power of Representations in Long-term Novelty-based Exploration},
  booktitle = {The Twelfth International Conference on Learning Representations, {ICLR} 2024, Vienna, Austria, May 7-11, 2024},
  publisher = {OpenReview.net},
  year      = {2024},
  url       = {https://openreview.net/forum?id=OwtMhMSybu},
  timestamp = {Mon, 29 Jul 2024 16:19:40 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/SaadeKCBSSGVP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2024 paper; an arXiv record with the same title is DBLP:journals/corr/abs-2403-08635.
@inproceedings{DBLP:conf/icml/CalandrielloGMR24,
  author    = {Daniele Calandriello and Zhaohan Daniel Guo and R{\'{e}}mi Munos and
               Mark Rowland and Yunhao Tang and Bernardo {\'{A}}vila Pires and
               Pierre Harvey Richemond and Charline Le Lan and Michal Valko and
               Tianqi Liu and Rishabh Joshi and Zeyu Zheng and Bilal Piot},
  title     = {Human Alignment of Large Language Models through Online Preference Optimisation},
  booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher = {OpenReview.net},
  year      = {2024},
  url       = {https://openreview.net/forum?id=2RQqg2Y7Y6},
  timestamp = {Mon, 02 Sep 2024 16:45:29 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/CalandrielloGMR24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2024 paper; an arXiv record with the same title is DBLP:journals/corr/abs-2312-00886.
@inproceedings{DBLP:conf/icml/MunosVCARGTGMFM24,
  author    = {R{\'{e}}mi Munos and Michal Valko and Daniele Calandriello and
               Mohammad Gheshlaghi Azar and Mark Rowland and Zhaohan Daniel Guo and
               Yunhao Tang and Matthieu Geist and Thomas Mesnard and
               C{\^{o}}me Fiegel and Andrea Michi and Marco Selvi and
               Sertan Girgin and Nikola Momchev and Olivier Bachem and
               Daniel J. Mankowitz and Doina Precup and Bilal Piot},
  title     = {Nash Learning from Human Feedback},
  booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher = {OpenReview.net},
  year      = {2024},
  url       = {https://openreview.net/forum?id=Y5AmNYiyCQ},
  timestamp = {Mon, 02 Sep 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/MunosVCARGTGMFM24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2024 paper; an arXiv record with the same title is DBLP:journals/corr/abs-2402-05749.
@inproceedings{DBLP:conf/icml/TangGZCMRRVPP24,
  author    = {Yunhao Tang and Zhaohan Daniel Guo and Zeyu Zheng and
               Daniele Calandriello and R{\'{e}}mi Munos and Mark Rowland and
               Pierre Harvey Richemond and Michal Valko and
               Bernardo {\'{A}}vila Pires and Bilal Piot},
  title     = {Generalized Preference Optimization: {A} Unified Approach to Offline Alignment},
  booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher = {OpenReview.net},
  year      = {2024},
  url       = {https://openreview.net/forum?id=gu3nacA9AH},
  timestamp = {Mon, 02 Sep 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/TangGZCMRRVPP24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2402.04792 (CoRR record).
@article{DBLP:journals/corr/abs-2402-04792,
  author     = {Shangmin Guo and Biao Zhang and Tianlin Liu and Tianqi Liu and
                Misha Khalman and Felipe Llinares and Alexandre Ram{\'{e}} and
                Thomas Mesnard and Yao Zhao and Bilal Piot and Johan Ferret and
                Mathieu Blondel},
  title      = {Direct Language Model Alignment from Online {AI} Feedback},
  journal    = {CoRR},
  volume     = {abs/2402.04792},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.04792},
  doi        = {10.48550/ARXIV.2402.04792},
  eprinttype = {arXiv},
  eprint     = {2402.04792},
  timestamp  = {Tue, 04 Jun 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-04792.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2402.05749; the ICML 2024 record with the same title is DBLP:conf/icml/TangGZCMRRVPP24.
@article{DBLP:journals/corr/abs-2402-05749,
  author     = {Yunhao Tang and Zhaohan Daniel Guo and Zeyu Zheng and
                Daniele Calandriello and R{\'{e}}mi Munos and Mark Rowland and
                Pierre Harvey Richemond and Michal Valko and
                Bernardo {\'{A}}vila Pires and Bilal Piot},
  title      = {Generalized Preference Optimization: {A} Unified Approach to Offline Alignment},
  journal    = {CoRR},
  volume     = {abs/2402.05749},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.05749},
  doi        = {10.48550/ARXIV.2402.05749},
  eprinttype = {arXiv},
  eprint     = {2402.05749},
  timestamp  = {Wed, 14 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-05749.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2403.08635; the ICML 2024 record with the same title is DBLP:conf/icml/CalandrielloGMR24.
% The second author is written "Zhaohan Daniel Guo", as in that ICML record, so both entries
% spell the same person identically (this record previously read "Daniel Guo").
@article{DBLP:journals/corr/abs-2403-08635,
  author     = {Daniele Calandriello and Zhaohan Daniel Guo and R{\'{e}}mi Munos and
                Mark Rowland and Yunhao Tang and Bernardo {\'{A}}vila Pires and
                Pierre Harvey Richemond and Charline Le Lan and Michal Valko and
                Tianqi Liu and Rishabh Joshi and Zeyu Zheng and Bilal Piot},
  title      = {Human Alignment of Large Language Models through Online Preference Optimisation},
  journal    = {CoRR},
  volume     = {abs/2403.08635},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2403.08635},
  doi        = {10.48550/ARXIV.2403.08635},
  eprinttype = {arXiv},
  eprint     = {2403.08635},
  timestamp  = {Fri, 21 Jun 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-08635.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2405.14655 (CoRR record).
@article{DBLP:journals/corr/abs-2405-14655,
  author     = {Lior Shani and Aviv Rosenberg and Asaf B. Cassel and Oran Lang and
                Daniele Calandriello and Avital Zipori and Hila Noga and
                Orgad Keller and Bilal Piot and Idan Szpektor and
                Avinatan Hassidim and Yossi Matias and R{\'{e}}mi Munos},
  title      = {Multi-turn Reinforcement Learning from Preference Human Feedback},
  journal    = {CoRR},
  volume     = {abs/2405.14655},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2405.14655},
  doi        = {10.48550/ARXIV.2405.14655},
  eprinttype = {arXiv},
  eprint     = {2405.14655},
  timestamp  = {Mon, 02 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2405-14655.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2405.19107 (CoRR record).
% The third author is written "Zhaohan Daniel Guo", the full form used by the other records in
% this file (this record previously read "Daniel Guo"; presumably the same person - confirm).
@article{DBLP:journals/corr/abs-2405-19107,
  author     = {Pierre Harvey Richemond and Yunhao Tang and Zhaohan Daniel Guo and
                Daniele Calandriello and Mohammad Gheshlaghi Azar and
                Rafael Rafailov and Bernardo {\'{A}}vila Pires and
                Eugene Tarassov and Lucas Spangher and Will Ellsworth and
                Aliaksei Severyn and Jonathan Mallinson and Lior Shani and
                Gil Shamir and Rishabh Joshi and Tianqi Liu and
                R{\'{e}}mi Munos and Bilal Piot},
  title      = {Offline Regularised Reinforcement Learning for Large Language Models Alignment},
  journal    = {CoRR},
  volume     = {abs/2405.19107},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2405.19107},
  doi        = {10.48550/ARXIV.2405.19107},
  eprinttype = {arXiv},
  eprint     = {2405.19107},
  timestamp  = {Fri, 21 Jun 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2405-19107.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2408.00118 (CoRR record).
% NOTE(review): the exported author list stops at "Lilly McNealus", part-way through an
% alphabetical-by-given-name run, so it appears to be truncated by the export. The list is
% therefore closed with "and others" so styles render "et al." instead of presenting the
% cut-off list as complete. Verify the remaining names against the arXiv abstract page.
@article{DBLP:journals/corr/abs-2408-00118,
  author     = {Morgane Rivi{\`{e}}re and Shreya Pathak and Pier Giuseppe Sessa and
                Cassidy Hardin and Surya Bhupatiraju and L{\'{e}}onard Hussenot and
                Thomas Mesnard and Bobak Shahriari and Alexandre Ram{\'{e}} and
                Johan Ferret and Peter Liu and Pouya Tafti and Abe Friesen and
                Michelle Casbon and Sabela Ramos and Ravin Kumar and
                Charline Le Lan and Sammy Jerome and Anton Tsitsulin and
                Nino Vieillard and Piotr Stanczyk and Sertan Girgin and
                Nikola Momchev and Matt Hoffman and Shantanu Thakoor and
                Jean{-}Bastien Grill and Behnam Neyshabur and Olivier Bachem and
                Alanna Walton and Aliaksei Severyn and Alicia Parrish and
                Aliya Ahmad and Allen Hutchison and Alvin Abdagic and
                Amanda Carl and Amy Shen and Andy Brock and Andy Coenen and
                Anthony Laforge and Antonia Paterson and Ben Bastian and
                Bilal Piot and Bo Wu and Brandon Royal and Charlie Chen and
                Chintu Kumar and Chris Perry and Chris Welty and
                Christopher A. Choquette{-}Choo and Danila Sinopalnikov and
                David Weinberger and Dimple Vijaykumar and Dominika Rogozinska and
                Dustin Herbison and Elisa Bandy and Emma Wang and Eric Noland and
                Erica Moreira and Evan Senter and Evgenii Eltyshev and
                Francesco Visin and Gabriel Rasskin and Gary Wei and
                Glenn Cameron and Gus Martins and Hadi Hashemi and
                Hanna Klimczak{-}Plucinska and Harleen Batra and Harsh Dhand and
                Ivan Nardini and Jacinda Mein and Jack Zhou and James Svensson and
                Jeff Stanway and Jetha Chan and Jin Peng Zhou and
                Joana Carrasqueira and Joana Iljazi and Jocelyn Becker and
                Joe Fernandez and Joost van Amersfoort and Josh Gordon and
                Josh Lipschultz and Josh Newlan and Ju{-}yeong Ji and
                Kareem Mohamed and Kartikeya Badola and Kat Black and
                Katie Millican and Keelin McDonell and Kelvin Nguyen and
                Kiranbir Sodhia and Kish Greene and Lars Lowe Sj{\"{o}}sund and
                Lauren Usui and Laurent Sifre and Lena Heuermann and
                Leticia Lago and Lilly McNealus and others},
  title      = {Gemma 2: Improving Open Language Models at a Practical Size},
  journal    = {CoRR},
  volume     = {abs/2408.00118},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2408.00118},
  doi        = {10.48550/ARXIV.2408.00118},
  eprinttype = {arXiv},
  eprint     = {2408.00118},
  timestamp  = {Fri, 06 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2408-00118.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR vol. 202); an arXiv record with the same title is DBLP:journals/corr/abs-2302-04817.
@inproceedings{DBLP:conf/icml/RichemondTTSPH23,
  author    = {Pierre Harvey Richemond and Allison C. Tam and Yunhao Tang and
               Florian Strub and Bilal Piot and Felix Hill},
  editor    = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and
               Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title     = {The Edge of Orthogonality: {A} Simple View of What Makes {BYOL} Tick},
  booktitle = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {29063--29081},
  publisher = {{PMLR}},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/richemond23a.html},
  timestamp = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/RichemondTTSPH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR vol. 202); an arXiv record with the same title is DBLP:journals/corr/abs-2212-03319.
@inproceedings{DBLP:conf/icml/TangGRPCMRALL0T23,
  author    = {Yunhao Tang and Zhaohan Daniel Guo and Pierre Harvey Richemond and
               Bernardo {\'{A}}vila Pires and Yash Chandak and R{\'{e}}mi Munos and
               Mark Rowland and Mohammad Gheshlaghi Azar and Charline Le Lan and
               Clare Lyle and Andr{\'{a}}s Gy{\"{o}}rgy and Shantanu Thakoor and
               Will Dabney and Bilal Piot and Daniele Calandriello and
               Michal Valko},
  editor    = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and
               Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title     = {Understanding Self-Predictive Learning for Reinforcement Learning},
  booktitle = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {33632--33656},
  publisher = {{PMLR}},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/tang23d.html},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/TangGRPCMRALL0T23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2302.04817; the ICML 2023 record with the same title is DBLP:conf/icml/RichemondTTSPH23.
% The first author is written "Pierre Harvey Richemond", as in that ICML record, instead of
% the abbreviated "Pierre H. Richemond", so both entries spell the name identically.
@article{DBLP:journals/corr/abs-2302-04817,
  author     = {Pierre Harvey Richemond and Allison C. Tam and Yunhao Tang and
                Florian Strub and Bilal Piot and Felix Hill},
  title      = {The Edge of Orthogonality: {A} Simple View of What Makes {BYOL} Tick},
  journal    = {CoRR},
  volume     = {abs/2302.04817},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.04817},
  doi        = {10.48550/ARXIV.2302.04817},
  eprinttype = {arXiv},
  eprint     = {2302.04817},
  timestamp  = {Mon, 13 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-04817.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2305.01521; the ICLR 2024 record with the same title is DBLP:conf/iclr/SaadeKCBSSGVP24.
@article{DBLP:journals/corr/abs-2305-01521,
  author     = {Alaa Saade and Steven Kapturowski and Daniele Calandriello and
                Charles Blundell and Pablo Sprechmann and Leopoldo Sarra and
                Oliver Groth and Michal Valko and Bilal Piot},
  title      = {Unlocking the Power of Representations in Long-term Novelty-based Exploration},
  journal    = {CoRR},
  volume     = {abs/2305.01521},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.01521},
  doi        = {10.48550/ARXIV.2305.01521},
  eprinttype = {arXiv},
  eprint     = {2305.01521},
  timestamp  = {Fri, 05 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-01521.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2310.12036; the AISTATS 2024 record with the same title is DBLP:conf/aistats/AzarGPMRVC24.
% The fourth author is written "Zhaohan Daniel Guo", as in that AISTATS record, so both
% entries spell the same person identically (this record previously read "Daniel Guo").
@article{DBLP:journals/corr/abs-2310-12036,
  author     = {Mohammad Gheshlaghi Azar and Mark Rowland and Bilal Piot and
                Zhaohan Daniel Guo and Daniele Calandriello and Michal Valko and
                R{\'{e}}mi Munos},
  title      = {A General Theoretical Paradigm to Understand Learning from Human Preferences},
  journal    = {CoRR},
  volume     = {abs/2310.12036},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.12036},
  doi        = {10.48550/ARXIV.2310.12036},
  eprinttype = {arXiv},
  eprint     = {2310.12036},
  timestamp  = {Fri, 27 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-12036.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2312.00886; the ICML 2024 record with the same title is DBLP:conf/icml/MunosVCARGTGMFM24.
@article{DBLP:journals/corr/abs-2312-00886,
  author     = {R{\'{e}}mi Munos and Michal Valko and Daniele Calandriello and
                Mohammad Gheshlaghi Azar and Mark Rowland and Zhaohan Daniel Guo and
                Yunhao Tang and Matthieu Geist and Thomas Mesnard and
                Andrea Michi and Marco Selvi and Sertan Girgin and
                Nikola Momchev and Olivier Bachem and Daniel J. Mankowitz and
                Doina Precup and Bilal Piot},
  title      = {Nash Learning from Human Feedback},
  journal    = {CoRR},
  volume     = {abs/2312.00886},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.00886},
  doi        = {10.48550/ARXIV.2312.00886},
  eprinttype = {arXiv},
  eprint     = {2312.00886},
  timestamp  = {Tue, 02 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-00886.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2022 paper (OpenReview).
@inproceedings{DBLP:conf/iclr/ChaabouniSATTDM22,
  author    = {Rahma Chaabouni and Florian Strub and Florent Altch{\'{e}} and
               Eugene Tarassov and Corentin Tallec and Elnaz Davoodi and
               Kory Wallace Mathewson and Olivier Tieleman and
               Angeliki Lazaridou and Bilal Piot},
  title     = {Emergent Communication at Scale},
  booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher = {OpenReview.net},
  year      = {2022},
  url       = {https://openreview.net/forum?id=AUGBfDIV9rL},
  timestamp = {Thu, 13 Jul 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/ChaabouniSATTDM22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper; an arXiv record with the same title is DBLP:journals/corr/abs-2206-08332.
% The first author is written "Zhaohan Daniel Guo", as in that arXiv record, so both entries
% spell the same person identically (this record previously read "Zhaohan Guo").
@inproceedings{DBLP:conf/nips/GuoTPPATSCGTVMA22,
  author    = {Zhaohan Daniel Guo and Shantanu Thakoor and Miruna Pislar and
               Bernardo {\'{A}}vila Pires and Florent Altch{\'{e}} and
               Corentin Tallec and Alaa Saade and Daniele Calandriello and
               Jean{-}Bastien Grill and Yunhao Tang and Michal Valko and
               R{\'{e}}mi Munos and Mohammad Gheshlaghi Azar and Bilal Piot},
  editor    = {Sanmi Koyejo and S. Mohamed and A. Agarwal and Danielle Belgrave and
               K. Cho and A. Oh},
  title     = {BYOL-Explore: Exploration by Bootstrapped Prediction},
  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year      = {2022},
  url       = {http://papers.nips.cc/paper\_files/paper/2022/hash/ced0d3b92bb83b15c43ee32c7f57d867-Abstract-Conference.html},
  timestamp = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/GuoTPPATSCGTVMA22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2206.08332; the NeurIPS 2022 record with the same title is DBLP:conf/nips/GuoTPPATSCGTVMA22.
@article{DBLP:journals/corr/abs-2206-08332,
  author     = {Zhaohan Daniel Guo and Shantanu Thakoor and Miruna Pislar and
                Bernardo {\'{A}}vila Pires and Florent Altch{\'{e}} and
                Corentin Tallec and Alaa Saade and Daniele Calandriello and
                Jean{-}Bastien Grill and Yunhao Tang and Michal Valko and
                R{\'{e}}mi Munos and Mohammad Gheshlaghi Azar and Bilal Piot},
  title      = {BYOL-Explore: Exploration by Bootstrapped Prediction},
  journal    = {CoRR},
  volume     = {abs/2206.08332},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.08332},
  doi        = {10.48550/ARXIV.2206.08332},
  eprinttype = {arXiv},
  eprint     = {2206.08332},
  timestamp  = {Tue, 21 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-08332.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2206.15378 (CoRR record).
@article{DBLP:journals/corr/abs-2206-15378,
  author     = {Julien P{\'{e}}rolat and Bart De Vylder and Daniel Hennes and
                Eugene Tarassov and Florian Strub and Vincent de Boer and
                Paul Muller and Jerome T. Connor and Neil Burch and
                Thomas W. Anthony and Stephen McAleer and Romuald Elie and
                Sarah H. Cen and Zhe Wang and Audrunas Gruslys and
                Aleksandra Malysheva and Mina Khan and Sherjil Ozair and
                Finbarr Timbers and Toby Pohlen and Tom Eccles and
                Mark Rowland and Marc Lanctot and Jean{-}Baptiste Lespiau and
                Bilal Piot and Shayegan Omidshafiei and Edward Lockhart and
                Laurent Sifre and Nathalie Beauguerlange and R{\'{e}}mi Munos and
                David Silver and Satinder Singh and Demis Hassabis and
                Karl Tuyls},
  title      = {Mastering the Game of Stratego with Model-Free Multiagent Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2206.15378},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.15378},
  doi        = {10.48550/ARXIV.2206.15378},
  eprinttype = {arXiv},
  eprint     = {2206.15378},
  timestamp  = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-15378.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2212.03319; the ICML 2023 record with the same title is DBLP:conf/icml/TangGRPCMRALL0T23.
@article{DBLP:journals/corr/abs-2212-03319,
  author     = {Yunhao Tang and Zhaohan Daniel Guo and Pierre Harvey Richemond and
                Bernardo {\'{A}}vila Pires and Yash Chandak and R{\'{e}}mi Munos and
                Mark Rowland and Mohammad Gheshlaghi Azar and Charline Le Lan and
                Clare Lyle and Andr{\'{a}}s Gy{\"{o}}rgy and Shantanu Thakoor and
                Will Dabney and Bilal Piot and Daniele Calandriello and
                Michal Valko},
  title      = {Understanding Self-Predictive Learning for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2212.03319},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2212.03319},
  doi        = {10.48550/ARXIV.2212.03319},
  eprinttype = {arXiv},
  eprint     = {2212.03319},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-03319.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2101.02055 (CoRR record; no DOI field in the export).
@article{DBLP:journals/corr/abs-2101-02055,
  author     = {Zhaohan Daniel Guo and Mohammad Gheshlaghi Azar and Alaa Saade and
                Shantanu Thakoor and Bilal Piot and Bernardo {\'{A}}vila Pires and
                Michal Valko and Thomas Mesnard and Tor Lattimore and
                R{\'{e}}mi Munos},
  title      = {Geometric Entropic Exploration},
  journal    = {CoRR},
  volume     = {abs/2101.02055},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.02055},
  eprinttype = {arXiv},
  eprint     = {2101.02055},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-02055.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2110.10819 (CoRR record; no DOI field in the export).
@article{DBLP:journals/corr/abs-2110-10819,
  author     = {Pedro A. Ortega and Markus Kunesch and Gr{\'{e}}goire Del{\'{e}}tang and
                Tim Genewein and Jordi Grau{-}Moya and Joel Veness and
                Jonas Buchli and Jonas Degrave and Bilal Piot and
                Julien P{\'{e}}rolat and Tom Everitt and Corentin Tallec and
                Emilio Parisotto and Tom Erez and Yutian Chen and
                Scott E. Reed and Marcus Hutter and Nando de Freitas and
                Shane Legg},
  title      = {Shaking the foundations: delusions in sequence models for interaction and control},
  journal    = {CoRR},
  volume     = {abs/2110.10819},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.10819},
  eprinttype = {arXiv},
  eprint     = {2110.10819},
  timestamp  = {Fri, 26 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-10819.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2020 paper; an arXiv record with the same title is DBLP:journals/corr/abs-2002-06038.
@inproceedings{DBLP:conf/iclr/BadiaSVGPKTAPBB20,
  author    = {Adri{\`{a}} Puigdom{\`{e}}nech Badia and Pablo Sprechmann and
               Alex Vitvitskyi and Zhaohan Daniel Guo and Bilal Piot and
               Steven Kapturowski and Olivier Tieleman and
               Mart{\'{\i}}n Arjovsky and Alexander Pritzel and Andrew Bolt and
               Charles Blundell},
  title     = {Never Give Up: Learning Directed Exploration Strategies},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=Sye57xStvB},
  timestamp = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iclr/BadiaSVGPKTAPBB20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2020 paper (PMLR vol. 119); an arXiv record with the same title is DBLP:journals/corr/abs-2003-13350.
% The proceedings link uses https, matching the other proceedings.mlr.press URLs in this file.
@inproceedings{DBLP:conf/icml/BadiaPKSVGB20,
  author    = {Adri{\`{a}} Puigdom{\`{e}}nech Badia and Bilal Piot and
               Steven Kapturowski and Pablo Sprechmann and Alex Vitvitskyi and
               Zhaohan Daniel Guo and Charles Blundell},
  title     = {Agent57: Outperforming the Atari Human Benchmark},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {507--517},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {https://proceedings.mlr.press/v119/badia20a.html},
  timestamp = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/BadiaPKSVGB20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2020 paper (PMLR vol. 119); an arXiv record with the same title is DBLP:journals/corr/abs-2004-14646.
% The proceedings link uses https, matching the other proceedings.mlr.press URLs in this file.
@inproceedings{DBLP:conf/icml/GuoPPGAMA20,
  author    = {Zhaohan Daniel Guo and Bernardo {\'{A}}vila Pires and Bilal Piot and
               Jean{-}Bastien Grill and Florent Altch{\'{e}} and R{\'{e}}mi Munos and
               Mohammad Gheshlaghi Azar},
  title     = {Bootstrap Latent-Predictive Representations for Multitask Reinforcement Learning},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {3875--3886},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {https://proceedings.mlr.press/v119/guo20g.html},
  timestamp = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/GuoPPGAMA20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2020 paper; the arXiv record of this work is DBLP:journals/corr/abs-2006-07733.
% Two author names are written in the full forms used by other records in this file:
% "Pierre Harvey Richemond" (was "Pierre H. Richemond") and "Zhaohan Daniel Guo"
% (was "Zhaohan Guo"; the arXiv record of this work already uses the full form).
@inproceedings{DBLP:conf/nips/GrillSATRBDPGAP20,
  author    = {Jean{-}Bastien Grill and Florian Strub and Florent Altch{\'{e}} and
               Corentin Tallec and Pierre Harvey Richemond and
               Elena Buchatskaya and Carl Doersch and
               Bernardo {\'{A}}vila Pires and Zhaohan Daniel Guo and
               Mohammad Gheshlaghi Azar and Bilal Piot and Koray Kavukcuoglu and
               R{\'{e}}mi Munos and Michal Valko},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and
               Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Bootstrap Your Own Latent - {A} New Approach to Self-Supervised Learning},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/f3ada80d5c4ee70142b17b8192b2958e-Abstract.html},
  timestamp = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/GrillSATRBDPGAP20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2002.06038; the ICLR 2020 record with the same title is DBLP:conf/iclr/BadiaSVGPKTAPBB20.
@article{DBLP:journals/corr/abs-2002-06038,
  author     = {Adri{\`{a}} Puigdom{\`{e}}nech Badia and Pablo Sprechmann and
                Alex Vitvitskyi and Zhaohan Daniel Guo and Bilal Piot and
                Steven Kapturowski and Olivier Tieleman and
                Mart{\'{\i}}n Arjovsky and Alexander Pritzel and Andrew Bolt and
                Charles Blundell},
  title      = {Never Give Up: Learning Directed Exploration Strategies},
  journal    = {CoRR},
  volume     = {abs/2002.06038},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.06038},
  eprinttype = {arXiv},
  eprint     = {2002.06038},
  timestamp  = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-06038.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2003.13350; the ICML 2020 record with the same title is DBLP:conf/icml/BadiaPKSVGB20.
@article{DBLP:journals/corr/abs-2003-13350,
  author     = {Adri{\`{a}} Puigdom{\`{e}}nech Badia and Bilal Piot and
                Steven Kapturowski and Pablo Sprechmann and Alex Vitvitskyi and
                Zhaohan Daniel Guo and Charles Blundell},
  title      = {Agent57: Outperforming the Atari Human Benchmark},
  journal    = {CoRR},
  volume     = {abs/2003.13350},
  year       = {2020},
  url        = {https://arxiv.org/abs/2003.13350},
  eprinttype = {arXiv},
  eprint     = {2003.13350},
  timestamp  = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2003-13350.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2004.14646; the ICML 2020 record with the same title is DBLP:conf/icml/GuoPPGAMA20.
@article{DBLP:journals/corr/abs-2004-14646,
  author     = {Zhaohan Daniel Guo and Bernardo {\'{A}}vila Pires and Bilal Piot and
                Jean{-}Bastien Grill and Florent Altch{\'{e}} and R{\'{e}}mi Munos and
                Mohammad Gheshlaghi Azar},
  title      = {Bootstrap Latent-Predictive Representations for Multitask Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2004.14646},
  year       = {2020},
  url        = {https://arxiv.org/abs/2004.14646},
  eprinttype = {arXiv},
  eprint     = {2004.14646},
  timestamp  = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2004-14646.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2006.00979 (CoRR record; no DOI field in the export).
@article{DBLP:journals/corr/abs-2006-00979,
  author     = {Matt Hoffman and Bobak Shahriari and John Aslanides and
                Gabriel Barth{-}Maron and Feryal M. P. Behbahani and
                Tamara Norman and Abbas Abdolmaleki and Albin Cassirer and
                Fan Yang and Kate Baumli and Sarah Henderson and
                Alexander Novikov and Sergio G{\'{o}}mez Colmenarejo and
                Serkan Cabi and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Tom Le Paine and
                Andrew Cowie and Ziyu Wang and Bilal Piot and Nando de Freitas},
  title      = {Acme: {A} Research Framework for Distributed Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2006.00979},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.00979},
  eprinttype = {arXiv},
  eprint     = {2006.00979},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-00979.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2006.07733; the NeurIPS 2020 record of this work is DBLP:conf/nips/GrillSATRBDPGAP20.
% The fifth author is written "Pierre Harvey Richemond", the full form used by the 2023-2024
% records in this file, instead of the abbreviated "Pierre H. Richemond".
@article{DBLP:journals/corr/abs-2006-07733,
  author     = {Jean{-}Bastien Grill and Florian Strub and Florent Altch{\'{e}} and
                Corentin Tallec and Pierre Harvey Richemond and
                Elena Buchatskaya and Carl Doersch and
                Bernardo {\'{A}}vila Pires and Zhaohan Daniel Guo and
                Mohammad Gheshlaghi Azar and Bilal Piot and Koray Kavukcuoglu and
                R{\'{e}}mi Munos and Michal Valko},
  title      = {Bootstrap Your Own Latent: {A} New Approach to Self-Supervised Learning},
  journal    = {CoRR},
  volume     = {abs/2006.07733},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.07733},
  eprinttype = {arXiv},
  eprint     = {2006.07733},
  timestamp  = {Wed, 17 Jun 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-07733.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2010.10241 (CoRR record; no DOI field in the export).
% The first author is written "Pierre Harvey Richemond", the full form used by the 2023-2024
% records in this file, instead of the abbreviated "Pierre H. Richemond".
@article{DBLP:journals/corr/abs-2010-10241,
  author     = {Pierre Harvey Richemond and Jean{-}Bastien Grill and
                Florent Altch{\'{e}} and Corentin Tallec and Florian Strub and
                Andrew Brock and Samuel L. Smith and Soham De and
                Razvan Pascanu and Bilal Piot and Michal Valko},
  title      = {{BYOL} works even without batch statistics},
  journal    = {CoRR},
  volume     = {abs/2010.10241},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.10241},
  eprinttype = {arXiv},
  eprint     = {2010.10241},
  timestamp  = {Tue, 27 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-10241.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2019 paper.
@inproceedings{DBLP:conf/atal/BorsaHPLHMP19,
  author    = {Diana Borsa and Nicolas Heess and Bilal Piot and Siqi Liu and
               Leonard Hasenclever and R{\'{e}}mi Munos and Olivier Pietquin},
  editor    = {Edith Elkind and Manuela Veloso and Noa Agmon and
               Matthew E. Taylor},
  title     = {Observational Learning by Reinforcement Learning},
  booktitle = {Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems, {AAMAS} '19, Montreal, QC, Canada, May 13-17, 2019},
  pages     = {1117--1124},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
  year      = {2019},
  url       = {http://dl.acm.org/citation.cfm?id=3331811},
  timestamp = {Thu, 11 May 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/BorsaHPLHMP19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2019 paper; an arXiv record with the same title is DBLP:journals/corr/abs-1912-02503.
@inproceedings{DBLP:conf/nips/HarutyunyanDMAP19,
  author    = {Anna Harutyunyan and Will Dabney and Thomas Mesnard and
               Mohammad Gheshlaghi Azar and Bilal Piot and Nicolas Heess and
               Hado van Hasselt and Gregory Wayne and Satinder Singh and
               Doina Precup and R{\'{e}}mi Munos},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and
               Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title     = {Hindsight Credit Assignment},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages     = {12467--12476},
  year      = {2019},
  url       = {https://proceedings.neurips.cc/paper/2019/hash/195f15384c2a79cedf293e4a847ce85c-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/HarutyunyanDMAP19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1902.07685 (CoRR record; no DOI field in the export).
% NOTE(review): the third author was exported as "Bernardo A. Pires"; it is written here as
% "Bernardo {\'{A}}vila Pires", the form every other record in this file uses - presumably
% the same person, confirm. The arXiv link uses https like the later arxiv.org URLs in this file.
@article{DBLP:journals/corr/abs-1902-07685,
  author     = {Mohammad Gheshlaghi Azar and Bilal Piot and
                Bernardo {\'{A}}vila Pires and Jean{-}Bastien Grill and
                Florent Altch{\'{e}} and R{\'{e}}mi Munos},
  title      = {World Discovery Models},
  journal    = {CoRR},
  volume     = {abs/1902.07685},
  year       = {2019},
  url        = {https://arxiv.org/abs/1902.07685},
  eprinttype = {arXiv},
  eprint     = {1902.07685},
  timestamp  = {Tue, 21 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-07685.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1912.02503; the NeurIPS 2019 record with the same title is DBLP:conf/nips/HarutyunyanDMAP19.
% The eighth author is written "Gregory Wayne", as in that NeurIPS record (this record
% previously read "Greg Wayne"). The arXiv link uses https like the later arxiv.org URLs in this file.
@article{DBLP:journals/corr/abs-1912-02503,
  author     = {Anna Harutyunyan and Will Dabney and Thomas Mesnard and
                Mohammad Gheshlaghi Azar and Bilal Piot and Nicolas Heess and
                Hado van Hasselt and Gregory Wayne and Satinder Singh and
                Doina Precup and R{\'{e}}mi Munos},
  title      = {Hindsight Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/1912.02503},
  year       = {2019},
  url        = {https://arxiv.org/abs/1912.02503},
  eprinttype = {arXiv},
  eprint     = {1912.02503},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-02503.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI-18 paper (AAAI Press); carries both a DOI and the matching doi.org URL.
@inproceedings{DBLP:conf/aaai/HesselMHSODHPAS18,
  author    = {Matteo Hessel and Joseph Modayil and Hado van Hasselt and
               Tom Schaul and Georg Ostrovski and Will Dabney and Dan Horgan and
               Bilal Piot and Mohammad Gheshlaghi Azar and David Silver},
  editor    = {Sheila A. McIlraith and Kilian Q. Weinberger},
  title     = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence, (AAAI-18), the 30th innovative Applications of Artificial Intelligence (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2-7, 2018},
  pages     = {3215--3222},
  publisher = {{AAAI} Press},
  year      = {2018},
  url       = {https://doi.org/10.1609/aaai.v32i1.11796},
  doi       = {10.1609/AAAI.V32I1.11796},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/HesterVPLSPHQSO18,
  author    = {Todd Hester and
               Matej Vecer{\'{\i}}k and
               Olivier Pietquin and
               Marc Lanctot and
               Tom Schaul and
               Bilal Piot and
               Dan Horgan and
               John Quan and
               Andrew Sendonaris and
               Ian Osband and
               Gabriel Dulac{-}Arnold and
               John P. Agapiou and
               Joel Z. Leibo and
               Audrunas Gruslys},
  editor    = {Sheila A. McIlraith and
               Kilian Q. Weinberger},
  title     = {Deep {Q}-learning From Demonstrations},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence, (AAAI-18), the 30th Innovative Applications of Artificial Intelligence (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2-7, 2018},
  pages     = {3223--3230},
  publisher = {{AAAI} Press},
  year      = {2018},
  url       = {https://doi.org/10.1609/aaai.v32i1.11757},
  doi       = {10.1609/AAAI.V32I1.11757},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HesterVPLSPHQSO18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/PerolatPP18,
  author    = {Julien P{\'{e}}rolat and
               Bilal Piot and
               Olivier Pietquin},
  editor    = {Amos J. Storkey and
               Fernando P{\'{e}}rez{-}Cruz},
  title     = {Actor-Critic Fictitious Play in Simultaneous Move Multistage Games},
  booktitle = {International Conference on Artificial Intelligence and Statistics, {AISTATS} 2018, 9-11 April 2018, Playa Blanca, Lanzarote, Canary Islands, Spain},
  series    = {Proceedings of Machine Learning Research},
  volume    = {84},
  pages     = {919--928},
  publisher = {{PMLR}},
  year      = {2018},
  url       = {http://proceedings.mlr.press/v84/perolat18a.html},
  timestamp = {Wed, 03 Apr 2019 18:17:22 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/PerolatPP18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2018 conference entry; an arXiv preprint with the same title (up to capitalisation) is recorded as DBLP:journals/corr/FortunatoAPMOGM17.
@inproceedings{DBLP:conf/iclr/FortunatoAPMHOG18,
  author    = {Meire Fortunato and
               Mohammad Gheshlaghi Azar and
               Bilal Piot and
               Jacob Menick and
               Matteo Hessel and
               Ian Osband and
               Alex Graves and
               Volodymyr Mnih and
               R{\'{e}}mi Munos and
               Demis Hassabis and
               Olivier Pietquin and
               Charles Blundell and
               Shane Legg},
  title     = {Noisy Networks For Exploration},
  booktitle = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher = {OpenReview.net},
  year      = {2018},
  url       = {https://openreview.net/forum?id=rywHCPkAW},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/FortunatoAPMHOG18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/GruslysDAPBM18,
  author    = {Audrunas Gruslys and
               Will Dabney and
               Mohammad Gheshlaghi Azar and
               Bilal Piot and
               Marc G. Bellemare and
               R{\'{e}}mi Munos},
  title     = {The Reactor: {A} fast and sample-efficient Actor-Critic agent for Reinforcement Learning},
  booktitle = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher = {OpenReview.net},
  year      = {2018},
  url       = {https://openreview.net/forum?id=rkHVZWZAZ},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/GruslysDAPBM18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-11593,
  author     = {Tobias Pohlen and
                Bilal Piot and
                Todd Hester and
                Mohammad Gheshlaghi Azar and
                Dan Horgan and
                David Budden and
                Gabriel Barth{-}Maron and
                Hado van Hasselt and
                John Quan and
                Mel Vecer{\'{\i}}k and
                Matteo Hessel and
                R{\'{e}}mi Munos and
                Olivier Pietquin},
  title      = {Observe and Look Further: Achieving Consistent Performance on {Atari}},
  journal    = {CoRR},
  volume     = {abs/1805.11593},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.11593},
  eprinttype = {arXiv},
  eprint     = {1805.11593},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-11593.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-07802,
  author     = {Julien P{\'{e}}rolat and
                Mateusz Malinowski and
                Bilal Piot and
                Olivier Pietquin},
  title      = {Playing the Game of Universal Adversarial Perturbations},
  journal    = {CoRR},
  volume     = {abs/1809.07802},
  year       = {2018},
  url        = {http://arxiv.org/abs/1809.07802},
  eprinttype = {arXiv},
  eprint     = {1809.07802},
  timestamp  = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1809-07802.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-06407,
  author     = {Zhaohan Daniel Guo and
                Mohammad Gheshlaghi Azar and
                Bilal Piot and
                Bernardo A. Pires and
                Toby Pohlen and
                R{\'{e}}mi Munos},
  title      = {Neural Predictive Belief Representations},
  journal    = {CoRR},
  volume     = {abs/1811.06407},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.06407},
  eprinttype = {arXiv},
  eprint     = {1811.06407},
  timestamp  = {Sun, 25 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-06407.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tnn/PiotGP17,
  author    = {Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin},
  title     = {Bridging the Gap Between Imitation Learning and Inverse Reinforcement Learning},
  journal   = {{IEEE} Trans. Neural Networks Learn. Syst.},
  volume    = {28},
  number    = {8},
  pages     = {1814--1826},
  year      = {2017},
  url       = {https://doi.org/10.1109/TNNLS.2016.2543000},
  doi       = {10.1109/TNNLS.2016.2543000},
  timestamp = {Mon, 09 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/tnn/PiotGP17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2017 conference entry; an arXiv preprint with the same title is recorded as DBLP:journals/corr/PerolatSPP16.
@inproceedings{DBLP:conf/aistats/PerolatSPP17,
  author    = {Julien P{\'{e}}rolat and
               Florian Strub and
               Bilal Piot and
               Olivier Pietquin},
  editor    = {Aarti Singh and
               Xiaojin (Jerry) Zhu},
  title     = {Learning {Nash} Equilibrium for General-Sum {Markov} Games from Batch Data},
  booktitle = {Proceedings of the 20th International Conference on Artificial Intelligence and Statistics, {AISTATS} 2017, 20-22 April 2017, Fort Lauderdale, FL, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {54},
  pages     = {232--241},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v54/perolat17a.html},
  timestamp = {Wed, 29 May 2019 08:41:44 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/PerolatSPP17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2017 conference entry; an arXiv preprint with the same title and authors is recorded as DBLP:journals/corr/StrubVMPCP17.
@inproceedings{DBLP:conf/ijcai/StrubVMPCP17,
  author    = {Florian Strub and
               Harm de Vries and
               J{\'{e}}r{\'{e}}mie Mary and
               Bilal Piot and
               Aaron C. Courville and
               Olivier Pietquin},
  editor    = {Carles Sierra},
  title     = {End-to-end optimization of goal-driven and visually grounded dialogue systems},
  booktitle = {Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August 19-25, 2017},
  pages     = {2765--2771},
  publisher = {ijcai.org},
  year      = {2017},
  url       = {https://doi.org/10.24963/ijcai.2017/385},
  doi       = {10.24963/IJCAI.2017/385},
  timestamp = {Tue, 20 Aug 2019 16:16:54 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/StrubVMPCP17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GeistPP17,
  author    = {Matthieu Geist and
               Bilal Piot and
               Olivier Pietquin},
  editor    = {Isabelle Guyon and
               Ulrike von Luxburg and
               Samy Bengio and
               Hanna M. Wallach and
               Rob Fergus and
               S. V. N. Vishwanathan and
               Roman Garnett},
  title     = {Is the {Bellman} residual a bad proxy?},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages     = {3205--3214},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/e0ab531ec312161511493b002f9be2ee-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/GeistPP17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint entry; a conference entry with the same title and authors is recorded as DBLP:conf/ijcai/StrubVMPCP17.
@article{DBLP:journals/corr/StrubVMPCP17,
  author     = {Florian Strub and
                Harm de Vries and
                J{\'{e}}r{\'{e}}mie Mary and
                Bilal Piot and
                Aaron C. Courville and
                Olivier Pietquin},
  title      = {End-to-end optimization of goal-driven and visually grounded dialogue systems},
  journal    = {CoRR},
  volume     = {abs/1703.05423},
  year       = {2017},
  url        = {http://arxiv.org/abs/1703.05423},
  eprinttype = {arXiv},
  eprint     = {1703.05423},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/StrubVMPCP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HesterVPLSPSDOA17,
  author     = {Todd Hester and
                Matej Vecer{\'{\i}}k and
                Olivier Pietquin and
                Marc Lanctot and
                Tom Schaul and
                Bilal Piot and
                Andrew Sendonaris and
                Gabriel Dulac{-}Arnold and
                Ian Osband and
                John P. Agapiou and
                Joel Z. Leibo and
                Audrunas Gruslys},
  title      = {Learning from Demonstrations for Real World Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1704.03732},
  year       = {2017},
  url        = {http://arxiv.org/abs/1704.03732},
  eprinttype = {arXiv},
  eprint     = {1704.03732},
  timestamp  = {Tue, 28 Jul 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HesterVPLSPSDOA17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/BorsaPMP17,
  author     = {Diana Borsa and
                Bilal Piot and
                R{\'{e}}mi Munos and
                Olivier Pietquin},
  title      = {Observational Learning by Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1706.06617},
  year       = {2017},
  url        = {http://arxiv.org/abs/1706.06617},
  eprinttype = {arXiv},
  eprint     = {1706.06617},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/BorsaPMP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint entry; a conference entry with the same title (up to capitalisation) is recorded as DBLP:conf/iclr/FortunatoAPMHOG18.
@article{DBLP:journals/corr/FortunatoAPMOGM17,
  author     = {Meire Fortunato and
                Mohammad Gheshlaghi Azar and
                Bilal Piot and
                Jacob Menick and
                Ian Osband and
                Alex Graves and
                Vlad Mnih and
                R{\'{e}}mi Munos and
                Demis Hassabis and
                Olivier Pietquin and
                Charles Blundell and
                Shane Legg},
  title      = {Noisy Networks for Exploration},
  journal    = {CoRR},
  volume     = {abs/1706.10295},
  year       = {2017},
  url        = {http://arxiv.org/abs/1706.10295},
  eprinttype = {arXiv},
  eprint     = {1706.10295},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/FortunatoAPMOGM17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/VecerikHSWPPHRL17,
  author     = {Matej Vecer{\'{\i}}k and
                Todd Hester and
                Jonathan Scholz and
                Fumin Wang and
                Olivier Pietquin and
                Bilal Piot and
                Nicolas Heess and
                Thomas Roth{\"{o}}rl and
                Thomas Lampe and
                Martin A. Riedmiller},
  title      = {Leveraging Demonstrations for Deep Reinforcement Learning on Robotics Problems with Sparse Rewards},
  journal    = {CoRR},
  volume     = {abs/1707.08817},
  year       = {2017},
  url        = {http://arxiv.org/abs/1707.08817},
  eprinttype = {arXiv},
  eprint     = {1707.08817},
  timestamp  = {Wed, 24 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/VecerikHSWPPHRL17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint entry; a conference entry with the same title is recorded as DBLP:conf/aaai/HesselMHSODHPAS18.
@article{DBLP:journals/corr/abs-1710-02298,
  author     = {Matteo Hessel and
                Joseph Modayil and
                Hado van Hasselt and
                Tom Schaul and
                Georg Ostrovski and
                Will Dabney and
                Daniel Horgan and
                Bilal Piot and
                Mohammad Gheshlaghi Azar and
                David Silver},
  title      = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1710.02298},
  year       = {2017},
  url        = {http://arxiv.org/abs/1710.02298},
  eprinttype = {arXiv},
  eprint     = {1710.02298},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1710-02298.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/PerolatPSP16,
  author    = {Julien P{\'{e}}rolat and
               Bilal Piot and
               Bruno Scherrer and
               Olivier Pietquin},
  editor    = {Arthur Gretton and
               Christian C. Robert},
  title     = {On the Use of Non-Stationary Strategies for Solving Two-Player Zero-Sum {Markov} Games},
  booktitle = {Proceedings of the 19th International Conference on Artificial Intelligence and Statistics, {AISTATS} 2016, Cadiz, Spain, May 9-11, 2016},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {51},
  pages     = {893--901},
  publisher = {JMLR.org},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v51/perolat16.html},
  timestamp = {Wed, 29 May 2019 08:41:44 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/PerolatPSP16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/AsriPGLP16,
  author    = {Layla El Asri and
               Bilal Piot and
               Matthieu Geist and
               Romain Laroche and
               Olivier Pietquin},
  editor    = {Catholijn M. Jonker and
               Stacy Marsella and
               John Thangarajah and
               Karl Tuyls},
  title     = {Score-based Inverse Reinforcement Learning},
  booktitle = {Proceedings of the 2016 International Conference on Autonomous Agents {\&} Multiagent Systems, Singapore, May 9-13, 2016},
  pages     = {457--465},
  publisher = {{ACM}},
  year      = {2016},
  url       = {http://dl.acm.org/citation.cfm?id=2936991},
  timestamp = {Fri, 20 May 2016 20:33:29 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/AsriPGLP16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PerolatPGSP16,
  author    = {Julien P{\'{e}}rolat and
               Bilal Piot and
               Matthieu Geist and
               Bruno Scherrer and
               Olivier Pietquin},
  editor    = {Maria{-}Florina Balcan and
               Kilian Q. Weinberger},
  title     = {Softened Approximate Policy Iteration for {Markov} Games},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning, {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {48},
  pages     = {1860--1868},
  publisher = {JMLR.org},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/perolat16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/PerolatPGSP16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/PiotGP16,
  author     = {Bilal Piot and
                Matthieu Geist and
                Olivier Pietquin},
  title      = {Difference of Convex Functions Programming Applied to Control with Expert Data},
  journal    = {CoRR},
  volume     = {abs/1606.01128},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.01128},
  eprinttype = {arXiv},
  eprint     = {1606.01128},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PiotGP16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GeistPP16,
  author     = {Matthieu Geist and
                Bilal Piot and
                Olivier Pietquin},
  title      = {Should one minimize the expected {Bellman} residual or maximize the mean value?},
  journal    = {CoRR},
  volume     = {abs/1606.07636},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.07636},
  eprinttype = {arXiv},
  eprint     = {1606.07636},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GeistPP16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint entry; a conference entry with the same title and authors is recorded as DBLP:conf/aistats/PerolatSPP17.
@article{DBLP:journals/corr/PerolatSPP16,
  author     = {Julien P{\'{e}}rolat and
                Florian Strub and
                Bilal Piot and
                Olivier Pietquin},
  title      = {Learning {Nash} Equilibrium for General-Sum {Markov} Games from Batch Data},
  journal    = {CoRR},
  volume     = {abs/1606.08718},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.08718},
  eprinttype = {arXiv},
  eprint     = {1606.08718},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PerolatSPP16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PiotPG15,
  author    = {Bilal Piot and
               Olivier Pietquin and
               Matthieu Geist},
  title     = {Imitation Learning Applied to Embodied Conversational Agents},
  booktitle = {Proceedings of the 4th Workshop on Machine Learning for Interactive Systems, {MLIS} 2015, co-located with the 32nd International Conference on Machine Learning {(ICML} 2015), Lille, France, July 11th, 2015},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {43},
  pages     = {1--5},
  publisher = {JMLR.org},
  year      = {2015},
  url       = {http://proceedings.mlr.press/v43/piot15.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/PiotPG15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PerolatSPP15,
  author    = {Julien P{\'{e}}rolat and
               Bruno Scherrer and
               Bilal Piot and
               Olivier Pietquin},
  editor    = {Francis R. Bach and
               David M. Blei},
  title     = {Approximate Dynamic Programming for Two-Player Zero-Sum {Markov} Games},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {37},
  pages     = {1321--1329},
  publisher = {JMLR.org},
  year      = {2015},
  url       = {http://proceedings.mlr.press/v37/perolat15.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/PerolatSPP15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/MunzerPGPL15,
  author    = {Thibaut Munzer and
               Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin and
               Manuel Lopes},
  editor    = {Qiang Yang and
               Michael J. Wooldridge},
  title     = {Inverse Reinforcement Learning in Relational Domains},
  booktitle = {Proceedings of the Twenty-Fourth International Joint Conference on Artificial Intelligence, {IJCAI} 2015, Buenos Aires, Argentina, July 25-31, 2015},
  pages     = {3735--3741},
  publisher = {{AAAI} Press},
  year      = {2015},
  url       = {http://ijcai.org/Abstract/15/525},
  timestamp = {Tue, 20 Aug 2019 16:16:43 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/MunzerPGPL15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/PiotGP14,
  author    = {Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin},
  editor    = {Ana L. C. Bazzan and
               Michael N. Huhns and
               Alessio Lomuscio and
               Paul Scerri},
  title     = {Boosted and reward-regularized classification for apprenticeship learning},
  booktitle = {International conference on Autonomous Agents and Multi-Agent Systems, {AAMAS} '14, Paris, France, May 5-9, 2014},
  pages     = {1249--1256},
  publisher = {{IFAAMAS/ACM}},
  year      = {2014},
  url       = {http://dl.acm.org/citation.cfm?id=2617447},
  timestamp = {Thu, 25 Sep 2014 07:46:15 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/PiotGP14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/PiotPG14,
  author    = {Bilal Piot and
               Olivier Pietquin and
               Matthieu Geist},
  editor    = {Haizhou Li and
               Helen M. Meng and
               Bin Ma and
               Engsiong Chng and
               Lei Xie},
  title     = {Predicting when to laugh with structured classification},
  booktitle = {15th Annual Conference of the International Speech Communication Association, {INTERSPEECH} 2014, Singapore, September 14-18, 2014},
  pages     = {1786--1790},
  publisher = {{ISCA}},
  year      = {2014},
  url       = {https://doi.org/10.21437/Interspeech.2014-407},
  doi       = {10.21437/INTERSPEECH.2014-407},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/PiotPG14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/PiotGP14,
  author    = {Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin},
  editor    = {Zoubin Ghahramani and
               Max Welling and
               Corinna Cortes and
               Neil D. Lawrence and
               Kilian Q. Weinberger},
  title     = {Difference of Convex Functions Programming for Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages     = {2519--2527},
  year      = {2014},
  url       = {https://proceedings.neurips.cc/paper/2014/hash/50905d7b2216bfeccb5b41016357176b-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/PiotGP14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pkdd/PiotGP14,
  author    = {Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin},
  editor    = {Toon Calders and
               Floriana Esposito and
               Eyke H{\"{u}}llermeier and
               Rosa Meo},
  title     = {Boosted {Bellman} Residual Minimization Handling Expert Demonstrations},
  booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, {ECML} {PKDD} 2014, Nancy, France, September 15-19, 2014. Proceedings, Part {II}},
  series    = {Lecture Notes in Computer Science},
  volume    = {8725},
  pages     = {549--564},
  publisher = {Springer},
  year      = {2014},
  url       = {https://doi.org/10.1007/978-3-662-44851-9\_35},
  doi       = {10.1007/978-3-662-44851-9\_35},
  timestamp = {Thu, 31 Oct 2019 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/pkdd/PiotGP14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ria/KleinPGP13,
  author    = {Edouard Klein and
               Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin},
  title     = {Classification structur{\'{e}}e pour l'apprentissage par renforcement inverse},
  journal   = {Rev. d'Intelligence Artif.},
  volume    = {27},
  number    = {2},
  pages     = {155--169},
  year      = {2013},
  url       = {https://doi.org/10.3166/ria.27.155-169},
  doi       = {10.3166/RIA.27.155-169},
  timestamp = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ria/KleinPGP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/NiewiadomskiHUPWPCPBDGLMPR13,
  author    = {Radoslaw Niewiadomski and
               Jennifer Hofmann and
               J{\'{e}}r{\^{o}}me Urbain and
               Tracey Platt and
               Johannes Wagner and
               Bilal Piot and
               H{\"{u}}seyin {\c{C}}akmak and
               Sathish Pammi and
               Tobias Baur and
               St{\'{e}}phane Dupont and
               Matthieu Geist and
               Florian Lingenfelser and
               Gary McKeown and
               Olivier Pietquin and
               Willibald Ruch},
  editor    = {Maria L. Gini and
               Onn Shehory and
               Takayuki Ito and
               Catholijn M. Jonker},
  title     = {Laugh-aware virtual agent and its impact on user amusement},
  booktitle = {International conference on Autonomous Agents and Multi-Agent Systems, {AAMAS} '13, Saint Paul, MN, USA, May 6-10, 2013},
  pages     = {619--626},
  publisher = {{IFAAMAS}},
  year      = {2013},
  url       = {http://dl.acm.org/citation.cfm?id=2485018},
  timestamp = {Thu, 09 Sep 2021 16:09:54 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/NiewiadomskiHUPWPCPBDGLMPR13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ifip5-5/ManciniABBBDDDGLNPPPUVW13,
  author    = {Maurizio Mancini and
               Laurent Ach and
               Emeline Bantegnie and
               Tobias Baur and
               Nadia Berthouze and
               Debajyoti Datta and
               Yu Ding and
               St{\'{e}}phane Dupont and
               Harry J. Griffin and
               Florian Lingenfelser and
               Radoslaw Niewiadomski and
               Catherine Pelachaud and
               Olivier Pietquin and
               Bilal Piot and
               J{\'{e}}r{\^{o}}me Urbain and
               Gualtiero Volpe and
               Johannes Wagner},
  editor    = {Yves Rybarczyk and
               Tiago Cardoso and
               Jo{\~{a}}o Rosas and
               Luis M. Camarinha{-}Matos},
  title     = {Laugh When You're Winning},
  booktitle = {Innovative and Creative Developments in Multimodal Interaction Systems - 9th {IFIP} {WG} 5.5 International Summer Workshop on Multimodal Interfaces, eNTERFACE 2013, Lisbon, Portugal, July 15 - August 9, 2013. Proceedings},
  series    = {{IFIP} Advances in Information and Communication Technology},
  volume    = {425},
  pages     = {50--79},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-55143-7\_3},
  doi       = {10.1007/978-3-642-55143-7\_3},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ifip5-5/ManciniABBBDDDGLNPPPUVW13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pkdd/KleinPGP13,
  author    = {Edouard Klein and
               Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin},
  editor    = {Hendrik Blockeel and
               Kristian Kersting and
               Siegfried Nijssen and
               Filip Zelezn{\'{y}}},
  title     = {A Cascaded Supervised Learning Approach to Inverse Reinforcement Learning},
  booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, {ECML} {PKDD} 2013, Prague, Czech Republic, September 23-27, 2013, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {8188},
  pages     = {1--16},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-40988-2\_1},
  doi       = {10.1007/978-3-642-40988-2\_1},
  timestamp = {Tue, 21 Mar 2023 21:00:11 +0100},
  biburl    = {https://dblp.org/rec/conf/pkdd/KleinPGP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pkdd/PiotGP13,
  author    = {Bilal Piot and
               Matthieu Geist and
               Olivier Pietquin},
  editor    = {Hendrik Blockeel and
               Kristian Kersting and
               Siegfried Nijssen and
               Filip Zelezn{\'{y}}},
  title     = {Learning from Demonstrations: Is It Worth Estimating a Reward Function?},
  booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, {ECML} {PKDD} 2013, Prague, Czech Republic, September 23-27, 2013, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {8188},
  pages     = {17--32},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-40988-2\_2},
  doi       = {10.1007/978-3-642-40988-2\_2},
  timestamp = {Sun, 02 Jun 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/pkdd/PiotGP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/KleinGPP12,
  author    = {Edouard Klein and
               Matthieu Geist and
               Bilal Piot and
               Olivier Pietquin},
  editor    = {Peter L. Bartlett and
               Fernando C. N. Pereira and
               Christopher J. C. Burges and
               L{\'{e}}on Bottou and
               Kilian Q. Weinberger},
  title     = {Inverse Reinforcement Learning through Structured Classification},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages     = {1016--1024},
  year      = {2012},
  url       = {https://proceedings.neurips.cc/paper/2012/hash/559cb990c9dffd8675f6bc2186971dc2-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/KleinGPP12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.