default search action
BibTeX records: William Fedus
@article{DBLP:journals/jmlr/ChungHLZTFL00BW24, author = {Hyung Won Chung and Le Hou and Shayne Longpre and Barret Zoph and Yi Tay and William Fedus and Yunxuan Li and Xuezhi Wang and Mostafa Dehghani and Siddhartha Brahma and Albert Webson and Shixiang Shane Gu and Zhuyun Dai and Mirac Suzgun and Xinyun Chen and Aakanksha Chowdhery and Alex Castro{-}Ros and Marie Pellat and Kevin Robinson and Dasha Valter and Sharan Narang and Gaurav Mishra and Adams Yu and Vincent Y. Zhao and Yanping Huang and Andrew M. Dai and Hongkun Yu and Slav Petrov and Ed H. Chi and Jeff Dean and Jacob Devlin and Adam Roberts and Denny Zhou and Quoc V. Le and Jason Wei}, title = {Scaling Instruction-Finetuned Language Models}, journal = {J. Mach. Learn. Res.}, volume = {25}, pages = {70:1--70:53}, year = {2024}, url = {https://jmlr.org/papers/v25/23-0870.html}, timestamp = {Mon, 16 Sep 2024 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/jmlr/ChungHLZTFL00BW24.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/ShenHZ0LWCZFCVW24, author = {Sheng Shen and Le Hou and Yanqi Zhou and Nan Du and Shayne Longpre and Jason Wei and Hyung Won Chung and Barret Zoph and William Fedus and Xinyun Chen and Tu Vu and Yuexin Wu and Wuyang Chen and Albert Webson and Yunxuan Li and Vincent Y. Zhao and Hongkun Yu and Kurt Keutzer and Trevor Darrell and Denny Zhou}, title = {Mixture-of-Experts Meets Instruction Tuning: {A} Winning Combination for Large Language Models}, booktitle = {The Twelfth International Conference on Learning Representations, {ICLR} 2024, Vienna, Austria, May 7-11, 2024}, publisher = {OpenReview.net}, year = {2024}, url = {https://openreview.net/forum?id=6mLjDwYte5}, timestamp = {Wed, 07 Aug 2024 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/iclr/ShenHZ0LWCZFCVW24.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/jmlr/ChowdheryNDBMRBCSGSSTMRBTSPRDHPBAI23, author = {Aakanksha Chowdhery and Sharan Narang and Jacob Devlin and Maarten Bosma and Gaurav Mishra and Adam Roberts and Paul Barham and Hyung Won Chung and Charles Sutton and Sebastian Gehrmann and Parker Schuh and Kensen Shi and Sasha Tsvyashchenko and Joshua Maynez and Abhishek Rao and Parker Barnes and Yi Tay and Noam Shazeer and Vinodkumar Prabhakaran and Emily Reif and Nan Du and Ben Hutchinson and Reiner Pope and James Bradbury and Jacob Austin and Michael Isard and Guy Gur{-}Ari and Pengcheng Yin and Toju Duke and Anselm Levskaya and Sanjay Ghemawat and Sunipa Dev and Henryk Michalewski and Xavier Garcia and Vedant Misra and Kevin Robinson and Liam Fedus and Denny Zhou and Daphne Ippolito and David Luan and Hyeontaek Lim and Barret Zoph and Alexander Spiridonov and Ryan Sepassi and David Dohan and Shivani Agrawal and Mark Omernick and Andrew M. Dai and Thanumalayan Sankaranarayana Pillai and Marie Pellat and Aitor Lewkowycz and Erica Moreira and Rewon Child and Oleksandr Polozov and Katherine Lee and Zongwei Zhou and Xuezhi Wang and Brennan Saeta and Mark Diaz and Orhan Firat and Michele Catasta and Jason Wei and Kathy Meier{-}Hellstern and Douglas Eck and Jeff Dean and Slav Petrov and Noah Fiedel}, title = {PaLM: Scaling Language Modeling with Pathways}, journal = {J. Mach. Learn. Res.}, volume = {24}, pages = {240:1--240:113}, year = {2023}, url = {https://jmlr.org/papers/v24/22-1144.html}, timestamp = {Wed, 11 Sep 2024 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/jmlr/ChowdheryNDBMRBCSGSSTMRBTSPRDHPBAI23.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/tmlr/SrivastavaRRSAF23, author = {Aarohi Srivastava and Abhinav Rastogi and Abhishek Rao and Abu Awal Md Shoeb and Abubakar Abid and Adam Fisch and Adam R. Brown and Adam Santoro and Aditya Gupta and Adri{\`{a}} Garriga{-}Alonso and Agnieszka Kluska and Aitor Lewkowycz and Akshat Agarwal and Alethea Power and Alex Ray and Alex Warstadt and Alexander W. Kocurek and Ali Safaya and Ali Tazarv and Alice Xiang and Alicia Parrish and Allen Nie and Aman Hussain and Amanda Askell and Amanda Dsouza and Ambrose Slone and Ameet Rahane and Anantharaman S. Iyer and Anders Andreassen and Andrea Madotto and Andrea Santilli and Andreas Stuhlm{\"{u}}ller and Andrew M. Dai and Andrew La and Andrew K. Lampinen and Andy Zou and Angela Jiang and Angelica Chen and Anh Vuong and Animesh Gupta and Anna Gottardi and Antonio Norelli and Anu Venkatesh and Arash Gholamidavoodi and Arfa Tabassum and Arul Menezes and Arun Kirubarajan and Asher Mullokandov and Ashish Sabharwal and Austin Herrick and Avia Efrat and Aykut Erdem and Ayla Karakas and B. Ryan Roberts and Bao Sheng Loe and Barret Zoph and Bartlomiej Bojanowski and Batuhan {\"{O}}zyurt and Behnam Hedayatnia and Behnam Neyshabur and Benjamin Inden and Benno Stein and Berk Ekmekci and Bill Yuchen Lin and Blake Howald and Bryan Orinion and Cameron Diao and Cameron Dour and Catherine Stinson and Cedrick Argueta and C{\`{e}}sar Ferri Ram{\'{\i}}rez and Chandan Singh and Charles Rathkopf and Chenlin Meng and Chitta Baral and Chiyu Wu and Chris Callison{-}Burch and Chris Waites and Christian Voigt and Christopher D. Manning and Christopher Potts and Cindy Ramirez and Clara E. Rivera and Clemencia Siro and Colin Raffel and Courtney Ashcraft and Cristina Garbacea and Damien Sileo and Dan Garrette and Dan Hendrycks and Dan Kilman and Dan Roth and Daniel Freeman and Daniel Khashabi and Daniel Levy and Daniel Mosegu{\'{\i}} Gonz{\'{a}}lez and Danielle Perszyk and Danny Hernandez and Danqi Chen and Daphne Ippolito and Dar Gilboa and David Dohan and David Drakard and David Jurgens and Debajyoti Datta and Deep Ganguli and Denis Emelin and Denis Kleyko and Deniz Yuret and Derek Chen and Derek Tam and Dieuwke Hupkes and Diganta Misra and Dilyar Buzan and Dimitri Coelho Mollo and Diyi Yang and Dong{-}Ho Lee and Dylan Schrader and Ekaterina Shutova and Ekin Dogus Cubuk and Elad Segal and Eleanor Hagerman and Elizabeth Barnes and Elizabeth Donoway and Ellie Pavlick and Emanuele Rodol{\`{a}} and Emma Lam and Eric Chu and Eric Tang and Erkut Erdem and Ernie Chang and Ethan A. Chi and Ethan Dyer and Ethan J. Jerzak and Ethan Kim and Eunice Engefu Manyasi and Evgenii Zheltonozhskii and Fanyue Xia and Fatemeh Siar and Fernando Mart{\'{\i}}nez{-}Plumed and Francesca Happ{\'{e}} and Fran{\c{c}}ois Chollet and Frieda Rong and Gaurav Mishra and Genta Indra Winata and Gerard de Melo and Germ{\'{a}}n Kruszewski and Giambattista Parascandolo and Giorgio Mariani and Gloria Wang and Gonzalo Jaimovitch{-}L{\'{o}}pez and Gregor Betz and Guy Gur{-}Ari and Hana Galijasevic and Hannah Kim and Hannah Rashkin and Hannaneh Hajishirzi and Harsh Mehta and Hayden Bogar and Henry Shevlin and Hinrich Sch{\"{u}}tze and Hiromu Yakura and Hongming Zhang and Hugh Mee Wong and Ian Ng and Isaac Noble and Jaap Jumelet and Jack Geissinger and Jackson Kernion and Jacob Hilton and Jaehoon Lee and Jaime Fern{\'{a}}ndez Fisac and James B. Simon and James Koppel and James Zheng and James Zou and Jan Kocon and Jana Thompson and Janelle Wingfield and Jared Kaplan and Jarema Radom and Jascha Sohl{-}Dickstein and Jason Phang and Jason Wei and Jason Yosinski and Jekaterina Novikova and Jelle Bosscher and Jennifer Marsh and Jeremy Kim and Jeroen Taal and Jesse H. Engel and Jesujoba Alabi and Jiacheng Xu and Jiaming Song and Jillian Tang and Joan Waweru and John Burden and John Miller and John U. Balis and Jonathan Batchelder and Jonathan Berant and J{\"{o}}rg Frohberg and Jos Rozen and Jos{\'{e}} Hern{\'{a}}ndez{-}Orallo and Joseph Boudeman and Joseph Guerr and Joseph Jones and Joshua B. Tenenbaum and Joshua S. Rule and Joyce Chua and Kamil Kanclerz and Karen Livescu and Karl Krauth and Karthik Gopalakrishnan and Katerina Ignatyeva and Katja Markert and Kaustubh D. Dhole and Kevin Gimpel and Kevin Omondi and Kory Mathewson and Kristen Chiafullo and Ksenia Shkaruta and Kumar Shridhar and Kyle McDonell and Kyle Richardson and Laria Reynolds and Leo Gao and Li Zhang and Liam Dugan and Lianhui Qin and Lidia Contreras Ochando and Louis{-}Philippe Morency and Luca Moschella and Lucas Lam and Lucy Noble and Ludwig Schmidt and Luheng He and Luis Oliveros Col{\'{o}}n and Luke Metz and L{\"{u}}tfi Kerem Senel and Maarten Bosma and Maarten Sap and Maartje ter Hoeve and Maheen Farooqi and Manaal Faruqui and Mantas Mazeika and Marco Baturan and Marco Marelli and Marco Maru and Mar{\'{\i}}a Jos{\'{e}} Ram{\'{\i}}rez{-}Quintana and Marie Tolkiehn and Mario Giulianelli and Martha Lewis and Martin Potthast and Matthew L. Leavitt and Matthias Hagen and M{\'{a}}ty{\'{a}}s Schubert and Medina Baitemirova and Melody Arnaud and Melvin McElrath and Michael A. Yee and Michael Cohen and Michael Gu and Michael I. Ivanitskiy and Michael Starritt and Michael Strube and Michal Swedrowski and Michele Bevilacqua and Michihiro Yasunaga and Mihir Kale and Mike Cain and Mimee Xu and Mirac Suzgun and Mitch Walker and Mo Tiwari and Mohit Bansal and Moin Aminnaseri and Mor Geva and Mozhdeh Gheini and Mukund Varma T. and Nanyun Peng and Nathan A. Chi and Nayeon Lee and Neta Gur{-}Ari Krakover and Nicholas Cameron and Nicholas Roberts and Nick Doiron and Nicole Martinez and Nikita Nangia and Niklas Deckers and Niklas Muennighoff and Nitish Shirish Keskar and Niveditha Iyer and Noah Constant and Noah Fiedel and Nuan Wen and Oliver Zhang and Omar Agha and Omar Elbaghdadi and Omer Levy and Owain Evans and Pablo Antonio Moreno Casares and Parth Doshi and Pascale Fung and Paul Pu Liang and Paul Vicol and Pegah Alipoormolabashi and Peiyuan Liao and Percy Liang and Peter Chang and Peter Eckersley and Phu Mon Htut and Pinyu Hwang and Piotr Milkowski and Piyush Patil and Pouya Pezeshkpour and Priti Oli and Qiaozhu Mei and Qing Lyu and Qinlang Chen and Rabin Banjade and Rachel Etta Rudolph and Raefer Gabriel and Rahel Habacker and Ramon Risco and Rapha{\"{e}}l Milli{\`{e}}re and Rhythm Garg and Richard Barnes and Rif A. Saurous and Riku Arakawa and Robbe Raymaekers and Robert Frank and Rohan Sikand and Roman Novak and Roman Sitelew and Ronan LeBras and Rosanne Liu and Rowan Jacobs and Rui Zhang and Ruslan Salakhutdinov and Ryan Chi and Ryan Lee and Ryan Stovall and Ryan Teehan and Rylan Yang and Sahib Singh and Saif M. Mohammad and Sajant Anand and Sam Dillavou and Sam Shleifer and Sam Wiseman and Samuel Gruetter and Samuel R. Bowman and Samuel S. Schoenholz and Sanghyun Han and Sanjeev Kwatra and Sarah A. Rous and Sarik Ghazarian and Sayan Ghosh and Sean Casey and Sebastian Bischoff and Sebastian Gehrmann and Sebastian Schuster and Sepideh Sadeghi and Shadi Hamdan and Sharon Zhou and Shashank Srivastava and Sherry Shi and Shikhar Singh and Shima Asaadi and Shixiang Shane Gu and Shubh Pachchigar and Shubham Toshniwal and Shyam Upadhyay and Shyamolima (Shammie) Debnath and Siamak Shakeri and Simon Thormeyer and Simone Melzi and Siva Reddy and Sneha Priscilla Makini and Soo{-}Hwan Lee and Spencer Torene and Sriharsha Hatwar and Stanislas Dehaene and Stefan Divic and Stefano Ermon and Stella Biderman and Stephanie Lin and Stephen Prasad and Steven T. Piantadosi and Stuart M. Shieber and Summer Misherghi and Svetlana Kiritchenko and Swaroop Mishra and Tal Linzen and Tal Schuster and Tao Li and Tao Yu and Tariq Ali and Tatsu Hashimoto and Te{-}Lin Wu and Th{\'{e}}o Desbordes and Theodore Rothschild and Thomas Phan and Tianle Wang and Tiberius Nkinyili and Timo Schick and Timofei Kornev and Titus Tunduny and Tobias Gerstenberg and Trenton Chang and Trishala Neeraj and Tushar Khot and Tyler Shultz and Uri Shaham and Vedant Misra and Vera Demberg and Victoria Nyamai and Vikas Raunak and Vinay V. Ramasesh and Vinay Uday Prabhu and Vishakh Padmakumar and Vivek Srikumar and William Fedus and William Saunders and William Zhang and Wout Vossen and Xiang Ren and Xiaoyu Tong and Xinran Zhao and Xinyi Wu and Xudong Shen and Yadollah Yaghoobzadeh and Yair Lakretz and Yangqiu Song and Yasaman Bahri and Yejin Choi and Yichi Yang and Yiding Hao and Yifu Chen and Yonatan Belinkov and Yu Hou and Yufang Hou and Yuntao Bai and Zachary Seid and Zhuoye Zhao and Zijian Wang and Zijie J. Wang and Zirui Wang and Ziyi Wu}, title = {Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models}, journal = {Trans. Mach. Learn. Res.}, volume = {2023}, year = {2023}, url = {https://openreview.net/forum?id=uyTL5Bvosj}, timestamp = {Tue, 06 Aug 2024 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/tmlr/SrivastavaRRSAF23.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/emnlp/Tay0ACFRN0YM23, author = {Yi Tay and Mostafa Dehghani and Samira Abnar and Hyung Won Chung and William Fedus and Jinfeng Rao and Sharan Narang and Vinh Q. Tran and Dani Yogatama and Donald Metzler}, editor = {Houda Bouamor and Juan Pino and Kalika Bali}, title = {Scaling Laws vs Model Architectures: How does Inductive Bias Influence Scaling?}, booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2023, Singapore, December 6-10, 2023}, pages = {12342--12364}, publisher = {Association for Computational Linguistics}, year = {2023}, url = {https://doi.org/10.18653/v1/2023.findings-emnlp.825}, doi = {10.18653/V1/2023.FINDINGS-EMNLP.825}, timestamp = {Fri, 12 Apr 2024 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/emnlp/Tay0ACFRN0YM23.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2305-14705, author = {Sheng Shen and Le Hou and Yanqi Zhou and Nan Du and Shayne Longpre and Jason Wei and Hyung Won Chung and Barret Zoph and William Fedus and Xinyun Chen and Tu Vu and Yuexin Wu and Wuyang Chen and Albert Webson and Yunxuan Li and Vincent Y. Zhao and Hongkun Yu and Kurt Keutzer and Trevor Darrell and Denny Zhou}, title = {Flan-MoE: Scaling Instruction-Finetuned Language Models with Sparse Mixture of Experts}, journal = {CoRR}, volume = {abs/2305.14705}, year = {2023}, url = {https://doi.org/10.48550/arXiv.2305.14705}, doi = {10.48550/ARXIV.2305.14705}, eprinttype = {arXiv}, eprint = {2305.14705}, timestamp = {Tue, 27 Feb 2024 00:00:00 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2305-14705.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/jmlr/FedusZS22, author = {William Fedus and Barret Zoph and Noam Shazeer}, title = {Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity}, journal = {J. Mach. Learn. Res.}, volume = {23}, pages = {120:1--120:39}, year = {2022}, url = {https://jmlr.org/papers/v23/21-0998.html}, timestamp = {Wed, 11 Sep 2024 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/jmlr/FedusZS22.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/tmlr/WeiTBRZBYBZMCHVLDF22, author = {Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel and Barret Zoph and Sebastian Borgeaud and Dani Yogatama and Maarten Bosma and Denny Zhou and Donald Metzler and Ed H. Chi and Tatsunori Hashimoto and Oriol Vinyals and Percy Liang and Jeff Dean and William Fedus}, title = {Emergent Abilities of Large Language Models}, journal = {Trans. Mach. Learn. Res.}, volume = {2022}, year = {2022}, url = {https://openreview.net/forum?id=yzkSU5zdwD}, timestamp = {Fri, 19 May 2023 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/tmlr/WeiTBRZBYBZMCHVLDF22.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/Tay0RFACNYVM22, author = {Yi Tay and Mostafa Dehghani and Jinfeng Rao and William Fedus and Samira Abnar and Hyung Won Chung and Sharan Narang and Dani Yogatama and Ashish Vaswani and Donald Metzler}, title = {Scale Efficiently: Insights from Pretraining and Finetuning Transformers}, booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022}, publisher = {OpenReview.net}, year = {2022}, url = {https://openreview.net/forum?id=f2OYVDyfIB}, timestamp = {Sat, 20 Aug 2022 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/iclr/Tay0RFACNYVM22.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/icml/DuHDTLXKZYFZFBZ22, author = {Nan Du and Yanping Huang and Andrew M. Dai and Simon Tong and Dmitry Lepikhin and Yuanzhong Xu and Maxim Krikun and Yanqi Zhou and Adams Wei Yu and Orhan Firat and Barret Zoph and Liam Fedus and Maarten P. Bosma and Zongwei Zhou and Tao Wang and Yu Emma Wang and Kellie Webster and Marie Pellat and Kevin Robinson and Kathleen S. Meier{-}Hellstern and Toju Duke and Lucas Dixon and Kun Zhang and Quoc V. Le and Yonghui Wu and Zhifeng Chen and Claire Cui}, editor = {Kamalika Chaudhuri and Stefanie Jegelka and Le Song and Csaba Szepesv{\'{a}}ri and Gang Niu and Sivan Sabato}, title = {GLaM: Efficient Scaling of Language Models with Mixture-of-Experts}, booktitle = {International Conference on Machine Learning, {ICML} 2022, 17-23 July 2022, Baltimore, Maryland, {USA}}, series = {Proceedings of Machine Learning Research}, volume = {162}, pages = {5547--5569}, publisher = {{PMLR}}, year = {2022}, url = {https://proceedings.mlr.press/v162/du22c.html}, timestamp = {Wed, 16 Aug 2023 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/icml/DuHDTLXKZYFZFBZ22.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2202-08906, author = {Barret Zoph and Irwan Bello and Sameer Kumar and Nan Du and Yanping Huang and Jeff Dean and Noam Shazeer and William Fedus}, title = {Designing Effective Sparse Expert Models}, journal = {CoRR}, volume = {abs/2202.08906}, year = {2022}, url = {https://arxiv.org/abs/2202.08906}, eprinttype = {arXiv}, eprint = {2202.08906}, timestamp = {Thu, 17 Aug 2023 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2202-08906.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2204-02311, author = {Aakanksha Chowdhery and Sharan Narang and Jacob Devlin and Maarten Bosma and Gaurav Mishra and Adam Roberts and Paul Barham and Hyung Won Chung and Charles Sutton and Sebastian Gehrmann and Parker Schuh and Kensen Shi and Sasha Tsvyashchenko and Joshua Maynez and Abhishek Rao and Parker Barnes and Yi Tay and Noam Shazeer and Vinodkumar Prabhakaran and Emily Reif and Nan Du and Ben Hutchinson and Reiner Pope and James Bradbury and Jacob Austin and Michael Isard and Guy Gur{-}Ari and Pengcheng Yin and Toju Duke and Anselm Levskaya and Sanjay Ghemawat and Sunipa Dev and Henryk Michalewski and Xavier Garcia and Vedant Misra and Kevin Robinson and Liam Fedus and Denny Zhou and Daphne Ippolito and David Luan and Hyeontaek Lim and Barret Zoph and Alexander Spiridonov and Ryan Sepassi and David Dohan and Shivani Agrawal and Mark Omernick and Andrew M. Dai and Thanumalayan Sankaranarayana Pillai and Marie Pellat and Aitor Lewkowycz and Erica Moreira and Rewon Child and Oleksandr Polozov and Katherine Lee and Zongwei Zhou and Xuezhi Wang and Brennan Saeta and Mark Diaz and Orhan Firat and Michele Catasta and Jason Wei and Kathy Meier{-}Hellstern and Douglas Eck and Jeff Dean and Slav Petrov and Noah Fiedel}, title = {PaLM: Scaling Language Modeling with Pathways}, journal = {CoRR}, volume = {abs/2204.02311}, year = {2022}, url = {https://doi.org/10.48550/arXiv.2204.02311}, doi = {10.48550/ARXIV.2204.02311}, eprinttype = {arXiv}, eprint = {2204.02311}, timestamp = {Wed, 16 Aug 2023 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2204-02311.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2206-04615, author = {Aarohi Srivastava and Abhinav Rastogi and Abhishek Rao and Abu Awal Md Shoeb and Abubakar Abid and Adam Fisch and Adam R. Brown and Adam Santoro and Aditya Gupta and Adri{\`{a}} Garriga{-}Alonso and Agnieszka Kluska and Aitor Lewkowycz and Akshat Agarwal and Alethea Power and Alex Ray and Alex Warstadt and Alexander W. Kocurek and Ali Safaya and Ali Tazarv and Alice Xiang and Alicia Parrish and Allen Nie and Aman Hussain and Amanda Askell and Amanda Dsouza and Ambrose Slone and Ameet Rahane and Anantharaman S. Iyer and Anders Andreassen and Andrea Madotto and Andrea Santilli and Andreas Stuhlm{\"{u}}ller and Andrew M. Dai and Andrew La and Andrew K. Lampinen and Andy Zou and Angela Jiang and Angelica Chen and Anh Vuong and Animesh Gupta and Anna Gottardi and Antonio Norelli and Anu Venkatesh and Arash Gholamidavoodi and Arfa Tabassum and Arul Menezes and Arun Kirubarajan and Asher Mullokandov and Ashish Sabharwal and Austin Herrick and Avia Efrat and Aykut Erdem and Ayla Karakas and B. Ryan Roberts and Bao Sheng Loe and Barret Zoph and Bartlomiej Bojanowski and Batuhan {\"{O}}zyurt and Behnam Hedayatnia and Behnam Neyshabur and Benjamin Inden and Benno Stein and Berk Ekmekci and Bill Yuchen Lin and Blake Howald and Bryan Orinion and Cameron Diao and Cameron Dour and Catherine Stinson and Cedrick Argueta and C{\`{e}}sar Ferri Ram{\'{\i}}rez and Chandan Singh and Charles Rathkopf and Chenlin Meng and Chitta Baral and Chiyu Wu and Chris Callison{-}Burch and Chris Waites and Christian Voigt and Christopher D. Manning and Christopher Potts and Cindy Ramirez and Clara E. Rivera and Clemencia Siro and Colin Raffel and Courtney Ashcraft and Cristina Garbacea and Damien Sileo and Dan Garrette and Dan Hendrycks and Dan Kilman and Dan Roth and Daniel Freeman and Daniel Khashabi and Daniel Levy and Daniel Mosegu{\'{\i}} Gonz{\'{a}}lez and Danielle Perszyk and Danny Hernandez and Danqi Chen and Daphne Ippolito and Dar Gilboa and David Dohan and David Drakard and David Jurgens and Debajyoti Datta and Deep Ganguli and Denis Emelin and Denis Kleyko and Deniz Yuret and Derek Chen and Derek Tam and Dieuwke Hupkes and Diganta Misra and Dilyar Buzan and Dimitri Coelho Mollo and Diyi Yang and Dong{-}Ho Lee and Dylan Schrader and Ekaterina Shutova and Ekin Dogus Cubuk and Elad Segal and Eleanor Hagerman and Elizabeth Barnes and Elizabeth Donoway and Ellie Pavlick and Emanuele Rodol{\`{a}} and Emma Lam and Eric Chu and Eric Tang and Erkut Erdem and Ernie Chang and Ethan A. Chi and Ethan Dyer and Ethan J. Jerzak and Ethan Kim and Eunice Engefu Manyasi and Evgenii Zheltonozhskii and Fanyue Xia and Fatemeh Siar and Fernando Mart{\'{\i}}nez{-}Plumed and Francesca Happ{\'{e}} and Fran{\c{c}}ois Chollet and Frieda Rong and Gaurav Mishra and Genta Indra Winata and Gerard de Melo and Germ{\'{a}}n Kruszewski and Giambattista Parascandolo and Giorgio Mariani and Gloria Wang and Gonzalo Jaimovitch{-}L{\'{o}}pez and Gregor Betz and Guy Gur{-}Ari and Hana Galijasevic and Hannah Kim and Hannah Rashkin and Hannaneh Hajishirzi and Harsh Mehta and Hayden Bogar and Henry Shevlin and Hinrich Sch{\"{u}}tze and Hiromu Yakura and Hongming Zhang and Hugh Mee Wong and Ian Ng and Isaac Noble and Jaap Jumelet and Jack Geissinger and Jackson Kernion and Jacob Hilton and Jaehoon Lee and Jaime Fern{\'{a}}ndez Fisac and James B. Simon and James Koppel and James Zheng and James Zou and Jan Kocon and Jana Thompson and Janelle Wingfield and Jared Kaplan and Jarema Radom and Jascha Sohl{-}Dickstein and Jason Phang and Jason Wei and Jason Yosinski and Jekaterina Novikova and Jelle Bosscher and Jennifer Marsh and Jeremy Kim and Jeroen Taal and Jesse H. Engel and Jesujoba Alabi and Jiacheng Xu and Jiaming Song and Jillian Tang and Joan Waweru and John Burden and John Miller and John U. Balis and Jonathan Batchelder and Jonathan Berant and J{\"{o}}rg Frohberg and Jos Rozen and Jos{\'{e}} Hern{\'{a}}ndez{-}Orallo and Joseph Boudeman and Joseph Guerr and Joseph Jones and Joshua B. Tenenbaum and Joshua S. Rule and Joyce Chua and Kamil Kanclerz and Karen Livescu and Karl Krauth and Karthik Gopalakrishnan and Katerina Ignatyeva and Katja Markert and Kaustubh D. Dhole and Kevin Gimpel and Kevin Omondi and Kory Mathewson and Kristen Chiafullo and Ksenia Shkaruta and Kumar Shridhar and Kyle McDonell and Kyle Richardson and Laria Reynolds and Leo Gao and Li Zhang and Liam Dugan and Lianhui Qin and Lidia Contreras Ochando and Louis{-}Philippe Morency and Luca Moschella and Lucas Lam and Lucy Noble and Ludwig Schmidt and Luheng He and Luis Oliveros Col{\'{o}}n and Luke Metz and L{\"{u}}tfi Kerem Senel and Maarten Bosma and Maarten Sap and Maartje ter Hoeve and Maheen Farooqi and Manaal Faruqui and Mantas Mazeika and Marco Baturan and Marco Marelli and Marco Maru and Mar{\'{\i}}a Jos{\'{e}} Ram{\'{\i}}rez{-}Quintana and Marie Tolkiehn and Mario Giulianelli and Martha Lewis and Martin Potthast and Matthew L. Leavitt and Matthias Hagen and M{\'{a}}ty{\'{a}}s Schubert and Medina Baitemirova and Melody Arnaud and Melvin McElrath and Michael A. Yee and Michael Cohen and Michael Gu and Michael I. Ivanitskiy and Michael Starritt and Michael Strube and Michal Swedrowski and Michele Bevilacqua and Michihiro Yasunaga and Mihir Kale and Mike Cain and Mimee Xu and Mirac Suzgun and Mitch Walker and Mo Tiwari and Mohit Bansal and Moin Aminnaseri and Mor Geva and Mozhdeh Gheini and Mukund Varma T. and Nanyun Peng and Nathan A. Chi and Nayeon Lee and Neta Gur{-}Ari Krakover and Nicholas Cameron and Nicholas Roberts and Nick Doiron and Nicole Martinez and Nikita Nangia and Niklas Deckers and Niklas Muennighoff and Nitish Shirish Keskar and Niveditha Iyer and Noah Constant and Noah Fiedel and Nuan Wen and Oliver Zhang and Omar Agha and Omar Elbaghdadi and Omer Levy and Owain Evans and Pablo Antonio Moreno Casares and Parth Doshi and Pascale Fung and Paul Pu Liang and Paul Vicol and Pegah Alipoormolabashi and Peiyuan Liao and Percy Liang and Peter Chang and Peter Eckersley and Phu Mon Htut and Pinyu Hwang and Piotr Milkowski and Piyush Patil and Pouya Pezeshkpour and Priti Oli and Qiaozhu Mei and Qing Lyu and Qinlang Chen and Rabin Banjade and Rachel Etta Rudolph and Raefer Gabriel and Rahel Habacker and Ramon Risco and Rapha{\"{e}}l Milli{\`{e}}re and Rhythm Garg and Richard Barnes and Rif A. Saurous and Riku Arakawa and Robbe Raymaekers and Robert Frank and Rohan Sikand and Roman Novak and Roman Sitelew and Ronan LeBras and Rosanne Liu and Rowan Jacobs and Rui Zhang and Ruslan Salakhutdinov and Ryan Chi and Ryan Lee and Ryan Stovall and Ryan Teehan and Rylan Yang and Sahib Singh and Saif M. Mohammad and Sajant Anand and Sam Dillavou and Sam Shleifer and Sam Wiseman and Samuel Gruetter and Samuel R. Bowman and Samuel S. Schoenholz and Sanghyun Han and Sanjeev Kwatra and Sarah A. Rous and Sarik Ghazarian and Sayan Ghosh and Sean Casey and Sebastian Bischoff and Sebastian Gehrmann and Sebastian Schuster and Sepideh Sadeghi and Shadi Hamdan and Sharon Zhou and Shashank Srivastava and Sherry Shi and Shikhar Singh and Shima Asaadi and Shixiang Shane Gu and Shubh Pachchigar and Shubham Toshniwal and Shyam Upadhyay and Shyamolima (Shammie) Debnath and Siamak Shakeri and Simon Thormeyer and Simone Melzi and Siva Reddy and Sneha Priscilla Makini and Soo{-}Hwan Lee and Spencer Torene and Sriharsha Hatwar and Stanislas Dehaene and Stefan Divic and Stefano Ermon and Stella Biderman and Stephanie Lin and Stephen Prasad and Steven T. Piantadosi and Stuart M. Shieber and Summer Misherghi and Svetlana Kiritchenko and Swaroop Mishra and Tal Linzen and Tal Schuster and Tao Li and Tao Yu and Tariq Ali and Tatsu Hashimoto and Te{-}Lin Wu and Th{\'{e}}o Desbordes and Theodore Rothschild and Thomas Phan and Tianle Wang and Tiberius Nkinyili and Timo Schick and Timofei Kornev and Titus Tunduny and Tobias Gerstenberg and Trenton Chang and Trishala Neeraj and Tushar Khot and Tyler Shultz and Uri Shaham and Vedant Misra and Vera Demberg and Victoria Nyamai and Vikas Raunak and Vinay V. Ramasesh and Vinay Uday Prabhu and Vishakh Padmakumar and Vivek Srikumar and William Fedus and William Saunders and William Zhang and Wout Vossen and Xiang Ren and Xiaoyu Tong and Xinran Zhao and Xinyi Wu and Xudong Shen and Yadollah Yaghoobzadeh and Yair Lakretz and Yangqiu Song and Yasaman Bahri and Yejin Choi and Yichi Yang and Yiding Hao and Yifu Chen and Yonatan Belinkov and Yu Hou and Yufang Hou and Yuntao Bai and Zachary Seid and Zhuoye Zhao and Zijian Wang and Zijie J. Wang and Zirui Wang and Ziyi Wu}, title = {Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models}, journal = {CoRR}, volume = {abs/2206.04615}, year = {2022}, url = {https://doi.org/10.48550/arXiv.2206.04615}, doi = {10.48550/ARXIV.2206.04615}, eprinttype = {arXiv}, eprint = {2206.04615}, timestamp = {Mon, 05 Aug 2024 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2206-04615.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2206-07682, author = {Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel and Barret Zoph and Sebastian Borgeaud and Dani Yogatama and Maarten Bosma and Denny Zhou and Donald Metzler and Ed H. Chi and Tatsunori Hashimoto and Oriol Vinyals and Percy Liang and Jeff Dean and William Fedus}, title = {Emergent Abilities of Large Language Models}, journal = {CoRR}, volume = {abs/2206.07682}, year = {2022}, url = {https://doi.org/10.48550/arXiv.2206.07682}, doi = {10.48550/ARXIV.2206.07682}, eprinttype = {arXiv}, eprint = {2206.07682}, timestamp = {Tue, 21 Jun 2022 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2206-07682.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2207-10551, author = {Yi Tay and Mostafa Dehghani and Samira Abnar and Hyung Won Chung and William Fedus and Jinfeng Rao and Sharan Narang and Vinh Q. Tran and Dani Yogatama and Donald Metzler}, title = {Scaling Laws vs Model Architectures: How does Inductive Bias Influence Scaling?}, journal = {CoRR}, volume = {abs/2207.10551}, year = {2022}, url = {https://doi.org/10.48550/arXiv.2207.10551}, doi = {10.48550/ARXIV.2207.10551}, eprinttype = {arXiv}, eprint = {2207.10551}, timestamp = {Mon, 25 Jul 2022 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2207-10551.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2209-01667, author = {William Fedus and Jeff Dean and Barret Zoph}, title = {A Review of Sparse Expert Models in Deep Learning}, journal = {CoRR}, volume = {abs/2209.01667}, year = {2022}, url = {https://doi.org/10.48550/arXiv.2209.01667}, doi = {10.48550/ARXIV.2209.01667}, eprinttype = {arXiv}, eprint = {2209.01667}, timestamp = {Mon, 26 Sep 2022 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2209-01667.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2210-11416, author = {Hyung Won Chung and Le Hou and Shayne Longpre and Barret Zoph and Yi Tay and William Fedus and Eric Li and Xuezhi Wang and Mostafa Dehghani and Siddhartha Brahma and Albert Webson and Shixiang Shane Gu and Zhuyun Dai and Mirac Suzgun and Xinyun Chen and Aakanksha Chowdhery and Sharan Narang and Gaurav Mishra and Adams Yu and Vincent Y. Zhao and Yanping Huang and Andrew M. Dai and Hongkun Yu and Slav Petrov and Ed H. Chi and Jeff Dean and Jacob Devlin and Adam Roberts and Denny Zhou and Quoc V. Le and Jason Wei}, title = {Scaling Instruction-Finetuned Language Models}, journal = {CoRR}, volume = {abs/2210.11416}, year = {2022}, url = {https://doi.org/10.48550/arXiv.2210.11416}, doi = {10.48550/ARXIV.2210.11416}, eprinttype = {arXiv}, eprint = {2210.11416}, timestamp = {Mon, 04 Dec 2023 00:00:00 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2210-11416.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/emnlp/NarangCTFFMMFSL21, author = {Sharan Narang and Hyung Won Chung and Yi Tay and Liam Fedus and Thibault F{\'{e}}vry and Michael Matena and Karishma Malkan and Noah Fiedel and Noam Shazeer and Zhenzhong Lan and Yanqi Zhou and Wei Li and Nan Ding and Jake Marcus and Adam Roberts and Colin Raffel}, editor = {Marie{-}Francine Moens and Xuanjing Huang and Lucia Specia and Scott Wen{-}tau Yih}, title = {Do Transformer Modifications Transfer Across Implementations and Applications?}, booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2021, Virtual Event / Punta Cana, Dominican Republic, 7-11 November, 2021}, pages = {5758--5773}, publisher = {Association for Computational Linguistics}, year = {2021}, url = {https://doi.org/10.18653/v1/2021.emnlp-main.465}, doi = {10.18653/V1/2021.EMNLP-MAIN.465}, timestamp = {Fri, 16 Feb 2024 08:27:36 +0100}, biburl = {https://dblp.org/rec/conf/emnlp/NarangCTFFMMFSL21.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/nips/BelloFDCSLSZ21, author = {Irwan Bello and William Fedus and Xianzhi Du and Ekin Dogus Cubuk and Aravind Srinivas and Tsung{-}Yi Lin and Jonathon Shlens and Barret Zoph}, editor = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan}, title = {Revisiting ResNets: Improved Training and Scaling Strategies}, booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual}, pages = {22614--22627}, year = {2021}, url = {https://proceedings.neurips.cc/paper/2021/hash/bef4d169d8bddd17d68303877a3ea945-Abstract.html}, timestamp = {Tue, 03 May 2022 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/nips/BelloFDCSLSZ21.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2101-03961, author = {William Fedus and Barret Zoph and Noam Shazeer}, title = {Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity}, journal = {CoRR}, volume = {abs/2101.03961}, year = {2021}, url = {https://arxiv.org/abs/2101.03961}, eprinttype = {arXiv}, eprint = {2101.03961}, timestamp = {Thu, 21 Jan 2021 00:00:00 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2101-03961.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2102-11972, author = {Sharan Narang and Hyung Won Chung and Yi Tay and William Fedus and Thibault F{\'{e}}vry and Michael Matena and Karishma Malkan and Noah Fiedel and Noam Shazeer and Zhenzhong Lan and Yanqi Zhou and Wei Li and Nan Ding and Jake Marcus and Adam Roberts and Colin Raffel}, title = {Do Transformer Modifications Transfer Across Implementations and Applications?}, journal = {CoRR}, volume = {abs/2102.11972}, year = {2021}, url = {https://arxiv.org/abs/2102.11972}, eprinttype = {arXiv}, eprint = {2102.11972}, timestamp = {Wed, 24 Nov 2021 00:00:00 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2102-11972.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2103-07579, author = {Irwan Bello and William Fedus and Xianzhi Du and Ekin D. Cubuk and Aravind Srinivas and Tsung{-}Yi Lin and Jonathon Shlens and Barret Zoph}, title = {Revisiting ResNets: Improved Training and Scaling Strategies}, journal = {CoRR}, volume = {abs/2103.07579}, year = {2021}, url = {https://arxiv.org/abs/2103.07579}, eprinttype = {arXiv}, eprint = {2103.07579}, timestamp = {Tue, 23 Mar 2021 00:00:00 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2103-07579.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2109-10686, author = {Yi Tay and Mostafa Dehghani and Jinfeng Rao and William Fedus and Samira Abnar and Hyung Won Chung and Sharan Narang and Dani Yogatama and Ashish Vaswani and Donald Metzler}, title = {Scale Efficiently: Insights from Pre-training and Fine-tuning Transformers}, journal = {CoRR}, volume = {abs/2109.10686}, year = {2021}, url = {https://arxiv.org/abs/2109.10686}, eprinttype = {arXiv}, eprint = {2109.10686}, timestamp = {Mon, 27 Sep 2021 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2109-10686.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2109-11052, author = {Adrien Ali Ta{\"{\i}}ga and William Fedus and Marlos C. Machado and Aaron C. Courville and Marc G. Bellemare}, title = {On Bonus-Based Exploration Methods in the Arcade Learning Environment}, journal = {CoRR}, volume = {abs/2109.11052}, year = {2021}, url = {https://arxiv.org/abs/2109.11052}, eprinttype = {arXiv}, eprint = {2109.11052}, timestamp = {Mon, 27 Sep 2021 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2109-11052.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2112-06905, author = {Nan Du and Yanping Huang and Andrew M. Dai and Simon Tong and Dmitry Lepikhin and Yuanzhong Xu and Maxim Krikun and Yanqi Zhou and Adams Wei Yu and Orhan Firat and Barret Zoph and Liam Fedus and Maarten Bosma and Zongwei Zhou and Tao Wang and Yu Emma Wang and Kellie Webster and Marie Pellat and Kevin Robinson and Kathy Meier{-}Hellstern and Toju Duke and Lucas Dixon and Kun Zhang and Quoc V. Le and Yonghui Wu and Zhifeng Chen and Claire Cui}, title = {GLaM: Efficient Scaling of Language Models with Mixture-of-Experts}, journal = {CoRR}, volume = {abs/2112.06905}, year = {2021}, url = {https://arxiv.org/abs/2112.06905}, eprinttype = {arXiv}, eprint = {2112.06905}, timestamp = {Wed, 16 Aug 2023 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2112-06905.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/aaai/JainFLPB20, author = {Vishal Jain and William Fedus and Hugo Larochelle and Doina Precup and Marc G. Bellemare}, title = {Algorithmic Improvements for Deep Reinforcement Learning Applied to Interactive Fiction}, booktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020}, pages = {4328--4336}, publisher = {{AAAI} Press}, year = {2020}, url = {https://doi.org/10.1609/aaai.v34i04.5857}, doi = {10.1609/AAAI.V34I04.5857}, timestamp = {Mon, 04 Sep 2023 12:29:24 +0200}, biburl = {https://dblp.org/rec/conf/aaai/JainFLPB20.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/CacciaCFLPC20, author = {Massimo Caccia and Lucas Caccia and William Fedus and Hugo Larochelle and Joelle Pineau and Laurent Charlin}, title = {Language GANs Falling Short}, booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020}, publisher = {OpenReview.net}, year = {2020}, url = {https://openreview.net/forum?id=BJgza6VtPB}, timestamp = {Thu, 07 May 2020 17:11:47 +0200}, biburl = {https://dblp.org/rec/conf/iclr/CacciaCFLPC20.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/TaigaFMCB20, author = {Adrien Ali Ta{\"{\i}}ga and William Fedus and Marlos C. Machado and Aaron C. Courville and Marc G. Bellemare}, title = {On Bonus Based Exploration Methods In The Arcade Learning Environment}, booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020}, publisher = {OpenReview.net}, year = {2020}, url = {https://openreview.net/forum?id=BJewlyStDr}, timestamp = {Sat, 30 Sep 2023 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/iclr/TaigaFMCB20.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/icml/FedusRABLRD20, author = {William Fedus and Prajit Ramachandran and Rishabh Agarwal and Yoshua Bengio and Hugo Larochelle and Mark Rowland and Will Dabney}, title = {Revisiting Fundamentals of Experience Replay}, booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event}, series = {Proceedings of Machine Learning Research}, volume = {119}, pages = {3061--3071}, publisher = {{PMLR}}, year = {2020}, url = {http://proceedings.mlr.press/v119/fedus20a.html}, timestamp = {Tue, 15 Dec 2020 00:00:00 +0100}, biburl = {https://dblp.org/rec/conf/icml/FedusRABLRD20.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2002-12499, author = {William Fedus and Dibya Ghosh and John D. Martin and Marc G. Bellemare and Yoshua Bengio and Hugo Larochelle}, title = {On Catastrophic Interference in Atari 2600 Games}, journal = {CoRR}, volume = {abs/2002.12499}, year = {2020}, url = {https://arxiv.org/abs/2002.12499}, eprinttype = {arXiv}, eprint = {2002.12499}, timestamp = {Tue, 05 May 2020 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2002-12499.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-2007-06700, author = {William Fedus and Prajit Ramachandran and Rishabh Agarwal and Yoshua Bengio and Hugo Larochelle and Mark Rowland and Will Dabney}, title = {Revisiting Fundamentals of Experience Replay}, journal = {CoRR}, volume = {abs/2007.06700}, year = {2020}, url = {https://arxiv.org/abs/2007.06700}, eprinttype = {arXiv}, eprint = {2007.06700}, timestamp = {Tue, 21 Jul 2020 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-2007-06700.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/GoyalBFSLLLB19, author = {Anirudh Goyal and Philemon Brakel and William Fedus and Soumye Singhal and Timothy P. Lillicrap and Sergey Levine and Hugo Larochelle and Yoshua Bengio}, title = {Recall Traces: Backtracking Models for Efficient Reinforcement Learning}, booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019}, publisher = {OpenReview.net}, year = {2019}, url = {https://openreview.net/forum?id=HygsfnR9Ym}, timestamp = {Thu, 25 Jul 2019 13:03:15 +0200}, biburl = {https://dblp.org/rec/conf/iclr/GoyalBFSLLLB19.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/VelickovicFHLBH19, author = {Petar Velickovic and William Fedus and William L. Hamilton and Pietro Li{\`{o}} and Yoshua Bengio and R. Devon Hjelm}, title = {Deep Graph Infomax}, booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019}, publisher = {OpenReview.net}, year = {2019}, url = {https://openreview.net/forum?id=rklz9iAcKQ}, timestamp = {Thu, 25 Jul 2019 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/iclr/VelickovicFHLBH19.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1902-06865, author = {William Fedus and Carles Gelada and Yoshua Bengio and Marc G. Bellemare and Hugo Larochelle}, title = {Hyperbolic Discounting and Learning over Multiple Horizons}, journal = {CoRR}, volume = {abs/1902.06865}, year = {2019}, url = {http://arxiv.org/abs/1902.06865}, eprinttype = {arXiv}, eprint = {1902.06865}, timestamp = {Tue, 21 May 2019 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-1902-06865.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1908-02388, author = {Adrien Ali Ta{\"{\i}}ga and William Fedus and Marlos C. Machado and Aaron C. Courville and Marc G. Bellemare}, title = {Benchmarking Bonus-Based Exploration Methods on the Arcade Learning Environment}, journal = {CoRR}, volume = {abs/1908.02388}, year = {2019}, url = {http://arxiv.org/abs/1908.02388}, eprinttype = {arXiv}, eprint = {1908.02388}, timestamp = {Fri, 09 Aug 2019 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-1908-02388.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1911-12511, author = {Vishal Jain and William Fedus and Hugo Larochelle and Doina Precup and Marc G. Bellemare}, title = {Algorithmic Improvements for Deep Reinforcement Learning applied to Interactive Fiction}, journal = {CoRR}, volume = {abs/1911.12511}, year = {2019}, url = {http://arxiv.org/abs/1911.12511}, eprinttype = {arXiv}, eprint = {1911.12511}, timestamp = {Wed, 08 Jan 2020 00:00:00 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-1911-12511.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/FedusGD18, author = {William Fedus and Ian J. Goodfellow and Andrew M. Dai}, title = {MaskGAN: Better Text Generation via Filling in the {\_}{\_}{\_}{\_}{\_}{\_}{\_}}, booktitle = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings}, publisher = {OpenReview.net}, year = {2018}, url = {https://openreview.net/forum?id=ByOExmWAb}, timestamp = {Thu, 25 Jul 2019 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/iclr/FedusGD18.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@inproceedings{DBLP:conf/iclr/FedusRLDMG18, author = {William Fedus and Mihaela Rosca and Balaji Lakshminarayanan and Andrew M. Dai and Shakir Mohamed and Ian J. Goodfellow}, title = {Many Paths to Equilibrium: GANs Do Not Need to Decrease a Divergence At Every Step}, booktitle = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings}, publisher = {OpenReview.net}, year = {2018}, url = {https://openreview.net/forum?id=ByQpn1ZA-}, timestamp = {Thu, 25 Jul 2019 01:00:00 +0200}, biburl = {https://dblp.org/rec/conf/iclr/FedusRLDMG18.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1801-07736, author = {William Fedus and Ian J. Goodfellow and Andrew M. Dai}, title = {MaskGAN: Better Text Generation via Filling in the {\_}{\_}{\_}{\_}{\_}{\_}}, journal = {CoRR}, volume = {abs/1801.07736}, year = {2018}, url = {http://arxiv.org/abs/1801.07736}, eprinttype = {arXiv}, eprint = {1801.07736}, timestamp = {Mon, 13 Aug 2018 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-1801-07736.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1802-09484, author = {Valentin Thomas and Emmanuel Bengio and William Fedus and Jules Pondard and Philippe Beaudoin and Hugo Larochelle and Joelle Pineau and Doina Precup and Yoshua Bengio}, title = {Disentangling the independently controllable factors of variation by interacting with the world}, journal = {CoRR}, volume = {abs/1802.09484}, year = {2018}, url = {http://arxiv.org/abs/1802.09484}, eprinttype = {arXiv}, eprint = {1802.09484}, timestamp = {Mon, 13 Aug 2018 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-1802-09484.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1804-00379, author = {Anirudh Goyal and Philemon Brakel and William Fedus and Timothy P. Lillicrap and Sergey Levine and Hugo Larochelle and Yoshua Bengio}, title = {Recall Traces: Backtracking Models for Efficient Reinforcement Learning}, journal = {CoRR}, volume = {abs/1804.00379}, year = {2018}, url = {http://arxiv.org/abs/1804.00379}, eprinttype = {arXiv}, eprint = {1804.00379}, timestamp = {Mon, 13 Aug 2018 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-1804-00379.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1809-10341, author = {Petar Velickovic and William Fedus and William L. Hamilton and Pietro Li{\`{o}} and Yoshua Bengio and R. Devon Hjelm}, title = {Deep Graph Infomax}, journal = {CoRR}, volume = {abs/1809.10341}, year = {2018}, url = {http://arxiv.org/abs/1809.10341}, eprinttype = {arXiv}, eprint = {1809.10341}, timestamp = {Fri, 05 Oct 2018 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-1809-10341.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1811-02549, author = {Massimo Caccia and Lucas Caccia and William Fedus and Hugo Larochelle and Joelle Pineau and Laurent Charlin}, title = {Language GANs Falling Short}, journal = {CoRR}, volume = {abs/1811.02549}, year = {2018}, url = {http://arxiv.org/abs/1811.02549}, eprinttype = {arXiv}, eprint = {1811.02549}, timestamp = {Thu, 22 Nov 2018 00:00:00 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-1811-02549.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
@article{DBLP:journals/corr/abs-1710-08446, author = {William Fedus and Mihaela Rosca and Balaji Lakshminarayanan and Andrew M. Dai and Shakir Mohamed and Ian J. Goodfellow}, title = {Many Paths to Equilibrium: GANs Do Not Need to Decrease a Divergence At Every Step}, journal = {CoRR}, volume = {abs/1710.08446}, year = {2017}, url = {http://arxiv.org/abs/1710.08446}, eprinttype = {arXiv}, eprint = {1710.08446}, timestamp = {Mon, 13 Aug 2018 01:00:00 +0200}, biburl = {https://dblp.org/rec/journals/corr/abs-1710-08446.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.