default search action

combined dblp search
author search
venue search
publication search

ask others

BibTeX records: William Fedus

Name: dblp XML data dump
Creator: Schloss Dagstuhl - Leibniz Center for Informatics
Published: 1993
License: https://creativecommons.org/publicdomain/zero/1.0/
Keywords: dblp, XML, computer science, scholarly publications, metadata

> Home > Persons > William Fedus

download as .bib file

% JMLR volume 25 (2024) article, paper 70 (article-number style page range).
% Author names are stored in the unambiguous "Last, First" form.
@article{DBLP:journals/jmlr/ChungHLZTFL00BW24,
  author       = {Chung, Hyung Won and
                  Hou, Le and
                  Longpre, Shayne and
                  Zoph, Barret and
                  Tay, Yi and
                  Fedus, William and
                  Li, Yunxuan and
                  Wang, Xuezhi and
                  Dehghani, Mostafa and
                  Brahma, Siddhartha and
                  Webson, Albert and
                  Gu, Shixiang Shane and
                  Dai, Zhuyun and
                  Suzgun, Mirac and
                  Chen, Xinyun and
                  Chowdhery, Aakanksha and
                  Castro{-}Ros, Alex and
                  Pellat, Marie and
                  Robinson, Kevin and
                  Valter, Dasha and
                  Narang, Sharan and
                  Mishra, Gaurav and
                  Yu, Adams and
                  Zhao, Vincent Y. and
                  Huang, Yanping and
                  Dai, Andrew M. and
                  Yu, Hongkun and
                  Petrov, Slav and
                  Chi, Ed H. and
                  Dean, Jeff and
                  Devlin, Jacob and
                  Roberts, Adam and
                  Zhou, Denny and
                  Le, Quoc V. and
                  Wei, Jason},
  title        = {Scaling Instruction-Finetuned Language Models},
  journal      = {J. Mach. Learn. Res.},
  volume       = {25},
  pages        = {70:1--70:53},
  year         = {2024},
  url          = {https://jmlr.org/papers/v25/23-0870.html},
  timestamp    = {Mon, 16 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/ChungHLZTFL00BW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2024 conference paper; the record links to OpenReview and has no pages field.
% The conference date range in booktitle is written with an en dash (--)
% so that it is typeset as a range rather than with a hyphen.
@inproceedings{DBLP:conf/iclr/ShenHZ0LWCZFCVW24,
  author       = {Sheng Shen and
                  Le Hou and
                  Yanqi Zhou and
                  Nan Du and
                  Shayne Longpre and
                  Jason Wei and
                  Hyung Won Chung and
                  Barret Zoph and
                  William Fedus and
                  Xinyun Chen and
                  Tu Vu and
                  Yuexin Wu and
                  Wuyang Chen and
                  Albert Webson and
                  Yunxuan Li and
                  Vincent Y. Zhao and
                  Hongkun Yu and
                  Kurt Keutzer and
                  Trevor Darrell and
                  Denny Zhou},
  title        = {Mixture-of-Experts Meets Instruction Tuning: {A} Winning Combination
                  for Large Language Models},
  booktitle    = {The Twelfth International Conference on Learning Representations,
                  {ICLR} 2024, Vienna, Austria, May 7--11, 2024},
  publisher    = {OpenReview.net},
  year         = {2024},
  url          = {https://openreview.net/forum?id=6mLjDwYte5},
  timestamp    = {Wed, 07 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/ShenHZ0LWCZFCVW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% JMLR volume 24 (2023) article, paper 240.
% The capitalised names PaLM and Pathways in the title are brace-protected so
% that sentence-casing bibliography styles do not lower-case them.
% NOTE(review): this record lists "Liam Fedus", while other records in this
% file spell the name "William Fedus"; left as recorded -- confirm whether to normalise.
@article{DBLP:journals/jmlr/ChowdheryNDBMRBCSGSSTMRBTSPRDHPBAI23,
  author       = {Aakanksha Chowdhery and
                  Sharan Narang and
                  Jacob Devlin and
                  Maarten Bosma and
                  Gaurav Mishra and
                  Adam Roberts and
                  Paul Barham and
                  Hyung Won Chung and
                  Charles Sutton and
                  Sebastian Gehrmann and
                  Parker Schuh and
                  Kensen Shi and
                  Sasha Tsvyashchenko and
                  Joshua Maynez and
                  Abhishek Rao and
                  Parker Barnes and
                  Yi Tay and
                  Noam Shazeer and
                  Vinodkumar Prabhakaran and
                  Emily Reif and
                  Nan Du and
                  Ben Hutchinson and
                  Reiner Pope and
                  James Bradbury and
                  Jacob Austin and
                  Michael Isard and
                  Guy Gur{-}Ari and
                  Pengcheng Yin and
                  Toju Duke and
                  Anselm Levskaya and
                  Sanjay Ghemawat and
                  Sunipa Dev and
                  Henryk Michalewski and
                  Xavier Garcia and
                  Vedant Misra and
                  Kevin Robinson and
                  Liam Fedus and
                  Denny Zhou and
                  Daphne Ippolito and
                  David Luan and
                  Hyeontaek Lim and
                  Barret Zoph and
                  Alexander Spiridonov and
                  Ryan Sepassi and
                  David Dohan and
                  Shivani Agrawal and
                  Mark Omernick and
                  Andrew M. Dai and
                  Thanumalayan Sankaranarayana Pillai and
                  Marie Pellat and
                  Aitor Lewkowycz and
                  Erica Moreira and
                  Rewon Child and
                  Oleksandr Polozov and
                  Katherine Lee and
                  Zongwei Zhou and
                  Xuezhi Wang and
                  Brennan Saeta and
                  Mark Diaz and
                  Orhan Firat and
                  Michele Catasta and
                  Jason Wei and
                  Kathy Meier{-}Hellstern and
                  Douglas Eck and
                  Jeff Dean and
                  Slav Petrov and
                  Noah Fiedel},
  title        = {{PaLM}: Scaling Language Modeling with {Pathways}},
  journal      = {J. Mach. Learn. Res.},
  volume       = {24},
  pages        = {240:1--240:113},
  year         = {2023},
  url          = {https://jmlr.org/papers/v24/22-1144.html},
  timestamp    = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/ChowdheryNDBMRBCSGSSTMRBTSPRDHPBAI23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Trans. Mach. Learn. Res. 2023 article with a very large author list.
% Names are kept exactly as recorded, in the record's order (roughly
% alphabetical by given name); accented letters use brace-wrapped LaTeX escapes.
% The record has no pages or number field: only volume, year and an OpenReview URL.
@article{DBLP:journals/tmlr/SrivastavaRRSAF23,
  author       = {Aarohi Srivastava and
                  Abhinav Rastogi and
                  Abhishek Rao and
                  Abu Awal Md Shoeb and
                  Abubakar Abid and
                  Adam Fisch and
                  Adam R. Brown and
                  Adam Santoro and
                  Aditya Gupta and
                  Adri{\`{a}} Garriga{-}Alonso and
                  Agnieszka Kluska and
                  Aitor Lewkowycz and
                  Akshat Agarwal and
                  Alethea Power and
                  Alex Ray and
                  Alex Warstadt and
                  Alexander W. Kocurek and
                  Ali Safaya and
                  Ali Tazarv and
                  Alice Xiang and
                  Alicia Parrish and
                  Allen Nie and
                  Aman Hussain and
                  Amanda Askell and
                  Amanda Dsouza and
                  Ambrose Slone and
                  Ameet Rahane and
                  Anantharaman S. Iyer and
                  Anders Andreassen and
                  Andrea Madotto and
                  Andrea Santilli and
                  Andreas Stuhlm{\"{u}}ller and
                  Andrew M. Dai and
                  Andrew La and
                  Andrew K. Lampinen and
                  Andy Zou and
                  Angela Jiang and
                  Angelica Chen and
                  Anh Vuong and
                  Animesh Gupta and
                  Anna Gottardi and
                  Antonio Norelli and
                  Anu Venkatesh and
                  Arash Gholamidavoodi and
                  Arfa Tabassum and
                  Arul Menezes and
                  Arun Kirubarajan and
                  Asher Mullokandov and
                  Ashish Sabharwal and
                  Austin Herrick and
                  Avia Efrat and
                  Aykut Erdem and
                  Ayla Karakas and
                  B. Ryan Roberts and
                  Bao Sheng Loe and
                  Barret Zoph and
                  Bartlomiej Bojanowski and
                  Batuhan {\"{O}}zyurt and
                  Behnam Hedayatnia and
                  Behnam Neyshabur and
                  Benjamin Inden and
                  Benno Stein and
                  Berk Ekmekci and
                  Bill Yuchen Lin and
                  Blake Howald and
                  Bryan Orinion and
                  Cameron Diao and
                  Cameron Dour and
                  Catherine Stinson and
                  Cedrick Argueta and
                  C{\`{e}}sar Ferri Ram{\'{\i}}rez and
                  Chandan Singh and
                  Charles Rathkopf and
                  Chenlin Meng and
                  Chitta Baral and
                  Chiyu Wu and
                  Chris Callison{-}Burch and
                  Chris Waites and
                  Christian Voigt and
                  Christopher D. Manning and
                  Christopher Potts and
                  Cindy Ramirez and
                  Clara E. Rivera and
                  Clemencia Siro and
                  Colin Raffel and
                  Courtney Ashcraft and
                  Cristina Garbacea and
                  Damien Sileo and
                  Dan Garrette and
                  Dan Hendrycks and
                  Dan Kilman and
                  Dan Roth and
                  Daniel Freeman and
                  Daniel Khashabi and
                  Daniel Levy and
                  Daniel Mosegu{\'{\i}} Gonz{\'{a}}lez and
                  Danielle Perszyk and
                  Danny Hernandez and
                  Danqi Chen and
                  Daphne Ippolito and
                  Dar Gilboa and
                  David Dohan and
                  David Drakard and
                  David Jurgens and
                  Debajyoti Datta and
                  Deep Ganguli and
                  Denis Emelin and
                  Denis Kleyko and
                  Deniz Yuret and
                  Derek Chen and
                  Derek Tam and
                  Dieuwke Hupkes and
                  Diganta Misra and
                  Dilyar Buzan and
                  Dimitri Coelho Mollo and
                  Diyi Yang and
                  Dong{-}Ho Lee and
                  Dylan Schrader and
                  Ekaterina Shutova and
                  Ekin Dogus Cubuk and
                  Elad Segal and
                  Eleanor Hagerman and
                  Elizabeth Barnes and
                  Elizabeth Donoway and
                  Ellie Pavlick and
                  Emanuele Rodol{\`{a}} and
                  Emma Lam and
                  Eric Chu and
                  Eric Tang and
                  Erkut Erdem and
                  Ernie Chang and
                  Ethan A. Chi and
                  Ethan Dyer and
                  Ethan J. Jerzak and
                  Ethan Kim and
                  Eunice Engefu Manyasi and
                  Evgenii Zheltonozhskii and
                  Fanyue Xia and
                  Fatemeh Siar and
                  Fernando Mart{\'{\i}}nez{-}Plumed and
                  Francesca Happ{\'{e}} and
                  Fran{\c{c}}ois Chollet and
                  Frieda Rong and
                  Gaurav Mishra and
                  Genta Indra Winata and
                  Gerard de Melo and
                  Germ{\'{a}}n Kruszewski and
                  Giambattista Parascandolo and
                  Giorgio Mariani and
                  Gloria Wang and
                  Gonzalo Jaimovitch{-}L{\'{o}}pez and
                  Gregor Betz and
                  Guy Gur{-}Ari and
                  Hana Galijasevic and
                  Hannah Kim and
                  Hannah Rashkin and
                  Hannaneh Hajishirzi and
                  Harsh Mehta and
                  Hayden Bogar and
                  Henry Shevlin and
                  Hinrich Sch{\"{u}}tze and
                  Hiromu Yakura and
                  Hongming Zhang and
                  Hugh Mee Wong and
                  Ian Ng and
                  Isaac Noble and
                  Jaap Jumelet and
                  Jack Geissinger and
                  Jackson Kernion and
                  Jacob Hilton and
                  Jaehoon Lee and
                  Jaime Fern{\'{a}}ndez Fisac and
                  James B. Simon and
                  James Koppel and
                  James Zheng and
                  James Zou and
                  Jan Kocon and
                  Jana Thompson and
                  Janelle Wingfield and
                  Jared Kaplan and
                  Jarema Radom and
                  Jascha Sohl{-}Dickstein and
                  Jason Phang and
                  Jason Wei and
                  Jason Yosinski and
                  Jekaterina Novikova and
                  Jelle Bosscher and
                  Jennifer Marsh and
                  Jeremy Kim and
                  Jeroen Taal and
                  Jesse H. Engel and
                  Jesujoba Alabi and
                  Jiacheng Xu and
                  Jiaming Song and
                  Jillian Tang and
                  Joan Waweru and
                  John Burden and
                  John Miller and
                  John U. Balis and
                  Jonathan Batchelder and
                  Jonathan Berant and
                  J{\"{o}}rg Frohberg and
                  Jos Rozen and
                  Jos{\'{e}} Hern{\'{a}}ndez{-}Orallo and
                  Joseph Boudeman and
                  Joseph Guerr and
                  Joseph Jones and
                  Joshua B. Tenenbaum and
                  Joshua S. Rule and
                  Joyce Chua and
                  Kamil Kanclerz and
                  Karen Livescu and
                  Karl Krauth and
                  Karthik Gopalakrishnan and
                  Katerina Ignatyeva and
                  Katja Markert and
                  Kaustubh D. Dhole and
                  Kevin Gimpel and
                  Kevin Omondi and
                  Kory Mathewson and
                  Kristen Chiafullo and
                  Ksenia Shkaruta and
                  Kumar Shridhar and
                  Kyle McDonell and
                  Kyle Richardson and
                  Laria Reynolds and
                  Leo Gao and
                  Li Zhang and
                  Liam Dugan and
                  Lianhui Qin and
                  Lidia Contreras Ochando and
                  Louis{-}Philippe Morency and
                  Luca Moschella and
                  Lucas Lam and
                  Lucy Noble and
                  Ludwig Schmidt and
                  Luheng He and
                  Luis Oliveros Col{\'{o}}n and
                  Luke Metz and
                  L{\"{u}}tfi Kerem Senel and
                  Maarten Bosma and
                  Maarten Sap and
                  Maartje ter Hoeve and
                  Maheen Farooqi and
                  Manaal Faruqui and
                  Mantas Mazeika and
                  Marco Baturan and
                  Marco Marelli and
                  Marco Maru and
                  Mar{\'{\i}}a Jos{\'{e}} Ram{\'{\i}}rez{-}Quintana and
                  Marie Tolkiehn and
                  Mario Giulianelli and
                  Martha Lewis and
                  Martin Potthast and
                  Matthew L. Leavitt and
                  Matthias Hagen and
                  M{\'{a}}ty{\'{a}}s Schubert and
                  Medina Baitemirova and
                  Melody Arnaud and
                  Melvin McElrath and
                  Michael A. Yee and
                  Michael Cohen and
                  Michael Gu and
                  Michael I. Ivanitskiy and
                  Michael Starritt and
                  Michael Strube and
                  Michal Swedrowski and
                  Michele Bevilacqua and
                  Michihiro Yasunaga and
                  Mihir Kale and
                  Mike Cain and
                  Mimee Xu and
                  Mirac Suzgun and
                  Mitch Walker and
                  Mo Tiwari and
                  Mohit Bansal and
                  Moin Aminnaseri and
                  Mor Geva and
                  Mozhdeh Gheini and
                  Mukund Varma T. and
                  Nanyun Peng and
                  Nathan A. Chi and
                  Nayeon Lee and
                  Neta Gur{-}Ari Krakover and
                  Nicholas Cameron and
                  Nicholas Roberts and
                  Nick Doiron and
                  Nicole Martinez and
                  Nikita Nangia and
                  Niklas Deckers and
                  Niklas Muennighoff and
                  Nitish Shirish Keskar and
                  Niveditha Iyer and
                  Noah Constant and
                  Noah Fiedel and
                  Nuan Wen and
                  Oliver Zhang and
                  Omar Agha and
                  Omar Elbaghdadi and
                  Omer Levy and
                  Owain Evans and
                  Pablo Antonio Moreno Casares and
                  Parth Doshi and
                  Pascale Fung and
                  Paul Pu Liang and
                  Paul Vicol and
                  Pegah Alipoormolabashi and
                  Peiyuan Liao and
                  Percy Liang and
                  Peter Chang and
                  Peter Eckersley and
                  Phu Mon Htut and
                  Pinyu Hwang and
                  Piotr Milkowski and
                  Piyush Patil and
                  Pouya Pezeshkpour and
                  Priti Oli and
                  Qiaozhu Mei and
                  Qing Lyu and
                  Qinlang Chen and
                  Rabin Banjade and
                  Rachel Etta Rudolph and
                  Raefer Gabriel and
                  Rahel Habacker and
                  Ramon Risco and
                  Rapha{\"{e}}l Milli{\`{e}}re and
                  Rhythm Garg and
                  Richard Barnes and
                  Rif A. Saurous and
                  Riku Arakawa and
                  Robbe Raymaekers and
                  Robert Frank and
                  Rohan Sikand and
                  Roman Novak and
                  Roman Sitelew and
                  Ronan LeBras and
                  Rosanne Liu and
                  Rowan Jacobs and
                  Rui Zhang and
                  Ruslan Salakhutdinov and
                  Ryan Chi and
                  Ryan Lee and
                  Ryan Stovall and
                  Ryan Teehan and
                  Rylan Yang and
                  Sahib Singh and
                  Saif M. Mohammad and
                  Sajant Anand and
                  Sam Dillavou and
                  Sam Shleifer and
                  Sam Wiseman and
                  Samuel Gruetter and
                  Samuel R. Bowman and
                  Samuel S. Schoenholz and
                  Sanghyun Han and
                  Sanjeev Kwatra and
                  Sarah A. Rous and
                  Sarik Ghazarian and
                  Sayan Ghosh and
                  Sean Casey and
                  Sebastian Bischoff and
                  Sebastian Gehrmann and
                  Sebastian Schuster and
                  Sepideh Sadeghi and
                  Shadi Hamdan and
                  Sharon Zhou and
                  Shashank Srivastava and
                  Sherry Shi and
                  Shikhar Singh and
                  Shima Asaadi and
                  Shixiang Shane Gu and
                  Shubh Pachchigar and
                  Shubham Toshniwal and
                  Shyam Upadhyay and
                  Shyamolima (Shammie) Debnath and
                  Siamak Shakeri and
                  Simon Thormeyer and
                  Simone Melzi and
                  Siva Reddy and
                  Sneha Priscilla Makini and
                  Soo{-}Hwan Lee and
                  Spencer Torene and
                  Sriharsha Hatwar and
                  Stanislas Dehaene and
                  Stefan Divic and
                  Stefano Ermon and
                  Stella Biderman and
                  Stephanie Lin and
                  Stephen Prasad and
                  Steven T. Piantadosi and
                  Stuart M. Shieber and
                  Summer Misherghi and
                  Svetlana Kiritchenko and
                  Swaroop Mishra and
                  Tal Linzen and
                  Tal Schuster and
                  Tao Li and
                  Tao Yu and
                  Tariq Ali and
                  Tatsu Hashimoto and
                  Te{-}Lin Wu and
                  Th{\'{e}}o Desbordes and
                  Theodore Rothschild and
                  Thomas Phan and
                  Tianle Wang and
                  Tiberius Nkinyili and
                  Timo Schick and
                  Timofei Kornev and
                  Titus Tunduny and
                  Tobias Gerstenberg and
                  Trenton Chang and
                  Trishala Neeraj and
                  Tushar Khot and
                  Tyler Shultz and
                  Uri Shaham and
                  Vedant Misra and
                  Vera Demberg and
                  Victoria Nyamai and
                  Vikas Raunak and
                  Vinay V. Ramasesh and
                  Vinay Uday Prabhu and
                  Vishakh Padmakumar and
                  Vivek Srikumar and
                  William Fedus and
                  William Saunders and
                  William Zhang and
                  Wout Vossen and
                  Xiang Ren and
                  Xiaoyu Tong and
                  Xinran Zhao and
                  Xinyi Wu and
                  Xudong Shen and
                  Yadollah Yaghoobzadeh and
                  Yair Lakretz and
                  Yangqiu Song and
                  Yasaman Bahri and
                  Yejin Choi and
                  Yichi Yang and
                  Yiding Hao and
                  Yifu Chen and
                  Yonatan Belinkov and
                  Yu Hou and
                  Yufang Hou and
                  Yuntao Bai and
                  Zachary Seid and
                  Zhuoye Zhao and
                  Zijian Wang and
                  Zijie J. Wang and
                  Zirui Wang and
                  Ziyi Wu},
  title        = {Beyond the Imitation Game: Quantifying and extrapolating the capabilities
                  of language models},
  journal      = {Trans. Mach. Learn. Res.},
  volume       = {2023},
  year         = {2023},
  url          = {https://openreview.net/forum?id=uyTL5Bvosj},
  timestamp    = {Tue, 06 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tmlr/SrivastavaRRSAF23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Findings of EMNLP 2023 paper.
% The doi field holds the bare DOI in the same lowercase spelling as the
% resolver URL, and the conference date range uses an en dash (--).
@inproceedings{DBLP:conf/emnlp/Tay0ACFRN0YM23,
  author       = {Yi Tay and
                  Mostafa Dehghani and
                  Samira Abnar and
                  Hyung Won Chung and
                  William Fedus and
                  Jinfeng Rao and
                  Sharan Narang and
                  Vinh Q. Tran and
                  Dani Yogatama and
                  Donald Metzler},
  editor       = {Houda Bouamor and
                  Juan Pino and
                  Kalika Bali},
  title        = {Scaling Laws vs Model Architectures: How does Inductive Bias Influence
                  Scaling?},
  booktitle    = {Findings of the Association for Computational Linguistics: {EMNLP}
                  2023, Singapore, December 6--10, 2023},
  pages        = {12342--12364},
  publisher    = {Association for Computational Linguistics},
  year         = {2023},
  url          = {https://doi.org/10.18653/v1/2023.findings-emnlp.825},
  doi          = {10.18653/v1/2023.findings-emnlp.825},
  timestamp    = {Fri, 12 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/Tay0ACFRN0YM23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2305.14705, recorded as a CoRR article.
% The mixed-case name Flan-MoE is brace-protected against sentence casing,
% the doi uses the same spelling as the resolver URL, and the eprinttype
% field is aligned with the other fields.
@article{DBLP:journals/corr/abs-2305-14705,
  author       = {Sheng Shen and
                  Le Hou and
                  Yanqi Zhou and
                  Nan Du and
                  Shayne Longpre and
                  Jason Wei and
                  Hyung Won Chung and
                  Barret Zoph and
                  William Fedus and
                  Xinyun Chen and
                  Tu Vu and
                  Yuexin Wu and
                  Wuyang Chen and
                  Albert Webson and
                  Yunxuan Li and
                  Vincent Y. Zhao and
                  Hongkun Yu and
                  Kurt Keutzer and
                  Trevor Darrell and
                  Denny Zhou},
  title        = {{Flan-MoE}: Scaling Instruction-Finetuned Language Models with Sparse
                  Mixture of Experts},
  journal      = {CoRR},
  volume       = {abs/2305.14705},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.14705},
  doi          = {10.48550/arXiv.2305.14705},
  eprinttype   = {arXiv},
  eprint       = {2305.14705},
  timestamp    = {Tue, 27 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-14705.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% JMLR volume 23 (2022) article, paper 120.
% The name "Switch Transformers" in the title is brace-protected so that
% sentence-casing styles keep its capitalisation.
@article{DBLP:journals/jmlr/FedusZS22,
  author       = {William Fedus and
                  Barret Zoph and
                  Noam Shazeer},
  title        = {{Switch Transformers}: Scaling to Trillion Parameter Models with Simple
                  and Efficient Sparsity},
  journal      = {J. Mach. Learn. Res.},
  volume       = {23},
  pages        = {120:1--120:39},
  year         = {2022},
  url          = {https://jmlr.org/papers/v23/21-0998.html},
  timestamp    = {Wed, 11 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/FedusZS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Trans. Mach. Learn. Res. 2022 article; the record gives volume, year and an
% OpenReview URL but no pages. Author names use the "Last, First" form.
@article{DBLP:journals/tmlr/WeiTBRZBYBZMCHVLDF22,
  author       = {Wei, Jason and
                  Tay, Yi and
                  Bommasani, Rishi and
                  Raffel, Colin and
                  Zoph, Barret and
                  Borgeaud, Sebastian and
                  Yogatama, Dani and
                  Bosma, Maarten and
                  Zhou, Denny and
                  Metzler, Donald and
                  Chi, Ed H. and
                  Hashimoto, Tatsunori and
                  Vinyals, Oriol and
                  Liang, Percy and
                  Dean, Jeff and
                  Fedus, William},
  title        = {Emergent Abilities of Large Language Models},
  journal      = {Trans. Mach. Learn. Res.},
  volume       = {2022},
  year         = {2022},
  url          = {https://openreview.net/forum?id=yzkSU5zdwD},
  timestamp    = {Fri, 19 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tmlr/WeiTBRZBYBZMCHVLDF22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2022 conference paper; the record links to OpenReview and has no pages field.
% The conference date range in booktitle uses an en dash (--) rather than a hyphen.
@inproceedings{DBLP:conf/iclr/Tay0RFACNYVM22,
  author       = {Yi Tay and
                  Mostafa Dehghani and
                  Jinfeng Rao and
                  William Fedus and
                  Samira Abnar and
                  Hyung Won Chung and
                  Sharan Narang and
                  Dani Yogatama and
                  Ashish Vaswani and
                  Donald Metzler},
  title        = {Scale Efficiently: Insights from Pretraining and Finetuning Transformers},
  booktitle    = {The Tenth International Conference on Learning Representations, {ICLR}
                  2022, Virtual Event, April 25--29, 2022},
  publisher    = {OpenReview.net},
  year         = {2022},
  url          = {https://openreview.net/forum?id=f2OYVDyfIB},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/Tay0RFACNYVM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICML 2022 paper, PMLR volume 162.
% The mixed-case name GLaM in the title is brace-protected against sentence
% casing, and the conference date range uses an en dash (--).
% NOTE(review): this record lists "Liam Fedus", while other records in this
% file spell the name "William Fedus"; left as recorded -- confirm whether to normalise.
@inproceedings{DBLP:conf/icml/DuHDTLXKZYFZFBZ22,
  author       = {Nan Du and
                  Yanping Huang and
                  Andrew M. Dai and
                  Simon Tong and
                  Dmitry Lepikhin and
                  Yuanzhong Xu and
                  Maxim Krikun and
                  Yanqi Zhou and
                  Adams Wei Yu and
                  Orhan Firat and
                  Barret Zoph and
                  Liam Fedus and
                  Maarten P. Bosma and
                  Zongwei Zhou and
                  Tao Wang and
                  Yu Emma Wang and
                  Kellie Webster and
                  Marie Pellat and
                  Kevin Robinson and
                  Kathleen S. Meier{-}Hellstern and
                  Toju Duke and
                  Lucas Dixon and
                  Kun Zhang and
                  Quoc V. Le and
                  Yonghui Wu and
                  Zhifeng Chen and
                  Claire Cui},
  editor       = {Kamalika Chaudhuri and
                  Stefanie Jegelka and
                  Le Song and
                  Csaba Szepesv{\'{a}}ri and
                  Gang Niu and
                  Sivan Sabato},
  title        = {{GLaM}: Efficient Scaling of Language Models with Mixture-of-Experts},
  booktitle    = {International Conference on Machine Learning, {ICML} 2022, 17--23 July
                  2022, Baltimore, Maryland, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {162},
  pages        = {5547--5569},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v162/du22c.html},
  timestamp    = {Wed, 16 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/DuHDTLXKZYFZFBZ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2202.08906, recorded as a CoRR article (no DOI in the record).
% Author names use the "Last, First" form; eprint fields identify the arXiv id.
@article{DBLP:journals/corr/abs-2202-08906,
  author       = {Zoph, Barret and
                  Bello, Irwan and
                  Kumar, Sameer and
                  Du, Nan and
                  Huang, Yanping and
                  Dean, Jeff and
                  Shazeer, Noam and
                  Fedus, William},
  title        = {Designing Effective Sparse Expert Models},
  journal      = {CoRR},
  volume       = {abs/2202.08906},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.08906},
  eprinttype   = {arXiv},
  eprint       = {2202.08906},
  timestamp    = {Thu, 17 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-08906.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2204.02311. The mixed-case acronym and the
% system name in the title are brace-protected so that sentence-casing
% bibliography styles cannot lowercase them.
% NOTE(review): the byline has "Liam Fedus" while other records in this file
% use "William Fedus"; unify only if a consistent author index is required.
@article{DBLP:journals/corr/abs-2204-02311,
  author       = {Aakanksha Chowdhery and
                  Sharan Narang and
                  Jacob Devlin and
                  Maarten Bosma and
                  Gaurav Mishra and
                  Adam Roberts and
                  Paul Barham and
                  Hyung Won Chung and
                  Charles Sutton and
                  Sebastian Gehrmann and
                  Parker Schuh and
                  Kensen Shi and
                  Sasha Tsvyashchenko and
                  Joshua Maynez and
                  Abhishek Rao and
                  Parker Barnes and
                  Yi Tay and
                  Noam Shazeer and
                  Vinodkumar Prabhakaran and
                  Emily Reif and
                  Nan Du and
                  Ben Hutchinson and
                  Reiner Pope and
                  James Bradbury and
                  Jacob Austin and
                  Michael Isard and
                  Guy Gur{-}Ari and
                  Pengcheng Yin and
                  Toju Duke and
                  Anselm Levskaya and
                  Sanjay Ghemawat and
                  Sunipa Dev and
                  Henryk Michalewski and
                  Xavier Garcia and
                  Vedant Misra and
                  Kevin Robinson and
                  Liam Fedus and
                  Denny Zhou and
                  Daphne Ippolito and
                  David Luan and
                  Hyeontaek Lim and
                  Barret Zoph and
                  Alexander Spiridonov and
                  Ryan Sepassi and
                  David Dohan and
                  Shivani Agrawal and
                  Mark Omernick and
                  Andrew M. Dai and
                  Thanumalayan Sankaranarayana Pillai and
                  Marie Pellat and
                  Aitor Lewkowycz and
                  Erica Moreira and
                  Rewon Child and
                  Oleksandr Polozov and
                  Katherine Lee and
                  Zongwei Zhou and
                  Xuezhi Wang and
                  Brennan Saeta and
                  Mark Diaz and
                  Orhan Firat and
                  Michele Catasta and
                  Jason Wei and
                  Kathy Meier{-}Hellstern and
                  Douglas Eck and
                  Jeff Dean and
                  Slav Petrov and
                  Noah Fiedel},
  title        = {{PaLM}: Scaling Language Modeling with {Pathways}},
  journal      = {CoRR},
  volume       = {abs/2204.02311},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2204.02311},
  doi          = {10.48550/ARXIV.2204.02311},
  eprinttype   = {arXiv},
  eprint       = {2204.02311},
  timestamp    = {Wed, 16 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-02311.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2206.04615 ("Beyond the Imitation Game").
% The author field lists 450 names, one per line, joined by "and"; hyphens inside
% names are written as {-} so BibTeX does not split a name token at the hyphen.
% NOTE(review): names such as "Mukund Varma T." and "Shyamolima (Shammie) Debnath"
% may not split into given/family parts as intended -- confirm before reformatting.
@article{DBLP:journals/corr/abs-2206-04615,
  author       = {Aarohi Srivastava and
                  Abhinav Rastogi and
                  Abhishek Rao and
                  Abu Awal Md Shoeb and
                  Abubakar Abid and
                  Adam Fisch and
                  Adam R. Brown and
                  Adam Santoro and
                  Aditya Gupta and
                  Adri{\`{a}} Garriga{-}Alonso and
                  Agnieszka Kluska and
                  Aitor Lewkowycz and
                  Akshat Agarwal and
                  Alethea Power and
                  Alex Ray and
                  Alex Warstadt and
                  Alexander W. Kocurek and
                  Ali Safaya and
                  Ali Tazarv and
                  Alice Xiang and
                  Alicia Parrish and
                  Allen Nie and
                  Aman Hussain and
                  Amanda Askell and
                  Amanda Dsouza and
                  Ambrose Slone and
                  Ameet Rahane and
                  Anantharaman S. Iyer and
                  Anders Andreassen and
                  Andrea Madotto and
                  Andrea Santilli and
                  Andreas Stuhlm{\"{u}}ller and
                  Andrew M. Dai and
                  Andrew La and
                  Andrew K. Lampinen and
                  Andy Zou and
                  Angela Jiang and
                  Angelica Chen and
                  Anh Vuong and
                  Animesh Gupta and
                  Anna Gottardi and
                  Antonio Norelli and
                  Anu Venkatesh and
                  Arash Gholamidavoodi and
                  Arfa Tabassum and
                  Arul Menezes and
                  Arun Kirubarajan and
                  Asher Mullokandov and
                  Ashish Sabharwal and
                  Austin Herrick and
                  Avia Efrat and
                  Aykut Erdem and
                  Ayla Karakas and
                  B. Ryan Roberts and
                  Bao Sheng Loe and
                  Barret Zoph and
                  Bartlomiej Bojanowski and
                  Batuhan {\"{O}}zyurt and
                  Behnam Hedayatnia and
                  Behnam Neyshabur and
                  Benjamin Inden and
                  Benno Stein and
                  Berk Ekmekci and
                  Bill Yuchen Lin and
                  Blake Howald and
                  Bryan Orinion and
                  Cameron Diao and
                  Cameron Dour and
                  Catherine Stinson and
                  Cedrick Argueta and
                  C{\`{e}}sar Ferri Ram{\'{\i}}rez and
                  Chandan Singh and
                  Charles Rathkopf and
                  Chenlin Meng and
                  Chitta Baral and
                  Chiyu Wu and
                  Chris Callison{-}Burch and
                  Chris Waites and
                  Christian Voigt and
                  Christopher D. Manning and
                  Christopher Potts and
                  Cindy Ramirez and
                  Clara E. Rivera and
                  Clemencia Siro and
                  Colin Raffel and
                  Courtney Ashcraft and
                  Cristina Garbacea and
                  Damien Sileo and
                  Dan Garrette and
                  Dan Hendrycks and
                  Dan Kilman and
                  Dan Roth and
                  Daniel Freeman and
                  Daniel Khashabi and
                  Daniel Levy and
                  Daniel Mosegu{\'{\i}} Gonz{\'{a}}lez and
                  Danielle Perszyk and
                  Danny Hernandez and
                  Danqi Chen and
                  Daphne Ippolito and
                  Dar Gilboa and
                  David Dohan and
                  David Drakard and
                  David Jurgens and
                  Debajyoti Datta and
                  Deep Ganguli and
                  Denis Emelin and
                  Denis Kleyko and
                  Deniz Yuret and
                  Derek Chen and
                  Derek Tam and
                  Dieuwke Hupkes and
                  Diganta Misra and
                  Dilyar Buzan and
                  Dimitri Coelho Mollo and
                  Diyi Yang and
                  Dong{-}Ho Lee and
                  Dylan Schrader and
                  Ekaterina Shutova and
                  Ekin Dogus Cubuk and
                  Elad Segal and
                  Eleanor Hagerman and
                  Elizabeth Barnes and
                  Elizabeth Donoway and
                  Ellie Pavlick and
                  Emanuele Rodol{\`{a}} and
                  Emma Lam and
                  Eric Chu and
                  Eric Tang and
                  Erkut Erdem and
                  Ernie Chang and
                  Ethan A. Chi and
                  Ethan Dyer and
                  Ethan J. Jerzak and
                  Ethan Kim and
                  Eunice Engefu Manyasi and
                  Evgenii Zheltonozhskii and
                  Fanyue Xia and
                  Fatemeh Siar and
                  Fernando Mart{\'{\i}}nez{-}Plumed and
                  Francesca Happ{\'{e}} and
                  Fran{\c{c}}ois Chollet and
                  Frieda Rong and
                  Gaurav Mishra and
                  Genta Indra Winata and
                  Gerard de Melo and
                  Germ{\'{a}}n Kruszewski and
                  Giambattista Parascandolo and
                  Giorgio Mariani and
                  Gloria Wang and
                  Gonzalo Jaimovitch{-}L{\'{o}}pez and
                  Gregor Betz and
                  Guy Gur{-}Ari and
                  Hana Galijasevic and
                  Hannah Kim and
                  Hannah Rashkin and
                  Hannaneh Hajishirzi and
                  Harsh Mehta and
                  Hayden Bogar and
                  Henry Shevlin and
                  Hinrich Sch{\"{u}}tze and
                  Hiromu Yakura and
                  Hongming Zhang and
                  Hugh Mee Wong and
                  Ian Ng and
                  Isaac Noble and
                  Jaap Jumelet and
                  Jack Geissinger and
                  Jackson Kernion and
                  Jacob Hilton and
                  Jaehoon Lee and
                  Jaime Fern{\'{a}}ndez Fisac and
                  James B. Simon and
                  James Koppel and
                  James Zheng and
                  James Zou and
                  Jan Kocon and
                  Jana Thompson and
                  Janelle Wingfield and
                  Jared Kaplan and
                  Jarema Radom and
                  Jascha Sohl{-}Dickstein and
                  Jason Phang and
                  Jason Wei and
                  Jason Yosinski and
                  Jekaterina Novikova and
                  Jelle Bosscher and
                  Jennifer Marsh and
                  Jeremy Kim and
                  Jeroen Taal and
                  Jesse H. Engel and
                  Jesujoba Alabi and
                  Jiacheng Xu and
                  Jiaming Song and
                  Jillian Tang and
                  Joan Waweru and
                  John Burden and
                  John Miller and
                  John U. Balis and
                  Jonathan Batchelder and
                  Jonathan Berant and
                  J{\"{o}}rg Frohberg and
                  Jos Rozen and
                  Jos{\'{e}} Hern{\'{a}}ndez{-}Orallo and
                  Joseph Boudeman and
                  Joseph Guerr and
                  Joseph Jones and
                  Joshua B. Tenenbaum and
                  Joshua S. Rule and
                  Joyce Chua and
                  Kamil Kanclerz and
                  Karen Livescu and
                  Karl Krauth and
                  Karthik Gopalakrishnan and
                  Katerina Ignatyeva and
                  Katja Markert and
                  Kaustubh D. Dhole and
                  Kevin Gimpel and
                  Kevin Omondi and
                  Kory Mathewson and
                  Kristen Chiafullo and
                  Ksenia Shkaruta and
                  Kumar Shridhar and
                  Kyle McDonell and
                  Kyle Richardson and
                  Laria Reynolds and
                  Leo Gao and
                  Li Zhang and
                  Liam Dugan and
                  Lianhui Qin and
                  Lidia Contreras Ochando and
                  Louis{-}Philippe Morency and
                  Luca Moschella and
                  Lucas Lam and
                  Lucy Noble and
                  Ludwig Schmidt and
                  Luheng He and
                  Luis Oliveros Col{\'{o}}n and
                  Luke Metz and
                  L{\"{u}}tfi Kerem Senel and
                  Maarten Bosma and
                  Maarten Sap and
                  Maartje ter Hoeve and
                  Maheen Farooqi and
                  Manaal Faruqui and
                  Mantas Mazeika and
                  Marco Baturan and
                  Marco Marelli and
                  Marco Maru and
                  Mar{\'{\i}}a Jos{\'{e}} Ram{\'{\i}}rez{-}Quintana and
                  Marie Tolkiehn and
                  Mario Giulianelli and
                  Martha Lewis and
                  Martin Potthast and
                  Matthew L. Leavitt and
                  Matthias Hagen and
                  M{\'{a}}ty{\'{a}}s Schubert and
                  Medina Baitemirova and
                  Melody Arnaud and
                  Melvin McElrath and
                  Michael A. Yee and
                  Michael Cohen and
                  Michael Gu and
                  Michael I. Ivanitskiy and
                  Michael Starritt and
                  Michael Strube and
                  Michal Swedrowski and
                  Michele Bevilacqua and
                  Michihiro Yasunaga and
                  Mihir Kale and
                  Mike Cain and
                  Mimee Xu and
                  Mirac Suzgun and
                  Mitch Walker and
                  Mo Tiwari and
                  Mohit Bansal and
                  Moin Aminnaseri and
                  Mor Geva and
                  Mozhdeh Gheini and
                  Mukund Varma T. and
                  Nanyun Peng and
                  Nathan A. Chi and
                  Nayeon Lee and
                  Neta Gur{-}Ari Krakover and
                  Nicholas Cameron and
                  Nicholas Roberts and
                  Nick Doiron and
                  Nicole Martinez and
                  Nikita Nangia and
                  Niklas Deckers and
                  Niklas Muennighoff and
                  Nitish Shirish Keskar and
                  Niveditha Iyer and
                  Noah Constant and
                  Noah Fiedel and
                  Nuan Wen and
                  Oliver Zhang and
                  Omar Agha and
                  Omar Elbaghdadi and
                  Omer Levy and
                  Owain Evans and
                  Pablo Antonio Moreno Casares and
                  Parth Doshi and
                  Pascale Fung and
                  Paul Pu Liang and
                  Paul Vicol and
                  Pegah Alipoormolabashi and
                  Peiyuan Liao and
                  Percy Liang and
                  Peter Chang and
                  Peter Eckersley and
                  Phu Mon Htut and
                  Pinyu Hwang and
                  Piotr Milkowski and
                  Piyush Patil and
                  Pouya Pezeshkpour and
                  Priti Oli and
                  Qiaozhu Mei and
                  Qing Lyu and
                  Qinlang Chen and
                  Rabin Banjade and
                  Rachel Etta Rudolph and
                  Raefer Gabriel and
                  Rahel Habacker and
                  Ramon Risco and
                  Rapha{\"{e}}l Milli{\`{e}}re and
                  Rhythm Garg and
                  Richard Barnes and
                  Rif A. Saurous and
                  Riku Arakawa and
                  Robbe Raymaekers and
                  Robert Frank and
                  Rohan Sikand and
                  Roman Novak and
                  Roman Sitelew and
                  Ronan LeBras and
                  Rosanne Liu and
                  Rowan Jacobs and
                  Rui Zhang and
                  Ruslan Salakhutdinov and
                  Ryan Chi and
                  Ryan Lee and
                  Ryan Stovall and
                  Ryan Teehan and
                  Rylan Yang and
                  Sahib Singh and
                  Saif M. Mohammad and
                  Sajant Anand and
                  Sam Dillavou and
                  Sam Shleifer and
                  Sam Wiseman and
                  Samuel Gruetter and
                  Samuel R. Bowman and
                  Samuel S. Schoenholz and
                  Sanghyun Han and
                  Sanjeev Kwatra and
                  Sarah A. Rous and
                  Sarik Ghazarian and
                  Sayan Ghosh and
                  Sean Casey and
                  Sebastian Bischoff and
                  Sebastian Gehrmann and
                  Sebastian Schuster and
                  Sepideh Sadeghi and
                  Shadi Hamdan and
                  Sharon Zhou and
                  Shashank Srivastava and
                  Sherry Shi and
                  Shikhar Singh and
                  Shima Asaadi and
                  Shixiang Shane Gu and
                  Shubh Pachchigar and
                  Shubham Toshniwal and
                  Shyam Upadhyay and
                  Shyamolima (Shammie) Debnath and
                  Siamak Shakeri and
                  Simon Thormeyer and
                  Simone Melzi and
                  Siva Reddy and
                  Sneha Priscilla Makini and
                  Soo{-}Hwan Lee and
                  Spencer Torene and
                  Sriharsha Hatwar and
                  Stanislas Dehaene and
                  Stefan Divic and
                  Stefano Ermon and
                  Stella Biderman and
                  Stephanie Lin and
                  Stephen Prasad and
                  Steven T. Piantadosi and
                  Stuart M. Shieber and
                  Summer Misherghi and
                  Svetlana Kiritchenko and
                  Swaroop Mishra and
                  Tal Linzen and
                  Tal Schuster and
                  Tao Li and
                  Tao Yu and
                  Tariq Ali and
                  Tatsu Hashimoto and
                  Te{-}Lin Wu and
                  Th{\'{e}}o Desbordes and
                  Theodore Rothschild and
                  Thomas Phan and
                  Tianle Wang and
                  Tiberius Nkinyili and
                  Timo Schick and
                  Timofei Kornev and
                  Titus Tunduny and
                  Tobias Gerstenberg and
                  Trenton Chang and
                  Trishala Neeraj and
                  Tushar Khot and
                  Tyler Shultz and
                  Uri Shaham and
                  Vedant Misra and
                  Vera Demberg and
                  Victoria Nyamai and
                  Vikas Raunak and
                  Vinay V. Ramasesh and
                  Vinay Uday Prabhu and
                  Vishakh Padmakumar and
                  Vivek Srikumar and
                  William Fedus and
                  William Saunders and
                  William Zhang and
                  Wout Vossen and
                  Xiang Ren and
                  Xiaoyu Tong and
                  Xinran Zhao and
                  Xinyi Wu and
                  Xudong Shen and
                  Yadollah Yaghoobzadeh and
                  Yair Lakretz and
                  Yangqiu Song and
                  Yasaman Bahri and
                  Yejin Choi and
                  Yichi Yang and
                  Yiding Hao and
                  Yifu Chen and
                  Yonatan Belinkov and
                  Yu Hou and
                  Yufang Hou and
                  Yuntao Bai and
                  Zachary Seid and
                  Zhuoye Zhao and
                  Zijian Wang and
                  Zijie J. Wang and
                  Zirui Wang and
                  Ziyi Wu},
  title        = {Beyond the Imitation Game: Quantifying and extrapolating the capabilities
                  of language models},
  journal      = {CoRR},
  volume       = {abs/2206.04615},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.04615},
  doi          = {10.48550/ARXIV.2206.04615},
  eprinttype    = {arXiv},
  eprint       = {2206.04615},
  timestamp    = {Mon, 05 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-04615.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2206.07682. Author names use the "Last, First"
% form, which BibTeX parses identically to the "First Last" form.
@article{DBLP:journals/corr/abs-2206-07682,
  author       = {Wei, Jason and
                  Tay, Yi and
                  Bommasani, Rishi and
                  Raffel, Colin and
                  Zoph, Barret and
                  Borgeaud, Sebastian and
                  Yogatama, Dani and
                  Bosma, Maarten and
                  Zhou, Denny and
                  Metzler, Donald and
                  Chi, Ed H. and
                  Hashimoto, Tatsunori and
                  Vinyals, Oriol and
                  Liang, Percy and
                  Dean, Jeff and
                  Fedus, William},
  title        = {Emergent Abilities of Large Language Models},
  journal      = {CoRR},
  volume       = {abs/2206.07682},
  year         = {2022},
  eprinttype   = {arXiv},
  eprint       = {2206.07682},
  doi          = {10.48550/ARXIV.2206.07682},
  url          = {https://doi.org/10.48550/arXiv.2206.07682},
  timestamp    = {Tue, 21 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-07682.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2207.10551. Author names use the "Last, First"
% form, which BibTeX parses identically to the "First Last" form.
@article{DBLP:journals/corr/abs-2207-10551,
  author       = {Tay, Yi and
                  Dehghani, Mostafa and
                  Abnar, Samira and
                  Chung, Hyung Won and
                  Fedus, William and
                  Rao, Jinfeng and
                  Narang, Sharan and
                  Tran, Vinh Q. and
                  Yogatama, Dani and
                  Metzler, Donald},
  title        = {Scaling Laws vs Model Architectures: How does Inductive Bias Influence Scaling?},
  journal      = {CoRR},
  volume       = {abs/2207.10551},
  year         = {2022},
  eprinttype   = {arXiv},
  eprint       = {2207.10551},
  doi          = {10.48550/ARXIV.2207.10551},
  url          = {https://doi.org/10.48550/arXiv.2207.10551},
  timestamp    = {Mon, 25 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2207-10551.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2209.01667. Author names use the "Last, First"
% form, which BibTeX parses identically to the "First Last" form.
@article{DBLP:journals/corr/abs-2209-01667,
  author       = {Fedus, William and
                  Dean, Jeff and
                  Zoph, Barret},
  title        = {A Review of Sparse Expert Models in Deep Learning},
  journal      = {CoRR},
  volume       = {abs/2209.01667},
  year         = {2022},
  eprinttype   = {arXiv},
  eprint       = {2209.01667},
  doi          = {10.48550/ARXIV.2209.01667},
  url          = {https://doi.org/10.48550/arXiv.2209.01667},
  timestamp    = {Mon, 26 Sep 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2209-01667.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2210.11416. Author names use the "Last, First"
% form, which BibTeX parses identically to the "First Last" form.
@article{DBLP:journals/corr/abs-2210-11416,
  author       = {Chung, Hyung Won and
                  Hou, Le and
                  Longpre, Shayne and
                  Zoph, Barret and
                  Tay, Yi and
                  Fedus, William and
                  Li, Eric and
                  Wang, Xuezhi and
                  Dehghani, Mostafa and
                  Brahma, Siddhartha and
                  Webson, Albert and
                  Gu, Shixiang Shane and
                  Dai, Zhuyun and
                  Suzgun, Mirac and
                  Chen, Xinyun and
                  Chowdhery, Aakanksha and
                  Narang, Sharan and
                  Mishra, Gaurav and
                  Yu, Adams and
                  Zhao, Vincent Y. and
                  Huang, Yanping and
                  Dai, Andrew M. and
                  Yu, Hongkun and
                  Petrov, Slav and
                  Chi, Ed H. and
                  Dean, Jeff and
                  Devlin, Jacob and
                  Roberts, Adam and
                  Zhou, Denny and
                  Le, Quoc V. and
                  Wei, Jason},
  title        = {Scaling Instruction-Finetuned Language Models},
  journal      = {CoRR},
  volume       = {abs/2210.11416},
  year         = {2022},
  eprinttype   = {arXiv},
  eprint       = {2210.11416},
  doi          = {10.48550/ARXIV.2210.11416},
  url          = {https://doi.org/10.48550/arXiv.2210.11416},
  timestamp    = {Mon, 04 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-11416.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% EMNLP 2021 proceedings paper. Author and editor names use the "Last, First"
% form, which BibTeX parses identically to the "First Last" form.
% NOTE(review): the byline has "Liam Fedus" while the CoRR record of the same
% title in this file has "William Fedus"; unify only if that is intended.
@inproceedings{DBLP:conf/emnlp/NarangCTFFMMFSL21,
  author       = {Narang, Sharan and
                  Chung, Hyung Won and
                  Tay, Yi and
                  Fedus, Liam and
                  F{\'{e}}vry, Thibault and
                  Matena, Michael and
                  Malkan, Karishma and
                  Fiedel, Noah and
                  Shazeer, Noam and
                  Lan, Zhenzhong and
                  Zhou, Yanqi and
                  Li, Wei and
                  Ding, Nan and
                  Marcus, Jake and
                  Roberts, Adam and
                  Raffel, Colin},
  editor       = {Moens, Marie{-}Francine and
                  Huang, Xuanjing and
                  Specia, Lucia and
                  Yih, Scott Wen{-}tau},
  title        = {Do Transformer Modifications Transfer Across Implementations and Applications?},
  booktitle    = {Proceedings of the 2021 Conference on Empirical Methods in Natural
                  Language Processing, {EMNLP} 2021, Virtual Event / Punta Cana, Dominican
                  Republic, 7-11 November, 2021},
  publisher    = {Association for Computational Linguistics},
  year         = {2021},
  pages        = {5758--5773},
  doi          = {10.18653/V1/2021.EMNLP-MAIN.465},
  url          = {https://doi.org/10.18653/v1/2021.emnlp-main.465},
  timestamp    = {Fri, 16 Feb 2024 08:27:36 +0100},
  biburl       = {https://dblp.org/rec/conf/emnlp/NarangCTFFMMFSL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2021 proceedings paper. "ResNets" is brace-protected in the title so
% that sentence-casing bibliography styles cannot render it as "resnets".
@inproceedings{DBLP:conf/nips/BelloFDCSLSZ21,
  author       = {Irwan Bello and
                  William Fedus and
                  Xianzhi Du and
                  Ekin Dogus Cubuk and
                  Aravind Srinivas and
                  Tsung{-}Yi Lin and
                  Jonathon Shlens and
                  Barret Zoph},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Jennifer Wortman Vaughan},
  title        = {Revisiting {ResNets}: Improved Training and Scaling Strategies},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {22614--22627},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/bef4d169d8bddd17d68303877a3ea945-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/BelloFDCSLSZ21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2101.03961. The model name at the start of the
% title is brace-protected so that sentence-casing bibliography styles cannot
% lowercase "Transformers".
@article{DBLP:journals/corr/abs-2101-03961,
  author       = {William Fedus and
                  Barret Zoph and
                  Noam Shazeer},
  title        = {{Switch Transformers}: Scaling to Trillion Parameter Models with Simple
                  and Efficient Sparsity},
  journal      = {CoRR},
  volume       = {abs/2101.03961},
  year         = {2021},
  url          = {https://arxiv.org/abs/2101.03961},
  eprinttype   = {arXiv},
  eprint       = {2101.03961},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2101-03961.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2102.11972. Author names use the "Last, First"
% form, which BibTeX parses identically to the "First Last" form.
@article{DBLP:journals/corr/abs-2102-11972,
  author       = {Narang, Sharan and
                  Chung, Hyung Won and
                  Tay, Yi and
                  Fedus, William and
                  F{\'{e}}vry, Thibault and
                  Matena, Michael and
                  Malkan, Karishma and
                  Fiedel, Noah and
                  Shazeer, Noam and
                  Lan, Zhenzhong and
                  Zhou, Yanqi and
                  Li, Wei and
                  Ding, Nan and
                  Marcus, Jake and
                  Roberts, Adam and
                  Raffel, Colin},
  title        = {Do Transformer Modifications Transfer Across Implementations and Applications?},
  journal      = {CoRR},
  volume       = {abs/2102.11972},
  year         = {2021},
  eprinttype   = {arXiv},
  eprint       = {2102.11972},
  url          = {https://arxiv.org/abs/2102.11972},
  timestamp    = {Wed, 24 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-11972.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2103.07579. "ResNets" is brace-protected in the
% title so that sentence-casing bibliography styles cannot render it as "resnets".
% NOTE(review): the byline has "Ekin D. Cubuk" while the proceedings record of
% the same title in this file has "Ekin Dogus Cubuk"; unify only if intended.
@article{DBLP:journals/corr/abs-2103-07579,
  author       = {Irwan Bello and
                  William Fedus and
                  Xianzhi Du and
                  Ekin D. Cubuk and
                  Aravind Srinivas and
                  Tsung{-}Yi Lin and
                  Jonathon Shlens and
                  Barret Zoph},
  title        = {Revisiting {ResNets}: Improved Training and Scaling Strategies},
  journal      = {CoRR},
  volume       = {abs/2103.07579},
  year         = {2021},
  url          = {https://arxiv.org/abs/2103.07579},
  eprinttype   = {arXiv},
  eprint       = {2103.07579},
  timestamp    = {Tue, 23 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2103-07579.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2109.10686. Author names use the "Last, First"
% form, which BibTeX parses identically to the "First Last" form.
@article{DBLP:journals/corr/abs-2109-10686,
  author       = {Tay, Yi and
                  Dehghani, Mostafa and
                  Rao, Jinfeng and
                  Fedus, William and
                  Abnar, Samira and
                  Chung, Hyung Won and
                  Narang, Sharan and
                  Yogatama, Dani and
                  Vaswani, Ashish and
                  Metzler, Donald},
  title        = {Scale Efficiently: Insights from Pre-training and Fine-tuning Transformers},
  journal      = {CoRR},
  volume       = {abs/2109.10686},
  year         = {2021},
  eprinttype   = {arXiv},
  eprint       = {2109.10686},
  url          = {https://arxiv.org/abs/2109.10686},
  timestamp    = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-10686.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR record for arXiv preprint 2109.11052. The proper noun "Arcade Learning
% Environment" is brace-protected in the title so that sentence-casing
% bibliography styles cannot lowercase it.
@article{DBLP:journals/corr/abs-2109-11052,
  author       = {Adrien Ali Ta{\"{\i}}ga and
                  William Fedus and
                  Marlos C. Machado and
                  Aaron C. Courville and
                  Marc G. Bellemare},
  title        = {On Bonus-Based Exploration Methods in the {Arcade Learning Environment}},
  journal      = {CoRR},
  volume       = {abs/2109.11052},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.11052},
  eprinttype   = {arXiv},
  eprint       = {2109.11052},
  timestamp    = {Mon, 27 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-11052.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 2112.06905.
% The acronym "GLaM" is brace-protected so that sentence-casing styles do not
% render it as "Glam".
% NOTE(review): this entry spells the author "Liam Fedus" while the
% neighbouring entries use "William Fedus" -- presumably the same person;
% confirm before normalising the name.
@article{DBLP:journals/corr/abs-2112-06905,
  author       = {Nan Du and
                  Yanping Huang and
                  Andrew M. Dai and
                  Simon Tong and
                  Dmitry Lepikhin and
                  Yuanzhong Xu and
                  Maxim Krikun and
                  Yanqi Zhou and
                  Adams Wei Yu and
                  Orhan Firat and
                  Barret Zoph and
                  Liam Fedus and
                  Maarten Bosma and
                  Zongwei Zhou and
                  Tao Wang and
                  Yu Emma Wang and
                  Kellie Webster and
                  Marie Pellat and
                  Kevin Robinson and
                  Kathy Meier{-}Hellstern and
                  Toju Duke and
                  Lucas Dixon and
                  Kun Zhang and
                  Quoc V. Le and
                  Yonghui Wu and
                  Zhifeng Chen and
                  Claire Cui},
  title        = {{GLaM}: Efficient Scaling of Language Models with Mixture-of-Experts},
  journal      = {CoRR},
  volume       = {abs/2112.06905},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.06905},
  eprinttype    = {arXiv},
  eprint       = {2112.06905},
  timestamp    = {Wed, 16 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-06905.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AAAI 2020 conference paper.
% The doi field is written in lowercase so that it matches the DOI embedded
% in the url field of this entry (DOIs are case-insensitive, but string-based
% de-duplication tools are not).
@inproceedings{DBLP:conf/aaai/JainFLPB20,
  author       = {Vishal Jain and
                  William Fedus and
                  Hugo Larochelle and
                  Doina Precup and
                  Marc G. Bellemare},
  title        = {Algorithmic Improvements for Deep Reinforcement Learning Applied to
                  Interactive Fiction},
  booktitle    = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2020, The Thirty-Second Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA,
                  February 7-12, 2020},
  pages        = {4328--4336},
  publisher    = {{AAAI} Press},
  year         = {2020},
  url          = {https://doi.org/10.1609/aaai.v34i04.5857},
  doi          = {10.1609/aaai.v34i04.5857},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/JainFLPB20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2020 conference paper.
% The acronym "GANs" is brace-protected so that sentence-casing styles do not
% lowercase it.
@inproceedings{DBLP:conf/iclr/CacciaCFLPC20,
  author       = {Massimo Caccia and
                  Lucas Caccia and
                  William Fedus and
                  Hugo Larochelle and
                  Joelle Pineau and
                  Laurent Charlin},
  title        = {Language {GANs} Falling Short},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=BJgza6VtPB},
  timestamp    = {Thu, 07 May 2020 17:11:47 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/CacciaCFLPC20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2020 conference paper.
% "Arcade Learning Environment" is brace-protected so that styles which
% sentence-case titles keep the proper noun capitalised.
@inproceedings{DBLP:conf/iclr/TaigaFMCB20,
  author       = {Adrien Ali Ta{\"{\i}}ga and
                  William Fedus and
                  Marlos C. Machado and
                  Aaron C. Courville and
                  Marc G. Bellemare},
  title        = {On Bonus Based Exploration Methods In The {Arcade Learning Environment}},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=BJewlyStDr},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/TaigaFMCB20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICML 2020 conference paper (PMLR volume 119).
% The url uses the https scheme, consistent with the other https links in
% this file.
@inproceedings{DBLP:conf/icml/FedusRABLRD20,
  author       = {William Fedus and
                  Prajit Ramachandran and
                  Rishabh Agarwal and
                  Yoshua Bengio and
                  Hugo Larochelle and
                  Mark Rowland and
                  Will Dabney},
  title        = {Revisiting Fundamentals of Experience Replay},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {3061--3071},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {https://proceedings.mlr.press/v119/fedus20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/FedusRABLRD20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 2002.12499.
% The proper noun "Atari" is brace-protected so that sentence-casing styles
% do not lowercase it.
@article{DBLP:journals/corr/abs-2002-12499,
  author       = {William Fedus and
                  Dibya Ghosh and
                  John D. Martin and
                  Marc G. Bellemare and
                  Yoshua Bengio and
                  Hugo Larochelle},
  title        = {On Catastrophic Interference in {Atari} 2600 Games},
  journal      = {CoRR},
  volume       = {abs/2002.12499},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.12499},
  eprinttype    = {arXiv},
  eprint       = {2002.12499},
  timestamp    = {Tue, 05 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-12499.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 2007.06700; same title and author list as the ICML
% 2020 entry DBLP:conf/icml/FedusRABLRD20 in this file.
% Author names use the "Last, First" form, which BibTeX parses into the same
% name parts as "First Last" for these names; field order is not significant.
@article{DBLP:journals/corr/abs-2007-06700,
  author     = {Fedus, William and
                Ramachandran, Prajit and
                Agarwal, Rishabh and
                Bengio, Yoshua and
                Larochelle, Hugo and
                Rowland, Mark and
                Dabney, Will},
  title      = {Revisiting Fundamentals of Experience Replay},
  journal    = {CoRR},
  volume     = {abs/2007.06700},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2007.06700},
  url        = {https://arxiv.org/abs/2007.06700},
  timestamp  = {Tue, 21 Jul 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-06700.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2019 conference paper.
% Author names use the "Last, First" form, which BibTeX parses into the same
% name parts as "First Last" for these names; field order is not significant.
@inproceedings{DBLP:conf/iclr/GoyalBFSLLLB19,
  author    = {Goyal, Anirudh and
               Brakel, Philemon and
               Fedus, William and
               Singhal, Soumye and
               Lillicrap, Timothy P. and
               Levine, Sergey and
               Larochelle, Hugo and
               Bengio, Yoshua},
  title     = {Recall Traces: Backtracking Models for Efficient Reinforcement Learning},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019,
               New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year      = {2019},
  url       = {https://openreview.net/forum?id=HygsfnR9Ym},
  timestamp = {Thu, 25 Jul 2019 13:03:15 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/GoyalBFSLLLB19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2019 conference paper.
% Author names use the "Last, First" form, which BibTeX parses into the same
% name parts as "First Last" for these names; field order is not significant.
@inproceedings{DBLP:conf/iclr/VelickovicFHLBH19,
  author    = {Velickovic, Petar and
               Fedus, William and
               Hamilton, William L. and
               Li{\`{o}}, Pietro and
               Bengio, Yoshua and
               Hjelm, R. Devon},
  title     = {Deep Graph Infomax},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019,
               New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year      = {2019},
  url       = {https://openreview.net/forum?id=rklz9iAcKQ},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/VelickovicFHLBH19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1902.06865.
% The url uses the https scheme, matching the arXiv links of the 2020-2021
% entries in this file.
@article{DBLP:journals/corr/abs-1902-06865,
  author       = {William Fedus and
                  Carles Gelada and
                  Yoshua Bengio and
                  Marc G. Bellemare and
                  Hugo Larochelle},
  title        = {Hyperbolic Discounting and Learning over Multiple Horizons},
  journal      = {CoRR},
  volume       = {abs/1902.06865},
  year         = {2019},
  url          = {https://arxiv.org/abs/1902.06865},
  eprinttype    = {arXiv},
  eprint       = {1902.06865},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-06865.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1908.02388.
% "Arcade Learning Environment" is brace-protected so that styles which
% sentence-case titles keep the proper noun capitalised. The url uses the
% https scheme, matching the arXiv links of the 2020-2021 entries in this file.
@article{DBLP:journals/corr/abs-1908-02388,
  author       = {Adrien Ali Ta{\"{\i}}ga and
                  William Fedus and
                  Marlos C. Machado and
                  Aaron C. Courville and
                  Marc G. Bellemare},
  title        = {Benchmarking Bonus-Based Exploration Methods on the {Arcade Learning
                  Environment}},
  journal      = {CoRR},
  volume       = {abs/1908.02388},
  year         = {2019},
  url          = {https://arxiv.org/abs/1908.02388},
  eprinttype    = {arXiv},
  eprint       = {1908.02388},
  timestamp    = {Fri, 09 Aug 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-02388.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1911.12511; same author list as the AAAI 2020 entry
% DBLP:conf/aaai/JainFLPB20 in this file (titles differ only in the case of
% "applied").
% The url uses the https scheme, matching the arXiv links of the 2020-2021
% entries in this file.
@article{DBLP:journals/corr/abs-1911-12511,
  author       = {Vishal Jain and
                  William Fedus and
                  Hugo Larochelle and
                  Doina Precup and
                  Marc G. Bellemare},
  title        = {Algorithmic Improvements for Deep Reinforcement Learning applied to
                  Interactive Fiction},
  journal      = {CoRR},
  volume       = {abs/1911.12511},
  year         = {2019},
  url          = {https://arxiv.org/abs/1911.12511},
  eprinttype    = {arXiv},
  eprint       = {1911.12511},
  timestamp    = {Wed, 08 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-12511.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2018 conference paper.
% "MaskGAN" is brace-protected so that sentence-casing styles do not render
% it as "Maskgan".
% NOTE(review): the blank at the end of this title is seven escaped
% underscores, while the CoRR entry DBLP:journals/corr/abs-1801-07736 in this
% file has six -- confirm which matches the published title.
@inproceedings{DBLP:conf/iclr/FedusGD18,
  author       = {William Fedus and
                  Ian J. Goodfellow and
                  Andrew M. Dai},
  title        = {{MaskGAN}: Better Text Generation via Filling in the {\_}{\_}{\_}{\_}{\_}{\_}{\_}},
  booktitle    = {6th International Conference on Learning Representations, {ICLR} 2018,
                  Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher    = {OpenReview.net},
  year         = {2018},
  url          = {https://openreview.net/forum?id=ByOExmWAb},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/FedusGD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICLR 2018 conference paper.
% The acronym "GANs" is brace-protected so that sentence-casing styles do not
% lowercase it.
@inproceedings{DBLP:conf/iclr/FedusRLDMG18,
  author       = {William Fedus and
                  Mihaela Rosca and
                  Balaji Lakshminarayanan and
                  Andrew M. Dai and
                  Shakir Mohamed and
                  Ian J. Goodfellow},
  title        = {Many Paths to Equilibrium: {GANs} Do Not Need to Decrease a Divergence
                  At Every Step},
  booktitle    = {6th International Conference on Learning Representations, {ICLR} 2018,
                  Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher    = {OpenReview.net},
  year         = {2018},
  url          = {https://openreview.net/forum?id=ByQpn1ZA-},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/FedusRLDMG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1801.07736; same author list as the ICLR 2018 entry
% DBLP:conf/iclr/FedusGD18 in this file.
% "MaskGAN" is brace-protected so that sentence-casing styles do not render
% it as "Maskgan". The url uses the https scheme, matching the arXiv links of
% the 2020-2021 entries in this file.
@article{DBLP:journals/corr/abs-1801-07736,
  author       = {William Fedus and
                  Ian J. Goodfellow and
                  Andrew M. Dai},
  title        = {{MaskGAN}: Better Text Generation via Filling in the {\_}{\_}{\_}{\_}{\_}{\_}},
  journal      = {CoRR},
  volume       = {abs/1801.07736},
  year         = {2018},
  url          = {https://arxiv.org/abs/1801.07736},
  eprinttype    = {arXiv},
  eprint       = {1801.07736},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1801-07736.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1802.09484.
% The url uses the https scheme, matching the arXiv links of the 2020-2021
% entries in this file.
@article{DBLP:journals/corr/abs-1802-09484,
  author       = {Valentin Thomas and
                  Emmanuel Bengio and
                  William Fedus and
                  Jules Pondard and
                  Philippe Beaudoin and
                  Hugo Larochelle and
                  Joelle Pineau and
                  Doina Precup and
                  Yoshua Bengio},
  title        = {Disentangling the independently controllable factors of variation
                  by interacting with the world},
  journal      = {CoRR},
  volume       = {abs/1802.09484},
  year         = {2018},
  url          = {https://arxiv.org/abs/1802.09484},
  eprinttype    = {arXiv},
  eprint       = {1802.09484},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1802-09484.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1804.00379; same title as the ICLR 2019 entry
% DBLP:conf/iclr/GoyalBFSLLLB19 in this file, which lists one additional
% author.
% The url uses the https scheme, matching the arXiv links of the 2020-2021
% entries in this file.
@article{DBLP:journals/corr/abs-1804-00379,
  author       = {Anirudh Goyal and
                  Philemon Brakel and
                  William Fedus and
                  Timothy P. Lillicrap and
                  Sergey Levine and
                  Hugo Larochelle and
                  Yoshua Bengio},
  title        = {Recall Traces: Backtracking Models for Efficient Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1804.00379},
  year         = {2018},
  url          = {https://arxiv.org/abs/1804.00379},
  eprinttype    = {arXiv},
  eprint       = {1804.00379},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-00379.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1809.10341; same title and author list as the ICLR
% 2019 entry DBLP:conf/iclr/VelickovicFHLBH19 in this file.
% The url uses the https scheme, matching the arXiv links of the 2020-2021
% entries in this file.
@article{DBLP:journals/corr/abs-1809-10341,
  author       = {Petar Velickovic and
                  William Fedus and
                  William L. Hamilton and
                  Pietro Li{\`{o}} and
                  Yoshua Bengio and
                  R. Devon Hjelm},
  title        = {Deep Graph Infomax},
  journal      = {CoRR},
  volume       = {abs/1809.10341},
  year         = {2018},
  url          = {https://arxiv.org/abs/1809.10341},
  eprinttype    = {arXiv},
  eprint       = {1809.10341},
  timestamp    = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1809-10341.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1811.02549; same title and author list as the ICLR
% 2020 entry DBLP:conf/iclr/CacciaCFLPC20 in this file.
% The acronym "GANs" is brace-protected so that sentence-casing styles do not
% lowercase it. The url uses the https scheme, matching the arXiv links of the
% 2020-2021 entries in this file.
@article{DBLP:journals/corr/abs-1811-02549,
  author       = {Massimo Caccia and
                  Lucas Caccia and
                  William Fedus and
                  Hugo Larochelle and
                  Joelle Pineau and
                  Laurent Charlin},
  title        = {Language {GANs} Falling Short},
  journal      = {CoRR},
  volume       = {abs/1811.02549},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.02549},
  eprinttype    = {arXiv},
  eprint       = {1811.02549},
  timestamp    = {Thu, 22 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-02549.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv (CoRR) preprint 1710.08446; same title and author list as the ICLR
% 2018 entry DBLP:conf/iclr/FedusRLDMG18 in this file.
% The acronym "GANs" is brace-protected so that sentence-casing styles do not
% lowercase it. The url uses the https scheme, matching the arXiv links of the
% 2020-2021 entries in this file.
@article{DBLP:journals/corr/abs-1710-08446,
  author       = {William Fedus and
                  Mihaela Rosca and
                  Balaji Lakshminarayanan and
                  Andrew M. Dai and
                  Shakir Mohamed and
                  Ian J. Goodfellow},
  title        = {Many Paths to Equilibrium: {GANs} Do Not Need to Decrease a Divergence
                  At Every Step},
  journal      = {CoRR},
  volume       = {abs/1710.08446},
  year         = {2017},
  url          = {https://arxiv.org/abs/1710.08446},
  eprinttype    = {arXiv},
  eprint       = {1710.08446},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-08446.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

manage site settings

To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.