BibTeX records: John Schulman

download as .bib file

% "Let's Verify Step by Step" -- ICLR 2024 conference version (DBLP record).
@inproceedings{DBLP:conf/iclr/LightmanKBEBLLS24,
  author    = {Hunter Lightman and
               Vineet Kosaraju and
               Yuri Burda and
               Harrison Edwards and
               Bowen Baker and
               Teddy Lee and
               Jan Leike and
               John Schulman and
               Ilya Sutskever and
               Karl Cobbe},
  title     = {Let's Verify Step by Step},
  booktitle = {The Twelfth International Conference on Learning Representations,
               {ICLR} 2024, Vienna, Austria, May 7-11, 2024},
  publisher = {OpenReview.net},
  year      = {2024},
  url       = {https://openreview.net/forum?id=v8L0pN6EOi},
  timestamp = {Wed, 07 Aug 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/LightmanKBEBLLS24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Scaling Laws for Reward Model Overoptimization" -- ICML 2023 version, PMLR volume 202.
@inproceedings{DBLP:conf/icml/GaoSH23,
  author    = {Leo Gao and
               John Schulman and
               Jacob Hilton},
  editor    = {Andreas Krause and
               Emma Brunskill and
               Kyunghyun Cho and
               Barbara Engelhardt and
               Sivan Sabato and
               Jonathan Scarlett},
  title     = {Scaling Laws for Reward Model Overoptimization},
  booktitle = {International Conference on Machine Learning, {ICML} 2023, 23-29 July
               2023, Honolulu, Hawaii, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {10835--10866},
  publisher = {{PMLR}},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/gao23h.html},
  timestamp = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/GaoSH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Scaling laws for single-agent reinforcement learning" -- arXiv (CoRR) preprint 2301.13442.
@article{DBLP:journals/corr/abs-2301-13442,
  author     = {Jacob Hilton and
                Jie Tang and
                John Schulman},
  title      = {Scaling laws for single-agent reinforcement learning},
  journal    = {CoRR},
  volume     = {abs/2301.13442},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.13442},
  doi        = {10.48550/ARXIV.2301.13442},
  eprinttype = {arXiv},
  eprint     = {2301.13442},
  timestamp  = {Thu, 02 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-13442.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Let's Verify Step by Step" -- arXiv (CoRR) preprint 2305.20050.
@article{DBLP:journals/corr/abs-2305-20050,
  author     = {Hunter Lightman and
                Vineet Kosaraju and
                Yura Burda and
                Harri Edwards and
                Bowen Baker and
                Teddy Lee and
                Jan Leike and
                John Schulman and
                Ilya Sutskever and
                Karl Cobbe},
  title      = {Let's Verify Step by Step},
  journal    = {CoRR},
  volume     = {abs/2305.20050},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.20050},
  doi        = {10.48550/ARXIV.2305.20050},
  eprinttype = {arXiv},
  eprint     = {2305.20050},
  timestamp  = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-20050.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Batch size-invariance for policy optimization" -- NeurIPS 2022 proceedings version.
% The url field holds the raw address (underscore not LaTeX-escaped) so that
% url-aware styles, which typeset it verbatim, produce a working link.
@inproceedings{DBLP:conf/nips/HiltonCS22,
  author    = {Jacob Hilton and
               Karl Cobbe and
               John Schulman},
  editor    = {Sanmi Koyejo and
               S. Mohamed and
               A. Agarwal and
               Danielle Belgrave and
               K. Cho and
               A. Oh},
  title     = {Batch size-invariance for policy optimization},
  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference
               on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
               LA, USA, November 28 - December 9, 2022},
  year      = {2022},
  url       = {http://papers.nips.cc/paper_files/paper/2022/hash/6ceb6c2150bbf46fd75528a6cd6be793-Abstract-Conference.html},
  timestamp = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/HiltonCS22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Training language models to follow instructions with human feedback" --
% NeurIPS 2022 proceedings version.
% The url field holds the raw address (underscore not LaTeX-escaped) so that
% url-aware styles, which typeset it verbatim, produce a working link.
@inproceedings{DBLP:conf/nips/Ouyang0JAWMZASR22,
  author    = {Long Ouyang and
               Jeffrey Wu and
               Xu Jiang and
               Diogo Almeida and
               Carroll L. Wainwright and
               Pamela Mishkin and
               Chong Zhang and
               Sandhini Agarwal and
               Katarina Slama and
               Alex Ray and
               John Schulman and
               Jacob Hilton and
               Fraser Kelton and
               Luke Miller and
               Maddie Simens and
               Amanda Askell and
               Peter Welinder and
               Paul F. Christiano and
               Jan Leike and
               Ryan Lowe},
  editor    = {Sanmi Koyejo and
               S. Mohamed and
               A. Agarwal and
               Danielle Belgrave and
               K. Cho and
               A. Oh},
  title     = {Training language models to follow instructions with human feedback},
  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference
               on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
               LA, USA, November 28 - December 9, 2022},
  year      = {2022},
  url       = {http://papers.nips.cc/paper_files/paper/2022/hash/b1efde53be364a73914f58805a001731-Abstract-Conference.html},
  timestamp = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/Ouyang0JAWMZASR22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Training language models to follow instructions with human feedback" --
% arXiv (CoRR) preprint 2203.02155.
@article{DBLP:journals/corr/abs-2203-02155,
  author     = {Long Ouyang and
                Jeff Wu and
                Xu Jiang and
                Diogo Almeida and
                Carroll L. Wainwright and
                Pamela Mishkin and
                Chong Zhang and
                Sandhini Agarwal and
                Katarina Slama and
                Alex Ray and
                John Schulman and
                Jacob Hilton and
                Fraser Kelton and
                Luke Miller and
                Maddie Simens and
                Amanda Askell and
                Peter Welinder and
                Paul F. Christiano and
                Jan Leike and
                Ryan Lowe},
  title      = {Training language models to follow instructions with human feedback},
  journal    = {CoRR},
  volume     = {abs/2203.02155},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.02155},
  doi        = {10.48550/ARXIV.2203.02155},
  eprinttype = {arXiv},
  eprint     = {2203.02155},
  timestamp  = {Sat, 21 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-02155.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Efficient Training of Language Models to Fill in the Middle" -- arXiv (CoRR) preprint 2207.14255.
@article{DBLP:journals/corr/abs-2207-14255,
  author     = {Mohammad Bavarian and
                Heewoo Jun and
                Nikolas Tezak and
                John Schulman and
                Christine McLeavey and
                Jerry Tworek and
                Mark Chen},
  title      = {Efficient Training of Language Models to Fill in the Middle},
  journal    = {CoRR},
  volume     = {abs/2207.14255},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2207.14255},
  doi        = {10.48550/ARXIV.2207.14255},
  eprinttype = {arXiv},
  eprint     = {2207.14255},
  timestamp  = {Thu, 25 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-14255.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Scaling Laws for Reward Model Overoptimization" -- arXiv (CoRR) preprint 2210.10760.
@article{DBLP:journals/corr/abs-2210-10760,
  author     = {Leo Gao and
                John Schulman and
                Jacob Hilton},
  title      = {Scaling Laws for Reward Model Overoptimization},
  journal    = {CoRR},
  volume     = {abs/2210.10760},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.10760},
  doi        = {10.48550/ARXIV.2210.10760},
  eprinttype = {arXiv},
  eprint     = {2210.10760},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-10760.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Phasic Policy Gradient" -- ICML 2021 version, PMLR volume 139.
@inproceedings{DBLP:conf/icml/CobbeHKS21,
  author    = {Karl Cobbe and
               Jacob Hilton and
               Oleg Klimov and
               John Schulman},
  editor    = {Marina Meila and
               Tong Zhang},
  title     = {Phasic Policy Gradient},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning,
               {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {2020--2027},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/cobbe21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/CobbeHKS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% MineRL 2020 competition description -- arXiv (CoRR) preprint 2101.11071.
% {MineRL} is braced in the title so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2101-11071,
  author     = {William H. Guss and
                Mario Ynocente Castro and
                Sam Devlin and
                Brandon Houghton and
                Noboru Sean Kuno and
                Crissman Loomis and
                Stephanie Milani and
                Sharada P. Mohanty and
                Keisuke Nakata and
                Ruslan Salakhutdinov and
                John Schulman and
                Shinya Shiroshita and
                Nicholay Topin and
                Avinash Ummadisingu and
                Oriol Vinyals},
  title      = {The {MineRL} 2020 Competition on Sample Efficient Reinforcement Learning
                using Human Priors},
  journal    = {CoRR},
  volume     = {abs/2101.11071},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.11071},
  eprinttype = {arXiv},
  eprint     = {2101.11071},
  timestamp  = {Sun, 31 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-11071.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2020 Procgen Benchmark report -- arXiv (CoRR) preprint 2103.15332.
% {NeurIPS} and {Procgen} are braced in the title so sentence-casing styles
% keep their capitalisation.
@article{DBLP:journals/corr/abs-2103-15332,
  author     = {Sharada P. Mohanty and
                Jyotish Poonganam and
                Adrien Gaidon and
                Andrey Kolobov and
                Blake Wulfe and
                Dipam Chakraborty and
                Grazvydas Semetulskis and
                Jo{\~{a}}o Schapke and
                Jonas Kubilius and
                Jurgis Pasukonis and
                Linas Klimas and
                Matthew J. Hausknecht and
                Patrick MacAlpine and
                Quang Nhat Tran and
                Thomas Tumiel and
                Xiaocheng Tang and
                Xinwei Chen and
                Christopher Hesse and
                Jacob Hilton and
                William Hebgen Guss and
                Sahika Genc and
                John Schulman and
                Karl Cobbe},
  title      = {Measuring Sample Efficiency and Generalization in Reinforcement Learning
                Benchmarks: {NeurIPS} 2020 {Procgen} Benchmark},
  journal    = {CoRR},
  volume     = {abs/2103.15332},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.15332},
  eprinttype = {arXiv},
  eprint     = {2103.15332},
  timestamp  = {Wed, 07 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-15332.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Unsolved Problems in ML Safety" -- arXiv (CoRR) preprint 2109.13916.
@article{DBLP:journals/corr/abs-2109-13916,
  author     = {Dan Hendrycks and
                Nicholas Carlini and
                John Schulman and
                Jacob Steinhardt},
  title      = {Unsolved Problems in {ML} Safety},
  journal    = {CoRR},
  volume     = {abs/2109.13916},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.13916},
  eprinttype = {arXiv},
  eprint     = {2109.13916},
  timestamp  = {Mon, 04 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-13916.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Batch size-invariance for policy optimization" -- arXiv (CoRR) preprint 2110.00641.
@article{DBLP:journals/corr/abs-2110-00641,
  author     = {Jacob Hilton and
                Karl Cobbe and
                John Schulman},
  title      = {Batch size-invariance for policy optimization},
  journal    = {CoRR},
  volume     = {abs/2110.00641},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.00641},
  eprinttype = {arXiv},
  eprint     = {2110.00641},
  timestamp  = {Fri, 08 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-00641.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Training Verifiers to Solve Math Word Problems" -- arXiv (CoRR) preprint 2110.14168.
@article{DBLP:journals/corr/abs-2110-14168,
  author     = {Karl Cobbe and
                Vineet Kosaraju and
                Mohammad Bavarian and
                Mark Chen and
                Heewoo Jun and
                Lukasz Kaiser and
                Matthias Plappert and
                Jerry Tworek and
                Jacob Hilton and
                Reiichiro Nakano and
                Christopher Hesse and
                John Schulman},
  title      = {Training Verifiers to Solve Math Word Problems},
  journal    = {CoRR},
  volume     = {abs/2110.14168},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.14168},
  eprinttype = {arXiv},
  eprint     = {2110.14168},
  timestamp  = {Mon, 12 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-14168.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% WebGPT paper -- arXiv (CoRR) preprint 2112.09332.
% {WebGPT} is braced in the title so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2112-09332,
  author     = {Reiichiro Nakano and
                Jacob Hilton and
                Suchir Balaji and
                Jeff Wu and
                Long Ouyang and
                Christina Kim and
                Christopher Hesse and
                Shantanu Jain and
                Vineet Kosaraju and
                William Saunders and
                Xu Jiang and
                Karl Cobbe and
                Tyna Eloundou and
                Gretchen Krueger and
                Kevin Button and
                Matthew Knight and
                Benjamin Chess and
                John Schulman},
  title      = {{WebGPT}: Browser-assisted question-answering with human feedback},
  journal    = {CoRR},
  volume     = {abs/2112.09332},
  year       = {2021},
  url        = {https://arxiv.org/abs/2112.09332},
  eprinttype = {arXiv},
  eprint     = {2112.09332},
  timestamp  = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2112-09332.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Teacher-Student Curriculum Learning" -- journal version, IEEE TNNLS 31(9), 2020.
@article{DBLP:journals/tnn/MatiisenOCS20,
  author    = {Tambet Matiisen and
               Avital Oliver and
               Taco Cohen and
               John Schulman},
  title     = {Teacher-Student Curriculum Learning},
  journal   = {{IEEE} Trans. Neural Networks Learn. Syst.},
  volume    = {31},
  number    = {9},
  pages     = {3732--3740},
  year      = {2020},
  url       = {https://doi.org/10.1109/TNNLS.2019.2934906},
  doi       = {10.1109/TNNLS.2019.2934906},
  timestamp = {Sun, 11 Oct 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tnn/MatiisenOCS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Leveraging Procedural Generation to Benchmark Reinforcement Learning" --
% ICML 2020 version, PMLR volume 119.
@inproceedings{DBLP:conf/icml/CobbeHHS20,
  author    = {Karl Cobbe and
               Christopher Hesse and
               Jacob Hilton and
               John Schulman},
  title     = {Leveraging Procedural Generation to Benchmark Reinforcement Learning},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning,
               {ICML} 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {2048--2056},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {http://proceedings.mlr.press/v119/cobbe20a.html},
  timestamp = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/CobbeHHS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Distribution Augmentation for Generative Modeling" -- ICML 2020 version, PMLR volume 119.
@inproceedings{DBLP:conf/icml/JunCCSRRS20,
  author    = {Heewoo Jun and
               Rewon Child and
               Mark Chen and
               John Schulman and
               Aditya Ramesh and
               Alec Radford and
               Ilya Sutskever},
  title     = {Distribution Augmentation for Generative Modeling},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning,
               {ICML} 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {5006--5019},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {http://proceedings.mlr.press/v119/jun20a.html},
  timestamp = {Thu, 25 May 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/JunCCSRRS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2020 Procgen Benchmark report -- Competition and Demonstration Track
% version, PMLR volume 133.
% {NeurIPS} and {Procgen} are braced in the title so sentence-casing styles
% keep their capitalisation.
@inproceedings{DBLP:conf/nips/MohantyPGKWCSSK20,
  author    = {Sharada P. Mohanty and
               Jyotish Poonganam and
               Adrien Gaidon and
               Andrey Kolobov and
               Blake Wulfe and
               Dipam Chakraborty and
               Grazvydas Semetulskis and
               Jo{\~{a}}o Schapke and
               Jonas Kubilius and
               Jurgis Pasukonis and
               Linas Klimas and
               Matthew J. Hausknecht and
               Patrick MacAlpine and
               Quang Nhat Tran and
               Thomas Tumiel and
               Xiaocheng Tang and
               Xinwei Chen and
               Christopher Hesse and
               Jacob Hilton and
               William Hebgen Guss and
               Sahika Genc and
               John Schulman and
               Karl Cobbe},
  editor    = {Hugo Jair Escalante and
               Katja Hofmann},
  title     = {Measuring Sample Efficiency and Generalization in Reinforcement Learning
               Benchmarks: {NeurIPS} 2020 {Procgen} Benchmark},
  booktitle = {NeurIPS 2020 Competition and Demonstration Track, 6-12 December 2020,
               Virtual Event / Vancouver, BC, Canada},
  series    = {Proceedings of Machine Learning Research},
  volume    = {133},
  pages     = {361--395},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {http://proceedings.mlr.press/v133/mohanty21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/MohantyPGKWCSSK20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Phasic Policy Gradient" -- arXiv (CoRR) preprint 2009.04416.
@article{DBLP:journals/corr/abs-2009-04416,
  author     = {Karl Cobbe and
                Jacob Hilton and
                Oleg Klimov and
                John Schulman},
  title      = {Phasic Policy Gradient},
  journal    = {CoRR},
  volume     = {abs/2009.04416},
  year       = {2020},
  url        = {https://arxiv.org/abs/2009.04416},
  eprinttype = {arXiv},
  eprint     = {2009.04416},
  timestamp  = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2009-04416.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Scaling Laws for Autoregressive Generative Modeling" -- arXiv (CoRR) preprint 2010.14701.
@article{DBLP:journals/corr/abs-2010-14701,
  author     = {Tom Henighan and
                Jared Kaplan and
                Mor Katz and
                Mark Chen and
                Christopher Hesse and
                Jacob Jackson and
                Heewoo Jun and
                Tom B. Brown and
                Prafulla Dhariwal and
                Scott Gray and
                Chris Hallacy and
                Benjamin Mann and
                Alec Radford and
                Aditya Ramesh and
                Nick Ryder and
                Daniel M. Ziegler and
                John Schulman and
                Dario Amodei and
                Sam McCandlish},
  title      = {Scaling Laws for Autoregressive Generative Modeling},
  journal    = {CoRR},
  volume     = {abs/2010.14701},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.14701},
  eprinttype = {arXiv},
  eprint     = {2010.14701},
  timestamp  = {Thu, 25 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-14701.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Quantifying Generalization in Reinforcement Learning" -- ICML 2019 version, PMLR volume 97.
@inproceedings{DBLP:conf/icml/CobbeKHKS19,
  author    = {Karl Cobbe and
               Oleg Klimov and
               Christopher Hesse and
               Taehoon Kim and
               John Schulman},
  editor    = {Kamalika Chaudhuri and
               Ruslan Salakhutdinov},
  title     = {Quantifying Generalization in Reinforcement Learning},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning,
               {ICML} 2019, 9-15 June 2019, Long Beach, California, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {97},
  pages     = {1282--1289},
  publisher = {{PMLR}},
  year      = {2019},
  url       = {http://proceedings.mlr.press/v97/cobbe19a.html},
  timestamp = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/CobbeKHKS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Semi-Supervised Learning by Label Gradient Alignment" -- arXiv (CoRR) preprint 1902.02336.
@article{DBLP:journals/corr/abs-1902-02336,
  author     = {Jacob Jackson and
                John Schulman},
  title      = {Semi-Supervised Learning by Label Gradient Alignment},
  journal    = {CoRR},
  volume     = {abs/1902.02336},
  year       = {2019},
  url        = {http://arxiv.org/abs/1902.02336},
  eprinttype = {arXiv},
  eprint     = {1902.02336},
  timestamp  = {Tue, 21 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-02336.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Policy Gradient Search" -- arXiv (CoRR) preprint 1904.03646.
@article{DBLP:journals/corr/abs-1904-03646,
  author     = {Thomas Anthony and
                Robert Nishihara and
                Philipp Moritz and
                Tim Salimans and
                John Schulman},
  title      = {Policy Gradient Search: Online Planning and Expert Iteration without
                Search Trees},
  journal    = {CoRR},
  volume     = {abs/1904.03646},
  year       = {2019},
  url        = {http://arxiv.org/abs/1904.03646},
  eprinttype = {arXiv},
  eprint     = {1904.03646},
  timestamp  = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-03646.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Leveraging Procedural Generation to Benchmark Reinforcement Learning" --
% arXiv (CoRR) preprint 1912.01588.
@article{DBLP:journals/corr/abs-1912-01588,
  author     = {Karl Cobbe and
                Christopher Hesse and
                Jacob Hilton and
                John Schulman},
  title      = {Leveraging Procedural Generation to Benchmark Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1912.01588},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.01588},
  eprinttype = {arXiv},
  eprint     = {1912.01588},
  timestamp  = {Thu, 02 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-01588.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Model-Based Reinforcement Learning via Meta-Policy Optimization" --
% CoRL 2018 version, PMLR volume 87.
@inproceedings{DBLP:conf/corl/ClaveraRS0AA18,
  author    = {Ignasi Clavera and
               Jonas Rothfuss and
               John Schulman and
               Yasuhiro Fujita and
               Tamim Asfour and
               Pieter Abbeel},
  title     = {Model-Based Reinforcement Learning via Meta-Policy Optimization},
  booktitle = {2nd Annual Conference on Robot Learning, CoRL 2018, Z{\"{u}}rich,
               Switzerland, 29-31 October 2018, Proceedings},
  series    = {Proceedings of Machine Learning Research},
  volume    = {87},
  pages     = {617--629},
  publisher = {{PMLR}},
  year      = {2018},
  url       = {http://proceedings.mlr.press/v87/clavera18a.html},
  timestamp = {Wed, 03 Apr 2019 18:17:24 +0200},
  biburl    = {https://dblp.org/rec/conf/corl/ClaveraRS0AA18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Meta Learning Shared Hierarchies" -- ICLR 2018 conference track.
@inproceedings{DBLP:conf/iclr/FransH0AS18,
  author    = {Kevin Frans and
               Jonathan Ho and
               Xi Chen and
               Pieter Abbeel and
               John Schulman},
  title     = {Meta Learning Shared Hierarchies},
  booktitle = {6th International Conference on Learning Representations, {ICLR} 2018,
               Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher = {OpenReview.net},
  year      = {2018},
  url       = {https://openreview.net/forum?id=SyX0IeWAW},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/FransH0AS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Learning Complex Dexterous Manipulation with Deep Reinforcement Learning and
% Demonstrations" -- Robotics: Science and Systems XIV (2018).
@inproceedings{DBLP:conf/rss/RajeswaranKGVST18,
  author    = {Aravind Rajeswaran and
               Vikash Kumar and
               Abhishek Gupta and
               Giulia Vezzani and
               John Schulman and
               Emanuel Todorov and
               Sergey Levine},
  editor    = {Hadas Kress{-}Gazit and
               Siddhartha S. Srinivasa and
               Tom Howard and
               Nikolay Atanasov},
  title     = {Learning Complex Dexterous Manipulation with Deep Reinforcement Learning
               and Demonstrations},
  booktitle = {Robotics: Science and Systems XIV, Carnegie Mellon University, Pittsburgh,
               Pennsylvania, USA, June 26-30, 2018},
  year      = {2018},
  url       = {http://www.roboticsproceedings.org/rss14/p49.html},
  doi       = {10.15607/RSS.2018.XIV.049},
  timestamp = {Fri, 04 Aug 2023 08:25:46 +0200},
  biburl    = {https://dblp.org/rec/conf/rss/RajeswaranKGVST18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "On First-Order Meta-Learning Algorithms" -- arXiv (CoRR) preprint 1803.02999.
@article{DBLP:journals/corr/abs-1803-02999,
  author     = {Alex Nichol and
                Joshua Achiam and
                John Schulman},
  title      = {On First-Order Meta-Learning Algorithms},
  journal    = {CoRR},
  volume     = {abs/1803.02999},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.02999},
  eprinttype = {arXiv},
  eprint     = {1803.02999},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-02999.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Gotta Learn Fast" -- arXiv (CoRR) preprint 1804.03720.
@article{DBLP:journals/corr/abs-1804-03720,
  author     = {Alex Nichol and
                Vicki Pfau and
                Christopher Hesse and
                Oleg Klimov and
                John Schulman},
  title      = {Gotta Learn Fast: {A} New Benchmark for Generalization in {RL}},
  journal    = {CoRR},
  volume     = {abs/1804.03720},
  year       = {2018},
  url        = {http://arxiv.org/abs/1804.03720},
  eprinttype = {arXiv},
  eprint     = {1804.03720},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-03720.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Model-Based Reinforcement Learning via Meta-Policy Optimization" --
% arXiv (CoRR) preprint 1809.05214.
@article{DBLP:journals/corr/abs-1809-05214,
  author     = {Ignasi Clavera and
                Jonas Rothfuss and
                John Schulman and
                Yasuhiro Fujita and
                Tamim Asfour and
                Pieter Abbeel},
  title      = {Model-Based Reinforcement Learning via Meta-Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/1809.05214},
  year       = {2018},
  url        = {http://arxiv.org/abs/1809.05214},
  eprinttype = {arXiv},
  eprint     = {1809.05214},
  timestamp  = {Sun, 14 Oct 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1809-05214.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Quantifying Generalization in Reinforcement Learning" -- arXiv (CoRR) preprint 1812.02341.
@article{DBLP:journals/corr/abs-1812-02341,
  author     = {Karl Cobbe and
                Oleg Klimov and
                Christopher Hesse and
                Taehoon Kim and
                John Schulman},
  title      = {Quantifying Generalization in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1812.02341},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.02341},
  eprinttype = {arXiv},
  eprint     = {1812.02341},
  timestamp  = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-02341.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% "Variational Lossy Autoencoder" -- ICLR 2017 conference track.
@inproceedings{DBLP:conf/iclr/0022KSDDSSA17,
  author    = {Xi Chen and
               Diederik P. Kingma and
               Tim Salimans and
               Yan Duan and
               Prafulla Dhariwal and
               John Schulman and
               Ilya Sutskever and
               Pieter Abbeel},
  title     = {Variational Lossy Autoencoder},
  booktitle = {5th International Conference on Learning Representations, {ICLR} 2017,
               Toulon, France, April 24-26, 2017, Conference Track Proceedings},
  publisher = {OpenReview.net},
  year      = {2017},
  url       = {https://openreview.net/forum?id=BysvGP5ee},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/0022KSDDSSA17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Count-based exploration study -- Advances in Neural Information Processing Systems 30 (2017).
@inproceedings{DBLP:conf/nips/TangHFSCDSTA17,
  author    = {Haoran Tang and
               Rein Houthooft and
               Davis Foote and
               Adam Stooke and
               Xi Chen and
               Yan Duan and
               John Schulman and
               Filip De Turck and
               Pieter Abbeel},
  editor    = {Isabelle Guyon and
               Ulrike von Luxburg and
               Samy Bengio and
               Hanna M. Wallach and
               Rob Fergus and
               S. V. N. Vishwanathan and
               Roman Garnett},
  title     = {{\#}Exploration: {A} Study of Count-Based Exploration for Deep Reinforcement
               Learning},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference
               on Neural Information Processing Systems 2017, December 4-9, 2017,
               Long Beach, CA, {USA}},
  pages     = {2753--2762},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/3a20f62a0af1aa152670bab3c602feed-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/TangHFSCDSTA17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% "Q" is brace-protected so sentence-casing styles do not render "q-learning".
@article{DBLP:journals/corr/SchulmanAC17,
  author       = {John Schulman and
                  Pieter Abbeel and
                  Xi Chen},
  title        = {Equivalence Between Policy Gradients and Soft {Q}-Learning},
  journal      = {CoRR},
  volume       = {abs/1704.06440},
  year         = {2017},
  url          = {http://arxiv.org/abs/1704.06440},
  eprinttype   = {arXiv},
  eprint       = {1704.06440},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SchulmanAC17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The title math was over-escaped on export and printed as literal text; it is restored as real math.
% \boldsymbol needs the amsmath (amsbsy) package in the citing document.
@article{DBLP:journals/corr/ChenSAS17,
  author       = {Richard Y. Chen and
                  Szymon Sidor and
                  Pieter Abbeel and
                  John Schulman},
  title        = {{UCB} and {InfoGain} Exploration via {$\boldsymbol{Q}$}-Ensembles},
  journal      = {CoRR},
  volume       = {abs/1706.01502},
  year         = {2017},
  url          = {http://arxiv.org/abs/1706.01502},
  eprinttype   = {arXiv},
  eprint       = {1706.01502},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ChenSAS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MatiisenOCS17,
  author       = {Matiisen, Tambet and
                  Oliver, Avital and
                  Cohen, Taco and
                  Schulman, John},
  title        = {Teacher-Student Curriculum Learning},
  journal      = {CoRR},
  volume       = {abs/1707.00183},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.00183},
  eprinttype   = {arXiv},
  eprint       = {1707.00183},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MatiisenOCS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SchulmanWDRK17,
  author       = {Schulman, John and
                  Wolski, Filip and
                  Dhariwal, Prafulla and
                  Radford, Alec and
                  Klimov, Oleg},
  title        = {Proximal Policy Optimization Algorithms},
  journal      = {CoRR},
  volume       = {abs/1707.06347},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.06347},
  eprinttype   = {arXiv},
  eprint       = {1707.06347},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1709-10087,
  author       = {Rajeswaran, Aravind and
                  Kumar, Vikash and
                  Gupta, Abhishek and
                  Schulman, John and
                  Todorov, Emanuel and
                  Levine, Sergey},
  title        = {Learning Complex Dexterous Manipulation with Deep Reinforcement Learning and Demonstrations},
  journal      = {CoRR},
  volume       = {abs/1709.10087},
  year         = {2017},
  url          = {http://arxiv.org/abs/1709.10087},
  eprinttype   = {arXiv},
  eprint       = {1709.10087},
  timestamp    = {Thu, 20 Dec 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1709-10087.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1710-09767,
  author       = {Frans, Kevin and
                  Ho, Jonathan and
                  Chen, Xi and
                  Abbeel, Pieter and
                  Schulman, John},
  title        = {Meta Learning Shared Hierarchies},
  journal      = {CoRR},
  volume       = {abs/1710.09767},
  year         = {2017},
  url          = {http://arxiv.org/abs/1710.09767},
  eprinttype   = {arXiv},
  eprint       = {1710.09767},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-09767.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/us/Schulman16,
  author       = {Schulman, John},
  title        = {Optimizing Expectations: From Deep Reinforcement Learning to Stochastic Computation Graphs},
  school       = {University of California, Berkeley, {USA}},
  year         = {2016},
  url          = {https://www.escholarship.org/uc/item/9z908523},
  timestamp    = {Wed, 22 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/phd/us/Schulman16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Booktitle ordinal corrected from "33nd" to "33rd"; "USA" braced like the other entries.
@inproceedings{DBLP:conf/icml/DuanCHSA16,
  author       = {Yan Duan and
                  Xi Chen and
                  Rein Houthooft and
                  John Schulman and
                  Pieter Abbeel},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Benchmarking Deep Reinforcement Learning for Continuous Control},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, {USA}, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1329--1338},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/duan16.html},
  timestamp    = {Wed, 29 May 2019 08:41:46 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/DuanCHSA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Names use "Last, First" form so the compound surname "De Turck" is not split into given name "Filip De" + surname "Turck".
@inproceedings{DBLP:conf/nips/HouthooftCCDSTA16,
  author       = {Houthooft, Rein and
                  Chen, Xi and
                  Duan, Yan and
                  Schulman, John and
                  De Turck, Filip and
                  Abbeel, Pieter},
  editor       = {Lee, Daniel D. and
                  Sugiyama, Masashi and
                  von Luxburg, Ulrike and
                  Guyon, Isabelle and
                  Garnett, Roman},
  title        = {{VIME:} Variational Information Maximizing Exploration},
  booktitle    = {Advances in Neural Information Processing Systems 29: Annual Conference
                  on Neural Information Processing Systems 2016, December 5-10, 2016,
                  Barcelona, Spain},
  pages        = {1109--1117},
  year         = {2016},
  url          = {https://proceedings.neurips.cc/paper/2016/hash/abd815286ba1007abfbb8415b83ae2cf-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/HouthooftCCDSTA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "InfoGAN" is brace-protected so sentence-casing styles do not render "Infogan".
@inproceedings{DBLP:conf/nips/ChenCDHSSA16,
  author       = {Xi Chen and
                  Yan Duan and
                  Rein Houthooft and
                  John Schulman and
                  Ilya Sutskever and
                  Pieter Abbeel},
  editor       = {Daniel D. Lee and
                  Masashi Sugiyama and
                  Ulrike von Luxburg and
                  Isabelle Guyon and
                  Roman Garnett},
  title        = {{InfoGAN}: Interpretable Representation Learning by Information Maximizing
                  Generative Adversarial Nets},
  booktitle    = {Advances in Neural Information Processing Systems 29: Annual Conference
                  on Neural Information Processing Systems 2016, December 5-10, 2016,
                  Barcelona, Spain},
  pages        = {2172--2180},
  year         = {2016},
  url          = {https://proceedings.neurips.cc/paper/2016/hash/7c9d0b1f96aebd7b5eca8c3edaa19ebb-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ChenCDHSSA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The url points at arXiv, so the eprint fields are added to match the other arXiv-linked entries in this file.
@inproceedings{DBLP:journals/corr/SchulmanMLJA15,
  author       = {John Schulman and
                  Philipp Moritz and
                  Sergey Levine and
                  Michael I. Jordan and
                  Pieter Abbeel},
  editor       = {Yoshua Bengio and
                  Yann LeCun},
  title        = {High-Dimensional Continuous Control Using Generalized Advantage Estimation},
  booktitle    = {4th International Conference on Learning Representations, {ICLR} 2016,
                  San Juan, Puerto Rico, May 2-4, 2016, Conference Track Proceedings},
  year         = {2016},
  url          = {http://arxiv.org/abs/1506.02438},
  eprinttype   = {arXiv},
  eprint       = {1506.02438},
  timestamp    = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SchulmanMLJA15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/DuanCHSA16,
  author       = {Duan, Yan and
                  Chen, Xi and
                  Houthooft, Rein and
                  Schulman, John and
                  Abbeel, Pieter},
  title        = {Benchmarking Deep Reinforcement Learning for Continuous Control},
  journal      = {CoRR},
  volume       = {abs/1604.06778},
  year         = {2016},
  url          = {http://arxiv.org/abs/1604.06778},
  eprinttype   = {arXiv},
  eprint       = {1604.06778},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/DuanCHSA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "Python" is brace-protected so sentence-casing styles do not render "python".
@article{DBLP:journals/corr/Al-RfouAAa16,
  author       = {Rami Al{-}Rfou and
                  Guillaume Alain and
                  Amjad Almahairi and
                  Christof Angerm{\"{u}}ller and
                  Dzmitry Bahdanau and
                  Nicolas Ballas and
                  Fr{\'{e}}d{\'{e}}ric Bastien and
                  Justin Bayer and
                  Anatoly Belikov and
                  Alexander Belopolsky and
                  Yoshua Bengio and
                  Arnaud Bergeron and
                  James Bergstra and
                  Valentin Bisson and
                  Josh Bleecher Snyder and
                  Nicolas Bouchard and
                  Nicolas Boulanger{-}Lewandowski and
                  Xavier Bouthillier and
                  Alexandre de Br{\'{e}}bisson and
                  Olivier Breuleux and
                  Pierre Luc Carrier and
                  Kyunghyun Cho and
                  Jan Chorowski and
                  Paul F. Christiano and
                  Tim Cooijmans and
                  Marc{-}Alexandre C{\^{o}}t{\'{e}} and
                  Myriam C{\^{o}}t{\'{e}} and
                  Aaron C. Courville and
                  Yann N. Dauphin and
                  Olivier Delalleau and
                  Julien Demouth and
                  Guillaume Desjardins and
                  Sander Dieleman and
                  Laurent Dinh and
                  Melanie Ducoffe and
                  Vincent Dumoulin and
                  Samira Ebrahimi Kahou and
                  Dumitru Erhan and
                  Ziye Fan and
                  Orhan Firat and
                  Mathieu Germain and
                  Xavier Glorot and
                  Ian J. Goodfellow and
                  Matthew Graham and
                  {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
                  Philippe Hamel and
                  Iban Harlouchet and
                  Jean{-}Philippe Heng and
                  Bal{\'{a}}zs Hidasi and
                  Sina Honari and
                  Arjun Jain and
                  S{\'{e}}bastien Jean and
                  Kai Jia and
                  Mikhail Korobov and
                  Vivek Kulkarni and
                  Alex Lamb and
                  Pascal Lamblin and
                  Eric Larsen and
                  C{\'{e}}sar Laurent and
                  Sean Lee and
                  Simon Lefran{\c{c}}ois and
                  Simon Lemieux and
                  Nicholas L{\'{e}}onard and
                  Zhouhan Lin and
                  Jesse A. Livezey and
                  Cory Lorenz and
                  Jeremiah Lowin and
                  Qianli Ma and
                  Pierre{-}Antoine Manzagol and
                  Olivier Mastropietro and
                  Robert McGibbon and
                  Roland Memisevic and
                  Bart van Merri{\"{e}}nboer and
                  Vincent Michalski and
                  Mehdi Mirza and
                  Alberto Orlandi and
                  Christopher Joseph Pal and
                  Razvan Pascanu and
                  Mohammad Pezeshki and
                  Colin Raffel and
                  Daniel Renshaw and
                  Matthew Rocklin and
                  Adriana Romero and
                  Markus Roth and
                  Peter Sadowski and
                  John Salvatier and
                  Fran{\c{c}}ois Savard and
                  Jan Schl{\"{u}}ter and
                  John Schulman and
                  Gabriel Schwartz and
                  Iulian Vlad Serban and
                  Dmitriy Serdyuk and
                  Samira Shabanian and
                  {\'{E}}tienne Simon and
                  Sigurd Spieckermann and
                  S. Ramana Subramanyam and
                  Jakub Sygnowski and
                  J{\'{e}}r{\'{e}}mie Tanguay and
                  Gijs van Tulder and
                  Joseph P. Turian and
                  Sebastian Urban and
                  Pascal Vincent and
                  Francesco Visin and
                  Harm de Vries and
                  David Warde{-}Farley and
                  Dustin J. Webb and
                  Matthew Willson and
                  Kelvin Xu and
                  Lijun Xue and
                  Li Yao and
                  Saizheng Zhang and
                  Ying Zhang},
  title        = {Theano: {A} {Python} framework for fast computation of mathematical
                  expressions},
  journal      = {CoRR},
  volume       = {abs/1605.02688},
  year         = {2016},
  url          = {http://arxiv.org/abs/1605.02688},
  eprinttype   = {arXiv},
  eprint       = {1605.02688},
  timestamp    = {Thu, 30 Apr 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/Al-RfouAAa16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Names use "Last, First" form so the compound surname "De Turck" is not split; "Bayesian" is brace-protected against sentence casing.
@article{DBLP:journals/corr/HouthooftCDSTA16,
  author       = {Houthooft, Rein and
                  Chen, Xi and
                  Duan, Yan and
                  Schulman, John and
                  De Turck, Filip and
                  Abbeel, Pieter},
  title        = {Curiosity-driven Exploration in Deep Reinforcement Learning via {Bayesian}
                  Neural Networks},
  journal      = {CoRR},
  volume       = {abs/1605.09674},
  year         = {2016},
  url          = {http://arxiv.org/abs/1605.09674},
  eprinttype   = {arXiv},
  eprint       = {1605.09674},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HouthooftCDSTA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The product name is brace-protected so sentence-casing styles do not render "Openai gym".
@article{DBLP:journals/corr/BrockmanCPSSTZ16,
  author       = {Greg Brockman and
                  Vicki Cheung and
                  Ludwig Pettersson and
                  Jonas Schneider and
                  John Schulman and
                  Jie Tang and
                  Wojciech Zaremba},
  title        = {{OpenAI} {Gym}},
  journal      = {CoRR},
  volume       = {abs/1606.01540},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.01540},
  eprinttype   = {arXiv},
  eprint       = {1606.01540},
  timestamp    = {Fri, 08 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/BrockmanCPSSTZ16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "InfoGAN" is brace-protected so sentence-casing styles do not render "Infogan".
@article{DBLP:journals/corr/ChenDHSSA16,
  author       = {Xi Chen and
                  Yan Duan and
                  Rein Houthooft and
                  John Schulman and
                  Ilya Sutskever and
                  Pieter Abbeel},
  title        = {{InfoGAN}: Interpretable Representation Learning by Information Maximizing
                  Generative Adversarial Nets},
  journal      = {CoRR},
  volume       = {abs/1606.03657},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.03657},
  eprinttype   = {arXiv},
  eprint       = {1606.03657},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ChenDHSSA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AmodeiOSCSM16,
  author       = {Amodei, Dario and
                  Olah, Chris and
                  Steinhardt, Jacob and
                  Christiano, Paul F. and
                  Schulman, John and
                  Man{\'{e}}, Dan},
  title        = {Concrete Problems in {AI} Safety},
  journal      = {CoRR},
  volume       = {abs/1606.06565},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.06565},
  eprinttype   = {arXiv},
  eprint       = {1606.06565},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AmodeiOSCSM16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ChenKSDDSSA16,
  author       = {Chen, Xi and
                  Kingma, Diederik P. and
                  Salimans, Tim and
                  Duan, Yan and
                  Dhariwal, Prafulla and
                  Schulman, John and
                  Sutskever, Ilya and
                  Abbeel, Pieter},
  title        = {Variational Lossy Autoencoder},
  journal      = {CoRR},
  volume       = {abs/1611.02731},
  year         = {2016},
  url          = {http://arxiv.org/abs/1611.02731},
  eprinttype   = {arXiv},
  eprint       = {1611.02731},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ChenKSDDSSA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The title superscript was over-escaped on export and printed as literal text; it is restored as real math and brace-protected.
@article{DBLP:journals/corr/DuanSCBSA16,
  author       = {Yan Duan and
                  John Schulman and
                  Xi Chen and
                  Peter L. Bartlett and
                  Ilya Sutskever and
                  Pieter Abbeel},
  title        = {{RL$^2$}: Fast Reinforcement Learning via
                  Slow Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1611.02779},
  year         = {2016},
  url          = {http://arxiv.org/abs/1611.02779},
  eprinttype   = {arXiv},
  eprint       = {1611.02779},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/DuanSCBSA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Names use "Last, First" form so the compound surname "De Turck" is not split into given name "Filip De" + surname "Turck".
@article{DBLP:journals/corr/TangHFSCDSTA16,
  author       = {Tang, Haoran and
                  Houthooft, Rein and
                  Foote, Davis and
                  Stooke, Adam and
                  Chen, Xi and
                  Duan, Yan and
                  Schulman, John and
                  De Turck, Filip and
                  Abbeel, Pieter},
  title        = {{\#}Exploration: {A} Study of Count-Based Exploration for Deep Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1611.04717},
  year         = {2016},
  url          = {http://arxiv.org/abs/1611.04717},
  eprinttype   = {arXiv},
  eprint       = {1611.04717},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/TangHFSCDSTA16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SchulmanLAJM15,
  author       = {Schulman, John and
                  Levine, Sergey and
                  Abbeel, Pieter and
                  Jordan, Michael I. and
                  Moritz, Philipp},
  editor       = {Bach, Francis R. and
                  Blei, David M.},
  title        = {Trust Region Policy Optimization},
  booktitle    = {Proceedings of the 32nd International Conference on Machine Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {37},
  pages        = {1889--1897},
  publisher    = {JMLR.org},
  year         = {2015},
  url          = {http://proceedings.mlr.press/v37/schulman15.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SchulmanLAJM15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SchulmanHWA15,
  author       = {Schulman, John and
                  Heess, Nicolas and
                  Weber, Theophane and
                  Abbeel, Pieter},
  editor       = {Cortes, Corinna and
                  Lawrence, Neil D. and
                  Lee, Daniel D. and
                  Sugiyama, Masashi and
                  Garnett, Roman},
  title        = {Gradient Estimation Using Stochastic Computation Graphs},
  booktitle    = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  pages        = {3528--3536},
  year         = {2015},
  url          = {https://proceedings.neurips.cc/paper/2015/hash/de03beffeed9da5f3639a621bcab5dd4-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SchulmanHWA15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SchulmanLMJA15,
  author       = {Schulman, John and
                  Levine, Sergey and
                  Moritz, Philipp and
                  Jordan, Michael I. and
                  Abbeel, Pieter},
  title        = {Trust Region Policy Optimization},
  journal      = {CoRR},
  volume       = {abs/1502.05477},
  year         = {2015},
  url          = {http://arxiv.org/abs/1502.05477},
  eprinttype   = {arXiv},
  eprint       = {1502.05477},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SchulmanLMJA15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SchulmanHWA15,
  author       = {Schulman, John and
                  Heess, Nicolas and
                  Weber, Theophane and
                  Abbeel, Pieter},
  title        = {Gradient Estimation Using Stochastic Computation Graphs},
  journal      = {CoRR},
  volume       = {abs/1506.05254},
  year         = {2015},
  url          = {http://arxiv.org/abs/1506.05254},
  eprinttype   = {arXiv},
  eprint       = {1506.05254},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SchulmanHWA15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijrr/SchulmanDHLABPPGA14,
  author       = {Schulman, John and
                  Duan, Yan and
                  Ho, Jonathan and
                  Lee, Alex X. and
                  Awwal, Ibrahim and
                  Bradlow, Henry and
                  Pan, Jia and
                  Patil, Sachin and
                  Goldberg, Ken and
                  Abbeel, Pieter},
  title        = {Motion planning with sequential convex optimization and convex collision checking},
  journal      = {Int. J. Robotics Res.},
  volume       = {33},
  number       = {9},
  pages        = {1251--1270},
  year         = {2014},
  url          = {https://doi.org/10.1177/0278364914528132},
  doi          = {10.1177/0278364914528132},
  timestamp    = {Thu, 17 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijrr/SchulmanDHLABPPGA14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "3D" is brace-protected so sentence-casing styles do not render "3d".
@inproceedings{DBLP:conf/icra/DuanPSGA14,
  author       = {Yan Duan and
                  Sachin Patil and
                  John Schulman and
                  Kenneth Y. Goldberg and
                  Pieter Abbeel},
  title        = {Planning locally optimal, curvature-constrained trajectories in {3D}
                  using sequential convex optimization},
  booktitle    = {2014 {IEEE} International Conference on Robotics and Automation, {ICRA}
                  2014, Hong Kong, China, May 31 - June 7, 2014},
  pages        = {5889--5895},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/ICRA.2014.6907726},
  doi          = {10.1109/ICRA.2014.6907726},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/DuanPSGA14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "Gaussian" is brace-protected so sentence-casing styles keep the capital.
@inproceedings{DBLP:conf/icra/PatilDSGA14,
  author       = {Sachin Patil and
                  Yan Duan and
                  John Schulman and
                  Ken Goldberg and
                  Pieter Abbeel},
  title        = {{Gaussian} belief space planning with discontinuities in sensing domains},
  booktitle    = {2014 {IEEE} International Conference on Robotics and Automation, {ICRA}
                  2014, Hong Kong, China, May 31 - June 7, 2014},
  pages        = {6483--6490},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/ICRA.2014.6907816},
  doi          = {10.1109/ICRA.2014.6907816},
  timestamp    = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icra/PatilDSGA14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "Gaussian" is brace-protected so sentence-casing styles keep the capital.
@inproceedings{DBLP:conf/wafr/PatilKLSGA14,
  author       = {Sachin Patil and
                  Gregory Kahn and
                  Michael Laskey and
                  John Schulman and
                  Ken Goldberg and
                  Pieter Abbeel},
  editor       = {H. Levent Akin and
                  Nancy M. Amato and
                  Volkan Isler and
                  A. Frank van der Stappen},
  title        = {Scaling up {Gaussian} Belief Space Planning Through Covariance-Free
                  Trajectory Optimization and Automatic Differentiation},
  booktitle    = {Algorithmic Foundations of Robotics {XI} - Selected Contributions
                  of the Eleventh International Workshop on the Algorithmic Foundations
                  of Robotics, {WAFR} 2014, 3-5 August 2014, Bo{\u{g}}azi{\c{c}}i University,
                  {\.{I}}stanbul, Turkey},
  series       = {Springer Tracts in Advanced Robotics},
  volume       = {107},
  pages        = {515--533},
  publisher    = {Springer},
  year         = {2014},
  url          = {https://doi.org/10.1007/978-3-319-16595-0\_30},
  doi          = {10.1007/978-3-319-16595-0\_30},
  timestamp    = {Sun, 25 Oct 2020 23:03:12 +0100},
  biburl       = {https://dblp.org/rec/conf/wafr/PatilKLSGA14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icra/SchulmanLHA13,
  author       = {Schulman, John and
                  Lee, Alex X. and
                  Ho, Jonathan and
                  Abbeel, Pieter},
  title        = {Tracking deformable objects with point clouds},
  booktitle    = {2013 {IEEE} International Conference on Robotics and Automation, Karlsruhe, Germany, May 6-10, 2013},
  pages        = {1130--1137},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/ICRA.2013.6630714},
  doi          = {10.1109/ICRA.2013.6630714},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/SchulmanLHA13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/SchulmanGVTA13,
  author       = {Schulman, John and
                  Gupta, Ankush and
                  Venkatesan, Sibi and
                  Tayson{-}Frederick, Mallory and
                  Abbeel, Pieter},
  title        = {A case study of trajectory transfer through non-rigid registration for a simplified suturing scenario},
  booktitle    = {2013 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2013, Tokyo, Japan, November 3-7, 2013},
  pages        = {4111--4117},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/IROS.2013.6696945},
  doi          = {10.1109/IROS.2013.6696945},
  timestamp    = {Tue, 05 Sep 2023 15:06:24 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/SchulmanGVTA13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "Gaussian" is brace-protected so sentence-casing styles keep the capital.
@inproceedings{DBLP:conf/iros/LeeDPSMBGA13,
  author       = {Alex X. Lee and
                  Yan Duan and
                  Sachin Patil and
                  John Schulman and
                  Zoe McCarthy and
                  Jur van den Berg and
                  Ken Goldberg and
                  Pieter Abbeel},
  title        = {Sigma hulls for {Gaussian} belief space planning for imprecise articulated
                  robots amid obstacles},
  booktitle    = {2013 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, {IROS} 2013, Tokyo, Japan, November 3-7, 2013},
  pages        = {5660--5667},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/IROS.2013.6697176},
  doi          = {10.1109/IROS.2013.6697176},
  timestamp    = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iros/LeeDPSMBGA13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The venue acronym is brace-protected in booktitle, matching how the other entries protect theirs.
@inproceedings{DBLP:conf/isrr/SchulmanHLA13,
  author       = {John Schulman and
                  Jonathan Ho and
                  Cameron Lee and
                  Pieter Abbeel},
  editor       = {Masayuki Inaba and
                  Peter Corke},
  title        = {Learning from Demonstrations Through the Use of Non-rigid Registration},
  booktitle    = {Robotics Research - The 16th International Symposium {ISRR}, 16-19 December
                  2013, Singapore},
  series       = {Springer Tracts in Advanced Robotics},
  volume       = {114},
  pages        = {339--354},
  publisher    = {Springer},
  year         = {2013},
  url          = {https://doi.org/10.1007/978-3-319-28872-7\_20},
  doi          = {10.1007/978-3-319-28872-7\_20},
  timestamp    = {Sat, 24 Nov 2018 11:59:52 +0100},
  biburl       = {https://dblp.org/rec/conf/isrr/SchulmanHLA13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/rss/SchulmanHLABA13,
  author       = {Schulman, John and
                  Ho, Jonathan and
                  Lee, Alex X. and
                  Awwal, Ibrahim and
                  Bradlow, Henry and
                  Abbeel, Pieter},
  editor       = {Newman, Paul and
                  Fox, Dieter and
                  Hsu, David},
  title        = {Finding Locally Optimal, Collision-Free Trajectories with Sequential Convex Optimization},
  booktitle    = {Robotics: Science and Systems IX, Technische Universit{\"{a}}t Berlin, Berlin, Germany, June 24 - June 28, 2013},
  year         = {2013},
  url          = {http://www.roboticsproceedings.org/rss09/p31.html},
  doi          = {10.15607/RSS.2013.IX.031},
  timestamp    = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/rss/SchulmanHLABA13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The venue acronym is brace-protected in booktitle, matching how the other entries protect theirs.
@inproceedings{DBLP:conf/isrr/SchulmanGA11,
  author       = {John D. Schulman and
                  Ken Goldberg and
                  Pieter Abbeel},
  editor       = {Henrik I. Christensen and
                  Oussama Khatib},
  title        = {Grasping and Fixturing as Submodular Coverage Problems},
  booktitle    = {Robotics Research - The 15th International Symposium {ISRR}, 9-12 December
                  2011, Flagstaff, Arizona, {USA}},
  series       = {Springer Tracts in Advanced Robotics},
  volume       = {100},
  pages        = {571--583},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-319-29363-9\_32},
  doi          = {10.1007/978-3-319-29363-9\_32},
  timestamp    = {Fri, 26 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/isrr/SchulmanGA11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}