BibTeX records: Kavosh Asadi

download as .bib file

@inproceedings{DBLP:conf/aistats/GottesmanAAL0L23,
  author       = {Gottesman, Omer and
                  Asadi, Kavosh and
                  Allen, Cameron S. and
                  Lobel, Samuel and
                  Konidaris, George and
                  Littman, Michael},
  editor       = {Ruiz, Francisco J. R. and
                  Dy, Jennifer G. and
                  van de Meent, Jan{-}Willem},
  title        = {Coarse-Grained Smoothness for Reinforcement Learning in Metric Spaces},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  25-27 April 2023, Palau de Congressos, Valencia, Spain},
  series       = {Proceedings of Machine Learning Research},
  volume       = {206},
  pages        = {1390--1410},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v206/gottesman23a.html},
  timestamp    = {Mon, 09 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/GottesmanAAL0L23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference version; the arXiv preprint with the same title and authors is
% DBLP:journals/corr/abs-2306-17833.
% The url is stored raw (no backslash before the underscore): url-aware styles
% and biblatex typeset this field verbatim, so an escape would leak into the link.
@inproceedings{DBLP:conf/nips/AsadiFS23,
  author       = {Asadi, Kavosh and
                  Fakoor, Rasool and
                  Sabach, Shoham},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Resetting the Optimizer in Deep {RL:} An Empirical Study},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans,
                  LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/e4bf5c3245fd92a4554a16af9803b757-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/AsadiFS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference version; the arXiv preprint with the same title and authors is
% DBLP:journals/corr/abs-2306-17750.
% The url is stored raw (no backslash before the underscore): url-aware styles
% and biblatex typeset this field verbatim, so an escape would leak into the link.
@inproceedings{DBLP:conf/nips/AsadiS0GF23,
  author       = {Asadi, Kavosh and
                  Sabach, Shoham and
                  Liu, Yao and
                  Gottesman, Omer and
                  Fakoor, Rasool},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {{TD} Convergence: An Optimization Perspective},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans,
                  LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/9a08fbb992f15faa695c42b6a2c8e000-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/AsadiS0GF23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the conference entry with the same title and authors is
% DBLP:conf/nips/AsadiS0GF23.
@article{DBLP:journals/corr/abs-2306-17750,
  author       = {Asadi, Kavosh and
                  Sabach, Shoham and
                  Liu, Yao and
                  Gottesman, Omer and
                  Fakoor, Rasool},
  title        = {{TD} Convergence: An Optimization Perspective},
  journal      = {CoRR},
  volume       = {abs/2306.17750},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2306.17750},
  doi          = {10.48550/ARXIV.2306.17750},
  eprinttype   = {arXiv},
  eprint       = {2306.17750},
  timestamp    = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-17750.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the conference entry with the same title and authors is
% DBLP:conf/nips/AsadiFS23.
@article{DBLP:journals/corr/abs-2306-17833,
  author       = {Asadi, Kavosh and
                  Fakoor, Rasool and
                  Sabach, Shoham},
  title        = {Resetting the Optimizer in Deep {RL:} An Empirical Study},
  journal      = {CoRR},
  volume       = {abs/2306.17833},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2306.17833},
  doi          = {10.48550/ARXIV.2306.17833},
  eprinttype   = {arXiv},
  eprint       = {2306.17833},
  timestamp    = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-17833.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2310-05905,
  author       = {Liu, Zuxin and
                  Zhang, Jesse and
                  Asadi, Kavosh and
                  Liu, Yao and
                  Zhao, Ding and
                  Sabach, Shoham and
                  Fakoor, Rasool},
  title        = {{TAIL:} Task-specific Adapters for Imitation Learning with Large Pretrained
                  Models},
  journal      = {CoRR},
  volume       = {abs/2310.05905},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.05905},
  doi          = {10.48550/ARXIV.2310.05905},
  eprinttype   = {arXiv},
  eprint       = {2310.05905},
  timestamp    = {Tue, 24 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-05905.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper.
% Editor given names are spelled out rather than abbreviated so that a style can
% decide how to shorten them.
% The url is stored raw (no backslash before the underscore): url-aware styles
% and biblatex typeset this field verbatim, so an escape would leak into the link.
@inproceedings{DBLP:conf/nips/AsadiFGKLS22,
  author       = {Asadi, Kavosh and
                  Fakoor, Rasool and
                  Gottesman, Omer and
                  Kim, Taesup and
                  Littman, Michael L. and
                  Smola, Alexander J.},
  editor       = {Koyejo, Sanmi and
                  Mohamed, Shakir and
                  Agarwal, Alekh and
                  Belgrave, Danielle and
                  Cho, Kyunghyun and
                  Oh, Alice},
  title        = {Faster Deep Reinforcement Learning with Slower Online Network},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/7dfa77fcef807c9a078b58fd619ad897-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/AsadiFGKLS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper.
% Editor given names are spelled out rather than abbreviated so that a style can
% decide how to shorten them.
% The url is stored raw (no backslash before the underscore): url-aware styles
% and biblatex typeset this field verbatim, so an escape would leak into the link.
@inproceedings{DBLP:conf/nips/KlissarovFMAKS22,
  author       = {Klissarov, Martin and
                  Fakoor, Rasool and
                  Mueller, Jonas W. and
                  Asadi, Kavosh and
                  Kim, Taesup and
                  Smola, Alexander J.},
  editor       = {Koyejo, Sanmi and
                  Mohamed, Shakir and
                  Agarwal, Alekh and
                  Belgrave, Danielle and
                  Cho, Kyunghyun and
                  Oh, Alice},
  title        = {Adaptive Interest for Emphatic Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/008079ec00eec9760ee93af5434ee932-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/KlissarovFMAKS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint.
% {Q} is braced so that styles which lower-case titles keep the capital letter.
@article{DBLP:journals/corr/abs-2205-05588,
  author       = {Zhou, Zhiyuan and
                  Allen, Cameron and
                  Asadi, Kavosh and
                  Konidaris, George},
  title        = {Characterizing the Action-Generalization Gap in Deep {Q}-Learning},
  journal      = {CoRR},
  volume       = {abs/2205.05588},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2205.05588},
  doi          = {10.48550/ARXIV.2205.05588},
  eprinttype   = {arXiv},
  eprint       = {2205.05588},
  timestamp    = {Tue, 17 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2205-05588.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/us/Asadi21,
  author       = {Asadi, Kavosh},
  title        = {Smoothness in Reinforcement Learning with Large State and Action Spaces},
  school       = {Brown University, {USA}},
  year         = {2021},
  url          = {https://cs.brown.edu/research/pubs/theses/phd/2021/asadi.kavosh.pdf},
  timestamp    = {Tue, 28 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/phd/us/Asadi21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/AsadiPPKL21,
  author       = {Asadi, Kavosh and
                  Parikh, Neev and
                  Parr, Ronald E. and
                  Konidaris, George Dimitri and
                  Littman, Michael L.},
  title        = {Deep Radial-Basis Value Functions for Continuous Control},
  booktitle    = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2021, Thirty-Third Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                  2021},
  pages        = {6696--6704},
  publisher    = {{AAAI} Press},
  year         = {2021},
  url          = {https://doi.org/10.1609/aaai.v35i8.16828},
  doi          = {10.1609/AAAI.V35I8.16828},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/AsadiPPKL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference version; the arXiv preprint with the same title and authors is
% DBLP:journals/corr/abs-2001-05411.
@inproceedings{DBLP:conf/aaai/LecarpentierAAJ21,
  author       = {Lecarpentier, Erwan and
                  Abel, David and
                  Asadi, Kavosh and
                  Jinnai, Yuu and
                  Rachelson, Emmanuel and
                  Littman, Michael L.},
  title        = {Lipschitz Lifelong Reinforcement Learning},
  booktitle    = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2021, Thirty-Third Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                  2021},
  pages        = {8270--8278},
  publisher    = {{AAAI} Press},
  year         = {2021},
  url          = {https://doi.org/10.1609/aaai.v35i9.17006},
  doi          = {10.1609/AAAI.V35I9.17006},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/LecarpentierAAJ21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/FakoorMACS21,
  author       = {Fakoor, Rasool and
                  Mueller, Jonas and
                  Asadi, Kavosh and
                  Chaudhari, Pratik and
                  Smola, Alexander J.},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {Continuous Doubly Constrained Batch Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {11260--11273},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/5da713a690c067105aeb2fae32403405-Abstract.html},
  timestamp    = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/FakoorMACS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-07054,
  author       = {Shah, Ishaan and
                  Halpern, David and
                  Asadi, Kavosh and
                  Littman, Michael L.},
  title        = {Convergence of a Human-in-the-Loop Policy-Gradient Algorithm With
                  Eligibility Trace Under Reward, Policy, and Advantage Feedback},
  journal      = {CoRR},
  volume       = {abs/2109.07054},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.07054},
  eprinttype   = {arXiv},
  eprint       = {2109.07054},
  timestamp    = {Wed, 22 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-07054.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2110-12276,
  author       = {Gottesman, Omer and
                  Asadi, Kavosh and
                  Allen, Cameron and
                  Lobel, Sam and
                  Konidaris, George and
                  Littman, Michael},
  title        = {Coarse-Grained Smoothness for {RL} in Metric Spaces},
  journal      = {CoRR},
  volume       = {abs/2110.12276},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.12276},
  eprinttype   = {arXiv},
  eprint       = {2110.12276},
  timestamp    = {Mon, 09 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-12276.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint.
% {Q} is braced so that styles which lower-case titles keep the capital letter.
@article{DBLP:journals/corr/abs-2112-05848,
  author       = {Asadi, Kavosh and
                  Fakoor, Rasool and
                  Gottesman, Omer and
                  Littman, Michael L. and
                  Smola, Alexander J.},
  title        = {Deep {Q}-Network with Proximal Iteration},
  journal      = {CoRR},
  volume       = {abs/2112.05848},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.05848},
  eprinttype   = {arXiv},
  eprint       = {2112.05848},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-05848.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the conference entry with the same title and authors is
% DBLP:conf/aaai/LecarpentierAAJ21.
@article{DBLP:journals/corr/abs-2001-05411,
  author       = {Lecarpentier, Erwan and
                  Abel, David and
                  Asadi, Kavosh and
                  Jinnai, Yuu and
                  Rachelson, Emmanuel and
                  Littman, Michael L.},
  title        = {Lipschitz Lifelong Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2001.05411},
  year         = {2020},
  url          = {https://arxiv.org/abs/2001.05411},
  eprinttype   = {arXiv},
  eprint       = {2001.05411},
  timestamp    = {Fri, 17 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2001-05411.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-01883,
  author       = {Asadi, Kavosh and
                  Parr, Ronald E. and
                  Konidaris, George Dimitri and
                  Littman, Michael L.},
  title        = {Deep {RBF} Value Functions for Continuous Control},
  journal      = {CoRR},
  volume       = {abs/2002.01883},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.01883},
  eprinttype   = {arXiv},
  eprint       = {2002.01883},
  timestamp    = {Mon, 10 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-01883.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-05518,
  author       = {Asadi, Kavosh and
                  Abel, David and
                  Littman, Michael},
  title        = {Learning State Abstractions for Transfer in Continuous Control},
  journal      = {CoRR},
  volume       = {abs/2002.05518},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.05518},
  eprinttype   = {arXiv},
  eprint       = {2002.05518},
  timestamp    = {Mon, 09 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-05518.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/AbelAAJLW19,
  author       = {Abel, David and
                  Arumugam, Dilip and
                  Asadi, Kavosh and
                  Jinnai, Yuu and
                  Littman, Michael L. and
                  Wong, Lawson L. S.},
  title        = {State Abstraction as Compression in Apprenticeship Learning},
  booktitle    = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2019, The Thirty-First Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
                  USA, January 27 - February 1, 2019},
  pages        = {3134--3142},
  publisher    = {{AAAI} Press},
  year         = {2019},
  url          = {https://doi.org/10.1609/aaai.v33i01.33013134},
  doi          = {10.1609/AAAI.V33I01.33013134},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/AbelAAJLW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS '19 paper.
% {Q} is braced so that styles which lower-case titles keep the capital letter.
@inproceedings{DBLP:conf/atal/KimALK19,
  author       = {Kim, Seungchan and
                  Asadi, Kavosh and
                  Littman, Michael L. and
                  Konidaris, George Dimitri},
  editor       = {Elkind, Edith and
                  Veloso, Manuela and
                  Agmon, Noa and
                  Taylor, Matthew E.},
  title        = {Removing the Target Network from Deep {Q}-Networks with the Mellowmax
                  Operator},
  booktitle    = {Proceedings of the 18th International Conference on Autonomous Agents
                  and MultiAgent Systems, {AAMAS} '19, Montreal, QC, Canada, May 13-17,
                  2019},
  pages        = {2060--2062},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems},
  year         = {2019},
  url          = {http://dl.acm.org/citation.cfm?id=3332010},
  timestamp    = {Tue, 19 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/atal/KimALK19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2019 paper.
% {DeepMellow} and {Q} are braced so that styles which lower-case titles keep
% the camelCase name and the capital letter.
@inproceedings{DBLP:conf/ijcai/KimALK19,
  author       = {Kim, Seungchan and
                  Asadi, Kavosh and
                  Littman, Michael L. and
                  Konidaris, George Dimitri},
  editor       = {Kraus, Sarit},
  title        = {{DeepMellow}: Removing the Need for a Target Network in Deep {Q}-Learning},
  booktitle    = {Proceedings of the Twenty-Eighth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16,
                  2019},
  pages        = {2733--2739},
  publisher    = {ijcai.org},
  year         = {2019},
  url          = {https://doi.org/10.24963/ijcai.2019/379},
  doi          = {10.24963/IJCAI.2019/379},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ijcai/KimALK19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1905-13320,
  author       = {Asadi, Kavosh and
                  Misra, Dipendra and
                  Kim, Seungchan and
                  Littman, Michael L.},
  title        = {Combating the Compounding-Error Problem with a Multi-step Model},
  journal      = {CoRR},
  volume       = {abs/1905.13320},
  year         = {2019},
  url          = {http://arxiv.org/abs/1905.13320},
  eprinttype   = {arXiv},
  eprint       = {1905.13320},
  timestamp    = {Mon, 03 Jun 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1905-13320.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference version; the arXiv preprint with the same title and authors is
% DBLP:journals/corr/abs-1804-07193.
@inproceedings{DBLP:conf/icml/AsadiML18,
  author       = {Asadi, Kavosh and
                  Misra, Dipendra and
                  Littman, Michael L.},
  editor       = {Dy, Jennifer G. and
                  Krause, Andreas},
  title        = {Lipschitz Continuity in Model-based Reinforcement Learning},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {264--273},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/asadi18a.html},
  timestamp    = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/AsadiML18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the conference entry with the same title and authors is
% DBLP:conf/icml/AsadiML18.
@article{DBLP:journals/corr/abs-1804-07193,
  author       = {Asadi, Kavosh and
                  Misra, Dipendra and
                  Littman, Michael L.},
  title        = {Lipschitz Continuity in Model-based Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1804.07193},
  year         = {2018},
  url          = {http://arxiv.org/abs/1804.07193},
  eprinttype   = {arXiv},
  eprint       = {1804.07193},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-07193.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint.
% {Wasserstein} is braced so that styles which lower-case titles keep the
% capital of the proper noun.
@article{DBLP:journals/corr/abs-1806-01265,
  author       = {Asadi, Kavosh and
                  Cater, Evan and
                  Misra, Dipendra and
                  Littman, Michael L.},
  title        = {Equivalence Between {Wasserstein} and Value-Aware Model-based Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1806.01265},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.01265},
  eprinttype   = {arXiv},
  eprint       = {1806.01265},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-01265.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-00128,
  author       = {Asadi, Kavosh and
                  Cater, Evan and
                  Misra, Dipendra and
                  Littman, Michael L.},
  title        = {Towards a Simple Approach to Multi-step Model-based Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1811.00128},
  year         = {2018},
  url          = {http://arxiv.org/abs/1811.00128},
  eprinttype   = {arXiv},
  eprint       = {1811.00128},
  timestamp    = {Thu, 22 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-00128.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-01129,
  author       = {Arumugam, Dilip and
                  Abel, David and
                  Asadi, Kavosh and
                  Gopalan, Nakul and
                  Grimm, Christopher and
                  Lee, Jun Ki and
                  Lehnert, Lucas and
                  Littman, Michael L.},
  title        = {Mitigating Planner Overfitting in Model-Based Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1812.01129},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.01129},
  eprinttype   = {arXiv},
  eprint       = {1812.01129},
  timestamp    = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-01129.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference version; the arXiv preprint with the same title and authors is
% DBLP:journals/corr/WilliamsAZ17.
@inproceedings{DBLP:conf/acl/WilliamsAZ17,
  author       = {Williams, Jason D. and
                  Asadi, Kavosh and
                  Zweig, Geoffrey},
  editor       = {Barzilay, Regina and
                  Kan, Min{-}Yen},
  title        = {Hybrid Code Networks: practical and efficient end-to-end dialog control
                  with supervised and reinforcement learning},
  booktitle    = {Proceedings of the 55th Annual Meeting of the Association for Computational
                  Linguistics, {ACL} 2017, Vancouver, Canada, July 30 - August 4, Volume
                  1: Long Papers},
  pages        = {665--677},
  publisher    = {Association for Computational Linguistics},
  year         = {2017},
  url          = {https://doi.org/10.18653/v1/P17-1062},
  doi          = {10.18653/V1/P17-1062},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/acl/WilliamsAZ17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/AsadiL17,
  author       = {Asadi, Kavosh and
                  Littman, Michael L.},
  editor       = {Precup, Doina and
                  Teh, Yee Whye},
  title        = {An Alternative Softmax Operator for Reinforcement Learning},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {243--252},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v70/asadi17a.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/AsadiL17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the conference entry with the same title and authors is
% DBLP:conf/acl/WilliamsAZ17.
@article{DBLP:journals/corr/WilliamsAZ17,
  author       = {Williams, Jason D. and
                  Asadi, Kavosh and
                  Zweig, Geoffrey},
  title        = {Hybrid Code Networks: practical and efficient end-to-end dialog control
                  with supervised and reinforcement learning},
  journal      = {CoRR},
  volume       = {abs/1702.03274},
  year         = {2017},
  url          = {http://arxiv.org/abs/1702.03274},
  eprinttype   = {arXiv},
  eprint       = {1702.03274},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/WilliamsAZ17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1709-00503,
  author       = {Asadi, Kavosh and
                  Allen, Cameron and
                  Roderick, Melrose and
                  Mohamed, Abdel{-}rahman and
                  Konidaris, George Dimitri and
                  Littman, Michael L.},
  title        = {Mean Actor Critic},
  journal      = {CoRR},
  volume       = {abs/1709.00503},
  year         = {2017},
  url          = {http://arxiv.org/abs/1709.00503},
  eprinttype   = {arXiv},
  eprint       = {1709.00503},
  timestamp    = {Tue, 19 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1709-00503.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AsadiL16,
  author       = {Asadi, Kavosh and
                  Littman, Michael L.},
  title        = {A New Softmax Operator for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1612.05628},
  year         = {2016},
  url          = {http://arxiv.org/abs/1612.05628},
  eprinttype   = {arXiv},
  eprint       = {1612.05628},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AsadiL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AsadiW16,
  author       = {Asadi, Kavosh and
                  Williams, Jason D.},
  title        = {Sample-efficient Deep Reinforcement Learning for Dialog Control},
  journal      = {CoRR},
  volume       = {abs/1612.06000},
  year         = {2016},
  url          = {http://arxiv.org/abs/1612.06000},
  eprinttype   = {arXiv},
  eprint       = {1612.06000},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AsadiW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics