BibTeX records: Satinder Singh 0001

download as .bib file

% arXiv preprint 2402.15391, listed by DBLP under CoRR.
% Names are entered in `Last, First` form so name parsing never depends on capitalisation.
@article{DBLP:journals/corr/abs-2402-15391,
  author       = {Bruce, Jake and
                  Dennis, Michael and
                  Edwards, Ashley and
                  Parker{-}Holder, Jack and
                  Shi, Yuge and
                  Hughes, Edward and
                  Lai, Matthew and
                  Mavalankar, Aditi and
                  Steigerwald, Richie and
                  Apps, Chris and
                  Aytar, Yusuf and
                  Bechtle, Sarah and
                  Behbahani, Feryal M. P. and
                  Chan, Stephanie and
                  Heess, Nicolas and
                  Gonzalez, Lucy and
                  Osindero, Simon and
                  Ozair, Sherjil and
                  Reed, Scott E. and
                  Zhang, Jingwei and
                  Zolna, Konrad and
                  Clune, Jeff and
                  de Freitas, Nando and
                  Singh, Satinder and
                  Rockt{\"{a}}schel, Tim},
  title        = {Genie: Generative Interactive Environments},
  journal      = {CoRR},
  volume       = {abs/2402.15391},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2402.15391},
  doi          = {10.48550/ARXIV.2402.15391},
  eprinttype   = {arXiv},
  eprint       = {2402.15391},
  timestamp    = {Fri, 22 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2402-15391.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article; `pages` holds the article number.
% The journal name is stored in full so that abbreviation is left to the bibliography style.
@article{DBLP:journals/ai/ZhangDS23,
  author       = {Zhang, Qi and
                  Durfee, Edmund H. and
                  Singh, Satinder},
  title        = {Risk-aware analysis for interpretations of probabilistic achievement
                  and maintenance commitments},
  journal      = {Artificial Intelligence},
  volume       = {317},
  pages        = {103864},
  year         = {2023},
  url          = {https://doi.org/10.1016/j.artint.2023.103864},
  doi          = {10.1016/J.ARTINT.2023.103864},
  timestamp    = {Sat, 11 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ai/ZhangDS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% GECCO 2023 companion-volume record; DBLP:conf/iclr/LangeSCZD00F23 carries the same
% title and author list. The day range in `booktitle` uses `--` so it typesets as an en dash.
@inproceedings{DBLP:conf/gecco/LangeSCZD00F23,
  author       = {Lange, Robert Tjarko and
                  Schaul, Tom and
                  Chen, Yutian and
                  Zahavy, Tom and
                  Dalibard, Valentin and
                  Lu, Chris and
                  Singh, Satinder and
                  Flennerhag, Sebastian},
  editor       = {Silva, Sara and
                  Paquete, Lu{\'{\i}}s},
  title        = {Discovering Evolution Strategies via Meta-Black-Box Optimization},
  booktitle    = {Companion Proceedings of the Conference on Genetic and Evolutionary
                  Computation, {GECCO} 2023, Companion Volume, Lisbon, Portugal, July
                  15--19, 2023},
  pages        = {29--30},
  publisher    = {{ACM}},
  year         = {2023},
  url          = {https://doi.org/10.1145/3583133.3595822},
  doi          = {10.1145/3583133.3595822},
  timestamp    = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/gecco/LangeSCZD00F23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2023 record; DBLP:journals/corr/abs-2301-12305 is the arXiv record with the same authors.
% The day range in `booktitle` uses `--` so it typesets as an en dash.
@inproceedings{DBLP:conf/iclr/CarvalhoFLL023,
  author       = {Carvalho, Wilka and
                  Filos, Angelos and
                  Lewis, Richard L. and
                  Lee, Honglak and
                  Singh, Satinder},
  title        = {Composing Task Knowledge With Modular Successor Feature Approximators},
  booktitle    = {The Eleventh International Conference on Learning Representations,
                  {ICLR} 2023, Kigali, Rwanda, May 1--5, 2023},
  publisher    = {OpenReview.net},
  year         = {2023},
  url          = {https://openreview.net/pdf?id=DrtSx1z40Ib},
  timestamp    = {Fri, 30 Jun 2023 14:38:38 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/CarvalhoFLL023.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2023 record; DBLP:conf/gecco/LangeSCZD00F23 carries the same title and author list.
% The day range in `booktitle` uses `--` so it typesets as an en dash.
@inproceedings{DBLP:conf/iclr/LangeSCZD00F23,
  author       = {Lange, Robert Tjarko and
                  Schaul, Tom and
                  Chen, Yutian and
                  Zahavy, Tom and
                  Dalibard, Valentin and
                  Lu, Chris and
                  Singh, Satinder and
                  Flennerhag, Sebastian},
  title        = {Discovering Evolution Strategies via Meta-Black-Box Optimization},
  booktitle    = {The Eleventh International Conference on Learning Representations,
                  {ICLR} 2023, Kigali, Rwanda, May 1--5, 2023},
  publisher    = {OpenReview.net},
  year         = {2023},
  url          = {https://openreview.net/pdf?id=mFDU0fP3EQH},
  timestamp    = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iclr/LangeSCZD00F23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2023 record; DBLP:journals/corr/abs-2210-14215 is the arXiv record with the same title.
% The day range in `booktitle` uses `--` so it typesets as an en dash.
@inproceedings{DBLP:conf/iclr/LaskinWOPSSSHFB23,
  author       = {Laskin, Michael and
                  Wang, Luyu and
                  Oh, Junhyuk and
                  Parisotto, Emilio and
                  Spencer, Stephen and
                  Steigerwald, Richie and
                  Strouse, DJ and
                  Hansen, Steven Stenberg and
                  Filos, Angelos and
                  Brooks, Ethan A. and
                  Gazeau, Maxime and
                  Sahni, Himanshu and
                  Singh, Satinder and
                  Mnih, Volodymyr},
  title        = {In-context Reinforcement Learning with Algorithm Distillation},
  booktitle    = {The Eleventh International Conference on Learning Representations,
                  {ICLR} 2023, Kigali, Rwanda, May 1--5, 2023},
  publisher    = {OpenReview.net},
  year         = {2023},
  url          = {https://openreview.net/pdf?id=hy0a5MMPUv},
  timestamp    = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/LaskinWOPSSSHFB23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2023 record; DBLP:journals/corr/abs-2205-13521 is the arXiv record with the same title.
% `DOMiNO` is braced so sentence-casing styles keep its mixed capitalisation.
@inproceedings{DBLP:conf/iclr/ZahavySBBFH023,
  author       = {Zahavy, Tom and
                  Schroecker, Yannick and
                  Behbahani, Feryal M. P. and
                  Baumli, Kate and
                  Flennerhag, Sebastian and
                  Hou, Shaobo and
                  Singh, Satinder},
  title        = {Discovering Policies with {DOMiNO}: Diversity Optimization Maintaining
                  Near Optimality},
  booktitle    = {The Eleventh International Conference on Learning Representations,
                  {ICLR} 2023, Kigali, Rwanda, May 1--5, 2023},
  publisher    = {OpenReview.net},
  year         = {2023},
  url          = {https://openreview.net/pdf?id=kjkdzBW3b8p},
  timestamp    = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/ZahavySBBFH023.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 record (PMLR volume 202). The day range in `booktitle` uses `--` so it typesets
% as an en dash.
% NOTE(review): `Nieves, Nicolas Perez` keeps the surname split that BibTeX derived from the
% original `First Last` form; if the surname is the compound `Perez Nieves`, move `Perez`
% before the comma -- confirm with the paper.
@inproceedings{DBLP:conf/icml/BauerBBBBCCCDGG23,
  author       = {Bauer, Jakob and
                  Baumli, Kate and
                  Behbahani, Feryal M. P. and
                  Bhoopchand, Avishkar and
                  Bradley{-}Schmieg, Nathalie and
                  Chang, Michael and
                  Clay, Natalie and
                  Collister, Adrian and
                  Dasagi, Vibhavari and
                  Gonzalez, Lucy and
                  Gregor, Karol and
                  Hughes, Edward and
                  Kashem, Sheleem and
                  Loks{-}Thompson, Maria and
                  Openshaw, Hannah and
                  Parker{-}Holder, Jack and
                  Pathak, Shreya and
                  Nieves, Nicolas Perez and
                  Rakicevic, Nemanja and
                  Rockt{\"{a}}schel, Tim and
                  Schroecker, Yannick and
                  Singh, Satinder and
                  Sygnowski, Jakub and
                  Tuyls, Karl and
                  York, Sarah and
                  Zacherl, Alexander and
                  Zhang, Lei M.},
  editor       = {Krause, Andreas and
                  Brunskill, Emma and
                  Cho, Kyunghyun and
                  Engelhardt, Barbara and
                  Sabato, Sivan and
                  Scarlett, Jonathan},
  title        = {Human-Timescale Adaptation in an Open-Ended Task Space},
  booktitle    = {International Conference on Machine Learning, {ICML} 2023, 23--29 July
                  2023, Honolulu, Hawaii, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {202},
  pages        = {1887--1935},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v202/bauer23a.html},
  timestamp    = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/BauerBBBBCCCDGG23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 record; DBLP:journals/corr/abs-2302-01275 is the arXiv record with the same title.
% `ReLOAD` and `MDPs` are braced so sentence-casing styles keep their capitalisation.
@inproceedings{DBLP:conf/icml/MoskovitzOVF0Z23,
  author       = {Moskovitz, Ted and
                  O'Donoghue, Brendan and
                  Veeriah, Vivek and
                  Flennerhag, Sebastian and
                  Singh, Satinder and
                  Zahavy, Tom},
  editor       = {Krause, Andreas and
                  Brunskill, Emma and
                  Cho, Kyunghyun and
                  Engelhardt, Barbara and
                  Sabato, Sivan and
                  Scarlett, Jonathan},
  title        = {{ReLOAD}: Reinforcement Learning with Optimistic Ascent-Descent for
                  Last-Iterate Convergence in Constrained {MDPs}},
  booktitle    = {International Conference on Machine Learning, {ICML} 2023, 23--29 July
                  2023, Honolulu, Hawaii, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {202},
  pages        = {25303--25336},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v202/moskovitz23a.html},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/MoskovitzOVF0Z23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 record; DBLP:journals/corr/abs-2303-03982 is the arXiv record with the same title.
% Acronyms in `booktitle` are braced and the day range uses `--` for an en dash.
@inproceedings{DBLP:conf/nips/0001SGPF0B23,
  author       = {Lu, Chris and
                  Schroecker, Yannick and
                  Gu, Albert and
                  Parisotto, Emilio and
                  Foerster, Jakob N. and
                  Singh, Satinder and
                  Behbahani, Feryal M. P.},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Structured State Space Models for In-Context Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans,
                  {LA}, {USA}, December 10--16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper\_files/paper/2023/hash/92d3d2a9801211ca3693ccb2faa1316f-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/0001SGPF0B23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 record.
% Acronyms in `booktitle` are braced and the day range uses `--` for an en dash.
@inproceedings{DBLP:conf/nips/BrooksWL023,
  author       = {Brooks, Ethan A. and
                  Walls, Logan and
                  Lewis, Richard L. and
                  Singh, Satinder},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Large Language Models can Implement Policy Iteration},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans,
                  {LA}, {USA}, December 10--16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper\_files/paper/2023/hash/60dc7fa827f5f761ad481e2ad40b5573-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/BrooksWL023.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 record; DBLP:journals/corr/abs-2310-15940 is the arXiv record with the same title.
% Lampinen's middle name is spelled out as in that arXiv record, so both records name the
% author identically. Acronyms in `booktitle` are braced and the day range uses `--`.
@inproceedings{DBLP:conf/nips/Carvalho0FLMLL023,
  author       = {Carvalho, Wilka and
                  Saraiva, Andre and
                  Filos, Angelos and
                  Lampinen, Andrew Kyle and
                  Matthey, Loic and
                  Lewis, Richard L. and
                  Lee, Honglak and
                  Singh, Satinder and
                  Rezende, Danilo Jimenez and
                  Zoran, Daniel},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Combining Behaviors with the Successor Features Keyboard},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans,
                  {LA}, {USA}, December 10--16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper\_files/paper/2023/hash/1f69928210578f4cf5b538a8c8806798-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Carvalho0FLMLL023.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2301.03236, listed by DBLP under CoRR.
% Names are entered in `Last, First` form; the lowercase particle `van` stays with the surname.
@article{DBLP:journals/corr/abs-2301-03236,
  author       = {Flennerhag, Sebastian and
                  Zahavy, Tom and
                  O'Donoghue, Brendan and
                  van Hasselt, Hado and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Singh, Satinder},
  title        = {Optimistic Meta-Gradients},
  journal      = {CoRR},
  volume       = {abs/2301.03236},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2301.03236},
  doi          = {10.48550/ARXIV.2301.03236},
  eprinttype   = {arXiv},
  eprint       = {2301.03236},
  timestamp    = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-03236.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2301.12305; DBLP:conf/iclr/CarvalhoFLL023 is the ICLR 2023 record with the
% same authors.
@article{DBLP:journals/corr/abs-2301-12305,
  author       = {Carvalho, Wilka and
                  Filos, Angelos and
                  Lewis, Richard L. and
                  Lee, Honglak and
                  Singh, Satinder},
  title        = {Composing Task Knowledge with Modular Successor Feature Approximators},
  journal      = {CoRR},
  volume       = {abs/2301.12305},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2301.12305},
  doi          = {10.48550/ARXIV.2301.12305},
  eprinttype   = {arXiv},
  eprint       = {2301.12305},
  timestamp    = {Wed, 01 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-12305.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2302.01275; DBLP:conf/icml/MoskovitzOVF0Z23 is the ICML 2023 record with the
% same title. `ReLOAD` and `MDPs` are braced so sentence-casing styles keep their capitalisation.
@article{DBLP:journals/corr/abs-2302-01275,
  author       = {Moskovitz, Ted and
                  O'Donoghue, Brendan and
                  Veeriah, Vivek and
                  Flennerhag, Sebastian and
                  Singh, Satinder and
                  Zahavy, Tom},
  title        = {{ReLOAD}: Reinforcement Learning with Optimistic Ascent-Descent for
                  Last-Iterate Convergence in Constrained {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2302.01275},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2302.01275},
  doi          = {10.48550/ARXIV.2302.01275},
  eprinttype   = {arXiv},
  eprint       = {2302.01275},
  timestamp    = {Thu, 09 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2302-01275.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2302.14451. `3D` is braced so sentence-casing styles do not turn it into `3d`.
% NOTE(review): `Pires, Bernardo Avila` keeps the surname split that BibTeX derived from the
% original `First Last` form; if the surname is the compound `Avila Pires`, move it before
% the comma -- confirm with the paper.
@article{DBLP:journals/corr/abs-2302-14451,
  author       = {Pires, Bernardo {\'{A}}vila and
                  Behbahani, Feryal M. P. and
                  Soyer, Hubert and
                  Nikiforou, Kyriacos and
                  Keck, Thomas and
                  Singh, Satinder},
  title        = {Hierarchical Reinforcement Learning in Complex {3D} Environments},
  journal      = {CoRR},
  volume       = {abs/2302.14451},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2302.14451},
  doi          = {10.48550/ARXIV.2302.14451},
  eprinttype   = {arXiv},
  eprint       = {2302.14451},
  timestamp    = {Fri, 03 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2302-14451.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2303.03982; DBLP:conf/nips/0001SGPF0B23 is the NeurIPS 2023 record with the
% same title and author list.
@article{DBLP:journals/corr/abs-2303-03982,
  author       = {Lu, Chris and
                  Schroecker, Yannick and
                  Gu, Albert and
                  Parisotto, Emilio and
                  Foerster, Jakob N. and
                  Singh, Satinder and
                  Behbahani, Feryal M. P.},
  title        = {Structured State Space Models for In-Context Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2303.03982},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2303.03982},
  doi          = {10.48550/ARXIV.2303.03982},
  eprinttype   = {arXiv},
  eprint       = {2303.03982},
  timestamp    = {Tue, 18 Apr 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2303-03982.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2307.11044.
% `Van Roy` is entered in `Last, First` form: written as `Benjamin Van Roy`, BibTeX treats the
% capitalised `Van` as a given name and sorts and labels the author under `Roy`.
@article{DBLP:journals/corr/abs-2307-11044,
  author       = {Abel, David and
                  Barreto, Andr{\'{e}} and
                  van Hasselt, Hado and
                  Van Roy, Benjamin and
                  Precup, Doina and
                  Singh, Satinder},
  title        = {On the Convergence of Bounded Agents},
  journal      = {CoRR},
  volume       = {abs/2307.11044},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2307.11044},
  doi          = {10.48550/ARXIV.2307.11044},
  eprinttype   = {arXiv},
  eprint       = {2307.11044},
  timestamp    = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2307-11044.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2307.11046.
% `Van Roy` is entered in `Last, First` form: written as `Benjamin Van Roy`, BibTeX treats the
% capitalised `Van` as a given name and sorts and labels the author under `Roy`.
@article{DBLP:journals/corr/abs-2307-11046,
  author       = {Abel, David and
                  Barreto, Andr{\'{e}} and
                  Van Roy, Benjamin and
                  Precup, Doina and
                  van Hasselt, Hado and
                  Singh, Satinder},
  title        = {A Definition of Continual Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2307.11046},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2307.11046},
  doi          = {10.48550/ARXIV.2307.11046},
  eprinttype   = {arXiv},
  eprint       = {2307.11046},
  timestamp    = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2307-11046.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2308.09175.
% The colon sits outside the braces around `AI` so the case-changer sees it and keeps the
% capital on `Towards`; `AlphaZero` is braced to keep its camel case.
@article{DBLP:journals/corr/abs-2308-09175,
  author       = {Zahavy, Tom and
                  Veeriah, Vivek and
                  Hou, Shaobo and
                  Waugh, Kevin and
                  Lai, Matthew and
                  Leurent, Edouard and
                  Tomasev, Nenad and
                  Schut, Lisa and
                  Hassabis, Demis and
                  Singh, Satinder},
  title        = {Diversifying {AI}: Towards Creative Chess with {AlphaZero}},
  journal      = {CoRR},
  volume       = {abs/2308.09175},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2308.09175},
  doi          = {10.48550/ARXIV.2308.09175},
  eprinttype   = {arXiv},
  eprint       = {2308.09175},
  timestamp    = {Fri, 25 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2308-09175.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2310.15940; DBLP:conf/nips/Carvalho0FLMLL023 is the NeurIPS 2023 record with
% the same title. Rezende's middle name is spelled out as in that record, so both records
% name the author identically.
@article{DBLP:journals/corr/abs-2310-15940,
  author       = {Carvalho, Wilka and
                  Saraiva, Andre and
                  Filos, Angelos and
                  Lampinen, Andrew Kyle and
                  Matthey, Loic and
                  Lewis, Richard L. and
                  Lee, Honglak and
                  Singh, Satinder and
                  Rezende, Danilo Jimenez and
                  Zoran, Daniel},
  title        = {Combining Behaviors with the Successor Features Keyboard},
  journal      = {CoRR},
  volume       = {abs/2310.15940},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.15940},
  doi          = {10.48550/ARXIV.2310.15940},
  eprinttype   = {arXiv},
  eprint       = {2310.15940},
  timestamp    = {Tue, 31 Oct 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-15940.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2022 record. In `booktitle`, the ordinal is spelled `Twelfth`, a comma separates
% the EAAI year from `Virtual Event`, and the date range uses `--` for an en dash.
@inproceedings{DBLP:conf/aaai/ZhengVL022,
  author       = {Zheng, Zeyu and
                  Vuorio, Risto and
                  Lewis, Richard L. and
                  Singh, Satinder},
  title        = {Adaptive Pairwise Weights for Temporal Credit Assignment},
  booktitle    = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2022, Thirty-Fourth Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22
                  -- March 1, 2022},
  pages        = {9225--9232},
  publisher    = {{AAAI} Press},
  year         = {2022},
  url          = {https://doi.org/10.1609/aaai.v36i8.20909},
  doi          = {10.1609/AAAI.V36I8.20909},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/ZhengVL022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoLLAs 2022 record (PMLR volume 199); DBLP:journals/corr/abs-2209-06159 is the arXiv record
% with the same title and author list. `CoLLAs` is braced like the other venue acronyms in
% this file, and the day range uses `--` for an en dash.
@inproceedings{DBLP:conf/collas/LuketinaFSAZ022,
  author       = {Luketina, Jelena and
                  Flennerhag, Sebastian and
                  Schroecker, Yannick and
                  Abel, David and
                  Zahavy, Tom and
                  Singh, Satinder},
  editor       = {Chandar, Sarath and
                  Pascanu, Razvan and
                  Precup, Doina},
  title        = {Meta-Gradients in Non-Stationary Environments},
  booktitle    = {Conference on Lifelong Learning Agents, {CoLLAs} 2022, 22--24 August
                  2022, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series       = {Proceedings of Machine Learning Research},
  volume       = {199},
  pages        = {886--901},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v199/luketina22a.html},
  timestamp    = {Fri, 17 Feb 2023 16:29:10 +0100},
  biburl       = {https://dblp.org/rec/conf/collas/LuketinaFSAZ022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2022 record. The day range in `booktitle` uses `--` so it typesets as an en dash.
@inproceedings{DBLP:conf/iclr/FlennerhagSZHS022,
  author       = {Flennerhag, Sebastian and
                  Schroecker, Yannick and
                  Zahavy, Tom and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {Bootstrapped Meta-Learning},
  booktitle    = {The Tenth International Conference on Learning Representations, {ICLR}
                  2022, Virtual Event, April 25--29, 2022},
  publisher    = {OpenReview.net},
  year         = {2022},
  url          = {https://openreview.net/forum?id=b-ny3x071E5},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/FlennerhagSZHS022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2022 record.
% The editor is entered as `De Raedt, Luc`: written as `Luc De Raedt`, BibTeX treats the
% capitalised `De` as a given name. `Markov` is braced because it is a proper noun, and the
% day range in `booktitle` uses `--` for an en dash.
@inproceedings{DBLP:conf/ijcai/AbelDHHLP022,
  author       = {Abel, David and
                  Dabney, Will and
                  Harutyunyan, Anna and
                  Ho, Mark K. and
                  Littman, Michael L. and
                  Precup, Doina and
                  Singh, Satinder},
  editor       = {De Raedt, Luc},
  title        = {On the Expressivity of {Markov} Reward (Extended Abstract)},
  booktitle    = {Proceedings of the Thirty-First International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2022, Vienna, Austria, 23--29 July
                  2022},
  pages        = {5254--5258},
  publisher    = {ijcai.org},
  year         = {2022},
  url          = {https://doi.org/10.24963/ijcai.2022/730},
  doi          = {10.24963/IJCAI.2022/730},
  timestamp    = {Wed, 27 Jul 2022 16:43:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/AbelDHHLP022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 record.
% Editors are given with full first names instead of initials; `BAMDPs` is braced to keep its
% capitalisation; acronyms in `booktitle` are braced and the date range uses `--`.
@inproceedings{DBLP:conf/nips/Arumugam022,
  author       = {Arumugam, Dilip and
                  Singh, Satinder},
  editor       = {Koyejo, Sanmi and
                  Mohamed, Shakir and
                  Agarwal, Alekh and
                  Belgrave, Danielle and
                  Cho, Kyunghyun and
                  Oh, Alice},
  title        = {Planning to the Information Horizon of {BAMDPs} via Epistemic State
                  Abstraction},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, {NeurIPS} 2022, New Orleans,
                  {LA}, {USA}, November 28 -- December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper\_files/paper/2022/hash/80b7bec60081f95d900973509744a306-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Arumugam022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 record.
% Editors are given with full first names instead of initials; acronyms in `booktitle` are
% braced and the date range uses `--` for an en dash.
@inproceedings{DBLP:conf/nips/GrimmBS22,
  author       = {Grimm, Christopher and
                  Barreto, Andr{\'{e}} and
                  Singh, Satinder},
  editor       = {Koyejo, Sanmi and
                  Mohamed, Shakir and
                  Agarwal, Alekh and
                  Belgrave, Danielle and
                  Cho, Kyunghyun and
                  Oh, Alice},
  title        = {Approximate Value Equivalence},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, {NeurIPS} 2022, New Orleans,
                  {LA}, {USA}, November 28 -- December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper\_files/paper/2022/hash/d53538ba21c05fa361d2b21704172753-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/GrimmBS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 record; DBLP:journals/corr/abs-2210-10913 is the arXiv record with the same
% title and author list. Editors are given with full first names instead of initials;
% acronyms in `booktitle` are braced and the date range uses `--` for an en dash.
@inproceedings{DBLP:conf/nips/LiuZM022,
  author       = {Liu, Hao and
                  Zahavy, Tom and
                  Mnih, Volodymyr and
                  Singh, Satinder},
  editor       = {Koyejo, Sanmi and
                  Mohamed, Shakir and
                  Agarwal, Alekh and
                  Belgrave, Danielle and
                  Cho, Kyunghyun and
                  Oh, Alice},
  title        = {Palm up: Playing in the Latent Manifold for Unsupervised Pretraining},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, {NeurIPS} 2022, New Orleans,
                  {LA}, {USA}, November 28 -- December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper\_files/paper/2022/hash/e92381dba235a8309f08ce46376189a9-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/LiuZM022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2202.04772 (this record has no DOI field; `url` points at the arXiv abstract).
% `GrASP` is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/corr/abs-2202-04772,
  author       = {Veeriah, Vivek and
                  Zheng, Zeyu and
                  Lewis, Richard L. and
                  Singh, Satinder},
  title        = {{GrASP}: Gradient-Based Affordance Selection for Planning},
  journal      = {CoRR},
  volume       = {abs/2202.04772},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.04772},
  eprinttype   = {arXiv},
  eprint       = {2202.04772},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-04772.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2205.13521; DBLP:conf/iclr/ZahavySBBFH023 is the ICLR 2023 record with the
% same title. `DOMiNO` is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/corr/abs-2205-13521,
  author       = {Zahavy, Tom and
                  Schroecker, Yannick and
                  Behbahani, Feryal M. P. and
                  Baumli, Kate and
                  Flennerhag, Sebastian and
                  Hou, Shaobo and
                  Singh, Satinder},
  title        = {Discovering Policies with {DOMiNO}: Diversity Optimization Maintaining
                  Near Optimality},
  journal      = {CoRR},
  volume       = {abs/2205.13521},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2205.13521},
  doi          = {10.48550/ARXIV.2205.13521},
  eprinttype   = {arXiv},
  eprint       = {2205.13521},
  timestamp    = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2205-13521.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2206.15378.
% `De Vylder` is entered in `Last, First` form: written as `Bart De Vylder`, BibTeX treats the
% capitalised `De` as a given name and files the author under `Vylder`.
% `Stratego` is braced because it is a proper noun.
@article{DBLP:journals/corr/abs-2206-15378,
  author       = {P{\'{e}}rolat, Julien and
                  De Vylder, Bart and
                  Hennes, Daniel and
                  Tarassov, Eugene and
                  Strub, Florian and
                  de Boer, Vincent and
                  Muller, Paul and
                  Connor, Jerome T. and
                  Burch, Neil and
                  Anthony, Thomas W. and
                  McAleer, Stephen and
                  Elie, Romuald and
                  Cen, Sarah H. and
                  Wang, Zhe and
                  Gruslys, Audrunas and
                  Malysheva, Aleksandra and
                  Khan, Mina and
                  Ozair, Sherjil and
                  Timbers, Finbarr and
                  Pohlen, Toby and
                  Eccles, Tom and
                  Rowland, Mark and
                  Lanctot, Marc and
                  Lespiau, Jean{-}Baptiste and
                  Piot, Bilal and
                  Omidshafiei, Shayegan and
                  Lockhart, Edward and
                  Sifre, Laurent and
                  Beauguerlange, Nathalie and
                  Munos, R{\'{e}}mi and
                  Silver, David and
                  Singh, Satinder and
                  Hassabis, Demis and
                  Tuyls, Karl},
  title        = {Mastering the Game of {Stratego} with Model-Free Multiagent Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2206.15378},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.15378},
  doi          = {10.48550/ARXIV.2206.15378},
  eprinttype   = {arXiv},
  eprint       = {2206.15378},
  timestamp    = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-15378.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2209.06159; DBLP:conf/collas/LuketinaFSAZ022 is the CoLLAs 2022 record with
% the same title and author list.
@article{DBLP:journals/corr/abs-2209-06159,
  author       = {Luketina, Jelena and
                  Flennerhag, Sebastian and
                  Schroecker, Yannick and
                  Abel, David and
                  Zahavy, Tom and
                  Singh, Satinder},
  title        = {Meta-Gradients in Non-Stationary Environments},
  journal      = {CoRR},
  volume       = {abs/2209.06159},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2209.06159},
  doi          = {10.48550/ARXIV.2209.06159},
  eprinttype   = {arXiv},
  eprint       = {2209.06159},
  timestamp    = {Tue, 27 Sep 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2209-06159.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-03821,
  author       = {Ethan A. Brooks and
                  Logan Walls and
                  Richard L. Lewis and
                  Satinder Singh},
  title        = {In-Context Policy Iteration},
  journal      = {CoRR},
  volume       = {abs/2210.03821},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.03821},
  doi          = {10.48550/ARXIV.2210.03821},
  eprinttype    = {arXiv},
  eprint       = {2210.03821},
  timestamp    = {Wed, 12 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-03821.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-10913,
  author       = {Hao Liu and
                  Tom Zahavy and
                  Volodymyr Mnih and
                  Satinder Singh},
  title        = {Palm up: Playing in the Latent Manifold for Unsupervised Pretraining},
  journal      = {CoRR},
  volume       = {abs/2210.10913},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.10913},
  doi          = {10.48550/ARXIV.2210.10913},
  eprinttype    = {arXiv},
  eprint       = {2210.10913},
  timestamp    = {Tue, 25 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-10913.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-14215,
  author       = {Michael Laskin and
                  Luyu Wang and
                  Junhyuk Oh and
                  Emilio Parisotto and
                  Stephen Spencer and
                  Richie Steigerwald and
                  DJ Strouse and
                  Steven Hansen and
                  Angelos Filos and
                  Ethan A. Brooks and
                  Maxime Gazeau and
                  Himanshu Sahni and
                  Satinder Singh and
                  Volodymyr Mnih},
  title        = {In-context Reinforcement Learning with Algorithm Distillation},
  journal      = {CoRR},
  volume       = {abs/2210.14215},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.14215},
  doi          = {10.48550/ARXIV.2210.14215},
  eprinttype    = {arXiv},
  eprint       = {2210.14215},
  timestamp    = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-14215.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-16872,
  author       = {Dilip Arumugam and
                  Satinder Singh},
  title        = {Planning to the Information Horizon of {BAMDPs} via Epistemic State
                  Abstraction},
  journal      = {CoRR},
  volume       = {abs/2210.16872},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.16872},
  doi          = {10.48550/ARXIV.2210.16872},
  eprinttype    = {arXiv},
  eprint       = {2210.16872},
  timestamp    = {Fri, 09 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-16872.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2211-11260,
  author       = {Robert Tjarko Lange and
                  Tom Schaul and
                  Yutian Chen and
                  Tom Zahavy and
                  Valentin Dallibard and
                  Chris Lu and
                  Satinder Singh and
                  Sebastian Flennerhag},
  title        = {Discovering Evolution Strategies via Meta-Black-Box Optimization},
  journal      = {CoRR},
  volume       = {abs/2211.11260},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.11260},
  doi          = {10.48550/ARXIV.2211.11260},
  eprinttype    = {arXiv},
  eprint       = {2211.11260},
  timestamp    = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-11260.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-14530,
  author       = {Khimya Khetarpal and
                  Claire Vernade and
                  Brendan O'Donoghue and
                  Satinder Singh and
                  Tom Zahavy},
  title        = {{POMRL:} No-Regret Learning-to-Plan with Increasing Horizons},
  journal      = {CoRR},
  volume       = {abs/2212.14530},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.14530},
  doi          = {10.48550/ARXIV.2212.14530},
  eprinttype    = {arXiv},
  eprint       = {2212.14530},
  timestamp    = {Sun, 08 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-14530.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SilverSPS21,
  author       = {David Silver and
                  Satinder Singh and
                  Doina Precup and
                  Richard S. Sutton},
  title        = {Reward is enough},
  journal      = {Artif. Intell.},
  volume       = {299},
  pages        = {103535},
  year         = {2021},
  url          = {https://doi.org/10.1016/j.artint.2021.103535},
  doi          = {10.1016/J.ARTINT.2021.103535},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ai/SilverSPS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/0038DS21,
  author       = {Qi Zhang and
                  Edmund H. Durfee and
                  Satinder Singh},
  title        = {Efficient Querying for Cooperative Probabilistic Commitments},
  booktitle    = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2021, Thirty-Third Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                  2021},
  pages        = {11378--11386},
  publisher    = {{AAAI} Press},
  year         = {2021},
  url          = {https://doi.org/10.1609/aaai.v35i13.17356},
  doi          = {10.1609/AAAI.V35I13.17356},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/0038DS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/ZahavyBMHOKS21,
  author       = {Tom Zahavy and
                  Andr{\'{e}} Barreto and
                  Daniel J. Mankowitz and
                  Shaobo Hou and
                  Brendan O'Donoghue and
                  Iurii Kemaev and
                  Satinder Singh},
  title        = {Discovering a set of policies for the worst case reward},
  booktitle    = {9th International Conference on Learning Representations, {ICLR} 2021,
                  Virtual Event, Austria, May 3-7, 2021},
  publisher    = {OpenReview.net},
  year         = {2021},
  url          = {https://openreview.net/forum?id=PUkhWz65dy5},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/ZahavyBMHOKS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/BrooksRLS21,
  author       = {Ethan A. Brooks and
                  Janarthanan Rajendran and
                  Richard L. Lewis and
                  Satinder Singh},
  editor       = {Marina Meila and
                  Tong Zhang},
  title        = {Reinforcement Learning of Implicit and Explicit Control Flow Instructions},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {1082--1091},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v139/brooks21a.html},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/BrooksRLS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/CarvalhoLLSLLS21,
  author       = {Wilka Carvalho and
                  Anthony Liang and
                  Kimin Lee and
                  Sungryull Sohn and
                  Honglak Lee and
                  Richard L. Lewis and
                  Satinder Singh},
  editor       = {Zhi{-}Hua Zhou},
  title        = {Reinforcement Learning for Sparse-Reward Object-Interaction Tasks
                  in a First-person Simulated {3D} Environment},
  booktitle    = {Proceedings of the Thirtieth International Joint Conference on Artificial
                  Intelligence, {IJCAI} 2021, Virtual Event / Montreal, Canada, 19-27
                  August 2021},
  pages        = {2219--2226},
  publisher    = {ijcai.org},
  year         = {2021},
  url          = {https://doi.org/10.24963/ijcai.2021/306},
  doi          = {10.24963/IJCAI.2021/306},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/CarvalhoLLSLLS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GrimmBFSS21,
  author       = {Christopher Grimm and
                  Andr{\'{e}} Barreto and
                  Gregory Farquhar and
                  David Silver and
                  Satinder Singh},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Jennifer Wortman Vaughan},
  title        = {Proper Value Equivalence},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {7773--7786},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/400e5e6a7ce0c754f281525fae75a873-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GrimmBFSS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/AbelDHHLPS21,
  author       = {David Abel and
                  Will Dabney and
                  Anna Harutyunyan and
                  Mark K. Ho and
                  Michael L. Littman and
                  Doina Precup and
                  Satinder Singh},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Jennifer Wortman Vaughan},
  title        = {On the Expressivity of {Markov} Reward},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {7799--7812},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/4079016d940210b4ae9ae7d41c4a2065-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/AbelDHHLPS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZhengVVLS21,
  author       = {Zeyu Zheng and
                  Vivek Veeriah and
                  Risto Vuorio and
                  Richard L. Lewis and
                  Satinder Singh},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Jennifer Wortman Vaughan},
  title        = {Learning State Representations from Random Deep Action-conditional
                  Predictions},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {23679--23691},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/c71df24045cfddab4a963d3ac9bdc9a3-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ZhengVVLS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZahavyODS21,
  author       = {Tom Zahavy and
                  Brendan O'Donoghue and
                  Guillaume Desjardins and
                  Satinder Singh},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Jennifer Wortman Vaughan},
  title        = {Reward is enough for convex {MDPs}},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {25746--25759},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/d7e4cdde82a894b8f633e6d61a01ef15-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ZahavyODS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahZHXOKHSS21,
  author       = {Vivek Veeriah and
                  Tom Zahavy and
                  Matteo Hessel and
                  Zhongwen Xu and
                  Junhyuk Oh and
                  Iurii Kemaev and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Jennifer Wortman Vaughan},
  title        = {Discovery of Options via Meta-Learned Subgoals},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {29861--29873},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/fa246d0262c3925617b0c72bb20eeb1d-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/VeeriahZHXOKHSS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-04323,
  author       = {Tom Zahavy and
                  Andr{\'{e}} Barreto and
                  Daniel J. Mankowitz and
                  Shaobo Hou and
                  Brendan O'Donoghue and
                  Iurii Kemaev and
                  Satinder Singh},
  title        = {Discovering a set of policies for the worst case reward},
  journal      = {CoRR},
  volume       = {abs/2102.04323},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.04323},
  eprinttype    = {arXiv},
  eprint       = {2102.04323},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-04323.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-04897,
  author       = {Zeyu Zheng and
                  Vivek Veeriah and
                  Risto Vuorio and
                  Richard L. Lewis and
                  Satinder Singh},
  title        = {Learning State Representations from Random Deep Action-conditional
                  Predictions},
  journal      = {CoRR},
  volume       = {abs/2102.04897},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.04897},
  eprinttype    = {arXiv},
  eprint       = {2102.04897},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-04897.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-04999,
  author       = {Zeyu Zheng and
                  Risto Vuorio and
                  Richard L. Lewis and
                  Satinder Singh},
  title        = {Pairwise Weights for Temporal Credit Assignment},
  journal      = {CoRR},
  volume       = {abs/2102.04999},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.04999},
  eprinttype    = {arXiv},
  eprint       = {2102.04999},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-04999.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-06741,
  author       = {Vivek Veeriah and
                  Tom Zahavy and
                  Matteo Hessel and
                  Zhongwen Xu and
                  Junhyuk Oh and
                  Iurii Kemaev and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  title        = {Discovery of Options via Meta-Learned Subgoals},
  journal      = {CoRR},
  volume       = {abs/2102.06741},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.06741},
  eprinttype    = {arXiv},
  eprint       = {2102.06741},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-06741.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-13195,
  author       = {Ethan A. Brooks and
                  Janarthanan Rajendran and
                  Richard L. Lewis and
                  Satinder Singh},
  title        = {Reinforcement Learning of Implicit and Explicit Control Flow in Instructions},
  journal      = {CoRR},
  volume       = {abs/2102.13195},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.13195},
  eprinttype    = {arXiv},
  eprint       = {2102.13195},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-13195.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-00661,
  author       = {Tom Zahavy and
                  Brendan O'Donoghue and
                  Guillaume Desjardins and
                  Satinder Singh},
  title        = {Reward is enough for convex {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2106.00661},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.00661},
  eprinttype    = {arXiv},
  eprint       = {2106.00661},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-00661.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-00669,
  author       = {Tom Zahavy and
                  Brendan O'Donoghue and
                  Andr{\'{e}} Barreto and
                  Volodymyr Mnih and
                  Sebastian Flennerhag and
                  Satinder Singh},
  title        = {Discovering Diverse Nearly Optimal Policies with Successor Features},
  journal      = {CoRR},
  volume       = {abs/2106.00669},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.00669},
  eprinttype    = {arXiv},
  eprint       = {2106.00669},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-00669.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-10316,
  author       = {Christopher Grimm and
                  Andr{\'{e}} Barreto and
                  Gregory Farquhar and
                  David Silver and
                  Satinder Singh},
  title        = {Proper Value Equivalence},
  journal      = {CoRR},
  volume       = {abs/2106.10316},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.10316},
  eprinttype    = {arXiv},
  eprint       = {2106.10316},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-10316.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-04504,
  author       = {Sebastian Flennerhag and
                  Yannick Schroecker and
                  Tom Zahavy and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  title        = {Bootstrapped Meta-Learning},
  journal      = {CoRR},
  volume       = {abs/2109.04504},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.04504},
  eprinttype    = {arXiv},
  eprint       = {2109.04504},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-04504.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2110-15724,
  author       = {Janarthanan Rajendran and
                  Jonathan K. Kummerfeld and
                  Satinder Singh},
  title        = {Learning to Learn End-to-End Goal-Oriented Dialog From Related Dialog
                  Tasks},
  journal      = {CoRR},
  volume       = {abs/2110.15724},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.15724},
  eprinttype    = {arXiv},
  eprint       = {2110.15724},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-15724.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2111-00876,
  author       = {David Abel and
                  Will Dabney and
                  Anna Harutyunyan and
                  Mark K. Ho and
                  Michael L. Littman and
                  Doina Precup and
                  Satinder Singh},
  title        = {On the Expressivity of {Markov} Reward},
  journal      = {CoRR},
  volume       = {abs/2111.00876},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.00876},
  eprinttype    = {arXiv},
  eprint       = {2111.00876},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-00876.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/aamas/ZhangDS20,
  author       = {Qi Zhang and
                  Edmund H. Durfee and
                  Satinder Singh},
  title        = {Semantics and algorithms for trustworthy commitment achievement under
                  model uncertainty},
  journal      = {Auton. Agents Multi Agent Syst.},
  volume       = {34},
  number       = {1},
  pages        = {19},
  year         = {2020},
  url          = {https://doi.org/10.1007/s10458-020-09443-0},
  doi          = {10.1007/S10458-020-09443-0},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/aamas/ZhangDS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/ZhangDS20,
  author       = {Shun Zhang and
                  Edmund H. Durfee and
                  Satinder Singh},
  title        = {Querying to Find a Safe Policy under Uncertain Safety Constraints
                  in {Markov} Decision Processes},
  booktitle    = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2020, The Thirty-Second Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA,
                  February 7-12, 2020},
  pages        = {2552--2559},
  publisher    = {{AAAI} Press},
  year         = {2020},
  url          = {https://doi.org/10.1609/aaai.v34i03.5638},
  doi          = {10.1609/AAAI.V34I03.5638},
  timestamp    = {Sat, 21 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/ZhangDS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/RajendranLVLS20,
  author       = {Janarthanan Rajendran and
                  Richard L. Lewis and
                  Vivek Veeriah and
                  Honglak Lee and
                  Satinder Singh},
  title        = {How Should an Agent Practice?},
  booktitle    = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2020, The Thirty-Second Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA,
                  February 7-12, 2020},
  pages        = {5454--5461},
  publisher    = {{AAAI} Press},
  year         = {2020},
  url          = {https://doi.org/10.1609/aaai.v34i04.5995},
  doi          = {10.1609/AAAI.V34I04.5995},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/RajendranLVLS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/0038DS20,
  author       = {Qi Zhang and
                  Edmund H. Durfee and
                  Satinder Singh},
  title        = {Modeling Probabilistic Commitments for Maintenance Is Inherently Harder
                  than for Achievement},
  booktitle    = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2020, The Thirty-Second Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA,
                  February 7-12, 2020},
  pages        = {10326--10333},
  publisher    = {{AAAI} Press},
  year         = {2020},
  url          = {https://doi.org/10.1609/aaai.v34i06.6596},
  doi          = {10.1609/AAAI.V34I06.6596},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/0038DS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/0002JTS20,
  author       = {Aditya Modi and
                  Nan Jiang and
                  Ambuj Tewari and
                  Satinder Singh},
  editor       = {Silvia Chiappa and
                  Roberto Calandra},
  title        = {Sample Complexity of Reinforcement Learning using Linearly Combined
                  Model Ensembles},
  booktitle    = {The 23rd International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  series       = {Proceedings of Machine Learning Research},
  volume       = {108},
  pages        = {2010--2020},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v108/modi20a.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/0002JTS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author       = {Ian Osband and
                  Yotam Doron and
                  Matteo Hessel and
                  John Aslanides and
                  Eren Sezener and
                  Andre Saraiva and
                  Katrina McKinney and
                  Tor Lattimore and
                  Csaba Szepesv{\'{a}}ri and
                  Satinder Singh and
                  Benjamin Van Roy and
                  Richard S. Sutton and
                  David Silver and
                  Hado van Hasselt},
  title        = {Behaviour Suite for Reinforcement Learning},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/ZhengOHXKHSS20,
  author       = {Zeyu Zheng and
                  Junhyuk Oh and
                  Matteo Hessel and
                  Zhongwen Xu and
                  Manuel Kroiss and
                  Hado van Hasselt and
                  David Silver and
                  Satinder Singh},
  title        = {What Can Learned Intrinsic Rewards Capture?},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {11436--11446},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/zheng20b.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ZhengOHXKHSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/AnthonyETKGHPLP20,
  author       = {Thomas W. Anthony and
                  Tom Eccles and
                  Andrea Tacchetti and
                  J{\'{a}}nos Kram{\'{a}}r and
                  Ian Gemp and
                  Thomas C. Hudson and
                  Nicolas Porcel and
                  Marc Lanctot and
                  Julien P{\'{e}}rolat and
                  Richard Everett and
                  Satinder Singh and
                  Thore Graepel and
                  Yoram Bachrach},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {Learning to Play No-Press Diplomacy with Best Response Policy Iteration},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/d1419302db9c022ab1d48681b13d5f8b-Abstract.html},
  timestamp    = {Wed, 06 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/AnthonyETKGHPLP20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GrimmBSS20,
  author       = {Christopher Grimm and
                  Andr{\'{e}} Barreto and
                  Satinder Singh and
                  David Silver},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/3bb585ea00014b0e3ebe4c6dd165a358-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GrimmBSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/OhHCXHSS20,
  author       = {Oh, Junhyuk and
                  Hessel, Matteo and
                  Czarnecki, Wojciech M. and
                  Xu, Zhongwen and
                  van Hasselt, Hado and
                  Singh, Satinder and
                  Silver, David},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {Discovering Reinforcement Learning Algorithms},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/0b96d81f0494fde5428c7aea243c9157-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/OhHCXHSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "Van Roy" is a two-word surname; the "Last, First" form keeps BibTeX from reading "Van" as a given name.
@inproceedings{DBLP:conf/nips/WenPIBRS20,
  author       = {Wen, Zheng and
                  Precup, Doina and
                  Ibrahimi, Morteza and
                  Barreto, Andr{\'{e}} and
                  Van Roy, Benjamin and
                  Singh, Satinder},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {On Efficiency in Hierarchical Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/4a5cfa9281924139db466a8a19291aff-Abstract.html},
  timestamp    = {Thu, 31 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/WenPIBRS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/XuHHOSS20,
  author       = {Xu, Zhongwen and
                  van Hasselt, Hado Philip and
                  Hessel, Matteo and
                  Oh, Junhyuk and
                  Singh, Satinder and
                  Silver, David},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {Meta-Gradient Reinforcement Learning with an Objective Discovered
                  Online},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/ae3d525daf92cee0003a7f2d92c34ea3-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/XuHHOSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZahavyXVHOHSS20,
  author       = {Zahavy, Tom and
                  Xu, Zhongwen and
                  Veeriah, Vivek and
                  Hessel, Matteo and
                  Oh, Junhyuk and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {A Self-Tuning Actor-Critic Algorithm},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/f02208a057804ee16ac72ff4d3cec53b-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ZahavyXVHOHSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-12928,
  author       = {Zahavy, Tom and
                  Xu, Zhongwen and
                  Veeriah, Vivek and
                  Hessel, Matteo and
                  Oh, Junhyuk and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {Self-Tuning Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2002.12928},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.12928},
  eprinttype   = {arXiv},
  eprint       = {2002.12928},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-12928.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-04635,
  author       = {Anthony, Thomas W. and
                  Eccles, Tom and
                  Tacchetti, Andrea and
                  Kram{\'{a}}r, J{\'{a}}nos and
                  Gemp, Ian and
                  Hudson, Thomas C. and
                  Porcel, Nicolas and
                  Lanctot, Marc and
                  P{\'{e}}rolat, Julien and
                  Everett, Richard and
                  Singh, Satinder and
                  Graepel, Thore and
                  Bachrach, Yoram},
  title        = {Learning to Play No-Press {Diplomacy} with Best Response Policy Iteration},
  journal      = {CoRR},
  volume       = {abs/2006.04635},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.04635},
  eprinttype   = {arXiv},
  eprint       = {2006.04635},
  timestamp    = {Wed, 06 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-04635.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08433,
  author       = {Xu, Zhongwen and
                  van Hasselt, Hado and
                  Hessel, Matteo and
                  Oh, Junhyuk and
                  Singh, Satinder and
                  Silver, David},
  title        = {Meta-Gradient Reinforcement Learning with an Objective Discovered
                  Online},
  journal      = {CoRR},
  volume       = {abs/2007.08433},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.08433},
  eprinttype   = {arXiv},
  eprint       = {2007.08433},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-08433.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08794,
  author       = {Oh, Junhyuk and
                  Hessel, Matteo and
                  Czarnecki, Wojciech M. and
                  Xu, Zhongwen and
                  van Hasselt, Hado and
                  Singh, Satinder and
                  Silver, David},
  title        = {Discovering Reinforcement Learning Algorithms},
  journal      = {CoRR},
  volume       = {abs/2007.08794},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.08794},
  eprinttype   = {arXiv},
  eprint       = {2007.08794},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-08794.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-15195,
  author       = {Carvalho, Wilka and
                  Liang, Anthony and
                  Lee, Kimin and
                  Sohn, Sungryull and
                  Lee, Honglak and
                  Lewis, Richard L. and
                  Singh, Satinder},
  title        = {Reinforcement Learning for Sparse-Reward Object-Interaction Tasks
                  in First-person Simulated {3D} Environments},
  journal      = {CoRR},
  volume       = {abs/2010.15195},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.15195},
  eprinttype   = {arXiv},
  eprint       = {2010.15195},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-15195.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-03506,
  author       = {Grimm, Christopher and
                  Barreto, Andr{\'{e}} and
                  Singh, Satinder and
                  Silver, David},
  title        = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2011.03506},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.03506},
  eprinttype   = {arXiv},
  eprint       = {2011.03506},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-03506.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-07195,
  author       = {Zhang, Qi and
                  Durfee, Edmund H. and
                  Singh, Satinder},
  title        = {Efficient Querying for Cooperative Probabilistic Commitments},
  journal      = {CoRR},
  volume       = {abs/2012.07195},
  year         = {2020},
  url          = {https://arxiv.org/abs/2012.07195},
  eprinttype   = {arXiv},
  eprint       = {2012.07195},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2012-07195.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/ZhangLSD19,
  author       = {Zhang, Qi and
                  Lewis, Richard L. and
                  Singh, Satinder and
                  Durfee, Edmund H.},
  title        = {Learning to Communicate and Solve Visual Blocks-World Tasks},
  booktitle    = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2019, The Thirty-First Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
                  USA, January 27 - February 1, 2019},
  pages        = {5781--5788},
  publisher    = {{AAAI} Press},
  year         = {2019},
  url          = {https://doi.org/10.1609/aaai.v33i01.33015781},
  doi          = {10.1609/AAAI.V33I01.33015781},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/ZhangLSD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdm/HollerVQTJJSWY19,
  author       = {Holler, John and
                  Vuorio, Risto and
                  Qin, Zhiwei (Tony) and
                  Tang, Xiaocheng and
                  Jiao, Yan and
                  Jin, Tiancheng and
                  Singh, Satinder and
                  Wang, Chenxi and
                  Ye, Jieping},
  editor       = {Wang, Jianyong and
                  Shim, Kyuseok and
                  Wu, Xindong},
  title        = {Deep Reinforcement Learning for Multi-driver Vehicle Dispatching and
                  Repositioning Problem},
  booktitle    = {2019 {IEEE} International Conference on Data Mining, {ICDM} 2019,
                  Beijing, China, November 8-11, 2019},
  pages        = {1090--1095},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ICDM.2019.00129},
  doi          = {10.1109/ICDM.2019.00129},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icdm/HollerVQTJJSWY19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Editor surname "O hEigeartaigh" has two words; the "Last, First" form keeps BibTeX from splitting it.
@inproceedings{DBLP:conf/ijcai/ZhangDS19,
  author       = {Zhang, Qi and
                  Durfee, Edmund H. and
                  Singh, Satinder},
  editor       = {Espinoza, Hu{\'{a}}scar and
                  Yu, Han and
                  Huang, Xiaowei and
                  L{\'{e}}cu{\'{e}}, Freddy and
                  Chen, Cynthia and
                  Hern{\'{a}}ndez{-}Orallo, Jos{\'{e}} and
                  {\'{O}} h{\'{E}}igeartaigh, Se{\'{a}}n and
                  Mallah, Richard},
  title        = {Computational Strategies for the Trustworthy Pursuit and the Safe
                  Modeling of Probabilistic Maintenance Commitments},
  booktitle    = {Proceedings of the Workshop on Artificial Intelligence Safety 2019
                  co-located with the 28th International Joint Conference on Artificial
                  Intelligence, {AISafety@IJCAI} 2019, Macao, China, August 11-12, 2019},
  series       = {{CEUR} Workshop Proceedings},
  volume       = {2419},
  publisher    = {CEUR-WS.org},
  year         = {2019},
  url          = {https://ceur-ws.org/Vol-2419/paper\_8.pdf},
  timestamp    = {Fri, 10 Mar 2023 16:23:31 +0100},
  biburl       = {https://dblp.org/rec/conf/ijcai/ZhangDS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/PaquetteLBSOKPS19,
  author       = {Paquette, Philip and
                  Lu, Yuchen and
                  Bocco, Steven and
                  Smith, Max O. and
                  Ortiz{-}Gagne, Satya and
                  Kummerfeld, Jonathan K. and
                  Pineau, Joelle and
                  Singh, Satinder and
                  Courville, Aaron C.},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {No-Press {Diplomacy}: Modeling Multi-Agent Gameplay},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, {NeurIPS} 2019, December
                  8-14, 2019, Vancouver, {BC}, Canada},
  pages        = {4476--4487},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/84b20b1f5a0d103f5710bb67a043cd78-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/PaquetteLBSOKPS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahHXRLOHSS19,
  author       = {Veeriah, Vivek and
                  Hessel, Matteo and
                  Xu, Zhongwen and
                  Rajendran, Janarthanan and
                  Lewis, Richard L. and
                  Oh, Junhyuk and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {Discovery of Useful Questions as Auxiliary Tasks},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, {NeurIPS} 2019, December
                  8-14, 2019, Vancouver, {BC}, Canada},
  pages        = {9306--9317},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/10ff0b5e85e5b85cc3095d431d8c08b4-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/VeeriahHXRLOHSS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HarutyunyanDMAP19,
  author       = {Harutyunyan, Anna and
                  Dabney, Will and
                  Mesnard, Thomas and
                  Azar, Mohammad Gheshlaghi and
                  Piot, Bilal and
                  Heess, Nicolas and
                  van Hasselt, Hado and
                  Wayne, Gregory and
                  Singh, Satinder and
                  Precup, Doina and
                  Munos, R{\'{e}}mi},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {Hindsight Credit Assignment},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, {NeurIPS} 2019, December
                  8-14, 2019, Vancouver, {BC}, Canada},
  pages        = {12467--12476},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/195f15384c2a79cedf293e4a847ce85c-Abstract.html},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/HarutyunyanDMAP19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ranlp/RajendranGGYSP19,
  author       = {Rajendran, Janarthanan and
                  Ganhotra, Jatin and
                  Guo, Xiaoxiao and
                  Yu, Mo and
                  Singh, Satinder and
                  Polymenakos, Lazaros},
  editor       = {Mitkov, Ruslan and
                  Angelova, Galia},
  title        = {{NE-Table}: {A} Neural key-value table for Named Entities},
  booktitle    = {Proceedings of the International Conference on Recent Advances in
                  Natural Language Processing, {RANLP} 2019, Varna, Bulgaria, September
                  2-4, 2019},
  pages        = {980--993},
  publisher    = {{INCOMA} Ltd.},
  year         = {2019},
  url          = {https://doi.org/10.26615/978-954-452-056-4\_114},
  doi          = {10.26615/978-954-452-056-4\_114},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ranlp/RajendranGGYSP19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:series/lncs/PriestCSA019,
  author       = {Priest, Benjamin W. and
                  Cybenko, George and
                  Singh, Satinder and
                  Albanese, Massimiliano and
                  Liu, Peng},
  editor       = {Jajodia, Sushil and
                  Cybenko, George and
                  Liu, Peng and
                  Wang, Cliff and
                  Wellman, Michael P.},
  title        = {Online and Scalable Adaptive Cyber Defense},
  booktitle    = {Adversarial and Uncertain Reasoning for Adaptive Cyber Defense - Control-
                  and Game-Theoretic Approaches to Cyber Security},
  series       = {Lecture Notes in Computer Science},
  volume       = {11830},
  pages        = {232--261},
  publisher    = {Springer},
  year         = {2019},
  url          = {https://doi.org/10.1007/978-3-030-30719-6\_10},
  doi          = {10.1007/978-3-030-30719-6\_10},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/series/lncs/PriestCSA019.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-08649,
  author       = {Grimm, Christopher and
                  Singh, Satinder},
  title        = {Learning Independently-Obtainable Reward Functions},
  journal      = {CoRR},
  volume       = {abs/1901.08649},
  year         = {2019},
  url          = {https://arxiv.org/abs/1901.08649},
  eprinttype   = {arXiv},
  eprint       = {1901.08649},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-08649.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% "Van Roy" is a two-word surname; the "Last, First" form keeps BibTeX from reading "Van" as a given name.
@article{DBLP:journals/corr/abs-1908-03568,
  author       = {Osband, Ian and
                  Doron, Yotam and
                  Hessel, Matteo and
                  Aslanides, John and
                  Sezener, Eren and
                  Saraiva, Andre and
                  McKinney, Katrina and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba and
                  Singh, Satinder and
                  Van Roy, Benjamin and
                  Sutton, Richard S. and
                  Silver, David and
                  van Hasselt, Hado},
  title        = {Behaviour Suite for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1908.03568},
  year         = {2019},
  url          = {https://arxiv.org/abs/1908.03568},
  eprinttype   = {arXiv},
  eprint       = {1908.03568},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-02128,
  author       = {Paquette, Philip and
                  Lu, Yuchen and
                  Bocco, Steven and
                  Smith, Max O. and
                  Ortiz{-}Gagne, Satya and
                  Kummerfeld, Jonathan K. and
                  Singh, Satinder and
                  Pineau, Joelle and
                  Courville, Aaron C.},
  title        = {No Press {Diplomacy}: Modeling Multi-Agent Gameplay},
  journal      = {CoRR},
  volume       = {abs/1909.02128},
  year         = {2019},
  url          = {https://arxiv.org/abs/1909.02128},
  eprinttype   = {arXiv},
  eprint       = {1909.02128},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1909-02128.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-04607,
  author       = {Veeriah, Vivek and
                  Hessel, Matteo and
                  Xu, Zhongwen and
                  Lewis, Richard L. and
                  Rajendran, Janarthanan and
                  Oh, Junhyuk and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {Discovery of Useful Questions as Auxiliary Tasks},
  journal      = {CoRR},
  volume       = {abs/1909.04607},
  year         = {2019},
  url          = {https://arxiv.org/abs/1909.04607},
  eprinttype   = {arXiv},
  eprint       = {1909.04607},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1909-04607.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-10597,
  author       = {Modi, Aditya and
                  Jiang, Nan and
                  Tewari, Ambuj and
                  Singh, Satinder},
  title        = {Sample Complexity of Reinforcement Learning using Linearly Combined
                  Model Ensembles},
  journal      = {CoRR},
  volume       = {abs/1910.10597},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.10597},
  eprinttype   = {arXiv},
  eprint       = {1910.10597},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-10597.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-14361,
  author       = {Bapst, Victor and
                  Sanchez{-}Gonzalez, Alvaro and
                  Shams, Omar and
                  Stachenfeld, Kimberly L. and
                  Battaglia, Peter W. and
                  Singh, Satinder and
                  Hamrick, Jessica B.},
  title        = {Object-oriented state editing for {HRL}},
  journal      = {CoRR},
  volume       = {abs/1910.14361},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.14361},
  eprinttype   = {arXiv},
  eprint       = {1910.14361},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-14361.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-10866,
  author       = {Grimm, Christopher and
                  Higgins, Irina and
                  Barreto, Andr{\'{e}} and
                  Teplyashin, Denis and
                  Wulfmeier, Markus and
                  Hertweck, Tim and
                  Hadsell, Raia and
                  Singh, Satinder},
  title        = {Disentangled Cumulants Help Successor Representations Transfer to
                  New Tasks},
  journal      = {CoRR},
  volume       = {abs/1911.10866},
  year         = {2019},
  url          = {https://arxiv.org/abs/1911.10866},
  eprinttype   = {arXiv},
  eprint       = {1911.10866},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-10866.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-11260,
  author       = {Holler, John and
                  Vuorio, Risto and
                  Qin, Zhiwei (Tony) and
                  Tang, Xiaocheng and
                  Jiao, Yan and
                  Jin, Tiancheng and
                  Singh, Satinder and
                  Wang, Chenxi and
                  Ye, Jieping},
  title        = {Deep Reinforcement Learning for Multi-Driver Vehicle Dispatching and
                  Repositioning Problem},
  journal      = {CoRR},
  volume       = {abs/1911.11260},
  year         = {2019},
  url          = {https://arxiv.org/abs/1911.11260},
  eprinttype   = {arXiv},
  eprint       = {1911.11260},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-11260.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-02503,
  author       = {Harutyunyan, Anna and
                  Dabney, Will and
                  Mesnard, Thomas and
                  Azar, Mohammad Gheshlaghi and
                  Piot, Bilal and
                  Heess, Nicolas and
                  van Hasselt, Hado and
                  Wayne, Greg and
                  Singh, Satinder and
                  Precup, Doina and
                  Munos, R{\'{e}}mi},
  title        = {Hindsight Credit Assignment},
  journal      = {CoRR},
  volume       = {abs/1912.02503},
  year         = {2019},
  url          = {https://arxiv.org/abs/1912.02503},
  eprinttype   = {arXiv},
  eprint       = {1912.02503},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1912-02503.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-05500,
  author       = {Zheng, Zeyu and
                  Oh, Junhyuk and
                  Hessel, Matteo and
                  Xu, Zhongwen and
                  Kroiss, Manuel and
                  van Hasselt, Hado and
                  Silver, David and
                  Singh, Satinder},
  title        = {What Can Learned Intrinsic Rewards Capture?},
  journal      = {CoRR},
  volume       = {abs/1912.05500},
  year         = {2019},
  url          = {https://arxiv.org/abs/1912.05500},
  eprinttype   = {arXiv},
  eprint       = {1912.05500},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1912-05500.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-07045,
  author       = {Rajendran, Janarthanan and
                  Lewis, Richard L. and
                  Veeriah, Vivek and
                  Lee, Honglak and
                  Singh, Satinder},
  title        = {How Should an Agent Practice?},
  journal      = {CoRR},
  volume       = {abs/1912.07045},
  year         = {2019},
  url          = {https://arxiv.org/abs/1912.07045},
  eprinttype   = {arXiv},
  eprint       = {1912.07045},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1912-07045.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/scn/NguyenWWS18,
  author       = {Nguyen, Thanh Hong and
                  Wright, Mason and
                  Wellman, Michael P. and
                  Singh, Satinder},
  title        = {Multistage Attack Graph Security Games: Heuristic Strategies, with
                  Empirical Game-Theoretic Analysis},
  journal      = {Secur. Commun. Networks},
  volume       = {2018},
  pages        = {2864873:1--2864873:28},
  year         = {2018},
  url          = {https://doi.org/10.1155/2018/2864873},
  doi          = {10.1155/2018/2864873},
  timestamp    = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/scn/NguyenWWS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/alt/0002JST18,
  author       = {Modi, Aditya and
                  Jiang, Nan and
                  Singh, Satinder and
                  Tewari, Ambuj},
  editor       = {Janoos, Firdaus and
                  Mohri, Mehryar and
                  Sridharan, Karthik},
  title        = {Markov Decision Processes with Continuous Side Information},
  booktitle    = {Algorithmic Learning Theory, {ALT} 2018, 7-9 April 2018, Lanzarote,
                  Canary Islands, Spain},
  series       = {Proceedings of Machine Learning Research},
  volume       = {83},
  pages        = {597--618},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v83/modi18a.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/alt/0002JST18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/ZhangDS18a,
  author       = {Zhang, Qi and
                  Durfee, Edmund H. and
                  Singh, Satinder},
  editor       = {Cohen, Robin and
                  Sensoy, Murat and
                  Norman, Timothy J.},
  title        = {Challenges in the Trustworthy Pursuit of Maintenance Commitments Under
                  Uncertainty},
  booktitle    = {Proceedings of the 20th International Trust Workshop co-located with
                  {AAMAS/IJCAI/ECAI/ICML} 2018, Stockholm, Sweden, July 14, 2018},
  series       = {{CEUR} Workshop Proceedings},
  volume       = {2154},
  pages        = {75--86},
  publisher    = {CEUR-WS.org},
  year         = {2018},
  url          = {https://ceur-ws.org/Vol-2154/paper7.pdf},
  timestamp    = {Fri, 10 Mar 2023 16:22:57 +0100},
  biburl       = {https://dblp.org/rec/conf/atal/ZhangDS18a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/ZhangDS18,
  author       = {Zhang, Shun and
                  Durfee, Edmund H. and
                  Singh, Satinder},
  editor       = {Andr{\'{e}}, Elisabeth and
                  Koenig, Sven and
                  Dastani, Mehdi and
                  Sukthankar, Gita},
  title        = {On Querying for Safe Optimality in Factored {Markov} Decision Processes},
  booktitle    = {Proceedings of the 17th International Conference on Autonomous Agents
                  and MultiAgent Systems, {AAMAS} 2018, Stockholm, Sweden, July 10-15,
                  2018},
  pages        = {2168--2170},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems
                  Richland, SC, {USA} / {ACM}},
  year         = {2018},
  url          = {http://dl.acm.org/citation.cfm?id=3238108},
  timestamp    = {Sat, 30 Sep 2023 09:34:53 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/ZhangDS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/RajendranGSP18,
  author       = {Janarthanan Rajendran and
                  Jatin Ganhotra and
                  Satinder Singh and
                  Lazaros Polymenakos},
  editor       = {Ellen Riloff and
                  David Chiang and
                  Julia Hockenmaier and
                  Jun'ichi Tsujii},
  title        = {Learning End-to-End Goal-Oriented Dialog with Multiple Answers},
  booktitle    = {Proceedings of the 2018 Conference on Empirical Methods in Natural
                  Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
  pages        = {3834--3843},
  publisher    = {Association for Computational Linguistics},
  year         = {2018},
  url          = {https://doi.org/10.18653/v1/d18-1418},
  doi          = {10.18653/V1/D18-1418},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/RajendranGSP18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/OhGSL18,
  author       = {Junhyuk Oh and
                  Yijie Guo and
                  Satinder Singh and
                  Honglak Lee},
  editor       = {Jennifer G. Dy and
                  Andreas Krause},
  title        = {Self-Imitation Learning},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {3875--3884},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/oh18b.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/OhGSL18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/ZhangDS18,
  author       = {Shun Zhang and
                  Edmund H. Durfee and
                  Satinder Singh},
  editor       = {J{\'{e}}r{\^{o}}me Lang},
  title        = {Minimax-Regret Querying on Side Effects for Safe Optimality in Factored
                  {Markov} Decision Processes},
  booktitle    = {Proceedings of the Twenty-Seventh International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2018, July 13-19, 2018, Stockholm,
                  Sweden},
  pages        = {4867--4873},
  publisher    = {ijcai.org},
  year         = {2018},
  url          = {https://doi.org/10.24963/ijcai.2018/676},
  doi          = {10.24963/IJCAI.2018/676},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/ZhangDS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/JiangKS18,
  author       = {Nan Jiang and
                  Alex Kulesza and
                  Satinder Singh},
  editor       = {Samy Bengio and
                  Hanna M. Wallach and
                  Hugo Larochelle and
                  Kristen Grauman and
                  Nicol{\`{o}} Cesa{-}Bianchi and
                  Roman Garnett},
  title        = {Completing State Representations using Spectral Learning},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {4333--4342},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/51174add1c52758f33d414ceaf3fe6ba-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/JiangKS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZhengOS18,
  author       = {Zeyu Zheng and
                  Junhyuk Oh and
                  Satinder Singh},
  editor       = {Samy Bengio and
                  Hanna M. Wallach and
                  Hugo Larochelle and
                  Kristen Grauman and
                  Nicol{\`{o}} Cesa{-}Bianchi and
                  Roman Garnett},
  title        = {On Learning Intrinsic Rewards for Policy Gradient Methods},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {4649--4659},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/51de85ddd068f0bc787691d356176df9-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ZhengOS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1803-02940,
  author       = {Jiaxuan Wang and
                  Ian Fox and
                  Jonathan Skaza and
                  Nick Linck and
                  Satinder Singh and
                  Jenna Wiens},
  title        = {The Advantage of Doubling: {A} Deep Reinforcement Learning Approach
                  to Studying the Double Team in the {NBA}},
  journal      = {CoRR},
  volume       = {abs/1803.02940},
  year         = {2018},
  url          = {http://arxiv.org/abs/1803.02940},
  eprinttype    = {arXiv},
  eprint       = {1803.02940},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-02940.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1804-06459,
  author       = {Zeyu Zheng and
                  Junhyuk Oh and
                  Satinder Singh},
  title        = {On Learning Intrinsic Rewards for Policy Gradient Methods},
  journal      = {CoRR},
  volume       = {abs/1804.06459},
  year         = {2018},
  url          = {http://arxiv.org/abs/1804.06459},
  eprinttype    = {arXiv},
  eprint       = {1804.06459},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-06459.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1804-09540,
  author       = {Janarthanan Rajendran and
                  Jatin Ganhotra and
                  Xiaoxiao Guo and
                  Mo Yu and
                  Satinder Singh},
  title        = {Named Entities troubling your Neural Methods? Build {NE-Table}: {A}
                  neural approach for handling Named Entities},
  journal      = {CoRR},
  volume       = {abs/1804.09540},
  year         = {2018},
  url          = {http://arxiv.org/abs/1804.09540},
  eprinttype    = {arXiv},
  eprint       = {1804.09540},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-09540.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-05635,
  author       = {Junhyuk Oh and
                  Yijie Guo and
                  Satinder Singh and
                  Honglak Lee},
  title        = {Self-Imitation Learning},
  journal      = {CoRR},
  volume       = {abs/1806.05635},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.05635},
  eprinttype    = {arXiv},
  eprint       = {1806.05635},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-05635.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-09605,
  author       = {Vivek Veeriah and
                  Junhyuk Oh and
                  Satinder Singh},
  title        = {Many-Goals Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1806.09605},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.09605},
  eprinttype    = {arXiv},
  eprint       = {1806.09605},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-09605.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1808-09996,
  author       = {Janarthanan Rajendran and
                  Jatin Ganhotra and
                  Satinder Singh and
                  Lazaros Polymenakos},
  title        = {Learning End-to-End Goal-Oriented Dialog with Multiple Answers},
  journal      = {CoRR},
  volume       = {abs/1808.09996},
  year         = {2018},
  url          = {http://arxiv.org/abs/1808.09996},
  eprinttype    = {arXiv},
  eprint       = {1808.09996},
  timestamp    = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1808-09996.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-00950,
  author       = {Yijie Guo and
                  Junhyuk Oh and
                  Satinder Singh and
                  Honglak Lee},
  title        = {Generative Adversarial Self-Imitation Learning},
  journal      = {CoRR},
  volume       = {abs/1812.00950},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.00950},
  eprinttype    = {arXiv},
  eprint       = {1812.00950},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-00950.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/NguyenWS17,
  author       = {Thanh Hong Nguyen and
                  Michael P. Wellman and
                  Satinder Singh},
  title        = {A {Stackelberg} Game Model for Botnet Traffic Exfiltration},
  booktitle    = {The Workshops of the Thirty-First {AAAI} Conference on Artificial
                  Intelligence, Saturday, February 4-9, 2017, San Francisco, California,
                  {USA}},
  series       = {{AAAI} Technical Report},
  volume       = {{WS-17}},
  publisher    = {{AAAI} Press},
  year         = {2017},
  url          = {http://aaai.org/ocs/index.php/WS/AAAIW17/paper/view/15090},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/NguyenWS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/Perez-RosasMRSA17,
  author       = {Ver{\'{o}}nica P{\'{e}}rez{-}Rosas and
                  Rada Mihalcea and
                  Kenneth Resnicow and
                  Satinder Singh and
                  Lawrence C. An},
  editor       = {Regina Barzilay and
                  Min{-}Yen Kan},
  title        = {Understanding and Predicting Empathic Behavior in Counseling Therapy},
  booktitle    = {Proceedings of the 55th Annual Meeting of the Association for Computational
                  Linguistics, {ACL} 2017, Vancouver, Canada, July 30 - August 4, Volume
                  1: Long Papers},
  pages        = {1426--1435},
  publisher    = {Association for Computational Linguistics},
  year         = {2017},
  url          = {https://doi.org/10.18653/v1/P17-1131},
  doi          = {10.18653/V1/P17-1131},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/acl/Perez-RosasMRSA17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aips/ZhangDS17,
  author       = {Shun Zhang and
                  Edmund H. Durfee and
                  Satinder Singh},
  editor       = {Laura Barbulescu and
                  Jeremy Frank and
                  Mausam and
                  Stephen F. Smith},
  title        = {Approximately-Optimal Queries for Planning in Reward-Uncertain {Markov}
                  Decision Processes},
  booktitle    = {Proceedings of the Twenty-Seventh International Conference on Automated
                  Planning and Scheduling, {ICAPS} 2017, Pittsburgh, Pennsylvania, USA,
                  June 18-23, 2017},
  pages        = {339--347},
  publisher    = {{AAAI} Press},
  year         = {2017},
  url          = {https://aaai.org/ocs/index.php/ICAPS/ICAPS17/paper/view/15763},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aips/ZhangDS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aips/ZhangSD17,
  author       = {Qi Zhang and
                  Satinder Singh and
                  Edmund H. Durfee},
  editor       = {Laura Barbulescu and
                  Jeremy Frank and
                  Mausam and
                  Stephen F. Smith},
  title        = {Minimizing Maximum Regret in Commitment Constrained Sequential Decision
                  Making},
  booktitle    = {Proceedings of the Twenty-Seventh International Conference on Automated
                  Planning and Scheduling, {ICAPS} 2017, Pittsburgh, Pennsylvania, USA,
                  June 18-23, 2017},
  pages        = {348--357},
  publisher    = {{AAAI} Press},
  year         = {2017},
  url          = {https://aaai.org/ocs/index.php/ICAPS/ICAPS17/paper/view/15766},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aips/ZhangSD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ccs/NguyenWWS17,
  author       = {Thanh Hong Nguyen and
                  Mason Wright and
                  Michael P. Wellman and
                  Satinder Singh},
  editor       = {Hamed Okhravi and
                  Xinming Ou},
  title        = {Multi-Stage Attack Graph Security Games: Heuristic Strategies, with
                  Empirical Game-Theoretic Analysis},
  booktitle    = {Proceedings of the 2017 Workshop on Moving Target Defense, MTD@CCS
                  2017, Dallas, TX, USA, October 30, 2017},
  pages        = {87--97},
  publisher    = {{ACM}},
  year         = {2017},
  url          = {https://doi.org/10.1145/3140549.3140562},
  doi          = {10.1145/3140549.3140562},
  timestamp    = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ccs/NguyenWWS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eacl/SinghMPRAGC17,
  author       = {Ver{\'{o}}nica P{\'{e}}rez{-}Rosas and
                  Rada Mihalcea and
                  Kenneth Resnicow and
                  Satinder Singh and
                  Lawrence C. An and
                  Kathy J. Goggin and
                  Delwyn Catley},
  editor       = {Mirella Lapata and
                  Phil Blunsom and
                  Alexander Koller},
  title        = {Predicting Counselor Behaviors in Motivational Interviewing Encounters},
  booktitle    = {Proceedings of the 15th Conference of the European Chapter of the
                  Association for Computational Linguistics, {EACL} 2017, Valencia,
                  Spain, April 3-7, 2017, Volume 1: Long Papers},
  pages        = {1128--1137},
  publisher    = {Association for Computational Linguistics},
  year         = {2017},
  url          = {https://doi.org/10.18653/v1/e17-1106},
  doi          = {10.18653/V1/E17-1106},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/eacl/SinghMPRAGC17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/gamesec/NguyenWS17,
  author       = {Thanh Hong Nguyen and
                  Michael P. Wellman and
                  Satinder Singh},
  editor       = {Stefan Rass and
                  Bo An and
                  Christopher Kiekintveld and
                  Fei Fang and
                  Stefan Schauer},
  title        = {A {Stackelberg} Game Model for Botnet Data Exfiltration},
  booktitle    = {Decision and Game Theory for Security - 8th International Conference,
                  GameSec 2017, Vienna, Austria, October 23-25, 2017, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {10575},
  pages        = {151--170},
  publisher    = {Springer},
  year         = {2017},
  url          = {https://doi.org/10.1007/978-3-319-68711-7\_9},
  doi          = {10.1007/978-3-319-68711-7\_9},
  timestamp    = {Wed, 07 Feb 2024 17:23:12 +0100},
  biburl       = {https://dblp.org/rec/conf/gamesec/NguyenWS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/GuoKRBCKTTS17,
  author       = {Xiaoxiao Guo and
                  Tim Klinger and
                  Clemens Rosenbaum and
                  Joseph P. Bigus and
                  Murray Campbell and
                  Ban Kawas and
                  Kartik Talamadupula and
                  Gerry Tesauro and
                  Satinder Singh},
  title        = {Learning to Query, Reason, and Answer Questions On Ambiguous Texts},
  booktitle    = {5th International Conference on Learning Representations, {ICLR} 2017,
                  Toulon, France, April 24-26, 2017, Conference Track Proceedings},
  publisher    = {OpenReview.net},
  year         = {2017},
  url          = {https://openreview.net/forum?id=rJ0-tY5xe},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/GuoKRBCKTTS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/OhSLK17,
  author       = {Junhyuk Oh and
                  Satinder Singh and
                  Honglak Lee and
                  Pushmeet Kohli},
  editor       = {Doina Precup and
                  Yee Whye Teh},
  title        = {Zero-Shot Task Generalization with Multi-Task Deep Reinforcement Learning},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {2661--2670},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v70/oh17a.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/OhSLK17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/AminJS17,
  author       = {Kareem Amin and
                  Nan Jiang and
                  Satinder Singh},
  editor       = {Isabelle Guyon and
                  Ulrike von Luxburg and
                  Samy Bengio and
                  Hanna M. Wallach and
                  Rob Fergus and
                  S. V. N. Vishwanathan and
                  Roman Garnett},
  title        = {Repeated Inverse Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {1815--1824},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/8ce6790cc6a94e65f17f908f462fae85-Abstract.html},
  timestamp    = {Mon, 22 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/AminJS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/OhSL17,
  author       = {Junhyuk Oh and
                  Satinder Singh and
                  Honglak Lee},
  editor       = {Isabelle Guyon and
                  Ulrike von Luxburg and
                  Samy Bengio and
                  Hanna M. Wallach and
                  Rob Fergus and
                  S. V. N. Vishwanathan and
                  Roman Garnett},
  title        = {Value Prediction Network},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {6118--6128},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/ffbd6cbb019a1413183c8d08f2929307-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/OhSL17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@proceedings{DBLP:conf/aaai/2017,
  editor       = {Satinder Singh and
                  Shaul Markovitch},
  title        = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
                  February 4-9, 2017, San Francisco, California, {USA}},
  publisher    = {{AAAI} Press},
  year         = {2017},
  url          = {https://ojs.aaai.org/index.php/AAAI/issue/view/302},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/2017.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ZhangSD17,
  author       = {Qi Zhang and
                  Satinder Singh and
                  Edmund H. Durfee},
  title        = {Minimizing Maximum Regret in Commitment Constrained Sequential Decision
                  Making},
  journal      = {CoRR},
  volume       = {abs/1703.04587},
  year         = {2017},
  url          = {http://arxiv.org/abs/1703.04587},
  eprinttype    = {arXiv},
  eprint       = {1703.04587},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ZhangSD17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AminJS17,
  author       = {Kareem Amin and
                  Nan Jiang and
                  Satinder Singh},
  title        = {Repeated Inverse Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1705.05427},
  year         = {2017},
  url          = {http://arxiv.org/abs/1705.05427},
  eprinttype    = {arXiv},
  eprint       = {1705.05427},
  timestamp    = {Mon, 22 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AminJS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/OhSLK17,
  author       = {Junhyuk Oh and
                  Satinder Singh and
                  Honglak Lee and
                  Pushmeet Kohli},
  title        = {Zero-Shot Task Generalization with Multi-Task Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1706.05064},
  year         = {2017},
  url          = {http://arxiv.org/abs/1706.05064},
  eprinttype    = {arXiv},
  eprint       = {1706.05064},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/OhSLK17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/OhSL17,
  author       = {Junhyuk Oh and
                  Satinder Singh and
                  Honglak Lee},
  title        = {Value Prediction Network},
  journal      = {CoRR},
  volume       = {abs/1707.03497},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.03497},
  eprinttype    = {arXiv},
  eprint       = {1707.03497},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/OhSL17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1711-05726,
  author       = {Aditya Modi and
                  Nan Jiang and
                  Satinder Singh and
                  Ambuj Tewari},
  title        = {{Markov} Decision Processes with Continuous Side Information},
  journal      = {CoRR},
  volume       = {abs/1711.05726},
  year         = {2017},
  url          = {http://arxiv.org/abs/1711.05726},
  eprinttype    = {arXiv},
  eprint       = {1711.05726},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1711-05726.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/EsbroeckSSSK16,
  author       = {Alexander Van Esbroeck and
                  Landon Smith and
                  Zeeshan Syed and
                  Satinder Singh and
                  Zahi N. Karam},
  title        = {Multi-task seizure detection: addressing intra-patient variation in
                  seizure morphologies},
  journal      = {Mach. Learn.},
  volume       = {102},
  number       = {3},
  pages        = {309--321},
  year         = {2016},
  url          = {https://doi.org/10.1007/s10994-015-5519-7},
  doi          = {10.1007/S10994-015-5519-7},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/EsbroeckSSSK16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/JiangKS16,
  author       = {Nan Jiang and
                  Alex Kulesza and
                  Satinder Singh},
  editor       = {Dale Schuurmans and
                  Michael P. Wellman},
  title        = {Improving Predictive State Representations via Gradient Descent},
  booktitle    = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence,
                  February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages        = {1709--1715},
  publisher    = {{AAAI} Press},
  year         = {2016},
  url          = {https://doi.org/10.1609/aaai.v30i1.10270},
  doi          = {10.1609/AAAI.V30I1.10270},
  timestamp    = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/JiangKS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/DurfeeS16a,
  author       = {Edmund H. Durfee and
                  Satinder Singh},
  editor       = {Nardine Osman and
                  Carles Sierra},
  title        = {On the Trustworthy Fulfillment of Commitments},
  booktitle    = {Autonomous Agents and Multiagent Systems - {AAMAS} 2016 Workshops,
                  Best Papers, Singapore, Singapore, May 9-10, 2016, Revised Selected
                  Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {10002},
  pages        = {1--13},
  year         = {2016},
  url          = {https://doi.org/10.1007/978-3-319-46882-2\_1},
  doi          = {10.1007/978-3-319-46882-2\_1},
  timestamp    = {Thu, 08 Sep 2022 19:48:31 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/DurfeeS16a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/DurfeeS16,
  author       = {Edmund H. Durfee and
                  Satinder Singh},
  editor       = {Jie Zhang and
                  Robin Cohen and
                  Murat Sensoy},
  title        = {On the Trustworthy Fulfillment of Commitments},
  booktitle    = {Proceedings of the 18th International Workshop on Trust in Agent Societies
                  co-located with the 15th International Conference on Autonomous Agents
                  and Multiagent Systems {(AAMAS} 2016), Singapore, Singapore, May 10,
                  2016},
  series       = {{CEUR} Workshop Proceedings},
  volume       = {1578},
  pages        = {54--62},
  publisher    = {CEUR-WS.org},
  year         = {2016},
  url          = {https://ceur-ws.org/Vol-1578/paper9.pdf},
  timestamp    = {Fri, 10 Mar 2023 16:22:57 +0100},
  biburl       = {https://dblp.org/rec/conf/atal/DurfeeS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/OhCSL16,
  author       = {Junhyuk Oh and
                  Valliappa Chockalingam and
                  Satinder Singh and
                  Honglak Lee},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Control of Memory, Active Perception, and Action in {Minecraft}},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {2790--2799},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/oh16.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/OhCSL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/GuoSLL16,
  author       = {Xiaoxiao Guo and
                  Satinder Singh and
                  Richard L. Lewis and
                  Honglak Lee},
  editor       = {Subbarao Kambhampati},
  title        = {Deep Learning for Reward Design to Improve {Monte Carlo} Tree Search
                  in {ATARI} Games},
  booktitle    = {Proceedings of the Twenty-Fifth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July
                  2016},
  pages        = {1519--1525},
  publisher    = {{IJCAI/AAAI} Press},
  year         = {2016},
  url          = {http://www.ijcai.org/Abstract/16/218},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/GuoSLL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/JiangST16,
  author       = {Nan Jiang and
                  Satinder Singh and
                  Ambuj Tewari},
  editor       = {Subbarao Kambhampati},
  title        = {On Structural Properties of {MDPs} that Bound Loss Due to Shallow Planning},
  booktitle    = {Proceedings of the Twenty-Fifth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July
                  2016},
  pages        = {1640--1647},
  publisher    = {{IJCAI/AAAI} Press},
  year         = {2016},
  url          = {http://www.ijcai.org/Abstract/16/235},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/JiangST16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/ZhangDSCW16,
  author       = {Qi Zhang and
                  Edmund H. Durfee and
                  Satinder Singh and
                  Anna Chen and
                  Stefan J. Witwicki},
  editor       = {Subbarao Kambhampati},
  title        = {Commitment Semantics for Sequential Decision Making under Reward Uncertainty},
  booktitle    = {Proceedings of the Twenty-Fifth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July
                  2016},
  pages        = {3315--3323},
  publisher    = {{IJCAI/AAAI} Press},
  year         = {2016},
  url          = {http://www.ijcai.org/Abstract/16/469},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/ZhangDSCW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/JiangKSL16,
  author       = {Nan Jiang and
                  Alex Kulesza and
                  Satinder Singh and
                  Richard L. Lewis},
  editor       = {Subbarao Kambhampati},
  title        = {The Dependence of Effective Planning Horizon on Model Accuracy},
  booktitle    = {Proceedings of the Twenty-Fifth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July
                  2016},
  pages        = {4180--4189},
  publisher    = {{IJCAI/AAAI} Press},
  year         = {2016},
  url          = {http://www.ijcai.org/Abstract/16/626},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/JiangKSL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/naacl/Perez-RosasMRSA16,
  author     = {P{\'{e}}rez{-}Rosas, Ver{\'{o}}nica and Mihalcea, Rada and
                Resnicow, Kenneth and Singh, Satinder and An, Lawrence C.},
  title      = {Building a Motivational Interviewing Dataset},
  editor     = {Hollingshead, Kristy and Ungar, Lyle H.},
  booktitle  = {Proceedings of the 3rd Workshop on Computational Linguistics
                and Clinical Psychology: From Linguistic Signal to Clinical
                Reality, CLPsych@NAACL-HLT 2016, June 16, 2016, San Diego,
                California, {USA}},
  pages      = {42--51},
  year       = {2016},
  publisher  = {The Association for Computational Linguistics},
  doi        = {10.18653/V1/W16-0305},
  url        = {https://doi.org/10.18653/v1/w16-0305},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/naacl/Perez-RosasMRSA16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2016 paper. {Stackelberg} is braced so sentence-casing styles keep the
% proper noun capitalised. No page range is recorded for this entry.
@inproceedings{DBLP:conf/uai/AminWS16,
  author     = {Amin, Kareem and Wellman, Michael P. and Singh, Satinder},
  title      = {Gradient Methods for {Stackelberg} Games},
  editor     = {Ihler, Alexander and Janzing, Dominik},
  booktitle  = {Proceedings of the Thirty-Second Conference on Uncertainty in
                Artificial Intelligence, {UAI} 2016, June 25-29, 2016, New York
                City, NY, {USA}},
  year       = {2016},
  publisher  = {{AUAI} Press},
  url        = {http://auai.org/uai2016/proceedings/papers/167.pdf},
  timestamp  = {Mon, 05 Dec 2022 15:54:38 +0100},
  biburl     = {https://dblp.org/rec/conf/uai/AminWS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AminS16,
  author     = {Amin, Kareem and Singh, Satinder},
  title      = {Towards Resolving Unidentifiability in Inverse Reinforcement
                Learning},
  journal    = {CoRR},
  volume     = {abs/1601.06569},
  year       = {2016},
  eprinttype = {arXiv},
  eprint     = {1601.06569},
  url        = {http://arxiv.org/abs/1601.06569},
  timestamp  = {Mon, 22 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/AminS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. {Monte Carlo} is braced alongside {ATARI} so that
% sentence-casing styles do not lowercase the proper noun.
@article{DBLP:journals/corr/GuoSLL16,
  author     = {Guo, Xiaoxiao and Singh, Satinder and Lewis, Richard L. and
                Lee, Honglak},
  title      = {Deep Learning for Reward Design to Improve {Monte Carlo} Tree
                Search in {ATARI} Games},
  journal    = {CoRR},
  volume     = {abs/1604.07095},
  year       = {2016},
  eprinttype = {arXiv},
  eprint     = {1604.07095},
  url        = {http://arxiv.org/abs/1604.07095},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GuoSLL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. {Minecraft} is braced so sentence-casing styles keep the
% product name capitalised.
@article{DBLP:journals/corr/OhCSL16,
  author     = {Oh, Junhyuk and Chockalingam, Valliappa and Singh, Satinder and
                Lee, Honglak},
  title      = {Control of Memory, Active Perception, and Action in {Minecraft}},
  journal    = {CoRR},
  volume     = {abs/1605.09128},
  year       = {2016},
  eprinttype = {arXiv},
  eprint     = {1605.09128},
  url        = {http://arxiv.org/abs/1605.09128},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/OhCSL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/KuleszaJS15,
  author     = {Kulesza, Alex and Jiang, Nan and Singh, Satinder},
  title      = {Spectral Learning of Predictive State Representations with
                Insufficient Statistics},
  editor     = {Bonet, Blai and Koenig, Sven},
  booktitle  = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial
                Intelligence, January 25-30, 2015, Austin, Texas, {USA}},
  pages      = {2715--2721},
  year       = {2015},
  publisher  = {{AAAI} Press},
  doi        = {10.1609/AAAI.V29I1.9635},
  url        = {https://doi.org/10.1609/aaai.v29i1.9635},
  timestamp  = {Mon, 18 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/KuleszaJS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaaifs/DurfeeS15,
  author     = {Durfee, Edmund H. and Singh, Satinder},
  title      = {Commitment Semantics for Sequential Decision Making Under
                Reward Uncertainty},
  booktitle  = {2015 {AAAI} Fall Symposia, Arlington, Virginia, USA,
                November 12-14, 2015},
  pages      = {13--20},
  year       = {2015},
  publisher  = {{AAAI} Press},
  url        = {http://www.aaai.org/ocs/index.php/FSS/FSS15/paper/view/11652},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaaifs/DurfeeS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/KuleszaJS15,
  author     = {Kulesza, Alex and Jiang, Nan and Singh, Satinder},
  title      = {Low-Rank Spectral Learning with Weighted Loss Functions},
  editor     = {Lebanon, Guy and Vishwanathan, S. V. N.},
  booktitle  = {Proceedings of the Eighteenth International Conference on
                Artificial Intelligence and Statistics, {AISTATS} 2015,
                San Diego, California, USA, May 9-12, 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {38},
  year       = {2015},
  publisher  = {JMLR.org},
  url        = {http://proceedings.mlr.press/v38/kulesza15.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/KuleszaJS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/JiangKSL15,
  author     = {Jiang, Nan and Kulesza, Alex and Singh, Satinder and
                Lewis, Richard L.},
  title      = {The Dependence of Effective Planning Horizon on Model Accuracy},
  editor     = {Weiss, Gerhard and Yolum, Pinar and Bordini, Rafael H. and
                Elkind, Edith},
  booktitle  = {Proceedings of the 2015 International Conference on Autonomous
                Agents and Multiagent Systems, {AAMAS} 2015, Istanbul, Turkey,
                May 4-8, 2015},
  pages      = {1181--1189},
  year       = {2015},
  publisher  = {{ACM}},
  url        = {http://dl.acm.org/citation.cfm?id=2773300},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/JiangKSL15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/JiangKS15,
  author     = {Jiang, Nan and Kulesza, Alex and Singh, Satinder},
  title      = {Abstraction Selection in Model-based Reinforcement Learning},
  editor     = {Bach, Francis R. and Blei, David M.},
  booktitle  = {Proceedings of the 32nd International Conference on Machine
                Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {37},
  pages      = {179--188},
  year       = {2015},
  publisher  = {JMLR.org},
  url        = {http://proceedings.mlr.press/v37/jiang15.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/JiangKS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2015 paper. {Atari} is braced so sentence-casing styles keep the
% proper noun capitalised.
@inproceedings{DBLP:conf/nips/OhGLLS15,
  author     = {Oh, Junhyuk and Guo, Xiaoxiao and Lee, Honglak and
                Lewis, Richard L. and Singh, Satinder},
  title      = {Action-Conditional Video Prediction using Deep Networks in
                {Atari} Games},
  editor     = {Cortes, Corinna and Lawrence, Neil D. and Lee, Daniel D. and
                Sugiyama, Masashi and Garnett, Roman},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual
                Conference on Neural Information Processing Systems 2015,
                December 7-12, 2015, Montreal, Quebec, Canada},
  pages      = {2863--2871},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/6ba3af5d7b2790e73f0de32e5c8c1798-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/OhGLLS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. {Atari} is braced so sentence-casing styles keep the
% proper noun capitalised.
@article{DBLP:journals/corr/OhGLLS15,
  author     = {Oh, Junhyuk and Guo, Xiaoxiao and Lee, Honglak and
                Lewis, Richard L. and Singh, Satinder},
  title      = {Action-Conditional Video Prediction using Deep Networks in
                {Atari} Games},
  journal    = {CoRR},
  volume     = {abs/1507.08750},
  year       = {2015},
  eprinttype = {arXiv},
  eprint     = {1507.08750},
  url        = {http://arxiv.org/abs/1507.08750},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/OhGLLS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tamd/LiuSLQ14,
  author     = {Liu, Bingyao and Singh, Satinder and Lewis, Richard L. and
                Qin, Shiyin},
  title      = {Optimal Rewards for Cooperative Agents},
  journal    = {{IEEE} Trans. Auton. Ment. Dev.},
  volume     = {6},
  number     = {4},
  pages      = {286--297},
  year       = {2014},
  doi        = {10.1109/TAMD.2014.2362682},
  url        = {https://doi.org/10.1109/TAMD.2014.2362682},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tamd/LiuSLQ14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/topics/HowesLS14,
  author     = {Howes, Andrew and Lewis, Richard L. and Singh, Satinder},
  title      = {Utility Maximization and Bounds on Human Information Processing},
  journal    = {Top. Cogn. Sci.},
  volume     = {6},
  number     = {2},
  pages      = {198--203},
  year       = {2014},
  doi        = {10.1111/TOPS.12089},
  url        = {https://doi.org/10.1111/tops.12089},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/topics/HowesLS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/topics/LewisHS14,
  author     = {Lewis, Richard L. and Howes, Andrew and Singh, Satinder},
  title      = {Computational Rationality: Linking Mechanism and Behavior
                Through Bounded Utility Maximization},
  journal    = {Top. Cogn. Sci.},
  volume     = {6},
  number     = {2},
  pages      = {279--311},
  year       = {2014},
  doi        = {10.1111/TOPS.12086},
  url        = {https://doi.org/10.1111/tops.12086},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/topics/LewisHS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2014 paper. The conference date range in booktitle previously read
% "July 27 -31" with a stray space before the hyphen; now "July 27-31".
@inproceedings{DBLP:conf/aaai/ChiaBKSS14,
  author     = {Chia, Chih{-}Chun and Blum, James and Karam, Zahi N. and
                Singh, Satinder and Syed, Zeeshan},
  title      = {Predicting Postoperative Atrial Fibrillation from Independent
                {ECG} Components},
  editor     = {Brodley, Carla E. and Stone, Peter},
  booktitle  = {Proceedings of the Twenty-Eighth {AAAI} Conference on Artificial
                Intelligence, July 27-31, 2014, Qu{\'{e}}bec City, Qu{\'{e}}bec,
                Canada},
  pages      = {1178--1184},
  year       = {2014},
  publisher  = {{AAAI} Press},
  doi        = {10.1609/AAAI.V28I1.8921},
  url        = {https://doi.org/10.1609/aaai.v28i1.8921},
  timestamp  = {Mon, 04 Sep 2023 15:06:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/ChiaBKSS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2014 paper. Two fixes: the booktitle date range "July 27 -31" lost its
% stray space, and the first author is written in comma form so the surname
% parses as "Van Esbroeck" rather than "Esbroeck".
% NOTE(review): the compound surname is assumed from the capitalised "Van";
% confirm against the paper if sorting under V matters.
@inproceedings{DBLP:conf/aaai/EsbroeckSRS14,
  author     = {Van Esbroeck, Alexander and Singh, Satinder and
                Rubinfeld, Ilan and Syed, Zeeshan},
  title      = {Evaluating Trauma Patients: Addressing Missing Covariates with
                Joint Optimization},
  editor     = {Brodley, Carla E. and Stone, Peter},
  booktitle  = {Proceedings of the Twenty-Eighth {AAAI} Conference on Artificial
                Intelligence, July 27-31, 2014, Qu{\'{e}}bec City, Qu{\'{e}}bec,
                Canada},
  pages      = {1307--1313},
  year       = {2014},
  publisher  = {{AAAI} Press},
  doi        = {10.1609/AAAI.V28I1.8912},
  url        = {https://doi.org/10.1609/aaai.v28i1.8912},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/EsbroeckSRS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl-cmcl/ShvartsmanLS14,
  author     = {Shvartsman, Michael and Lewis, Richard L. and Singh, Satinder},
  title      = {Computationally Rational Saccadic Control: An Explanation of
                Spillover Effects Based on Sampling from Noisy Perception and
                Memory},
  editor     = {Demberg, Vera and O'Donnell, Timothy},
  booktitle  = {Proceedings of the Fifth Workshop on Cognitive Modeling and
                Computational Linguistics, CMCL@ACL 2014, Baltimore, Maryland,
                USA, June 26, 2014},
  pages      = {1--9},
  year       = {2014},
  publisher  = {Association for Computational Linguistics},
  doi        = {10.3115/V1/W14-2001},
  url        = {https://doi.org/10.3115/v1/W14-2001},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl-cmcl/ShvartsmanLS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aips/VorobeychikATS14,
  author     = {Vorobeychik, Yevgeniy and An, Bo and Tambe, Milind and
                Singh, Satinder},
  title      = {Computing Solutions in Infinite-Horizon Discounted Adversarial
                Patrolling Games},
  editor     = {Chien, Steve A. and Do, Minh Binh and Fern, Alan and
                Ruml, Wheeler},
  booktitle  = {Proceedings of the Twenty-Fourth International Conference on
                Automated Planning and Scheduling, {ICAPS} 2014, Portsmouth,
                New Hampshire, USA, June 21-26, 2014},
  year       = {2014},
  publisher  = {{AAAI}},
  url        = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS14/paper/view/7783},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aips/VorobeychikATS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2014 paper. The acronym {EVOI} is braced so sentence-casing styles
% do not lowercase it.
@inproceedings{DBLP:conf/aistats/CohnSD14,
  author     = {Cohn, Robert and Singh, Satinder and Durfee, Edmund H.},
  title      = {Characterizing {EVOI}-Sufficient k-Response Query Sets in
                Decision Problems},
  booktitle  = {Proceedings of the Seventeenth International Conference on
                Artificial Intelligence and Statistics, {AISTATS} 2014,
                Reykjavik, Iceland, April 22-25, 2014},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {33},
  pages      = {131--139},
  year       = {2014},
  publisher  = {JMLR.org},
  url        = {http://proceedings.mlr.press/v33/cohn14.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/CohnSD14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/KuleszaRS14,
  author     = {Kulesza, Alex and Rao, N. Raj and Singh, Satinder},
  title      = {Low-Rank Spectral Learning},
  booktitle  = {Proceedings of the Seventeenth International Conference on
                Artificial Intelligence and Statistics, {AISTATS} 2014,
                Reykjavik, Iceland, April 22-25, 2014},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {33},
  pages      = {522--530},
  year       = {2014},
  publisher  = {JMLR.org},
  url        = {http://proceedings.mlr.press/v33/kulesza14.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/KuleszaRS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/JiangSL14,
  author     = {Jiang, Nan and Singh, Satinder and Lewis, Richard L.},
  title      = {Improving {UCT} planning via approximate homomorphisms},
  editor     = {Bazzan, Ana L. C. and Huhns, Michael N. and
                Lomuscio, Alessio and Scerri, Paul},
  booktitle  = {International conference on Autonomous Agents and Multi-Agent
                Systems, {AAMAS} '14, Paris, France, May 5-9, 2014},
  pages      = {1289--1296},
  year       = {2014},
  publisher  = {{IFAAMAS/ACM}},
  url        = {http://dl.acm.org/citation.cfm?id=2617453},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/JiangSL14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/KaramPSMAHM14,
  author     = {Karam, Zahi N. and Provost, Emily Mower and Singh, Satinder and
                Montgomery, Jennifer and Archer, Christopher and
                Harrington, Gloria and McInnis, Melvin G.},
  title      = {Ecologically valid long-term mood monitoring of individuals
                with bipolar disorder using speech},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal
                Processing, {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  pages      = {4858--4862},
  year       = {2014},
  publisher  = {{IEEE}},
  doi        = {10.1109/ICASSP.2014.6854525},
  url        = {https://doi.org/10.1109/ICASSP.2014.6854525},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/KaramPSMAHM14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2014 paper. {Atari} and {Monte-Carlo} are braced so sentence-casing
% styles keep the proper nouns capitalised. The booktitle date was missing
% the comma before the year ("December 8-13 2014").
@inproceedings{DBLP:conf/nips/GuoSLLW14,
  author     = {Guo, Xiaoxiao and Singh, Satinder and Lee, Honglak and
                Lewis, Richard L. and Wang, Xiaoshi},
  title      = {Deep Learning for Real-Time {Atari} Game Play Using Offline
                {Monte-Carlo} Tree Search Planning},
  editor     = {Ghahramani, Zoubin and Welling, Max and Cortes, Corinna and
                Lawrence, Neil D. and Weinberger, Kilian Q.},
  booktitle  = {Advances in Neural Information Processing Systems 27: Annual
                Conference on Neural Information Processing Systems 2014,
                December 8-13, 2014, Montreal, Quebec, Canada},
  pages      = {3338--3346},
  year       = {2014},
  url        = {https://proceedings.neurips.cc/paper/2014/hash/8bb88f80d334b1869781beb89f7b73be-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GuoSLLW14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/TalvitieS14,
  author     = {Talvitie, Erik and Singh, Satinder},
  title      = {Learning to Make Predictions In Partially Observable
                Environments Without a Generative Model},
  journal    = {CoRR},
  volume     = {abs/1401.3870},
  year       = {2014},
  eprinttype = {arXiv},
  eprint     = {1401.3870},
  url        = {http://arxiv.org/abs/1401.3870},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/TalvitieS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/topics/LewisSS13,
  author     = {Lewis, Richard L. and Shvartsman, Michael and Singh, Satinder},
  title      = {The Adaptive Nature of Eye Movements in Linguistic Tasks: How
                Payoff and Architecture Shape Speed-Accuracy Trade-Offs},
  journal    = {Top. Cogn. Sci.},
  volume     = {5},
  number     = {3},
  pages      = {581--610},
  year       = {2013},
  doi        = {10.1111/TOPS.12032},
  url        = {https://doi.org/10.1111/tops.12032},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/topics/LewisSS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/hci/FearyBCHLSS13,
  author     = {Feary, Michael and Billman, Dorrit and Chen, Xiuli and
                Howes, Andrew and Lewis, Richard L. and Sherry, Lance and
                Singh, Satinder},
  title      = {Linking Context to Evaluation in the Design of Safety Critical
                Interfaces},
  editor     = {Kurosu, Masaaki},
  booktitle  = {Human-Computer Interaction. Human-Centred Design Approaches,
                Methods, Tools, and Environments - 15th International
                Conference, {HCI} International 2013, Las Vegas, NV, USA,
                July 21-26, 2013, Proceedings, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {8004},
  pages      = {193--202},
  year       = {2013},
  publisher  = {Springer},
  doi        = {10.1007/978-3-642-39232-0\_22},
  url        = {https://doi.org/10.1007/978-3-642-39232-0\_22},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/hci/FearyBCHLSS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GuoSL13,
  author     = {Guo, Xiaoxiao and Singh, Satinder and Lewis, Richard L.},
  title      = {Reward Mapping for Transfer in Long-Lived Agents},
  editor     = {Burges, Christopher J. C. and Bottou, L{\'{e}}on and
                Ghahramani, Zoubin and Weinberger, Kilian Q.},
  booktitle  = {Advances in Neural Information Processing Systems 26: 27th
                Annual Conference on Neural Information Processing Systems
                2013. Proceedings of a meeting held December 5-8, 2013,
                Lake Tahoe, Nevada, United States},
  pages      = {2130--2138},
  year       = {2013},
  url        = {https://proceedings.neurips.cc/paper/2013/hash/58c54802a9fb9526cd0923353a34a7ae-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GuoSL13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1301-2281,
  author     = {Kearns, Michael J. and Littman, Michael L. and Singh, Satinder},
  title      = {Graphical Models for Game Theory},
  journal    = {CoRR},
  volume     = {abs/1301.2281},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1301.2281},
  url        = {http://arxiv.org/abs/1301.2281},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-2281.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1301-3867,
  author     = {Kearns, Michael J. and Mansour, Yishay and Singh, Satinder},
  title      = {Fast Planning in Stochastic Games},
  journal    = {CoRR},
  volume     = {abs/1301.3867},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1301.3867},
  url        = {http://arxiv.org/abs/1301.3867},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-3867.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1301-3892,
  author     = {Singh, Satinder and Kearns, Michael J. and Mansour, Yishay},
  title      = {Nash Convergence of Gradient Dynamics in Iterated General-Sum
                Games},
  journal    = {CoRR},
  volume     = {abs/1301.3892},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1301.3892},
  url        = {http://arxiv.org/abs/1301.3892},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-3892.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1301-6718,
  author     = {Mansour, Yishay and Singh, Satinder},
  title      = {On the Complexity of Policy Iteration},
  journal    = {CoRR},
  volume     = {abs/1301.6718},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1301.6718},
  url        = {http://arxiv.org/abs/1301.6718},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-6718.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. The acronym {POMDPs} is braced so sentence-casing styles
% do not lowercase it.
@article{DBLP:journals/corr/abs-1301-6719,
  author     = {McAllester, David A. and Singh, Satinder},
  title      = {Approximate Planning for Factored {POMDPs} using Belief State
                Simplification},
  journal    = {CoRR},
  volume     = {abs/1301.6719},
  year       = {2013},
  eprinttype = {arXiv},
  eprint     = {1301.6719},
  url        = {http://arxiv.org/abs/1301.6719},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-6719.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/aim/AgmonAAABDGGGJKKLLMMMOOOPPRRTSSSSSSUW12,
  author     = {Agmon, Noa and Agrawal, Vikas and Aha, David W. and
                Aloimonos, Yiannis and Buckley, Donagh and Doshi, Prashant and
                Geib, Christopher W. and Grasso, Floriana and
                Green, Nancy L. and Johnston, Benjamin and Kaliski, Burt and
                Kiekintveld, Christopher and Law, Edith and
                Lieberman, Henry and Mengshoel, Ole J. and Metzler, Ted and
                Modayil, Joseph and Oard, Douglas W. and Onder, Nilufer and
                O'Sullivan, Barry and Pastra, Katerina and Precup, Doina and
                Ramachandran, Sowmya and Reed, Chris and
                Talay, Sanem Sariel and Selker, Ted and Shastri, Lokendra and
                Smith, Stephen F. and Singh, Satinder and
                Srivastava, Siddharth and Sukthankar, Gita and
                Uthus, David C. and Williams, Mary{-}Anne},
  title      = {Reports of the {AAAI} 2011 Conference Workshops},
  journal    = {{AI} Mag.},
  volume     = {33},
  number     = {1},
  pages      = {57--70},
  year       = {2012},
  doi        = {10.1609/AIMAG.V33I1.2390},
  url        = {https://doi.org/10.1609/aimag.v33i1.2390},
  timestamp  = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/aim/AgmonAAABDGGGJKKLLMMMOOOPPRRTSSSSSSUW12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/AnKKSSTV12,
  author     = {An, Bo and Kempe, David and Kiekintveld, Christopher and
                Shieh, Eric and Singh, Satinder and Tambe, Milind and
                Vorobeychik, Yevgeniy},
  title      = {Security Games with Limited Surveillance},
  editor     = {Hoffmann, J{\"{o}}rg and Selman, Bart},
  booktitle  = {Proceedings of the Twenty-Sixth {AAAI} Conference on Artificial
                Intelligence, July 22-26, 2012, Toronto, Ontario, Canada},
  pages      = {1241--1248},
  year       = {2012},
  publisher  = {{AAAI} Press},
  doi        = {10.1609/AAAI.V26I1.8236},
  url        = {https://doi.org/10.1609/aaai.v26i1.8236},
  timestamp  = {Mon, 04 Sep 2023 15:56:47 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/AnKKSSTV12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2012 paper. {Stackelberg} is braced so sentence-casing styles keep the
% proper noun capitalised.
@inproceedings{DBLP:conf/aaai/VorobeychikS12,
  author     = {Vorobeychik, Yevgeniy and Singh, Satinder},
  title      = {Computing {Stackelberg} Equilibria in Discounted Stochastic
                Games},
  editor     = {Hoffmann, J{\"{o}}rg and Selman, Bart},
  booktitle  = {Proceedings of the Twenty-Sixth {AAAI} Conference on Artificial
                Intelligence, July 22-26, 2012, Toronto, Ontario, Canada},
  pages      = {1478--1484},
  year       = {2012},
  publisher  = {{AAAI} Press},
  doi        = {10.1609/AAAI.V26I1.8234},
  url        = {https://doi.org/10.1609/aaai.v26i1.8234},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/VorobeychikS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaaiss/AnKKSSTV12,
  author     = {An, Bo and Kempe, David and Kiekintveld, Christopher and
                Shieh, Eric Anyung and Singh, Satinder and Tambe, Milind and
                Vorobeychik, Yevgeniy},
  title      = {Security Games with Limited Surveillance: An Initial Report},
  booktitle  = {Game Theory for Security, Sustainability, and Health, Papers
                from the 2012 {AAAI} Spring Symposium, Palo Alto, California,
                USA, March 26-28, 2012},
  series     = {{AAAI} Technical Report},
  volume     = {{SS-12-03}},
  year       = {2012},
  publisher  = {{AAAI}},
  url        = {http://www.aaai.org/ocs/index.php/SSS/SSS12/paper/view/4262},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaaiss/AnKKSSTV12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aamas/BratmanSSL12,
  author       = {Jeshua Bratman and
                  Satinder Singh and
                  Jonathan Sorg and
                  Richard L. Lewis},
  editor       = {Wiebe van der Hoek and
                  Lin Padgham and
                  Vincent Conitzer and
                  Michael Winikoff},
  title        = {Strong mitigation: nesting search for good policies within search
                  for good reward},
  booktitle    = {International Conference on Autonomous Agents and Multiagent Systems,
                  {AAMAS} 2012, Valencia, Spain, June 4-8, 2012 {(3} Volumes)},
  pages        = {407--414},
  publisher    = {{IFAAMAS}},
  year         = {2012},
  url          = {http://dl.acm.org/citation.cfm?id=2343634},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aamas/BratmanSSL12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aamas/DuongWSK12,
  author       = {Quang Duong and
                  Michael P. Wellman and
                  Satinder Singh and
                  Michael J. Kearns},
  editor       = {Wiebe van der Hoek and
                  Lin Padgham and
                  Vincent Conitzer and
                  Michael Winikoff},
  title        = {Learning and predicting dynamic networked behavior with graphical
                  multiagent models},
  booktitle    = {International Conference on Autonomous Agents and Multiagent Systems,
                  {AAMAS} 2012, Valencia, Spain, June 4-8, 2012 {(3} Volumes)},
  pages        = {441--448},
  publisher    = {{IFAAMAS}},
  year         = {2012},
  url          = {http://dl.acm.org/citation.cfm?id=2343639},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aamas/DuongWSK12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aamas/WitwickiCDS12,
  author       = {Stefan J. Witwicki and
                  Inn{-}Tung Chen and
                  Edmund H. Durfee and
                  Satinder Singh},
  editor       = {Wiebe van der Hoek and
                  Lin Padgham and
                  Vincent Conitzer and
                  Michael Winikoff},
  title        = {Planning and evaluating multiagent influences under reward uncertainty},
  booktitle    = {International Conference on Autonomous Agents and Multiagent Systems,
                  {AAMAS} 2012, Valencia, Spain, June 4-8, 2012 {(3} Volumes)},
  pages        = {1277--1278},
  publisher    = {{IFAAMAS}},
  year         = {2012},
  url          = {http://dl.acm.org/citation.cfm?id=2343961},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aamas/WitwickiCDS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdl-epirob/LiuSLQ12,
  author       = {Bingyao Liu and
                  Satinder Singh and
                  Richard L. Lewis and
                  Shiyin Qin},
  title        = {Optimal rewards in multiagent teams},
  booktitle    = {2012 {IEEE} International Conference on Development and Learning and
                  Epigenetic Robotics, {ICDL-EPIROB} 2012, San Diego, CA, USA, November
                  7-9, 2012},
  pages        = {1--8},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/DevLrn.2012.6400862},
  doi          = {10.1109/DEVLRN.2012.6400862},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icdl-epirob/LiuSLQ12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sigecom/SandholmS12,
  author       = {Tuomas Sandholm and
                  Satinder Singh},
  editor       = {Boi Faltings and
                  Kevin Leyton{-}Brown and
                  Panos Ipeirotis},
  title        = {Lossy stochastic game abstraction with bounds},
  booktitle    = {Proceedings of the 13th {ACM} Conference on Electronic Commerce, {EC}
                  2012, Valencia, Spain, June 4-8, 2012},
  pages        = {880--897},
  publisher    = {{ACM}},
  year         = {2012},
  url          = {https://doi.org/10.1145/2229012.2229079},
  doi          = {10.1145/2229012.2229079},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sigecom/SandholmS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1203-3518,
  author       = {Jonathan Sorg and
                  Satinder Singh and
                  Richard L. Lewis},
  title        = {Variance-Based Rewards for Approximate {Bayesian} Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1203.3518},
  year         = {2012},
  url          = {http://arxiv.org/abs/1203.3518},
  eprinttype    = {arXiv},
  eprint       = {1203.3518},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1203-3518.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1206-3248,
  author       = {Quang Duong and
                  Michael P. Wellman and
                  Satinder Singh},
  title        = {Knowledge Combination in Graphical Multiagent Model},
  journal      = {CoRR},
  volume       = {abs/1206.3248},
  year         = {2012},
  url          = {http://arxiv.org/abs/1206.3248},
  eprinttype    = {arXiv},
  eprint       = {1206.3248},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1206-3248.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/CavalloPS12,
  author       = {Ruggiero Cavallo and
                  David C. Parkes and
                  Satinder Singh},
  title        = {Optimal Coordinated Planning Amongst Self-Interested Agents with Private
                  State},
  journal      = {CoRR},
  volume       = {abs/1206.6820},
  year         = {2012},
  url          = {http://arxiv.org/abs/1206.6820},
  eprinttype    = {arXiv},
  eprint       = {1206.6820},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/CavalloPS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1207-1416,
  author       = {Matthew R. Rudary and
                  Satinder Singh and
                  David Wingate},
  title        = {Predictive Linear-{Gaussian} Models of Stochastic Dynamical Systems},
  journal      = {CoRR},
  volume       = {abs/1207.1416},
  year         = {2012},
  url          = {http://arxiv.org/abs/1207.1416},
  eprinttype    = {arXiv},
  eprint       = {1207.1416},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1207-1416.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1207-4167,
  author       = {Satinder Singh and
                  Michael R. James and
                  Matthew R. Rudary},
  title        = {Predictive State Representations: {A} New Theory for Modeling Dynamical
                  Systems},
  journal      = {CoRR},
  volume       = {abs/1207.4167},
  year         = {2012},
  url          = {http://arxiv.org/abs/1207.4167},
  eprinttype    = {arXiv},
  eprint       = {1207.4167},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1207-4167.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/basesearch/Singh11,
  author       = {Satinder Pal Singh},
  title        = {{IP} Geolocation in Metropolitan Areas},
  school       = {University of Maryland, College Park, MD, {USA}},
  year         = {2011},
  url          = {https://hdl.handle.net/1903/11505},
  timestamp    = {Wed, 04 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/phd/basesearch/Singh11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jair/TalvitieS11,
  author       = {Erik Talvitie and
                  Satinder Singh},
  title        = {Learning to Make Predictions In Partially Observable Environments
                  Without a Generative Model},
  journal      = {J. Artif. Intell. Res.},
  volume       = {42},
  pages        = {353--392},
  year         = {2011},
  url          = {https://doi.org/10.1613/jair.3396},
  doi          = {10.1613/JAIR.3396},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jair/TalvitieS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/SorgSL11,
  author       = {Jonathan Sorg and
                  Satinder Singh and
                  Richard L. Lewis},
  editor       = {Wolfram Burgard and
                  Dan Roth},
  title        = {Optimal Rewards versus Leaf-Evaluation Heuristics in Planning Agents},
  booktitle    = {Proceedings of the Twenty-Fifth {AAAI} Conference on Artificial Intelligence,
                  {AAAI} 2011, San Francisco, California, USA, August 7-11, 2011},
  pages        = {465--470},
  publisher    = {{AAAI} Press},
  year         = {2011},
  url          = {https://doi.org/10.1609/aaai.v25i1.7931},
  doi          = {10.1609/AAAI.V25I1.7931},
  timestamp    = {Mon, 04 Sep 2023 16:05:54 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/SorgSL11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/CohnDS11,
  author       = {Robert Cohn and
                  Edmund H. Durfee and
                  Satinder Singh},
  editor       = {Wolfram Burgard and
                  Dan Roth},
  title        = {Comparing Action-Query Strategies in Semi-Autonomous Agents},
  booktitle    = {Proceedings of the Twenty-Fifth {AAAI} Conference on Artificial Intelligence,
                  {AAAI} 2011, San Francisco, California, USA, August 7-11, 2011},
  pages        = {1102--1107},
  publisher    = {{AAAI} Press},
  year         = {2011},
  url          = {https://doi.org/10.1609/aaai.v25i1.7992},
  doi          = {10.1609/AAAI.V25I1.7992},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/CohnDS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/CohnDS11,
  author       = {Robert Cohn and
                  Edmund H. Durfee and
                  Satinder Singh},
  editor       = {Liz Sonenberg and
                  Peter Stone and
                  Kagan Tumer and
                  Pinar Yolum},
  title        = {Comparing action-query strategies in semi-autonomous agents},
  booktitle    = {10th International Conference on Autonomous Agents and Multiagent
                  Systems {(AAMAS} 2011), Taipei, Taiwan, May 2-6, 2011, Volume 1-3},
  pages        = {1287--1288},
  publisher    = {{IFAAMAS}},
  year         = {2011},
  url          = {http://portal.acm.org/citation.cfm?id=2034529\&CFID=69154334\&CFTOKEN=45298625},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/CohnDS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sigmetrics/SinghBLBLS11,
  author       = {Satinder Pal Singh and
                  Randolph Baden and
                  Choon Lee and
                  Bobby Bhattacharjee and
                  Richard J. La and
                  Mark A. Shayman},
  editor       = {Arif Merchant and
                  Kimberly Keeton and
                  Dan Rubenstein},
  title        = {{IP} geolocation in metropolitan areas},
  booktitle    = {{SIGMETRICS} 2011, Proceedings of the 2011 {ACM} {SIGMETRICS} International
                  Conference on Measurement and Modeling of Computer Systems, San Jose,
                  CA, USA, 07-11 June 2011 (Co-located with {FCRC} 2011)},
  pages        = {155--156},
  publisher    = {{ACM}},
  year         = {2011},
  url          = {https://doi.org/10.1145/1993744.1993803},
  doi          = {10.1145/1993744.1993803},
  timestamp    = {Sun, 01 Aug 2021 14:20:40 +0200},
  biburl       = {https://dblp.org/rec/conf/sigmetrics/SinghBLBLS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/socialcom/DuongWS11,
  author       = {Quang Duong and
                  Michael P. Wellman and
                  Satinder Singh},
  title        = {Modeling Information Diffusion in Networks with Unobserved Links},
  booktitle    = {PASSAT/SocialCom 2011, Privacy, Security, Risk and Trust (PASSAT),
                  2011 {IEEE} Third International Conference on and 2011 {IEEE} Third
                  International Conference on Social Computing (SocialCom), Boston,
                  MA, USA, 9-11 Oct., 2011},
  pages        = {362--369},
  publisher    = {{IEEE} Computer Society},
  year         = {2011},
  url          = {https://doi.org/10.1109/PASSAT/SocialCom.2011.50},
  doi          = {10.1109/PASSAT/SOCIALCOM.2011.50},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/socialcom/DuongWS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1106-0676,
  author       = {Michael J. Kearns and
                  Diane J. Litman and
                  Satinder Singh and
                  Marilyn A. Walker},
  title        = {Optimizing Dialogue Management with Reinforcement Learning: Experiments
                  with the {NJFun} System},
  journal      = {CoRR},
  volume       = {abs/1106.0676},
  year         = {2011},
  url          = {http://arxiv.org/abs/1106.0676},
  eprinttype    = {arXiv},
  eprint       = {1106.0676},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1106-0676.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1106-0678,
  author       = {Michael J. Kearns and
                  Michael L. Littman and
                  Satinder Singh and
                  Peter Stone},
  title        = {{ATTac-2000}: An Adaptive Autonomous Bidding Agent},
  journal      = {CoRR},
  volume       = {abs/1106.0678},
  year         = {2011},
  url          = {http://arxiv.org/abs/1106.0678},
  eprinttype    = {arXiv},
  eprint       = {1106.0678},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1106-0678.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/aim/ParkesCCS10,
  author       = {David C. Parkes and
                  Ruggiero Cavallo and
                  Florin Constantin and
                  Satinder Singh},
  title        = {Dynamic Incentive Mechanisms},
  journal      = {{AI} Mag.},
  volume       = {31},
  number       = {4},
  pages        = {79--94},
  year         = {2010},
  url          = {https://doi.org/10.1609/aimag.v31i4.2316},
  doi          = {10.1609/AIMAG.V31I4.2316},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/aim/ParkesCCS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tamd/SinghLBS10,
  author       = {Satinder Singh and
                  Richard L. Lewis and
                  Andrew G. Barto and
                  Jonathan Sorg},
  title        = {Intrinsically Motivated Reinforcement Learning: An Evolutionary Perspective},
  journal      = {{IEEE} Trans. Auton. Ment. Dev.},
  volume       = {2},
  number       = {2},
  pages        = {70--82},
  year         = {2010},
  url          = {https://doi.org/10.1109/TAMD.2010.2051031},
  doi          = {10.1109/TAMD.2010.2051031},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tamd/SinghLBS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/SorgS10,
  author       = {Jonathan Sorg and
                  Satinder Singh},
  editor       = {Wiebe van der Hoek and
                  Gal A. Kaminka and
                  Yves Lesp{\'{e}}rance and
                  Michael Luck and
                  Sandip Sen},
  title        = {Linear options},
  booktitle    = {9th International Conference on Autonomous Agents and Multiagent Systems
                  {(AAMAS} 2010), Toronto, Canada, May 10-14, 2010, Volume 1-3},
  pages        = {31--38},
  publisher    = {{IFAAMAS}},
  year         = {2010},
  url          = {https://dl.acm.org/citation.cfm?id=1838211},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/SorgS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/DuongWSV10,
  author       = {Quang Duong and
                  Michael P. Wellman and
                  Satinder Singh and
                  Yevgeniy Vorobeychik},
  editor       = {Wiebe van der Hoek and
                  Gal A. Kaminka and
                  Yves Lesp{\'{e}}rance and
                  Michael Luck and
                  Sandip Sen},
  title        = {History-dependent graphical multiagent models},
  booktitle    = {9th International Conference on Autonomous Agents and Multiagent Systems
                  {(AAMAS} 2010), Toronto, Canada, May 10-14, 2010, Volume 1-3},
  pages        = {1215--1222},
  publisher    = {{IFAAMAS}},
  year         = {2010},
  url          = {https://dl.acm.org/citation.cfm?id=1838364},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/DuongWSV10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iat/CohnMDS10,
  author       = {Robert Cohn and
                  Michael Maxim and
                  Edmund H. Durfee and
                  Satinder Singh},
  editor       = {Jimmy Xiangji Huang and
                  Ali A. Ghorbani and
                  Mohand{-}Said Hacid and
                  Takahira Yamaguchi},
  title        = {Selecting Operator Queries Using Expected Myopic Gain},
  booktitle    = {Proceedings of the 2010 {IEEE/WIC/ACM} International Conference on
                  Intelligent Agent Technology, {IAT} 2010, Toronto, Canada, August
                  31 - September 3, 2010},
  pages        = {40--47},
  publisher    = {{IEEE} Computer Society Press},
  year         = {2010},
  url          = {https://doi.org/10.1109/WI-IAT.2010.142},
  doi          = {10.1109/WI-IAT.2010.142},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iat/CohnMDS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SorgSL10,
  author       = {Jonathan Sorg and
                  Satinder Singh and
                  Richard L. Lewis},
  editor       = {Johannes F{\"{u}}rnkranz and
                  Thorsten Joachims},
  title        = {Internal Rewards Mitigate Agent Boundedness},
  booktitle    = {Proceedings of the 27th International Conference on Machine Learning
                  (ICML-10), June 21-24, 2010, Haifa, Israel},
  pages        = {1007--1014},
  publisher    = {Omnipress},
  year         = {2010},
  url          = {https://icml.cc/Conferences/2010/papers/442.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SorgSL10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SorgSL10,
  author       = {Jonathan Sorg and
                  Satinder Singh and
                  Richard L. Lewis},
  editor       = {John D. Lafferty and
                  Christopher K. I. Williams and
                  John Shawe{-}Taylor and
                  Richard S. Zemel and
                  Aron Culotta},
  title        = {Reward Design via Online Gradient Ascent},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {2190--2198},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/168908dd3227b8358eababa07fcaf091-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SorgSL10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/SorgSL10,
  author       = {Jonathan Sorg and
                  Satinder Singh and
                  Richard L. Lewis},
  editor       = {Peter Gr{\"{u}}nwald and
                  Peter Spirtes},
  title        = {Variance-Based Rewards for Approximate {Bayesian} Reinforcement Learning},
  booktitle    = {{UAI} 2010, Proceedings of the Twenty-Sixth Conference on Uncertainty
                  in Artificial Intelligence, Catalina Island, CA, USA, July 8-11, 2010},
  pages        = {564--571},
  publisher    = {{AUAI} Press},
  year         = {2010},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2150\&proceeding\_id=26},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/SorgSL10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/JamesS09,
  author       = {Michael R. James and
                  Satinder Singh},
  editor       = {Carles Sierra and
                  Cristiano Castelfranchi and
                  Keith S. Decker and
                  Jaime Sim{\~{a}}o Sichman},
  title        = {{SarsaLandmark}: an algorithm for learning in {POMDPs} with landmarks},
  booktitle    = {8th International Joint Conference on Autonomous Agents and Multiagent
                  Systems {(AAMAS} 2009), Budapest, Hungary, May 10-15, 2009, Volume
                  1},
  pages        = {585--591},
  publisher    = {{IFAAMAS}},
  year         = {2009},
  url          = {https://dl.acm.org/citation.cfm?id=1558094},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/JamesS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/SorgS09,
  author       = {Jonathan Sorg and
                  Satinder Singh},
  editor       = {Carles Sierra and
                  Cristiano Castelfranchi and
                  Keith S. Decker and
                  Jaime Sim{\~{a}}o Sichman},
  title        = {Transfer via soft homomorphisms},
  booktitle    = {8th International Joint Conference on Autonomous Agents and Multiagent
                  Systems {(AAMAS} 2009), Budapest, Hungary, May 10-15, 2009, Volume
                  2},
  pages        = {741--748},
  publisher    = {{IFAAMAS}},
  year         = {2009},
  url          = {https://dl.acm.org/citation.cfm?id=1558114},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/SorgS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/DuongVSW09,
  author       = {Quang Duong and
                  Yevgeniy Vorobeychik and
                  Satinder Singh and
                  Michael P. Wellman},
  editor       = {Craig Boutilier},
  title        = {Learning Graphical Game Models},
  booktitle    = {{IJCAI} 2009, Proceedings of the 21st International Joint Conference
                  on Artificial Intelligence, Pasadena, California, USA, July 11-17,
                  2009},
  pages        = {116--121},
  year         = {2009},
  url          = {http://ijcai.org/Proceedings/09/Papers/030.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/DuongVSW09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/TalvitieS09,
  author       = {Erik Talvitie and
                  Satinder Singh},
  editor       = {Craig Boutilier},
  title        = {Maintaining Predictions over Time without a Model},
  booktitle    = {{IJCAI} 2009, Proceedings of the 21st International Joint Conference
                  on Artificial Intelligence, Pasadena, California, USA, July 11-17,
                  2009},
  pages        = {1249--1254},
  year         = {2009},
  url          = {http://ijcai.org/Proceedings/09/Papers/210.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/TalvitieS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/WolfeJS08,
  author       = {Britton Wolfe and
                  Michael R. James and
                  Satinder Singh},
  editor       = {Lin Padgham and
                  David C. Parkes and
                  J{\"{o}}rg P. M{\"{u}}ller and
                  Simon Parsons},
  title        = {Approximate predictive state representations},
  booktitle    = {7th International Joint Conference on Autonomous Agents and Multiagent
                  Systems {(AAMAS} 2008), Estoril, Portugal, May 12-16, 2008, Volume
                  1},
  pages        = {363--370},
  publisher    = {{IFAAMAS}},
  year         = {2008},
  url          = {https://dl.acm.org/citation.cfm?id=1402437},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/WolfeJS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/WingateS08,
  author       = {David Wingate and
                  Satinder Singh},
  editor       = {William W. Cohen and
                  Andrew McCallum and
                  Sam T. Roweis},
  title        = {Efficiently learning linear-linear exponential family predictive representations
                  of state},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Fifth International Conference
                  {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {307},
  pages        = {1176--1183},
  publisher    = {{ACM}},
  year         = {2008},
  url          = {https://doi.org/10.1145/1390156.1390304},
  doi          = {10.1145/1390156.1390304},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WingateS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/isaim/RudaryS08,
  author       = {Matthew R. Rudary and
                  Satinder Singh},
  title        = {Predictive Linear-{Gaussian} Models of Dynamical Systems with Vector-Valued
                  Actions and Observations},
  booktitle    = {International Symposium on Artificial Intelligence and Mathematics,
                  {ISAIM} 2008, Fort Lauderdale, Florida, USA, January 2-4, 2008},
  year         = {2008},
  url          = {http://isaim2008.unl.edu/PAPERS/TechnicalProgram/ISAIM2008\_0042\_8f2d860dcc7387e100135b1737766088.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/isaim/RudaryS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/isaim/TalvitieWS08,
  author       = {Erik Talvitie and
                  Britton Wolfe and
                  Satinder Singh},
  title        = {Building Incomplete but Accurate Models},
  booktitle    = {International Symposium on Artificial Intelligence and Mathematics,
                  {ISAIM} 2008, Fort Lauderdale, Florida, USA, January 2-4, 2008},
  year         = {2008},
  url          = {http://isaim2008.unl.edu/PAPERS/TechnicalProgram/ISAIM2008\_0037\_334f1e59f8f09d4cca350159a673fd78.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/isaim/TalvitieWS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/TalvitieS08,
  author       = {Erik Talvitie and
                  Satinder Singh},
  editor       = {Daphne Koller and
                  Dale Schuurmans and
                  Yoshua Bengio and
                  L{\'{e}}on Bottou},
  title        = {Simple Local Models for Complex Dynamical Systems},
  booktitle    = {Advances in Neural Information Processing Systems 21, Proceedings
                  of the Twenty-Second Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages        = {1617--1624},
  publisher    = {Curran Associates, Inc.},
  year         = {2008},
  url          = {https://proceedings.neurips.cc/paper/2008/hash/f76a89f0cb91bc419542ce9fa43902dc-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/TalvitieS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/DuongWS08,
  author       = {Quang Duong and
                  Michael P. Wellman and
                  Satinder Singh},
  editor       = {David A. McAllester and
                  Petri Myllym{\"{a}}ki},
  title        = {Knowledge Combination in Graphical Multiagent Models},
  booktitle    = {{UAI} 2008, Proceedings of the 24th Conference in Uncertainty in Artificial
                  Intelligence, Helsinki, Finland, July 9-12, 2008},
  pages        = {153--160},
  publisher    = {{AUAI} Press},
  year         = {2008},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1947\&proceeding\_id=24},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/DuongWS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/VorobeychikWS07,
  author       = {Yevgeniy Vorobeychik and
                  Michael P. Wellman and
                  Satinder Singh},
  title        = {Learning payoff functions in infinite games},
  journal      = {Mach. Learn.},
  volume       = {67},
  number       = {1-2},
  pages        = {145--168},
  year         = {2007},
  url          = {https://doi.org/10.1007/s10994-007-0715-8},
  doi          = {10.1007/S10994-007-0715-8},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/VorobeychikWS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/SoniS07,
  author       = {Vishal Soni and
                  Satinder Singh},
  title        = {Abstraction in Predictive State Representations},
  booktitle    = {Proceedings of the Twenty-Second {AAAI} Conference on Artificial Intelligence,
                  July 22-26, 2007, Vancouver, British Columbia, Canada},
  pages        = {639--644},
  publisher    = {{AAAI} Press},
  year         = {2007},
  url          = {http://www.aaai.org/Library/AAAI/2007/aaai07-101.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/SoniS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI-07 conference paper. Same five authors as the SIGMOD 2007 record in this file
% (conf/sigmod/LiCYSJ07), but a different title and venue, so the two keys are not duplicates.
@inproceedings{DBLP:conf/aaai/LiCYSJ07,
  author       = {Yunyao Li and
                  Ishan Chaudhuri and
                  Huahai Yang and
                  Satinder Singh and
                  H. V. Jagadish},
  title        = {Enabling Domain-Awareness for a Generic Natural Language Interface},
  booktitle    = {Proceedings of the Twenty-Second {AAAI} Conference on Artificial Intelligence,
                  July 22-26, 2007, Vancouver, British Columbia, Canada},
  pages        = {833--838},
  publisher    = {{AAAI} Press},
  year         = {2007},
  url          = {http://www.aaai.org/Library/AAAI/2007/aaai07-132.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/LiCYSJ07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2007 paper. pages holds a single number (67) rather than a range.
% NOTE(review): presumably a first page or an article number; confirm against the
% publisher record before turning it into a range.
@inproceedings{DBLP:conf/atal/SoniSW07,
  author       = {Vishal Soni and
                  Satinder Singh and
                  Michael P. Wellman},
  editor       = {Edmund H. Durfee and
                  Makoto Yokoo and
                  Michael N. Huhns and
                  Onn Shehory},
  title        = {Constraint satisfaction algorithms for graphical games},
  booktitle    = {6th International Joint Conference on Autonomous Agents and Multiagent
                  Systems {(AAMAS} 2007), Honolulu, Hawaii, USA, May 14-18, 2007},
  pages        = {67},
  publisher    = {{IFAAMAS}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1329125.1329206},
  doi          = {10.1145/1329125.1329206},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/SoniSW07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2007 paper. pages holds a single number (187) rather than a range.
% NOTE(review): presumably a first page or an article number; confirm against the
% publisher record before turning it into a range.
@inproceedings{DBLP:conf/atal/WingateS07,
  author       = {David Wingate and
                  Satinder Singh},
  editor       = {Edmund H. Durfee and
                  Makoto Yokoo and
                  Michael N. Huhns and
                  Onn Shehory},
  title        = {On discovery and learning of models with predictive representations
                  of state for agents with continuous actions and observations},
  booktitle    = {6th International Joint Conference on Autonomous Agents and Multiagent
                  Systems {(AAMAS} 2007), Honolulu, Hawaii, USA, May 14-18, 2007},
  pages        = {187},
  publisher    = {{IFAAMAS}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1329125.1329352},
  doi          = {10.1145/1329125.1329352},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/WingateS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2007 paper (Hyderabad). This record has no publisher or doi field; url is a
% direct PDF link on ijcai.org.
@inproceedings{DBLP:conf/ijcai/TalvitieS07,
  author       = {Erik Talvitie and
                  Satinder Singh},
  editor       = {Manuela M. Veloso},
  title        = {An Experts Algorithm for Transfer Learning},
  booktitle    = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference
                  on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages        = {1065--1070},
  year         = {2007},
  url          = {http://ijcai.org/Proceedings/07/Papers/172.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/TalvitieS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2007 paper (Hyderabad). This record has no publisher or doi field; url is a
% direct PDF link on ijcai.org.
@inproceedings{DBLP:conf/ijcai/WingateSWS07,
  author       = {David Wingate and
                  Vishal Soni and
                  Britton Wolfe and
                  Satinder Singh},
  editor       = {Manuela M. Veloso},
  title        = {Relational Knowledge with Predictive State Representations},
  booktitle    = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference
                  on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages        = {2035--2040},
  year         = {2007},
  url          = {http://ijcai.org/Proceedings/07/Papers/328.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/WingateSWS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2007 paper (Advances in Neural Information Processing Systems 20).
% The key ends in WingateB07 although the second author is listed as Satinder Singh.
% NOTE(review): presumably DBLP derived the key from another form of the author's name;
% leave the key unchanged because biburl embeds it.
@inproceedings{DBLP:conf/nips/WingateB07,
  author       = {David Wingate and
                  Satinder Singh},
  editor       = {John C. Platt and
                  Daphne Koller and
                  Yoram Singer and
                  Sam T. Roweis},
  title        = {Exponential Family Predictive Representations of State},
  booktitle    = {Advances in Neural Information Processing Systems 20, Proceedings
                  of the Twenty-First Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 3-6, 2007},
  pages        = {1617--1624},
  publisher    = {Curran Associates, Inc.},
  year         = {2007},
  url          = {https://proceedings.neurips.cc/paper/2007/hash/a9a1d5317a33ae8cef33961c34144f84-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/WingateB07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SIGMOD 2007 paper. The system name DaNaLIX is braced in the title so that styles which
% sentence-case titles keep its mixed capitalisation instead of printing "Danalix".
@inproceedings{DBLP:conf/sigmod/LiCYSJ07,
  author       = {Yunyao Li and
                  Ishan Chaudhuri and
                  Huahai Yang and
                  Satinder Singh and
                  H. V. Jagadish},
  editor       = {Chee Yong Chan and
                  Beng Chin Ooi and
                  Aoying Zhou},
  title        = {{DaNaLIX}: a domain-adaptive natural language interface for querying
                  {XML}},
  booktitle    = {Proceedings of the {ACM} {SIGMOD} International Conference on Management
                  of Data, Beijing, China, June 12-14, 2007},
  pages        = {1165--1168},
  publisher    = {{ACM}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1247480.1247643},
  doi          = {10.1145/1247480.1247643},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sigmod/LiCYSJ07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article in Auton. Agents Multi Agent Syst. 13(3), 2006.
% The first author is written in "Last, Jr., First" form: in "First Last" form BibTeX takes
% the final word as the surname, which would make "Jr." the last name. The remaining authors
% keep the file's "First Last" form. LambdaMOO is braced to survive sentence-casing styles.
@article{DBLP:journals/aamas/IsbellKSSSK06,
  author       = {Isbell, Jr., Charles Lee and
                  Michael J. Kearns and
                  Satinder Singh and
                  Christian R. Shelton and
                  Peter Stone and
                  David P. Kormann},
  title        = {Cobot in {LambdaMOO}: An Adaptive Social Statistics Agent},
  journal      = {Auton. Agents Multi Agent Syst.},
  volume       = {13},
  number       = {3},
  pages        = {327--354},
  year         = {2006},
  url          = {https://doi.org/10.1007/s10458-006-0005-z},
  doi          = {10.1007/S10458-006-0005-Z},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/aamas/IsbellKSSSK06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI-06 conference paper (Boston, July 16-20, 2006). The record has no doi or editor
% field; url points at the AAAI library page.
@inproceedings{DBLP:conf/aaai/SoniS06,
  author       = {Vishal Soni and
                  Satinder Singh},
  title        = {Using Homomorphisms to Transfer Options across Continuous Reinforcement
                  Learning Domains},
  booktitle    = {Proceedings, The Twenty-First National Conference on Artificial Intelligence
                  and the Eighteenth Innovative Applications of Artificial Intelligence
                  Conference, July 16-20, 2006, Boston, Massachusetts, {USA}},
  pages        = {494--499},
  publisher    = {{AAAI} Press},
  year         = {2006},
  url          = {http://www.aaai.org/Library/AAAI/2006/aaai06-079.php},
  timestamp    = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/SoniS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI-06 conference paper. "Gaussian" is braced in the title because it derives from a
% proper name and would otherwise be lowercased by styles that sentence-case titles.
% The ICML 2006 record by the same authors (conf/icml/WingateS06) is a different paper.
@inproceedings{DBLP:conf/aaai/WingateS06,
  author       = {David Wingate and
                  Satinder Singh},
  title        = {Mixtures of Predictive Linear {Gaussian} Models for Nonlinear, Stochastic
                  Dynamical Systems},
  booktitle    = {Proceedings, The Twenty-First National Conference on Artificial Intelligence
                  and the Eighteenth Innovative Applications of Artificial Intelligence
                  Conference, July 16-20, 2006, Boston, Massachusetts, {USA}},
  pages        = {524--529},
  publisher    = {{AAAI} Press},
  year         = {2006},
  url          = {http://www.aaai.org/Library/AAAI/2006/aaai06-084.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/WingateS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2006 paper (ACM International Conference Proceeding Series, volume 148).
% "Gaussian" is braced in the title because it derives from a proper name and would
% otherwise be lowercased by styles that sentence-case titles.
@inproceedings{DBLP:conf/icml/RudaryS06,
  author       = {Matthew R. Rudary and
                  Satinder Singh},
  editor       = {William W. Cohen and
                  Andrew W. Moore},
  title        = {Predictive linear-{Gaussian} models of controlled stochastic dynamical
                  systems},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Third International Conference
                  {(ICML} 2006), Pittsburgh, Pennsylvania, USA, June 25-29, 2006},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {148},
  pages        = {777--784},
  publisher    = {{ACM}},
  year         = {2006},
  url          = {https://doi.org/10.1145/1143844.1143942},
  doi          = {10.1145/1143844.1143942},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/RudaryS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2006 paper (ACM International Conference Proceeding Series, volume 148).
% "Gaussian" is braced in the title because it derives from a proper name and would
% otherwise be lowercased by styles that sentence-case titles.
% The AAAI-06 record by the same authors (conf/aaai/WingateS06) is a different paper.
@inproceedings{DBLP:conf/icml/WingateS06,
  author       = {David Wingate and
                  Satinder Singh},
  editor       = {William W. Cohen and
                  Andrew W. Moore},
  title        = {Kernel Predictive Linear {Gaussian} models for nonlinear stochastic
                  dynamical systems},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Third International Conference
                  {(ICML} 2006), Pittsburgh, Pennsylvania, USA, June 25-29, 2006},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {148},
  pages        = {1017--1024},
  publisher    = {{ACM}},
  year         = {2006},
  url          = {https://doi.org/10.1145/1143844.1143972},
  doi          = {10.1145/1143844.1143972},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WingateS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2006 paper. volume (148) numbers the entry in the series field
% (ACM International Conference Proceeding Series); it is not a journal volume.
@inproceedings{DBLP:conf/icml/WolfeS06,
  author       = {Britton Wolfe and
                  Satinder Singh},
  editor       = {William W. Cohen and
                  Andrew W. Moore},
  title        = {Predictive state representations with options},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Third International Conference
                  {(ICML} 2006), Pittsburgh, Pennsylvania, USA, June 25-29, 2006},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {148},
  pages        = {1025--1032},
  publisher    = {{ACM}},
  year         = {2006},
  url          = {https://doi.org/10.1145/1143844.1143973},
  doi          = {10.1145/1143844.1143973},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WolfeS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2006 paper. The url query separators were stored as the HTML entity for an ampersand
% (ampersand, hash, 38, semicolon), which is not valid inside a URL; each is now a single
% escaped ampersand, matching the file's convention of escaping & and _ in url fields.
% This record has no pages or doi field.
@inproceedings{DBLP:conf/uai/CavalloPS06,
  author       = {Ruggiero Cavallo and
                  David C. Parkes and
                  Satinder Singh},
  title        = {Optimal Coordinated Planning Amongst Self-Interested Agents with Private
                  State},
  booktitle    = {{UAI} '06, Proceedings of the 22nd Conference in Uncertainty in Artificial
                  Intelligence, Cambridge, MA, USA, July 13-16, 2006},
  publisher    = {{AUAI} Press},
  year         = {2006},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1300\&proceeding\_id=22},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/CavalloPS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AI Mag. 26(1), 2005, with 17 listed authors (one per line below).
% NOTE(review): judging by the title this is a symposium report rather than a research
% paper; confirm before citing it as one.
@article{DBLP:journals/aim/CassimatisLLGKEBSDLMSSLSAD05,
  author       = {Nicholas L. Cassimatis and
                  Sean Luke and
                  Simon D. Levy and
                  Ross W. Gayler and
                  Pentti Kanerva and
                  Chris Eliasmith and
                  Timothy W. Bickmore and
                  Alan C. Schultz and
                  Randall Davis and
                  James A. Landay and
                  Robert C. Miller and
                  Eric Saund and
                  Thomas F. Stahovich and
                  Michael L. Littman and
                  Satinder Singh and
                  Shlomo Argamon and
                  Shlomo Dubnov},
  title        = {Reports on the 2004 {AAAI} Fall Symposia},
  journal      = {{AI} Mag.},
  volume       = {26},
  number       = {1},
  pages        = {98--102},
  year         = {2005},
  url          = {https://doi.org/10.1609/aimag.v26i1.1805},
  doi          = {10.1609/AIMAG.V26I1.1805},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/aim/CassimatisLLGKEBSDLMSSLSAD05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article in Comput. Intell. 21(1), 2005. The same six authors (in a different
% order) appear on the CG 2004 record conf/cg/EstelleVWSKS04 in this file; the titles and
% venues differ, so the two are separate records.
@article{DBLP:journals/ci/WellmanESVKS05,
  author       = {Michael P. Wellman and
                  Joshua Estelle and
                  Satinder Singh and
                  Yevgeniy Vorobeychik and
                  Christopher Kiekintveld and
                  Vishal Soni},
  title        = {Strategic Interactions in a Supply Chain Game},
  journal      = {Comput. Intell.},
  volume       = {21},
  number       = {1},
  pages        = {1--26},
  year         = {2005},
  url          = {https://doi.org/10.1111/j.0824-7935.2005.00263.x},
  doi          = {10.1111/J.0824-7935.2005.00263.X},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ci/WellmanESVKS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI-05 conference paper (Pittsburgh, July 9-13, 2005). publisher is the joint imprint
% "AAAI Press / The MIT Press"; there is no doi field, and url points at the AAAI library page.
@inproceedings{DBLP:conf/aaai/JamesS05,
  author       = {Michael R. James and
                  Satinder Singh},
  editor       = {Manuela M. Veloso and
                  Subbarao Kambhampati},
  title        = {Planning in Models that Combine Memory with Predictive Representations
                  of State},
  booktitle    = {Proceedings, The Twentieth National Conference on Artificial Intelligence
                  and the Seventeenth Innovative Applications of Artificial Intelligence
                  Conference, July 9-13, 2005, Pittsburgh, Pennsylvania, {USA}},
  pages        = {987--992},
  publisher    = {{AAAI} Press / The {MIT} Press},
  year         = {2005},
  url          = {http://www.aaai.org/Library/AAAI/2005/aaai05-156.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/JamesS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2005 paper (ACM International Conference Proceeding Series, volume 119).
% The first editor is written in "Last, First" form: in "First Last" form BibTeX takes only
% the final word as the surname, so the capitalised particle "De" would be read as a given
% name and the surname would be sorted and abbreviated as "Raedt".
@inproceedings{DBLP:conf/icml/WolfeJS05,
  author       = {Britton Wolfe and
                  Michael R. James and
                  Satinder Singh},
  editor       = {De Raedt, Luc and
                  Stefan Wrobel},
  title        = {Learning predictive state representations in dynamical systems without
                  reset},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Second International Conference
                  {(ICML} 2005), Bonn, Germany, August 7-11, 2005},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {119},
  pages        = {980--987},
  publisher    = {{ACM}},
  year         = {2005},
  url          = {https://doi.org/10.1145/1102351.1102475},
  doi          = {10.1145/1102351.1102475},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WolfeJS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI-05 paper (Edinburgh, July 30 - August 5, 2005). There is no doi field; url is a
% direct PDF link on ijcai.org.
@inproceedings{DBLP:conf/ijcai/JamesWS05,
  author       = {Michael R. James and
                  Britton Wolfe and
                  Satinder Singh},
  editor       = {Leslie Pack Kaelbling and
                  Alessandro Saffiotti},
  title        = {Combining Memory and Landmarks with Predictive State Representations},
  booktitle    = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference
                  on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August
                  5, 2005},
  pages        = {734--739},
  publisher    = {Professional Book Center},
  year         = {2005},
  url          = {http://ijcai.org/Proceedings/05/Papers/1621.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/JamesWS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI-05 paper. Same title and authors as the 2007 Mach. Learn. article and the 2004 AAAI
% Fall Symposium paper in this file; the venues differ, so all three are separate records.
@inproceedings{DBLP:conf/ijcai/VorobeychikWS05,
  author       = {Yevgeniy Vorobeychik and
                  Michael P. Wellman and
                  Satinder Singh},
  editor       = {Leslie Pack Kaelbling and
                  Alessandro Saffiotti},
  title        = {Learning Payoff Functions in Infinite Games},
  booktitle    = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference
                  on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August
                  5, 2005},
  pages        = {977--982},
  publisher    = {Professional Book Center},
  year         = {2005},
  url          = {http://ijcai.org/Proceedings/05/Papers/0527.pdf},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/VorobeychikWS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2005 paper (Advances in Neural Information Processing Systems 18). This record has
% no editor, publisher or doi field; url points at the proceedings.neurips.cc abstract page.
@inproceedings{DBLP:conf/nips/PrecupSPKS05,
  author       = {Doina Precup and
                  Richard S. Sutton and
                  Cosmin Paduraru and
                  Anna Koop and
                  Satinder Singh},
  title        = {Off-policy Learning with Options and Recognizers},
  booktitle    = {Advances in Neural Information Processing Systems 18 [Neural Information
                  Processing Systems, {NIPS} 2005, December 5-8, 2005, Vancouver, British
                  Columbia, Canada]},
  pages        = {1097--1104},
  year         = {2005},
  url          = {https://proceedings.neurips.cc/paper/2005/hash/f75526659f31040afeb61cb7133e4e6d-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/PrecupSPKS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2005 paper. Two fixes relative to the raw export:
%  - url: the query separators were stored as the HTML entity for an ampersand (ampersand,
%    hash, 38, semicolon), which is not valid inside a URL; each is now a single escaped
%    ampersand, matching the file's convention of escaping & and _ in url fields.
%  - title: "Gaussian" is braced so styles that sentence-case titles keep its capital.
@inproceedings{DBLP:conf/uai/RudarySW05,
  author       = {Matthew R. Rudary and
                  Satinder Singh and
                  David Wingate},
  title        = {Predictive Linear-{Gaussian} Models of Stochastic Dynamical Systems},
  booktitle    = {{UAI} '05, Proceedings of the 21st Conference in Uncertainty in Artificial
                  Intelligence, Edinburgh, Scotland, July 26-29, 2005},
  pages        = {501--508},
  publisher    = {{AUAI} Press},
  year         = {2005},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1187\&proceeding\_id=21},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/RudarySW05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SIGecom Exch. 4(3), 2004. Journal-style record with volume, number, pages and a doi;
% the acronym TAC in the title is already brace-protected.
@article{DBLP:journals/sigecom/KiekintveldWSS04,
  author       = {Christopher Kiekintveld and
                  Michael P. Wellman and
                  Satinder Singh and
                  Vishal Soni},
  title        = {Value-driven procurement in the {TAC} supply chain game},
  journal      = {SIGecom Exch.},
  volume       = {4},
  number       = {3},
  pages        = {9--18},
  year         = {2004},
  url          = {https://doi.org/10.1145/1120701.1120704},
  doi          = {10.1145/1120701.1120704},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/sigecom/KiekintveldWSS04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% 2004 AAAI Fall Symposium paper. volume holds the identifier FS-04-02, double-braced so it
% is kept verbatim. Earliest (by year) of three same-title records in this file; the other
% two are the IJCAI-05 paper and the 2007 Mach. Learn. article.
@inproceedings{DBLP:conf/aaaifs/VorobeychikWS04,
  author       = {Yevgeniy Vorobeychik and
                  Michael P. Wellman and
                  Satinder Singh},
  title        = {Learning Payoff Functions in Infinite Games},
  booktitle    = {Artificial Multiagent Learning, Papers from the 2004 {AAAI} Fall Symposium.
                  Arlington, VA, USA, October 22-24, 2004},
  volume       = {{FS-04-02}},
  pages        = {60--65},
  publisher    = {{AAAI} Press},
  year         = {2004},
  url          = {https://www.aaai.org/Library/Symposia/Fall/2004/fs04-02-008.php},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaaifs/VorobeychikWS04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICAPS 2004 paper (Whistler, June 3-7, 2004). The key uses the stream name "aips" while
% the booktitle says ICAPS; leave the key unchanged because biburl embeds it.
@inproceedings{DBLP:conf/aips/KiekintveldWSEVSR04,
  author       = {Christopher Kiekintveld and
                  Michael P. Wellman and
                  Satinder Singh and
                  Joshua Estelle and
                  Yevgeniy Vorobeychik and
                  Vishal Soni and
                  Matthew R. Rudary},
  editor       = {Shlomo Zilberstein and
                  Jana Koehler and
                  Sven Koenig},
  title        = {Distributed Feedback Control for Decision Making on Supply Chains},
  booktitle    = {Proceedings of the Fourteenth International Conference on Automated
                  Planning and Scheduling {(ICAPS} 2004), June 3-7 2004, Whistler, British
                  Columbia, Canada},
  pages        = {384--392},
  publisher    = {{AAAI}},
  year         = {2004},
  url          = {http://www.aaai.org/Library/ICAPS/2004/icaps04-045.php},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aips/KiekintveldWSEVSR04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CG 2004 paper in Lecture Notes in Computer Science, volume 3846 ("Revised Papers").
% year is the conference year given in the booktitle. The underscore in doi and url is
% escaped as \_ following this file's convention.
@inproceedings{DBLP:conf/cg/EstelleVWSKS04,
  author       = {Joshua Estelle and
                  Yevgeniy Vorobeychik and
                  Michael P. Wellman and
                  Satinder Singh and
                  Christopher Kiekintveld and
                  Vishal Soni},
  editor       = {H. Jaap van den Herik and
                  Yngvi Bj{\"{o}}rnsson and
                  Nathan S. Netanyahu},
  title        = {Strategic Interactions in the {TAC} 2003 Supply Chain Tournament},
  booktitle    = {Computers and Games, 4th International Conference, {CG} 2004, Ramat-Gan,
                  Israel, July 5-7, 2004, Revised Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {3846},
  pages        = {316--331},
  publisher    = {Springer},
  year         = {2004},
  url          = {https://doi.org/10.1007/11674399\_22},
  doi          = {10.1007/11674399\_22},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cg/EstelleVWSKS04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2004 paper (ACM International Conference Proceeding Series, volume 69).
% This record has no pages field, unlike the ICML 2005 and 2006 records in this file.
% NOTE(review): confirm whether page numbers exist before citing with a style that expects them.
@inproceedings{DBLP:conf/icml/JamesS04,
  author       = {Michael R. James and
                  Satinder Singh},
  editor       = {Carla E. Brodley},
  title        = {Learning and discovery of predictive state representations in dynamical
                  systems with reset},
  booktitle    = {Machine Learning, Proceedings of the Twenty-first International Conference
                  {(ICML} 2004), Banff, Alberta, Canada, July 4-8, 2004},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {69},
  publisher    = {{ACM}},
  year         = {2004},
  url          = {https://doi.org/10.1145/1015330.1015359},
  doi          = {10.1145/1015330.1015359},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/JamesS04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2004 paper (ACM International Conference Proceeding Series, volume 69).
% This record has no pages field, unlike the ICML 2005 and 2006 records in this file.
% NOTE(review): confirm whether page numbers exist before citing with a style that expects them.
@inproceedings{DBLP:conf/icml/RudarySP04,
  author       = {Matthew R. Rudary and
                  Satinder Singh and
                  Martha E. Pollack},
  editor       = {Carla E. Brodley},
  title        = {Adaptive cognitive orthotics: combining reinforcement learning and
                  constraint-based temporal reasoning},
  booktitle    = {Machine Learning, Proceedings of the Twenty-first International Conference
                  {(ICML} 2004), Banff, Alberta, Canada, July 4-8, 2004},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {69},
  publisher    = {{ACM}},
  year         = {2004},
  url          = {https://doi.org/10.1145/1015330.1015411},
  doi          = {10.1145/1015330.1015411},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/RudarySP04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICMLA 2004 paper (Louisville, KY, 16-18 December 2004). Editors, pages, publisher and
% doi are all present in this record.
@inproceedings{DBLP:conf/icmla/JamesSL04,
  author       = {Michael R. James and
                  Satinder Singh and
                  Michael L. Littman},
  editor       = {Mehmed M. Kantardzic and
                  Mariofanna G. Milanova and
                  Olfa Nasraoui},
  title        = {Planning with predictive state representations},
  booktitle    = {Proceedings of the 2004 International Conference on Machine Learning
                  and Applications - {ICMLA} 2004, 16-18 December 2004, Louisville,
                  KY, {USA}},
  pages        = {304--311},
  publisher    = {{IEEE} Computer Society},
  year         = {2004},
  url          = {https://doi.org/10.1109/ICMLA.2004.1383528},
  doi          = {10.1109/ICMLA.2004.1383528},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icmla/JamesSL04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2004 paper (Advances in Neural Information Processing Systems 17). This record has
% no editor, publisher or doi field; url points at the proceedings.neurips.cc abstract page.
@inproceedings{DBLP:conf/nips/ParkesSY04,
  author       = {David C. Parkes and
                  Satinder Singh and
                  Dimah Yanovsky},
  title        = {Approximately Efficient Online Mechanism Design},
  booktitle    = {Advances in Neural Information Processing Systems 17 [Neural Information
                  Processing Systems, {NIPS} 2004, December 13-18, 2004, Vancouver,
                  British Columbia, Canada]},
  pages        = {1049--1056},
  year         = {2004},
  url          = {https://proceedings.neurips.cc/paper/2004/hash/fc03d48253286a798f5116ec00e99b2b-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ParkesSY04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2004 paper (Advances in Neural Information Processing Systems 17). This record has
% no editor, publisher or doi field; url points at the proceedings.neurips.cc abstract page.
@inproceedings{DBLP:conf/nips/SinghBC04,
  author       = {Satinder Singh and
                  Andrew G. Barto and
                  Nuttapong Chentanez},
  title        = {Intrinsically Motivated Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 17 [Neural Information
                  Processing Systems, {NIPS} 2004, December 13-18, 2004, Vancouver,
                  British Columbia, Canada]},
  pages        = {1281--1288},
  year         = {2004},
  url          = {https://proceedings.neurips.cc/paper/2004/hash/4be5a36cbaca8ab9d2066debfe4e65c1-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SinghBC04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ACM EC 2004 paper. "Bayes-Nash" is built from two surnames, so it is capitalised and
% braced in the title; the raw export had it in lowercase, which no style can recover.
@inproceedings{DBLP:conf/sigecom/SinghSW04,
  author       = {Satinder Singh and
                  Vishal Soni and
                  Michael P. Wellman},
  editor       = {Jack S. Breese and
                  Joan Feigenbaum and
                  Margo I. Seltzer},
  title        = {Computing approximate {Bayes-Nash} equilibria in tree-games of incomplete
                  information},
  booktitle    = {Proceedings 5th {ACM} Conference on Electronic Commerce (EC-2004),
                  New York, NY, USA, May 17-20, 2004},
  pages        = {81--90},
  publisher    = {{ACM}},
  year         = {2004},
  url          = {https://doi.org/10.1145/988772.988785},
  doi          = {10.1145/988772.988785},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sigecom/SinghSW04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2004 paper. The url query separators were stored as the HTML entity for an ampersand
% (ampersand, hash, 38, semicolon), which is not valid inside a URL; each is now a single
% escaped ampersand, matching the file's convention of escaping & and _ in url fields.
@inproceedings{DBLP:conf/uai/SinghJR04,
  author       = {Satinder Singh and
                  Michael R. James and
                  Matthew R. Rudary},
  editor       = {David Maxwell Chickering and
                  Joseph Y. Halpern},
  title        = {Predictive State Representations: {A} New Theory for Modeling Dynamical
                  Systems},
  booktitle    = {{UAI} '04, Proceedings of the 20th Conference in Uncertainty in Artificial
                  Intelligence, Banff, Canada, July 7-11, 2004},
  pages        = {512--518},
  publisher    = {{AUAI} Press},
  year         = {2004},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1148\&proceeding\_id=20},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/SinghJR04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2003 paper. publisher here is AAAI Press and there is no doi field, whereas the
% ICML 2004-2006 records in this file list ACM as publisher and carry a doi.
@inproceedings{DBLP:conf/icml/SinghLJPS03,
  author       = {Satinder Singh and
                  Michael L. Littman and
                  Nicholas K. Jong and
                  David Pardoe and
                  Peter Stone},
  editor       = {Tom Fawcett and
                  Nina Mishra},
  title        = {Learning Predictive State Representations},
  booktitle    = {Machine Learning, Proceedings of the Twentieth International Conference
                  {(ICML} 2003), August 21-24, 2003, Washington, DC, {USA}},
  pages        = {712--719},
  publisher    = {{AAAI} Press},
  year         = {2003},
  url          = {http://www.aaai.org/Library/ICML/2003/icml03-093.php},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SinghLJPS03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2003 paper (Advances in Neural Information Processing Systems 16), MIT Press.
% The acronym MDP is braced in the title so that styles which sentence-case titles do not
% lowercase it to "mdp".
@inproceedings{DBLP:conf/nips/ParkesS03,
  author       = {David C. Parkes and
                  Satinder Singh},
  editor       = {Sebastian Thrun and
                  Lawrence K. Saul and
                  Bernhard Sch{\"{o}}lkopf},
  title        = {An {MDP}-Based Approach to Online Mechanism Design},
  booktitle    = {Advances in Neural Information Processing Systems 16 [Neural Information
                  Processing Systems, {NIPS} 2003, December 8-13, 2003, Vancouver and
                  Whistler, British Columbia, Canada]},
  pages        = {791--798},
  publisher    = {{MIT} Press},
  year         = {2003},
  url          = {https://proceedings.neurips.cc/paper/2003/hash/d16509f6eaca1022bd8f28d6bc582cae-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ParkesS03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2003 paper (Advances in Neural Information Processing Systems 16), MIT Press.
% There is no doi field; url points at the proceedings.neurips.cc abstract page.
@inproceedings{DBLP:conf/nips/RudaryS03,
  author       = {Matthew R. Rudary and
                  Satinder Singh},
  editor       = {Sebastian Thrun and
                  Lawrence K. Saul and
                  Bernhard Sch{\"{o}}lkopf},
  title        = {A Nonlinear Predictive State Representation},
  booktitle    = {Advances in Neural Information Processing Systems 16 [Neural Information
                  Processing Systems, {NIPS} 2003, December 8-13, 2003, Vancouver and
                  Whistler, British Columbia, Canada]},
  pages        = {855--862},
  publisher    = {{MIT} Press},
  year         = {2003},
  url          = {https://proceedings.neurips.cc/paper/2003/hash/72e6d3238361fe70f22fb0ac624a7072-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/RudaryS03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jair/SinghLKW02,
  author       = {Satinder Singh and
                  Diane J. Litman and
                  Michael J. Kearns and
                  Marilyn A. Walker},
  title        = {Optimizing Dialogue Management with Reinforcement Learning: Experiments
                  with the {NJFun} System},
  journal      = {J. Artif. Intell. Res.},
  volume       = {16},
  pages        = {105--133},
  year         = {2002},
  url          = {https://doi.org/10.1613/jair.859},
  doi          = {10.1613/JAIR.859},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jair/SinghLKW02.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/Singh02,
  author       = {Singh, Satinder},
  title        = {Introduction},
  journal      = {Mach. Learn.},
  volume       = {49},
  number       = {2-3},
  pages        = {107--109},
  year         = {2002},
  url          = {https://doi.org/10.1023/A:1017917511082},
  doi          = {10.1023/A:1017917511082},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/Singh02.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/KearnsS02,
  author       = {Kearns, Michael J. and
                  Singh, Satinder},
  title        = {Near-Optimal Reinforcement Learning in Polynomial Time},
  journal      = {Mach. Learn.},
  volume       = {49},
  number       = {2-3},
  pages        = {209--232},
  year         = {2002},
  url          = {https://doi.org/10.1023/A:1017984413808},
  doi          = {10.1023/A:1017984413808},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/KearnsS02.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/KearnsISLH02,
  author       = {Kearns, Michael J. and
                  Isbell, Jr., Charles Lee and
                  Singh, Satinder and
                  Litman, Diane J. and
                  Howe, Jessica},
  editor       = {Dechter, Rina and
                  Kearns, Michael J. and
                  Sutton, Richard S.},
  title        = {{CobotDS}: {A} Spoken Dialogue System for Chat},
  booktitle    = {Proceedings of the Eighteenth National Conference on Artificial Intelligence
                  and Fourteenth Conference on Innovative Applications of Artificial
                  Intelligence, July 28 - August 1, 2002, Edmonton, Alberta, Canada},
  pages        = {425--430},
  publisher    = {{AAAI} Press / The {MIT} Press},
  year         = {2002},
  url          = {http://www.aaai.org/Library/AAAI/2002/aaai02-065.php},
  timestamp    = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/KearnsISLH02.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jair/StoneLSK01,
  author       = {Peter Stone and
                  Michael L. Littman and
                  Satinder Singh and
                  Michael J. Kearns},
  title        = {{ATTac-2000}: An Adaptive Autonomous Bidding Agent},
  journal      = {J. Artif. Intell. Res.},
  volume       = {15},
  pages        = {189--206},
  year         = {2001},
  url          = {https://doi.org/10.1613/jair.865},
  doi          = {10.1613/JAIR.865},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jair/StoneLSK01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/agents/StoneLSK01,
  author       = {Peter Stone and
                  Michael L. Littman and
                  Satinder Singh and
                  Michael J. Kearns},
  editor       = {Elisabeth Andr{\'{e}} and
                  Sandip Sen and
                  Claude Frasson and
                  J{\"{o}}rg P. M{\"{u}}ller},
  title        = {{ATTac-2000}: An Adaptive Autonomous Bidding Agent},
  booktitle    = {Proceedings of the Fifth International Conference on Autonomous Agents,
                  {AGENTS} 2001, Montreal, Canada, May 28 - June 1, 2001},
  pages        = {238--245},
  publisher    = {{ACM}},
  year         = {2001},
  url          = {https://doi.org/10.1145/375735.376301},
  doi          = {10.1145/375735.376301},
  timestamp    = {Sat, 30 Sep 2023 09:33:47 +0200},
  biburl       = {https://dblp.org/rec/conf/agents/StoneLSK01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/agents/IsbellSKSS01,
  author       = {Isbell, Jr., Charles Lee and
                  Shelton, Christian R. and
                  Kearns, Michael J. and
                  Singh, Satinder and
                  Stone, Peter},
  editor       = {Andr{\'{e}}, Elisabeth and
                  Sen, Sandip and
                  Frasson, Claude and
                  M{\"{u}}ller, J{\"{o}}rg P.},
  title        = {A Social Reinforcement Learning Agent},
  booktitle    = {Proceedings of the Fifth International Conference on Autonomous Agents,
                  {AGENTS} 2001, Montreal, Canada, May 28 - June 1, 2001},
  pages        = {377--384},
  publisher    = {{ACM}},
  year         = {2001},
  url          = {https://doi.org/10.1145/375735.376334},
  doi          = {10.1145/375735.376334},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/agents/IsbellSKSS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LittmanKS01,
  author       = {Littman, Michael L. and
                  Kearns, Michael J. and
                  Singh, Satinder},
  editor       = {Dietterich, Thomas G. and
                  Becker, Suzanna and
                  Ghahramani, Zoubin},
  title        = {An Efficient, Exact Algorithm for Solving Tree-Structured Graphical
                  Games},
  booktitle    = {Advances in Neural Information Processing Systems 14 [Neural Information
                  Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8,
                  2001, Vancouver, British Columbia, Canada]},
  pages        = {817--823},
  publisher    = {{MIT} Press},
  year         = {2001},
  url          = {https://proceedings.neurips.cc/paper/2001/hash/c5866e93cab1776890fe343c9e7063fb-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/LittmanKS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/IsbellSKSS01,
  author       = {Isbell, Jr., Charles Lee and
                  Shelton, Christian R. and
                  Kearns, Michael J. and
                  Singh, Satinder and
                  Stone, Peter},
  editor       = {Dietterich, Thomas G. and
                  Becker, Suzanna and
                  Ghahramani, Zoubin},
  title        = {{Cobot}: {A} Social Reinforcement Learning Agent},
  booktitle    = {Advances in Neural Information Processing Systems 14 [Neural Information
                  Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8,
                  2001, Vancouver, British Columbia, Canada]},
  pages        = {1393--1400},
  publisher    = {{MIT} Press},
  year         = {2001},
  url          = {https://proceedings.neurips.cc/paper/2001/hash/92bbd31f8e0e43a7da8a6295b251725f-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/IsbellSKSS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LittmanSS01,
  author       = {Littman, Michael L. and
                  Sutton, Richard S. and
                  Singh, Satinder},
  editor       = {Dietterich, Thomas G. and
                  Becker, Suzanna and
                  Ghahramani, Zoubin},
  title        = {Predictive Representations of State},
  booktitle    = {Advances in Neural Information Processing Systems 14 [Neural Information
                  Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8,
                  2001, Vancouver, British Columbia, Canada]},
  pages        = {1555--1561},
  publisher    = {{MIT} Press},
  year         = {2001},
  url          = {https://proceedings.neurips.cc/paper/2001/hash/1e4d36177d71bbb3558e43af9577d70e-Abstract.html},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/LittmanSS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/KearnsLS01,
  author       = {Michael J. Kearns and
                  Michael L. Littman and
                  Satinder Singh},
  editor       = {Jack S. Breese and
                  Daphne Koller},
  title        = {Graphical Models for Game Theory},
  booktitle    = {{UAI} '01: Proceedings of the 17th Conference in Uncertainty in Artificial
                  Intelligence, University of Washington, Seattle, Washington, USA,
                  August 2-5, 2001},
  pages        = {253--260},
  publisher    = {Morgan Kaufmann},
  year         = {2001},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=107\&proceeding\_id=17},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/KearnsLS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/welcom/CsirikLSS01,
  author       = {J{\'{a}}nos A. Csirik and
                  Michael L. Littman and
                  Satinder Singh and
                  Peter Stone},
  editor       = {Ludger Fiege and
                  Gero M{\"{u}}hl and
                  Uwe G. Wilhelm},
  title        = {{FAucS}: An {FCC} Spectrum Auction Simulator for Autonomous Bidding
                  Agents},
  booktitle    = {Electronic Commerce, Second International Workshop, {WELCOM} 2001
                  Heidelberg, Germany, November 16-17, 2001, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {2232},
  pages        = {139--151},
  publisher    = {Springer},
  year         = {2001},
  url          = {https://doi.org/10.1007/3-540-45598-1\_14},
  doi          = {10.1007/3-540-45598-1\_14},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/welcom/CsirikLSS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SinghJLS00,
  author       = {Singh, Satinder and
                  Jaakkola, Tommi S. and
                  Littman, Michael L. and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Convergence Results for Single-Step On-Policy Reinforcement-Learning
                  Algorithms},
  journal      = {Mach. Learn.},
  volume       = {38},
  number       = {3},
  pages        = {287--308},
  year         = {2000},
  url          = {https://doi.org/10.1023/A:1007678930559},
  doi          = {10.1023/A:1007678930559},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/SinghJLS00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/IsbellKKSS00,
  author       = {Isbell, Jr., Charles Lee and
                  Kearns, Michael J. and
                  Kormann, David P. and
                  Singh, Satinder and
                  Stone, Peter},
  editor       = {Kautz, Henry A. and
                  Porter, Bruce W.},
  title        = {{Cobot} in {LambdaMOO}: {A} Social Statistics Agent},
  booktitle    = {Proceedings of the Seventeenth National Conference on Artificial Intelligence
                  and Twelfth Conference on Innovative Applications of Artificial
                  Intelligence, July 30 - August 3, 2000, Austin, Texas, {USA}},
  pages        = {36--41},
  publisher    = {{AAAI} Press / The {MIT} Press},
  year         = {2000},
  url          = {http://www.aaai.org/Library/AAAI/2000/aaai00-006.php},
  timestamp    = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/IsbellKKSS00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/SinghKLW00,
  author       = {Satinder Singh and
                  Michael J. Kearns and
                  Diane J. Litman and
                  Marilyn A. Walker},
  editor       = {Henry A. Kautz and
                  Bruce W. Porter},
  title        = {Empirical Evaluation of a Reinforcement Learning Spoken Dialogue System},
  booktitle    = {Proceedings of the Seventeenth National Conference on Artificial Intelligence
                  and Twelfth Conference on Innovative Applications of Artificial
                  Intelligence, July 30 - August 3, 2000, Austin, Texas, {USA}},
  pages        = {645--651},
  publisher    = {{AAAI} Press / The {MIT} Press},
  year         = {2000},
  url          = {http://www.aaai.org/Library/AAAI/2000/aaai00-099.php},
  timestamp    = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/SinghKLW00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/coling/LitmanKSW00,
  author       = {Diane J. Litman and
                  Michael J. Kearns and
                  Satinder Singh and
                  Marilyn A. Walker},
  title        = {Automatic Optimization of Dialogue Management},
  booktitle    = {{COLING} 2000, 18th International Conference on Computational Linguistics,
                  Proceedings of the Conference, 2 Volumes, July 31 - August 4, 2000,
                  Universit{\"{a}}t des Saarlandes, Saarbr{\"{u}}cken, Germany},
  pages        = {502--508},
  publisher    = {Morgan Kaufmann},
  year         = {2000},
  url          = {https://aclanthology.org/C00-1073/},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/coling/LitmanKSW00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/colt/KearnsS00,
  author       = {Kearns, Michael J. and
                  Singh, Satinder},
  editor       = {Cesa{-}Bianchi, Nicol{\`{o}} and
                  Goldman, Sally A.},
  title        = {Bias-Variance Error Bounds for Temporal Difference Updates},
  booktitle    = {Proceedings of the Thirteenth Annual Conference on Computational Learning
                  Theory {(COLT} 2000), June 28 - July 1, 2000, Palo Alto, California,
                  {USA}},
  pages        = {142--147},
  publisher    = {Morgan Kaufmann},
  year         = {2000},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/colt/KearnsS00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/MyersKSW00,
  author       = {Myers, Kary L. and
                  Kearns, Michael J. and
                  Singh, Satinder and
                  Walker, Marilyn A.},
  editor       = {Langley, Pat},
  title        = {A Boosting Approach to Topic Spotting on Subdialogues},
  booktitle    = {Proceedings of the Seventeenth International Conference on Machine
                  Learning {(ICML} 2000), Stanford University, Stanford, CA, USA, June
                  29 - July 2, 2000},
  pages        = {655--662},
  publisher    = {Morgan Kaufmann},
  year         = {2000},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/MyersKSW00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PrecupSS00,
  author       = {Precup, Doina and
                  Sutton, Richard S. and
                  Singh, Satinder},
  editor       = {Langley, Pat},
  title        = {Eligibility Traces for Off-Policy Policy Evaluation},
  booktitle    = {Proceedings of the Seventeenth International Conference on Machine
                  Learning {(ICML} 2000), Stanford University, Stanford, CA, USA, June
                  29 - July 2, 2000},
  pages        = {759--766},
  publisher    = {Morgan Kaufmann},
  year         = {2000},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/PrecupSS00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/robocup/StoneSS00,
  author       = {Stone, Peter and
                  Sutton, Richard S. and
                  Singh, Satinder},
  editor       = {Stone, Peter and
                  Balch, Tucker R. and
                  Kraetzschmar, Gerhard K.},
  title        = {Reinforcement Learning for 3 vs. 2 Keepaway},
  booktitle    = {RoboCup 2000: Robot Soccer World Cup {IV}},
  series       = {Lecture Notes in Computer Science},
  volume       = {2019},
  pages        = {249--258},
  publisher    = {Springer},
  year         = {2000},
  url          = {https://doi.org/10.1007/3-540-45324-5\_23},
  doi          = {10.1007/3-540-45324-5\_23},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/robocup/StoneSS00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/KearnsMS00,
  author       = {Michael J. Kearns and
                  Yishay Mansour and
                  Satinder Singh},
  editor       = {Craig Boutilier and
                  Mois{\'{e}}s Goldszmidt},
  title        = {Fast Planning in Stochastic Games},
  booktitle    = {{UAI} '00: Proceedings of the 16th Conference in Uncertainty in Artificial
                  Intelligence, Stanford University, Stanford, California, USA, June
                  30 - July 3, 2000},
  pages        = {309--316},
  publisher    = {Morgan Kaufmann},
  year         = {2000},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=37\&proceeding\_id=16},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/KearnsMS00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/SinghKM00,
  author       = {Satinder Singh and
                  Michael J. Kearns and
                  Yishay Mansour},
  editor       = {Craig Boutilier and
                  Mois{\'{e}}s Goldszmidt},
  title        = {{Nash} Convergence of Gradient Dynamics in General-Sum Games},
  booktitle    = {{UAI} '00: Proceedings of the 16th Conference in Uncertainty in Artificial
                  Intelligence, Stanford University, Stanford, California, USA, June
                  30 - July 3, 2000},
  pages        = {541--548},
  publisher    = {Morgan Kaufmann},
  year         = {2000},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=63\&proceeding\_id=16},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/SinghKM00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SuttonPS99,
  author       = {Richard S. Sutton and
                  Doina Precup and
                  Satinder Singh},
  title        = {Between {MDPs} and {Semi-MDPs}: {A} Framework for Temporal Abstraction
                  in Reinforcement Learning},
  journal      = {Artif. Intell.},
  volume       = {112},
  number       = {1-2},
  pages        = {181--211},
  year         = {1999},
  url          = {https://doi.org/10.1016/S0004-3702(99)00052-1},
  doi          = {10.1016/S0004-3702(99)00052-1},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ai/SuttonPS99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghKLW99,
  author       = {Singh, Satinder and
                  Kearns, Michael J. and
                  Litman, Diane J. and
                  Walker, Marilyn A.},
  editor       = {Solla, Sara A. and
                  Leen, Todd K. and
                  M{\"{u}}ller, Klaus{-}Robert},
  title        = {Reinforcement Learning for Spoken Dialogue Systems},
  booktitle    = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference,
                  Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages        = {956--962},
  publisher    = {The {MIT} Press},
  year         = {1999},
  url          = {http://papers.nips.cc/paper/1775-reinforcement-learning-for-spoken-dialogue-systems},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SinghKLW99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonMSM99,
  author       = {Sutton, Richard S. and
                  McAllester, David A. and
                  Singh, Satinder and
                  Mansour, Yishay},
  editor       = {Solla, Sara A. and
                  Leen, Todd K. and
                  M{\"{u}}ller, Klaus{-}Robert},
  title        = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference,
                  Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages        = {1057--1063},
  publisher    = {The {MIT} Press},
  year         = {1999},
  url          = {http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SuttonMSM99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/MansourS99,
  author       = {Yishay Mansour and
                  Satinder Singh},
  editor       = {Kathryn B. Laskey and
                  Henri Prade},
  title        = {On the Complexity of Policy Iteration},
  booktitle    = {{UAI} '99: Proceedings of the Fifteenth Conference on Uncertainty
                  in Artificial Intelligence, Stockholm, Sweden, July 30 - August 1,
                  1999},
  pages        = {401--408},
  publisher    = {Morgan Kaufmann},
  year         = {1999},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=192\&proceeding\_id=15},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/MansourS99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/McAllesterS99,
  author       = {David A. McAllester and
                  Satinder Singh},
  editor       = {Kathryn B. Laskey and
                  Henri Prade},
  title        = {Approximate Planning for Factored {POMDPs} using Belief State Simplification},
  booktitle    = {{UAI} '99: Proceedings of the Fifteenth Conference on Uncertainty
                  in Artificial Intelligence, Stockholm, Sweden, July 30 - August 1,
                  1999},
  pages        = {409--416},
  publisher    = {Morgan Kaufmann},
  year         = {1999},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=193\&proceeding\_id=15},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/McAllesterS99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SinghD98,
  author       = {Singh, Satinder and
                  Dayan, Peter},
  title        = {Analytical Mean Squared Error Curves for Temporal Difference Learning},
  journal      = {Mach. Learn.},
  volume       = {32},
  number       = {1},
  pages        = {5--40},
  year         = {1998},
  url          = {https://doi.org/10.1023/A:1007495401240},
  doi          = {10.1023/A:1007495401240},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/SinghD98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ecml/PrecupSS98,
  author       = {Doina Precup and
                  Richard S. Sutton and
                  Satinder Singh},
  editor       = {Claire N{\'{e}}dellec and
                  C{\'{e}}line Rouveirol},
  title        = {Theoretical Results on Reinforcement Learning with Temporally Abstract
                  Options},
  booktitle    = {Machine Learning: {ECML-98}, 10th European Conference on Machine Learning,
                  Chemnitz, Germany, April 21-23, 1998, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {1398},
  pages        = {382--393},
  publisher    = {Springer},
  year         = {1998},
  url          = {https://doi.org/10.1007/BFb0026709},
  doi          = {10.1007/BFB0026709},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ecml/PrecupSS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/KearnsS98,
  author       = {Michael J. Kearns and
                  Satinder Singh},
  editor       = {Jude W. Shavlik},
  title        = {Near-Optimal Reinforcement Learning in Polynomial Time},
  booktitle    = {Proceedings of the Fifteenth International Conference on Machine Learning
                  {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages        = {260--268},
  publisher    = {Morgan Kaufmann},
  year         = {1998},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/KearnsS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/LochS98,
  author       = {John Loch and
                  Satinder Singh},
  editor       = {Jude W. Shavlik},
  title        = {Using Eligibility Traces to Find the Best Memoryless Policy in Partially
                  Observable {Markov} Decision Processes},
  booktitle    = {Proceedings of the Fifteenth International Conference on Machine Learning
                  {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages        = {323--331},
  publisher    = {Morgan Kaufmann},
  year         = {1998},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/LochS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonPS98,
  author       = {Sutton, Richard S. and
                  Precup, Doina and
                  Singh, Satinder},
  editor       = {Shavlik, Jude W.},
  title        = {Intra-Option Learning about Temporally Abstract Actions},
  booktitle    = {Proceedings of the Fifteenth International Conference on Machine Learning
                  {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages        = {556--564},
  publisher    = {Morgan Kaufmann},
  year         = {1998},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SuttonPS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BrownTS98,
  author       = {Brown, Timothy X. and
                  Tong, Hui and
                  Singh, Satinder},
  editor       = {Kearns, Michael J. and
                  Solla, Sara A. and
                  Cohn, David A.},
  title        = {Optimizing Admission Control while Ensuring Quality of Service in
                  Multimedia Networks via Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference,
                  Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages        = {982--988},
  publisher    = {The {MIT} Press},
  year         = {1998},
  url          = {http://papers.nips.cc/paper/1599-optimizing-admission-control-while-ensuring-quality-of-service-in-multimedia-networks-via-reinforcement-learning},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/BrownTS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/KearnsS98a,
  author       = {Michael J. Kearns and
                  Satinder Singh},
  editor       = {Michael J. Kearns and
                  Sara A. Solla and
                  David A. Cohn},
  title        = {Finite-Sample Convergence Rates for {Q-Learning} and Indirect Algorithms},
  booktitle    = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference,
                  Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages        = {996--1002},
  publisher    = {The {MIT} Press},
  year         = {1998},
  url          = {http://papers.nips.cc/paper/1531-finite-sample-convergence-rates-for-q-learning-and-indirect-algorithms},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/KearnsS98a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonSPR98,
  author       = {Sutton, Richard S. and
                  Singh, Satinder and
                  Precup, Doina and
                  Ravindran, Balaraman},
  editor       = {Kearns, Michael J. and
                  Solla, Sara A. and
                  Cohn, David A.},
  title        = {Improved Switching among Temporally Abstract Actions},
  booktitle    = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference,
                  Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages        = {1066--1072},
  publisher    = {The {MIT} Press},
  year         = {1998},
  url          = {http://papers.nips.cc/paper/1607-improved-switching-among-temporally-abstract-actions},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SuttonSPR98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WilliamsS98,
  author       = {John K. Williams and
                  Satinder Singh},
  editor       = {Michael J. Kearns and
                  Sara A. Solla and
                  David A. Cohn},
  title        = {Experimental Results on Learning Stochastic Memoryless Policies for
                  Partially Observable {Markov} Decision Processes},
  booktitle    = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference,
                  Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages        = {1073--1080},
  publisher    = {The {MIT} Press},
  year         = {1998},
  url          = {http://papers.nips.cc/paper/1509-experimental-results-on-learning-stochastic-memoryless-policies-for-partially-observable-markov-decision-processes},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/WilliamsS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghC97,
  author       = {Satinder Singh and
                  David A. Cohn},
  editor       = {Michael I. Jordan and
                  Michael J. Kearns and
                  Sara A. Solla},
  title        = {How to Dynamically Merge {Markov} Decision Processes},
  booktitle    = {Advances in Neural Information Processing Systems 10, {[NIPS} Conference,
                  Denver, Colorado, USA, 1997]},
  pages        = {1057--1063},
  publisher    = {The {MIT} Press},
  year         = {1997},
  url          = {http://papers.nips.cc/paper/1420-how-to-dynamically-merge-markov-decision-processes},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SinghC97.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Singh and Sutton, Mach. Learn. 22 (1996). The issue range is written with a
% double hyphen, like the page range, so TeX typesets an en dash.
@article{DBLP:journals/ml/SinghS96,
  author       = {Satinder P. Singh and
                  Richard S. Sutton},
  title        = {Reinforcement Learning with Replacing Eligibility Traces},
  journal      = {Mach. Learn.},
  volume       = {22},
  number       = {1--3},
  pages        = {123--158},
  year         = {1996},
  url          = {https://doi.org/10.1023/A:1018012322525},
  doi          = {10.1023/A:1018012322525},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/SinghS96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Saul and Singh, COLT 1996. "Markov" is braced in the title so that styles
% which sentence-case titles keep the proper noun capitalised.
@inproceedings{DBLP:conf/colt/SaulS96,
  author       = {Lawrence K. Saul and
                  Satinder P. Singh},
  editor       = {Avrim Blum and
                  Michael J. Kearns},
  title        = {Learning Curve Bounds for a {Markov} Decision Process with Undiscounted
                  Rewards},
  booktitle    = {Proceedings of the Ninth Annual Conference on Computational Learning
                  Theory, {COLT} 1996, Desenzano del Garda, Italy, June 28-July 1, 1996},
  pages        = {147--156},
  publisher    = {{ACM}},
  year         = {1996},
  url          = {https://doi.org/10.1145/238061.238084},
  doi          = {10.1145/238061.238084},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/colt/SaulS96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Cohn and Singh, NIPS 9 (1996). The acronym NIPS is braced in booktitle so it
% keeps its capitals under any style, as in the other NIPS records of this file.
@inproceedings{DBLP:conf/nips/CohnS96,
  author       = {David A. Cohn and
                  Satinder Singh},
  editor       = {Michael Mozer and
                  Michael I. Jordan and
                  Thomas Petsche},
  title        = {Predicting Lifetimes in Dynamically Allocated Memory},
  booktitle    = {Advances in Neural Information Processing Systems 9, {NIPS}, Denver,
                  CO, USA, December 2-5, 1996},
  pages        = {939--945},
  publisher    = {{MIT} Press},
  year         = {1996},
  url          = {http://papers.nips.cc/paper/1240-predicting-lifetimes-in-dynamically-allocated-memory},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/CohnS96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Singh and Bertsekas, NIPS 9 (1996). The acronym NIPS is braced in booktitle
% so it keeps its capitals under any style.
@inproceedings{DBLP:conf/nips/SinghB96,
  author       = {Satinder Singh and
                  Dimitri P. Bertsekas},
  editor       = {Michael Mozer and
                  Michael I. Jordan and
                  Thomas Petsche},
  title        = {Reinforcement Learning for Dynamic Channel Allocation in Cellular
                  Telephone Systems},
  booktitle    = {Advances in Neural Information Processing Systems 9, {NIPS}, Denver,
                  CO, USA, December 2-5, 1996},
  pages        = {974--980},
  publisher    = {{MIT} Press},
  year         = {1996},
  url          = {http://papers.nips.cc/paper/1216-reinforcement-learning-for-dynamic-channel-allocation-in-cellular-telephone-systems},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SinghB96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Singh and Dayan, NIPS 9 (1996). The acronym NIPS is braced in booktitle so
% it keeps its capitals under any style.
@inproceedings{DBLP:conf/nips/SinghD96,
  author       = {Satinder Singh and
                  Peter Dayan},
  editor       = {Michael Mozer and
                  Michael I. Jordan and
                  Thomas Petsche},
  title        = {Analytical Mean Squared Error Curves in Temporal Difference Learning},
  booktitle    = {Advances in Neural Information Processing Systems 9, {NIPS}, Denver,
                  CO, USA, December 2-5, 1996},
  pages        = {1054--1060},
  publisher    = {{MIT} Press},
  year         = {1996},
  url          = {http://papers.nips.cc/paper/1284-analytical-mean-squared-error-curves-in-temporal-difference-learning},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SinghD96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Barto, Bradtke and Singh, Artif. Intell. 72 (1995). The issue range is
% written with a double hyphen, like the page range, so TeX typesets an en dash.
@article{DBLP:journals/ai/BartoBS95,
  author       = {Andrew G. Barto and
                  Steven J. Bradtke and
                  Satinder P. Singh},
  title        = {Learning to Act Using Real-Time Dynamic Programming},
  journal      = {Artif. Intell.},
  volume       = {72},
  number       = {1--2},
  pages        = {81--138},
  year         = {1995},
  url          = {https://doi.org/10.1016/0004-3702(94)00011-O},
  doi          = {10.1016/0004-3702(94)00011-O},
  timestamp    = {Sat, 27 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ai/BartoBS95.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Saul and Singh, COLT 1995. "Markov" is braced in the title so that styles
% which sentence-case titles keep the proper noun capitalised; the ordinal in
% booktitle is spelled "Eighth".
@inproceedings{DBLP:conf/colt/SaulS95,
  author       = {Lawrence K. Saul and
                  Satinder P. Singh},
  editor       = {Wolfgang Maass},
  title        = {{Markov} Decision Processes in Large State Spaces},
  booktitle    = {Proceedings of the Eighth Annual Conference on Computational Learning
                  Theory, {COLT} 1995, Santa Cruz, California, USA, July 5-8, 1995},
  pages        = {281--288},
  publisher    = {{ACM}},
  year         = {1995},
  url          = {https://doi.org/10.1145/225298.225332},
  doi          = {10.1145/225298.225332},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/colt/SaulS95.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Dayan and Singh, NIPS 8 (1995). The acronym NIPS is braced in booktitle so
% it keeps its capitals under any style.
@inproceedings{DBLP:conf/nips/DayanS95,
  author       = {Peter Dayan and
                  Satinder Singh},
  editor       = {David S. Touretzky and
                  Michael Mozer and
                  Michael E. Hasselmo},
  title        = {Improving Policies without Measuring Merits},
  booktitle    = {Advances in Neural Information Processing Systems 8, {NIPS}, Denver,
                  CO, USA, November 27-30, 1995},
  pages        = {1059--1065},
  publisher    = {{MIT} Press},
  year         = {1995},
  url          = {http://papers.nips.cc/paper/1143-improving-policies-without-measuring-merits},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/DayanS95.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Singh and Yee, Mach. Learn. 16(3), 1994.
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@article{DBLP:journals/ml/SinghY94,
  author    = {Singh, Satinder P. and Yee, Richard C.},
  title     = {An Upper Bound on the Loss from Approximate Optimal-Value Functions},
  journal   = {Mach. Learn.},
  volume    = {16},
  number    = {3},
  pages     = {227--233},
  year      = {1994},
  url       = {https://doi.org/10.1007/BF00993308},
  doi       = {10.1007/BF00993308},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/SinghY94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Jaakkola, Jordan and Singh, Neural Comput. 6(6), 1994.
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@article{DBLP:journals/neco/JaakkolaJS94,
  author    = {Jaakkola, Tommi S. and Jordan, Michael I. and Singh, Satinder P.},
  title     = {On the Convergence of Stochastic Iterative Dynamic Programming Algorithms},
  journal   = {Neural Comput.},
  volume    = {6},
  number    = {6},
  pages     = {1185--1201},
  year      = {1994},
  url       = {https://doi.org/10.1162/neco.1994.6.6.1185},
  doi       = {10.1162/NECO.1994.6.6.1185},
  timestamp = {Tue, 01 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/neco/JaakkolaJS94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Singh, AAAI 1994. "Markovian" is braced in the title so that styles which
% sentence-case titles keep the capital of the proper-noun derivative.
@inproceedings{DBLP:conf/aaai/Singh94,
  author       = {Satinder P. Singh},
  editor       = {Barbara Hayes{-}Roth and
                  Richard E. Korf},
  title        = {Reinforcement Learning Algorithms for Average-Payoff {Markovian} Decision
                  Processes},
  booktitle    = {Proceedings of the 12th National Conference on Artificial Intelligence,
                  Seattle, WA, USA, July 31 - August 4, 1994, Volume 1},
  pages        = {700--705},
  publisher    = {{AAAI} Press / The {MIT} Press},
  year         = {1994},
  url          = {http://www.aaai.org/Library/AAAI/1994/aaai94-107.php},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/Singh94.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Singh, Jaakkola and Jordan, ICML 1994. "Markovian" is braced in the title so
% that styles which sentence-case titles keep the capital.
@inproceedings{DBLP:conf/icml/SinghJJ94,
  author       = {Satinder P. Singh and
                  Tommi S. Jaakkola and
                  Michael I. Jordan},
  editor       = {William W. Cohen and
                  Haym Hirsh},
  title        = {Learning Without State-Estimation in Partially Observable {Markovian}
                  Decision Processes},
  booktitle    = {Machine Learning, Proceedings of the Eleventh International Conference,
                  Rutgers University, New Brunswick, NJ, USA, July 10-13, 1994},
  pages        = {284--292},
  publisher    = {Morgan Kaufmann},
  year         = {1994},
  url          = {https://doi.org/10.1016/b978-1-55860-335-6.50042-8},
  doi          = {10.1016/B978-1-55860-335-6.50042-8},
  timestamp    = {Mon, 24 Jun 2019 13:56:31 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SinghJJ94.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Jaakkola, Singh and Jordan, NIPS 7 (1994). "Markov" is braced in the title so
% that styles which sentence-case titles keep the proper noun capitalised.
@inproceedings{DBLP:conf/nips/JaakkolaSJ94,
  author       = {Tommi S. Jaakkola and
                  Satinder Singh and
                  Michael I. Jordan},
  editor       = {Gerald Tesauro and
                  David S. Touretzky and
                  Todd K. Leen},
  title        = {Reinforcement Learning Algorithm for Partially Observable {Markov} Decision
                  Problems},
  booktitle    = {Advances in Neural Information Processing Systems 7, {[NIPS} Conference,
                  Denver, Colorado, USA, 1994]},
  pages        = {345--352},
  publisher    = {{MIT} Press},
  year         = {1994},
  url          = {http://papers.nips.cc/paper/951-reinforcement-learning-algorithm-for-partially-observable-markov-decision-problems},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/JaakkolaSJ94.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Singh, Jaakkola and Jordan, NIPS 7 (1994).
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@inproceedings{DBLP:conf/nips/SinghJJ94,
  author    = {Singh, Satinder and Jaakkola, Tommi S. and Jordan, Michael I.},
  editor    = {Tesauro, Gerald and Touretzky, David S. and Leen, Todd K.},
  title     = {Reinforcement Learning with Soft State Aggregation},
  booktitle = {Advances in Neural Information Processing Systems 7, {[NIPS} Conference, Denver, Colorado, USA, 1994]},
  pages     = {361--368},
  publisher = {{MIT} Press},
  year      = {1994},
  url       = {http://papers.nips.cc/paper/981-reinforcement-learning-with-soft-state-aggregation},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghJJ94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Singh, Barto, Grupen and Connolly, NIPS 6 (1993).
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@inproceedings{DBLP:conf/nips/SinghBGC93,
  author    = {Singh, Satinder and Barto, Andrew G. and Grupen, Roderic A. and Connolly, Christopher I.},
  editor    = {Cowan, Jack D. and Tesauro, Gerald and Alspector, Joshua},
  title     = {Robust Reinforcement Learning in Motion Planning},
  booktitle = {Advances in Neural Information Processing Systems 6, [7th {NIPS} Conference, Denver, Colorado, USA, 1993]},
  pages     = {655--662},
  publisher = {Morgan Kaufmann},
  year      = {1993},
  url       = {http://papers.nips.cc/paper/843-robust-reinforcement-learning-in-motion-planning},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghBGC93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Jaakkola, Jordan and Singh, NIPS 6 (1993).
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@inproceedings{DBLP:conf/nips/JaakkolaJS93,
  author    = {Jaakkola, Tommi S. and Jordan, Michael I. and Singh, Satinder},
  editor    = {Cowan, Jack D. and Tesauro, Gerald and Alspector, Joshua},
  title     = {Convergence of Stochastic Iterative Dynamic Programming Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 6, [7th {NIPS} Conference, Denver, Colorado, USA, 1993]},
  pages     = {703--710},
  publisher = {Morgan Kaufmann},
  year      = {1993},
  url       = {http://papers.nips.cc/paper/764-convergence-of-stochastic-iterative-dynamic-programming-algorithms},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/JaakkolaJS93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Singh, Mach. Learn. 8, 1992 (the record carries no issue number).
% The name uses the "Last, First" form; the field contents are otherwise unchanged.
@article{DBLP:journals/ml/Singh92,
  author    = {Singh, Satinder Pal},
  title     = {Transfer of Learning by Composing Solutions of Elemental Sequential Tasks},
  journal   = {Mach. Learn.},
  volume    = {8},
  pages     = {323--339},
  year      = {1992},
  url       = {https://doi.org/10.1007/BF00992700},
  doi       = {10.1007/BF00992700},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ml/Singh92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Singh, AAAI 1992.
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@inproceedings{DBLP:conf/aaai/Singh92,
  author    = {Singh, Satinder P.},
  editor    = {Swartout, William R.},
  title     = {Reinforcement Learning with a Hierarchy of Abstract Models},
  booktitle = {Proceedings of the 10th National Conference on Artificial Intelligence, San Jose, CA, USA, July 12-16, 1992},
  pages     = {202--207},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {1992},
  url       = {http://www.aaai.org/Library/AAAI/1992/aaai92-032.php},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/Singh92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Singh, Ninth International Workshop on Machine Learning (ML 1992).
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@inproceedings{DBLP:conf/icml/Singh92,
  author    = {Singh, Satinder P.},
  editor    = {Sleeman, Derek H. and Edwards, Peter},
  title     = {Scaling Reinforcement Learning Algorithms by Learning Variable Temporal Resolution Models},
  booktitle = {Proceedings of the Ninth International Workshop on Machine Learning {(ML} 1992), Aberdeen, Scotland, UK, July 1-3, 1992},
  pages     = {406--415},
  publisher = {Morgan Kaufmann},
  year      = {1992},
  url       = {https://doi.org/10.1016/b978-1-55860-247-2.50058-9},
  doi       = {10.1016/B978-1-55860-247-2.50058-9},
  timestamp = {Fri, 21 Jun 2019 11:43:03 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/Singh92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Singh, Eighth International Workshop on Machine Learning (ML91), 1991.
% The title word is spelled "Sequential".
@inproceedings{DBLP:conf/icml/Singh91,
  author       = {Satinder P. Singh},
  editor       = {Lawrence Birnbaum and
                  Gregg Collins},
  title        = {Transfer of Learning Across Compositions of Sequential Tasks},
  booktitle    = {Proceedings of the Eighth International Workshop (ML91), Northwestern
                  University, Evanston, Illinois, {USA}},
  pages        = {348--352},
  publisher    = {Morgan Kaufmann},
  year         = {1991},
  url          = {https://doi.org/10.1016/b978-1-55860-200-7.50072-6},
  doi          = {10.1016/B978-1-55860-200-7.50072-6},
  timestamp    = {Wed, 19 Jun 2019 17:09:09 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/Singh91.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Singh, NIPS 4 (1991).
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@inproceedings{DBLP:conf/nips/Singh91,
  author    = {Singh, Satinder},
  editor    = {Moody, John E. and Hanson, Stephen Jose and Lippmann, Richard},
  title     = {The Efficient Learning of Multiple Task Sequences},
  booktitle = {Advances in Neural Information Processing Systems 4, {[NIPS} Conference, Denver, Colorado, USA, December 2-5, 1991]},
  pages     = {251--258},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {http://papers.nips.cc/paper/569-the-efficient-learning-of-multiple-task-sequences},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/Singh91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper: Berthier, Singh, Barto and Houk, NIPS 4 (1991).
% Names use the "Last, First" form; the field contents are otherwise unchanged.
@inproceedings{DBLP:conf/nips/BerthierSBH91,
  author    = {Berthier, N. E. and Singh, Satinder P. and Barto, Andrew G. and Houk, James C.},
  editor    = {Moody, John E. and Hanson, Stephen Jose and Lippmann, Richard},
  title     = {A Cortico-Cerebellar Model that Learns to Generate Distributed Motor Commands to Control a Kinematic Arm},
  booktitle = {Advances in Neural Information Processing Systems 4, {[NIPS} Conference, Denver, Colorado, USA, December 2-5, 1991]},
  pages     = {611--618},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {http://papers.nips.cc/paper/532-a-cortico-cerebellar-model-that-learns-to-generate-distributed-motor-commands-to-control-a-kinematic-arm},
  timestamp = {Fri, 06 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/BerthierSBH91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics