Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Satinder Singh 0001
@article{DBLP:journals/corr/abs-2402-15391,
  author     = {Jake Bruce and
                Michael Dennis and
                Ashley Edwards and
                Jack Parker{-}Holder and
                Yuge Shi and
                Edward Hughes and
                Matthew Lai and
                Aditi Mavalankar and
                Richie Steigerwald and
                Chris Apps and
                Yusuf Aytar and
                Sarah Bechtle and
                Feryal M. P. Behbahani and
                Stephanie Chan and
                Nicolas Heess and
                Lucy Gonzalez and
                Simon Osindero and
                Sherjil Ozair and
                Scott E. Reed and
                Jingwei Zhang and
                Konrad Zolna and
                Jeff Clune and
                Nando de Freitas and
                Satinder Singh and
                Tim Rockt{\"{a}}schel},
  title      = {Genie: Generative Interactive Environments},
  journal    = {CoRR},
  volume     = {abs/2402.15391},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.15391},
  doi        = {10.48550/ARXIV.2402.15391},
  eprinttype = {arXiv},
  eprint     = {2402.15391},
  timestamp  = {Fri, 22 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-15391.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/ZhangDS23,
  author     = {Qi Zhang and
                Edmund H. Durfee and
                Satinder Singh},
  title      = {Risk-aware analysis for interpretations of probabilistic achievement
                and maintenance commitments},
  journal    = {Artif. Intell.},
  volume     = {317},
  pages      = {103864},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.artint.2023.103864},
  doi        = {10.1016/J.ARTINT.2023.103864},
  timestamp  = {Sat, 11 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ai/ZhangDS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/gecco/LangeSCZD00F23,
  author     = {Robert Tjarko Lange and
                Tom Schaul and
                Yutian Chen and
                Tom Zahavy and
                Valentin Dalibard and
                Chris Lu and
                Satinder Singh and
                Sebastian Flennerhag},
  editor     = {Sara Silva and
                Lu{\'{\i}}s Paquete},
  title      = {Discovering Evolution Strategies via Meta-Black-Box Optimization},
  booktitle  = {Companion Proceedings of the Conference on Genetic and Evolutionary
                Computation, {GECCO} 2023, Companion Volume, Lisbon, Portugal,
                July 15-19, 2023},
  pages      = {29--30},
  publisher  = {{ACM}},
  year       = {2023},
  url        = {https://doi.org/10.1145/3583133.3595822},
  doi        = {10.1145/3583133.3595822},
  timestamp  = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/gecco/LangeSCZD00F23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/CarvalhoFLL023,
  author     = {Wilka Carvalho and
                Angelos Filos and
                Richard L. Lewis and
                Honglak Lee and
                Satinder Singh},
  title      = {Composing Task Knowledge With Modular Successor Feature Approximators},
  booktitle  = {The Eleventh International Conference on Learning Representations,
                {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=DrtSx1z40Ib},
  timestamp  = {Fri, 30 Jun 2023 14:38:38 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/CarvalhoFLL023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/LangeSCZD00F23,
  author     = {Robert Tjarko Lange and
                Tom Schaul and
                Yutian Chen and
                Tom Zahavy and
                Valentin Dalibard and
                Chris Lu and
                Satinder Singh and
                Sebastian Flennerhag},
  title      = {Discovering Evolution Strategies via Meta-Black-Box Optimization},
  booktitle  = {The Eleventh International Conference on Learning Representations,
                {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=mFDU0fP3EQH},
  timestamp  = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iclr/LangeSCZD00F23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/LaskinWOPSSSHFB23,
  author     = {Michael Laskin and
                Luyu Wang and
                Junhyuk Oh and
                Emilio Parisotto and
                Stephen Spencer and
                Richie Steigerwald and
                DJ Strouse and
                Steven Stenberg Hansen and
                Angelos Filos and
                Ethan A. Brooks and
                Maxime Gazeau and
                Himanshu Sahni and
                Satinder Singh and
                Volodymyr Mnih},
  title      = {In-context Reinforcement Learning with Algorithm Distillation},
  booktitle  = {The Eleventh International Conference on Learning Representations,
                {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=hy0a5MMPUv},
  timestamp  = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/LaskinWOPSSSHFB23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{ICLR 2023 paper. DOMiNO is brace-protected in the title so sentence-casing styles keep its mixed capitalisation.}
@inproceedings{DBLP:conf/iclr/ZahavySBBFH023,
  author     = {Tom Zahavy and
                Yannick Schroecker and
                Feryal M. P. Behbahani and
                Kate Baumli and
                Sebastian Flennerhag and
                Shaobo Hou and
                Satinder Singh},
  title      = {Discovering Policies with {DOMiNO}: Diversity Optimization Maintaining
                Near Optimality},
  booktitle  = {The Eleventh International Conference on Learning Representations,
                {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=kjkdzBW3b8p},
  timestamp  = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/ZahavySBBFH023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/BauerBBBBCCCDGG23,
  author     = {Jakob Bauer and
                Kate Baumli and
                Feryal M. P. Behbahani and
                Avishkar Bhoopchand and
                Nathalie Bradley{-}Schmieg and
                Michael Chang and
                Natalie Clay and
                Adrian Collister and
                Vibhavari Dasagi and
                Lucy Gonzalez and
                Karol Gregor and
                Edward Hughes and
                Sheleem Kashem and
                Maria Loks{-}Thompson and
                Hannah Openshaw and
                Jack Parker{-}Holder and
                Shreya Pathak and
                Nicolas Perez Nieves and
                Nemanja Rakicevic and
                Tim Rockt{\"{a}}schel and
                Yannick Schroecker and
                Satinder Singh and
                Jakub Sygnowski and
                Karl Tuyls and
                Sarah York and
                Alexander Zacherl and
                Lei M. Zhang},
  editor     = {Andreas Krause and
                Emma Brunskill and
                Kyunghyun Cho and
                Barbara Engelhardt and
                Sivan Sabato and
                Jonathan Scarlett},
  title      = {Human-Timescale Adaptation in an Open-Ended Task Space},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29
                July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {1887--1935},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/bauer23a.html},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/BauerBBBBCCCDGG23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{ICML 2023 paper. ReLOAD and MDPs are brace-protected in the title so sentence-casing styles keep their capitalisation.}
@inproceedings{DBLP:conf/icml/MoskovitzOVF0Z23,
  author     = {Ted Moskovitz and
                Brendan O'Donoghue and
                Vivek Veeriah and
                Sebastian Flennerhag and
                Satinder Singh and
                Tom Zahavy},
  editor     = {Andreas Krause and
                Emma Brunskill and
                Kyunghyun Cho and
                Barbara Engelhardt and
                Sivan Sabato and
                Jonathan Scarlett},
  title      = {{ReLOAD}: Reinforcement Learning with Optimistic Ascent-Descent for
                Last-Iterate Convergence in Constrained {MDPs}},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29
                July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {25303--25336},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/moskovitz23a.html},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/MoskovitzOVF0Z23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/0001SGPF0B23,
  author     = {Chris Lu and
                Yannick Schroecker and
                Albert Gu and
                Emilio Parisotto and
                Jakob N. Foerster and
                Satinder Singh and
                Feryal M. P. Behbahani},
  editor     = {Alice Oh and
                Tristan Naumann and
                Amir Globerson and
                Kate Saenko and
                Moritz Hardt and
                Sergey Levine},
  title      = {Structured State Space Models for In-Context Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference
                on Neural Information Processing Systems 2023, NeurIPS 2023, New
                Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper\_files/paper/2023/hash/92d3d2a9801211ca3693ccb2faa1316f-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/0001SGPF0B23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BrooksWL023,
  author     = {Ethan A. Brooks and
                Logan Walls and
                Richard L. Lewis and
                Satinder Singh},
  editor     = {Alice Oh and
                Tristan Naumann and
                Amir Globerson and
                Kate Saenko and
                Moritz Hardt and
                Sergey Levine},
  title      = {Large Language Models can Implement Policy Iteration},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference
                on Neural Information Processing Systems 2023, NeurIPS 2023, New
                Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper\_files/paper/2023/hash/60dc7fa827f5f761ad481e2ad40b5573-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/BrooksWL023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/Carvalho0FLMLL023,
  author     = {Wilka Carvalho and
                Andre Saraiva and
                Angelos Filos and
                Andrew K. Lampinen and
                Loic Matthey and
                Richard L. Lewis and
                Honglak Lee and
                Satinder Singh and
                Danilo Jimenez Rezende and
                Daniel Zoran},
  editor     = {Alice Oh and
                Tristan Naumann and
                Amir Globerson and
                Kate Saenko and
                Moritz Hardt and
                Sergey Levine},
  title      = {Combining Behaviors with the Successor Features Keyboard},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference
                on Neural Information Processing Systems 2023, NeurIPS 2023, New
                Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper\_files/paper/2023/hash/1f69928210578f4cf5b538a8c8806798-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/Carvalho0FLMLL023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-03236,
  author     = {Sebastian Flennerhag and
                Tom Zahavy and
                Brendan O'Donoghue and
                Hado van Hasselt and
                Andr{\'{a}}s Gy{\"{o}}rgy and
                Satinder Singh},
  title      = {Optimistic Meta-Gradients},
  journal    = {CoRR},
  volume     = {abs/2301.03236},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.03236},
  doi        = {10.48550/ARXIV.2301.03236},
  eprinttype = {arXiv},
  eprint     = {2301.03236},
  timestamp  = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-03236.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-12305,
  author     = {Wilka Carvalho and
                Angelos Filos and
                Richard L. Lewis and
                Honglak Lee and
                Satinder Singh},
  title      = {Composing Task Knowledge with Modular Successor Feature Approximators},
  journal    = {CoRR},
  volume     = {abs/2301.12305},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.12305},
  doi        = {10.48550/ARXIV.2301.12305},
  eprinttype = {arXiv},
  eprint     = {2301.12305},
  timestamp  = {Wed, 01 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-12305.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. ReLOAD and MDPs are brace-protected in the title so sentence-casing styles keep their capitalisation.}
@article{DBLP:journals/corr/abs-2302-01275,
  author     = {Ted Moskovitz and
                Brendan O'Donoghue and
                Vivek Veeriah and
                Sebastian Flennerhag and
                Satinder Singh and
                Tom Zahavy},
  title      = {{ReLOAD}: Reinforcement Learning with Optimistic Ascent-Descent for
                Last-Iterate Convergence in Constrained {MDPs}},
  journal    = {CoRR},
  volume     = {abs/2302.01275},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.01275},
  doi        = {10.48550/ARXIV.2302.01275},
  eprinttype = {arXiv},
  eprint     = {2302.01275},
  timestamp  = {Thu, 09 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-01275.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. 3D is brace-protected in the title so sentence-casing styles do not render it as 3d.}
@article{DBLP:journals/corr/abs-2302-14451,
  author     = {Bernardo {\'{A}}vila Pires and
                Feryal M. P. Behbahani and
                Hubert Soyer and
                Kyriacos Nikiforou and
                Thomas Keck and
                Satinder Singh},
  title      = {Hierarchical Reinforcement Learning in Complex {3D} Environments},
  journal    = {CoRR},
  volume     = {abs/2302.14451},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.14451},
  doi        = {10.48550/ARXIV.2302.14451},
  eprinttype = {arXiv},
  eprint     = {2302.14451},
  timestamp  = {Fri, 03 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-14451.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2303-03982,
  author     = {Chris Lu and
                Yannick Schroecker and
                Albert Gu and
                Emilio Parisotto and
                Jakob N. Foerster and
                Satinder Singh and
                Feryal M. P. Behbahani},
  title      = {Structured State Space Models for In-Context Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2303.03982},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.03982},
  doi        = {10.48550/ARXIV.2303.03982},
  eprinttype = {arXiv},
  eprint     = {2303.03982},
  timestamp  = {Tue, 18 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-03982.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. The compound surname Van Roy is braced so BibTeX does not parse Van as part of the given name.}
@article{DBLP:journals/corr/abs-2307-11044,
  author     = {David Abel and
                Andr{\'{e}} Barreto and
                Hado van Hasselt and
                Benjamin {Van Roy} and
                Doina Precup and
                Satinder Singh},
  title      = {On the Convergence of Bounded Agents},
  journal    = {CoRR},
  volume     = {abs/2307.11044},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.11044},
  doi        = {10.48550/ARXIV.2307.11044},
  eprinttype = {arXiv},
  eprint     = {2307.11044},
  timestamp  = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-11044.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. The compound surname Van Roy is braced so BibTeX does not parse Van as part of the given name.}
@article{DBLP:journals/corr/abs-2307-11046,
  author     = {David Abel and
                Andr{\'{e}} Barreto and
                Benjamin {Van Roy} and
                Doina Precup and
                Hado van Hasselt and
                Satinder Singh},
  title      = {A Definition of Continual Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2307.11046},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.11046},
  doi        = {10.48550/ARXIV.2307.11046},
  eprinttype = {arXiv},
  eprint     = {2307.11046},
  timestamp  = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-11046.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. AlphaZero is brace-protected in the title so sentence-casing styles keep its capitalisation.}
@article{DBLP:journals/corr/abs-2308-09175,
  author     = {Tom Zahavy and
                Vivek Veeriah and
                Shaobo Hou and
                Kevin Waugh and
                Matthew Lai and
                Edouard Leurent and
                Nenad Tomasev and
                Lisa Schut and
                Demis Hassabis and
                Satinder Singh},
  title      = {Diversifying {AI:} Towards Creative Chess with {AlphaZero}},
  journal    = {CoRR},
  volume     = {abs/2308.09175},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2308.09175},
  doi        = {10.48550/ARXIV.2308.09175},
  eprinttype = {arXiv},
  eprint     = {2308.09175},
  timestamp  = {Fri, 25 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2308-09175.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2310-15940,
  author     = {Wilka Carvalho and
                Andre Saraiva and
                Angelos Filos and
                Andrew Kyle Lampinen and
                Loic Matthey and
                Richard L. Lewis and
                Honglak Lee and
                Satinder Singh and
                Danilo J. Rezende and
                Daniel Zoran},
  title      = {Combining Behaviors with the Successor Features Keyboard},
  journal    = {CoRR},
  volume     = {abs/2310.15940},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.15940},
  doi        = {10.48550/ARXIV.2310.15940},
  eprinttype = {arXiv},
  eprint     = {2310.15940},
  timestamp  = {Tue, 31 Oct 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-15940.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{AAAI 2022 paper. The booktitle spells Twelfth correctly and separates the EAAI 2022 and Virtual Event segments with a comma.}
@inproceedings{DBLP:conf/aaai/ZhengVL022,
  author     = {Zeyu Zheng and
                Risto Vuorio and
                Richard L. Lewis and
                Satinder Singh},
  title      = {Adaptive Pairwise Weights for Temporal Credit Assignment},
  booktitle  = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
                2022, Thirty-Fourth Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational
                Advances in Artificial Intelligence, {EAAI} 2022, Virtual Event,
                February 22 - March 1, 2022},
  pages      = {9225--9232},
  publisher  = {{AAAI} Press},
  year       = {2022},
  url        = {https://doi.org/10.1609/aaai.v36i8.20909},
  doi        = {10.1609/AAAI.V36I8.20909},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/ZhengVL022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/collas/LuketinaFSAZ022,
  author     = {Jelena Luketina and
                Sebastian Flennerhag and
                Yannick Schroecker and
                David Abel and
                Tom Zahavy and
                Satinder Singh},
  editor     = {Sarath Chandar and
                Razvan Pascanu and
                Doina Precup},
  title      = {Meta-Gradients in Non-Stationary Environments},
  booktitle  = {Conference on Lifelong Learning Agents, CoLLAs 2022, 22-24 August
                2022, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {199},
  pages      = {886--901},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v199/luketina22a.html},
  timestamp  = {Fri, 17 Feb 2023 16:29:10 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/LuketinaFSAZ022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/FlennerhagSZHS022,
  author     = {Sebastian Flennerhag and
                Yannick Schroecker and
                Tom Zahavy and
                Hado van Hasselt and
                David Silver and
                Satinder Singh},
  title      = {Bootstrapped Meta-Learning},
  booktitle  = {The Tenth International Conference on Learning Representations,
                {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=b-ny3x071E5},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/FlennerhagSZHS022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{IJCAI 2022 extended abstract. The editor surname De Raedt is braced so it is not split, and Markov is brace-protected in the title against sentence-casing.}
@inproceedings{DBLP:conf/ijcai/AbelDHHLP022,
  author     = {David Abel and
                Will Dabney and
                Anna Harutyunyan and
                Mark K. Ho and
                Michael L. Littman and
                Doina Precup and
                Satinder Singh},
  editor     = {Luc {De Raedt}},
  title      = {On the Expressivity of {Markov} Reward (Extended Abstract)},
  booktitle  = {Proceedings of the Thirty-First International Joint Conference on
                Artificial Intelligence, {IJCAI} 2022, Vienna, Austria, 23-29 July
                2022},
  pages      = {5254--5258},
  publisher  = {ijcai.org},
  year       = {2022},
  url        = {https://doi.org/10.24963/ijcai.2022/730},
  doi        = {10.24963/IJCAI.2022/730},
  timestamp  = {Wed, 27 Jul 2022 16:43:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/AbelDHHLP022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{NeurIPS 2022 paper. BAMDPs is brace-protected in the title against sentence-casing; editor given names are stored in full so the style can abbreviate them.}
@inproceedings{DBLP:conf/nips/Arumugam022,
  author     = {Dilip Arumugam and
                Satinder Singh},
  editor     = {Sanmi Koyejo and
                Shakir Mohamed and
                Alekh Agarwal and
                Danielle Belgrave and
                Kyunghyun Cho and
                Alice Oh},
  title      = {Planning to the Information Horizon of {BAMDPs} via Epistemic State
                Abstraction},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference
                on Neural Information Processing Systems 2022, NeurIPS 2022, New
                Orleans, LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper\_files/paper/2022/hash/80b7bec60081f95d900973509744a306-Abstract-Conference.html},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/Arumugam022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{NeurIPS 2022 paper. Editor given names are stored in full so the bibliography style can decide whether to abbreviate them.}
@inproceedings{DBLP:conf/nips/GrimmBS22,
  author     = {Christopher Grimm and
                Andr{\'{e}} Barreto and
                Satinder Singh},
  editor     = {Sanmi Koyejo and
                Shakir Mohamed and
                Alekh Agarwal and
                Danielle Belgrave and
                Kyunghyun Cho and
                Alice Oh},
  title      = {Approximate Value Equivalence},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference
                on Neural Information Processing Systems 2022, NeurIPS 2022, New
                Orleans, LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper\_files/paper/2022/hash/d53538ba21c05fa361d2b21704172753-Abstract-Conference.html},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/GrimmBS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{NeurIPS 2022 paper. Editor given names are stored in full so the bibliography style can decide whether to abbreviate them.}
@inproceedings{DBLP:conf/nips/LiuZM022,
  author     = {Hao Liu and
                Tom Zahavy and
                Volodymyr Mnih and
                Satinder Singh},
  editor     = {Sanmi Koyejo and
                Shakir Mohamed and
                Alekh Agarwal and
                Danielle Belgrave and
                Kyunghyun Cho and
                Alice Oh},
  title      = {Palm up: Playing in the Latent Manifold for Unsupervised Pretraining},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference
                on Neural Information Processing Systems 2022, NeurIPS 2022, New
                Orleans, LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper\_files/paper/2022/hash/e92381dba235a8309f08ce46376189a9-Abstract-Conference.html},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/LiuZM022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. GrASP is brace-protected in the title against sentence-casing; the doi field follows the arXiv DOI pattern 10.48550/arXiv.ID used by the other CoRR entries.}
@article{DBLP:journals/corr/abs-2202-04772,
  author     = {Vivek Veeriah and
                Zeyu Zheng and
                Richard L. Lewis and
                Satinder Singh},
  title      = {{GrASP}: Gradient-Based Affordance Selection for Planning},
  journal    = {CoRR},
  volume     = {abs/2202.04772},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.04772},
  doi        = {10.48550/ARXIV.2202.04772},
  eprinttype = {arXiv},
  eprint     = {2202.04772},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-04772.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. DOMiNO is brace-protected in the title so sentence-casing styles keep its mixed capitalisation.}
@article{DBLP:journals/corr/abs-2205-13521,
  author     = {Tom Zahavy and
                Yannick Schroecker and
                Feryal M. P. Behbahani and
                Kate Baumli and
                Sebastian Flennerhag and
                Shaobo Hou and
                Satinder Singh},
  title      = {Discovering Policies with {DOMiNO}: Diversity Optimization Maintaining
                Near Optimality},
  journal    = {CoRR},
  volume     = {abs/2205.13521},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.13521},
  doi        = {10.48550/ARXIV.2205.13521},
  eprinttype = {arXiv},
  eprint     = {2205.13521},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-13521.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. The compound surname De Vylder is braced so it is not split, and Stratego is brace-protected in the title against sentence-casing.}
@article{DBLP:journals/corr/abs-2206-15378,
  author     = {Julien P{\'{e}}rolat and
                Bart {De Vylder} and
                Daniel Hennes and
                Eugene Tarassov and
                Florian Strub and
                Vincent de Boer and
                Paul Muller and
                Jerome T. Connor and
                Neil Burch and
                Thomas W. Anthony and
                Stephen McAleer and
                Romuald Elie and
                Sarah H. Cen and
                Zhe Wang and
                Audrunas Gruslys and
                Aleksandra Malysheva and
                Mina Khan and
                Sherjil Ozair and
                Finbarr Timbers and
                Toby Pohlen and
                Tom Eccles and
                Mark Rowland and
                Marc Lanctot and
                Jean{-}Baptiste Lespiau and
                Bilal Piot and
                Shayegan Omidshafiei and
                Edward Lockhart and
                Laurent Sifre and
                Nathalie Beauguerlange and
                R{\'{e}}mi Munos and
                David Silver and
                Satinder Singh and
                Demis Hassabis and
                Karl Tuyls},
  title      = {Mastering the Game of {Stratego} with Model-Free Multiagent Reinforcement
                Learning},
  journal    = {CoRR},
  volume     = {abs/2206.15378},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.15378},
  doi        = {10.48550/ARXIV.2206.15378},
  eprinttype = {arXiv},
  eprint     = {2206.15378},
  timestamp  = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-15378.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2209-06159,
  author     = {Jelena Luketina and
                Sebastian Flennerhag and
                Yannick Schroecker and
                David Abel and
                Tom Zahavy and
                Satinder Singh},
  title      = {Meta-Gradients in Non-Stationary Environments},
  journal    = {CoRR},
  volume     = {abs/2209.06159},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.06159},
  doi        = {10.48550/ARXIV.2209.06159},
  eprinttype = {arXiv},
  eprint     = {2209.06159},
  timestamp  = {Tue, 27 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-06159.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-03821,
  author     = {Ethan A. Brooks and
                Logan Walls and
                Richard L. Lewis and
                Satinder Singh},
  title      = {In-Context Policy Iteration},
  journal    = {CoRR},
  volume     = {abs/2210.03821},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.03821},
  doi        = {10.48550/ARXIV.2210.03821},
  eprinttype = {arXiv},
  eprint     = {2210.03821},
  timestamp  = {Wed, 12 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-03821.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-10913,
  author     = {Hao Liu and
                Tom Zahavy and
                Volodymyr Mnih and
                Satinder Singh},
  title      = {Palm up: Playing in the Latent Manifold for Unsupervised Pretraining},
  journal    = {CoRR},
  volume     = {abs/2210.10913},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.10913},
  doi        = {10.48550/ARXIV.2210.10913},
  eprinttype = {arXiv},
  eprint     = {2210.10913},
  timestamp  = {Tue, 25 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-10913.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-14215,
  author     = {Michael Laskin and
                Luyu Wang and
                Junhyuk Oh and
                Emilio Parisotto and
                Stephen Spencer and
                Richie Steigerwald and
                DJ Strouse and
                Steven Hansen and
                Angelos Filos and
                Ethan A. Brooks and
                Maxime Gazeau and
                Himanshu Sahni and
                Satinder Singh and
                Volodymyr Mnih},
  title      = {In-context Reinforcement Learning with Algorithm Distillation},
  journal    = {CoRR},
  volume     = {abs/2210.14215},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.14215},
  doi        = {10.48550/ARXIV.2210.14215},
  eprinttype = {arXiv},
  eprint     = {2210.14215},
  timestamp  = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-14215.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. BAMDPs is brace-protected in the title so sentence-casing styles keep its capitalisation.}
@article{DBLP:journals/corr/abs-2210-16872,
  author     = {Dilip Arumugam and
                Satinder Singh},
  title      = {Planning to the Information Horizon of {BAMDPs} via Epistemic State
                Abstraction},
  journal    = {CoRR},
  volume     = {abs/2210.16872},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.16872},
  doi        = {10.48550/ARXIV.2210.16872},
  eprinttype = {arXiv},
  eprint     = {2210.16872},
  timestamp  = {Fri, 09 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-16872.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{arXiv preprint. The author name Valentin Dalibard is spelled as in the GECCO and ICLR records of the same paper in this file.}
@article{DBLP:journals/corr/abs-2211-11260,
  author     = {Robert Tjarko Lange and
                Tom Schaul and
                Yutian Chen and
                Tom Zahavy and
                Valentin Dalibard and
                Chris Lu and
                Satinder Singh and
                Sebastian Flennerhag},
  title      = {Discovering Evolution Strategies via Meta-Black-Box Optimization},
  journal    = {CoRR},
  volume     = {abs/2211.11260},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.11260},
  doi        = {10.48550/ARXIV.2211.11260},
  eprinttype = {arXiv},
  eprint     = {2211.11260},
  timestamp  = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-11260.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-14530,
  author     = {Khimya Khetarpal and
                Claire Vernade and
                Brendan O'Donoghue and
                Satinder Singh and
                Tom Zahavy},
  title      = {{POMRL:} No-Regret Learning-to-Plan with Increasing Horizons},
  journal    = {CoRR},
  volume     = {abs/2212.14530},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2212.14530},
  doi        = {10.48550/ARXIV.2212.14530},
  eprinttype = {arXiv},
  eprint     = {2212.14530},
  timestamp  = {Sun, 08 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-14530.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SilverSPS21,
  author     = {David Silver and
                Satinder Singh and
                Doina Precup and
                Richard S. Sutton},
  title      = {Reward is enough},
  journal    = {Artif. Intell.},
  volume     = {299},
  pages      = {103535},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.artint.2021.103535},
  doi        = {10.1016/J.ARTINT.2021.103535},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ai/SilverSPS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/0038DS21,
  author     = {Qi Zhang and
                Edmund H. Durfee and
                Satinder Singh},
  title      = {Efficient Querying for Cooperative Probabilistic Commitments},
  booktitle  = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                2021, Thirty-Third Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational
                Advances in Artificial Intelligence, {EAAI} 2021, Virtual Event,
                February 2-9, 2021},
  pages      = {11378--11386},
  publisher  = {{AAAI} Press},
  year       = {2021},
  url        = {https://doi.org/10.1609/aaai.v35i13.17356},
  doi        = {10.1609/AAAI.V35I13.17356},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/0038DS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/ZahavyBMHOKS21,
  author     = {Tom Zahavy and
                Andr{\'{e}} Barreto and
                Daniel J. Mankowitz and
                Shaobo Hou and
                Brendan O'Donoghue and
                Iurii Kemaev and
                Satinder Singh},
  title      = {Discovering a set of policies for the worst case reward},
  booktitle  = {9th International Conference on Learning Representations, {ICLR}
                2021, Virtual Event, Austria, May 3-7, 2021},
  publisher  = {OpenReview.net},
  year       = {2021},
  url        = {https://openreview.net/forum?id=PUkhWz65dy5},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/ZahavyBMHOKS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{ICML 2021 paper. The PMLR url uses https, matching the other proceedings.mlr.press links in this file.}
@inproceedings{DBLP:conf/icml/BrooksRLS21,
  author     = {Ethan A. Brooks and
                Janarthanan Rajendran and
                Richard L. Lewis and
                Satinder Singh},
  editor     = {Marina Meila and
                Tong Zhang},
  title      = {Reinforcement Learning of Implicit and Explicit Control Flow Instructions},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning,
                {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {1082--1091},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {https://proceedings.mlr.press/v139/brooks21a.html},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/BrooksRLS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{IJCAI 2021 paper. 3D is brace-protected in the title so sentence-casing styles do not render it as 3d.}
@inproceedings{DBLP:conf/ijcai/CarvalhoLLSLLS21,
  author     = {Wilka Carvalho and
                Anthony Liang and
                Kimin Lee and
                Sungryull Sohn and
                Honglak Lee and
                Richard L. Lewis and
                Satinder Singh},
  editor     = {Zhi{-}Hua Zhou},
  title      = {Reinforcement Learning for Sparse-Reward Object-Interaction Tasks
                in a First-person Simulated {3D} Environment},
  booktitle  = {Proceedings of the Thirtieth International Joint Conference on Artificial
                Intelligence, {IJCAI} 2021, Virtual Event / Montreal, Canada, 19-27
                August 2021},
  pages      = {2219--2226},
  publisher  = {ijcai.org},
  year       = {2021},
  url        = {https://doi.org/10.24963/ijcai.2021/306},
  doi        = {10.24963/IJCAI.2021/306},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/CarvalhoLLSLLS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GrimmBFSS21,
  author     = {Christopher Grimm and
                Andr{\'{e}} Barreto and
                Gregory Farquhar and
                David Silver and
                Satinder Singh},
  editor     = {Marc'Aurelio Ranzato and
                Alina Beygelzimer and
                Yann N. Dauphin and
                Percy Liang and
                Jennifer Wortman Vaughan},
  title      = {Proper Value Equivalence},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference
                on Neural Information Processing Systems 2021, NeurIPS 2021, December
                6-14, 2021, virtual},
  pages      = {7773--7786},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/400e5e6a7ce0c754f281525fae75a873-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GrimmBFSS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{NeurIPS 2021 paper. Markov is brace-protected in the title so sentence-casing styles keep the proper noun capitalised.}
@inproceedings{DBLP:conf/nips/AbelDHHLPS21,
  author     = {David Abel and
                Will Dabney and
                Anna Harutyunyan and
                Mark K. Ho and
                Michael L. Littman and
                Doina Precup and
                Satinder Singh},
  editor     = {Marc'Aurelio Ranzato and
                Alina Beygelzimer and
                Yann N. Dauphin and
                Percy Liang and
                Jennifer Wortman Vaughan},
  title      = {On the Expressivity of {Markov} Reward},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference
                on Neural Information Processing Systems 2021, NeurIPS 2021, December
                6-14, 2021, virtual},
  pages      = {7799--7812},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/4079016d940210b4ae9ae7d41c4a2065-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/AbelDHHLPS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZhengVVLS21,
  author     = {Zeyu Zheng and Vivek Veeriah and Risto Vuorio and Richard L. Lewis and Satinder Singh},
  editor     = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title      = {Learning State Representations from Random Deep Action-conditional Predictions},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages      = {23679--23691},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/c71df24045cfddab4a963d3ac9bdc9a3-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/ZhengVVLS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the acronym MDPs is brace-protected so sentence-casing styles do not print "mdps".
@inproceedings{DBLP:conf/nips/ZahavyODS21,
  author     = {Tom Zahavy and Brendan O'Donoghue and Guillaume Desjardins and Satinder Singh},
  editor     = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title      = {Reward is enough for convex {MDPs}},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages      = {25746--25759},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/d7e4cdde82a894b8f633e6d61a01ef15-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/ZahavyODS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahZHXOKHSS21,
  author     = {Vivek Veeriah and Tom Zahavy and Matteo Hessel and Zhongwen Xu and Junhyuk Oh and Iurii Kemaev and Hado van Hasselt and David Silver and Satinder Singh},
  editor     = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title      = {Discovery of Options via Meta-Learned Subgoals},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages      = {29861--29873},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/fa246d0262c3925617b0c72bb20eeb1d-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/VeeriahZHXOKHSS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-04323,
  author     = {Tom Zahavy and Andr{\'{e}} Barreto and Daniel J. Mankowitz and Shaobo Hou and Brendan O'Donoghue and Iurii Kemaev and Satinder Singh},
  title      = {Discovering a set of policies for the worst case reward},
  journal    = {CoRR},
  volume     = {abs/2102.04323},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.04323},
  eprinttype = {arXiv},
  eprint     = {2102.04323},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-04323.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-04897,
  author     = {Zeyu Zheng and Vivek Veeriah and Risto Vuorio and Richard L. Lewis and Satinder Singh},
  title      = {Learning State Representations from Random Deep Action-conditional Predictions},
  journal    = {CoRR},
  volume     = {abs/2102.04897},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.04897},
  eprinttype = {arXiv},
  eprint     = {2102.04897},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-04897.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-04999,
  author     = {Zeyu Zheng and Risto Vuorio and Richard L. Lewis and Satinder Singh},
  title      = {Pairwise Weights for Temporal Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/2102.04999},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.04999},
  eprinttype = {arXiv},
  eprint     = {2102.04999},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-04999.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-06741,
  author     = {Vivek Veeriah and Tom Zahavy and Matteo Hessel and Zhongwen Xu and Junhyuk Oh and Iurii Kemaev and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Discovery of Options via Meta-Learned Subgoals},
  journal    = {CoRR},
  volume     = {abs/2102.06741},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.06741},
  eprinttype = {arXiv},
  eprint     = {2102.06741},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-06741.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-13195,
  author     = {Ethan A. Brooks and Janarthanan Rajendran and Richard L. Lewis and Satinder Singh},
  title      = {Reinforcement Learning of Implicit and Explicit Control Flow in Instructions},
  journal    = {CoRR},
  volume     = {abs/2102.13195},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.13195},
  eprinttype = {arXiv},
  eprint     = {2102.13195},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-13195.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the acronym MDPs is brace-protected so sentence-casing styles do not print "mdps".
@article{DBLP:journals/corr/abs-2106-00661,
  author     = {Tom Zahavy and Brendan O'Donoghue and Guillaume Desjardins and Satinder Singh},
  title      = {Reward is enough for convex {MDPs}},
  journal    = {CoRR},
  volume     = {abs/2106.00661},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.00661},
  eprinttype = {arXiv},
  eprint     = {2106.00661},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-00661.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: restored the missing space in "with Successor" (the record had "withSuccessor").
@article{DBLP:journals/corr/abs-2106-00669,
  author     = {Tom Zahavy and Brendan O'Donoghue and Andr{\'{e}} Barreto and Volodymyr Mnih and Sebastian Flennerhag and Satinder Singh},
  title      = {Discovering Diverse Nearly Optimal Policies with Successor Features},
  journal    = {CoRR},
  volume     = {abs/2106.00669},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.00669},
  eprinttype = {arXiv},
  eprint     = {2106.00669},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-00669.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-10316,
  author     = {Christopher Grimm and Andr{\'{e}} Barreto and Gregory Farquhar and David Silver and Satinder Singh},
  title      = {Proper Value Equivalence},
  journal    = {CoRR},
  volume     = {abs/2106.10316},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.10316},
  eprinttype = {arXiv},
  eprint     = {2106.10316},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-10316.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-04504,
  author     = {Sebastian Flennerhag and Yannick Schroecker and Tom Zahavy and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Bootstrapped Meta-Learning},
  journal    = {CoRR},
  volume     = {abs/2109.04504},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.04504},
  eprinttype = {arXiv},
  eprint     = {2109.04504},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-04504.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2110-15724,
  author     = {Janarthanan Rajendran and Jonathan K. Kummerfeld and Satinder Singh},
  title      = {Learning to Learn End-to-End Goal-Oriented Dialog From Related Dialog Tasks},
  journal    = {CoRR},
  volume     = {abs/2110.15724},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.15724},
  eprinttype = {arXiv},
  eprint     = {2110.15724},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-15724.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the proper noun Markov is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-2111-00876,
  author     = {David Abel and Will Dabney and Anna Harutyunyan and Mark K. Ho and Michael L. Littman and Doina Precup and Satinder Singh},
  title      = {On the Expressivity of {Markov} Reward},
  journal    = {CoRR},
  volume     = {abs/2111.00876},
  year       = {2021},
  url        = {https://arxiv.org/abs/2111.00876},
  eprinttype = {arXiv},
  eprint     = {2111.00876},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2111-00876.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/aamas/ZhangDS20,
  author     = {Qi Zhang and Edmund H. Durfee and Satinder Singh},
  title      = {Semantics and algorithms for trustworthy commitment achievement under model uncertainty},
  journal    = {Auton. Agents Multi Agent Syst.},
  volume     = {34},
  number     = {1},
  pages      = {19},
  year       = {2020},
  url        = {https://doi.org/10.1007/s10458-020-09443-0},
  doi        = {10.1007/S10458-020-09443-0},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/aamas/ZhangDS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the proper noun Markov is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/aaai/ZhangDS20,
  author     = {Shun Zhang and Edmund H. Durfee and Satinder Singh},
  title      = {Querying to Find a Safe Policy under Uncertain Safety Constraints in {Markov} Decision Processes},
  booktitle  = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages      = {2552--2559},
  publisher  = {{AAAI} Press},
  year       = {2020},
  url        = {https://doi.org/10.1609/aaai.v34i03.5638},
  doi        = {10.1609/AAAI.V34I03.5638},
  timestamp  = {Sat, 21 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/ZhangDS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/RajendranLVLS20,
  author     = {Janarthanan Rajendran and Richard L. Lewis and Vivek Veeriah and Honglak Lee and Satinder Singh},
  title      = {How Should an Agent Practice?},
  booktitle  = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages      = {5454--5461},
  publisher  = {{AAAI} Press},
  year       = {2020},
  url        = {https://doi.org/10.1609/aaai.v34i04.5995},
  doi        = {10.1609/AAAI.V34I04.5995},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/RajendranLVLS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/0038DS20,
  author     = {Qi Zhang and Edmund H. Durfee and Satinder Singh},
  title      = {Modeling Probabilistic Commitments for Maintenance Is Inherently Harder than for Achievement},
  booktitle  = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages      = {10326--10333},
  publisher  = {{AAAI} Press},
  year       = {2020},
  url        = {https://doi.org/10.1609/aaai.v34i06.6596},
  doi        = {10.1609/AAAI.V34I06.6596},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/0038DS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/0002JTS20,
  author     = {Aditya Modi and Nan Jiang and Ambuj Tewari and Satinder Singh},
  editor     = {Silvia Chiappa and Roberto Calandra},
  title      = {Sample Complexity of Reinforcement Learning using Linearly Combined Model Ensembles},
  booktitle  = {The 23rd International Conference on Artificial Intelligence and Statistics, {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  series     = {Proceedings of Machine Learning Research},
  volume     = {108},
  pages      = {2010--2020},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {http://proceedings.mlr.press/v108/modi20a.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/0002JTS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Author: "Van Roy" is a two-word surname. In First-Last form BibTeX takes only "Roy" as the
% last name, so that author is written in comma form.
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author     = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Van Roy, Benjamin and Richard S. Sutton and David Silver and Hado van Hasselt},
  title      = {Behaviour Suite for Reinforcement Learning},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/ZhengOHXKHSS20,
  author     = {Zeyu Zheng and Junhyuk Oh and Matteo Hessel and Zhongwen Xu and Manuel Kroiss and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {What Can Learned Intrinsic Rewards Capture?},
  booktitle  = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {119},
  pages      = {11436--11446},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {http://proceedings.mlr.press/v119/zheng20b.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/ZhengOHXKHSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: Diplomacy is the name of a board game; it is brace-protected so sentence-casing
% styles keep its capital.
@inproceedings{DBLP:conf/nips/AnthonyETKGHPLP20,
  author     = {Thomas W. Anthony and Tom Eccles and Andrea Tacchetti and J{\'{a}}nos Kram{\'{a}}r and Ian Gemp and Thomas C. Hudson and Nicolas Porcel and Marc Lanctot and Julien P{\'{e}}rolat and Richard Everett and Satinder Singh and Thore Graepel and Yoram Bachrach},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {Learning to Play No-Press {Diplomacy} with Best Response Policy Iteration},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/d1419302db9c022ab1d48681b13d5f8b-Abstract.html},
  timestamp  = {Wed, 06 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/AnthonyETKGHPLP20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GrimmBSS20,
  author     = {Christopher Grimm and Andr{\'{e}} Barreto and Satinder Singh and David Silver},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/3bb585ea00014b0e3ebe4c6dd165a358-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GrimmBSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/OhHCXHSS20,
  author     = {Junhyuk Oh and Matteo Hessel and Wojciech M. Czarnecki and Zhongwen Xu and Hado van Hasselt and Satinder Singh and David Silver},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {Discovering Reinforcement Learning Algorithms},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/0b96d81f0494fde5428c7aea243c9157-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/OhHCXHSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Author: "Van Roy" is a two-word surname. In First-Last form BibTeX takes only "Roy" as the
% last name, so that author is written in comma form.
@inproceedings{DBLP:conf/nips/WenPIBRS20,
  author     = {Zheng Wen and Doina Precup and Morteza Ibrahimi and Andr{\'{e}} Barreto and Van Roy, Benjamin and Satinder Singh},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {On Efficiency in Hierarchical Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/4a5cfa9281924139db466a8a19291aff-Abstract.html},
  timestamp  = {Thu, 31 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/WenPIBRS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/XuHHOSS20,
  author     = {Zhongwen Xu and Hado Philip van Hasselt and Matteo Hessel and Junhyuk Oh and Satinder Singh and David Silver},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {Meta-Gradient Reinforcement Learning with an Objective Discovered Online},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/ae3d525daf92cee0003a7f2d92c34ea3-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/XuHHOSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZahavyXVHOHSS20,
  author     = {Tom Zahavy and Zhongwen Xu and Vivek Veeriah and Matteo Hessel and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  editor     = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title      = {A Self-Tuning Actor-Critic Algorithm},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/f02208a057804ee16ac72ff4d3cec53b-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/ZahavyXVHOHSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-12928,
  author     = {Tom Zahavy and Zhongwen Xu and Vivek Veeriah and Matteo Hessel and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Self-Tuning Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2002.12928},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.12928},
  eprinttype = {arXiv},
  eprint     = {2002.12928},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-12928.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: Diplomacy is the name of a board game; it is brace-protected so sentence-casing
% styles keep its capital.
@article{DBLP:journals/corr/abs-2006-04635,
  author     = {Thomas W. Anthony and Tom Eccles and Andrea Tacchetti and J{\'{a}}nos Kram{\'{a}}r and Ian Gemp and Thomas C. Hudson and Nicolas Porcel and Marc Lanctot and Julien P{\'{e}}rolat and Richard Everett and Satinder Singh and Thore Graepel and Yoram Bachrach},
  title      = {Learning to Play No-Press {Diplomacy} with Best Response Policy Iteration},
  journal    = {CoRR},
  volume     = {abs/2006.04635},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.04635},
  eprinttype = {arXiv},
  eprint     = {2006.04635},
  timestamp  = {Wed, 06 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-04635.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08433,
  author     = {Zhongwen Xu and Hado van Hasselt and Matteo Hessel and Junhyuk Oh and Satinder Singh and David Silver},
  title      = {Meta-Gradient Reinforcement Learning with an Objective Discovered Online},
  journal    = {CoRR},
  volume     = {abs/2007.08433},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08433},
  eprinttype = {arXiv},
  eprint     = {2007.08433},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08433.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08794,
  author     = {Junhyuk Oh and Matteo Hessel and Wojciech M. Czarnecki and Zhongwen Xu and Hado van Hasselt and Satinder Singh and David Silver},
  title      = {Discovering Reinforcement Learning Algorithms},
  journal    = {CoRR},
  volume     = {abs/2007.08794},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08794},
  eprinttype = {arXiv},
  eprint     = {2007.08794},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08794.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the token 3D is brace-protected so sentence-casing styles do not print "3d".
@article{DBLP:journals/corr/abs-2010-15195,
  author     = {Wilka Carvalho and Anthony Liang and Kimin Lee and Sungryull Sohn and Honglak Lee and Richard L. Lewis and Satinder Singh},
  title      = {Reinforcement Learning for Sparse-Reward Object-Interaction Tasks in First-person Simulated {3D} Environments},
  journal    = {CoRR},
  volume     = {abs/2010.15195},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.15195},
  eprinttype = {arXiv},
  eprint     = {2010.15195},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-15195.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-03506,
  author     = {Christopher Grimm and Andr{\'{e}} Barreto and Satinder Singh and David Silver},
  title      = {The Value Equivalence Principle for Model-Based Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2011.03506},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.03506},
  eprinttype = {arXiv},
  eprint     = {2011.03506},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-03506.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-07195,
  author     = {Qi Zhang and Edmund H. Durfee and Satinder Singh},
  title      = {Efficient Querying for Cooperative Probabilistic Commitments},
  journal    = {CoRR},
  volume     = {abs/2012.07195},
  year       = {2020},
  url        = {https://arxiv.org/abs/2012.07195},
  eprinttype = {arXiv},
  eprint     = {2012.07195},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-07195.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/ZhangLSD19,
  author     = {Qi Zhang and Richard L. Lewis and Satinder Singh and Edmund H. Durfee},
  title      = {Learning to Communicate and Solve Visual Blocks-World Tasks},
  booktitle  = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages      = {5781--5788},
  publisher  = {{AAAI} Press},
  year       = {2019},
  url        = {https://doi.org/10.1609/aaai.v33i01.33015781},
  doi        = {10.1609/AAAI.V33I01.33015781},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/ZhangLSD19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdm/HollerVQTJJSWY19,
  author     = {John Holler and Risto Vuorio and Zhiwei (Tony) Qin and Xiaocheng Tang and Yan Jiao and Tiancheng Jin and Satinder Singh and Chenxi Wang and Jieping Ye},
  editor     = {Jianyong Wang and Kyuseok Shim and Xindong Wu},
  title      = {Deep Reinforcement Learning for Multi-driver Vehicle Dispatching and Repositioning Problem},
  booktitle  = {2019 {IEEE} International Conference on Data Mining, {ICDM} 2019, Beijing, China, November 8-11, 2019},
  pages      = {1090--1095},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/ICDM.2019.00129},
  doi        = {10.1109/ICDM.2019.00129},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icdm/HollerVQTJJSWY19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Editor: the Irish surname is two words (O-acute followed by hEigeartaigh). In First-Last form
% BibTeX takes only the final word as the last name, so that editor is written in comma form.
@inproceedings{DBLP:conf/ijcai/ZhangDS19,
  author     = {Qi Zhang and Edmund H. Durfee and Satinder Singh},
  editor     = {Hu{\'{a}}scar Espinoza and Han Yu and Xiaowei Huang and Freddy L{\'{e}}cu{\'{e}} and Cynthia Chen and Jos{\'{e}} Hern{\'{a}}ndez{-}Orallo and {\'{O}} h{\'{E}}igeartaigh, Se{\'{a}}n and Richard Mallah},
  title      = {Computational Strategies for the Trustworthy Pursuit and the Safe Modeling of Probabilistic Maintenance Commitments},
  booktitle  = {Proceedings of the Workshop on Artificial Intelligence Safety 2019 co-located with the 28th International Joint Conference on Artificial Intelligence, AISafety@IJCAI 2019, Macao, China, August 11-12, 2019},
  series     = {{CEUR} Workshop Proceedings},
  volume     = {2419},
  publisher  = {CEUR-WS.org},
  year       = {2019},
  url        = {https://ceur-ws.org/Vol-2419/paper\_8.pdf},
  timestamp  = {Fri, 10 Mar 2023 16:23:31 +0100},
  biburl     = {https://dblp.org/rec/conf/ijcai/ZhangDS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: Diplomacy is the name of a board game; it is brace-protected so sentence-casing
% styles keep its capital.
@inproceedings{DBLP:conf/nips/PaquetteLBSOKPS19,
  author     = {Philip Paquette and Yuchen Lu and Steven Bocco and Max O. Smith and Satya Ortiz{-}Gagne and Jonathan K. Kummerfeld and Joelle Pineau and Satinder Singh and Aaron C. Courville},
  editor     = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title      = {No-Press {Diplomacy}: Modeling Multi-Agent Gameplay},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {4476--4487},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/84b20b1f5a0d103f5710bb67a043cd78-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/PaquetteLBSOKPS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahHXRLOHSS19,
  author     = {Vivek Veeriah and Matteo Hessel and Zhongwen Xu and Janarthanan Rajendran and Richard L. Lewis and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  editor     = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title      = {Discovery of Useful Questions as Auxiliary Tasks},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {9306--9317},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/10ff0b5e85e5b85cc3095d431d8c08b4-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/VeeriahHXRLOHSS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HarutyunyanDMAP19,
  author     = {Anna Harutyunyan and Will Dabney and Thomas Mesnard and Mohammad Gheshlaghi Azar and Bilal Piot and Nicolas Heess and Hado van Hasselt and Gregory Wayne and Satinder Singh and Doina Precup and R{\'{e}}mi Munos},
  editor     = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title      = {Hindsight Credit Assignment},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages      = {12467--12476},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/195f15384c2a79cedf293e4a847ce85c-Abstract.html},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/HarutyunyanDMAP19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the system name NE-Table is brace-protected; sentence-casing styles keep only the
% first letter of an unprotected leading word and would print "Ne-table".
@inproceedings{DBLP:conf/ranlp/RajendranGGYSP19,
  author     = {Janarthanan Rajendran and Jatin Ganhotra and Xiaoxiao Guo and Mo Yu and Satinder Singh and Lazaros Polymenakos},
  editor     = {Ruslan Mitkov and Galia Angelova},
  title      = {{NE-Table}: {A} Neural key-value table for Named Entities},
  booktitle  = {Proceedings of the International Conference on Recent Advances in Natural Language Processing, {RANLP} 2019, Varna, Bulgaria, September 2-4, 2019},
  pages      = {980--993},
  publisher  = {{INCOMA} Ltd.},
  year       = {2019},
  url        = {https://doi.org/10.26615/978-954-452-056-4\_114},
  doi        = {10.26615/978-954-452-056-4\_114},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ranlp/RajendranGGYSP19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:series/lncs/PriestCSA019,
  author     = {Benjamin W. Priest and George Cybenko and Satinder Singh and Massimiliano Albanese and Peng Liu},
  editor     = {Sushil Jajodia and George Cybenko and Peng Liu and Cliff Wang and Michael P. Wellman},
  title      = {Online and Scalable Adaptive Cyber Defense},
  booktitle  = {Adversarial and Uncertain Reasoning for Adaptive Cyber Defense - Control- and Game-Theoretic Approaches to Cyber Security},
  series     = {Lecture Notes in Computer Science},
  volume     = {11830},
  pages      = {232--261},
  publisher  = {Springer},
  year       = {2019},
  url        = {https://doi.org/10.1007/978-3-030-30719-6\_10},
  doi        = {10.1007/978-3-030-30719-6\_10},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/series/lncs/PriestCSA019.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-08649,
  author     = {Christopher Grimm and Satinder Singh},
  title      = {Learning Independently-Obtainable Reward Functions},
  journal    = {CoRR},
  volume     = {abs/1901.08649},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.08649},
  eprinttype = {arXiv},
  eprint     = {1901.08649},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-08649.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Author: "Van Roy" is a two-word surname. In First-Last form BibTeX takes only "Roy" as the
% last name, so that author is written in comma form.
@article{DBLP:journals/corr/abs-1908-03568,
  author     = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Van Roy, Benjamin and Richard S. Sutton and David Silver and Hado van Hasselt},
  title      = {Behaviour Suite for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1908.03568},
  year       = {2019},
  url        = {http://arxiv.org/abs/1908.03568},
  eprinttype = {arXiv},
  eprint     = {1908.03568},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: Diplomacy is the name of a board game; it is brace-protected so sentence-casing
% styles keep its capital.
@article{DBLP:journals/corr/abs-1909-02128,
  author     = {Philip Paquette and Yuchen Lu and Steven Bocco and Max O. Smith and Satya Ortiz{-}Gagne and Jonathan K. Kummerfeld and Satinder Singh and Joelle Pineau and Aaron C. Courville},
  title      = {No Press {Diplomacy}: Modeling Multi-Agent Gameplay},
  journal    = {CoRR},
  volume     = {abs/1909.02128},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.02128},
  eprinttype = {arXiv},
  eprint     = {1909.02128},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-02128.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1909.04607), 2019.
@article{DBLP:journals/corr/abs-1909-04607,
  author     = {Vivek Veeriah and Matteo Hessel and Zhongwen Xu and Richard L. Lewis and Janarthanan Rajendran and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Discovery of Useful Questions as Auxiliary Tasks},
  journal    = {CoRR},
  volume     = {abs/1909.04607},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.04607},
  eprinttype = {arXiv},
  eprint     = {1909.04607},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-04607.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1910.10597), 2019.
@article{DBLP:journals/corr/abs-1910-10597,
  author     = {Aditya Modi and Nan Jiang and Ambuj Tewari and Satinder Singh},
  title      = {Sample Complexity of Reinforcement Learning using Linearly Combined Model Ensembles},
  journal    = {CoRR},
  volume     = {abs/1910.10597},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.10597},
  eprinttype = {arXiv},
  eprint     = {1910.10597},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-10597.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1910.14361), 2019.
@article{DBLP:journals/corr/abs-1910-14361,
  author     = {Victor Bapst and Alvaro Sanchez{-}Gonzalez and Omar Shams and Kimberly L. Stachenfeld and Peter W. Battaglia and Satinder Singh and Jessica B. Hamrick},
  title      = {Object-oriented state editing for {HRL}},
  journal    = {CoRR},
  volume     = {abs/1910.14361},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.14361},
  eprinttype = {arXiv},
  eprint     = {1910.14361},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-14361.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1911.10866), 2019.
@article{DBLP:journals/corr/abs-1911-10866,
  author     = {Christopher Grimm and Irina Higgins and Andr{\'{e}} Barreto and Denis Teplyashin and Markus Wulfmeier and Tim Hertweck and Raia Hadsell and Satinder Singh},
  title      = {Disentangled Cumulants Help Successor Representations Transfer to New Tasks},
  journal    = {CoRR},
  volume     = {abs/1911.10866},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.10866},
  eprinttype = {arXiv},
  eprint     = {1911.10866},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-10866.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1911.11260), 2019.
@article{DBLP:journals/corr/abs-1911-11260,
  author     = {John Holler and Risto Vuorio and Zhiwei (Tony) Qin and Xiaocheng Tang and Yan Jiao and Tiancheng Jin and Satinder Singh and Chenxi Wang and Jieping Ye},
  title      = {Deep Reinforcement Learning for Multi-Driver Vehicle Dispatching and Repositioning Problem},
  journal    = {CoRR},
  volume     = {abs/1911.11260},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.11260},
  eprinttype = {arXiv},
  eprint     = {1911.11260},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-11260.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1912.02503), 2019.
@article{DBLP:journals/corr/abs-1912-02503,
  author     = {Anna Harutyunyan and Will Dabney and Thomas Mesnard and Mohammad Gheshlaghi Azar and Bilal Piot and Nicolas Heess and Hado van Hasselt and Greg Wayne and Satinder Singh and Doina Precup and R{\'{e}}mi Munos},
  title      = {Hindsight Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/1912.02503},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.02503},
  eprinttype = {arXiv},
  eprint     = {1912.02503},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-02503.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1912.05500), 2019.
@article{DBLP:journals/corr/abs-1912-05500,
  author     = {Zeyu Zheng and Junhyuk Oh and Matteo Hessel and Zhongwen Xu and Manuel Kroiss and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {What Can Learned Intrinsic Rewards Capture?},
  journal    = {CoRR},
  volume     = {abs/1912.05500},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.05500},
  eprinttype = {arXiv},
  eprint     = {1912.05500},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-05500.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1912.07045), 2019.
@article{DBLP:journals/corr/abs-1912-07045,
  author     = {Janarthanan Rajendran and Richard L. Lewis and Vivek Veeriah and Honglak Lee and Satinder Singh},
  title      = {How Should an Agent Practice?},
  journal    = {CoRR},
  volume     = {abs/1912.07045},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.07045},
  eprinttype = {arXiv},
  eprint     = {1912.07045},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-07045.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, 2018. The journal is stored under its full name instead of
% an abbreviation, so the bibliography style decides whether to abbreviate.
% Same authors and near-identical title as DBLP:conf/ccs/NguyenWWS17
% (presumably the earlier workshop version; confirm before citing both).
@article{DBLP:journals/scn/NguyenWWS18,
  author     = {Thanh Hong Nguyen and Mason Wright and Michael P. Wellman and Satinder Singh},
  title      = {Multistage Attack Graph Security Games: Heuristic Strategies, with Empirical Game-Theoretic Analysis},
  journal    = {Security and Communication Networks},
  volume     = {2018},
  pages      = {2864873:1--2864873:28},
  year       = {2018},
  url        = {https://doi.org/10.1155/2018/2864873},
  doi        = {10.1155/2018/2864873},
  timestamp  = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/scn/NguyenWWS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ALT 2018 paper (PMLR volume 83).
% Same authors and title as the preprint DBLP:journals/corr/abs-1711-05726.
@inproceedings{DBLP:conf/alt/0002JST18,
  author     = {Aditya Modi and Nan Jiang and Satinder Singh and Ambuj Tewari},
  editor     = {Firdaus Janoos and Mehryar Mohri and Karthik Sridharan},
  title      = {Markov Decision Processes with Continuous Side Information},
  booktitle  = {Algorithmic Learning Theory, {ALT} 2018, 7-9 April 2018, Lanzarote, Canary Islands, Spain},
  series     = {Proceedings of Machine Learning Research},
  volume     = {83},
  pages      = {597--618},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v83/modi18a.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/alt/0002JST18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, 20th International Trust Workshop, 2018 (CEUR-WS volume 2154).
@inproceedings{DBLP:conf/atal/ZhangDS18a,
  author     = {Qi Zhang and Edmund H. Durfee and Satinder Singh},
  editor     = {Robin Cohen and Murat Sensoy and Timothy J. Norman},
  title      = {Challenges in the Trustworthy Pursuit of Maintenance Commitments Under Uncertainty},
  booktitle  = {Proceedings of the 20th International Trust Workshop co-located with {AAMAS/IJCAI/ECAI/ICML} 2018, Stockholm, Sweden, July 14, 2018},
  series     = {{CEUR} Workshop Proceedings},
  volume     = {2154},
  pages      = {75--86},
  publisher  = {CEUR-WS.org},
  year       = {2018},
  url        = {https://ceur-ws.org/Vol-2154/paper7.pdf},
  timestamp  = {Fri, 10 Mar 2023 16:22:57 +0100},
  biburl     = {https://dblp.org/rec/conf/atal/ZhangDS18a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2018 paper.
% Markov is braced in the title so that sentence-casing bibliography styles
% keep the proper noun capitalised.
@inproceedings{DBLP:conf/atal/ZhangDS18,
  author     = {Shun Zhang and Edmund H. Durfee and Satinder Singh},
  editor     = {Elisabeth Andr{\'{e}} and Sven Koenig and Mehdi Dastani and Gita Sukthankar},
  title      = {On Querying for Safe Optimality in Factored {Markov} Decision Processes},
  booktitle  = {Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, {AAMAS} 2018, Stockholm, Sweden, July 10-15, 2018},
  pages      = {2168--2170},
  publisher  = {International Foundation for Autonomous Agents and Multiagent Systems Richland, SC, {USA} / {ACM}},
  year       = {2018},
  url        = {http://dl.acm.org/citation.cfm?id=3238108},
  timestamp  = {Sat, 30 Sep 2023 09:34:53 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/ZhangDS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% EMNLP 2018 paper.
% Same authors and title as the preprint DBLP:journals/corr/abs-1808-09996.
@inproceedings{DBLP:conf/emnlp/RajendranGSP18,
  author     = {Janarthanan Rajendran and Jatin Ganhotra and Satinder Singh and Lazaros Polymenakos},
  editor     = {Ellen Riloff and David Chiang and Julia Hockenmaier and Jun'ichi Tsujii},
  title      = {Learning End-to-End Goal-Oriented Dialog with Multiple Answers},
  booktitle  = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
  pages      = {3834--3843},
  publisher  = {Association for Computational Linguistics},
  year       = {2018},
  url        = {https://doi.org/10.18653/v1/d18-1418},
  doi        = {10.18653/V1/D18-1418},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/emnlp/RajendranGSP18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2018 paper (PMLR volume 80).
% Same authors and title as the preprint DBLP:journals/corr/abs-1806-05635.
@inproceedings{DBLP:conf/icml/OhGSL18,
  author     = {Junhyuk Oh and Yijie Guo and Satinder Singh and Honglak Lee},
  editor     = {Jennifer G. Dy and Andreas Krause},
  title      = {Self-Imitation Learning},
  booktitle  = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series     = {Proceedings of Machine Learning Research},
  volume     = {80},
  pages      = {3875--3884},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v80/oh18b.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/OhGSL18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2018 paper.
% Markov is braced in the title so that sentence-casing bibliography styles
% keep the proper noun capitalised.
@inproceedings{DBLP:conf/ijcai/ZhangDS18,
  author     = {Shun Zhang and Edmund H. Durfee and Satinder Singh},
  editor     = {J{\'{e}}r{\^{o}}me Lang},
  title      = {Minimax-Regret Querying on Side Effects for Safe Optimality in Factored {Markov} Decision Processes},
  booktitle  = {Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence, {IJCAI} 2018, July 13-19, 2018, Stockholm, Sweden},
  pages      = {4867--4873},
  publisher  = {ijcai.org},
  year       = {2018},
  url        = {https://doi.org/10.24963/ijcai.2018/676},
  doi        = {10.24963/IJCAI.2018/676},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/ZhangDS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2018 paper.
@inproceedings{DBLP:conf/nips/JiangKS18,
  author     = {Nan Jiang and Alex Kulesza and Satinder Singh},
  editor     = {Samy Bengio and Hanna M. Wallach and Hugo Larochelle and Kristen Grauman and Nicol{\`{o}} Cesa{-}Bianchi and Roman Garnett},
  title      = {Completing State Representations using Spectral Learning},
  booktitle  = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages      = {4333--4342},
  year       = {2018},
  url        = {https://proceedings.neurips.cc/paper/2018/hash/51174add1c52758f33d414ceaf3fe6ba-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/JiangKS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2018 paper.
% Same authors and title as the preprint DBLP:journals/corr/abs-1804-06459.
@inproceedings{DBLP:conf/nips/ZhengOS18,
  author     = {Zeyu Zheng and Junhyuk Oh and Satinder Singh},
  editor     = {Samy Bengio and Hanna M. Wallach and Hugo Larochelle and Kristen Grauman and Nicol{\`{o}} Cesa{-}Bianchi and Roman Garnett},
  title      = {On Learning Intrinsic Rewards for Policy Gradient Methods},
  booktitle  = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages      = {4649--4659},
  year       = {2018},
  url        = {https://proceedings.neurips.cc/paper/2018/hash/51de85ddd068f0bc787691d356176df9-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/ZhengOS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1803.02940), 2018.
@article{DBLP:journals/corr/abs-1803-02940,
  author     = {Jiaxuan Wang and Ian Fox and Jonathan Skaza and Nick Linck and Satinder Singh and Jenna Wiens},
  title      = {The Advantage of Doubling: {A} Deep Reinforcement Learning Approach to Studying the Double Team in the {NBA}},
  journal    = {CoRR},
  volume     = {abs/1803.02940},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.02940},
  eprinttype = {arXiv},
  eprint     = {1803.02940},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-02940.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1804.06459), 2018.
% Same authors and title as the NeurIPS 2018 entry DBLP:conf/nips/ZhengOS18.
@article{DBLP:journals/corr/abs-1804-06459,
  author     = {Zeyu Zheng and Junhyuk Oh and Satinder Singh},
  title      = {On Learning Intrinsic Rewards for Policy Gradient Methods},
  journal    = {CoRR},
  volume     = {abs/1804.06459},
  year       = {2018},
  url        = {http://arxiv.org/abs/1804.06459},
  eprinttype = {arXiv},
  eprint     = {1804.06459},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-06459.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1804.09540), 2018.
% NE-Table is braced in the title so that sentence-casing bibliography styles
% do not lowercase the name.
@article{DBLP:journals/corr/abs-1804-09540,
  author     = {Janarthanan Rajendran and Jatin Ganhotra and Xiaoxiao Guo and Mo Yu and Satinder Singh},
  title      = {Named Entities troubling your Neural Methods? Build {NE-Table}: {A} neural approach for handling Named Entities},
  journal    = {CoRR},
  volume     = {abs/1804.09540},
  year       = {2018},
  url        = {http://arxiv.org/abs/1804.09540},
  eprinttype = {arXiv},
  eprint     = {1804.09540},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-09540.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1806.05635), 2018.
% Same authors and title as the ICML 2018 entry DBLP:conf/icml/OhGSL18.
@article{DBLP:journals/corr/abs-1806-05635,
  author     = {Junhyuk Oh and Yijie Guo and Satinder Singh and Honglak Lee},
  title      = {Self-Imitation Learning},
  journal    = {CoRR},
  volume     = {abs/1806.05635},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.05635},
  eprinttype = {arXiv},
  eprint     = {1806.05635},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-05635.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1806.09605), 2018.
@article{DBLP:journals/corr/abs-1806-09605,
  author     = {Vivek Veeriah and Junhyuk Oh and Satinder Singh},
  title      = {Many-Goals Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1806.09605},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.09605},
  eprinttype = {arXiv},
  eprint     = {1806.09605},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-09605.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1808.09996), 2018.
% Same authors and title as the EMNLP 2018 entry DBLP:conf/emnlp/RajendranGSP18.
@article{DBLP:journals/corr/abs-1808-09996,
  author     = {Janarthanan Rajendran and Jatin Ganhotra and Satinder Singh and Lazaros Polymenakos},
  title      = {Learning End-to-End Goal-Oriented Dialog with Multiple Answers},
  journal    = {CoRR},
  volume     = {abs/1808.09996},
  year       = {2018},
  url        = {http://arxiv.org/abs/1808.09996},
  eprinttype = {arXiv},
  eprint     = {1808.09996},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1808-09996.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1812.00950), 2018.
@article{DBLP:journals/corr/abs-1812-00950,
  author     = {Yijie Guo and Junhyuk Oh and Satinder Singh and Honglak Lee},
  title      = {Generative Adversarial Self-Imitation Learning},
  journal    = {CoRR},
  volume     = {abs/1812.00950},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.00950},
  eprinttype = {arXiv},
  eprint     = {1812.00950},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-00950.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, AAAI 2017 workshops (AAAI Technical Report WS-17).
% The booktitle no longer repeats the article ("of the The"), and Stackelberg
% is braced in the title so sentence-casing styles keep the proper noun
% capitalised.
@inproceedings{DBLP:conf/aaai/NguyenWS17,
  author     = {Thanh Hong Nguyen and Michael P. Wellman and Satinder Singh},
  title      = {A {Stackelberg} Game Model for Botnet Traffic Exfiltration},
  booktitle  = {The Workshops of the Thirty-First {AAAI} Conference on Artificial Intelligence, Saturday, February 4-9, 2017, San Francisco, California, {USA}},
  series     = {{AAAI} Technical Report},
  volume     = {{WS-17}},
  publisher  = {{AAAI} Press},
  year       = {2017},
  url        = {http://aaai.org/ocs/index.php/WS/AAAIW17/paper/view/15090},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/NguyenWS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ACL 2017 paper (Volume 1: Long Papers).
@inproceedings{DBLP:conf/acl/Perez-RosasMRSA17,
  author     = {Ver{\'{o}}nica P{\'{e}}rez{-}Rosas and Rada Mihalcea and Kenneth Resnicow and Satinder Singh and Lawrence C. An},
  editor     = {Regina Barzilay and Min{-}Yen Kan},
  title      = {Understanding and Predicting Empathic Behavior in Counseling Therapy},
  booktitle  = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics, {ACL} 2017, Vancouver, Canada, July 30 - August 4, Volume 1: Long Papers},
  pages      = {1426--1435},
  publisher  = {Association for Computational Linguistics},
  year       = {2017},
  url        = {https://doi.org/10.18653/v1/P17-1131},
  doi        = {10.18653/V1/P17-1131},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/Perez-RosasMRSA17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICAPS 2017 paper.
% Markov is braced in the title so that sentence-casing bibliography styles
% keep the proper noun capitalised.
@inproceedings{DBLP:conf/aips/ZhangDS17,
  author     = {Shun Zhang and Edmund H. Durfee and Satinder Singh},
  editor     = {Laura Barbulescu and Jeremy Frank and Mausam and Stephen F. Smith},
  title      = {Approximately-Optimal Queries for Planning in Reward-Uncertain {Markov} Decision Processes},
  booktitle  = {Proceedings of the Twenty-Seventh International Conference on Automated Planning and Scheduling, {ICAPS} 2017, Pittsburgh, Pennsylvania, USA, June 18-23, 2017},
  pages      = {339--347},
  publisher  = {{AAAI} Press},
  year       = {2017},
  url        = {https://aaai.org/ocs/index.php/ICAPS/ICAPS17/paper/view/15763},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aips/ZhangDS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICAPS 2017 paper.
% Same authors and title as the preprint DBLP:journals/corr/ZhangSD17.
@inproceedings{DBLP:conf/aips/ZhangSD17,
  author     = {Qi Zhang and Satinder Singh and Edmund H. Durfee},
  editor     = {Laura Barbulescu and Jeremy Frank and Mausam and Stephen F. Smith},
  title      = {Minimizing Maximum Regret in Commitment Constrained Sequential Decision Making},
  booktitle  = {Proceedings of the Twenty-Seventh International Conference on Automated Planning and Scheduling, {ICAPS} 2017, Pittsburgh, Pennsylvania, USA, June 18-23, 2017},
  pages      = {348--357},
  publisher  = {{AAAI} Press},
  year       = {2017},
  url        = {https://aaai.org/ocs/index.php/ICAPS/ICAPS17/paper/view/15766},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aips/ZhangSD17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, 2017 Workshop on Moving Target Defense (MTD, held with CCS 2017).
@inproceedings{DBLP:conf/ccs/NguyenWWS17,
  author     = {Thanh Hong Nguyen and Mason Wright and Michael P. Wellman and Satinder Singh},
  editor     = {Hamed Okhravi and Xinming Ou},
  title      = {Multi-Stage Attack Graph Security Games: Heuristic Strategies, with Empirical Game-Theoretic Analysis},
  booktitle  = {Proceedings of the 2017 Workshop on Moving Target Defense, MTD@CCS 2017, Dallas, TX, USA, October 30, 2017},
  pages      = {87--97},
  publisher  = {{ACM}},
  year       = {2017},
  url        = {https://doi.org/10.1145/3140549.3140562},
  doi        = {10.1145/3140549.3140562},
  timestamp  = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ccs/NguyenWWS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% EACL 2017 paper (Volume 1: Long Papers).
@inproceedings{DBLP:conf/eacl/SinghMPRAGC17,
  author     = {Ver{\'{o}}nica P{\'{e}}rez{-}Rosas and Rada Mihalcea and Kenneth Resnicow and Satinder Singh and Lawrence C. An and Kathy J. Goggin and Delwyn Catley},
  editor     = {Mirella Lapata and Phil Blunsom and Alexander Koller},
  title      = {Predicting Counselor Behaviors in Motivational Interviewing Encounters},
  booktitle  = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics, {EACL} 2017, Valencia, Spain, April 3-7, 2017, Volume 1: Long Papers},
  pages      = {1128--1137},
  publisher  = {Association for Computational Linguistics},
  year       = {2017},
  url        = {https://doi.org/10.18653/v1/e17-1106},
  doi        = {10.18653/V1/E17-1106},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/eacl/SinghMPRAGC17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% GameSec 2017 paper (Lecture Notes in Computer Science volume 10575).
% Stackelberg is braced in the title so sentence-casing styles keep the proper
% noun capitalised; doi and url hold the raw DOI (no backslash before the
% underscore) so verbatim / URL-aware styles emit a working link.
@inproceedings{DBLP:conf/gamesec/NguyenWS17,
  author     = {Thanh Hong Nguyen and Michael P. Wellman and Satinder Singh},
  editor     = {Stefan Rass and Bo An and Christopher Kiekintveld and Fei Fang and Stefan Schauer},
  title      = {A {Stackelberg} Game Model for Botnet Data Exfiltration},
  booktitle  = {Decision and Game Theory for Security - 8th International Conference, GameSec 2017, Vienna, Austria, October 23-25, 2017, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {10575},
  pages      = {151--170},
  publisher  = {Springer},
  year       = {2017},
  url        = {https://doi.org/10.1007/978-3-319-68711-7_9},
  doi        = {10.1007/978-3-319-68711-7_9},
  timestamp  = {Wed, 07 Feb 2024 17:23:12 +0100},
  biburl     = {https://dblp.org/rec/conf/gamesec/NguyenWS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2017 paper (Conference Track Proceedings).
@inproceedings{DBLP:conf/iclr/GuoKRBCKTTS17,
  author     = {Xiaoxiao Guo and Tim Klinger and Clemens Rosenbaum and Joseph P. Bigus and Murray Campbell and Ban Kawas and Kartik Talamadupula and Gerry Tesauro and Satinder Singh},
  title      = {Learning to Query, Reason, and Answer Questions On Ambiguous Texts},
  booktitle  = {5th International Conference on Learning Representations, {ICLR} 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2017},
  url        = {https://openreview.net/forum?id=rJ0-tY5xe},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/GuoKRBCKTTS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2017 paper (PMLR volume 70).
% Same authors and title as the preprint DBLP:journals/corr/OhSLK17.
@inproceedings{DBLP:conf/icml/OhSLK17,
  author     = {Junhyuk Oh and Satinder Singh and Honglak Lee and Pushmeet Kohli},
  editor     = {Doina Precup and Yee Whye Teh},
  title      = {Zero-Shot Task Generalization with Multi-Task Deep Reinforcement Learning},
  booktitle  = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series     = {Proceedings of Machine Learning Research},
  volume     = {70},
  pages      = {2661--2670},
  publisher  = {{PMLR}},
  year       = {2017},
  url        = {http://proceedings.mlr.press/v70/oh17a.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/OhSLK17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Paper in Advances in Neural Information Processing Systems 30 (2017).
% Same authors and title as the preprint DBLP:journals/corr/AminJS17.
@inproceedings{DBLP:conf/nips/AminJS17,
  author     = {Kareem Amin and Nan Jiang and Satinder Singh},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title      = {Repeated Inverse Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {1815--1824},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/8ce6790cc6a94e65f17f908f462fae85-Abstract.html},
  timestamp  = {Mon, 22 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/AminJS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Paper in Advances in Neural Information Processing Systems 30 (2017).
% Same authors and title as the preprint DBLP:journals/corr/OhSL17.
@inproceedings{DBLP:conf/nips/OhSL17,
  author     = {Junhyuk Oh and Satinder Singh and Honglak Lee},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title      = {Value Prediction Network},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {6118--6128},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/ffbd6cbb019a1413183c8d08f2929307-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/OhSL17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Whole-proceedings record for AAAI 2017 (editor entry, no individual paper).
@proceedings{DBLP:conf/aaai/2017,
  editor     = {Satinder Singh and Shaul Markovitch},
  title      = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence, February 4-9, 2017, San Francisco, California, {USA}},
  publisher  = {{AAAI} Press},
  year       = {2017},
  url        = {https://ojs.aaai.org/index.php/AAAI/issue/view/302},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/2017.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1703.04587), 2017.
% Same authors and title as the ICAPS 2017 entry DBLP:conf/aips/ZhangSD17.
@article{DBLP:journals/corr/ZhangSD17,
  author     = {Qi Zhang and Satinder Singh and Edmund H. Durfee},
  title      = {Minimizing Maximum Regret in Commitment Constrained Sequential Decision Making},
  journal    = {CoRR},
  volume     = {abs/1703.04587},
  year       = {2017},
  url        = {http://arxiv.org/abs/1703.04587},
  eprinttype = {arXiv},
  eprint     = {1703.04587},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ZhangSD17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1705.05427), 2017.
% Same authors and title as the conference entry DBLP:conf/nips/AminJS17.
@article{DBLP:journals/corr/AminJS17,
  author     = {Kareem Amin and Nan Jiang and Satinder Singh},
  title      = {Repeated Inverse Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1705.05427},
  year       = {2017},
  url        = {http://arxiv.org/abs/1705.05427},
  eprinttype = {arXiv},
  eprint     = {1705.05427},
  timestamp  = {Mon, 22 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/AminJS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1706.05064), 2017.
% Same authors and title as the ICML 2017 entry DBLP:conf/icml/OhSLK17.
@article{DBLP:journals/corr/OhSLK17,
  author     = {Junhyuk Oh and Satinder Singh and Honglak Lee and Pushmeet Kohli},
  title      = {Zero-Shot Task Generalization with Multi-Task Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1706.05064},
  year       = {2017},
  url        = {http://arxiv.org/abs/1706.05064},
  eprinttype = {arXiv},
  eprint     = {1706.05064},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/OhSLK17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1707.03497), 2017.
% Same authors and title as the conference entry DBLP:conf/nips/OhSL17.
@article{DBLP:journals/corr/OhSL17,
  author     = {Junhyuk Oh and Satinder Singh and Honglak Lee},
  title      = {Value Prediction Network},
  journal    = {CoRR},
  volume     = {abs/1707.03497},
  year       = {2017},
  url        = {http://arxiv.org/abs/1707.03497},
  eprinttype = {arXiv},
  eprint     = {1707.03497},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/OhSL17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1711.05726), 2017.
% Same authors and title as the ALT 2018 entry DBLP:conf/alt/0002JST18.
@article{DBLP:journals/corr/abs-1711-05726,
  author     = {Aditya Modi and Nan Jiang and Satinder Singh and Ambuj Tewari},
  title      = {Markov Decision Processes with Continuous Side Information},
  journal    = {CoRR},
  volume     = {abs/1711.05726},
  year       = {2017},
  url        = {http://arxiv.org/abs/1711.05726},
  eprinttype = {arXiv},
  eprint     = {1711.05726},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1711-05726.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, 2016 (volume 102, number 3).
% The compound surname Van Esbroeck is written in Last, First form; in
% First Last form BibTeX would treat the capitalised Van as part of the given
% name. The journal is stored under its full name so the style decides
% whether to abbreviate.
@article{DBLP:journals/ml/EsbroeckSSSK16,
  author     = {Van Esbroeck, Alexander and Landon Smith and Zeeshan Syed and Satinder Singh and Zahi N. Karam},
  title      = {Multi-task seizure detection: addressing intra-patient variation in seizure morphologies},
  journal    = {Machine Learning},
  volume     = {102},
  number     = {3},
  pages      = {309--321},
  year       = {2016},
  url        = {https://doi.org/10.1007/s10994-015-5519-7},
  doi        = {10.1007/S10994-015-5519-7},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ml/EsbroeckSSSK16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2016 paper.
@inproceedings{DBLP:conf/aaai/JiangKS16,
  author     = {Nan Jiang and Alex Kulesza and Satinder Singh},
  editor     = {Dale Schuurmans and Michael P. Wellman},
  title      = {Improving Predictive State Representations via Gradient Descent},
  booktitle  = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence, February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages      = {1709--1715},
  publisher  = {{AAAI} Press},
  year       = {2016},
  url        = {https://doi.org/10.1609/aaai.v30i1.10270},
  doi        = {10.1609/AAAI.V30I1.10270},
  timestamp  = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/JiangKS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Paper in the AAMAS 2016 Workshops best-papers volume (Lecture Notes in
% Computer Science volume 10002). Same authors and title as the workshop
% entry DBLP:conf/atal/DurfeeS16.
% Changes from the DBLP export: stray dash/comma punctuation around
% "Best Papers" removed from the booktitle; publisher added to match the other
% Lecture Notes in Computer Science entries in this file; doi and url hold the
% raw DOI (no backslash before the underscore).
@inproceedings{DBLP:conf/atal/DurfeeS16a,
  author     = {Edmund H. Durfee and Satinder Singh},
  editor     = {Nardine Osman and Carles Sierra},
  title      = {On the Trustworthy Fulfillment of Commitments},
  booktitle  = {Autonomous Agents and Multiagent Systems - {AAMAS} 2016 Workshops, Best Papers, Singapore, Singapore, May 9-10, 2016, Revised Selected Papers},
  series     = {Lecture Notes in Computer Science},
  volume     = {10002},
  pages      = {1--13},
  publisher  = {Springer},
  year       = {2016},
  url        = {https://doi.org/10.1007/978-3-319-46882-2_1},
  doi        = {10.1007/978-3-319-46882-2_1},
  timestamp  = {Thu, 08 Sep 2022 19:48:31 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/DurfeeS16a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, 18th International Workshop on Trust in Agent Societies, 2016
% (CEUR-WS volume 1578).
% Same authors and title as DBLP:conf/atal/DurfeeS16a.
@inproceedings{DBLP:conf/atal/DurfeeS16,
  author     = {Edmund H. Durfee and Satinder Singh},
  editor     = {Jie Zhang and Robin Cohen and Murat Sensoy},
  title      = {On the Trustworthy Fulfillment of Commitments},
  booktitle  = {Proceedings of the 18th International Workshop on Trust in Agent Societies co-located with the 15th International Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2016), Singapore, Singapore, May 10, 2016},
  series     = {{CEUR} Workshop Proceedings},
  volume     = {1578},
  pages      = {54--62},
  publisher  = {CEUR-WS.org},
  year       = {2016},
  url        = {https://ceur-ws.org/Vol-1578/paper9.pdf},
  timestamp  = {Fri, 10 Mar 2023 16:22:57 +0100},
  biburl     = {https://dblp.org/rec/conf/atal/DurfeeS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2016 paper (JMLR Workshop and Conference Proceedings volume 48).
% The booktitle ordinal is corrected from "33nd" to "33rd", and Minecraft is
% braced in the title so sentence-casing styles keep the proper noun
% capitalised.
@inproceedings{DBLP:conf/icml/OhCSL16,
  author     = {Junhyuk Oh and Valliappa Chockalingam and Satinder Singh and Honglak Lee},
  editor     = {Maria{-}Florina Balcan and Kilian Q. Weinberger},
  title      = {Control of Memory, Active Perception, and Action in {Minecraft}},
  booktitle  = {Proceedings of the 33rd International Conference on Machine Learning, {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {48},
  pages      = {2790--2799},
  publisher  = {JMLR.org},
  year       = {2016},
  url        = {http://proceedings.mlr.press/v48/oh16.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/OhCSL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2016 paper.
% Monte Carlo is braced in the title so that sentence-casing bibliography
% styles keep the proper noun capitalised.
@inproceedings{DBLP:conf/ijcai/GuoSLL16,
  author     = {Xiaoxiao Guo and Satinder Singh and Richard L. Lewis and Honglak Lee},
  editor     = {Subbarao Kambhampati},
  title      = {Deep Learning for Reward Design to Improve {Monte Carlo} Tree Search in {ATARI} Games},
  booktitle  = {Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July 2016},
  pages      = {1519--1525},
  publisher  = {{IJCAI/AAAI} Press},
  year       = {2016},
  url        = {http://www.ijcai.org/Abstract/16/218},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/GuoSLL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2016 paper.
% The acronym MDPs is braced in the title so that sentence-casing bibliography
% styles do not lowercase it.
@inproceedings{DBLP:conf/ijcai/JiangST16,
  author     = {Nan Jiang and Satinder Singh and Ambuj Tewari},
  editor     = {Subbarao Kambhampati},
  title      = {On Structural Properties of {MDPs} that Bound Loss Due to Shallow Planning},
  booktitle  = {Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July 2016},
  pages      = {1640--1647},
  publisher  = {{IJCAI/AAAI} Press},
  year       = {2016},
  url        = {http://www.ijcai.org/Abstract/16/235},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/JiangST16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2016 paper.
@inproceedings{DBLP:conf/ijcai/ZhangDSCW16,
  author     = {Qi Zhang and Edmund H. Durfee and Satinder Singh and Anna Chen and Stefan J. Witwicki},
  editor     = {Subbarao Kambhampati},
  title      = {Commitment Semantics for Sequential Decision Making under Reward Uncertainty},
  booktitle  = {Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July 2016},
  pages      = {3315--3323},
  publisher  = {{IJCAI/AAAI} Press},
  year       = {2016},
  url        = {http://www.ijcai.org/Abstract/16/469},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/ZhangDSCW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2016 conference paper; same authors and title as the AAMAS 2015 record DBLP:conf/atal/JiangKSL15.
@inproceedings{DBLP:conf/ijcai/JiangKSL16,
  author    = {Nan Jiang and Alex Kulesza and Satinder Singh and Richard L. Lewis},
  editor    = {Subbarao Kambhampati},
  title     = {The Dependence of Effective Planning Horizon on Model Accuracy},
  booktitle = {Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July 2016},
  pages     = {4180--4189},
  publisher = {{IJCAI/AAAI} Press},
  year      = {2016},
  url       = {http://www.ijcai.org/Abstract/16/626},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/JiangKSL16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CLPsych workshop paper (NAACL-HLT 2016) on a motivational interviewing dataset.
@inproceedings{DBLP:conf/naacl/Perez-RosasMRSA16,
  author    = {Ver{\'{o}}nica P{\'{e}}rez{-}Rosas and Rada Mihalcea and Kenneth Resnicow and Satinder Singh and Lawrence C. An},
  editor    = {Kristy Hollingshead and Lyle H. Ungar},
  title     = {Building a Motivational Interviewing Dataset},
  booktitle = {Proceedings of the 3rd Workshop on Computational Linguistics and Clinical Psychology: From Linguistic Signal to Clinical Reality, CLPsych@NAACL-HLT 2016, June 16, 2016, San Diego, California, {USA}},
  pages     = {42--51},
  publisher = {The Association for Computational Linguistics},
  year      = {2016},
  url       = {https://doi.org/10.18653/v1/w16-0305},
  doi       = {10.18653/V1/W16-0305},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/naacl/Perez-RosasMRSA16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2016 conference paper. The proper noun Stackelberg is brace-protected in the title.
% NOTE(review): this record carries no pages field.
@inproceedings{DBLP:conf/uai/AminWS16,
  author    = {Kareem Amin and Michael P. Wellman and Satinder Singh},
  editor    = {Alexander Ihler and Dominik Janzing},
  title     = {Gradient Methods for {Stackelberg} Games},
  booktitle = {Proceedings of the Thirty-Second Conference on Uncertainty in Artificial Intelligence, {UAI} 2016, June 25-29, 2016, New York City, NY, {USA}},
  publisher = {{AUAI} Press},
  year      = {2016},
  url       = {http://auai.org/uai2016/proceedings/papers/167.pdf},
  timestamp = {Mon, 05 Dec 2022 15:54:38 +0100},
  biburl    = {https://dblp.org/rec/conf/uai/AminWS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2016, on unidentifiability in inverse reinforcement learning.
@article{DBLP:journals/corr/AminS16,
  author     = {Kareem Amin and Satinder Singh},
  title      = {Towards Resolving Unidentifiability in Inverse Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1601.06569},
  year       = {2016},
  url        = {http://arxiv.org/abs/1601.06569},
  eprinttype = {arXiv},
  eprint     = {1601.06569},
  timestamp  = {Mon, 22 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/AminS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record with the same authors and title as DBLP:conf/ijcai/GuoSLL16.
% Monte Carlo is brace-protected in the title so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/GuoSLL16,
  author     = {Xiaoxiao Guo and Satinder Singh and Richard L. Lewis and Honglak Lee},
  title      = {Deep Learning for Reward Design to Improve {Monte Carlo} Tree Search in {ATARI} Games},
  journal    = {CoRR},
  volume     = {abs/1604.07095},
  year       = {2016},
  url        = {http://arxiv.org/abs/1604.07095},
  eprinttype = {arXiv},
  eprint     = {1604.07095},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GuoSLL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2016. The proper noun Minecraft is brace-protected in the title.
@article{DBLP:journals/corr/OhCSL16,
  author     = {Junhyuk Oh and Valliappa Chockalingam and Satinder Singh and Honglak Lee},
  title      = {Control of Memory, Active Perception, and Action in {Minecraft}},
  journal    = {CoRR},
  volume     = {abs/1605.09128},
  year       = {2016},
  url        = {http://arxiv.org/abs/1605.09128},
  eprinttype = {arXiv},
  eprint     = {1605.09128},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/OhCSL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2015 conference paper on spectral learning of predictive state representations.
@inproceedings{DBLP:conf/aaai/KuleszaJS15,
  author    = {Alex Kulesza and Nan Jiang and Satinder Singh},
  editor    = {Blai Bonet and Sven Koenig},
  title     = {Spectral Learning of Predictive State Representations with Insufficient Statistics},
  booktitle = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence, January 25-30, 2015, Austin, Texas, {USA}},
  pages     = {2715--2721},
  publisher = {{AAAI} Press},
  year      = {2015},
  url       = {https://doi.org/10.1609/aaai.v29i1.9635},
  doi       = {10.1609/AAAI.V29I1.9635},
  timestamp = {Mon, 18 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/KuleszaJS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% 2015 AAAI Fall Symposia paper; its title matches the IJCAI 2016 record DBLP:conf/ijcai/ZhangDSCW16 up to capitalisation, with a shorter author list.
@inproceedings{DBLP:conf/aaaifs/DurfeeS15,
  author    = {Edmund H. Durfee and Satinder Singh},
  title     = {Commitment Semantics for Sequential Decision Making Under Reward Uncertainty},
  booktitle = {2015 {AAAI} Fall Symposia, Arlington, Virginia, USA, November 12-14, 2015},
  pages     = {13--20},
  publisher = {{AAAI} Press},
  year      = {2015},
  url       = {http://www.aaai.org/ocs/index.php/FSS/FSS15/paper/view/11652},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaaifs/DurfeeS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2015 conference paper (JMLR W and CP volume 38); this record carries no pages field.
@inproceedings{DBLP:conf/aistats/KuleszaJS15,
  author    = {Alex Kulesza and Nan Jiang and Satinder Singh},
  editor    = {Guy Lebanon and S. V. N. Vishwanathan},
  title     = {Low-Rank Spectral Learning with Weighted Loss Functions},
  booktitle = {Proceedings of the Eighteenth International Conference on Artificial Intelligence and Statistics, {AISTATS} 2015, San Diego, California, USA, May 9-12, 2015},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {38},
  publisher = {JMLR.org},
  year      = {2015},
  url       = {http://proceedings.mlr.press/v38/kulesza15.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/KuleszaJS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2015 conference paper; same authors and title as the IJCAI 2016 record DBLP:conf/ijcai/JiangKSL16.
@inproceedings{DBLP:conf/atal/JiangKSL15,
  author    = {Nan Jiang and Alex Kulesza and Satinder Singh and Richard L. Lewis},
  editor    = {Gerhard Weiss and Pinar Yolum and Rafael H. Bordini and Edith Elkind},
  title     = {The Dependence of Effective Planning Horizon on Model Accuracy},
  booktitle = {Proceedings of the 2015 International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} 2015, Istanbul, Turkey, May 4-8, 2015},
  pages     = {1181--1189},
  publisher = {{ACM}},
  year      = {2015},
  url       = {http://dl.acm.org/citation.cfm?id=2773300},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/JiangKSL15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2015 conference paper (JMLR W and CP volume 37) on abstraction selection.
@inproceedings{DBLP:conf/icml/JiangKS15,
  author    = {Nan Jiang and Alex Kulesza and Satinder Singh},
  editor    = {Francis R. Bach and David M. Blei},
  title     = {Abstraction Selection in Model-based Reinforcement Learning},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {37},
  pages     = {179--188},
  publisher = {JMLR.org},
  year      = {2015},
  url       = {http://proceedings.mlr.press/v37/jiang15.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/JiangKS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2015 conference paper. The proper noun Atari is brace-protected in the title.
@inproceedings{DBLP:conf/nips/OhGLLS15,
  author    = {Junhyuk Oh and Xiaoxiao Guo and Honglak Lee and Richard L. Lewis and Satinder Singh},
  editor    = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  title     = {Action-Conditional Video Prediction using Deep Networks in {Atari} Games},
  booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada},
  pages     = {2863--2871},
  year      = {2015},
  url       = {https://proceedings.neurips.cc/paper/2015/hash/6ba3af5d7b2790e73f0de32e5c8c1798-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/OhGLLS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record with the same authors and title as DBLP:conf/nips/OhGLLS15.
% The proper noun Atari is brace-protected in the title.
@article{DBLP:journals/corr/OhGLLS15,
  author     = {Junhyuk Oh and Xiaoxiao Guo and Honglak Lee and Richard L. Lewis and Satinder Singh},
  title      = {Action-Conditional Video Prediction using Deep Networks in {Atari} Games},
  journal    = {CoRR},
  volume     = {abs/1507.08750},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.08750},
  eprinttype = {arXiv},
  eprint     = {1507.08750},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/OhGLLS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IEEE Trans. Auton. Ment. Dev. journal article, 2014, on optimal rewards for cooperative agents.
@article{DBLP:journals/tamd/LiuSLQ14,
  author    = {Bingyao Liu and Satinder Singh and Richard L. Lewis and Shiyin Qin},
  title     = {Optimal Rewards for Cooperative Agents},
  journal   = {{IEEE} Trans. Auton. Ment. Dev.},
  volume    = {6},
  number    = {4},
  pages     = {286--297},
  year      = {2014},
  url       = {https://doi.org/10.1109/TAMD.2014.2362682},
  doi       = {10.1109/TAMD.2014.2362682},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tamd/LiuSLQ14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Top. Cogn. Sci. journal article, volume 6 number 2, 2014.
@article{DBLP:journals/topics/HowesLS14,
  author    = {Andrew Howes and Richard L. Lewis and Satinder Singh},
  title     = {Utility Maximization and Bounds on Human Information Processing},
  journal   = {Top. Cogn. Sci.},
  volume    = {6},
  number    = {2},
  pages     = {198--203},
  year      = {2014},
  url       = {https://doi.org/10.1111/tops.12089},
  doi       = {10.1111/TOPS.12089},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/topics/HowesLS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Top. Cogn. Sci. journal article, volume 6 number 2, 2014, on computational rationality.
@article{DBLP:journals/topics/LewisHS14,
  author    = {Richard L. Lewis and Andrew Howes and Satinder Singh},
  title     = {Computational Rationality: Linking Mechanism and Behavior Through Bounded Utility Maximization},
  journal   = {Top. Cogn. Sci.},
  volume    = {6},
  number    = {2},
  pages     = {279--311},
  year      = {2014},
  url       = {https://doi.org/10.1111/tops.12086},
  doi       = {10.1111/TOPS.12086},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/topics/LewisHS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2014 conference paper. The stray space in the booktitle date range (27 -31) is removed.
@inproceedings{DBLP:conf/aaai/ChiaBKSS14,
  author    = {Chih{-}Chun Chia and James Blum and Zahi N. Karam and Satinder Singh and Zeeshan Syed},
  editor    = {Carla E. Brodley and Peter Stone},
  title     = {Predicting Postoperative Atrial Fibrillation from Independent {ECG} Components},
  booktitle = {Proceedings of the Twenty-Eighth {AAAI} Conference on Artificial Intelligence, July 27-31, 2014, Qu{\'{e}}bec City, Qu{\'{e}}bec, Canada},
  pages     = {1178--1184},
  publisher = {{AAAI} Press},
  year      = {2014},
  url       = {https://doi.org/10.1609/aaai.v28i1.8921},
  doi       = {10.1609/AAAI.V28I1.8921},
  timestamp = {Mon, 04 Sep 2023 15:06:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/ChiaBKSS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2014 conference paper. The stray space in the booktitle date range (27 -31) is removed.
% The first author is written in Last, First form so the compound surname Van Esbroeck is not split
% (a capitalised Van in First-Last order would be parsed as a given name).
@inproceedings{DBLP:conf/aaai/EsbroeckSRS14,
  author    = {Van Esbroeck, Alexander and Satinder Singh and Ilan Rubinfeld and Zeeshan Syed},
  editor    = {Carla E. Brodley and Peter Stone},
  title     = {Evaluating Trauma Patients: Addressing Missing Covariates with Joint Optimization},
  booktitle = {Proceedings of the Twenty-Eighth {AAAI} Conference on Artificial Intelligence, July 27-31, 2014, Qu{\'{e}}bec City, Qu{\'{e}}bec, Canada},
  pages     = {1307--1313},
  publisher = {{AAAI} Press},
  year      = {2014},
  url       = {https://doi.org/10.1609/aaai.v28i1.8912},
  doi       = {10.1609/AAAI.V28I1.8912},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/EsbroeckSRS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CMCL workshop paper (ACL 2014) on computationally rational saccadic control.
@inproceedings{DBLP:conf/acl-cmcl/ShvartsmanLS14,
  author    = {Michael Shvartsman and Richard L. Lewis and Satinder Singh},
  editor    = {Vera Demberg and Timothy O'Donnell},
  title     = {Computationally Rational Saccadic Control: An Explanation of Spillover Effects Based on Sampling from Noisy Perception and Memory},
  booktitle = {Proceedings of the Fifth Workshop on Cognitive Modeling and Computational Linguistics, CMCL@ACL 2014, Baltimore, Maryland, USA, June 26, 2014},
  pages     = {1--9},
  publisher = {Association for Computational Linguistics},
  year      = {2014},
  url       = {https://doi.org/10.3115/v1/W14-2001},
  doi       = {10.3115/V1/W14-2001},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/acl-cmcl/ShvartsmanLS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICAPS 2014 conference paper on adversarial patrolling games; this record carries no pages field.
@inproceedings{DBLP:conf/aips/VorobeychikATS14,
  author    = {Yevgeniy Vorobeychik and Bo An and Milind Tambe and Satinder Singh},
  editor    = {Steve A. Chien and Minh Binh Do and Alan Fern and Wheeler Ruml},
  title     = {Computing Solutions in Infinite-Horizon Discounted Adversarial Patrolling Games},
  booktitle = {Proceedings of the Twenty-Fourth International Conference on Automated Planning and Scheduling, {ICAPS} 2014, Portsmouth, New Hampshire, USA, June 21-26, 2014},
  publisher = {{AAAI}},
  year      = {2014},
  url       = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS14/paper/view/7783},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aips/VorobeychikATS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2014 conference paper (JMLR W and CP volume 33). The acronym EVOI is brace-protected in the title.
@inproceedings{DBLP:conf/aistats/CohnSD14,
  author    = {Robert Cohn and Satinder Singh and Edmund H. Durfee},
  title     = {Characterizing {EVOI}-Sufficient k-Response Query Sets in Decision Problems},
  booktitle = {Proceedings of the Seventeenth International Conference on Artificial Intelligence and Statistics, {AISTATS} 2014, Reykjavik, Iceland, April 22-25, 2014},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {33},
  pages     = {131--139},
  publisher = {JMLR.org},
  year      = {2014},
  url       = {http://proceedings.mlr.press/v33/cohn14.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/CohnSD14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2014 conference paper (JMLR W and CP volume 33) on low-rank spectral learning.
@inproceedings{DBLP:conf/aistats/KuleszaRS14,
  author    = {Alex Kulesza and N. Raj Rao and Satinder Singh},
  title     = {Low-Rank Spectral Learning},
  booktitle = {Proceedings of the Seventeenth International Conference on Artificial Intelligence and Statistics, {AISTATS} 2014, Reykjavik, Iceland, April 22-25, 2014},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {33},
  pages     = {522--530},
  publisher = {JMLR.org},
  year      = {2014},
  url       = {http://proceedings.mlr.press/v33/kulesza14.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/KuleszaRS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2014 conference paper on UCT planning with approximate homomorphisms.
@inproceedings{DBLP:conf/atal/JiangSL14,
  author    = {Nan Jiang and Satinder Singh and Richard L. Lewis},
  editor    = {Ana L. C. Bazzan and Michael N. Huhns and Alessio Lomuscio and Paul Scerri},
  title     = {Improving {UCT} planning via approximate homomorphisms},
  booktitle = {International conference on Autonomous Agents and Multi-Agent Systems, {AAMAS} '14, Paris, France, May 5-9, 2014},
  pages     = {1289--1296},
  publisher = {{IFAAMAS/ACM}},
  year      = {2014},
  url       = {http://dl.acm.org/citation.cfm?id=2617453},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/JiangSL14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2014 conference paper on speech-based mood monitoring.
@inproceedings{DBLP:conf/icassp/KaramPSMAHM14,
  author    = {Zahi N. Karam and Emily Mower Provost and Satinder Singh and Jennifer Montgomery and Christopher Archer and Gloria Harrington and Melvin G. McInnis},
  title     = {Ecologically valid long-term mood monitoring of individuals with bipolar disorder using speech},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  pages     = {4858--4862},
  publisher = {{IEEE}},
  year      = {2014},
  url       = {https://doi.org/10.1109/ICASSP.2014.6854525},
  doi       = {10.1109/ICASSP.2014.6854525},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/KaramPSMAHM14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2014 conference paper. The proper nouns Atari and Monte-Carlo are brace-protected in the title.
@inproceedings{DBLP:conf/nips/GuoSLLW14,
  author    = {Xiaoxiao Guo and Satinder Singh and Honglak Lee and Richard L. Lewis and Xiaoshi Wang},
  editor    = {Zoubin Ghahramani and Max Welling and Corinna Cortes and Neil D. Lawrence and Kilian Q. Weinberger},
  title     = {Deep Learning for Real-Time {Atari} Game Play Using Offline {Monte-Carlo} Tree Search Planning},
  booktitle = {Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages     = {3338--3346},
  year      = {2014},
  url       = {https://proceedings.neurips.cc/paper/2014/hash/8bb88f80d334b1869781beb89f7b73be-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/GuoSLLW14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2014; same authors and title as the 2011 JAIR record DBLP:journals/jair/TalvitieS11.
@article{DBLP:journals/corr/TalvitieS14,
  author     = {Erik Talvitie and Satinder Singh},
  title      = {Learning to Make Predictions In Partially Observable Environments Without a Generative Model},
  journal    = {CoRR},
  volume     = {abs/1401.3870},
  year       = {2014},
  url        = {http://arxiv.org/abs/1401.3870},
  eprinttype = {arXiv},
  eprint     = {1401.3870},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/TalvitieS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Top. Cogn. Sci. journal article, volume 5 number 3, 2013, on eye movements in linguistic tasks.
@article{DBLP:journals/topics/LewisSS13,
  author    = {Richard L. Lewis and Michael Shvartsman and Satinder Singh},
  title     = {The Adaptive Nature of Eye Movements in Linguistic Tasks: How Payoff and Architecture Shape Speed-Accuracy Trade-Offs},
  journal   = {Top. Cogn. Sci.},
  volume    = {5},
  number    = {3},
  pages     = {581--610},
  year      = {2013},
  url       = {https://doi.org/10.1111/tops.12032},
  doi       = {10.1111/TOPS.12032},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/topics/LewisSS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% HCI International 2013 conference paper (LNCS volume 8004). The underscore in url and doi is kept escaped exactly as exported.
@inproceedings{DBLP:conf/hci/FearyBCHLSS13,
  author    = {Michael Feary and Dorrit Billman and Xiuli Chen and Andrew Howes and Richard L. Lewis and Lance Sherry and Satinder Singh},
  editor    = {Masaaki Kurosu},
  title     = {Linking Context to Evaluation in the Design of Safety Critical Interfaces},
  booktitle = {Human-Computer Interaction. Human-Centred Design Approaches, Methods, Tools, and Environments - 15th International Conference, {HCI} International 2013, Las Vegas, NV, USA, July 21-26, 2013, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {8004},
  pages     = {193--202},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-3-642-39232-0\_22},
  doi       = {10.1007/978-3-642-39232-0\_22},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/hci/FearyBCHLSS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2013 conference paper on reward mapping for transfer.
@inproceedings{DBLP:conf/nips/GuoSL13,
  author    = {Xiaoxiao Guo and Satinder Singh and Richard L. Lewis},
  editor    = {Christopher J. C. Burges and L{\'{e}}on Bottou and Zoubin Ghahramani and Kilian Q. Weinberger},
  title     = {Reward Mapping for Transfer in Long-Lived Agents},
  booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
  pages     = {2130--2138},
  year      = {2013},
  url       = {https://proceedings.neurips.cc/paper/2013/hash/58c54802a9fb9526cd0923353a34a7ae-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/GuoSL13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2013, on graphical models for game theory.
@article{DBLP:journals/corr/abs-1301-2281,
  author     = {Michael J. Kearns and Michael L. Littman and Satinder Singh},
  title      = {Graphical Models for Game Theory},
  journal    = {CoRR},
  volume     = {abs/1301.2281},
  year       = {2013},
  url        = {http://arxiv.org/abs/1301.2281},
  eprinttype = {arXiv},
  eprint     = {1301.2281},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-2281.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2013, on fast planning in stochastic games.
@article{DBLP:journals/corr/abs-1301-3867,
  author     = {Michael J. Kearns and Yishay Mansour and Satinder Singh},
  title      = {Fast Planning in Stochastic Games},
  journal    = {CoRR},
  volume     = {abs/1301.3867},
  year       = {2013},
  url        = {http://arxiv.org/abs/1301.3867},
  eprinttype = {arXiv},
  eprint     = {1301.3867},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-3867.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2013. The proper noun Nash is brace-protected in the title.
@article{DBLP:journals/corr/abs-1301-3892,
  author     = {Satinder Singh and Michael J. Kearns and Yishay Mansour},
  title      = {{Nash} Convergence of Gradient Dynamics in Iterated General-Sum Games},
  journal    = {CoRR},
  volume     = {abs/1301.3892},
  year       = {2013},
  url        = {http://arxiv.org/abs/1301.3892},
  eprinttype = {arXiv},
  eprint     = {1301.3892},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-3892.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2013, on the complexity of policy iteration.
@article{DBLP:journals/corr/abs-1301-6718,
  author     = {Yishay Mansour and Satinder Singh},
  title      = {On the Complexity of Policy Iteration},
  journal    = {CoRR},
  volume     = {abs/1301.6718},
  year       = {2013},
  url        = {http://arxiv.org/abs/1301.6718},
  eprinttype = {arXiv},
  eprint     = {1301.6718},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-6718.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2013. The acronym POMDPs is brace-protected in the title so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-1301-6719,
  author     = {David A. McAllester and Satinder Singh},
  title      = {Approximate Planning for Factored {POMDPs} using Belief State Simplification},
  journal    = {CoRR},
  volume     = {abs/1301.6719},
  year       = {2013},
  url        = {http://arxiv.org/abs/1301.6719},
  eprinttype = {arXiv},
  eprint     = {1301.6719},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-6719.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AI Mag. article, volume 33 number 1, 2012: multi-author report on the AAAI 2011 conference workshops.
@article{DBLP:journals/aim/AgmonAAABDGGGJKKLLMMMOOOPPRRTSSSSSSUW12,
  author    = {Noa Agmon and Vikas Agrawal and David W. Aha and Yiannis Aloimonos and Donagh Buckley and Prashant Doshi and Christopher W. Geib and Floriana Grasso and Nancy L. Green and Benjamin Johnston and Burt Kaliski and Christopher Kiekintveld and Edith Law and Henry Lieberman and Ole J. Mengshoel and Ted Metzler and Joseph Modayil and Douglas W. Oard and Nilufer Onder and Barry O'Sullivan and Katerina Pastra and Doina Precup and Sowmya Ramachandran and Chris Reed and Sanem Sariel Talay and Ted Selker and Lokendra Shastri and Stephen F. Smith and Satinder Singh and Siddharth Srivastava and Gita Sukthankar and David C. Uthus and Mary{-}Anne Williams},
  title     = {Reports of the {AAAI} 2011 Conference Workshops},
  journal   = {{AI} Mag.},
  volume    = {33},
  number    = {1},
  pages     = {57--70},
  year      = {2012},
  url       = {https://doi.org/10.1609/aimag.v33i1.2390},
  doi       = {10.1609/AIMAG.V33I1.2390},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/aim/AgmonAAABDGGGJKKLLMMMOOOPPRRTSSSSSSUW12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2012 conference paper on security games with limited surveillance.
@inproceedings{DBLP:conf/aaai/AnKKSSTV12,
  author    = {Bo An and David Kempe and Christopher Kiekintveld and Eric Shieh and Satinder Singh and Milind Tambe and Yevgeniy Vorobeychik},
  editor    = {J{\"{o}}rg Hoffmann and Bart Selman},
  title     = {Security Games with Limited Surveillance},
  booktitle = {Proceedings of the Twenty-Sixth {AAAI} Conference on Artificial Intelligence, July 22-26, 2012, Toronto, Ontario, Canada},
  pages     = {1241--1248},
  publisher = {{AAAI} Press},
  year      = {2012},
  url       = {https://doi.org/10.1609/aaai.v26i1.8236},
  doi       = {10.1609/AAAI.V26I1.8236},
  timestamp = {Mon, 04 Sep 2023 15:56:47 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/AnKKSSTV12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2012 conference paper. The proper noun Stackelberg is brace-protected in the title.
@inproceedings{DBLP:conf/aaai/VorobeychikS12,
  author    = {Yevgeniy Vorobeychik and Satinder Singh},
  editor    = {J{\"{o}}rg Hoffmann and Bart Selman},
  title     = {Computing {Stackelberg} Equilibria in Discounted Stochastic Games},
  booktitle = {Proceedings of the Twenty-Sixth {AAAI} Conference on Artificial Intelligence, July 22-26, 2012, Toronto, Ontario, Canada},
  pages     = {1478--1484},
  publisher = {{AAAI} Press},
  year      = {2012},
  url       = {https://doi.org/10.1609/aaai.v26i1.8234},
  doi       = {10.1609/AAAI.V26I1.8234},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/VorobeychikS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% 2012 AAAI Spring Symposium paper (technical report SS-12-03); this record carries no pages field.
@inproceedings{DBLP:conf/aaaiss/AnKKSSTV12,
  author    = {Bo An and David Kempe and Christopher Kiekintveld and Eric Anyung Shieh and Satinder Singh and Milind Tambe and Yevgeniy Vorobeychik},
  title     = {Security Games with Limited Surveillance: An Initial Report},
  booktitle = {Game Theory for Security, Sustainability, and Health, Papers from the 2012 {AAAI} Spring Symposium, Palo Alto, California, USA, March 26-28, 2012},
  series    = {{AAAI} Technical Report},
  volume    = {{SS-12-03}},
  publisher = {{AAAI}},
  year      = {2012},
  url       = {http://www.aaai.org/ocs/index.php/SSS/SSS12/paper/view/4262},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaaiss/AnKKSSTV12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2012 conference paper on nesting policy search within reward search.
@inproceedings{DBLP:conf/aamas/BratmanSSL12,
  author    = {Jeshua Bratman and Satinder Singh and Jonathan Sorg and Richard L. Lewis},
  editor    = {Wiebe van der Hoek and Lin Padgham and Vincent Conitzer and Michael Winikoff},
  title     = {Strong mitigation: nesting search for good policies within search for good reward},
  booktitle = {International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} 2012, Valencia, Spain, June 4-8, 2012 {(3} Volumes)},
  pages     = {407--414},
  publisher = {{IFAAMAS}},
  year      = {2012},
  url       = {http://dl.acm.org/citation.cfm?id=2343634},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aamas/BratmanSSL12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2012 conference paper on graphical multiagent models of networked behavior.
@inproceedings{DBLP:conf/aamas/DuongWSK12,
  author    = {Quang Duong and Michael P. Wellman and Satinder Singh and Michael J. Kearns},
  editor    = {Wiebe van der Hoek and Lin Padgham and Vincent Conitzer and Michael Winikoff},
  title     = {Learning and predicting dynamic networked behavior with graphical multiagent models},
  booktitle = {International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} 2012, Valencia, Spain, June 4-8, 2012 {(3} Volumes)},
  pages     = {441--448},
  publisher = {{IFAAMAS}},
  year      = {2012},
  url       = {http://dl.acm.org/citation.cfm?id=2343639},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aamas/DuongWSK12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2012 proceedings entry, pages 1277-1278, on multiagent influences under reward uncertainty.
@inproceedings{DBLP:conf/aamas/WitwickiCDS12,
  author    = {Stefan J. Witwicki and Inn{-}Tung Chen and Edmund H. Durfee and Satinder Singh},
  editor    = {Wiebe van der Hoek and Lin Padgham and Vincent Conitzer and Michael Winikoff},
  title     = {Planning and evaluating multiagent influences under reward uncertainty},
  booktitle = {International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} 2012, Valencia, Spain, June 4-8, 2012 {(3} Volumes)},
  pages     = {1277--1278},
  publisher = {{IFAAMAS}},
  year      = {2012},
  url       = {http://dl.acm.org/citation.cfm?id=2343961},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aamas/WitwickiCDS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICDL-EPIROB 2012 conference paper on optimal rewards in multiagent teams.
@inproceedings{DBLP:conf/icdl-epirob/LiuSLQ12,
  author    = {Bingyao Liu and Satinder Singh and Richard L. Lewis and Shiyin Qin},
  title     = {Optimal rewards in multiagent teams},
  booktitle = {2012 {IEEE} International Conference on Development and Learning and Epigenetic Robotics, {ICDL-EPIROB} 2012, San Diego, CA, USA, November 7-9, 2012},
  pages     = {1--8},
  publisher = {{IEEE}},
  year      = {2012},
  url       = {https://doi.org/10.1109/DevLrn.2012.6400862},
  doi       = {10.1109/DEVLRN.2012.6400862},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icdl-epirob/LiuSLQ12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ACM EC 2012 conference paper on lossy stochastic game abstraction.
@inproceedings{DBLP:conf/sigecom/SandholmS12,
  author    = {Tuomas Sandholm and Satinder Singh},
  editor    = {Boi Faltings and Kevin Leyton{-}Brown and Panos Ipeirotis},
  title     = {Lossy stochastic game abstraction with bounds},
  booktitle = {Proceedings of the 13th {ACM} Conference on Electronic Commerce, {EC} 2012, Valencia, Spain, June 4-8, 2012},
  pages     = {880--897},
  publisher = {{ACM}},
  year      = {2012},
  url       = {https://doi.org/10.1145/2229012.2229079},
  doi       = {10.1145/2229012.2229079},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/sigecom/SandholmS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2012. The proper adjective Bayesian is brace-protected in the title.
@article{DBLP:journals/corr/abs-1203-3518,
  author     = {Jonathan Sorg and Satinder Singh and Richard L. Lewis},
  title      = {Variance-Based Rewards for Approximate {Bayesian} Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1203.3518},
  year       = {2012},
  url        = {http://arxiv.org/abs/1203.3518},
  eprinttype = {arXiv},
  eprint     = {1203.3518},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1203-3518.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2012, on knowledge combination in graphical multiagent models.
@article{DBLP:journals/corr/abs-1206-3248,
  author     = {Quang Duong and Michael P. Wellman and Satinder Singh},
  title      = {Knowledge Combination in Graphical Multiagent Model},
  journal    = {CoRR},
  volume     = {abs/1206.3248},
  year       = {2012},
  url        = {http://arxiv.org/abs/1206.3248},
  eprinttype = {arXiv},
  eprint     = {1206.3248},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1206-3248.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2012, on coordinated planning amongst self-interested agents.
@article{DBLP:journals/corr/CavalloPS12,
  author     = {Ruggiero Cavallo and David C. Parkes and Satinder Singh},
  title      = {Optimal Coordinated Planning Amongst Self-Interested Agents with Private State},
  journal    = {CoRR},
  volume     = {abs/1206.6820},
  year       = {2012},
  url        = {http://arxiv.org/abs/1206.6820},
  eprinttype = {arXiv},
  eprint     = {1206.6820},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/CavalloPS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2012. The proper adjective Gaussian is brace-protected in the title.
@article{DBLP:journals/corr/abs-1207-1416,
  author     = {Matthew R. Rudary and Satinder Singh and David Wingate},
  title      = {Predictive Linear-{Gaussian} Models of Stochastic Dynamical Systems},
  journal    = {CoRR},
  volume     = {abs/1207.1416},
  year       = {2012},
  url        = {http://arxiv.org/abs/1207.1416},
  eprinttype = {arXiv},
  eprint     = {1207.1416},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1207-1416.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2012, on predictive state representations.
@article{DBLP:journals/corr/abs-1207-4167,
  author     = {Satinder Singh and Michael R. James and Matthew R. Rudary},
  title      = {Predictive State Representations: {A} New Theory for Modeling Dynamical Systems},
  journal    = {CoRR},
  volume     = {abs/1207.4167},
  year       = {2012},
  url        = {http://arxiv.org/abs/1207.4167},
  eprinttype = {arXiv},
  eprint     = {1207.4167},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1207-4167.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% PhD thesis, University of Maryland, 2011.
% NOTE(review): the author is given as Satinder Pal Singh, unlike the other records here; confirm this is the same person.
@phdthesis{DBLP:phd/basesearch/Singh11,
  author    = {Satinder Pal Singh},
  title     = {{IP} Geolocation in Metropolitan Areas},
  school    = {University of Maryland, College Park, MD, {USA}},
  year      = {2011},
  url       = {https://hdl.handle.net/1903/11505},
  timestamp = {Wed, 04 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/phd/basesearch/Singh11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% J. Artif. Intell. Res. article, volume 42, 2011; same authors and title as the CoRR record DBLP:journals/corr/TalvitieS14.
@article{DBLP:journals/jair/TalvitieS11,
  author    = {Erik Talvitie and Satinder Singh},
  title     = {Learning to Make Predictions In Partially Observable Environments Without a Generative Model},
  journal   = {J. Artif. Intell. Res.},
  volume    = {42},
  pages     = {353--392},
  year      = {2011},
  url       = {https://doi.org/10.1613/jair.3396},
  doi       = {10.1613/JAIR.3396},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jair/TalvitieS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/SorgSL11,
  author     = {Jonathan Sorg and Satinder Singh and Richard L. Lewis},
  editor     = {Wolfram Burgard and Dan Roth},
  title      = {Optimal Rewards versus Leaf-Evaluation Heuristics in Planning Agents},
  booktitle  = {Proceedings of the Twenty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2011, San Francisco, California, USA, August 7-11, 2011},
  pages      = {465--470},
  publisher  = {{AAAI} Press},
  year       = {2011},
  url        = {https://doi.org/10.1609/aaai.v25i1.7931},
  doi        = {10.1609/AAAI.V25I1.7931},
  timestamp  = {Mon, 04 Sep 2023 16:05:54 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/SorgSL11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/CohnDS11,
  author     = {Robert Cohn and Edmund H. Durfee and Satinder Singh},
  editor     = {Wolfram Burgard and Dan Roth},
  title      = {Comparing Action-Query Strategies in Semi-Autonomous Agents},
  booktitle  = {Proceedings of the Twenty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2011, San Francisco, California, USA, August 7-11, 2011},
  pages      = {1102--1107},
  publisher  = {{AAAI} Press},
  year       = {2011},
  url        = {https://doi.org/10.1609/aaai.v25i1.7992},
  doi        = {10.1609/AAAI.V25I1.7992},
  timestamp  = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/CohnDS11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2011 paper (pages 1287--1288). The URL previously carried HTML-entity residue
% and per-session CFID/CFTOKEN parameters; only the record id is kept.
@inproceedings{DBLP:conf/atal/CohnDS11,
  author     = {Robert Cohn and Edmund H. Durfee and Satinder Singh},
  editor     = {Liz Sonenberg and Peter Stone and Kagan Tumer and Pinar Yolum},
  title      = {Comparing action-query strategies in semi-autonomous agents},
  booktitle  = {10th International Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2011), Taipei, Taiwan, May 2-6, 2011, Volume 1-3},
  pages      = {1287--1288},
  publisher  = {{IFAAMAS}},
  year       = {2011},
  url        = {https://dl.acm.org/citation.cfm?id=2034529},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/CohnDS11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sigmetrics/SinghBLBLS11,
  author     = {Satinder Pal Singh and Randolph Baden and Choon Lee and Bobby Bhattacharjee and Richard J. La and Mark A. Shayman},
  editor     = {Arif Merchant and Kimberly Keeton and Dan Rubenstein},
  title      = {{IP} geolocation in metropolitan areas},
  booktitle  = {{SIGMETRICS} 2011, Proceedings of the 2011 {ACM} {SIGMETRICS} International Conference on Measurement and Modeling of Computer Systems, San Jose, CA, USA, 07-11 June 2011 (Co-located with {FCRC} 2011)},
  pages      = {155--156},
  publisher  = {{ACM}},
  year       = {2011},
  url        = {https://doi.org/10.1145/1993744.1993803},
  doi        = {10.1145/1993744.1993803},
  timestamp  = {Sun, 01 Aug 2021 14:20:40 +0200},
  biburl     = {https://dblp.org/rec/conf/sigmetrics/SinghBLBLS11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/socialcom/DuongWS11,
  author     = {Quang Duong and Michael P. Wellman and Satinder Singh},
  title      = {Modeling Information Diffusion in Networks with Unobserved Links},
  booktitle  = {PASSAT/SocialCom 2011, Privacy, Security, Risk and Trust (PASSAT), 2011 {IEEE} Third International Conference on and 2011 {IEEE} Third International Conference on Social Computing (SocialCom), Boston, MA, USA, 9-11 Oct., 2011},
  pages      = {362--369},
  publisher  = {{IEEE} Computer Society},
  year       = {2011},
  url        = {https://doi.org/10.1109/PASSAT/SocialCom.2011.50},
  doi        = {10.1109/PASSAT/SOCIALCOM.2011.50},
  timestamp  = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/socialcom/DuongWS11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the system name "NJFun" is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-1106-0676,
  author     = {Michael J. Kearns and Diane J. Litman and Satinder Singh and Marilyn A. Walker},
  title      = {Optimizing Dialogue Management with Reinforcement Learning: Experiments with the {NJFun} System},
  journal    = {CoRR},
  volume     = {abs/1106.0676},
  year       = {2011},
  url        = {http://arxiv.org/abs/1106.0676},
  eprinttype = {arXiv},
  eprint     = {1106.0676},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1106-0676.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the agent name "ATTac-2000" is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-1106-0678,
  author     = {Michael J. Kearns and Michael L. Littman and Satinder Singh and Peter Stone},
  title      = {{ATTac-2000}: An Adaptive Autonomous Bidding Agent},
  journal    = {CoRR},
  volume     = {abs/1106.0678},
  year       = {2011},
  url        = {http://arxiv.org/abs/1106.0678},
  eprinttype = {arXiv},
  eprint     = {1106.0678},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1106-0678.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/aim/ParkesCCS10,
  author     = {David C. Parkes and Ruggiero Cavallo and Florin Constantin and Satinder Singh},
  title      = {Dynamic Incentive Mechanisms},
  journal    = {{AI} Mag.},
  volume     = {31},
  number     = {4},
  pages      = {79--94},
  year       = {2010},
  url        = {https://doi.org/10.1609/aimag.v31i4.2316},
  doi        = {10.1609/AIMAG.V31I4.2316},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/aim/ParkesCCS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tamd/SinghLBS10,
  author     = {Satinder Singh and Richard L. Lewis and Andrew G. Barto and Jonathan Sorg},
  title      = {Intrinsically Motivated Reinforcement Learning: An Evolutionary Perspective},
  journal    = {{IEEE} Trans. Auton. Ment. Dev.},
  volume     = {2},
  number     = {2},
  pages      = {70--82},
  year       = {2010},
  url        = {https://doi.org/10.1109/TAMD.2010.2051031},
  doi        = {10.1109/TAMD.2010.2051031},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tamd/SinghLBS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/SorgS10,
  author     = {Jonathan Sorg and Satinder Singh},
  editor     = {Wiebe van der Hoek and Gal A. Kaminka and Yves Lesp{\'{e}}rance and Michael Luck and Sandip Sen},
  title      = {Linear options},
  booktitle  = {9th International Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2010), Toronto, Canada, May 10-14, 2010, Volume 1-3},
  pages      = {31--38},
  publisher  = {{IFAAMAS}},
  year       = {2010},
  url        = {https://dl.acm.org/citation.cfm?id=1838211},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/SorgS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/DuongWSV10,
  author     = {Quang Duong and Michael P. Wellman and Satinder Singh and Yevgeniy Vorobeychik},
  editor     = {Wiebe van der Hoek and Gal A. Kaminka and Yves Lesp{\'{e}}rance and Michael Luck and Sandip Sen},
  title      = {History-dependent graphical multiagent models},
  booktitle  = {9th International Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2010), Toronto, Canada, May 10-14, 2010, Volume 1-3},
  pages      = {1215--1222},
  publisher  = {{IFAAMAS}},
  year       = {2010},
  url        = {https://dl.acm.org/citation.cfm?id=1838364},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/DuongWSV10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iat/CohnMDS10,
  author     = {Robert Cohn and Michael Maxim and Edmund H. Durfee and Satinder Singh},
  editor     = {Jimmy Xiangji Huang and Ali A. Ghorbani and Mohand{-}Said Hacid and Takahira Yamaguchi},
  title      = {Selecting Operator Queries Using Expected Myopic Gain},
  booktitle  = {Proceedings of the 2010 {IEEE/WIC/ACM} International Conference on Intelligent Agent Technology, {IAT} 2010, Toronto, Canada, August 31 - September 3, 2010},
  pages      = {40--47},
  publisher  = {{IEEE} Computer Society Press},
  year       = {2010},
  url        = {https://doi.org/10.1109/WI-IAT.2010.142},
  doi        = {10.1109/WI-IAT.2010.142},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iat/CohnMDS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SorgSL10,
  author     = {Jonathan Sorg and Satinder Singh and Richard L. Lewis},
  editor     = {Johannes F{\"{u}}rnkranz and Thorsten Joachims},
  title      = {Internal Rewards Mitigate Agent Boundedness},
  booktitle  = {Proceedings of the 27th International Conference on Machine Learning (ICML-10), June 21-24, 2010, Haifa, Israel},
  pages      = {1007--1014},
  publisher  = {Omnipress},
  year       = {2010},
  url        = {https://icml.cc/Conferences/2010/papers/442.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SorgSL10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SorgSL10,
  author     = {Jonathan Sorg and Satinder Singh and Richard L. Lewis},
  editor     = {John D. Lafferty and Christopher K. I. Williams and John Shawe{-}Taylor and Richard S. Zemel and Aron Culotta},
  title      = {Reward Design via Online Gradient Ascent},
  booktitle  = {Advances in Neural Information Processing Systems 23: 24th Annual Conference on Neural Information Processing Systems 2010. Proceedings of a meeting held 6-9 December 2010, Vancouver, British Columbia, Canada},
  pages      = {2190--2198},
  publisher  = {Curran Associates, Inc.},
  year       = {2010},
  url        = {https://proceedings.neurips.cc/paper/2010/hash/168908dd3227b8358eababa07fcaf091-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/SorgSL10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2010 paper. The URL query separators were garbled as an escaped HTML entity for "&"
% and are restored to plain escaped ampersands; "Bayesian" is brace-protected in the title.
@inproceedings{DBLP:conf/uai/SorgSL10,
  author     = {Jonathan Sorg and Satinder Singh and Richard L. Lewis},
  editor     = {Peter Gr{\"{u}}nwald and Peter Spirtes},
  title      = {Variance-Based Rewards for Approximate {Bayesian} Reinforcement Learning},
  booktitle  = {{UAI} 2010, Proceedings of the Twenty-Sixth Conference on Uncertainty in Artificial Intelligence, Catalina Island, CA, USA, July 8-11, 2010},
  pages      = {564--571},
  publisher  = {{AUAI} Press},
  year       = {2010},
  url        = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2150\&proceeding\_id=26},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/uai/SorgSL10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2009 paper; the algorithm name "SarsaLandmark" and the acronym "POMDPs" are
% brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{DBLP:conf/atal/JamesS09,
  author     = {Michael R. James and Satinder Singh},
  editor     = {Carles Sierra and Cristiano Castelfranchi and Keith S. Decker and Jaime Sim{\~{a}}o Sichman},
  title      = {{SarsaLandmark}: an algorithm for learning in {POMDPs} with landmarks},
  booktitle  = {8th International Joint Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2009), Budapest, Hungary, May 10-15, 2009, Volume 1},
  pages      = {585--591},
  publisher  = {{IFAAMAS}},
  year       = {2009},
  url        = {https://dl.acm.org/citation.cfm?id=1558094},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/JamesS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/SorgS09,
  author     = {Jonathan Sorg and Satinder Singh},
  editor     = {Carles Sierra and Cristiano Castelfranchi and Keith S. Decker and Jaime Sim{\~{a}}o Sichman},
  title      = {Transfer via soft homomorphisms},
  booktitle  = {8th International Joint Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2009), Budapest, Hungary, May 10-15, 2009, Volume 2},
  pages      = {741--748},
  publisher  = {{IFAAMAS}},
  year       = {2009},
  url        = {https://dl.acm.org/citation.cfm?id=1558114},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/SorgS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/DuongVSW09,
  author     = {Quang Duong and Yevgeniy Vorobeychik and Satinder Singh and Michael P. Wellman},
  editor     = {Craig Boutilier},
  title      = {Learning Graphical Game Models},
  booktitle  = {{IJCAI} 2009, Proceedings of the 21st International Joint Conference on Artificial Intelligence, Pasadena, California, USA, July 11-17, 2009},
  pages      = {116--121},
  year       = {2009},
  url        = {http://ijcai.org/Proceedings/09/Papers/030.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/DuongVSW09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/TalvitieS09,
  author     = {Erik Talvitie and Satinder Singh},
  editor     = {Craig Boutilier},
  title      = {Maintaining Predictions over Time without a Model},
  booktitle  = {{IJCAI} 2009, Proceedings of the 21st International Joint Conference on Artificial Intelligence, Pasadena, California, USA, July 11-17, 2009},
  pages      = {1249--1254},
  year       = {2009},
  url        = {http://ijcai.org/Proceedings/09/Papers/210.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/TalvitieS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/WolfeJS08,
  author     = {Britton Wolfe and Michael R. James and Satinder Singh},
  editor     = {Lin Padgham and David C. Parkes and J{\"{o}}rg P. M{\"{u}}ller and Simon Parsons},
  title      = {Approximate predictive state representations},
  booktitle  = {7th International Joint Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2008), Estoril, Portugal, May 12-16, 2008, Volume 1},
  pages      = {363--370},
  publisher  = {{IFAAMAS}},
  year       = {2008},
  url        = {https://dl.acm.org/citation.cfm?id=1402437},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/WolfeJS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/WingateS08,
  author     = {David Wingate and Satinder Singh},
  editor     = {William W. Cohen and Andrew McCallum and Sam T. Roweis},
  title      = {Efficiently learning linear-linear exponential family predictive representations of state},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Fifth International Conference {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {307},
  pages      = {1176--1183},
  publisher  = {{ACM}},
  year       = {2008},
  url        = {https://doi.org/10.1145/1390156.1390304},
  doi        = {10.1145/1390156.1390304},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/WingateS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ISAIM 2008 paper; "Gaussian" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/isaim/RudaryS08,
  author     = {Matthew R. Rudary and Satinder Singh},
  title      = {Predictive Linear-{Gaussian} Models of Dynamical Systems with Vector-Valued Actions and Observations},
  booktitle  = {International Symposium on Artificial Intelligence and Mathematics, {ISAIM} 2008, Fort Lauderdale, Florida, USA, January 2-4, 2008},
  year       = {2008},
  url        = {http://isaim2008.unl.edu/PAPERS/TechnicalProgram/ISAIM2008\_0042\_8f2d860dcc7387e100135b1737766088.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/isaim/RudaryS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/isaim/TalvitieWS08,
  author     = {Erik Talvitie and Britton Wolfe and Satinder Singh},
  title      = {Building Incomplete but Accurate Models},
  booktitle  = {International Symposium on Artificial Intelligence and Mathematics, {ISAIM} 2008, Fort Lauderdale, Florida, USA, January 2-4, 2008},
  year       = {2008},
  url        = {http://isaim2008.unl.edu/PAPERS/TechnicalProgram/ISAIM2008\_0037\_334f1e59f8f09d4cca350159a673fd78.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/isaim/TalvitieWS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/TalvitieS08,
  author     = {Erik Talvitie and Satinder Singh},
  editor     = {Daphne Koller and Dale Schuurmans and Yoshua Bengio and L{\'{e}}on Bottou},
  title      = {Simple Local Models for Complex Dynamical Systems},
  booktitle  = {Advances in Neural Information Processing Systems 21, Proceedings of the Twenty-Second Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages      = {1617--1624},
  publisher  = {Curran Associates, Inc.},
  year       = {2008},
  url        = {https://proceedings.neurips.cc/paper/2008/hash/f76a89f0cb91bc419542ce9fa43902dc-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/TalvitieS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2008 paper. The URL query separators were garbled as an escaped HTML entity for "&"
% and are restored to plain escaped ampersands.
@inproceedings{DBLP:conf/uai/DuongWS08,
  author     = {Quang Duong and Michael P. Wellman and Satinder Singh},
  editor     = {David A. McAllester and Petri Myllym{\"{a}}ki},
  title      = {Knowledge Combination in Graphical Multiagent Models},
  booktitle  = {{UAI} 2008, Proceedings of the 24th Conference in Uncertainty in Artificial Intelligence, Helsinki, Finland, July 9-12, 2008},
  pages      = {153--160},
  publisher  = {{AUAI} Press},
  year       = {2008},
  url        = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1947\&proceeding\_id=24},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/uai/DuongWS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/VorobeychikWS07,
  author     = {Yevgeniy Vorobeychik and Michael P. Wellman and Satinder Singh},
  title      = {Learning payoff functions in infinite games},
  journal    = {Mach. Learn.},
  volume     = {67},
  number     = {1-2},
  pages      = {145--168},
  year       = {2007},
  url        = {https://doi.org/10.1007/s10994-007-0715-8},
  doi        = {10.1007/S10994-007-0715-8},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ml/VorobeychikWS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/SoniS07,
  author     = {Vishal Soni and Satinder Singh},
  title      = {Abstraction in Predictive State Representations},
  booktitle  = {Proceedings of the Twenty-Second {AAAI} Conference on Artificial Intelligence, July 22-26, 2007, Vancouver, British Columbia, Canada},
  pages      = {639--644},
  publisher  = {{AAAI} Press},
  year       = {2007},
  url        = {http://www.aaai.org/Library/AAAI/2007/aaai07-101.php},
  timestamp  = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/SoniS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/LiCYSJ07,
  author     = {Yunyao Li and Ishan Chaudhuri and Huahai Yang and Satinder Singh and H. V. Jagadish},
  title      = {Enabling Domain-Awareness for a Generic Natural Language Interface},
  booktitle  = {Proceedings of the Twenty-Second {AAAI} Conference on Artificial Intelligence, July 22-26, 2007, Vancouver, British Columbia, Canada},
  pages      = {833--838},
  publisher  = {{AAAI} Press},
  year       = {2007},
  url        = {http://www.aaai.org/Library/AAAI/2007/aaai07-132.php},
  timestamp  = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/LiCYSJ07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/SoniSW07,
  author     = {Vishal Soni and Satinder Singh and Michael P. Wellman},
  editor     = {Edmund H. Durfee and Makoto Yokoo and Michael N. Huhns and Onn Shehory},
  title      = {Constraint satisfaction algorithms for graphical games},
  booktitle  = {6th International Joint Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2007), Honolulu, Hawaii, USA, May 14-18, 2007},
  pages      = {67},
  publisher  = {{IFAAMAS}},
  year       = {2007},
  url        = {https://doi.org/10.1145/1329125.1329206},
  doi        = {10.1145/1329125.1329206},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/SoniSW07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/WingateS07,
  author     = {David Wingate and Satinder Singh},
  editor     = {Edmund H. Durfee and Makoto Yokoo and Michael N. Huhns and Onn Shehory},
  title      = {On discovery and learning of models with predictive representations of state for agents with continuous actions and observations},
  booktitle  = {6th International Joint Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2007), Honolulu, Hawaii, USA, May 14-18, 2007},
  pages      = {187},
  publisher  = {{IFAAMAS}},
  year       = {2007},
  url        = {https://doi.org/10.1145/1329125.1329352},
  doi        = {10.1145/1329125.1329352},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/WingateS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/TalvitieS07,
  author     = {Erik Talvitie and Satinder Singh},
  editor     = {Manuela M. Veloso},
  title      = {An Experts Algorithm for Transfer Learning},
  booktitle  = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages      = {1065--1070},
  year       = {2007},
  url        = {http://ijcai.org/Proceedings/07/Papers/172.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/TalvitieS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/WingateSWS07,
  author     = {David Wingate and Vishal Soni and Britton Wolfe and Satinder Singh},
  editor     = {Manuela M. Veloso},
  title      = {Relational Knowledge with Predictive State Representations},
  booktitle  = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages      = {2035--2040},
  year       = {2007},
  url        = {http://ijcai.org/Proceedings/07/Papers/328.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/WingateSWS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WingateB07,
  author     = {David Wingate and Satinder Singh},
  editor     = {John C. Platt and Daphne Koller and Yoram Singer and Sam T. Roweis},
  title      = {Exponential Family Predictive Representations of State},
  booktitle  = {Advances in Neural Information Processing Systems 20, Proceedings of the Twenty-First Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 3-6, 2007},
  pages      = {1617--1624},
  publisher  = {Curran Associates, Inc.},
  year       = {2007},
  url        = {https://proceedings.neurips.cc/paper/2007/hash/a9a1d5317a33ae8cef33961c34144f84-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/WingateB07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SIGMOD 2007 demo paper; the system name "DaNaLIX" is brace-protected, matching the
% protection already applied to "XML" in the same title.
@inproceedings{DBLP:conf/sigmod/LiCYSJ07,
  author     = {Yunyao Li and Ishan Chaudhuri and Huahai Yang and Satinder Singh and H. V. Jagadish},
  editor     = {Chee Yong Chan and Beng Chin Ooi and Aoying Zhou},
  title      = {{DaNaLIX}: a domain-adaptive natural language interface for querying {XML}},
  booktitle  = {Proceedings of the {ACM} {SIGMOD} International Conference on Management of Data, Beijing, China, June 12-14, 2007},
  pages      = {1165--1168},
  publisher  = {{ACM}},
  year       = {2007},
  url        = {https://doi.org/10.1145/1247480.1247643},
  doi        = {10.1145/1247480.1247643},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sigmod/LiCYSJ07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article; the proper names "Cobot" and "LambdaMOO" are brace-protected so
% sentence-casing styles do not lowercase them.
@article{DBLP:journals/aamas/IsbellKSSSK06,
  author     = {Charles Lee Isbell Jr. and Michael J. Kearns and Satinder Singh and Christian R. Shelton and Peter Stone and David P. Kormann},
  title      = {{Cobot} in {LambdaMOO}: An Adaptive Social Statistics Agent},
  journal    = {Auton. Agents Multi Agent Syst.},
  volume     = {13},
  number     = {3},
  pages      = {327--354},
  year       = {2006},
  url        = {https://doi.org/10.1007/s10458-006-0005-z},
  doi        = {10.1007/S10458-006-0005-Z},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/aamas/IsbellKSSSK06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/SoniS06,
  author     = {Vishal Soni and Satinder Singh},
  title      = {Using Homomorphisms to Transfer Options across Continuous Reinforcement Learning Domains},
  booktitle  = {Proceedings, The Twenty-First National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference, July 16-20, 2006, Boston, Massachusetts, {USA}},
  pages      = {494--499},
  publisher  = {{AAAI} Press},
  year       = {2006},
  url        = {http://www.aaai.org/Library/AAAI/2006/aaai06-079.php},
  timestamp  = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/SoniS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2006 paper; "Gaussian" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/aaai/WingateS06,
  author     = {David Wingate and Satinder Singh},
  title      = {Mixtures of Predictive Linear {Gaussian} Models for Nonlinear, Stochastic Dynamical Systems},
  booktitle  = {Proceedings, The Twenty-First National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference, July 16-20, 2006, Boston, Massachusetts, {USA}},
  pages      = {524--529},
  publisher  = {{AAAI} Press},
  year       = {2006},
  url        = {http://www.aaai.org/Library/AAAI/2006/aaai06-084.php},
  timestamp  = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/WingateS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2006 paper; "Gaussian" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icml/RudaryS06,
  author     = {Matthew R. Rudary and Satinder Singh},
  editor     = {William W. Cohen and Andrew W. Moore},
  title      = {Predictive linear-{Gaussian} models of controlled stochastic dynamical systems},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Third International Conference {(ICML} 2006), Pittsburgh, Pennsylvania, USA, June 25-29, 2006},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {148},
  pages      = {777--784},
  publisher  = {{ACM}},
  year       = {2006},
  url        = {https://doi.org/10.1145/1143844.1143942},
  doi        = {10.1145/1143844.1143942},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/RudaryS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2006 paper; "Gaussian" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icml/WingateS06,
  author     = {David Wingate and Satinder Singh},
  editor     = {William W. Cohen and Andrew W. Moore},
  title      = {Kernel Predictive Linear {Gaussian} models for nonlinear stochastic dynamical systems},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Third International Conference {(ICML} 2006), Pittsburgh, Pennsylvania, USA, June 25-29, 2006},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {148},
  pages      = {1017--1024},
  publisher  = {{ACM}},
  year       = {2006},
  url        = {https://doi.org/10.1145/1143844.1143972},
  doi        = {10.1145/1143844.1143972},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/WingateS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/WolfeS06,
  author     = {Britton Wolfe and Satinder Singh},
  editor     = {William W. Cohen and Andrew W. Moore},
  title      = {Predictive state representations with options},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Third International Conference {(ICML} 2006), Pittsburgh, Pennsylvania, USA, June 25-29, 2006},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {148},
  pages      = {1025--1032},
  publisher  = {{ACM}},
  year       = {2006},
  url        = {https://doi.org/10.1145/1143844.1143973},
  doi        = {10.1145/1143844.1143973},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/WolfeS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2006 paper. The URL query separators were garbled as an escaped HTML entity for "&"
% and are restored to plain escaped ampersands.
@inproceedings{DBLP:conf/uai/CavalloPS06,
  author     = {Ruggiero Cavallo and David C. Parkes and Satinder Singh},
  title      = {Optimal Coordinated Planning Amongst Self-Interested Agents with Private State},
  booktitle  = {{UAI} '06, Proceedings of the 22nd Conference in Uncertainty in Artificial Intelligence, Cambridge, MA, USA, July 13-16, 2006},
  publisher  = {{AUAI} Press},
  year       = {2006},
  url        = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1300\&proceeding\_id=22},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/uai/CavalloPS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/aim/CassimatisLLGKEBSDLMSSLSAD05,
  author     = {Nicholas L. Cassimatis and Sean Luke and Simon D. Levy and Ross W. Gayler and Pentti Kanerva and Chris Eliasmith and Timothy W. Bickmore and Alan C. Schultz and Randall Davis and James A. Landay and Robert C. Miller and Eric Saund and Thomas F. Stahovich and Michael L. Littman and Satinder Singh and Shlomo Argamon and Shlomo Dubnov},
  title      = {Reports on the 2004 {AAAI} Fall Symposia},
  journal    = {{AI} Mag.},
  volume     = {26},
  number     = {1},
  pages      = {98--102},
  year       = {2005},
  url        = {https://doi.org/10.1609/aimag.v26i1.1805},
  doi        = {10.1609/AIMAG.V26I1.1805},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/aim/CassimatisLLGKEBSDLMSSLSAD05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ci/WellmanESVKS05,
  author     = {Michael P. Wellman and Joshua Estelle and Satinder Singh and Yevgeniy Vorobeychik and Christopher Kiekintveld and Vishal Soni},
  title      = {Strategic Interactions in a Supply Chain Game},
  journal    = {Comput. Intell.},
  volume     = {21},
  number     = {1},
  pages      = {1--26},
  year       = {2005},
  url        = {https://doi.org/10.1111/j.0824-7935.2005.00263.x},
  doi        = {10.1111/J.0824-7935.2005.00263.X},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ci/WellmanESVKS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/JamesS05,
  author     = {Michael R. James and Satinder Singh},
  editor     = {Manuela M. Veloso and Subbarao Kambhampati},
  title      = {Planning in Models that Combine Memory with Predictive Representations of State},
  booktitle  = {Proceedings, The Twentieth National Conference on Artificial Intelligence and the Seventeenth Innovative Applications of Artificial Intelligence Conference, July 9-13, 2005, Pittsburgh, Pennsylvania, {USA}},
  pages      = {987--992},
  publisher  = {{AAAI} Press / The {MIT} Press},
  year       = {2005},
  url        = {http://www.aaai.org/Library/AAAI/2005/aaai05-156.php},
  timestamp  = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/JamesS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/WolfeJS05,
  author     = {Britton Wolfe and Michael R. James and Satinder Singh},
  editor     = {Luc De Raedt and Stefan Wrobel},
  title      = {Learning predictive state representations in dynamical systems without reset},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Second International Conference {(ICML} 2005), Bonn, Germany, August 7-11, 2005},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {119},
  pages      = {980--987},
  publisher  = {{ACM}},
  year       = {2005},
  url        = {https://doi.org/10.1145/1102351.1102475},
  doi        = {10.1145/1102351.1102475},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/WolfeJS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/JamesWS05,
  author     = {Michael R. James and Britton Wolfe and Satinder Singh},
  editor     = {Leslie Pack Kaelbling and Alessandro Saffiotti},
  title      = {Combining Memory and Landmarks with Predictive State Representations},
  booktitle  = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages      = {734--739},
  publisher  = {Professional Book Center},
  year       = {2005},
  url        = {http://ijcai.org/Proceedings/05/Papers/1621.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/JamesWS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/VorobeychikWS05,
  author     = {Yevgeniy Vorobeychik and Michael P. Wellman and Satinder Singh},
  editor     = {Leslie Pack Kaelbling and Alessandro Saffiotti},
  title      = {Learning Payoff Functions in Infinite Games},
  booktitle  = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages      = {977--982},
  publisher  = {Professional Book Center},
  year       = {2005},
  url        = {http://ijcai.org/Proceedings/05/Papers/0527.pdf},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/VorobeychikWS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/PrecupSPKS05,
  author    = {Doina Precup and Richard S. Sutton and Cosmin Paduraru and Anna Koop and Satinder Singh},
  title     = {Off-policy Learning with Options and Recognizers},
  booktitle = {Advances in Neural Information Processing Systems 18 [Neural Information Processing Systems, {NIPS} 2005, December 5-8, 2005, Vancouver, British Columbia, Canada]},
  pages     = {1097--1104},
  year      = {2005},
  url       = {https://proceedings.neurips.cc/paper/2005/hash/f75526659f31040afeb61cb7133e4e6d-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/PrecupSPKS05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The url uses plain escaped ampersands as query separators rather than the HTML numeric entity for an ampersand.
% Gaussian is braced in the title so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/uai/RudarySW05,
  author    = {Matthew R. Rudary and Satinder Singh and David Wingate},
  title     = {Predictive Linear-{Gaussian} Models of Stochastic Dynamical Systems},
  booktitle = {{UAI} '05, Proceedings of the 21st Conference in Uncertainty in Artificial Intelligence, Edinburgh, Scotland, July 26-29, 2005},
  pages     = {501--508},
  publisher = {{AUAI} Press},
  year      = {2005},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1187\&proceeding\_id=21},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/RudarySW05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/sigecom/KiekintveldWSS04,
  author    = {Christopher Kiekintveld and Michael P. Wellman and Satinder Singh and Vishal Soni},
  title     = {Value-driven procurement in the {TAC} supply chain game},
  journal   = {SIGecom Exch.},
  volume    = {4},
  number    = {3},
  pages     = {9--18},
  year      = {2004},
  url       = {https://doi.org/10.1145/1120701.1120704},
  doi       = {10.1145/1120701.1120704},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/sigecom/KiekintveldWSS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaaifs/VorobeychikWS04,
  author    = {Yevgeniy Vorobeychik and Michael P. Wellman and Satinder Singh},
  title     = {Learning Payoff Functions in Infinite Games},
  booktitle = {Artificial Multiagent Learning, Papers from the 2004 {AAAI} Fall Symposium. Arlington, VA, USA, October 22-24, 2004},
  volume    = {{FS-04-02}},
  pages     = {60--65},
  publisher = {{AAAI} Press},
  year      = {2004},
  url       = {https://www.aaai.org/Library/Symposia/Fall/2004/fs04-02-008.php},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaaifs/VorobeychikWS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aips/KiekintveldWSEVSR04,
  author    = {Christopher Kiekintveld and Michael P. Wellman and Satinder Singh and Joshua Estelle and Yevgeniy Vorobeychik and Vishal Soni and Matthew R. Rudary},
  editor    = {Shlomo Zilberstein and Jana Koehler and Sven Koenig},
  title     = {Distributed Feedback Control for Decision Making on Supply Chains},
  booktitle = {Proceedings of the Fourteenth International Conference on Automated Planning and Scheduling {(ICAPS} 2004), June 3-7 2004, Whistler, British Columbia, Canada},
  pages     = {384--392},
  publisher = {{AAAI}},
  year      = {2004},
  url       = {http://www.aaai.org/Library/ICAPS/2004/icaps04-045.php},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aips/KiekintveldWSEVSR04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cg/EstelleVWSKS04,
  author    = {Joshua Estelle and Yevgeniy Vorobeychik and Michael P. Wellman and Satinder Singh and Christopher Kiekintveld and Vishal Soni},
  editor    = {H. Jaap van den Herik and Yngvi Bj{\"{o}}rnsson and Nathan S. Netanyahu},
  title     = {Strategic Interactions in the {TAC} 2003 Supply Chain Tournament},
  booktitle = {Computers and Games, 4th International Conference, {CG} 2004, Ramat-Gan, Israel, July 5-7, 2004, Revised Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {3846},
  pages     = {316--331},
  publisher = {Springer},
  year      = {2004},
  url       = {https://doi.org/10.1007/11674399\_22},
  doi       = {10.1007/11674399\_22},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cg/EstelleVWSKS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/JamesS04,
  author    = {Michael R. James and Satinder Singh},
  editor    = {Carla E. Brodley},
  title     = {Learning and discovery of predictive state representations in dynamical systems with reset},
  booktitle = {Machine Learning, Proceedings of the Twenty-first International Conference {(ICML} 2004), Banff, Alberta, Canada, July 4-8, 2004},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {69},
  publisher = {{ACM}},
  year      = {2004},
  url       = {https://doi.org/10.1145/1015330.1015359},
  doi       = {10.1145/1015330.1015359},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/JamesS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/RudarySP04,
  author    = {Matthew R. Rudary and Satinder Singh and Martha E. Pollack},
  editor    = {Carla E. Brodley},
  title     = {Adaptive cognitive orthotics: combining reinforcement learning and constraint-based temporal reasoning},
  booktitle = {Machine Learning, Proceedings of the Twenty-first International Conference {(ICML} 2004), Banff, Alberta, Canada, July 4-8, 2004},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {69},
  publisher = {{ACM}},
  year      = {2004},
  url       = {https://doi.org/10.1145/1015330.1015411},
  doi       = {10.1145/1015330.1015411},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/RudarySP04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icmla/JamesSL04,
  author    = {Michael R. James and Satinder Singh and Michael L. Littman},
  editor    = {Mehmed M. Kantardzic and Mariofanna G. Milanova and Olfa Nasraoui},
  title     = {Planning with predictive state representations},
  booktitle = {Proceedings of the 2004 International Conference on Machine Learning and Applications - {ICMLA} 2004, 16-18 December 2004, Louisville, KY, {USA}},
  pages     = {304--311},
  publisher = {{IEEE} Computer Society},
  year      = {2004},
  url       = {https://doi.org/10.1109/ICMLA.2004.1383528},
  doi       = {10.1109/ICMLA.2004.1383528},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icmla/JamesSL04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ParkesSY04,
  author    = {David C. Parkes and Satinder Singh and Dimah Yanovsky},
  title     = {Approximately Efficient Online Mechanism Design},
  booktitle = {Advances in Neural Information Processing Systems 17 [Neural Information Processing Systems, {NIPS} 2004, December 13-18, 2004, Vancouver, British Columbia, Canada]},
  pages     = {1049--1056},
  year      = {2004},
  url       = {https://proceedings.neurips.cc/paper/2004/hash/fc03d48253286a798f5116ec00e99b2b-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/ParkesSY04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghBC04,
  author    = {Satinder Singh and Andrew G. Barto and Nuttapong Chentanez},
  title     = {Intrinsically Motivated Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 17 [Neural Information Processing Systems, {NIPS} 2004, December 13-18, 2004, Vancouver, British Columbia, Canada]},
  pages     = {1281--1288},
  year      = {2004},
  url       = {https://proceedings.neurips.cc/paper/2004/hash/4be5a36cbaca8ab9d2066debfe4e65c1-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghBC04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Bayes-Nash is capitalised and braced: both parts are proper names and must survive sentence-casing.
@inproceedings{DBLP:conf/sigecom/SinghSW04,
  author    = {Satinder Singh and Vishal Soni and Michael P. Wellman},
  editor    = {Jack S. Breese and Joan Feigenbaum and Margo I. Seltzer},
  title     = {Computing approximate {Bayes-Nash} equilibria in tree-games of incomplete information},
  booktitle = {Proceedings 5th {ACM} Conference on Electronic Commerce (EC-2004), New York, NY, USA, May 17-20, 2004},
  pages     = {81--90},
  publisher = {{ACM}},
  year      = {2004},
  url       = {https://doi.org/10.1145/988772.988785},
  doi       = {10.1145/988772.988785},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/sigecom/SinghSW04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The url uses plain escaped ampersands as query separators rather than the HTML numeric entity for an ampersand.
@inproceedings{DBLP:conf/uai/SinghJR04,
  author    = {Satinder Singh and Michael R. James and Matthew R. Rudary},
  editor    = {David Maxwell Chickering and Joseph Y. Halpern},
  title     = {Predictive State Representations: {A} New Theory for Modeling Dynamical Systems},
  booktitle = {{UAI} '04, Proceedings of the 20th Conference in Uncertainty in Artificial Intelligence, Banff, Canada, July 7-11, 2004},
  pages     = {512--518},
  publisher = {{AUAI} Press},
  year      = {2004},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1148\&proceeding\_id=20},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/SinghJR04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SinghLJPS03,
  author    = {Satinder Singh and Michael L. Littman and Nicholas K. Jong and David Pardoe and Peter Stone},
  editor    = {Tom Fawcett and Nina Mishra},
  title     = {Learning Predictive State Representations},
  booktitle = {Machine Learning, Proceedings of the Twentieth International Conference {(ICML} 2003), August 21-24, 2003, Washington, DC, {USA}},
  pages     = {712--719},
  publisher = {{AAAI} Press},
  year      = {2003},
  url       = {http://www.aaai.org/Library/ICML/2003/icml03-093.php},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SinghLJPS03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The acronym MDP is braced so sentence-casing styles do not lower-case it.
@inproceedings{DBLP:conf/nips/ParkesS03,
  author    = {David C. Parkes and Satinder Singh},
  editor    = {Sebastian Thrun and Lawrence K. Saul and Bernhard Sch{\"{o}}lkopf},
  title     = {An {MDP}-Based Approach to Online Mechanism Design},
  booktitle = {Advances in Neural Information Processing Systems 16 [Neural Information Processing Systems, {NIPS} 2003, December 8-13, 2003, Vancouver and Whistler, British Columbia, Canada]},
  pages     = {791--798},
  publisher = {{MIT} Press},
  year      = {2003},
  url       = {https://proceedings.neurips.cc/paper/2003/hash/d16509f6eaca1022bd8f28d6bc582cae-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/ParkesS03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/RudaryS03,
  author    = {Matthew R. Rudary and Satinder Singh},
  editor    = {Sebastian Thrun and Lawrence K. Saul and Bernhard Sch{\"{o}}lkopf},
  title     = {A Nonlinear Predictive State Representation},
  booktitle = {Advances in Neural Information Processing Systems 16 [Neural Information Processing Systems, {NIPS} 2003, December 8-13, 2003, Vancouver and Whistler, British Columbia, Canada]},
  pages     = {855--862},
  publisher = {{MIT} Press},
  year      = {2003},
  url       = {https://proceedings.neurips.cc/paper/2003/hash/72e6d3238361fe70f22fb0ac624a7072-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/RudaryS03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The system name NJFun is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/jair/SinghLKW02,
  author    = {Satinder Singh and Diane J. Litman and Michael J. Kearns and Marilyn A. Walker},
  title     = {Optimizing Dialogue Management with Reinforcement Learning: Experiments with the {NJFun} System},
  journal   = {J. Artif. Intell. Res.},
  volume    = {16},
  pages     = {105--133},
  year      = {2002},
  url       = {https://doi.org/10.1613/jair.859},
  doi       = {10.1613/JAIR.859},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jair/SinghLKW02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/Singh02,
  author    = {Satinder Singh},
  title     = {Introduction},
  journal   = {Mach. Learn.},
  volume    = {49},
  number    = {2-3},
  pages     = {107--109},
  year      = {2002},
  url       = {https://doi.org/10.1023/A:1017917511082},
  doi       = {10.1023/A:1017917511082},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ml/Singh02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/KearnsS02,
  author    = {Michael J. Kearns and Satinder Singh},
  title     = {Near-Optimal Reinforcement Learning in Polynomial Time},
  journal   = {Mach. Learn.},
  volume    = {49},
  number    = {2-3},
  pages     = {209--232},
  year      = {2002},
  url       = {https://doi.org/10.1023/A:1017984413808},
  doi       = {10.1023/A:1017984413808},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ml/KearnsS02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Isbell is written in the "Last, Jr., First" form; in "First Last" order BibTeX would take the suffix Jr. as the surname.
% The system name CobotDS is braced so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/aaai/KearnsISLH02,
  author    = {Michael J. Kearns and Isbell, Jr., Charles Lee and Satinder Singh and Diane J. Litman and Jessica Howe},
  editor    = {Rina Dechter and Michael J. Kearns and Richard S. Sutton},
  title     = {{CobotDS}: {A} Spoken Dialogue System for Chat},
  booktitle = {Proceedings of the Eighteenth National Conference on Artificial Intelligence and Fourteenth Conference on Innovative Applications of Artificial Intelligence, July 28 - August 1, 2002, Edmonton, Alberta, Canada},
  pages     = {425--430},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2002},
  url       = {http://www.aaai.org/Library/AAAI/2002/aaai02-065.php},
  timestamp = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/KearnsISLH02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The agent name ATTac-2000 is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/jair/StoneLSK01,
  author    = {Peter Stone and Michael L. Littman and Satinder Singh and Michael J. Kearns},
  title     = {{ATTac-2000}: An Adaptive Autonomous Bidding Agent},
  journal   = {J. Artif. Intell. Res.},
  volume    = {15},
  pages     = {189--206},
  year      = {2001},
  url       = {https://doi.org/10.1613/jair.865},
  doi       = {10.1613/JAIR.865},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jair/StoneLSK01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The agent name ATTac-2000 is braced so sentence-casing styles keep its mixed capitalisation.
@inproceedings{DBLP:conf/agents/StoneLSK01,
  author    = {Peter Stone and Michael L. Littman and Satinder Singh and Michael J. Kearns},
  editor    = {Elisabeth Andr{\'{e}} and Sandip Sen and Claude Frasson and J{\"{o}}rg P. M{\"{u}}ller},
  title     = {{ATTac-2000}: an adaptive autonomous bidding agent},
  booktitle = {Proceedings of the Fifth International Conference on Autonomous Agents, {AGENTS} 2001, Montreal, Canada, May 28 - June 1, 2001},
  pages     = {238--245},
  publisher = {{ACM}},
  year      = {2001},
  url       = {https://doi.org/10.1145/375735.376301},
  doi       = {10.1145/375735.376301},
  timestamp = {Sat, 30 Sep 2023 09:33:47 +0200},
  biburl    = {https://dblp.org/rec/conf/agents/StoneLSK01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Isbell is written in the "Last, Jr., First" form; in "First Last" order BibTeX would take the suffix Jr. as the surname.
@inproceedings{DBLP:conf/agents/IsbellSKSS01,
  author    = {Isbell, Jr., Charles Lee and Christian R. Shelton and Michael J. Kearns and Satinder Singh and Peter Stone},
  editor    = {Elisabeth Andr{\'{e}} and Sandip Sen and Claude Frasson and J{\"{o}}rg P. M{\"{u}}ller},
  title     = {A social reinforcement learning agent},
  booktitle = {Proceedings of the Fifth International Conference on Autonomous Agents, {AGENTS} 2001, Montreal, Canada, May 28 - June 1, 2001},
  pages     = {377--384},
  publisher = {{ACM}},
  year      = {2001},
  url       = {https://doi.org/10.1145/375735.376334},
  doi       = {10.1145/375735.376334},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/agents/IsbellSKSS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LittmanKS01,
  author    = {Michael L. Littman and Michael J. Kearns and Satinder Singh},
  editor    = {Thomas G. Dietterich and Suzanna Becker and Zoubin Ghahramani},
  title     = {An Efficient, Exact Algorithm for Solving Tree-Structured Graphical Games},
  booktitle = {Advances in Neural Information Processing Systems 14 [Neural Information Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8, 2001, Vancouver, British Columbia, Canada]},
  pages     = {817--823},
  publisher = {{MIT} Press},
  year      = {2001},
  url       = {https://proceedings.neurips.cc/paper/2001/hash/c5866e93cab1776890fe343c9e7063fb-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/LittmanKS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Isbell is written in the "Last, Jr., First" form; in "First Last" order BibTeX would take the suffix Jr. as the surname.
@inproceedings{DBLP:conf/nips/IsbellSKSS01,
  author    = {Isbell, Jr., Charles Lee and Christian R. Shelton and Michael J. Kearns and Satinder Singh and Peter Stone},
  editor    = {Thomas G. Dietterich and Suzanna Becker and Zoubin Ghahramani},
  title     = {Cobot: {A} Social Reinforcement Learning Agent},
  booktitle = {Advances in Neural Information Processing Systems 14 [Neural Information Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8, 2001, Vancouver, British Columbia, Canada]},
  pages     = {1393--1400},
  publisher = {{MIT} Press},
  year      = {2001},
  url       = {https://proceedings.neurips.cc/paper/2001/hash/92bbd31f8e0e43a7da8a6295b251725f-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/IsbellSKSS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LittmanSS01,
  author    = {Michael L. Littman and Richard S. Sutton and Satinder Singh},
  editor    = {Thomas G. Dietterich and Suzanna Becker and Zoubin Ghahramani},
  title     = {Predictive Representations of State},
  booktitle = {Advances in Neural Information Processing Systems 14 [Neural Information Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8, 2001, Vancouver, British Columbia, Canada]},
  pages     = {1555--1561},
  publisher = {{MIT} Press},
  year      = {2001},
  url       = {https://proceedings.neurips.cc/paper/2001/hash/1e4d36177d71bbb3558e43af9577d70e-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/LittmanSS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The url uses plain escaped ampersands as query separators rather than the HTML numeric entity for an ampersand.
@inproceedings{DBLP:conf/uai/KearnsLS01,
  author    = {Michael J. Kearns and Michael L. Littman and Satinder Singh},
  editor    = {Jack S. Breese and Daphne Koller},
  title     = {Graphical Models for Game Theory},
  booktitle = {{UAI} '01: Proceedings of the 17th Conference in Uncertainty in Artificial Intelligence, University of Washington, Seattle, Washington, USA, August 2-5, 2001},
  pages     = {253--260},
  publisher = {Morgan Kaufmann},
  year      = {2001},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=107\&proceeding\_id=17},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/KearnsLS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The simulator name FAucS is braced to keep its mixed capitalisation, and the stray space before the colon is removed.
@inproceedings{DBLP:conf/welcom/CsirikLSS01,
  author    = {J{\'{a}}nos A. Csirik and Michael L. Littman and Satinder Singh and Peter Stone},
  editor    = {Ludger Fiege and Gero M{\"{u}}hl and Uwe G. Wilhelm},
  title     = {{FAucS}: An {FCC} Spectrum Auction Simulator for Autonomous Bidding Agents},
  booktitle = {Electronic Commerce, Second International Workshop, {WELCOM} 2001 Heidelberg, Germany, November 16-17, 2001, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {2232},
  pages     = {139--151},
  publisher = {Springer},
  year      = {2001},
  url       = {https://doi.org/10.1007/3-540-45598-1\_14},
  doi       = {10.1007/3-540-45598-1\_14},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/welcom/CsirikLSS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SinghJLS00,
  author    = {Satinder Singh and Tommi S. Jaakkola and Michael L. Littman and Csaba Szepesv{\'{a}}ri},
  title     = {Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms},
  journal   = {Mach. Learn.},
  volume    = {38},
  number    = {3},
  pages     = {287--308},
  year      = {2000},
  url       = {https://doi.org/10.1023/A:1007678930559},
  doi       = {10.1023/A:1007678930559},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ml/SinghJLS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Isbell is written in the "Last, Jr., First" form; in "First Last" order BibTeX would take the suffix Jr. as the surname.
% LambdaMOO is braced to keep its capitals; the doubled word "on on" in the booktitle is reduced to one.
@inproceedings{DBLP:conf/aaai/IsbellKKSS00,
  author    = {Isbell, Jr., Charles Lee and Michael J. Kearns and David P. Kormann and Satinder Singh and Peter Stone},
  editor    = {Henry A. Kautz and Bruce W. Porter},
  title     = {Cobot in {LambdaMOO}: {A} Social Statistics Agent},
  booktitle = {Proceedings of the Seventeenth National Conference on Artificial Intelligence and Twelfth Conference on Innovative Applications of Artificial Intelligence, July 30 - August 3, 2000, Austin, Texas, {USA}},
  pages     = {36--41},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2000},
  url       = {http://www.aaai.org/Library/AAAI/2000/aaai00-006.php},
  timestamp = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/IsbellKKSS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The doubled word "on on" in the booktitle is reduced to one.
@inproceedings{DBLP:conf/aaai/SinghKLW00,
  author    = {Satinder Singh and Michael J. Kearns and Diane J. Litman and Marilyn A. Walker},
  editor    = {Henry A. Kautz and Bruce W. Porter},
  title     = {Empirical Evaluation of a Reinforcement Learning Spoken Dialogue System},
  booktitle = {Proceedings of the Seventeenth National Conference on Artificial Intelligence and Twelfth Conference on Innovative Applications of Artificial Intelligence, July 30 - August 3, 2000, Austin, Texas, {USA}},
  pages     = {645--651},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2000},
  url       = {http://www.aaai.org/Library/AAAI/2000/aaai00-099.php},
  timestamp = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/SinghKLW00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NOTE(review): Kearns is given the middle initial J. to agree with the other records of this co-author group in this file; the upstream record had S. Confirm against the paper.
@inproceedings{DBLP:conf/coling/LitmanKSW00,
  author    = {Diane J. Litman and Michael J. Kearns and Satinder Singh and Marilyn A. Walker},
  title     = {Automatic Optimization of Dialogue Management},
  booktitle = {{COLING} 2000, 18th International Conference on Computational Linguistics, Proceedings of the Conference, 2 Volumes, July 31 - August 4, 2000, Universit{\"{a}}t des Saarlandes, Saarbr{\"{u}}cken, Germany},
  pages     = {502--508},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  url       = {https://aclanthology.org/C00-1073/},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/coling/LitmanKSW00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/colt/KearnsS00,
  author    = {Michael J. Kearns and Satinder Singh},
  editor    = {Nicol{\`{o}} Cesa{-}Bianchi and Sally A. Goldman},
  title     = {Bias-Variance Error Bounds for Temporal Difference Updates},
  booktitle = {Proceedings of the Thirteenth Annual Conference on Computational Learning Theory {(COLT} 2000), June 28 - July 1, 2000, Palo Alto, California, {USA}},
  pages     = {142--147},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/colt/KearnsS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/MyersKSW00,
  author    = {Kary L. Myers and Michael J. Kearns and Satinder Singh and Marilyn A. Walker},
  editor    = {Pat Langley},
  title     = {A Boosting Approach to Topic Spotting on Subdialogues},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning {(ICML} 2000), Stanford University, Stanford, CA, USA, June 29 - July 2, 2000},
  pages     = {655--662},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/MyersKSW00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PrecupSS00,
  author    = {Doina Precup and Richard S. Sutton and Satinder Singh},
  editor    = {Pat Langley},
  title     = {Eligibility Traces for Off-Policy Policy Evaluation},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning {(ICML} 2000), Stanford University, Stanford, CA, USA, June 29 - July 2, 2000},
  pages     = {759--766},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/PrecupSS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/robocup/StoneSS00,
  author    = {Peter Stone and Richard S. Sutton and Satinder Singh},
  editor    = {Peter Stone and Tucker R. Balch and Gerhard K. Kraetzschmar},
  title     = {Reinforcement Learning for 3 vs. 2 Keepaway},
  booktitle = {RoboCup 2000: Robot Soccer World Cup {IV}},
  series    = {Lecture Notes in Computer Science},
  volume    = {2019},
  pages     = {249--258},
  publisher = {Springer},
  year      = {2000},
  url       = {https://doi.org/10.1007/3-540-45324-5\_23},
  doi       = {10.1007/3-540-45324-5\_23},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/robocup/StoneSS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The url uses plain escaped ampersands as query separators rather than the HTML numeric entity for an ampersand.
@inproceedings{DBLP:conf/uai/KearnsMS00,
  author    = {Michael J. Kearns and Yishay Mansour and Satinder Singh},
  editor    = {Craig Boutilier and Mois{\'{e}}s Goldszmidt},
  title     = {Fast Planning in Stochastic Games},
  booktitle = {{UAI} '00: Proceedings of the 16th Conference in Uncertainty in Artificial Intelligence, Stanford University, Stanford, California, USA, June 30 - July 3, 2000},
  pages     = {309--316},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=37\&proceeding\_id=16},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/KearnsMS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The url uses plain escaped ampersands as query separators rather than the HTML numeric entity for an ampersand.
@inproceedings{DBLP:conf/uai/SinghKM00,
  author    = {Satinder Singh and Michael J. Kearns and Yishay Mansour},
  editor    = {Craig Boutilier and Mois{\'{e}}s Goldszmidt},
  title     = {Nash Convergence of Gradient Dynamics in General-Sum Games},
  booktitle = {{UAI} '00: Proceedings of the 16th Conference in Uncertainty in Artificial Intelligence, Stanford University, Stanford, California, USA, June 30 - July 3, 2000},
  pages     = {541--548},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=63\&proceeding\_id=16},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/SinghKM00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The acronyms MDPs and Semi-MDPs are braced so sentence-casing styles do not lower-case them.
@article{DBLP:journals/ai/SuttonPS99,
  author    = {Richard S. Sutton and Doina Precup and Satinder Singh},
  title     = {Between {MDPs} and {Semi-MDPs}: {A} Framework for Temporal Abstraction in Reinforcement Learning},
  journal   = {Artif. Intell.},
  volume    = {112},
  number    = {1-2},
  pages     = {181--211},
  year      = {1999},
  url       = {https://doi.org/10.1016/S0004-3702(99)00052-1},
  doi       = {10.1016/S0004-3702(99)00052-1},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ai/SuttonPS99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghKLW99,
  author    = {Satinder Singh and Michael J. Kearns and Diane J. Litman and Marilyn A. Walker},
  editor    = {Sara A. Solla and Todd K. Leen and Klaus{-}Robert M{\"{u}}ller},
  title     = {Reinforcement Learning for Spoken Dialogue Systems},
  booktitle = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference, Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages     = {956--962},
  publisher = {The {MIT} Press},
  year      = {1999},
  url       = {http://papers.nips.cc/paper/1775-reinforcement-learning-for-spoken-dialogue-systems},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghKLW99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonMSM99,
  author    = {Richard S. Sutton and David A. McAllester and Satinder Singh and Yishay Mansour},
  editor    = {Sara A. Solla and Todd K. Leen and Klaus{-}Robert M{\"{u}}ller},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference, Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages     = {1057--1063},
  publisher = {The {MIT} Press},
  year      = {1999},
  url       = {http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonMSM99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The url uses plain escaped ampersands as query separators rather than the HTML numeric entity for an ampersand.
@inproceedings{DBLP:conf/uai/MansourS99,
  author    = {Yishay Mansour and Satinder Singh},
  editor    = {Kathryn B. Laskey and Henri Prade},
  title     = {On the Complexity of Policy Iteration},
  booktitle = {{UAI} '99: Proceedings of the Fifteenth Conference on Uncertainty in Artificial Intelligence, Stockholm, Sweden, July 30 - August 1, 1999},
  pages     = {401--408},
  publisher = {Morgan Kaufmann},
  year      = {1999},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=192\&proceeding\_id=15},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/MansourS99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The url uses plain escaped ampersands as query separators rather than the HTML numeric entity for an ampersand.
% The acronym POMDPs is braced so sentence-casing styles do not lower-case it.
@inproceedings{DBLP:conf/uai/McAllesterS99,
  author    = {David A. McAllester and Satinder Singh},
  editor    = {Kathryn B. Laskey and Henri Prade},
  title     = {Approximate Planning for Factored {POMDPs} using Belief State Simplification},
  booktitle = {{UAI} '99: Proceedings of the Fifteenth Conference on Uncertainty in Artificial Intelligence, Stockholm, Sweden, July 30 - August 1, 1999},
  pages     = {409--416},
  publisher = {Morgan Kaufmann},
  year      = {1999},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=193\&proceeding\_id=15},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/McAllesterS99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SinghD98,
  author    = {Satinder Singh and Peter Dayan},
  title     = {Analytical Mean Squared Error Curves for Temporal Difference Learning},
  journal   = {Mach. Learn.},
  volume    = {32},
  number    = {1},
  pages     = {5--40},
  year      = {1998},
  url       = {https://doi.org/10.1023/A:1007495401240},
  doi       = {10.1023/A:1007495401240},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ml/SinghD98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ecml/PrecupSS98,
  author    = {Doina Precup and Richard S. Sutton and Satinder Singh},
  editor    = {Claire Nedellec and C{\'{e}}line Rouveirol},
  title     = {Theoretical Results on Reinforcement Learning with Temporally Abstract Options},
  booktitle = {Machine Learning: ECML-98, 10th European Conference on Machine Learning, Chemnitz, Germany, April 21-23, 1998, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1398},
  pages     = {382--393},
  publisher = {Springer},
  year      = {1998},
  url       = {https://doi.org/10.1007/BFb0026709},
  doi       = {10.1007/BFB0026709},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ecml/PrecupSS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Title spelling corrected from "Polynominal" to "Polynomial".
@inproceedings{DBLP:conf/icml/KearnsS98,
  author    = {Michael J. Kearns and Satinder Singh},
  editor    = {Jude W. Shavlik},
  title     = {Near-Optimal Reinforcement Learning in Polynomial Time},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages     = {260--268},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/KearnsS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: the proper noun "Markov" in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/icml/LochS98,
  author    = {John Loch and
               Satinder Singh},
  editor    = {Jude W. Shavlik},
  title     = {Using Eligibility Traces to Find the Best Memoryless Policy in Partially Observable {Markov} Decision Processes},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages     = {323--331},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/LochS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonPS98,
  author    = {Richard S. Sutton and
               Doina Precup and
               Satinder Singh},
  editor    = {Jude W. Shavlik},
  title     = {Intra-Option Learning about Temporally Abstract Actions},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages     = {556--564},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonPS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BrownTS98,
  author    = {Timothy X. Brown and
               Hui Tong and
               Satinder Singh},
  editor    = {Michael J. Kearns and
               Sara A. Solla and
               David A. Cohn},
  title     = {Optimizing Admission Control while Ensuring Quality of Service in Multimedia Networks via Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages     = {982--988},
  publisher = {The {MIT} Press},
  year      = {1998},
  url       = {http://papers.nips.cc/paper/1599-optimizing-admission-control-while-ensuring-quality-of-service-in-multimedia-networks-via-reinforcement-learning},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/BrownTS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: "Q-Learning" in the title is brace-protected
% so sentence-casing styles keep the capital Q.
@inproceedings{DBLP:conf/nips/KearnsS98a,
  author    = {Michael J. Kearns and
               Satinder Singh},
  editor    = {Michael J. Kearns and
               Sara A. Solla and
               David A. Cohn},
  title     = {Finite-Sample Convergence Rates for {Q-Learning} and Indirect Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages     = {996--1002},
  publisher = {The {MIT} Press},
  year      = {1998},
  url       = {http://papers.nips.cc/paper/1531-finite-sample-convergence-rates-for-q-learning-and-indirect-algorithms},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/KearnsS98a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonSPR98,
  author    = {Richard S. Sutton and
               Satinder Singh and
               Doina Precup and
               Balaraman Ravindran},
  editor    = {Michael J. Kearns and
               Sara A. Solla and
               David A. Cohn},
  title     = {Improved Switching among Temporally Abstract Actions},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages     = {1066--1072},
  publisher = {The {MIT} Press},
  year      = {1998},
  url       = {http://papers.nips.cc/paper/1607-improved-switching-among-temporally-abstract-actions},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonSPR98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: the proper noun "Markov" in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/nips/WilliamsS98,
  author    = {John K. Williams and
               Satinder Singh},
  editor    = {Michael J. Kearns and
               Sara A. Solla and
               David A. Cohn},
  title     = {Experimental Results on Learning Stochastic Memoryless Policies for Partially Observable {Markov} Decision Processes},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages     = {1073--1080},
  publisher = {The {MIT} Press},
  year      = {1998},
  url       = {http://papers.nips.cc/paper/1509-experimental-results-on-learning-stochastic-memoryless-policies-for-partially-observable-markov-decision-processes},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/WilliamsS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: the proper noun "Markov" in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/nips/SinghC97,
  author    = {Satinder Singh and
               David Cohn},
  editor    = {Michael I. Jordan and
               Michael J. Kearns and
               Sara A. Solla},
  title     = {How to Dynamically Merge {Markov} Decision Processes},
  booktitle = {Advances in Neural Information Processing Systems 10, {[NIPS} Conference, Denver, Colorado, USA, 1997]},
  pages     = {1057--1063},
  publisher = {The {MIT} Press},
  year      = {1997},
  url       = {http://papers.nips.cc/paper/1420-how-to-dynamically-merge-markov-decision-processes},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghC97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SinghS96,
  author    = {Satinder P. Singh and
               Richard S. Sutton},
  title     = {Reinforcement Learning with Replacing Eligibility Traces},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {123--158},
  year      = {1996},
  url       = {https://doi.org/10.1023/A:1018012322525},
  doi       = {10.1023/A:1018012322525},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/SinghS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: the proper noun "Markov" in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/colt/SaulS96,
  author    = {Lawrence K. Saul and
               Satinder P. Singh},
  editor    = {Avrim Blum and
               Michael J. Kearns},
  title     = {Learning Curve Bounds for a {Markov} Decision Process with Undiscounted Rewards},
  booktitle = {Proceedings of the Ninth Annual Conference on Computational Learning Theory, {COLT} 1996, Desenzano del Garda, Italy, June 28-July 1, 1996},
  pages     = {147--156},
  publisher = {{ACM}},
  year      = {1996},
  url       = {https://doi.org/10.1145/238061.238084},
  doi       = {10.1145/238061.238084},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/colt/SaulS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/CohnS96,
  author    = {David A. Cohn and
               Satinder Singh},
  editor    = {Michael Mozer and
               Michael I. Jordan and
               Thomas Petsche},
  title     = {Predicting Lifetimes in Dynamically Allocated Memory},
  booktitle = {Advances in Neural Information Processing Systems 9, NIPS, Denver, CO, USA, December 2-5, 1996},
  pages     = {939--945},
  publisher = {{MIT} Press},
  year      = {1996},
  url       = {http://papers.nips.cc/paper/1240-predicting-lifetimes-in-dynamically-allocated-memory},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/CohnS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghB96,
  author    = {Satinder Singh and
               Dimitri P. Bertsekas},
  editor    = {Michael Mozer and
               Michael I. Jordan and
               Thomas Petsche},
  title     = {Reinforcement Learning for Dynamic Channel Allocation in Cellular Telephone Systems},
  booktitle = {Advances in Neural Information Processing Systems 9, NIPS, Denver, CO, USA, December 2-5, 1996},
  pages     = {974--980},
  publisher = {{MIT} Press},
  year      = {1996},
  url       = {http://papers.nips.cc/paper/1216-reinforcement-learning-for-dynamic-channel-allocation-in-cellular-telephone-systems},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghB96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghD96,
  author    = {Satinder Singh and
               Peter Dayan},
  editor    = {Michael Mozer and
               Michael I. Jordan and
               Thomas Petsche},
  title     = {Analytical Mean Squared Error Curves in Temporal Difference Learning},
  booktitle = {Advances in Neural Information Processing Systems 9, NIPS, Denver, CO, USA, December 2-5, 1996},
  pages     = {1054--1060},
  publisher = {{MIT} Press},
  year      = {1996},
  url       = {http://papers.nips.cc/paper/1284-analytical-mean-squared-error-curves-in-temporal-difference-learning},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghD96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/BartoBS95,
  author    = {Andrew G. Barto and
               Steven J. Bradtke and
               Satinder P. Singh},
  title     = {Learning to Act Using Real-Time Dynamic Programming},
  journal   = {Artif. Intell.},
  volume    = {72},
  number    = {1-2},
  pages     = {81--138},
  year      = {1995},
  url       = {https://doi.org/10.1016/0004-3702(94)00011-O},
  doi       = {10.1016/0004-3702(94)00011-O},
  timestamp = {Sat, 27 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ai/BartoBS95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: the booktitle misspelling "Eigth" is corrected
% to "Eighth", and the proper noun "Markov" in the title is brace-protected so
% sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/colt/SaulS95,
  author    = {Lawrence K. Saul and
               Satinder P. Singh},
  editor    = {Wolfgang Maass},
  title     = {{Markov} Decision Processes in Large State Spaces},
  booktitle = {Proceedings of the Eighth Annual Conference on Computational Learning Theory, {COLT} 1995, Santa Cruz, California, USA, July 5-8, 1995},
  pages     = {281--288},
  publisher = {{ACM}},
  year      = {1995},
  url       = {https://doi.org/10.1145/225298.225332},
  doi       = {10.1145/225298.225332},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/colt/SaulS95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/DayanS95,
  author    = {Peter Dayan and
               Satinder Singh},
  editor    = {David S. Touretzky and
               Michael Mozer and
               Michael E. Hasselmo},
  title     = {Improving Policies without Measuring Merits},
  booktitle = {Advances in Neural Information Processing Systems 8, NIPS, Denver, CO, USA, November 27-30, 1995},
  pages     = {1059--1065},
  publisher = {{MIT} Press},
  year      = {1995},
  url       = {http://papers.nips.cc/paper/1143-improving-policies-without-measuring-merits},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/DayanS95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SinghY94,
  author    = {Satinder P. Singh and
               Richard C. Yee},
  title     = {An Upper Bound on the Loss from Approximate Optimal-Value Functions},
  journal   = {Mach. Learn.},
  volume    = {16},
  number    = {3},
  pages     = {227--233},
  year      = {1994},
  url       = {https://doi.org/10.1007/BF00993308},
  doi       = {10.1007/BF00993308},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/SinghY94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/neco/JaakkolaJS94,
  author    = {Tommi S. Jaakkola and
               Michael I. Jordan and
               Satinder P. Singh},
  title     = {On the Convergence of Stochastic Iterative Dynamic Programming Algorithms},
  journal   = {Neural Comput.},
  volume    = {6},
  number    = {6},
  pages     = {1185--1201},
  year      = {1994},
  url       = {https://doi.org/10.1162/neco.1994.6.6.1185},
  doi       = {10.1162/NECO.1994.6.6.1185},
  timestamp = {Tue, 01 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/neco/JaakkolaJS94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: "Markovian" (derived from a proper noun) in
% the title is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/aaai/Singh94,
  author    = {Satinder P. Singh},
  editor    = {Barbara Hayes{-}Roth and
               Richard E. Korf},
  title     = {Reinforcement Learning Algorithms for Average-Payoff {Markovian} Decision Processes},
  booktitle = {Proceedings of the 12th National Conference on Artificial Intelligence, Seattle, WA, USA, July 31 - August 4, 1994, Volume 1},
  pages     = {700--705},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {1994},
  url       = {http://www.aaai.org/Library/AAAI/1994/aaai94-107.php},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/Singh94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: "Markovian" (derived from a proper noun) in
% the title is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/icml/SinghJJ94,
  author    = {Satinder P. Singh and
               Tommi S. Jaakkola and
               Michael I. Jordan},
  editor    = {William W. Cohen and
               Haym Hirsh},
  title     = {Learning Without State-Estimation in Partially Observable {Markovian} Decision Processes},
  booktitle = {Machine Learning, Proceedings of the Eleventh International Conference, Rutgers University, New Brunswick, NJ, USA, July 10-13, 1994},
  pages     = {284--292},
  publisher = {Morgan Kaufmann},
  year      = {1994},
  url       = {https://doi.org/10.1016/b978-1-55860-335-6.50042-8},
  doi       = {10.1016/B978-1-55860-335-6.50042-8},
  timestamp = {Mon, 24 Jun 2019 13:56:31 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SinghJJ94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: the proper noun "Markov" in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/nips/JaakkolaSJ94,
  author    = {Tommi S. Jaakkola and
               Satinder Singh and
               Michael I. Jordan},
  editor    = {Gerald Tesauro and
               David S. Touretzky and
               Todd K. Leen},
  title     = {Reinforcement Learning Algorithm for Partially Observable {Markov} Decision Problems},
  booktitle = {Advances in Neural Information Processing Systems 7, {[NIPS} Conference, Denver, Colorado, USA, 1994]},
  pages     = {345--352},
  publisher = {{MIT} Press},
  year      = {1994},
  url       = {http://papers.nips.cc/paper/951-reinforcement-learning-algorithm-for-partially-observable-markov-decision-problems},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/JaakkolaSJ94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghJJ94,
  author    = {Satinder Singh and
               Tommi S. Jaakkola and
               Michael I. Jordan},
  editor    = {Gerald Tesauro and
               David S. Touretzky and
               Todd K. Leen},
  title     = {Reinforcement Learning with Soft State Aggregation},
  booktitle = {Advances in Neural Information Processing Systems 7, {[NIPS} Conference, Denver, Colorado, USA, 1994]},
  pages     = {361--368},
  publisher = {{MIT} Press},
  year      = {1994},
  url       = {http://papers.nips.cc/paper/981-reinforcement-learning-with-soft-state-aggregation},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghJJ94.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SinghBGC93,
  author    = {Satinder Singh and
               Andrew G. Barto and
               Roderic A. Grupen and
               Christopher I. Connolly},
  editor    = {Jack D. Cowan and
               Gerald Tesauro and
               Joshua Alspector},
  title     = {Robust Reinforcement Learning in Motion Planning},
  booktitle = {Advances in Neural Information Processing Systems 6, [7th {NIPS} Conference, Denver, Colorado, USA, 1993]},
  pages     = {655--662},
  publisher = {Morgan Kaufmann},
  year      = {1993},
  url       = {http://papers.nips.cc/paper/843-robust-reinforcement-learning-in-motion-planning},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SinghBGC93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/JaakkolaJS93,
  author    = {Tommi S. Jaakkola and
               Michael I. Jordan and
               Satinder Singh},
  editor    = {Jack D. Cowan and
               Gerald Tesauro and
               Joshua Alspector},
  title     = {Convergence of Stochastic Iterative Dynamic Programming Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 6, [7th {NIPS} Conference, Denver, Colorado, USA, 1993]},
  pages     = {703--710},
  publisher = {Morgan Kaufmann},
  year      = {1993},
  url       = {http://papers.nips.cc/paper/764-convergence-of-stochastic-iterative-dynamic-programming-algorithms},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/JaakkolaJS93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/Singh92,
  author    = {Satinder Pal Singh},
  title     = {Transfer of Learning by Composing Solutions of Elemental Sequential Tasks},
  journal   = {Mach. Learn.},
  volume    = {8},
  pages     = {323--339},
  year      = {1992},
  url       = {https://doi.org/10.1007/BF00992700},
  doi       = {10.1007/BF00992700},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ml/Singh92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/Singh92,
  author    = {Satinder P. Singh},
  editor    = {William R. Swartout},
  title     = {Reinforcement Learning with a Hierarchy of Abstract Models},
  booktitle = {Proceedings of the 10th National Conference on Artificial Intelligence, San Jose, CA, USA, July 12-16, 1992},
  pages     = {202--207},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {1992},
  url       = {http://www.aaai.org/Library/AAAI/1992/aaai92-032.php},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/Singh92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/Singh92,
  author    = {Satinder P. Singh},
  editor    = {Derek H. Sleeman and
               Peter Edwards},
  title     = {Scaling Reinforcement Learning Algorithms by Learning Variable Temporal Resolution Models},
  booktitle = {Proceedings of the Ninth International Workshop on Machine Learning {(ML} 1992), Aberdeen, Scotland, UK, July 1-3, 1992},
  pages     = {406--415},
  publisher = {Morgan Kaufmann},
  year      = {1992},
  url       = {https://doi.org/10.1016/b978-1-55860-247-2.50058-9},
  doi       = {10.1016/B978-1-55860-247-2.50058-9},
  timestamp = {Fri, 21 Jun 2019 11:43:03 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/Singh92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Differs from the raw DBLP export: the title misspelling "Sequentail" is
% corrected to "Sequential".
@inproceedings{DBLP:conf/icml/Singh91,
  author    = {Satinder P. Singh},
  editor    = {Lawrence Birnbaum and
               Gregg Collins},
  title     = {Transfer of Learning Across Compositions of Sequential Tasks},
  booktitle = {Proceedings of the Eighth International Workshop (ML91), Northwestern University, Evanston, Illinois, {USA}},
  pages     = {348--352},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {https://doi.org/10.1016/b978-1-55860-200-7.50072-6},
  doi       = {10.1016/B978-1-55860-200-7.50072-6},
  timestamp = {Wed, 19 Jun 2019 17:09:09 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/Singh91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/Singh91,
  author    = {Satinder Singh},
  editor    = {John E. Moody and
               Stephen Jose Hanson and
               Richard Lippmann},
  title     = {The Efficient Learning of Multiple Task Sequences},
  booktitle = {Advances in Neural Information Processing Systems 4, {[NIPS} Conference, Denver, Colorado, USA, December 2-5, 1991]},
  pages     = {251--258},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {http://papers.nips.cc/paper/569-the-efficient-learning-of-multiple-task-sequences},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/Singh91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BerthierSBH91,
  author    = {N. E. Berthier and
               Satinder P. Singh and
               Andrew G. Barto and
               James C. Houk},
  editor    = {John E. Moody and
               Stephen Jose Hanson and
               Richard Lippmann},
  title     = {A Cortico-Cerebellar Model that Learns to Generate Distributed Motor Commands to Control a Kinematic Arm},
  booktitle = {Advances in Neural Information Processing Systems 4, {[NIPS} Conference, Denver, Colorado, USA, December 2-5, 1991]},
  pages     = {611--618},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {http://papers.nips.cc/paper/532-a-cortico-cerebellar-model-that-learns-to-generate-distributed-motor-commands-to-control-a-kinematic-arm},
  timestamp = {Fri, 06 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/BerthierSBH91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.