Stop the war!

Остановите войну!

for scientists:

default search action

combined dblp search
author search
venue search
publication search

ask others

BibTeX records: Martha White

Name: dblp XML data dump
Creator: Schloss Dagstuhl - Leibniz Center for Informatics
Published: 1993
License: https://creativecommons.org/publicdomain/zero/1.0/
Keywords: dblp, XML, computer science, scholarly publications, metadata

> Home > Persons > Martha White

download as .bib file

% AAAI 2024 abstract-reprint record. The booktitle names the co-located IAAI
% and EAAI events; the whole joint event is dated February 20-27, 2024, so the
% EAAI year is 2024 as well.
@inproceedings{DBLP:conf/aaai/LiuWW24,
  author       = {Vincent Liu and
                  James R. Wright and
                  Martha White},
  editor       = {Michael J. Wooldridge and
                  Jennifer G. Dy and
                  Sriraam Natarajan},
  title        = {Exploiting Action Impact Regularity and Exogenous State Variables
                  for Offline Reinforcement Learning (Abstract Reprint)},
  booktitle    = {Thirty-Eighth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2024, Thirty-Sixth Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2024, Fourteenth Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2024, February 20-27, 2024, Vancouver,
                  Canada},
  pages        = {22706},
  publisher    = {{AAAI} Press},
  year         = {2024},
  url          = {https://doi.org/10.1609/aaai.v38i20.30606},
  doi          = {10.1609/AAAI.V38I20.30606},
  timestamp    = {Tue, 02 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/LiuWW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2402.03903.
@article{DBLP:journals/corr/abs-2402-03903,
  author     = {Daley, Brett and White, Martha and Machado, Marlos C.},
  title      = {Compound Returns Reduce Variance in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2402.03903},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.03903},
  doi        = {10.48550/ARXIV.2402.03903},
  eprinttype = {arXiv},
  eprint     = {2402.03903},
  timestamp  = {Mon, 12 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-03903.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2402.10339.
@article{DBLP:journals/corr/abs-2402-10339,
  author     = {Silva, Hugo and White, Martha},
  title      = {What to Do When Your Discrete Optimization Is the Size of a Neural Network?},
  journal    = {CoRR},
  volume     = {abs/2402.10339},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.10339},
  doi        = {10.48550/ARXIV.2402.10339},
  eprinttype = {arXiv},
  eprint     = {2402.10339},
  timestamp  = {Mon, 26 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-10339.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2402.13425.
@article{DBLP:journals/corr/abs-2402-13425,
  author     = {Imani, Ehsan and Luedemann, Kai and Scholnick{-}Hughes, Sam and Elelimy, Esraa and White, Martha},
  title      = {Investigating the Histogram Loss in Regression},
  journal    = {CoRR},
  volume     = {abs/2402.13425},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.13425},
  doi        = {10.48550/ARXIV.2402.13425},
  eprinttype = {arXiv},
  eprint     = {2402.13425},
  timestamp  = {Thu, 21 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-13425.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, J. Artif. Intell. Res. volume 77 (2023).
@article{DBLP:journals/jair/LiuWW23,
  author     = {Liu, Vincent and Wright, James R. and White, Martha},
  title      = {Exploiting Action Impact Regularity and Exogenous State Variables for Offline Reinforcement Learning},
  journal    = {J. Artif. Intell. Res.},
  volume     = {77},
  pages      = {71--101},
  year       = {2023},
  url        = {https://doi.org/10.1613/jair.1.14580},
  doi        = {10.1613/JAIR.1.14580},
  timestamp  = {Mon, 22 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jair/LiuWW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, J. Mach. Learn. Res. volume 24 (2023), paper 146.
@article{DBLP:journals/jmlr/0002IKW23,
  author     = {Graves, Eric and Imani, Ehsan and Kumaraswamy, Raksha and White, Martha},
  title      = {Off-Policy Actor-Critic with Emphatic Weightings},
  journal    = {J. Mach. Learn. Res.},
  volume     = {24},
  pages      = {146:1--146:63},
  year       = {2023},
  url        = {http://jmlr.org/papers/v24/21-1350.html},
  timestamp  = {Tue, 13 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/0002IKW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, J. Mach. Learn. Res. volume 24 (2023), paper 256.
@article{DBLP:journals/jmlr/JavedSSW23,
  author     = {Javed, Khurram and Shah, Haseeb and Sutton, Richard S. and White, Martha},
  title      = {Scalable Real-Time Recurrent Learning Using Columnar-Constructive Networks},
  journal    = {J. Mach. Learn. Res.},
  volume     = {24},
  pages      = {256:1--256:34},
  year       = {2023},
  url        = {http://jmlr.org/papers/v24/23-0367.html},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/JavedSSW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, IEEE Trans. Pattern Anal. Mach. Intell. 45(5), 2023.
@article{DBLP:journals/pami/PattersonLW23,
  author     = {Patterson, Andrew and Liao, Victor and White, Martha},
  title      = {Robust Losses for Learning Value Functions},
  journal    = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
  volume     = {45},
  number     = {5},
  pages      = {6157--6167},
  year       = {2023},
  url        = {https://doi.org/10.1109/TPAMI.2022.3213503},
  doi        = {10.1109/TPAMI.2022.3213503},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/pami/PattersonLW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, Trans. Mach. Learn. Res. 2023 (OpenReview URL, no page range).
@article{DBLP:journals/tmlr/SchlegelTWW23,
  author     = {Schlegel, Matthew and Tkachuk, Volodymyr and White, Adam M. and White, Martha},
  title      = {Investigating Action Encodings in Recurrent Neural Networks in Reinforcement Learning},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2023},
  year       = {2023},
  url        = {https://openreview.net/forum?id=K6g4MbAC1r},
  timestamp  = {Thu, 18 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/SchlegelTWW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, PMLR volume 206 (Artificial Intelligence and Statistics, April 2023).
@inproceedings{DBLP:conf/aistats/LiuCTW23,
  author     = {Liu, Vincent and Chandak, Yash and Thomas, Philip S. and White, Martha},
  editor     = {Ruiz, Francisco J. R. and Dy, Jennifer G. and van de Meent, Jan{-}Willem},
  title      = {Asymptotically Unbiased Off-Policy Policy Evaluation when Reusing Old Data in Nonstationary Environments},
  booktitle  = {International Conference on Artificial Intelligence and Statistics, 25-27 April 2023, Palau de Congressos, Valencia, Spain},
  series     = {Proceedings of Machine Learning Research},
  volume     = {206},
  pages      = {5474--5492},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v206/liu23d.html},
  timestamp  = {Mon, 19 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/LiuCTW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, PMLR volume 232 (Conference on Lifelong Learning Agents, August 2023).
@inproceedings{DBLP:conf/collas/LiuWTJ0W23,
  author     = {Liu, Vincent and Wang, Han and Tao, Ruo Yu and Javed, Khurram and White, Adam and White, Martha},
  editor     = {Chandar, Sarath and Pascanu, Razvan and Sedghi, Hanie and Precup, Doina},
  title      = {Measuring and Mitigating Interference in Reinforcement Learning},
  booktitle  = {Conference on Lifelong Learning Agents, 22-25 August 2023, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {232},
  pages      = {781--795},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v232/liu23a.html},
  timestamp  = {Tue, 20 Feb 2024 13:52:18 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/LiuWTJ0W23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, ICLR 2023 (published on OpenReview.net).
@inproceedings{DBLP:conf/iclr/NeumannLJP0W23,
  author     = {Neumann, Samuel and Lim, Sungsu and Joseph, Ajin George and Pan, Yangchen and White, Adam and White, Martha},
  title      = {Greedy Actor-Critic: {A} New Conditional Cross-Entropy Method for Policy Improvement},
  booktitle  = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=eSQh8rG8Oa},
  timestamp  = {Fri, 30 Jun 2023 14:38:38 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/NeumannLJP0W23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, ICLR 2023 (published on OpenReview.net).
@inproceedings{DBLP:conf/iclr/XiaoWP0W23,
  author     = {Xiao, Chenjun and Wang, Han and Pan, Yangchen and White, Adam and White, Martha},
  title      = {The In-Sample Softmax for Offline Reinforcement Learning},
  booktitle  = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=u-RuvyDYqCM},
  timestamp  = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/XiaoWP0W23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, ICML 2023, PMLR volume 202.
@inproceedings{DBLP:conf/icml/DaleyWAM23,
  author     = {Daley, Brett and White, Martha and Amato, Christopher and Machado, Marlos C.},
  editor     = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
  title      = {Trajectory-Aware Eligibility Traces for Off-Policy Reinforcement Learning},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {6818--6835},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/daley23a.html},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/DaleyWAM23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2023 conference paper. Proper nouns in the title and acronyms in the
% booktitle are braced so that styles which re-case these fields keep their
% capitalisation. The url value is left exactly as exported.
@inproceedings{DBLP:conf/nips/ZhuCSW23,
  author       = {Lingwei Zhu and
                  Zheng Chen and
                  Matthew Schlegel and
                  Martha White},
  editor       = {Alice Oh and
                  Tristan Naumann and
                  Amir Globerson and
                  Kate Saenko and
                  Moritz Hardt and
                  Sergey Levine},
  title        = {General {Munchausen} Reinforcement Learning with {Tsallis} {Kullback-Leibler}
                  Divergence},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans,
                  {LA}, {USA}, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper\_files/paper/2023/hash/b3e866c228f8f4ea18021ae63aea5453-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ZhuCSW23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2301.11321.
@article{DBLP:journals/corr/abs-2301-11321,
  author     = {Daley, Brett and White, Martha and Amato, Christopher and Machado, Marlos C.},
  title      = {Trajectory-Aware Eligibility Traces for Off-Policy Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2301.11321},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.11321},
  doi        = {10.48550/ARXIV.2301.11321},
  eprinttype = {arXiv},
  eprint     = {2301.11321},
  timestamp  = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-11321.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2301.11476. The proper nouns in the title are braced,
% like the already-protected {KL}, so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-2301-11476,
  author       = {Lingwei Zhu and
                  Zheng Chen and
                  Takamitsu Matsubara and
                  Martha White},
  title        = {Generalized {Munchausen} Reinforcement Learning using {Tsallis} {KL} Divergence},
  journal      = {CoRR},
  volume       = {abs/2301.11476},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2301.11476},
  doi          = {10.48550/ARXIV.2301.11476},
  eprinttype   = {arXiv},
  eprint       = {2301.11476},
  timestamp    = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-11476.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2302.05326.
@article{DBLP:journals/corr/abs-2302-05326,
  author     = {Javed, Khurram and Shah, Haseeb and Sutton, Richard S. and White, Martha},
  title      = {Online Real-Time Recurrent Learning Using Sparse Connections and Selective Learning},
  journal    = {CoRR},
  volume     = {abs/2302.05326},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.05326},
  doi        = {10.48550/ARXIV.2302.05326},
  eprinttype = {arXiv},
  eprint     = {2302.05326},
  timestamp  = {Mon, 13 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-05326.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2302.11725.
@article{DBLP:journals/corr/abs-2302-11725,
  author     = {Liu, Vincent and Chandak, Yash and Thomas, Philip S. and White, Martha},
  title      = {Asymptotically Unbiased Off-Policy Policy Evaluation when Reusing Old Data in Nonstationary Environments},
  journal    = {CoRR},
  volume     = {abs/2302.11725},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.11725},
  doi        = {10.48550/ARXIV.2302.11725},
  eprinttype = {arXiv},
  eprint     = {2302.11725},
  timestamp  = {Tue, 28 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-11725.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2302.14372.
@article{DBLP:journals/corr/abs-2302-14372,
  author     = {Xiao, Chenjun and Wang, Han and Pan, Yangchen and White, Adam and White, Martha},
  title      = {The In-Sample Softmax for Offline Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2302.14372},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.14372},
  doi        = {10.48550/ARXIV.2302.14372},
  eprinttype = {arXiv},
  eprint     = {2302.14372},
  timestamp  = {Thu, 02 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-14372.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2304.01315.
@article{DBLP:journals/corr/abs-2304-01315,
  author     = {Patterson, Andrew and Neumann, Samuel and White, Martha and White, Adam},
  title      = {Empirical Design in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2304.01315},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2304.01315},
  doi        = {10.48550/ARXIV.2304.01315},
  eprinttype = {arXiv},
  eprint     = {2304.01315},
  timestamp  = {Tue, 18 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2304-01315.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2305.09838.
@article{DBLP:journals/corr/abs-2305-09838,
  author     = {Kostas, James E. and Jordan, Scott M. and Chandak, Yash and Theocharous, Georgios and Gupta, Dhawal and White, Martha and da Silva, Bruno Castro and Thomas, Philip S.},
  title      = {Coagent Networks: Generalized and Scaled},
  journal    = {CoRR},
  volume     = {abs/2305.09838},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.09838},
  doi        = {10.48550/ARXIV.2305.09838},
  eprinttype = {arXiv},
  eprint     = {2305.09838},
  timestamp  = {Wed, 24 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-09838.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2307.04887.
@article{DBLP:journals/corr/abs-2307-04887,
  author     = {Liu, Vincent and Wang, Han and Tao, Ruo Yu and Javed, Khurram and White, Adam and White, Martha},
  title      = {Measuring and Mitigating Interference in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2307.04887},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.04887},
  doi        = {10.48550/ARXIV.2307.04887},
  eprinttype = {arXiv},
  eprint     = {2307.04887},
  timestamp  = {Mon, 24 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-04887.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2312.01624. The acronym that opens the title is braced
% so sentence-casing styles do not lowercase its inner capitals.
@article{DBLP:journals/corr/abs-2312-01624,
  author       = {Muhammad Kamran Janjua and
                  Haseeb Shah and
                  Martha White and
                  Erfan Miahi and
                  Marlos C. Machado and
                  Adam White},
  title        = {{GVFs} in the Real World: Making Predictions Online for Water Treatment},
  journal      = {CoRR},
  volume       = {abs/2312.01624},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.01624},
  doi          = {10.48550/ARXIV.2312.01624},
  eprinttype   = {arXiv},
  eprint       = {2312.01624},
  timestamp    = {Wed, 13 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-01624.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2312.02355.
@article{DBLP:journals/corr/abs-2312-02355,
  author     = {Liu, Vincent and Nagarajan, Prabhat and Patterson, Andrew and White, Martha},
  title      = {When is Offline Policy Selection Sample Efficient for Reinforcement Learning?},
  journal    = {CoRR},
  volume     = {abs/2312.02355},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.02355},
  doi        = {10.48550/ARXIV.2312.02355},
  eprinttype = {arXiv},
  eprint     = {2312.02355},
  timestamp  = {Wed, 13 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-02355.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, J. Mach. Learn. Res. volume 23 (2022), paper 145. The proper
% noun in the title is braced so sentence-casing styles keep its capital.
@article{DBLP:journals/jmlr/Patterson0W22,
  author       = {Andrew Patterson and
                  Adam White and
                  Martha White},
  title        = {A Generalized Projected {Bellman} Error for Off-policy Value Estimation
                  in Reinforcement Learning},
  journal      = {J. Mach. Learn. Res.},
  volume       = {23},
  pages        = {145:1--145:61},
  year         = {2022},
  url          = {http://jmlr.org/papers/v23/21-037.html},
  timestamp    = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/Patterson0W22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, J. Mach. Learn. Res. volume 23 (2022), paper 253.
@article{DBLP:journals/jmlr/0001SLKMW22,
  author     = {Chan, Alan and Silva, Hugo and Lim, Sungsu and Kozuno, Tadashi and Mahmood, A. Rupam and White, Martha},
  title      = {Greedification Operators for Policy Optimization: Investigating Forward and Reverse {KL} Divergences},
  journal    = {J. Mach. Learn. Res.},
  volume     = {23},
  pages      = {253:1--253:79},
  year       = {2022},
  url        = {http://jmlr.org/papers/v23/21-054.html},
  timestamp  = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/0001SLKMW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, Trans. Mach. Learn. Res. 2022 (OpenReview URL, no page range).
@article{DBLP:journals/tmlr/ImaniHW22,
  author     = {Imani, Ehsan and Hu, Wei and White, Martha},
  title      = {Representation Alignment in Neural Networks},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2022},
  year       = {2022},
  url        = {https://openreview.net/forum?id=fLIWMnZ9ij},
  timestamp  = {Fri, 19 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/ImaniHW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, Trans. Mach. Learn. Res. 2022 (OpenReview URL, no page range).
@article{DBLP:journals/tmlr/WangSWBLZLKFW22,
  author     = {Wang, Han and Sakhadeo, Archit and White, Adam M. and Bell, James and Liu, Vincent and Zhao, Xutong and Liu, Puer and Kozuno, Tadashi and Fyshe, Alona and White, Martha},
  title      = {No More Pesky Hyperparameters: Offline Hyperparameter Tuning for {RL}},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2022},
  year       = {2022},
  url        = {https://openreview.net/forum?id=AiOUi3440V},
  timestamp  = {Fri, 19 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/WangSWBLZLKFW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, AISTATS 2022, PMLR volume 151.
@inproceedings{DBLP:conf/aistats/0006TPWM22,
  author     = {Garg, Shivam and Tosatto, Samuele and Pan, Yangchen and White, Martha and Mahmood, Rupam},
  editor     = {Camps{-}Valls, Gustau and Ruiz, Francisco J. R. and Valera, Isabel},
  title      = {An Alternate Policy Gradient Estimator for Softmax Policies},
  booktitle  = {International Conference on Artificial Intelligence and Statistics, {AISTATS} 2022, 28-30 March 2022, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {151},
  pages      = {6630--6689},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v151/garg22b.html},
  timestamp  = {Sat, 30 Sep 2023 09:34:08 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/0006TPWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, ICLR 2022 (published on OpenReview.net).
@inproceedings{DBLP:conf/iclr/BanmanP0FW22,
  author     = {Banman, Kirby and Peet{-}Pare, Liam and Hegde, Nidhi and Fyshe, Alona and White, Martha},
  title      = {Resonance in Weight Space: Covariate Shift Can Drive Divergence of {SGD} with Momentum},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=5ECQL05ub0J},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/BanmanP0FW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, ICML 2022, PMLR volume 162.
@inproceedings{DBLP:conf/icml/TosattoPWM22,
  author     = {Tosatto, Samuele and Patterson, Andrew and White, Martha and Mahmood, Rupam},
  editor     = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesv{\'{a}}ri, Csaba and Niu, Gang and Sabato, Sivan},
  title      = {A Temporal-Difference Approach to Policy Gradient Estimation},
  booktitle  = {International Conference on Machine Learning, {ICML} 2022, 17-23 July 2022, Baltimore, Maryland, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {21609--21632},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v162/tosatto22a.html},
  timestamp  = {Tue, 12 Jul 2022 17:36:52 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/TosattoPWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, UAI 2022, PMLR volume 180. The title is stored in Title
% Case like the other records in this file; a style can downcase a title but
% cannot restore capitals that were never entered.
@inproceedings{DBLP:conf/uai/PanMFWYR022,
  author       = {Yangchen Pan and
                  Jincheng Mei and
                  Amir{-}massoud Farahmand and
                  Martha White and
                  Hengshuai Yao and
                  Mohsen Rohani and
                  Jun Luo},
  editor       = {James Cussens and
                  Kun Zhang},
  title        = {Understanding and Mitigating the Limitations of Prioritized Experience
                  Replay},
  booktitle    = {Uncertainty in Artificial Intelligence, Proceedings of the Thirty-Eighth
                  Conference on Uncertainty in Artificial Intelligence, {UAI} 2022,
                  1-5 August 2022, Eindhoven, The Netherlands},
  series       = {Proceedings of Machine Learning Research},
  volume       = {180},
  pages        = {1561--1571},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v180/pan22a.html},
  timestamp    = {Sat, 15 Oct 2022 12:08:13 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/PanMFWYR022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2202.02396; this record has an arxiv.org URL and no doi field.
@article{DBLP:journals/corr/abs-2202-02396,
  author     = {Tosatto, Samuele and Patterson, Andrew and White, Martha and Mahmood, A. Rupam},
  title      = {A Temporal-Difference Approach to Policy Gradient Estimation},
  journal    = {CoRR},
  volume     = {abs/2202.02396},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.02396},
  eprinttype = {arXiv},
  eprint     = {2202.02396},
  timestamp  = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-02396.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2202.11133; this record has an arxiv.org URL and no doi field.
@article{DBLP:journals/corr/abs-2202-11133,
  author     = {McLeod, Matthew and Lo, Chunlok and Schlegel, Matthew and Jacobsen, Andrew and Kumaraswamy, Raksha and White, Martha and White, Adam},
  title      = {Continual Auxiliary Task Learning},
  journal    = {CoRR},
  volume     = {abs/2202.11133},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.11133},
  eprinttype = {arXiv},
  eprint     = {2202.11133},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-11133.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2203.11992.
@article{DBLP:journals/corr/abs-2203-11992,
  author     = {Banman, Kirby and Peet{-}Pare, Liam and Hegde, Nidhi and Fyshe, Alona and White, Martha},
  title      = {Resonance in Weight Space: Covariate Shift Can Drive Divergence of {SGD} with Momentum},
  journal    = {CoRR},
  volume     = {abs/2203.11992},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.11992},
  doi        = {10.48550/ARXIV.2203.11992},
  eprinttype = {arXiv},
  eprint     = {2203.11992},
  timestamp  = {Tue, 29 Mar 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-11992.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2203.15955.
@article{DBLP:journals/corr/abs-2203-15955,
  author     = {Wang, Han and Miahi, Erfan and White, Martha and Machado, Marlos C. and Abbas, Zaheer and Kumaraswamy, Raksha and Liu, Vincent and White, Adam},
  title      = {Investigating the Properties of Neural Network Representations in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2203.15955},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.15955},
  doi        = {10.48550/ARXIV.2203.15955},
  eprinttype = {arXiv},
  eprint     = {2203.15955},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-15955.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2205.08464.
@article{DBLP:journals/corr/abs-2205-08464,
  author     = {Patterson, Andrew and Liao, Victor and White, Martha},
  title      = {Robust Losses for Learning Value Functions},
  journal    = {CoRR},
  volume     = {abs/2205.08464},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.08464},
  doi        = {10.48550/ARXIV.2205.08464},
  eprinttype = {arXiv},
  eprint     = {2205.08464},
  timestamp  = {Mon, 23 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-08464.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

% CoRR (arXiv) preprint 2205.08716.
@article{DBLP:journals/corr/abs-2205-08716,
  author     = {Wang, Han and Sakhadeo, Archit and White, Adam and Bell, James and Liu, Vincent and Zhao, Xutong and Liu, Puer and Kozuno, Tadashi and Fyshe, Alona and White, Martha},
  title      = {No More Pesky Hyperparameters: Offline Hyperparameter Tuning for {RL}},
  journal    = {CoRR},
  volume     = {abs/2205.08716},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.08716},
  doi        = {10.48550/ARXIV.2205.08716},
  eprinttype = {arXiv},
  eprint     = {2205.08716},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-08716.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2206-02902,
  author       = {Lo, Chunlok and
                  Mihucz, Gabor and
                  White, Adam and
                  Aminmansour, Farzane and
                  White, Martha},
  title        = {Goal-Space Planning with Subgoal Models},
  journal      = {CoRR},
  volume       = {abs/2206.02902},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.02902},
  doi          = {10.48550/ARXIV.2206.02902},
  eprinttype   = {arXiv},
  eprint       = {2206.02902},
  timestamp    = {Tue, 14 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-02902.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/jair/SchlegelJAPWW21,
  author       = {Schlegel, Matthew and
                  Jacobsen, Andrew and
                  Abbas, Zaheer and
                  Patterson, Andrew and
                  White, Adam and
                  White, Martha},
  title        = {General Value Function Networks},
  journal      = {J. Artif. Intell. Res.},
  volume       = {70},
  pages        = {497--543},
  year         = {2021},
  url          = {https://doi.org/10.1613/jair.1.12105},
  doi          = {10.1613/JAIR.1.12105},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jair/SchlegelJAPWW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/tase/HoferBHGMGAFGLL21,
  author       = {Sebastian H{\"{o}}fer and
                  Kostas E. Bekris and
                  Ankur Handa and
                  Juan Camilo Gamboa and
                  Melissa Mozifian and
                  Florian Golemo and
                  Christopher G. Atkeson and
                  Dieter Fox and
                  Ken Goldberg and
                  John Leonard and
                  C. Karen Liu and
                  Jan Peters and
                  Shuran Song and
                  Peter Welinder and
                  Martha White},
  title        = {{Sim2Real} in Robotics and Automation: Applications and Challenges},
  journal      = {{IEEE} Trans. Autom. Sci. Eng.},
  volume       = {18},
  number       = {2},
  pages        = {398--400},
  year         = {2021},
  url          = {https://doi.org/10.1109/TASE.2021.3064065},
  doi          = {10.1109/TASE.2021.3064065},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tase/HoferBHGMGAFGLL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iclr/PanBW21,
  author       = {Pan, Yangchen and
                  Banman, Kirby and
                  White, Martha},
  title        = {Fuzzy Tiling Activations: {A} Simple Approach to Learning Sparse Representations
                  Online},
  booktitle    = {9th International Conference on Learning Representations, {ICLR} 2021,
                  Virtual Event, Austria, May 3-7, 2021},
  publisher    = {OpenReview.net},
  year         = {2021},
  url          = {https://openreview.net/forum?id=zElset1Klrp},
  timestamp    = {Wed, 23 Jun 2021 17:36:39 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/PanBW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/nips/McLeodLSJKWW21,
  author       = {Matthew McLeod and
                  Chunlok Lo and
                  Matthew Schlegel and
                  Andrew Jacobsen and
                  Raksha Kumaraswamy and
                  Martha White and
                  Adam White},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Wortman Vaughan, Jennifer},
  title        = {Continual Auxiliary Task Learning},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {12549--12562},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/68331ff0427b551b68e911eebe35233b-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/McLeodLSJKWW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/GuptaMSKTW21,
  author       = {Dhawal Gupta and
                  Gabor Mihucz and
                  Matthew Schlegel and
                  James E. Kostas and
                  Philip S. Thomas and
                  Martha White},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Wortman Vaughan, Jennifer},
  title        = {Structural Credit Assignment in Neural Networks using Reinforcement
                  Learning},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {30257--30270},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/fe1f9c70bdf347497e1a01b6c486bdb9-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GuptaMSKTW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2103-05787,
  author       = {Javed, Khurram and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Scalable Online Recurrent Learning Using Columnar Neural Networks},
  journal      = {CoRR},
  volume       = {abs/2103.05787},
  year         = {2021},
  url          = {https://arxiv.org/abs/2103.05787},
  eprinttype   = {arXiv},
  eprint       = {2103.05787},
  timestamp    = {Tue, 16 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2103-05787.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2104-13844,
  author       = {Andrew Patterson and
                  Adam White and
                  Sina Ghiassian and
                  Martha White},
  title        = {A Generalized Projected {Bellman} Error for Off-policy Value Estimation
                  in Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2104.13844},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.13844},
  eprinttype   = {arXiv},
  eprint       = {2104.13844},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-13844.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2105-14214,
  author       = {Lan, Qingfeng and
                  Kumar, Luke and
                  White, Martha and
                  Fyshe, Alona},
  title        = {Predictive Representation Learning for Language Modeling},
  journal      = {CoRR},
  volume       = {abs/2105.14214},
  year         = {2021},
  url          = {https://arxiv.org/abs/2105.14214},
  eprinttype   = {arXiv},
  eprint       = {2105.14214},
  timestamp    = {Wed, 02 Jun 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2105-14214.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2107-08285,
  author       = {Chan, Alan and
                  Silva, Hugo and
                  Lim, Sungsu and
                  Kozuno, Tadashi and
                  Mahmood, A. Rupam and
                  White, Martha},
  title        = {Greedification Operators for Policy Optimization: Investigating Forward
                  and Reverse {KL} Divergences},
  journal      = {CoRR},
  volume       = {abs/2107.08285},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.08285},
  eprinttype   = {arXiv},
  eprint       = {2107.08285},
  timestamp    = {Wed, 21 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-08285.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2111-08066,
  author       = {Liu, Vincent and
                  Wright, James R. and
                  White, Martha},
  title        = {Exploiting Action Impact Regularity and Partially Known Models for
                  Offline Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2111.08066},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.08066},
  eprinttype   = {arXiv},
  eprint       = {2111.08066},
  timestamp    = {Mon, 22 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-08066.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2111-08172,
  author       = {Graves, Eric and
                  Imani, Ehsan and
                  Kumaraswamy, Raksha and
                  White, Martha},
  title        = {Off-Policy Actor-Critic with Emphatic Weightings},
  journal      = {CoRR},
  volume       = {abs/2111.08172},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.08172},
  eprinttype   = {arXiv},
  eprint       = {2111.08172},
  timestamp    = {Mon, 22 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-08172.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2112-07806,
  author       = {Imani, Ehsan and
                  Hu, Wei and
                  White, Martha},
  title        = {Understanding Feature Transfer Through Representation Alignment},
  journal      = {CoRR},
  volume       = {abs/2112.07806},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.07806},
  eprinttype   = {arXiv},
  eprint       = {2112.07806},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-07806.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2112-11622,
  author       = {Garg, Shivam and
                  Tosatto, Samuele and
                  Pan, Yangchen and
                  White, Martha and
                  Mahmood, A. Rupam},
  title        = {An Alternate Policy Gradient Estimator for Softmax Policies},
  journal      = {CoRR},
  volume       = {abs/2112.11622},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.11622},
  eprinttype   = {arXiv},
  eprint       = {2112.11622},
  timestamp    = {Tue, 04 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-11622.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/jair/LinkeAWDW20,
  author       = {Linke, Cam and
                  Ady, Nadia M. and
                  White, Martha and
                  Degris, Thomas and
                  White, Adam},
  title        = {Adapting Behavior via Intrinsic Reward: {A} Survey and Empirical Study},
  journal      = {J. Artif. Intell. Res.},
  volume       = {69},
  pages        = {1287--1332},
  year         = {2020},
  url          = {https://doi.org/10.1613/jair.1.12087},
  doi          = {10.1613/JAIR.1.12087},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jair/LinkeAWDW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/atal/SatsangiLWOW20,
  author       = {Yash Satsangi and
                  Sungsu Lim and
                  Shimon Whiteson and
                  Frans A. Oliehoek and
                  Martha White},
  editor       = {El Fallah Seghrouchni, Amal and
                  Gita Sukthankar and
                  Bo An and
                  Neil Yorke{-}Smith},
  title        = {Maximizing Information Gain in Partially Observable Environments via
                  Prediction Rewards},
  booktitle    = {Proceedings of the 19th International Conference on Autonomous Agents
                  and Multiagent Systems, {AAMAS} '20, Auckland, New Zealand, May 9-13,
                  2020},
  pages        = {1215--1223},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems},
  year         = {2020},
  url          = {https://dl.acm.org/doi/10.5555/3398761.3398902},
  doi          = {10.5555/3398761.3398902},
  timestamp    = {Tue, 26 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/SatsangiLWOW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/emnlp/HashemzadehKWMF20,
  author       = {Maryam Hashemzadeh and
                  Greta Kaufeld and
                  Martha White and
                  Andrea E. Martin and
                  Alona Fyshe},
  editor       = {Trevor Cohn and
                  Yulan He and
                  Yang Liu},
  title        = {From Language to Language-ish: How Brain-Like is an {LSTM}'s Representation
                  of Atypical Language Stimuli?},
  booktitle    = {Findings of the Association for Computational Linguistics: {EMNLP}
                  2020, Online Event, 16-20 November 2020},
  series       = {Findings of {ACL}},
  volume       = {{EMNLP} 2020},
  pages        = {645--656},
  publisher    = {Association for Computational Linguistics},
  year         = {2020},
  url          = {https://doi.org/10.18653/v1/2020.findings-emnlp.57},
  doi          = {10.18653/V1/2020.FINDINGS-EMNLP.57},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/HashemzadehKWMF20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iclr/LanPFW20,
  author       = {Qingfeng Lan and
                  Yangchen Pan and
                  Alona Fyshe and
                  Martha White},
  title        = {Maxmin {Q}-learning: Controlling the Estimation Bias of {Q}-learning},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=Bkg0u3Etwr},
  timestamp    = {Thu, 07 May 2020 17:11:47 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/LanPFW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/iclr/NathLCLWW20,
  author       = {Nath, Somjit and
                  Liu, Vincent and
                  Chan, Alan and
                  Li, Xin and
                  White, Adam and
                  White, Martha},
  title        = {Training Recurrent Neural Networks Online by Learning Explicit State
                  Variables},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=SJgmR0NKPr},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/NathLCLWW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/icml/AbbasSTW20,
  author       = {Zaheer Abbas and
                  Samuel Sokota and
                  Erin Talvitie and
                  Martha White},
  title        = {Selective {Dyna}-Style Planning Under Limited Model Capacity},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {1--10},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/abbas20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/AbbasSTW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icml/ChandakTSWMT20,
  author       = {Yash Chandak and
                  Georgios Theocharous and
                  Shiv Shankar and
                  Martha White and
                  Sridhar Mahadevan and
                  Philip S. Thomas},
  title        = {Optimizing for the Future in Non-Stationary {MDPs}},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {1414--1425},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/chandak20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/ChandakTSWMT20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icml/GhiassianP0GWW20,
  author       = {Ghiassian, Sina and
                  Patterson, Andrew and
                  Garg, Shivam and
                  Gupta, Dhawal and
                  White, Adam and
                  White, Martha},
  title        = {Gradient Temporal-Difference Learning with Regularized Corrections},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {3524--3534},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/ghiassian20a.html},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/GhiassianP0GWW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/nips/ChandakJTWT20,
  author       = {Yash Chandak and
                  Scott M. Jordan and
                  Georgios Theocharous and
                  Martha White and
                  Philip S. Thomas},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {Towards Safe Policy Improvement for Non-Stationary {MDPs}},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/680390c55bbd9ce416d1d69a9ab4760d-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ChandakJTWT20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/PanIFW20,
  author       = {Yangchen Pan and
                  Ehsan Imani and
                  Amir{-}massoud Farahmand and
                  Martha White},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {An implicit function learning approach for parametric modal regression},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/83eaa6722798a773dd55e8fc7443aa09-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/PanIFW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2002-06195,
  author       = {Pan, Yangchen and
                  Imani, Ehsan and
                  White, Martha and
                  Farahmand, Amir{-}massoud},
  title        = {An implicit function learning approach for parametric modal regression},
  journal      = {CoRR},
  volume       = {abs/2002.06195},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.06195},
  eprinttype   = {arXiv},
  eprint       = {2002.06195},
  timestamp    = {Tue, 03 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-06195.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2002-06487,
  author       = {Qingfeng Lan and
                  Yangchen Pan and
                  Alona Fyshe and
                  Martha White},
  title        = {Maxmin {Q}-learning: Controlling the Estimation Bias of {Q}-learning},
  journal      = {CoRR},
  volume       = {abs/2002.06487},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.06487},
  eprinttype   = {arXiv},
  eprint       = {2002.06487},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-06487.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2005-04912,
  author       = {Satsangi, Yash and
                  Lim, Sungsu and
                  Whiteson, Shimon and
                  Oliehoek, Frans A. and
                  White, Martha},
  title        = {Maximizing Information Gain in Partially Observable Environments via
                  Prediction Reward},
  journal      = {CoRR},
  volume       = {abs/2005.04912},
  year         = {2020},
  url          = {https://arxiv.org/abs/2005.04912},
  eprinttype   = {arXiv},
  eprint       = {2005.04912},
  timestamp    = {Thu, 14 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2005-04912.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2005-08158,
  author       = {Yash Chandak and
                  Georgios Theocharous and
                  Shiv Shankar and
                  Martha White and
                  Sridhar Mahadevan and
                  Philip S. Thomas},
  title        = {Optimizing for the Future in Non-Stationary {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2005.08158},
  year         = {2020},
  url          = {https://arxiv.org/abs/2005.08158},
  eprinttype   = {arXiv},
  eprint       = {2005.08158},
  timestamp    = {Wed, 03 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2005-08158.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2006-04363,
  author       = {Taher Jafferjee and
                  Ehsan Imani and
                  Erin Talvitie and
                  Martha White and
                  Michael Bowling},
  title        = {Hallucinating Value: {A} Pitfall of {Dyna}-style Planning with Imperfect
                  Environment Models},
  journal      = {CoRR},
  volume       = {abs/2006.04363},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.04363},
  eprinttype   = {arXiv},
  eprint       = {2006.04363},
  timestamp    = {Fri, 12 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-04363.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2006-07461,
  author       = {Javed, Khurram and
                  White, Martha and
                  Bengio, Yoshua},
  title        = {Learning Causal Models Online},
  journal      = {CoRR},
  volume       = {abs/2006.07461},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.07461},
  eprinttype   = {arXiv},
  eprint       = {2006.07461},
  timestamp    = {Wed, 17 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-07461.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2007-00611,
  author       = {Ghiassian, Sina and
                  Patterson, Andrew and
                  Garg, Shivam and
                  Gupta, Dhawal and
                  White, Adam and
                  White, Martha},
  title        = {Gradient Temporal-Difference Learning with Regularized Corrections},
  journal      = {CoRR},
  volume       = {abs/2007.00611},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.00611},
  eprinttype   = {arXiv},
  eprint       = {2007.00611},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-00611.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2007-02418,
  author       = {Zaheer Abbas and
                  Samuel Sokota and
                  Erin J. Talvitie and
                  Martha White},
  title        = {Selective {Dyna}-style Planning Under Limited Model Capacity},
  journal      = {CoRR},
  volume       = {abs/2007.02418},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.02418},
  eprinttype   = {arXiv},
  eprint       = {2007.02418},
  timestamp    = {Mon, 03 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-02418.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2007-03807,
  author       = {Liu, Vincent and
                  White, Adam and
                  Yao, Hengshuai and
                  White, Martha},
  title        = {Towards a practical measure of interference for reinforcement learning},
  journal      = {CoRR},
  volume       = {abs/2007.03807},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.03807},
  eprinttype   = {arXiv},
  eprint       = {2007.03807},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-03807.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2007-09569,
  author       = {Mei, Jincheng and
                  Pan, Yangchen and
                  White, Martha and
                  Farahmand, Amir{-}massoud and
                  Yao, Hengshuai},
  title        = {Beyond Prioritized Replay: Sampling States in Model-Based {RL} via
                  Simulated Priorities},
  journal      = {CoRR},
  volume       = {abs/2007.09569},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.09569},
  eprinttype   = {arXiv},
  eprint       = {2007.09569},
  timestamp    = {Tue, 28 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-09569.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/corr/abs-2010-07435,
  author       = {Maryam Hashemzadeh and
                  Greta Kaufeld and
                  Martha White and
                  Andrea E. Martin and
                  Alona Fyshe},
  title        = {From Language to Language-ish: How Brain-Like is an {LSTM}'s Representation
                  of Nonsensical Language Stimuli?},
  journal      = {CoRR},
  volume       = {abs/2010.07435},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.07435},
  eprinttype   = {arXiv},
  eprint       = {2010.07435},
  timestamp    = {Tue, 20 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-07435.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2010-12645,
  author       = {Yash Chandak and
                  Scott M. Jordan and
                  Georgios Theocharous and
                  Martha White and
                  Philip S. Thomas},
  title        = {Towards Safe Policy Improvement for Non-Stationary {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2010.12645},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.12645},
  eprinttype   = {arXiv},
  eprint       = {2010.12645},
  timestamp    = {Mon, 02 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-12645.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2012-03806,
  author       = {Sebastian H{\"{o}}fer and
                  Kostas E. Bekris and
                  Ankur Handa and
                  Gamboa Higuera, Juan Camilo and
                  Florian Golemo and
                  Melissa Mozifian and
                  Christopher G. Atkeson and
                  Dieter Fox and
                  Ken Goldberg and
                  John Leonard and
                  C. Karen Liu and
                  Jan Peters and
                  Shuran Song and
                  Peter Welinder and
                  Martha White},
  title        = {Perspectives on {Sim2Real} Transfer for Robotics: {A} Summary of the
                  {R:SS} 2020 Workshop},
  journal      = {CoRR},
  volume       = {abs/2012.03806},
  year         = {2020},
  url          = {https://arxiv.org/abs/2012.03806},
  eprinttype   = {arXiv},
  eprint       = {2012.03806},
  timestamp    = {Tue, 29 Jun 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2012-03806.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/aaai/JacobsenSLDWW19,
  author       = {Jacobsen, Andrew and
                  Schlegel, Matthew and
                  Linke, Cameron and
                  Degris, Thomas and
                  White, Adam and
                  White, Martha},
  title        = {Meta-Descent for Online, Continual Prediction},
  booktitle    = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2019, The Thirty-First Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
                  USA, January 27 - February 1, 2019},
  pages        = {3943--3950},
  publisher    = {{AAAI} Press},
  year         = {2019},
  url          = {https://doi.org/10.1609/aaai.v33i01.33013943},
  doi          = {10.1609/AAAI.V33I01.33013943},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/JacobsenSLDWW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/aaai/LiuKLW19,
  author       = {Liu, Vincent and
                  Kumaraswamy, Raksha and
                  Le, Lei and
                  White, Martha},
  title        = {The Utility of Sparse Representations for Control in Reinforcement
                  Learning},
  booktitle    = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2019, The Thirty-First Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
                  USA, January 27 - February 1, 2019},
  pages        = {4384--4391},
  publisher    = {{AAAI} Press},
  year         = {2019},
  url          = {https://doi.org/10.1609/aaai.v33i01.33014384},
  doi          = {10.1609/AAAI.V33I01.33014384},
  timestamp    = {Tue, 02 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/aaai/LiuKLW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org},
}

% ICLR 2019 paper; the record points at OpenReview and carries no page range.
@inproceedings{DBLP:conf/iclr/ChungNJW19,
  author       = {Chung, Wesley and
                  Nath, Somjit and
                  Joseph, Ajin and
                  White, Martha},
  title        = {Two-Timescale Networks for Nonlinear Value Function Approximation},
  booktitle    = {7th International Conference on Learning Representations, {ICLR} 2019,
                  New Orleans, LA, USA, May 6-9, 2019},
  publisher    = {OpenReview.net},
  year         = {2019},
  url          = {https://openreview.net/forum?id=rJleN20qK7},
  timestamp    = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/ChungNJW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% IJCAI 2019 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1906-07791.
% "Dyna" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/ijcai/PanYFW19,
  author       = {Pan, Yangchen and
                  Yao, Hengshuai and
                  Farahmand, Amir{-}massoud and
                  White, Martha},
  editor       = {Kraus, Sarit},
  title        = {Hill Climbing on Value Estimates for Search-control in {Dyna}},
  booktitle    = {Proceedings of the Twenty-Eighth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16,
                  2019},
  pages        = {3209--3215},
  publisher    = {ijcai.org},
  year         = {2019},
  url          = {https://doi.org/10.24963/ijcai.2019/445},
  doi          = {10.24963/IJCAI.2019/445},
  timestamp    = {Tue, 20 Aug 2019 16:18:18 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/PanYFW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% IJCAI 2019 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1904-01191.
@inproceedings{DBLP:conf/ijcai/WanZWWS19,
  author       = {Wan, Yi and
                  Zaheer, Muhammad and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  editor       = {Kraus, Sarit},
  title        = {Planning with Expectation Models},
  booktitle    = {Proceedings of the Twenty-Eighth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16,
                  2019},
  pages        = {3649--3655},
  publisher    = {ijcai.org},
  year         = {2019},
  url          = {https://doi.org/10.24963/ijcai.2019/506},
  doi          = {10.24963/IJCAI.2019/506},
  timestamp    = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ijcai/WanZWWS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2019 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1906-04328.
@inproceedings{DBLP:conf/nips/SchlegelCGQW19,
  author       = {Schlegel, Matthew and
                  Chung, Wesley and
                  Graves, Daniel and
                  Qian, Jian and
                  White, Martha},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {Importance Resampling for Off-policy Prediction},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {1797--1807},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/9ac403da7947a183884c18a67d3aa8de-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/SchlegelCGQW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2019 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1905-12588.
@inproceedings{DBLP:conf/nips/JavedW19,
  author       = {Javed, Khurram and
                  White, Martha},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {Meta-Learning Representations for Continual Learning},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {1818--1828},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/f4dd765c12f2ef67f98f3558c282a9cd-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/JavedW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2019 proceedings paper (nine authors).
@inproceedings{DBLP:conf/nips/AminmansourPLPM19,
  author       = {Aminmansour, Farzane and
                  Patterson, Andrew and
                  Le, Lei and
                  Peng, Yisu and
                  Mitchell, Daniel and
                  Pestilli, Franco and
                  Caiafa, Cesar F. and
                  Greiner, Russell and
                  White, Martha},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {Learning Macroscopic Brain Connectomes via Group-Sparse Factorization},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {8847--8857},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/0bfce127947574733b19da0f30739fcd-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/AminmansourPLPM19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/ijcai/WanZWWS19.
@article{DBLP:journals/corr/abs-1904-01191,
  author       = {Wan, Yi and
                  Zaheer, Muhammad and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Planning with Expectation Models},
  journal      = {CoRR},
  volume       = {abs/1904.01191},
  year         = {2019},
  url          = {https://arxiv.org/abs/1904.01191},
  eprinttype   = {arXiv},
  eprint       = {1904.01191},
  timestamp    = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-01191.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/nips/JavedW19.
@article{DBLP:journals/corr/abs-1905-12588,
  author       = {Javed, Khurram and
                  White, Martha},
  title        = {Meta-Learning Representations for Continual Learning},
  journal      = {CoRR},
  volume       = {abs/1905.12588},
  year         = {2019},
  url          = {https://arxiv.org/abs/1905.12588},
  eprinttype   = {arXiv},
  eprint       = {1905.12588},
  timestamp    = {Mon, 03 Jun 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1905-12588.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/nips/SchlegelCGQW19.
@article{DBLP:journals/corr/abs-1906-04328,
  author       = {Schlegel, Matthew and
                  Chung, Wesley and
                  Graves, Daniel and
                  Qian, Jian and
                  White, Martha},
  title        = {Importance Resampling for Off-policy Prediction},
  journal      = {CoRR},
  volume       = {abs/1906.04328},
  year         = {2019},
  url          = {https://arxiv.org/abs/1906.04328},
  eprinttype   = {arXiv},
  eprint       = {1906.04328},
  timestamp    = {Fri, 14 Jun 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-04328.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/ijcai/PanYFW19.
% "Dyna" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-1906-07791,
  author       = {Pan, Yangchen and
                  Yao, Hengshuai and
                  Farahmand, Amir{-}massoud and
                  White, Martha},
  title        = {Hill Climbing on Value Estimates for Search-control in {Dyna}},
  journal      = {CoRR},
  volume       = {abs/1906.07791},
  year         = {2019},
  url          = {https://arxiv.org/abs/1906.07791},
  eprinttype   = {arXiv},
  eprint       = {1906.07791},
  timestamp    = {Mon, 24 Jun 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-07791.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1906.07865).
@article{DBLP:journals/corr/abs-1906-07865,
  author       = {Linke, Cam and
                  Ady, Nadia M. and
                  White, Martha and
                  Degris, Thomas and
                  White, Adam},
  title        = {Adapting Behaviour via Intrinsic Reward: {A} Survey and Empirical
                  Study},
  journal      = {CoRR},
  volume       = {abs/1906.07865},
  year         = {2019},
  url          = {https://arxiv.org/abs/1906.07865},
  eprinttype   = {arXiv},
  eprint       = {1906.07865},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-07865.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1907.07751).
@article{DBLP:journals/corr/abs-1907-07751,
  author       = {Jacobsen, Andrew and
                  Schlegel, Matthew and
                  Linke, Cameron and
                  Degris, Thomas and
                  White, Adam and
                  White, Martha},
  title        = {Meta-descent for Online, Continual Prediction},
  journal      = {CoRR},
  volume       = {abs/1907.07751},
  year         = {2019},
  url          = {https://arxiv.org/abs/1907.07751},
  eprinttype   = {arXiv},
  eprint       = {1907.07751},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-07751.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1910.01705).
@article{DBLP:journals/corr/abs-1910-01705,
  author       = {Javed, Khurram and
                  Yao, Hengshuai and
                  White, Martha},
  title        = {Is Fast Adaptation All You Need?},
  journal      = {CoRR},
  volume       = {abs/1910.01705},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.01705},
  eprinttype   = {arXiv},
  eprint       = {1910.01705},
  timestamp    = {Wed, 09 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-01705.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICML 2018 (PMLR v80) paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1806-04613.
@inproceedings{DBLP:conf/icml/ImaniW18,
  author       = {Imani, Ehsan and
                  White, Martha},
  editor       = {Dy, Jennifer G. and
                  Krause, Andreas},
  title        = {Improving Regression Performance with Distributional Losses},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {2162--2171},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {https://proceedings.mlr.press/v80/imani18a.html},
  timestamp    = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ImaniW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICML 2018 (PMLR v80) paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1806-06931.
@inproceedings{DBLP:conf/icml/PanFWNGN18,
  author       = {Pan, Yangchen and
                  Farahmand, Amir{-}massoud and
                  White, Martha and
                  Nabi, Saleh and
                  Grover, Piyush and
                  Nikovski, Daniel},
  editor       = {Dy, Jennifer G. and
                  Krause, Andreas},
  title        = {Reinforcement Learning with Function-Valued Action Spaces for Partial
                  Differential Equation Control},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {3983--3992},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {https://proceedings.mlr.press/v80/pan18a.html},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/PanFWNGN18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% IJCAI 2018 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1806-04624.
% The subtitle's leading article is capitalised and brace-protected, as in that arXiv record.
@inproceedings{DBLP:conf/ijcai/PanZWPW18,
  author       = {Pan, Yangchen and
                  Zaheer, Muhammad and
                  White, Adam and
                  Patterson, Andrew and
                  White, Martha},
  editor       = {Lang, J{\'{e}}r{\^{o}}me},
  title        = {Organizing Experience: {A} Deeper Look at Replay Mechanisms for Sample-Based
                  Planning in Continuous State Domains},
  booktitle    = {Proceedings of the Twenty-Seventh International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2018, July 13-19, 2018, Stockholm,
                  Sweden},
  pages        = {4794--4800},
  publisher    = {ijcai.org},
  year         = {2018},
  url          = {https://doi.org/10.24963/ijcai.2018/666},
  doi          = {10.24963/IJCAI.2018/666},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/PanZWPW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2018 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1811-09013.
@inproceedings{DBLP:conf/nips/ImaniGW18,
  author       = {Imani, Ehsan and
                  Graves, Eric and
                  White, Martha},
  editor       = {Bengio, Samy and
                  Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Grauman, Kristen and
                  Cesa{-}Bianchi, Nicol{\`{o}} and
                  Garnett, Roman},
  title        = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {96--106},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/3ef815416f775098fe977004015c6193-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ImaniGW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2018 proceedings paper. The title is stored in Title Case; sentence-casing styles still downcase it.
@inproceedings{DBLP:conf/nips/LePW18,
  author       = {Le, Lei and
                  Patterson, Andrew and
                  White, Martha},
  editor       = {Bengio, Samy and
                  Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Grauman, Kristen and
                  Cesa{-}Bianchi, Nicol{\`{o}} and
                  Garnett, Roman},
  title        = {Supervised Autoencoders: Improving Generalization Performance with
                  Unsupervised Regularizers},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {107--117},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/2a38a4a9316c49e5a833517c45d31070-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/LePW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NeurIPS 2018 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1811-06629.
% The title uses the same Title Case spelling as that arXiv record.
@inproceedings{DBLP:conf/nips/KumaraswamySWW18,
  author       = {Kumaraswamy, Raksha and
                  Schlegel, Matthew and
                  White, Adam and
                  White, Martha},
  editor       = {Bengio, Samy and
                  Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Grauman, Kristen and
                  Cesa{-}Bianchi, Nicol{\`{o}} and
                  Garnett, Roman},
  title        = {Context-Dependent Upper-Confidence Bounds for Directed Exploration},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {4784--4794},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/f516dfb84b9051ed85b89cdc3a8ab7f5-Abstract.html},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/KumaraswamySWW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% UAI 2018 proceedings paper; the url field points directly at the PDF.
@inproceedings{DBLP:conf/uai/SherstanABYWWS18,
  author       = {Sherstan, Craig and
                  Ashley, Dylan R. and
                  Bennett, Brendan and
                  Young, Kenny and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  editor       = {Globerson, Amir and
                  Silva, Ricardo},
  title        = {Comparing Direct and Indirect Temporal-Difference Methods for Estimating
                  the Variance of the Return},
  booktitle    = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10,
                  2018},
  pages        = {63--72},
  publisher    = {{AUAI} Press},
  year         = {2018},
  url          = {http://auai.org/uai2018/proceedings/papers/35.pdf},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/SherstanABYWWS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% UAI 2018 proceedings paper; the arXiv record with matching title and authors is DBLP:journals/corr/abs-1808-09127.
% The title is stored in Title Case; sentence-casing styles still downcase it.
@inproceedings{DBLP:conf/uai/SajedCW18,
  author       = {Sajed, Touqir and
                  Chung, Wesley and
                  White, Martha},
  editor       = {Globerson, Amir and
                  Silva, Ricardo},
  title        = {High-Confidence Error Estimates for Learned Value Functions},
  booktitle    = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10,
                  2018},
  pages        = {683--692},
  publisher    = {{AUAI} Press},
  year         = {2018},
  url          = {http://auai.org/uai2018/proceedings/papers/245.pdf},
  timestamp    = {Wed, 09 Mar 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/SajedCW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1801.08287); the lambda in the title is kept as braced inline math.
@article{DBLP:journals/corr/abs-1801-08287,
  author       = {Sherstan, Craig and
                  Bennett, Brendan and
                  Young, Kenny and
                  Ashley, Dylan R. and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Directly Estimating the Variance of the {\(\lambda\)}-Return Using
                  Temporal-Difference Methods},
  journal      = {CoRR},
  volume       = {abs/1801.08287},
  year         = {2018},
  url          = {https://arxiv.org/abs/1801.08287},
  eprinttype   = {arXiv},
  eprint       = {1801.08287},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1801-08287.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/icml/ImaniW18.
@article{DBLP:journals/corr/abs-1806-04613,
  author       = {Imani, Ehsan and
                  White, Martha},
  title        = {Improving Regression Performance with Distributional Losses},
  journal      = {CoRR},
  volume       = {abs/1806.04613},
  year         = {2018},
  url          = {https://arxiv.org/abs/1806.04613},
  eprinttype   = {arXiv},
  eprint       = {1806.04613},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-04613.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/ijcai/PanZWPW18.
@article{DBLP:journals/corr/abs-1806-04624,
  author       = {Pan, Yangchen and
                  Zaheer, Muhammad and
                  White, Adam and
                  Patterson, Andrew and
                  White, Martha},
  title        = {Organizing Experience: {A} Deeper Look at Replay Mechanisms for Sample-based
                  Planning in Continuous State Domains},
  journal      = {CoRR},
  volume       = {abs/1806.04624},
  year         = {2018},
  url          = {https://arxiv.org/abs/1806.04624},
  eprinttype   = {arXiv},
  eprint       = {1806.04624},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-04624.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/icml/PanFWNGN18.
@article{DBLP:journals/corr/abs-1806-06931,
  author       = {Pan, Yangchen and
                  Farahmand, Amir{-}massoud and
                  White, Martha and
                  Nabi, Saleh and
                  Grover, Piyush and
                  Nikovski, Daniel},
  title        = {Reinforcement Learning with Function-Valued Action Spaces for Partial
                  Differential Equation Control},
  journal      = {CoRR},
  volume       = {abs/1806.06931},
  year         = {2018},
  url          = {https://arxiv.org/abs/1806.06931},
  eprinttype   = {arXiv},
  eprint       = {1806.06931},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-06931.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1807.06763).
@article{DBLP:journals/corr/abs-1807-06763,
  author       = {Schlegel, Matthew and
                  White, Adam and
                  Patterson, Andrew and
                  White, Martha},
  title        = {General Value Function Networks},
  journal      = {CoRR},
  volume       = {abs/1807.06763},
  year         = {2018},
  url          = {https://arxiv.org/abs/1807.06763},
  eprinttype   = {arXiv},
  eprint       = {1807.06763},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1807-06763.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/uai/SajedCW18.
% The title is stored in Title Case; sentence-casing styles still downcase it.
@article{DBLP:journals/corr/abs-1808-09127,
  author       = {Sajed, Touqir and
                  Chung, Wesley and
                  White, Martha},
  title        = {High-Confidence Error Estimates for Learned Value Functions},
  journal      = {CoRR},
  volume       = {abs/1808.09127},
  year         = {2018},
  url          = {https://arxiv.org/abs/1808.09127},
  eprinttype   = {arXiv},
  eprint       = {1808.09127},
  timestamp    = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1808-09127.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1810.09103).
@article{DBLP:journals/corr/abs-1810-09103,
  author       = {Lim, Sungsu and
                  Joseph, Ajin and
                  Le, Lei and
                  Pan, Yangchen and
                  White, Martha},
  title        = {Actor-Expert: {A} Framework for using Action-Value Methods in Continuous
                  Action Spaces},
  journal      = {CoRR},
  volume       = {abs/1810.09103},
  year         = {2018},
  url          = {https://arxiv.org/abs/1810.09103},
  eprinttype   = {arXiv},
  eprint       = {1810.09103},
  timestamp    = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1810-09103.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1811.02597).
@article{DBLP:journals/corr/abs-1811-02597,
  author       = {Ghiassian, Sina and
                  Patterson, Andrew and
                  White, Martha and
                  Sutton, Richard S. and
                  White, Adam},
  title        = {Online Off-policy Prediction},
  journal      = {CoRR},
  volume       = {abs/1811.02597},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.02597},
  eprinttype   = {arXiv},
  eprint       = {1811.02597},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-02597.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/aaai/LiuKLW19.
@article{DBLP:journals/corr/abs-1811-06626,
  author       = {Liu, Vincent and
                  Kumaraswamy, Raksha and
                  Le, Lei and
                  White, Martha},
  title        = {The Utility of Sparse Representations for Control in Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1811.06626},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.06626},
  eprinttype   = {arXiv},
  eprint       = {1811.06626},
  timestamp    = {Sun, 25 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-06626.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/nips/KumaraswamySWW18.
@article{DBLP:journals/corr/abs-1811-06629,
  author       = {Kumaraswamy, Raksha and
                  Schlegel, Matthew and
                  White, Adam and
                  White, Martha},
  title        = {Context-Dependent Upper-Confidence Bounds for Directed Exploration},
  journal      = {CoRR},
  volume       = {abs/1811.06629},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.06629},
  eprinttype   = {arXiv},
  eprint       = {1811.06629},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-06629.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1811.07004). "Barbados" is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-1811-07004,
  author       = {Schaul, Tom and
                  van Hasselt, Hado and
                  Modayil, Joseph and
                  White, Martha and
                  White, Adam and
                  Bacon, Pierre{-}Luc and
                  Harb, Jean and
                  Mourad, Shibl and
                  Bellemare, Marc G. and
                  Precup, Doina},
  title        = {The {Barbados} 2018 List of Open Issues in Continual Learning},
  journal      = {CoRR},
  volume       = {abs/1811.07004},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.07004},
  eprinttype   = {arXiv},
  eprint       = {1811.07004},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-07004.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the proceedings record with matching title and authors is DBLP:conf/nips/ImaniGW18.
@article{DBLP:journals/corr/abs-1811-09013,
  author       = {Imani, Ehsan and
                  Graves, Eric and
                  White, Martha},
  title        = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
  journal      = {CoRR},
  volume       = {abs/1811.09013},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.09013},
  eprinttype   = {arXiv},
  eprint       = {1811.09013},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-09013.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (abs/1812.00914).
@article{DBLP:journals/corr/abs-1812-00914,
  author       = {Li, Minghan and
                  Zuo, Tanli and
                  Li, Ruicheng and
                  White, Martha and
                  Zheng, Weishi},
  title        = {Accelerating Large Scale Knowledge Distillation via Dynamic Importance
                  Sampling},
  journal      = {CoRR},
  volume       = {abs/1812.00914},
  year         = {2018},
  url          = {https://arxiv.org/abs/1812.00914},
  eprinttype   = {arXiv},
  eprint       = {1812.00914},
  timestamp    = {Tue, 19 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-00914.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AAAI 2017 proceedings paper.
@inproceedings{DBLP:conf/aaai/JainWR17,
  author       = {Jain, Shantanu and
                  White, Martha and
                  Radivojac, Predrag},
  editor       = {Singh, Satinder and
                  Markovitch, Shaul},
  title        = {Recovering True Classifier Performance in Positive-Unlabeled Learning},
  booktitle    = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
                  February 4-9, 2017, San Francisco, California, {USA}},
  pages        = {2066--2072},
  publisher    = {{AAAI} Press},
  year         = {2017},
  url          = {https://doi.org/10.1609/aaai.v31i1.10937},
  doi          = {10.1609/AAAI.V31I1.10937},
  timestamp    = {Mon, 04 Sep 2023 14:40:32 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/JainWR17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AAAI 2017 proceedings paper.
@inproceedings{DBLP:conf/aaai/PanWW17,
  author       = {Pan, Yangchen and
                  White, Adam and
                  White, Martha},
  editor       = {Singh, Satinder and
                  Markovitch, Shaul},
  title        = {Accelerated Gradient Temporal Difference Learning},
  booktitle    = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
                  February 4-9, 2017, San Francisco, California, {USA}},
  pages        = {2464--2470},
  publisher    = {{AAAI} Press},
  year         = {2017},
  url          = {https://doi.org/10.1609/aaai.v31i1.10829},
  doi          = {10.1609/AAAI.V31I1.10829},
  timestamp    = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/PanWW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICML 2017 (PMLR v70) proceedings paper.
@inproceedings{DBLP:conf/icml/SchlegelPCW17,
  author       = {Schlegel, Matthew and
                  Pan, Yangchen and
                  Chen, Jiecao and
                  White, Martha},
  editor       = {Precup, Doina and
                  Teh, Yee Whye},
  title        = {Adapting Kernel Representations Online Using Submodular Maximization},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {3037--3046},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {https://proceedings.mlr.press/v70/schlegel17a.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SchlegelPCW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICML 2017 (PMLR v70) proceedings paper, single author.
@inproceedings{DBLP:conf/icml/White17,
  author       = {White, Martha},
  editor       = {Precup, Doina and
                  Teh, Yee Whye},
  title        = {Unifying Task Specification in Reinforcement Learning},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {3742--3750},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {https://proceedings.mlr.press/v70/white17a.html},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/White17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% IJCAI 2017 proceedings paper.
@inproceedings{DBLP:conf/ijcai/LeKW17,
  author       = {Le, Lei and
                  Kumaraswamy, Raksha and
                  White, Martha},
  editor       = {Sierra, Carles},
  title        = {Learning Sparse Representations in Reinforcement Learning with Sparse
                  Coding},
  booktitle    = {Proceedings of the Twenty-Sixth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
                  19-25, 2017},
  pages        = {2067--2073},
  publisher    = {ijcai.org},
  year         = {2017},
  url          = {https://doi.org/10.24963/ijcai.2017/287},
  doi          = {10.24963/IJCAI.2017/287},
  timestamp    = {Tue, 20 Aug 2019 16:16:54 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/LeKW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/KaramiWSS17,
  author       = {Karami, Mahdi and
                  White, Martha and
                  Schuurmans, Dale and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {Multi-view Matrix Factorization for Linear Dynamical System Estimation},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {7092--7101},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/c2964caac096f26db222cb325aa267cb-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/KaramiWSS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the CoRR entry DBLP:journals/corr/abs-1708-01298 in this file.
% Title is stored in Title Case so that styles which do not recase titles render it
% consistently with the other entries; sentence-casing styles still downcase it.
@inproceedings{DBLP:conf/uai/PanAW17,
  author       = {Pan, Yangchen and
                  Azer, Erfan Sadeqi and
                  White, Martha},
  editor       = {Elidan, Gal and
                  Kersting, Kristian and
                  Ihler, Alexander},
  title        = {Effective Sketching Methods for Value Function Approximation},
  booktitle    = {Proceedings of the Thirty-Third Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2017, Sydney, Australia, August 11-15, 2017},
  publisher    = {{AUAI} Press},
  year         = {2017},
  url          = {http://auai.org/uai2017/proceedings/papers/81.pdf},
  timestamp    = {Mon, 05 Dec 2022 15:54:38 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/PanAW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/JainWR17,
  author       = {Jain, Shantanu and
                  White, Martha and
                  Radivojac, Predrag},
  title        = {Recovering True Classifier Performance in Positive-Unlabeled Learning},
  journal      = {CoRR},
  volume       = {abs/1702.00518},
  year         = {2017},
  url          = {http://arxiv.org/abs/1702.00518},
  eprinttype   = {arXiv},
  eprint       = {1702.00518},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/JainWR17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the IJCAI 2017 entry DBLP:conf/ijcai/LeKW17 in this file.
@article{DBLP:journals/corr/LeKW17,
  author       = {Le, Lei and
                  Kumaraswamy, Raksha and
                  White, Martha},
  title        = {Learning Sparse Representations in Reinforcement Learning with Sparse
                  Coding},
  journal      = {CoRR},
  volume       = {abs/1707.08316},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.08316},
  eprinttype   = {arXiv},
  eprint       = {1707.08316},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/LeKW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the UAI 2017 entry DBLP:conf/uai/PanAW17 in this file.
% Title is stored in Title Case so that styles which do not recase titles render it
% consistently with the other entries; sentence-casing styles still downcase it.
@article{DBLP:journals/corr/abs-1708-01298,
  author       = {Pan, Yangchen and
                  Azer, Erfan Sadeqi and
                  White, Martha},
  title        = {Effective Sketching Methods for Value Function Approximation},
  journal      = {CoRR},
  volume       = {abs/1708.01298},
  year         = {2017},
  url          = {http://arxiv.org/abs/1708.01298},
  eprinttype   = {arXiv},
  eprint       = {1708.01298},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-01298.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the CoRR entry DBLP:journals/corr/SuttonMW15 in this file.
@article{DBLP:journals/jmlr/SuttonMW16,
  author       = {Sutton, Richard S. and
                  Mahmood, Ashique Rupam and
                  White, Martha},
  title        = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference
                  Learning},
  journal      = {J. Mach. Learn. Res.},
  volume       = {17},
  pages        = {73:1--73:29},
  year         = {2016},
  url          = {http://jmlr.org/papers/v17/14-488.html},
  timestamp    = {Wed, 10 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/SuttonMW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and near-identical title as the CoRR entry DBLP:journals/corr/WhiteW16 in this file.
@inproceedings{DBLP:conf/atal/AdamW16,
  author       = {White, Adam and
                  White, Martha},
  editor       = {Jonker, Catholijn M. and
                  Marsella, Stacy and
                  Thangarajah, John and
                  Tuyls, Karl},
  title        = {Investigating Practical Linear Temporal Difference Learning},
  booktitle    = {Proceedings of the 2016 International Conference on Autonomous Agents
                  {\&} Multiagent Systems, Singapore, May 9-13, 2016},
  pages        = {494--502},
  publisher    = {{ACM}},
  year         = {2016},
  url          = {http://dl.acm.org/citation.cfm?id=2936997},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/AdamW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the CoRR entry DBLP:journals/corr/WhiteW16a in this file.
@inproceedings{DBLP:conf/atal/WhiteW16,
  author       = {White, Martha and
                  White, Adam},
  editor       = {Jonker, Catholijn M. and
                  Marsella, Stacy and
                  Thangarajah, John and
                  Tuyls, Karl},
  title        = {A Greedy Approach to Adapting the Trace Parameter for Temporal Difference
                  Learning},
  booktitle    = {Proceedings of the 2016 International Conference on Autonomous Agents
                  {\&} Multiagent Systems, Singapore, May 9-13, 2016},
  pages        = {557--565},
  publisher    = {{ACM}},
  year         = {2016},
  url          = {http://dl.acm.org/citation.cfm?id=2937006},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/WhiteW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% The CoRR entry DBLP:journals/corr/GehringW15 in this file has the same title but
% lists only Gehring and White as authors.
@inproceedings{DBLP:conf/ijcai/GehringPW16,
  author       = {Gehring, Clement and
                  Pan, Yangchen and
                  White, Martha},
  editor       = {Kambhampati, Subbarao},
  title        = {Incremental Truncated {LSTD}},
  booktitle    = {Proceedings of the Twenty-Fifth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July
                  2016},
  pages        = {1505--1511},
  publisher    = {{IJCAI/AAAI} Press},
  year         = {2016},
  url          = {http://www.ijcai.org/Abstract/16/216},
  timestamp    = {Tue, 20 Aug 2019 16:19:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/GehringPW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the CoRR entry DBLP:journals/corr/JainWR16 in this file.
% Title is stored in Title Case so that styles which do not recase titles render it
% consistently with the other entries; sentence-casing styles still downcase it.
@inproceedings{DBLP:conf/nips/JainWR16,
  author       = {Jain, Shantanu and
                  White, Martha and
                  Radivojac, Predrag},
  editor       = {Lee, Daniel D. and
                  Sugiyama, Masashi and
                  von Luxburg, Ulrike and
                  Guyon, Isabelle and
                  Garnett, Roman},
  title        = {Estimating the Class Prior and Posterior from Noisy Positives and
                  Unlabeled Data},
  booktitle    = {Advances in Neural Information Processing Systems 29: Annual Conference
                  on Neural Information Processing Systems 2016, December 5-10, 2016,
                  Barcelona, Spain},
  pages        = {2685--2693},
  year         = {2016},
  url          = {https://proceedings.neurips.cc/paper/2016/hash/79a49b3e3762632813f9e35f4ba53d6c-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/JainWR16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Title is stored in Title Case so that styles which do not recase titles render it
% consistently with the other entries; sentence-casing styles still downcase it.
@article{DBLP:journals/corr/JainWTR16,
  author       = {Jain, Shantanu and
                  White, Martha and
                  Trosset, Michael W. and
                  Radivojac, Predrag},
  title        = {Nonparametric Semi-Supervised Learning of Class Proportions},
  journal      = {CoRR},
  volume       = {abs/1601.01944},
  year         = {2016},
  url          = {http://arxiv.org/abs/1601.01944},
  eprinttype   = {arXiv},
  eprint       = {1601.01944},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/JainWTR16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and near-identical title as the AAMAS 2016 entry DBLP:conf/atal/AdamW16 in this file.
% Title is stored in Title Case, matching that entry; sentence-casing styles still downcase it.
@article{DBLP:journals/corr/WhiteW16,
  author       = {White, Adam and
                  White, Martha},
  title        = {Investigating Practical, Linear Temporal Difference Learning},
  journal      = {CoRR},
  volume       = {abs/1602.08771},
  year         = {2016},
  url          = {http://arxiv.org/abs/1602.08771},
  eprinttype   = {arXiv},
  eprint       = {1602.08771},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/WhiteW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Title is stored in Title Case so that styles which do not recase titles render it
% consistently with the other entries; sentence-casing styles still downcase it.
@article{DBLP:journals/corr/LeW16,
  author       = {Le, Lei and
                  White, Martha},
  title        = {Global Optimization of Factor Models Using Alternating Minimization},
  journal      = {CoRR},
  volume       = {abs/1604.04942},
  year         = {2016},
  url          = {http://arxiv.org/abs/1604.04942},
  eprinttype   = {arXiv},
  eprint       = {1604.04942},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/LeW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the NIPS 2016 entry DBLP:conf/nips/JainWR16 in this file.
% Title is stored in Title Case so that styles which do not recase titles render it
% consistently with the other entries; sentence-casing styles still downcase it.
@article{DBLP:journals/corr/JainWR16,
  author       = {Jain, Shantanu and
                  White, Martha and
                  Radivojac, Predrag},
  title        = {Estimating the Class Prior and Posterior from Noisy Positives and
                  Unlabeled Data},
  journal      = {CoRR},
  volume       = {abs/1606.08561},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.08561},
  eprinttype   = {arXiv},
  eprint       = {1606.08561},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/JainWR16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the AAMAS 2016 entry DBLP:conf/atal/WhiteW16 in this file.
@article{DBLP:journals/corr/WhiteW16a,
  author       = {White, Martha and
                  White, Adam},
  title        = {A Greedy Approach to Adapting the Trace Parameter for Temporal Difference
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1607.00446},
  year         = {2016},
  url          = {http://arxiv.org/abs/1607.00446},
  eprinttype   = {arXiv},
  eprint       = {1607.00446},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/WhiteW16a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same author and title as the ICML 2017 entry DBLP:conf/icml/White17 in this file.
% Title is stored in Title Case, matching that entry; sentence-casing styles still downcase it.
@article{DBLP:journals/corr/White16,
  author       = {White, Martha},
  title        = {Unifying Task Specification in Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1609.01995},
  year         = {2016},
  url          = {http://arxiv.org/abs/1609.01995},
  eprinttype   = {arXiv},
  eprint       = {1609.01995},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/White16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/PanWW16,
  author       = {Pan, Yangchen and
                  White, Adam and
                  White, Martha},
  title        = {Accelerated Gradient Temporal Difference Learning},
  journal      = {CoRR},
  volume       = {abs/1611.09328},
  year         = {2016},
  url          = {http://arxiv.org/abs/1611.09328},
  eprinttype   = {arXiv},
  eprint       = {1611.09328},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/PanWW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/aaai/WhiteWBS15,
  author       = {White, Martha and
                  Wen, Junfeng and
                  Bowling, Michael and
                  Schuurmans, Dale},
  editor       = {Bonet, Blai and
                  Koenig, Sven},
  title        = {Optimal Estimation of Multivariate {ARMA} Models},
  booktitle    = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence,
                  January 25-30, 2015, Austin, Texas, {USA}},
  pages        = {3080--3086},
  publisher    = {{AAAI} Press},
  year         = {2015},
  url          = {https://doi.org/10.1609/aaai.v29i1.9614},
  doi          = {10.1609/AAAI.V29I1.9614},
  timestamp    = {Mon, 18 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/WhiteWBS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% The doi and url fields hold the raw identifier with a plain underscore: they are
% verbatim fields under biblatex and url-aware styles, where a LaTeX-escaped
% underscore would put a stray backslash into the printed DOI and the link target.
@inproceedings{DBLP:conf/pkdd/MirzazadehWGS15,
  author       = {Mirzazadeh, Farzaneh and
                  White, Martha and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Schuurmans, Dale},
  editor       = {Appice, Annalisa and
                  Rodrigues, Pedro Pereira and
                  Costa, V{\'{\i}}tor Santos and
                  Soares, Carlos and
                  Gama, Jo{\~{a}}o and
                  Jorge, Al{\'{\i}}pio},
  title        = {Scalable Metric Learning for Co-Embedding},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases - European Conference,
                  {ECML} {PKDD} 2015, Porto, Portugal, September 7-11, 2015, Proceedings,
                  Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {9284},
  pages        = {625--642},
  publisher    = {Springer},
  year         = {2015},
  url          = {https://doi.org/10.1007/978-3-319-23528-8_39},
  doi          = {10.1007/978-3-319-23528-8_39},
  timestamp    = {Mon, 30 Nov 2020 08:47:26 +0100},
  biburl       = {https://dblp.org/rec/conf/pkdd/MirzazadehWGS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the JMLR entry DBLP:journals/jmlr/SuttonMW16 in this file.
@article{DBLP:journals/corr/SuttonMW15,
  author       = {Sutton, Richard S. and
                  Mahmood, Ashique Rupam and
                  White, Martha},
  title        = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1503.04269},
  year         = {2015},
  url          = {http://arxiv.org/abs/1503.04269},
  eprinttype   = {arXiv},
  eprint       = {1503.04269},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SuttonMW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/MahmoodYWS15,
  author       = {Mahmood, Ashique Rupam and
                  Yu, Huizhen and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Emphatic Temporal-Difference Learning},
  journal      = {CoRR},
  volume       = {abs/1507.01569},
  year         = {2015},
  url          = {http://arxiv.org/abs/1507.01569},
  eprinttype   = {arXiv},
  eprint       = {1507.01569},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MahmoodYWS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% The IJCAI 2016 entry DBLP:conf/ijcai/GehringPW16 in this file has the same title
% and additionally lists Yangchen Pan as an author.
@article{DBLP:journals/corr/GehringW15,
  author       = {Gehring, Clement and
                  White, Martha},
  title        = {Incremental Truncated {LSTD}},
  journal      = {CoRR},
  volume       = {abs/1511.08495},
  year         = {2015},
  url          = {http://arxiv.org/abs/1511.08495},
  eprinttype   = {arXiv},
  eprint       = {1511.08495},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/GehringW15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the CoRR entry DBLP:journals/corr/abs-1211-0587 in this file.
@inproceedings{DBLP:conf/dcc/VenessWBG13,
  author       = {Veness, Joel and
                  White, Martha and
                  Bowling, Michael and
                  Gy{\"{o}}rgy, Andr{\'{a}}s},
  editor       = {Bilgin, Ali and
                  Marcellin, Michael W. and
                  Serra{-}Sagrist{\`{a}}, Joan and
                  Storer, James A.},
  title        = {Partition Tree Weighting},
  booktitle    = {2013 Data Compression Conference, {DCC} 2013, Snowbird, UT, USA, March
                  20-22, 2013},
  pages        = {321--330},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/DCC.2013.40},
  doi          = {10.1109/DCC.2013.40},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/dcc/VenessWBG13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NOTE(review): the CoRR entry DBLP:journals/corr/abs-1205-4839 in this file has the same
% authors and a similar title (Off-Policy Actor-Critic); possibly the preprint - confirm.
@inproceedings{DBLP:conf/icml/DegrisWS12,
  author       = {Degris, Thomas and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Linear Off-Policy Actor-Critic},
  booktitle    = {Proceedings of the 29th International Conference on Machine Learning,
                  {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher    = {icml.cc / Omnipress},
  year         = {2012},
  url          = {http://icml.cc/2012/papers/268.pdf},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/DegrisWS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/WhiteYZS12,
  author       = {White, Martha and
                  Yu, Yaoliang and
                  Zhang, Xinhua and
                  Schuurmans, Dale},
  editor       = {Bartlett, Peter L. and
                  Pereira, Fernando C. N. and
                  Burges, Christopher J. C. and
                  Bottou, L{\'{e}}on and
                  Weinberger, Kilian Q.},
  title        = {Convex Multi-view Subspace Learning},
  booktitle    = {Advances in Neural Information Processing Systems 25: 26th Annual
                  Conference on Neural Information Processing Systems 2012. Proceedings
                  of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages        = {1682--1690},
  year         = {2012},
  url          = {https://proceedings.neurips.cc/paper/2012/hash/1141938ba2c2b13f5505d7c424ebae5f-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/WhiteYZS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% AISTATS 2012 paper in the JMLR Proceedings series; presumably this is why an
% inproceedings entry carries a journals/jmlr key.
@inproceedings{DBLP:journals/jmlr/WhiteS12,
  author       = {White, Martha and
                  Schuurmans, Dale},
  editor       = {Lawrence, Neil D. and
                  Girolami, Mark A.},
  title        = {Generalized Optimal Reverse Prediction},
  booktitle    = {Proceedings of the Fifteenth International Conference on Artificial
                  Intelligence and Statistics, {AISTATS} 2012, La Palma, Canary Islands,
                  Spain, April 21-23, 2012},
  series       = {{JMLR} Proceedings},
  volume       = {22},
  pages        = {1305--1313},
  publisher    = {JMLR.org},
  year         = {2012},
  url          = {http://proceedings.mlr.press/v22/white12.html},
  timestamp    = {Wed, 29 May 2019 08:41:44 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/WhiteS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% NOTE(review): the ICML 2012 entry DBLP:conf/icml/DegrisWS12 in this file has the same
% authors and a similar title (Linear Off-Policy Actor-Critic); possibly the published
% version - confirm.
@article{DBLP:journals/corr/abs-1205-4839,
  author       = {Degris, Thomas and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Off-Policy Actor-Critic},
  journal      = {CoRR},
  volume       = {abs/1205.4839},
  year         = {2012},
  url          = {http://arxiv.org/abs/1205.4839},
  eprinttype   = {arXiv},
  eprint       = {1205.4839},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1205-4839.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Same authors and title as the DCC 2013 entry DBLP:conf/dcc/VenessWBG13 in this file.
@article{DBLP:journals/corr/abs-1211-0587,
  author       = {Veness, Joel and
                  White, Martha and
                  Bowling, Michael and
                  Gy{\"{o}}rgy, Andr{\'{a}}s},
  title        = {Partition Tree Weighting},
  journal      = {CoRR},
  volume       = {abs/1211.0587},
  year         = {2012},
  url          = {http://arxiv.org/abs/1211.0587},
  eprinttype   = {arXiv},
  eprint       = {1211.0587},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1211-0587.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/aaai/ZhangYWHS11,
  author       = {Zhang, Xinhua and
                  Yu, Yaoliang and
                  White, Martha and
                  Huang, Ruitong and
                  Schuurmans, Dale},
  editor       = {Burgard, Wolfram and
                  Roth, Dan},
  title        = {Convex Sparse Coding, Subspace Learning, and Semi-Supervised Extensions},
  booktitle    = {Proceedings of the Twenty-Fifth {AAAI} Conference on Artificial Intelligence,
                  {AAAI} 2011, San Francisco, California, USA, August 7-11, 2011},
  pages        = {567--573},
  publisher    = {{AAAI} Press},
  year         = {2011},
  url          = {https://doi.org/10.1609/aaai.v25i1.7935},
  doi          = {10.1609/AAAI.V25I1.7935},
  timestamp    = {Mon, 04 Sep 2023 16:05:54 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/ZhangYWHS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/WhiteW10,
  author       = {White, Martha and
                  White, Adam},
  editor       = {Lafferty, John D. and
                  Williams, Christopher K. I. and
                  Shawe{-}Taylor, John and
                  Zemel, Richard S. and
                  Culotta, Aron},
  title        = {Interval Estimation for Reinforcement-Learning Algorithms in Continuous-State
                  Domains},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {2433--2441},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/13f3cf8c531952d72e5847c4183e6910-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/WhiteW10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/nips/YuYXWS10,
  author       = {Yu, Yaoliang and
                  Yang, Min and
                  Xu, Linli and
                  White, Martha and
                  Schuurmans, Dale},
  editor       = {Lafferty, John D. and
                  Williams, Christopher K. I. and
                  Shawe{-}Taylor, John and
                  Zemel, Richard S. and
                  Culotta, Aron},
  title        = {Relaxed Clipping: {A} Global Training Method for Robust Regression
                  and Classification},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {2532--2540},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/01882513d5fa7c329e940dda99b12147-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/YuYXWS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Title is stored in Title Case so that styles which do not recase titles render it
% consistently with the other entries. The word after the colon is brace-protected,
% as in DBLP:conf/nips/YuYXWS10, so sentence-casing styles keep it capitalised.
@inproceedings{DBLP:conf/icml/XuWS09,
  author       = {Xu, Linli and
                  White, Martha and
                  Schuurmans, Dale},
  editor       = {Danyluk, Andrea Pohoreckyj and
                  Bottou, L{\'{e}}on and
                  Littman, Michael L.},
  title        = {Optimal Reverse Prediction: {A} Unified Perspective on Supervised,
                  Unsupervised and Semi-Supervised Learning},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
                  Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {382},
  pages        = {1137--1144},
  publisher    = {{ACM}},
  year         = {2009},
  url          = {https://doi.org/10.1145/1553374.1553519},
  doi          = {10.1145/1553374.1553519},
  timestamp    = {Tue, 06 Nov 2018 16:58:29 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/XuWS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ijcai/WhiteB09,
  author       = {White, Martha and
                  Bowling, Michael H.},
  editor       = {Boutilier, Craig},
  title        = {Learning a Value Analysis Tool for Agent Evaluation},
  booktitle    = {{IJCAI} 2009, Proceedings of the 21st International Joint Conference
                  on Artificial Intelligence, Pasadena, California, USA, July 11-17,
                  2009},
  pages        = {1976--1981},
  year         = {2009},
  url          = {http://ijcai.org/Proceedings/09/Papers/326.pdf},
  timestamp    = {Tue, 20 Aug 2019 16:16:40 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/WhiteB09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

a service of

manage site settings

To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information see our F.A.Q.