Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Martha White
@inproceedings{DBLP:conf/aaai/LiuWW24,
  author     = {Vincent Liu and James R. Wright and Martha White},
  editor     = {Michael J. Wooldridge and Jennifer G. Dy and Sriraam Natarajan},
  title      = {Exploiting Action Impact Regularity and Exogenous State Variables for Offline Reinforcement Learning (Abstract Reprint)},
  booktitle  = {Thirty-Eighth {AAAI} Conference on Artificial Intelligence, {AAAI} 2024, Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2024, Fourteenth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2024, February 20-27, 2024, Vancouver, Canada},
  pages      = {22706},
  publisher  = {{AAAI} Press},
  year       = {2024},
  url        = {https://doi.org/10.1609/aaai.v38i20.30606},
  doi        = {10.1609/AAAI.V38I20.30606},
  timestamp  = {Tue, 02 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/LiuWW24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2402-03903,
  author     = {Brett Daley and Martha White and Marlos C. Machado},
  title      = {Compound Returns Reduce Variance in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2402.03903},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.03903},
  doi        = {10.48550/ARXIV.2402.03903},
  eprinttype = {arXiv},
  eprint     = {2402.03903},
  timestamp  = {Mon, 12 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-03903.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2402-10339,
  author     = {Hugo Silva and Martha White},
  title      = {What to Do When Your Discrete Optimization Is the Size of a Neural Network?},
  journal    = {CoRR},
  volume     = {abs/2402.10339},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.10339},
  doi        = {10.48550/ARXIV.2402.10339},
  eprinttype = {arXiv},
  eprint     = {2402.10339},
  timestamp  = {Mon, 26 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-10339.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2402-13425,
  author     = {Ehsan Imani and Kai Luedemann and Sam Scholnick{-}Hughes and Esraa Elelimy and Martha White},
  title      = {Investigating the Histogram Loss in Regression},
  journal    = {CoRR},
  volume     = {abs/2402.13425},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.13425},
  doi        = {10.48550/ARXIV.2402.13425},
  eprinttype = {arXiv},
  eprint     = {2402.13425},
  timestamp  = {Thu, 21 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-13425.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jair/LiuWW23,
  author     = {Vincent Liu and James R. Wright and Martha White},
  title      = {Exploiting Action Impact Regularity and Exogenous State Variables for Offline Reinforcement Learning},
  journal    = {J. Artif. Intell. Res.},
  volume     = {77},
  pages      = {71--101},
  year       = {2023},
  url        = {https://doi.org/10.1613/jair.1.14580},
  doi        = {10.1613/JAIR.1.14580},
  timestamp  = {Mon, 22 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jair/LiuWW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/0002IKW23,
  author     = {Eric Graves and Ehsan Imani and Raksha Kumaraswamy and Martha White},
  title      = {Off-Policy Actor-Critic with Emphatic Weightings},
  journal    = {J. Mach. Learn. Res.},
  volume     = {24},
  pages      = {146:1--146:63},
  year       = {2023},
  url        = {http://jmlr.org/papers/v24/21-1350.html},
  timestamp  = {Tue, 13 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/0002IKW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/JavedSSW23,
  author     = {Khurram Javed and Haseeb Shah and Richard S. Sutton and Martha White},
  title      = {Scalable Real-Time Recurrent Learning Using Columnar-Constructive Networks},
  journal    = {J. Mach. Learn. Res.},
  volume     = {24},
  pages      = {256:1--256:34},
  year       = {2023},
  url        = {http://jmlr.org/papers/v24/23-0367.html},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/JavedSSW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/pami/PattersonLW23,
  author     = {Andrew Patterson and Victor Liao and Martha White},
  title      = {Robust Losses for Learning Value Functions},
  journal    = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
  volume     = {45},
  number     = {5},
  pages      = {6157--6167},
  year       = {2023},
  url        = {https://doi.org/10.1109/TPAMI.2022.3213503},
  doi        = {10.1109/TPAMI.2022.3213503},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/pami/PattersonLW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tmlr/SchlegelTWW23,
  author     = {Matthew Schlegel and Volodymyr Tkachuk and Adam M. White and Martha White},
  title      = {Investigating Action Encodings in Recurrent Neural Networks in Reinforcement Learning},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2023},
  year       = {2023},
  url        = {https://openreview.net/forum?id=K6g4MbAC1r},
  timestamp  = {Thu, 18 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/SchlegelTWW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/LiuCTW23,
  author     = {Vincent Liu and Yash Chandak and Philip S. Thomas and Martha White},
  editor     = {Francisco J. R. Ruiz and Jennifer G. Dy and Jan{-}Willem van de Meent},
  title      = {Asymptotically Unbiased Off-Policy Policy Evaluation when Reusing Old Data in Nonstationary Environments},
  booktitle  = {International Conference on Artificial Intelligence and Statistics, 25-27 April 2023, Palau de Congressos, Valencia, Spain},
  series     = {Proceedings of Machine Learning Research},
  volume     = {206},
  pages      = {5474--5492},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v206/liu23d.html},
  timestamp  = {Mon, 19 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/LiuCTW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/collas/LiuWTJ0W23,
  author     = {Vincent Liu and Han Wang and Ruo Yu Tao and Khurram Javed and Adam White and Martha White},
  editor     = {Sarath Chandar and Razvan Pascanu and Hanie Sedghi and Doina Precup},
  title      = {Measuring and Mitigating Interference in Reinforcement Learning},
  booktitle  = {Conference on Lifelong Learning Agents, 22-25 August 2023, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {232},
  pages      = {781--795},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v232/liu23a.html},
  timestamp  = {Tue, 20 Feb 2024 13:52:18 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/LiuWTJ0W23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/NeumannLJP0W23,
  author     = {Samuel Neumann and Sungsu Lim and Ajin George Joseph and Yangchen Pan and Adam White and Martha White},
  title      = {Greedy Actor-Critic: {A} New Conditional Cross-Entropy Method for Policy Improvement},
  booktitle  = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=eSQh8rG8Oa},
  timestamp  = {Fri, 30 Jun 2023 14:38:38 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/NeumannLJP0W23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/XiaoWP0W23,
  author     = {Chenjun Xiao and Han Wang and Yangchen Pan and Adam White and Martha White},
  title      = {The In-Sample Softmax for Offline Reinforcement Learning},
  booktitle  = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=u-RuvyDYqCM},
  timestamp  = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/XiaoWP0W23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/DaleyWAM23,
  author     = {Brett Daley and Martha White and Christopher Amato and Marlos C. Machado},
  editor     = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title      = {Trajectory-Aware Eligibility Traces for Off-Policy Reinforcement Learning},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {6818--6835},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/daley23a.html},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/DaleyWAM23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZhuCSW23,
  author     = {Lingwei Zhu and Zheng Chen and Matthew Schlegel and Martha White},
  editor     = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  title      = {General {Munchausen} Reinforcement Learning with {Tsallis} {Kullback-Leibler} Divergence},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper_files/paper/2023/hash/b3e866c228f8f4ea18021ae63aea5453-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/ZhuCSW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-11321,
  author     = {Brett Daley and Martha White and Christopher Amato and Marlos C. Machado},
  title      = {Trajectory-Aware Eligibility Traces for Off-Policy Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2301.11321},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.11321},
  doi        = {10.48550/ARXIV.2301.11321},
  eprinttype = {arXiv},
  eprint     = {2301.11321},
  timestamp  = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-11321.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-11476,
  author     = {Lingwei Zhu and Zheng Chen and Takamitsu Matsubara and Martha White},
  title      = {Generalized {Munchausen} Reinforcement Learning using {Tsallis} {KL} Divergence},
  journal    = {CoRR},
  volume     = {abs/2301.11476},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.11476},
  doi        = {10.48550/ARXIV.2301.11476},
  eprinttype = {arXiv},
  eprint     = {2301.11476},
  timestamp  = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-11476.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2302-05326,
  author     = {Khurram Javed and Haseeb Shah and Richard S. Sutton and Martha White},
  title      = {Online Real-Time Recurrent Learning Using Sparse Connections and Selective Learning},
  journal    = {CoRR},
  volume     = {abs/2302.05326},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.05326},
  doi        = {10.48550/ARXIV.2302.05326},
  eprinttype = {arXiv},
  eprint     = {2302.05326},
  timestamp  = {Mon, 13 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-05326.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2302-11725,
  author     = {Vincent Liu and Yash Chandak and Philip S. Thomas and Martha White},
  title      = {Asymptotically Unbiased Off-Policy Policy Evaluation when Reusing Old Data in Nonstationary Environments},
  journal    = {CoRR},
  volume     = {abs/2302.11725},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.11725},
  doi        = {10.48550/ARXIV.2302.11725},
  eprinttype = {arXiv},
  eprint     = {2302.11725},
  timestamp  = {Tue, 28 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-11725.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2302-14372,
  author     = {Chenjun Xiao and Han Wang and Yangchen Pan and Adam White and Martha White},
  title      = {The In-Sample Softmax for Offline Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2302.14372},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.14372},
  doi        = {10.48550/ARXIV.2302.14372},
  eprinttype = {arXiv},
  eprint     = {2302.14372},
  timestamp  = {Thu, 02 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-14372.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2304-01315,
  author     = {Andrew Patterson and Samuel Neumann and Martha White and Adam White},
  title      = {Empirical Design in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2304.01315},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2304.01315},
  doi        = {10.48550/ARXIV.2304.01315},
  eprinttype = {arXiv},
  eprint     = {2304.01315},
  timestamp  = {Tue, 18 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2304-01315.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2305-09838,
  author     = {James E. Kostas and Scott M. Jordan and Yash Chandak and Georgios Theocharous and Dhawal Gupta and Martha White and Bruno Castro da Silva and Philip S. Thomas},
  title      = {Coagent Networks: Generalized and Scaled},
  journal    = {CoRR},
  volume     = {abs/2305.09838},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.09838},
  doi        = {10.48550/ARXIV.2305.09838},
  eprinttype = {arXiv},
  eprint     = {2305.09838},
  timestamp  = {Wed, 24 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-09838.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2307-04887,
  author     = {Vincent Liu and Han Wang and Ruo Yu Tao and Khurram Javed and Adam White and Martha White},
  title      = {Measuring and Mitigating Interference in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2307.04887},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.04887},
  doi        = {10.48550/ARXIV.2307.04887},
  eprinttype = {arXiv},
  eprint     = {2307.04887},
  timestamp  = {Mon, 24 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-04887.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2312-01624,
  author     = {Muhammad Kamran Janjua and Haseeb Shah and Martha White and Erfan Miahi and Marlos C. Machado and Adam White},
  title      = {{GVFs} in the Real World: Making Predictions Online for Water Treatment},
  journal    = {CoRR},
  volume     = {abs/2312.01624},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.01624},
  doi        = {10.48550/ARXIV.2312.01624},
  eprinttype = {arXiv},
  eprint     = {2312.01624},
  timestamp  = {Wed, 13 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-01624.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2312-02355,
  author     = {Vincent Liu and Prabhat Nagarajan and Andrew Patterson and Martha White},
  title      = {When is Offline Policy Selection Sample Efficient for Reinforcement Learning?},
  journal    = {CoRR},
  volume     = {abs/2312.02355},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.02355},
  doi        = {10.48550/ARXIV.2312.02355},
  eprinttype = {arXiv},
  eprint     = {2312.02355},
  timestamp  = {Wed, 13 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-02355.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/Patterson0W22,
  author     = {Andrew Patterson and Adam White and Martha White},
  title      = {A Generalized Projected {Bellman} Error for Off-policy Value Estimation in Reinforcement Learning},
  journal    = {J. Mach. Learn. Res.},
  volume     = {23},
  pages      = {145:1--145:61},
  year       = {2022},
  url        = {http://jmlr.org/papers/v23/21-037.html},
  timestamp  = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/Patterson0W22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/0001SLKMW22,
  author     = {Alan Chan and Hugo Silva and Sungsu Lim and Tadashi Kozuno and A. Rupam Mahmood and Martha White},
  title      = {Greedification Operators for Policy Optimization: Investigating Forward and Reverse {KL} Divergences},
  journal    = {J. Mach. Learn. Res.},
  volume     = {23},
  pages      = {253:1--253:79},
  year       = {2022},
  url        = {http://jmlr.org/papers/v23/21-054.html},
  timestamp  = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/0001SLKMW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tmlr/ImaniHW22,
  author     = {Ehsan Imani and Wei Hu and Martha White},
  title      = {Representation Alignment in Neural Networks},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2022},
  year       = {2022},
  url        = {https://openreview.net/forum?id=fLIWMnZ9ij},
  timestamp  = {Fri, 19 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/ImaniHW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tmlr/WangSWBLZLKFW22,
  author     = {Han Wang and Archit Sakhadeo and Adam M. White and James Bell and Vincent Liu and Xutong Zhao and Puer Liu and Tadashi Kozuno and Alona Fyshe and Martha White},
  title      = {No More Pesky Hyperparameters: Offline Hyperparameter Tuning for {RL}},
  journal    = {Trans. Mach. Learn. Res.},
  volume     = {2022},
  year       = {2022},
  url        = {https://openreview.net/forum?id=AiOUi3440V},
  timestamp  = {Fri, 19 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmlr/WangSWBLZLKFW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/0006TPWM22,
  author     = {Shivam Garg and Samuele Tosatto and Yangchen Pan and Martha White and Rupam Mahmood},
  editor     = {Gustau Camps{-}Valls and Francisco J. R. Ruiz and Isabel Valera},
  title      = {An Alternate Policy Gradient Estimator for Softmax Policies},
  booktitle  = {International Conference on Artificial Intelligence and Statistics, {AISTATS} 2022, 28-30 March 2022, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {151},
  pages      = {6630--6689},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v151/garg22b.html},
  timestamp  = {Sat, 30 Sep 2023 09:34:08 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/0006TPWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/BanmanP0FW22,
  author     = {Kirby Banman and Liam Peet{-}Pare and Nidhi Hegde and Alona Fyshe and Martha White},
  title      = {Resonance in Weight Space: Covariate Shift Can Drive Divergence of {SGD} with Momentum},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=5ECQL05ub0J},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/BanmanP0FW22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/TosattoPWM22,
  author     = {Samuele Tosatto and Andrew Patterson and Martha White and Rupam Mahmood},
  editor     = {Kamalika Chaudhuri and Stefanie Jegelka and Le Song and Csaba Szepesv{\'{a}}ri and Gang Niu and Sivan Sabato},
  title      = {A Temporal-Difference Approach to Policy Gradient Estimation},
  booktitle  = {International Conference on Machine Learning, {ICML} 2022, 17-23 July 2022, Baltimore, Maryland, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {21609--21632},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v162/tosatto22a.html},
  timestamp  = {Tue, 12 Jul 2022 17:36:52 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/TosattoPWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/PanMFWYR022,
  author     = {Yangchen Pan and Jincheng Mei and Amir{-}massoud Farahmand and Martha White and Hengshuai Yao and Mohsen Rohani and Jun Luo},
  editor     = {James Cussens and Kun Zhang},
  title      = {Understanding and mitigating the limitations of prioritized experience replay},
  booktitle  = {Uncertainty in Artificial Intelligence, Proceedings of the Thirty-Eighth Conference on Uncertainty in Artificial Intelligence, {UAI} 2022, 1-5 August 2022, Eindhoven, The Netherlands},
  series     = {Proceedings of Machine Learning Research},
  volume     = {180},
  pages      = {1561--1571},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v180/pan22a.html},
  timestamp  = {Sat, 15 Oct 2022 12:08:13 +0200},
  biburl     = {https://dblp.org/rec/conf/uai/PanMFWYR022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-02396,
  author     = {Samuele Tosatto and Andrew Patterson and Martha White and A. Rupam Mahmood},
  title      = {A Temporal-Difference Approach to Policy Gradient Estimation},
  journal    = {CoRR},
  volume     = {abs/2202.02396},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.02396},
  eprinttype = {arXiv},
  eprint     = {2202.02396},
  timestamp  = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-02396.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-11133,
  author     = {Matthew McLeod and Chunlok Lo and Matthew Schlegel and Andrew Jacobsen and Raksha Kumaraswamy and Martha White and Adam White},
  title      = {Continual Auxiliary Task Learning},
  journal    = {CoRR},
  volume     = {abs/2202.11133},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.11133},
  eprinttype = {arXiv},
  eprint     = {2202.11133},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-11133.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-11992,
  author     = {Kirby Banman and Liam Peet{-}Pare and Nidhi Hegde and Alona Fyshe and Martha White},
  title      = {Resonance in Weight Space: Covariate Shift Can Drive Divergence of {SGD} with Momentum},
  journal    = {CoRR},
  volume     = {abs/2203.11992},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.11992},
  doi        = {10.48550/ARXIV.2203.11992},
  eprinttype = {arXiv},
  eprint     = {2203.11992},
  timestamp  = {Tue, 29 Mar 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-11992.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-15955,
  author     = {Han Wang and Erfan Miahi and Martha White and Marlos C. Machado and Zaheer Abbas and Raksha Kumaraswamy and Vincent Liu and Adam White},
  title      = {Investigating the Properties of Neural Network Representations in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2203.15955},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.15955},
  doi        = {10.48550/ARXIV.2203.15955},
  eprinttype = {arXiv},
  eprint     = {2203.15955},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-15955.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2205-08464,
  author     = {Andrew Patterson and Victor Liao and Martha White},
  title      = {Robust Losses for Learning Value Functions},
  journal    = {CoRR},
  volume     = {abs/2205.08464},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.08464},
  doi        = {10.48550/ARXIV.2205.08464},
  eprinttype = {arXiv},
  eprint     = {2205.08464},
  timestamp  = {Mon, 23 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-08464.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2205-08716,
  author     = {Han Wang and Archit Sakhadeo and Adam White and James Bell and Vincent Liu and Xutong Zhao and Puer Liu and Tadashi Kozuno and Alona Fyshe and Martha White},
  title      = {No More Pesky Hyperparameters: Offline Hyperparameter Tuning for {RL}},
  journal    = {CoRR},
  volume     = {abs/2205.08716},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.08716},
  doi        = {10.48550/ARXIV.2205.08716},
  eprinttype = {arXiv},
  eprint     = {2205.08716},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-08716.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2206-02902,
  author     = {Chunlok Lo and Gabor Mihucz and Adam White and Farzane Aminmansour and Martha White},
  title      = {Goal-Space Planning with Subgoal Models},
  journal    = {CoRR},
  volume     = {abs/2206.02902},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.02902},
  doi        = {10.48550/ARXIV.2206.02902},
  eprinttype = {arXiv},
  eprint     = {2206.02902},
  timestamp  = {Tue, 14 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-02902.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jair/SchlegelJAPWW21,
  author     = {Matthew Schlegel and Andrew Jacobsen and Zaheer Abbas and Andrew Patterson and Adam White and Martha White},
  title      = {General Value Function Networks},
  journal    = {J. Artif. Intell. Res.},
  volume     = {70},
  pages      = {497--543},
  year       = {2021},
  url        = {https://doi.org/10.1613/jair.1.12105},
  doi        = {10.1613/JAIR.1.12105},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jair/SchlegelJAPWW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tase/HoferBHGMGAFGLL21,
  author     = {Sebastian H{\"{o}}fer and Kostas E. Bekris and Ankur Handa and Juan Camilo Gamboa and Melissa Mozifian and Florian Golemo and Christopher G. Atkeson and Dieter Fox and Ken Goldberg and John Leonard and C. Karen Liu and Jan Peters and Shuran Song and Peter Welinder and Martha White},
  title      = {{Sim2Real} in Robotics and Automation: Applications and Challenges},
  journal    = {{IEEE} Trans. Autom. Sci. Eng.},
  volume     = {18},
  number     = {2},
  pages      = {398--400},
  year       = {2021},
  url        = {https://doi.org/10.1109/TASE.2021.3064065},
  doi        = {10.1109/TASE.2021.3064065},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tase/HoferBHGMGAFGLL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/PanBW21,
  author     = {Yangchen Pan and Kirby Banman and Martha White},
  title      = {Fuzzy Tiling Activations: {A} Simple Approach to Learning Sparse Representations Online},
  booktitle  = {9th International Conference on Learning Representations, {ICLR} 2021, Virtual Event, Austria, May 3-7, 2021},
  publisher  = {OpenReview.net},
  year       = {2021},
  url        = {https://openreview.net/forum?id=zElset1Klrp},
  timestamp  = {Wed, 23 Jun 2021 17:36:39 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/PanBW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/McLeodLSJKWW21,
  author     = {Matthew McLeod and Chunlok Lo and Matthew Schlegel and Andrew Jacobsen and Raksha Kumaraswamy and Martha White and Adam White},
  editor     = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title      = {Continual Auxiliary Task Learning},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages      = {12549--12562},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/68331ff0427b551b68e911eebe35233b-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/McLeodLSJKWW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/GuptaMSKTW21,
  author     = {Dhawal Gupta and Gabor Mihucz and Matthew Schlegel and James E. Kostas and Philip S. Thomas and Martha White},
  editor     = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title      = {Structural Credit Assignment in Neural Networks using Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages      = {30257--30270},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/fe1f9c70bdf347497e1a01b6c486bdb9-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GuptaMSKTW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2103-05787,
  author     = {Khurram Javed and Martha White and Richard S. Sutton},
  title      = {Scalable Online Recurrent Learning Using Columnar Neural Networks},
  journal    = {CoRR},
  volume     = {abs/2103.05787},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.05787},
  eprinttype = {arXiv},
  eprint     = {2103.05787},
  timestamp  = {Tue, 16 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-05787.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-13844,
  author     = {Andrew Patterson and Adam White and Sina Ghiassian and Martha White},
  title      = {A Generalized Projected {Bellman} Error for Off-policy Value Estimation in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2104.13844},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.13844},
  eprinttype = {arXiv},
  eprint     = {2104.13844},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-13844.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2105-14214,
  author     = {Qingfeng Lan and Luke Kumar and Martha White and Alona Fyshe},
  title      = {Predictive Representation Learning for Language Modeling},
  journal    = {CoRR},
  volume     = {abs/2105.14214},
  year       = {2021},
  url        = {https://arxiv.org/abs/2105.14214},
  eprinttype = {arXiv},
  eprint     = {2105.14214},
  timestamp  = {Wed, 02 Jun 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-14214.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2107-08285,
  author     = {Alan Chan and Hugo Silva and Sungsu Lim and Tadashi Kozuno and A. Rupam Mahmood and Martha White},
  title      = {Greedification Operators for Policy Optimization: Investigating Forward and Reverse {KL} Divergences},
  journal    = {CoRR},
  volume     = {abs/2107.08285},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.08285},
  eprinttype = {arXiv},
  eprint     = {2107.08285},
  timestamp  = {Wed, 21 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-08285.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2111-08066,
  author     = {Vincent Liu and James R. Wright and Martha White},
  title      = {Exploiting Action Impact Regularity and Partially Known Models for Offline Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2111.08066},
  year       = {2021},
  url        = {https://arxiv.org/abs/2111.08066},
  eprinttype = {arXiv},
  eprint     = {2111.08066},
  timestamp  = {Mon, 22 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2111-08066.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2111-08172,
  author     = {Eric Graves and Ehsan Imani and Raksha Kumaraswamy and Martha White},
  title      = {Off-Policy Actor-Critic with Emphatic Weightings},
  journal    = {CoRR},
  volume     = {abs/2111.08172},
  year       = {2021},
  url        = {https://arxiv.org/abs/2111.08172},
  eprinttype = {arXiv},
  eprint     = {2111.08172},
  timestamp  = {Mon, 22 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2111-08172.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2112-07806,
  author = {Imani, Ehsan and Hu, Wei and White, Martha},
  title = {Understanding Feature Transfer Through Representation Alignment},
  journal = {CoRR},
  volume = {abs/2112.07806},
  year = {2021},
  url = {https://arxiv.org/abs/2112.07806},
  eprinttype = {arXiv},
  eprint = {2112.07806},
  timestamp = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2112-07806.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2112-11622,
  author = {Garg, Shivam and Tosatto, Samuele and Pan, Yangchen and White, Martha and Mahmood, A. Rupam},
  title = {An Alternate Policy Gradient Estimator for Softmax Policies},
  journal = {CoRR},
  volume = {abs/2112.11622},
  year = {2021},
  url = {https://arxiv.org/abs/2112.11622},
  eprinttype = {arXiv},
  eprint = {2112.11622},
  timestamp = {Tue, 04 Jan 2022 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2112-11622.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the journal name is spelled out in full rather than abbreviated, so the bibliography style decides on abbreviation.}
@article{DBLP:journals/jair/LinkeAWDW20,
  author = {Linke, Cam and Ady, Nadia M. and White, Martha and Degris, Thomas and White, Adam},
  title = {Adapting Behavior via Intrinsic Reward: {A} Survey and Empirical Study},
  journal = {Journal of Artificial Intelligence Research},
  volume = {69},
  pages = {1287--1332},
  year = {2020},
  url = {https://doi.org/10.1613/jair.1.12087},
  doi = {10.1613/JAIR.1.12087},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/jair/LinkeAWDW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the first editor is written in comma form so the compound surname El Fallah Seghrouchni is not split into given names plus Seghrouchni.}
@inproceedings{DBLP:conf/atal/SatsangiLWOW20,
  author = {Satsangi, Yash and Lim, Sungsu and Whiteson, Shimon and Oliehoek, Frans A. and White, Martha},
  editor = {El Fallah Seghrouchni, Amal and Sukthankar, Gita and An, Bo and Yorke{-}Smith, Neil},
  title = {Maximizing Information Gain in Partially Observable Environments via Prediction Rewards},
  booktitle = {Proceedings of the 19th International Conference on Autonomous Agents and Multiagent Systems, {AAMAS} '20, Auckland, New Zealand, May 9-13, 2020},
  pages = {1215--1223},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
  year = {2020},
  url = {https://dl.acm.org/doi/10.5555/3398761.3398902},
  doi = {10.5555/3398761.3398902},
  timestamp = {Tue, 26 Jul 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/atal/SatsangiLWOW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the acronym LSTM is brace-protected in the title so sentence-casing styles keep its capitals.}
@inproceedings{DBLP:conf/emnlp/HashemzadehKWMF20,
  author = {Hashemzadeh, Maryam and Kaufeld, Greta and White, Martha and Martin, Andrea E. and Fyshe, Alona},
  editor = {Cohn, Trevor and He, Yulan and Liu, Yang},
  title = {From Language to Language-ish: How Brain-Like is an {LSTM}'s Representation of Atypical Language Stimuli?},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2020, Online Event, 16-20 November 2020},
  series = {Findings of {ACL}},
  volume = {{EMNLP} 2020},
  pages = {645--656},
  publisher = {Association for Computational Linguistics},
  year = {2020},
  url = {https://doi.org/10.18653/v1/2020.findings-emnlp.57},
  doi = {10.18653/V1/2020.FINDINGS-EMNLP.57},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/emnlp/HashemzadehKWMF20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: Q-learning is brace-protected in the title so sentence-casing styles do not lowercase the Q.}
@inproceedings{DBLP:conf/iclr/LanPFW20,
  author = {Lan, Qingfeng and Pan, Yangchen and Fyshe, Alona and White, Martha},
  title = {Maxmin {Q-learning}: Controlling the Estimation Bias of {Q-learning}},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year = {2020},
  url = {https://openreview.net/forum?id=Bkg0u3Etwr},
  timestamp = {Thu, 07 May 2020 17:11:47 +0200},
  biburl = {https://dblp.org/rec/conf/iclr/LanPFW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/NathLCLWW20,
  author = {Nath, Somjit and Liu, Vincent and Chan, Alan and Li, Xin and White, Adam and White, Martha},
  title = {Training Recurrent Neural Networks Online by Learning Explicit State Variables},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year = {2020},
  url = {https://openreview.net/forum?id=SJgmR0NKPr},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/iclr/NathLCLWW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the proper noun Dyna is brace-protected in the title so sentence-casing styles keep its capital.}
@inproceedings{DBLP:conf/icml/AbbasSTW20,
  author = {Abbas, Zaheer and Sokota, Samuel and Talvitie, Erin and White, Martha},
  title = {Selective {Dyna}-Style Planning Under Limited Model Capacity},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series = {Proceedings of Machine Learning Research},
  volume = {119},
  pages = {1--10},
  publisher = {{PMLR}},
  year = {2020},
  url = {http://proceedings.mlr.press/v119/abbas20a.html},
  timestamp = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/icml/AbbasSTW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the acronym MDPs is brace-protected in the title so sentence-casing styles keep its capitals.}
@inproceedings{DBLP:conf/icml/ChandakTSWMT20,
  author = {Chandak, Yash and Theocharous, Georgios and Shankar, Shiv and White, Martha and Mahadevan, Sridhar and Thomas, Philip S.},
  title = {Optimizing for the Future in Non-Stationary {MDPs}},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series = {Proceedings of Machine Learning Research},
  volume = {119},
  pages = {1414--1425},
  publisher = {{PMLR}},
  year = {2020},
  url = {http://proceedings.mlr.press/v119/chandak20a.html},
  timestamp = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/icml/ChandakTSWMT20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/GhiassianP0GWW20,
  author = {Ghiassian, Sina and Patterson, Andrew and Garg, Shivam and Gupta, Dhawal and White, Adam and White, Martha},
  title = {Gradient Temporal-Difference Learning with Regularized Corrections},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series = {Proceedings of Machine Learning Research},
  volume = {119},
  pages = {3524--3534},
  publisher = {{PMLR}},
  year = {2020},
  url = {http://proceedings.mlr.press/v119/ghiassian20a.html},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/GhiassianP0GWW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the acronym MDPs is brace-protected in the title so sentence-casing styles keep its capitals.}
@inproceedings{DBLP:conf/nips/ChandakJTWT20,
  author = {Chandak, Yash and Jordan, Scott M. and Theocharous, Georgios and White, Martha and Thomas, Philip S.},
  editor = {Larochelle, Hugo and Ranzato, Marc'Aurelio and Hadsell, Raia and Balcan, Maria{-}Florina and Lin, Hsuan{-}Tien},
  title = {Towards Safe Policy Improvement for Non-Stationary {MDPs}},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year = {2020},
  url = {https://proceedings.neurips.cc/paper/2020/hash/680390c55bbd9ce416d1d69a9ab4760d-Abstract.html},
  timestamp = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/nips/ChandakJTWT20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/PanIFW20,
  author = {Pan, Yangchen and Imani, Ehsan and Farahmand, Amir{-}massoud and White, Martha},
  editor = {Larochelle, Hugo and Ranzato, Marc'Aurelio and Hadsell, Raia and Balcan, Maria{-}Florina and Lin, Hsuan{-}Tien},
  title = {An implicit function learning approach for parametric modal regression},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year = {2020},
  url = {https://proceedings.neurips.cc/paper/2020/hash/83eaa6722798a773dd55e8fc7443aa09-Abstract.html},
  timestamp = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/nips/PanIFW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-06195,
  author = {Pan, Yangchen and Imani, Ehsan and White, Martha and Farahmand, Amir{-}massoud},
  title = {An implicit function learning approach for parametric modal regression},
  journal = {CoRR},
  volume = {abs/2002.06195},
  year = {2020},
  url = {https://arxiv.org/abs/2002.06195},
  eprinttype = {arXiv},
  eprint = {2002.06195},
  timestamp = {Tue, 03 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2002-06195.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: Q-learning is brace-protected in the title so sentence-casing styles do not lowercase the Q.}
@article{DBLP:journals/corr/abs-2002-06487,
  author = {Lan, Qingfeng and Pan, Yangchen and Fyshe, Alona and White, Martha},
  title = {Maxmin {Q-learning}: Controlling the Estimation Bias of {Q-learning}},
  journal = {CoRR},
  volume = {abs/2002.06487},
  year = {2020},
  url = {https://arxiv.org/abs/2002.06487},
  eprinttype = {arXiv},
  eprint = {2002.06487},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2002-06487.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2005-04912,
  author = {Satsangi, Yash and Lim, Sungsu and Whiteson, Shimon and Oliehoek, Frans A. and White, Martha},
  title = {Maximizing Information Gain in Partially Observable Environments via Prediction Reward},
  journal = {CoRR},
  volume = {abs/2005.04912},
  year = {2020},
  url = {https://arxiv.org/abs/2005.04912},
  eprinttype = {arXiv},
  eprint = {2005.04912},
  timestamp = {Thu, 14 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2005-04912.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the acronym MDPs is brace-protected in the title so sentence-casing styles keep its capitals.}
@article{DBLP:journals/corr/abs-2005-08158,
  author = {Chandak, Yash and Theocharous, Georgios and Shankar, Shiv and White, Martha and Mahadevan, Sridhar and Thomas, Philip S.},
  title = {Optimizing for the Future in Non-Stationary {MDPs}},
  journal = {CoRR},
  volume = {abs/2005.08158},
  year = {2020},
  url = {https://arxiv.org/abs/2005.08158},
  eprinttype = {arXiv},
  eprint = {2005.08158},
  timestamp = {Wed, 03 Jun 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2005-08158.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the proper noun Dyna is brace-protected in the title so sentence-casing styles keep its capital.}
@article{DBLP:journals/corr/abs-2006-04363,
  author = {Jafferjee, Taher and Imani, Ehsan and Talvitie, Erin and White, Martha and Bowling, Michael},
  title = {Hallucinating Value: {A} Pitfall of {Dyna}-style Planning with Imperfect Environment Models},
  journal = {CoRR},
  volume = {abs/2006.04363},
  year = {2020},
  url = {https://arxiv.org/abs/2006.04363},
  eprinttype = {arXiv},
  eprint = {2006.04363},
  timestamp = {Fri, 12 Jun 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2006-04363.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-07461,
  author = {Javed, Khurram and White, Martha and Bengio, Yoshua},
  title = {Learning Causal Models Online},
  journal = {CoRR},
  volume = {abs/2006.07461},
  year = {2020},
  url = {https://arxiv.org/abs/2006.07461},
  eprinttype = {arXiv},
  eprint = {2006.07461},
  timestamp = {Wed, 17 Jun 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2006-07461.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-00611,
  author = {Ghiassian, Sina and Patterson, Andrew and Garg, Shivam and Gupta, Dhawal and White, Adam and White, Martha},
  title = {Gradient Temporal-Difference Learning with Regularized Corrections},
  journal = {CoRR},
  volume = {abs/2007.00611},
  year = {2020},
  url = {https://arxiv.org/abs/2007.00611},
  eprinttype = {arXiv},
  eprint = {2007.00611},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2007-00611.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the proper noun Dyna is brace-protected in the title so sentence-casing styles keep its capital.}
@article{DBLP:journals/corr/abs-2007-02418,
  author = {Abbas, Zaheer and Sokota, Samuel and Talvitie, Erin J. and White, Martha},
  title = {Selective {Dyna}-style Planning Under Limited Model Capacity},
  journal = {CoRR},
  volume = {abs/2007.02418},
  year = {2020},
  url = {https://arxiv.org/abs/2007.02418},
  eprinttype = {arXiv},
  eprint = {2007.02418},
  timestamp = {Mon, 03 Aug 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2007-02418.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-03807,
  author = {Liu, Vincent and White, Adam and Yao, Hengshuai and White, Martha},
  title = {Towards a practical measure of interference for reinforcement learning},
  journal = {CoRR},
  volume = {abs/2007.03807},
  year = {2020},
  url = {https://arxiv.org/abs/2007.03807},
  eprinttype = {arXiv},
  eprint = {2007.03807},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2007-03807.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-09569,
  author = {Mei, Jincheng and Pan, Yangchen and White, Martha and Farahmand, Amir{-}massoud and Yao, Hengshuai},
  title = {Beyond Prioritized Replay: Sampling States in Model-Based {RL} via Simulated Priorities},
  journal = {CoRR},
  volume = {abs/2007.09569},
  year = {2020},
  url = {https://arxiv.org/abs/2007.09569},
  eprinttype = {arXiv},
  eprint = {2007.09569},
  timestamp = {Tue, 28 Jul 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2007-09569.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the acronym LSTM is brace-protected in the title so sentence-casing styles keep its capitals.}
@article{DBLP:journals/corr/abs-2010-07435,
  author = {Hashemzadeh, Maryam and Kaufeld, Greta and White, Martha and Martin, Andrea E. and Fyshe, Alona},
  title = {From Language to Language-ish: How Brain-Like is an {LSTM}'s Representation of Nonsensical Language Stimuli?},
  journal = {CoRR},
  volume = {abs/2010.07435},
  year = {2020},
  url = {https://arxiv.org/abs/2010.07435},
  eprinttype = {arXiv},
  eprint = {2010.07435},
  timestamp = {Tue, 20 Oct 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2010-07435.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the acronym MDPs is brace-protected in the title so sentence-casing styles keep its capitals.}
@article{DBLP:journals/corr/abs-2010-12645,
  author = {Chandak, Yash and Jordan, Scott M. and Theocharous, Georgios and White, Martha and Thomas, Philip S.},
  title = {Towards Safe Policy Improvement for Non-Stationary {MDPs}},
  journal = {CoRR},
  volume = {abs/2010.12645},
  year = {2020},
  url = {https://arxiv.org/abs/2010.12645},
  eprinttype = {arXiv},
  eprint = {2010.12645},
  timestamp = {Mon, 02 Nov 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2010-12645.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: Sim2Real is brace-protected, the workshop name R:SS is rejoined into one protected token, and the compound surname Gamboa Higuera is kept together by using comma form.}
@article{DBLP:journals/corr/abs-2012-03806,
  author = {H{\"{o}}fer, Sebastian and Bekris, Kostas E. and Handa, Ankur and Gamboa Higuera, Juan Camilo and Golemo, Florian and Mozifian, Melissa and Atkeson, Christopher G. and Fox, Dieter and Goldberg, Ken and Leonard, John and Liu, C. Karen and Peters, Jan and Song, Shuran and Welinder, Peter and White, Martha},
  title = {Perspectives on {Sim2Real} Transfer for Robotics: {A} Summary of the {R:SS} 2020 Workshop},
  journal = {CoRR},
  volume = {abs/2012.03806},
  year = {2020},
  url = {https://arxiv.org/abs/2012.03806},
  eprinttype = {arXiv},
  eprint = {2012.03806},
  timestamp = {Tue, 29 Jun 2021 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2012-03806.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/JacobsenSLDWW19,
  author = {Jacobsen, Andrew and Schlegel, Matthew and Linke, Cameron and Degris, Thomas and White, Adam and White, Martha},
  title = {Meta-Descent for Online, Continual Prediction},
  booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages = {3943--3950},
  publisher = {{AAAI} Press},
  year = {2019},
  url = {https://doi.org/10.1609/aaai.v33i01.33013943},
  doi = {10.1609/AAAI.V33I01.33013943},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl = {https://dblp.org/rec/conf/aaai/JacobsenSLDWW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/LiuKLW19,
  author = {Liu, Vincent and Kumaraswamy, Raksha and Le, Lei and White, Martha},
  title = {The Utility of Sparse Representations for Control in Reinforcement Learning},
  booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages = {4384--4391},
  publisher = {{AAAI} Press},
  year = {2019},
  url = {https://doi.org/10.1609/aaai.v33i01.33014384},
  doi = {10.1609/AAAI.V33I01.33014384},
  timestamp = {Tue, 02 Feb 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/aaai/LiuKLW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/ChungNJW19,
  author = {Chung, Wesley and Nath, Somjit and Joseph, Ajin and White, Martha},
  title = {Two-Timescale Networks for Nonlinear Value Function Approximation},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year = {2019},
  url = {https://openreview.net/forum?id=rJleN20qK7},
  timestamp = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/iclr/ChungNJW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the proper noun Dyna is brace-protected in the title so sentence-casing styles keep its capital.}
@inproceedings{DBLP:conf/ijcai/PanYFW19,
  author = {Pan, Yangchen and Yao, Hengshuai and Farahmand, Amir{-}massoud and White, Martha},
  editor = {Kraus, Sarit},
  title = {Hill Climbing on Value Estimates for Search-control in {Dyna}},
  booktitle = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16, 2019},
  pages = {3209--3215},
  publisher = {ijcai.org},
  year = {2019},
  url = {https://doi.org/10.24963/ijcai.2019/445},
  doi = {10.24963/IJCAI.2019/445},
  timestamp = {Tue, 20 Aug 2019 16:18:18 +0200},
  biburl = {https://dblp.org/rec/conf/ijcai/PanYFW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/WanZWWS19,
  author = {Wan, Yi and Zaheer, Muhammad and White, Adam and White, Martha and Sutton, Richard S.},
  editor = {Kraus, Sarit},
  title = {Planning with Expectation Models},
  booktitle = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16, 2019},
  pages = {3649--3655},
  publisher = {ijcai.org},
  year = {2019},
  url = {https://doi.org/10.24963/ijcai.2019/506},
  doi = {10.24963/IJCAI.2019/506},
  timestamp = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/ijcai/WanZWWS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SchlegelCGQW19,
  author = {Schlegel, Matthew and Chung, Wesley and Graves, Daniel and Qian, Jian and White, Martha},
  editor = {Wallach, Hanna M. and Larochelle, Hugo and Beygelzimer, Alina and d'Alch{\'{e}}{-}Buc, Florence and Fox, Emily B. and Garnett, Roman},
  title = {Importance Resampling for Off-policy Prediction},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages = {1797--1807},
  year = {2019},
  url = {https://proceedings.neurips.cc/paper/2019/hash/9ac403da7947a183884c18a67d3aa8de-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/SchlegelCGQW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/JavedW19,
  author = {Javed, Khurram and White, Martha},
  editor = {Wallach, Hanna M. and Larochelle, Hugo and Beygelzimer, Alina and d'Alch{\'{e}}{-}Buc, Florence and Fox, Emily B. and Garnett, Roman},
  title = {Meta-Learning Representations for Continual Learning},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages = {1818--1828},
  year = {2019},
  url = {https://proceedings.neurips.cc/paper/2019/hash/f4dd765c12f2ef67f98f3558c282a9cd-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/nips/JavedW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/AminmansourPLPM19,
  author = {Aminmansour, Farzane and Patterson, Andrew and Le, Lei and Peng, Yisu and Mitchell, Daniel and Pestilli, Franco and Caiafa, Cesar F. and Greiner, Russell and White, Martha},
  editor = {Wallach, Hanna M. and Larochelle, Hugo and Beygelzimer, Alina and d'Alch{\'{e}}{-}Buc, Florence and Fox, Emily B. and Garnett, Roman},
  title = {Learning Macroscopic Brain Connectomes via Group-Sparse Factorization},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages = {8847--8857},
  year = {2019},
  url = {https://proceedings.neurips.cc/paper/2019/hash/0bfce127947574733b19da0f30739fcd-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/nips/AminmansourPLPM19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1904-01191,
  author = {Wan, Yi and Zaheer, Muhammad and White, Adam and White, Martha and Sutton, Richard S.},
  title = {Planning with Expectation Models},
  journal = {CoRR},
  volume = {abs/1904.01191},
  year = {2019},
  url = {http://arxiv.org/abs/1904.01191},
  eprinttype = {arXiv},
  eprint = {1904.01191},
  timestamp = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-1904-01191.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1905-12588,
  author = {Javed, Khurram and White, Martha},
  title = {Meta-Learning Representations for Continual Learning},
  journal = {CoRR},
  volume = {abs/1905.12588},
  year = {2019},
  url = {http://arxiv.org/abs/1905.12588},
  eprinttype = {arXiv},
  eprint = {1905.12588},
  timestamp = {Mon, 03 Jun 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1905-12588.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-04328,
  author = {Schlegel, Matthew and Chung, Wesley and Graves, Daniel and Qian, Jian and White, Martha},
  title = {Importance Resampling for Off-policy Prediction},
  journal = {CoRR},
  volume = {abs/1906.04328},
  year = {2019},
  url = {http://arxiv.org/abs/1906.04328},
  eprinttype = {arXiv},
  eprint = {1906.04328},
  timestamp = {Fri, 14 Jun 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1906-04328.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the proper noun Dyna is brace-protected in the title so sentence-casing styles keep its capital.}
@article{DBLP:journals/corr/abs-1906-07791,
  author = {Pan, Yangchen and Yao, Hengshuai and Farahmand, Amir{-}massoud and White, Martha},
  title = {Hill Climbing on Value Estimates for Search-control in {Dyna}},
  journal = {CoRR},
  volume = {abs/1906.07791},
  year = {2019},
  url = {http://arxiv.org/abs/1906.07791},
  eprinttype = {arXiv},
  eprint = {1906.07791},
  timestamp = {Mon, 24 Jun 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1906-07791.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-07865,
  author = {Linke, Cam and Ady, Nadia M. and White, Martha and Degris, Thomas and White, Adam},
  title = {Adapting Behaviour via Intrinsic Reward: {A} Survey and Empirical Study},
  journal = {CoRR},
  volume = {abs/1906.07865},
  year = {2019},
  url = {http://arxiv.org/abs/1906.07865},
  eprinttype = {arXiv},
  eprint = {1906.07865},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1906-07865.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1907-07751,
  author = {Jacobsen, Andrew and Schlegel, Matthew and Linke, Cameron and Degris, Thomas and White, Adam and White, Martha},
  title = {Meta-descent for Online, Continual Prediction},
  journal = {CoRR},
  volume = {abs/1907.07751},
  year = {2019},
  url = {http://arxiv.org/abs/1907.07751},
  eprinttype = {arXiv},
  eprint = {1907.07751},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1907-07751.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-01705,
  author = {Javed, Khurram and Yao, Hengshuai and White, Martha},
  title = {Is Fast Adaptation All You Need?},
  journal = {CoRR},
  volume = {abs/1910.01705},
  year = {2019},
  url = {http://arxiv.org/abs/1910.01705},
  eprinttype = {arXiv},
  eprint = {1910.01705},
  timestamp = {Wed, 09 Oct 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1910-01705.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/ImaniW18,
  author = {Imani, Ehsan and White, Martha},
  editor = {Dy, Jennifer G. and Krause, Andreas},
  title = {Improving Regression Performance with Distributional Losses},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series = {Proceedings of Machine Learning Research},
  volume = {80},
  pages = {2162--2171},
  publisher = {{PMLR}},
  year = {2018},
  url = {http://proceedings.mlr.press/v80/imani18a.html},
  timestamp = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl = {https://dblp.org/rec/conf/icml/ImaniW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PanFWNGN18,
  author = {Pan, Yangchen and Farahmand, Amir{-}massoud and White, Martha and Nabi, Saleh and Grover, Piyush and Nikovski, Daniel},
  editor = {Dy, Jennifer G. and Krause, Andreas},
  title = {Reinforcement Learning with Function-Valued Action Spaces for Partial Differential Equation Control},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series = {Proceedings of Machine Learning Research},
  volume = {80},
  pages = {3983--3992},
  publisher = {{PMLR}},
  year = {2018},
  url = {http://proceedings.mlr.press/v80/pan18a.html},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/PanFWNGN18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Differs from the dblp export: the article that opens the subtitle is capitalised and brace-protected, matching how the other subtitles in this file are written.}
@inproceedings{DBLP:conf/ijcai/PanZWPW18,
  author = {Pan, Yangchen and Zaheer, Muhammad and White, Adam and Patterson, Andrew and White, Martha},
  editor = {Lang, J{\'{e}}r{\^{o}}me},
  title = {Organizing Experience: {A} Deeper Look at Replay Mechanisms for Sample-Based Planning in Continuous State Domains},
  booktitle = {Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence, {IJCAI} 2018, July 13-19, 2018, Stockholm, Sweden},
  pages = {4794--4800},
  publisher = {ijcai.org},
  year = {2018},
  url = {https://doi.org/10.24963/ijcai.2018/666},
  doi = {10.24963/IJCAI.2018/666},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/ijcai/PanZWPW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ImaniGW18,
  author = {Imani, Ehsan and Graves, Eric and White, Martha},
  editor = {Bengio, Samy and Wallach, Hanna M. and Larochelle, Hugo and Grauman, Kristen and Cesa{-}Bianchi, Nicol{\`{o}} and Garnett, Roman},
  title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
  booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages = {96--106},
  year = {2018},
  url = {https://proceedings.neurips.cc/paper/2018/hash/3ef815416f775098fe977004015c6193-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/ImaniGW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LePW18,
  author = {Le, Lei and Patterson, Andrew and White, Martha},
  editor = {Bengio, Samy and Wallach, Hanna M. and Larochelle, Hugo and Grauman, Kristen and Cesa{-}Bianchi, Nicol{\`{o}} and Garnett, Roman},
  title = {Supervised autoencoders: Improving generalization performance with unsupervised regularizers},
  booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages = {107--117},
  year = {2018},
  url = {https://proceedings.neurips.cc/paper/2018/hash/2a38a4a9316c49e5a833517c45d31070-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/nips/LePW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/KumaraswamySWW18,
  author = {Kumaraswamy, Raksha and Schlegel, Matthew and White, Adam and White, Martha},
  editor = {Bengio, Samy and Wallach, Hanna M. and Larochelle, Hugo and Grauman, Kristen and Cesa{-}Bianchi, Nicol{\`{o}} and Garnett, Roman},
  title = {Context-dependent upper-confidence bounds for directed exploration},
  booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages = {4784--4794},
  year = {2018},
  url = {https://proceedings.neurips.cc/paper/2018/hash/f516dfb84b9051ed85b89cdc3a8ab7f5-Abstract.html},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/nips/KumaraswamySWW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/SherstanABYWWS18,
  author = {Sherstan, Craig and Ashley, Dylan R. and Bennett, Brendan and Young, Kenny and White, Adam and White, Martha and Sutton, Richard S.},
  editor = {Globerson, Amir and Silva, Ricardo},
  title = {Comparing Direct and Indirect Temporal-Difference Methods for Estimating the Variance of the Return},
  booktitle = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10, 2018},
  pages = {63--72},
  publisher = {{AUAI} Press},
  year = {2018},
  url = {http://auai.org/uai2018/proceedings/papers/35.pdf},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/uai/SherstanABYWWS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/SajedCW18,
  author = {Sajed, Touqir and Chung, Wesley and White, Martha},
  editor = {Globerson, Amir and Silva, Ricardo},
  title = {High-confidence error estimates for learned value functions},
  booktitle = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10, 2018},
  pages = {683--692},
  publisher = {{AUAI} Press},
  year = {2018},
  url = {http://auai.org/uai2018/proceedings/papers/245.pdf},
  timestamp = {Wed, 09 Mar 2022 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/uai/SajedCW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1801-08287,
  author = {Sherstan, Craig and Bennett, Brendan and Young, Kenny and Ashley, Dylan R. and White, Adam and White, Martha and Sutton, Richard S.},
  title = {Directly Estimating the Variance of the {\(\lambda\)}-Return Using Temporal-Difference Methods},
  journal = {CoRR},
  volume = {abs/1801.08287},
  year = {2018},
  url = {http://arxiv.org/abs/1801.08287},
  eprinttype = {arXiv},
  eprint = {1801.08287},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1801-08287.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-04613,
  author     = {Ehsan Imani and Martha White},
  title      = {Improving Regression Performance with Distributional Losses},
  journal    = {CoRR},
  volume     = {abs/1806.04613},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.04613},
  eprinttype = {arXiv},
  eprint     = {1806.04613},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-04613.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-04624,
  author     = {Yangchen Pan and Muhammad Zaheer and Adam White and Andrew Patterson and Martha White},
  title      = {Organizing Experience: {A} Deeper Look at Replay Mechanisms for Sample-based Planning in Continuous State Domains},
  journal    = {CoRR},
  volume     = {abs/1806.04624},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.04624},
  eprinttype = {arXiv},
  eprint     = {1806.04624},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-04624.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-06931,
  author     = {Yangchen Pan and Amir{-}massoud Farahmand and Martha White and Saleh Nabi and Piyush Grover and Daniel Nikovski},
  title      = {Reinforcement Learning with Function-Valued Action Spaces for Partial Differential Equation Control},
  journal    = {CoRR},
  volume     = {abs/1806.06931},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.06931},
  eprinttype = {arXiv},
  eprint     = {1806.06931},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-06931.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1807-06763,
  author     = {Matthew Schlegel and Adam White and Andrew Patterson and Martha White},
  title      = {General Value Function Networks},
  journal    = {CoRR},
  volume     = {abs/1807.06763},
  year       = {2018},
  url        = {http://arxiv.org/abs/1807.06763},
  eprinttype = {arXiv},
  eprint     = {1807.06763},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1807-06763.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1808-09127,
  author     = {Touqir Sajed and Wesley Chung and Martha White},
  title      = {High-confidence error estimates for learned value functions},
  journal    = {CoRR},
  volume     = {abs/1808.09127},
  year       = {2018},
  url        = {http://arxiv.org/abs/1808.09127},
  eprinttype = {arXiv},
  eprint     = {1808.09127},
  timestamp  = {Mon, 03 Sep 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1808-09127.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1810-09103,
  author     = {Sungsu Lim and Ajin Joseph and Lei Le and Yangchen Pan and Martha White},
  title      = {Actor-Expert: {A} Framework for using Action-Value Methods in Continuous Action Spaces},
  journal    = {CoRR},
  volume     = {abs/1810.09103},
  year       = {2018},
  url        = {http://arxiv.org/abs/1810.09103},
  eprinttype = {arXiv},
  eprint     = {1810.09103},
  timestamp  = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1810-09103.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-02597,
  author     = {Sina Ghiassian and Andrew Patterson and Martha White and Richard S. Sutton and Adam White},
  title      = {Online Off-policy Prediction},
  journal    = {CoRR},
  volume     = {abs/1811.02597},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.02597},
  eprinttype = {arXiv},
  eprint     = {1811.02597},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-02597.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-06626,
  author     = {Vincent Liu and Raksha Kumaraswamy and Lei Le and Martha White},
  title      = {The Utility of Sparse Representations for Control in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1811.06626},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.06626},
  eprinttype = {arXiv},
  eprint     = {1811.06626},
  timestamp  = {Sun, 25 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-06626.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-06629,
  author     = {Raksha Kumaraswamy and Matthew Schlegel and Adam White and Martha White},
  title      = {Context-Dependent Upper-Confidence Bounds for Directed Exploration},
  journal    = {CoRR},
  volume     = {abs/1811.06629},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.06629},
  eprinttype = {arXiv},
  eprint     = {1811.06629},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-06629.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@comment{Review note: the place name Barbados in the title below is brace-protected so that sentence-casing bibliography styles do not lowercase it.}
@article{DBLP:journals/corr/abs-1811-07004,
  author     = {Tom Schaul and Hado van Hasselt and Joseph Modayil and Martha White and Adam White and Pierre{-}Luc Bacon and Jean Harb and Shibl Mourad and Marc G. Bellemare and Doina Precup},
  title      = {The {Barbados} 2018 List of Open Issues in Continual Learning},
  journal    = {CoRR},
  volume     = {abs/1811.07004},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.07004},
  eprinttype = {arXiv},
  eprint     = {1811.07004},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-07004.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-09013,
  author     = {Ehsan Imani and Eric Graves and Martha White},
  title      = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
  journal    = {CoRR},
  volume     = {abs/1811.09013},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.09013},
  eprinttype = {arXiv},
  eprint     = {1811.09013},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-09013.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-00914,
  author     = {Minghan Li and Tanli Zuo and Ruicheng Li and Martha White and Weishi Zheng},
  title      = {Accelerating Large Scale Knowledge Distillation via Dynamic Importance Sampling},
  journal    = {CoRR},
  volume     = {abs/1812.00914},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.00914},
  eprinttype = {arXiv},
  eprint     = {1812.00914},
  timestamp  = {Tue, 19 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-00914.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/JainWR17,
  author    = {Shantanu Jain and Martha White and Predrag Radivojac},
  editor    = {Satinder Singh and Shaul Markovitch},
  title     = {Recovering True Classifier Performance in Positive-Unlabeled Learning},
  booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence, February 4-9, 2017, San Francisco, California, {USA}},
  pages     = {2066--2072},
  publisher = {{AAAI} Press},
  year      = {2017},
  url       = {https://doi.org/10.1609/aaai.v31i1.10937},
  doi       = {10.1609/AAAI.V31I1.10937},
  timestamp = {Mon, 04 Sep 2023 14:40:32 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/JainWR17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/PanWW17,
  author    = {Yangchen Pan and Adam White and Martha White},
  editor    = {Satinder Singh and Shaul Markovitch},
  title     = {Accelerated Gradient Temporal Difference Learning},
  booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence, February 4-9, 2017, San Francisco, California, {USA}},
  pages     = {2464--2470},
  publisher = {{AAAI} Press},
  year      = {2017},
  url       = {https://doi.org/10.1609/aaai.v31i1.10829},
  doi       = {10.1609/AAAI.V31I1.10829},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/PanWW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SchlegelPCW17,
  author    = {Matthew Schlegel and Yangchen Pan and Jiecao Chen and Martha White},
  editor    = {Doina Precup and Yee Whye Teh},
  title     = {Adapting Kernel Representations Online Using Submodular Maximization},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {3037--3046},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v70/schlegel17a.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SchlegelPCW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/White17,
  author    = {Martha White},
  editor    = {Doina Precup and Yee Whye Teh},
  title     = {Unifying Task Specification in Reinforcement Learning},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {3742--3750},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v70/white17a.html},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/White17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/LeKW17,
  author    = {Lei Le and Raksha Kumaraswamy and Martha White},
  editor    = {Carles Sierra},
  title     = {Learning Sparse Representations in Reinforcement Learning with Sparse Coding},
  booktitle = {Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August 19-25, 2017},
  pages     = {2067--2073},
  publisher = {ijcai.org},
  year      = {2017},
  url       = {https://doi.org/10.24963/ijcai.2017/287},
  doi       = {10.24963/IJCAI.2017/287},
  timestamp = {Tue, 20 Aug 2019 16:16:54 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/LeKW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/KaramiWSS17,
  author    = {Mahdi Karami and Martha White and Dale Schuurmans and Csaba Szepesv{\'{a}}ri},
  editor    = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title     = {Multi-view Matrix Factorization for Linear Dynamical System Estimation},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages     = {7092--7101},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/c2964caac096f26db222cb325aa267cb-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/KaramiWSS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/PanAW17,
  author    = {Yangchen Pan and Erfan Sadeqi Azer and Martha White},
  editor    = {Gal Elidan and Kristian Kersting and Alexander Ihler},
  title     = {Effective sketching methods for value function approximation},
  booktitle = {Proceedings of the Thirty-Third Conference on Uncertainty in Artificial Intelligence, {UAI} 2017, Sydney, Australia, August 11-15, 2017},
  publisher = {{AUAI} Press},
  year      = {2017},
  url       = {http://auai.org/uai2017/proceedings/papers/81.pdf},
  timestamp = {Mon, 05 Dec 2022 15:54:38 +0100},
  biburl    = {https://dblp.org/rec/conf/uai/PanAW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/JainWR17,
  author     = {Shantanu Jain and Martha White and Predrag Radivojac},
  title      = {Recovering True Classifier Performance in Positive-Unlabeled Learning},
  journal    = {CoRR},
  volume     = {abs/1702.00518},
  year       = {2017},
  url        = {http://arxiv.org/abs/1702.00518},
  eprinttype = {arXiv},
  eprint     = {1702.00518},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/JainWR17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/LeKW17,
  author     = {Lei Le and Raksha Kumaraswamy and Martha White},
  title      = {Learning Sparse Representations in Reinforcement Learning with Sparse Coding},
  journal    = {CoRR},
  volume     = {abs/1707.08316},
  year       = {2017},
  url        = {http://arxiv.org/abs/1707.08316},
  eprinttype = {arXiv},
  eprint     = {1707.08316},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/LeKW17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1708-01298,
  author     = {Yangchen Pan and Erfan Sadeqi Azer and Martha White},
  title      = {Effective sketching methods for value function approximation},
  journal    = {CoRR},
  volume     = {abs/1708.01298},
  year       = {2017},
  url        = {http://arxiv.org/abs/1708.01298},
  eprinttype = {arXiv},
  eprint     = {1708.01298},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1708-01298.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/SuttonMW16,
  author    = {Richard S. Sutton and Ashique Rupam Mahmood and Martha White},
  title     = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference Learning},
  journal   = {J. Mach. Learn. Res.},
  volume    = {17},
  pages     = {73:1--73:29},
  year      = {2016},
  url       = {http://jmlr.org/papers/v17/14-488.html},
  timestamp = {Wed, 10 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/SuttonMW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/AdamW16,
  author    = {Adam White and Martha White},
  editor    = {Catholijn M. Jonker and Stacy Marsella and John Thangarajah and Karl Tuyls},
  title     = {Investigating Practical Linear Temporal Difference Learning},
  booktitle = {Proceedings of the 2016 International Conference on Autonomous Agents {\&} Multiagent Systems, Singapore, May 9-13, 2016},
  pages     = {494--502},
  publisher = {{ACM}},
  year      = {2016},
  url       = {http://dl.acm.org/citation.cfm?id=2936997},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/AdamW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/WhiteW16,
  author    = {Martha White and Adam White},
  editor    = {Catholijn M. Jonker and Stacy Marsella and John Thangarajah and Karl Tuyls},
  title     = {A Greedy Approach to Adapting the Trace Parameter for Temporal Difference Learning},
  booktitle = {Proceedings of the 2016 International Conference on Autonomous Agents {\&} Multiagent Systems, Singapore, May 9-13, 2016},
  pages     = {557--565},
  publisher = {{ACM}},
  year      = {2016},
  url       = {http://dl.acm.org/citation.cfm?id=2937006},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/WhiteW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/GehringPW16,
  author    = {Clement Gehring and Yangchen Pan and Martha White},
  editor    = {Subbarao Kambhampati},
  title     = {Incremental Truncated {LSTD}},
  booktitle = {Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence, {IJCAI} 2016, New York, NY, USA, 9-15 July 2016},
  pages     = {1505--1511},
  publisher = {{IJCAI/AAAI} Press},
  year      = {2016},
  url       = {http://www.ijcai.org/Abstract/16/216},
  timestamp = {Tue, 20 Aug 2019 16:19:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/GehringPW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/JainWR16,
  author    = {Shantanu Jain and Martha White and Predrag Radivojac},
  editor    = {Daniel D. Lee and Masashi Sugiyama and Ulrike von Luxburg and Isabelle Guyon and Roman Garnett},
  title     = {Estimating the class prior and posterior from noisy positives and unlabeled data},
  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5-10, 2016, Barcelona, Spain},
  pages     = {2685--2693},
  year      = {2016},
  url       = {https://proceedings.neurips.cc/paper/2016/hash/79a49b3e3762632813f9e35f4ba53d6c-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/JainWR16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/JainWTR16,
  author     = {Shantanu Jain and Martha White and Michael W. Trosset and Predrag Radivojac},
  title      = {Nonparametric semi-supervised learning of class proportions},
  journal    = {CoRR},
  volume     = {abs/1601.01944},
  year       = {2016},
  url        = {http://arxiv.org/abs/1601.01944},
  eprinttype = {arXiv},
  eprint     = {1601.01944},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/JainWTR16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/WhiteW16,
  author     = {Adam White and Martha White},
  title      = {Investigating practical, linear temporal difference learning},
  journal    = {CoRR},
  volume     = {abs/1602.08771},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.08771},
  eprinttype = {arXiv},
  eprint     = {1602.08771},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/WhiteW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/LeW16,
  author     = {Lei Le and Martha White},
  title      = {Global optimization of factor models using alternating minimization},
  journal    = {CoRR},
  volume     = {abs/1604.04942},
  year       = {2016},
  url        = {http://arxiv.org/abs/1604.04942},
  eprinttype = {arXiv},
  eprint     = {1604.04942},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/LeW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/JainWR16,
  author     = {Shantanu Jain and Martha White and Predrag Radivojac},
  title      = {Estimating the class prior and posterior from noisy positives and unlabeled data},
  journal    = {CoRR},
  volume     = {abs/1606.08561},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.08561},
  eprinttype = {arXiv},
  eprint     = {1606.08561},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/JainWR16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/WhiteW16a,
  author     = {Martha White and Adam White},
  title      = {A Greedy Approach to Adapting the Trace Parameter for Temporal Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1607.00446},
  year       = {2016},
  url        = {http://arxiv.org/abs/1607.00446},
  eprinttype = {arXiv},
  eprint     = {1607.00446},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/WhiteW16a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/White16,
  author     = {Martha White},
  title      = {Unifying task specification in reinforcement learning},
  journal    = {CoRR},
  volume     = {abs/1609.01995},
  year       = {2016},
  url        = {http://arxiv.org/abs/1609.01995},
  eprinttype = {arXiv},
  eprint     = {1609.01995},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/White16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/PanWW16,
  author     = {Yangchen Pan and Adam White and Martha White},
  title      = {Accelerated Gradient Temporal Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1611.09328},
  year       = {2016},
  url        = {http://arxiv.org/abs/1611.09328},
  eprinttype = {arXiv},
  eprint     = {1611.09328},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PanWW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/WhiteWBS15,
  author    = {Martha White and Junfeng Wen and Michael Bowling and Dale Schuurmans},
  editor    = {Blai Bonet and Sven Koenig},
  title     = {Optimal Estimation of Multivariate {ARMA} Models},
  booktitle = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence, January 25-30, 2015, Austin, Texas, {USA}},
  pages     = {3080--3086},
  publisher = {{AAAI} Press},
  year      = {2015},
  url       = {https://doi.org/10.1609/aaai.v29i1.9614},
  doi       = {10.1609/AAAI.V29I1.9614},
  timestamp = {Mon, 18 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/WhiteWBS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@comment{Review note: the doi and url fields below hold the raw identifier with a plain underscore. These fields are typeset verbatim, so a LaTeX-escaped underscore would put a literal backslash into the link.}
@inproceedings{DBLP:conf/pkdd/MirzazadehWGS15,
  author    = {Farzaneh Mirzazadeh and Martha White and Andr{\'{a}}s Gy{\"{o}}rgy and Dale Schuurmans},
  editor    = {Annalisa Appice and Pedro Pereira Rodrigues and V{\'{\i}}tor Santos Costa and Carlos Soares and Jo{\~{a}}o Gama and Al{\'{\i}}pio Jorge},
  title     = {Scalable Metric Learning for Co-Embedding},
  booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, {ECML} {PKDD} 2015, Porto, Portugal, September 7-11, 2015, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {9284},
  pages     = {625--642},
  publisher = {Springer},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-23528-8_39},
  doi       = {10.1007/978-3-319-23528-8_39},
  timestamp = {Mon, 30 Nov 2020 08:47:26 +0100},
  biburl    = {https://dblp.org/rec/conf/pkdd/MirzazadehWGS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SuttonMW15,
  author     = {Richard S. Sutton and Ashique Rupam Mahmood and Martha White},
  title      = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1503.04269},
  year       = {2015},
  url        = {http://arxiv.org/abs/1503.04269},
  eprinttype = {arXiv},
  eprint     = {1503.04269},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonMW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MahmoodYWS15,
  author     = {Ashique Rupam Mahmood and Huizhen Yu and Martha White and Richard S. Sutton},
  title      = {Emphatic Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1507.01569},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.01569},
  eprinttype = {arXiv},
  eprint     = {1507.01569},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MahmoodYWS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GehringW15,
  author     = {Clement Gehring and Martha White},
  title      = {Incremental Truncated {LSTD}},
  journal    = {CoRR},
  volume     = {abs/1511.08495},
  year       = {2015},
  url        = {http://arxiv.org/abs/1511.08495},
  eprinttype = {arXiv},
  eprint     = {1511.08495},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GehringW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dcc/VenessWBG13,
  author    = {Joel Veness and Martha White and Michael Bowling and Andr{\'{a}}s Gy{\"{o}}rgy},
  editor    = {Ali Bilgin and Michael W. Marcellin and Joan Serra{-}Sagrist{\`{a}} and James A. Storer},
  title     = {Partition Tree Weighting},
  booktitle = {2013 Data Compression Conference, {DCC} 2013, Snowbird, UT, USA, March 20-22, 2013},
  pages     = {321--330},
  publisher = {{IEEE}},
  year      = {2013},
  url       = {https://doi.org/10.1109/DCC.2013.40},
  doi       = {10.1109/DCC.2013.40},
  timestamp = {Wed, 20 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/dcc/VenessWBG13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/DegrisWS12,
  author    = {Thomas Degris and Martha White and Richard S. Sutton},
  title     = {Linear Off-Policy Actor-Critic},
  booktitle = {Proceedings of the 29th International Conference on Machine Learning, {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher = {icml.cc / Omnipress},
  year      = {2012},
  url       = {http://icml.cc/2012/papers/268.pdf},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/DegrisWS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WhiteYZS12,
  author    = {Martha White and Yaoliang Yu and Xinhua Zhang and Dale Schuurmans},
  editor    = {Peter L. Bartlett and Fernando C. N. Pereira and Christopher J. C. Burges and L{\'{e}}on Bottou and Kilian Q. Weinberger},
  title     = {Convex Multi-view Subspace Learning},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages     = {1682--1690},
  year      = {2012},
  url       = {https://proceedings.neurips.cc/paper/2012/hash/1141938ba2c2b13f5505d7c424ebae5f-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/WhiteYZS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:journals/jmlr/WhiteS12,
  author    = {Martha White and Dale Schuurmans},
  editor    = {Neil D. Lawrence and Mark A. Girolami},
  title     = {Generalized Optimal Reverse Prediction},
  booktitle = {Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, {AISTATS} 2012, La Palma, Canary Islands, Spain, April 21-23, 2012},
  series    = {{JMLR} Proceedings},
  volume    = {22},
  pages     = {1305--1313},
  publisher = {JMLR.org},
  year      = {2012},
  url       = {http://proceedings.mlr.press/v22/white12.html},
  timestamp = {Wed, 29 May 2019 08:41:44 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/WhiteS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1205-4839,
  author     = {Thomas Degris and Martha White and Richard S. Sutton},
  title      = {Off-Policy Actor-Critic},
  journal    = {CoRR},
  volume     = {abs/1205.4839},
  year       = {2012},
  url        = {http://arxiv.org/abs/1205.4839},
  eprinttype = {arXiv},
  eprint     = {1205.4839},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1205-4839.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1211-0587,
  author     = {Joel Veness and Martha White and Michael Bowling and Andr{\'{a}}s Gy{\"{o}}rgy},
  title      = {Partition Tree Weighting},
  journal    = {CoRR},
  volume     = {abs/1211.0587},
  year       = {2012},
  url        = {http://arxiv.org/abs/1211.0587},
  eprinttype = {arXiv},
  eprint     = {1211.0587},
  timestamp  = {Wed, 20 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1211-0587.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/ZhangYWHS11,
  author    = {Xinhua Zhang and Yaoliang Yu and Martha White and Ruitong Huang and Dale Schuurmans},
  editor    = {Wolfram Burgard and Dan Roth},
  title     = {Convex Sparse Coding, Subspace Learning, and Semi-Supervised Extensions},
  booktitle = {Proceedings of the Twenty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2011, San Francisco, California, USA, August 7-11, 2011},
  pages     = {567--573},
  publisher = {{AAAI} Press},
  year      = {2011},
  url       = {https://doi.org/10.1609/aaai.v25i1.7935},
  doi       = {10.1609/AAAI.V25I1.7935},
  timestamp = {Mon, 04 Sep 2023 16:05:54 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/ZhangYWHS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WhiteW10,
  author    = {Martha White and Adam White},
  editor    = {John D. Lafferty and Christopher K. I. Williams and John Shawe{-}Taylor and Richard S. Zemel and Aron Culotta},
  title     = {Interval Estimation for Reinforcement-Learning Algorithms in Continuous-State Domains},
  booktitle = {Advances in Neural Information Processing Systems 23: 24th Annual Conference on Neural Information Processing Systems 2010. Proceedings of a meeting held 6-9 December 2010, Vancouver, British Columbia, Canada},
  pages     = {2433--2441},
  publisher = {Curran Associates, Inc.},
  year      = {2010},
  url       = {https://proceedings.neurips.cc/paper/2010/hash/13f3cf8c531952d72e5847c4183e6910-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/WhiteW10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/YuYXWS10,
  author    = {Yaoliang Yu and Min Yang and Linli Xu and Martha White and Dale Schuurmans},
  editor    = {John D. Lafferty and Christopher K. I. Williams and John Shawe{-}Taylor and Richard S. Zemel and Aron Culotta},
  title     = {Relaxed Clipping: {A} Global Training Method for Robust Regression and Classification},
  booktitle = {Advances in Neural Information Processing Systems 23: 24th Annual Conference on Neural Information Processing Systems 2010. Proceedings of a meeting held 6-9 December 2010, Vancouver, British Columbia, Canada},
  pages     = {2532--2540},
  publisher = {Curran Associates, Inc.},
  year      = {2010},
  url       = {https://proceedings.neurips.cc/paper/2010/hash/01882513d5fa7c329e940dda99b12147-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/YuYXWS10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/XuWS09,
  author    = {Linli Xu and Martha White and Dale Schuurmans},
  editor    = {Andrea Pohoreckyj Danyluk and L{\'{e}}on Bottou and Michael L. Littman},
  title     = {Optimal reverse prediction: a unified perspective on supervised, unsupervised and semi-supervised learning},
  booktitle = {Proceedings of the 26th Annual International Conference on Machine Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {382},
  pages     = {1137--1144},
  publisher = {{ACM}},
  year      = {2009},
  url       = {https://doi.org/10.1145/1553374.1553519},
  doi       = {10.1145/1553374.1553519},
  timestamp = {Tue, 06 Nov 2018 16:58:29 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/XuWS09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/WhiteB09,
  author    = {Martha White and Michael H. Bowling},
  editor    = {Craig Boutilier},
  title     = {Learning a Value Analysis Tool for Agent Evaluation},
  booktitle = {{IJCAI} 2009, Proceedings of the 21st International Joint Conference on Artificial Intelligence, Pasadena, California, USA, July 11-17, 2009},
  pages     = {1976--1981},
  year      = {2009},
  url       = {http://ijcai.org/Proceedings/09/Papers/326.pdf},
  timestamp = {Tue, 20 Aug 2019 16:16:40 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/WhiteB09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information, see our F.A.Q.