BibTeX record journals/corr/abs-1907-00456

download as .bib file

@comment{arXiv preprint record (CoRR abs/1907.00456) as exported from DBLP. Author names are stored in "Last, First" form so that name parsing does not depend on capitalisation heuristics.}
@article{DBLP:journals/corr/abs-1907-00456,
  author       = {Jaques, Natasha and
                  Ghandeharioun, Asma and
                  Shen, Judy Hanwen and
                  Ferguson, Craig and
                  Lapedriza, {\`{A}}gata and
                  Jones, Noah and
                  Gu, Shixiang and
                  Picard, Rosalind W.},
  title        = {Way Off-Policy Batch Deep Reinforcement Learning of Implicit Human
                  Preferences in Dialog},
  journal      = {CoRR},
  volume       = {abs/1907.00456},
  year         = {2019},
  url          = {https://arxiv.org/abs/1907.00456},
  eprinttype   = {arXiv},
  eprint       = {1907.00456},
  timestamp    = {Thu, 14 Oct 2021 09:17:05 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-00456.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics