BibTeX record journals/corr/abs-2403-05171

download as .bib file

% arXiv preprint 2403.05171, listed under CoRR; record exported from dblp.
% Author names use the "Last, First" form so name parsing is unambiguous.
@article{DBLP:journals/corr/abs-2403-05171,
  author     = {Zhang, Xiaoying and Ton, Jean{-}Francois and Shen, Wei and
                Wang, Hongning and Liu, Yang},
  title      = {Overcoming Reward Overoptimization via Adversarial Policy
                Optimization with Lightweight Uncertainty Estimation},
  journal    = {CoRR},
  volume     = {abs/2403.05171},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2403.05171},
  doi        = {10.48550/ARXIV.2403.05171},
  url        = {https://doi.org/10.48550/arXiv.2403.05171},
  timestamp  = {Tue, 13 Aug 2024 08:01:17 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-05171.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics