BibTeX record journals/corr/abs-2401-00243

download as .bib file

% arXiv preprint (CoRR listing) as exported from the dblp bibliography.
% The acronym in the title is brace-protected so that sentence-casing
% bibliography styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2401-00243,
  author       = {Zhai, Yuanzhao and
                  Zhang, Han and
                  Lei, Yu and
                  Yu, Yue and
                  Xu, Kele and
                  Feng, Dawei and
                  Ding, Bo and
                  Wang, Huaimin},
  title        = {Uncertainty-Penalized Reinforcement Learning from Human Feedback with
                  Diverse Reward {LoRA} Ensembles},
  journal      = {CoRR},
  volume       = {abs/2401.00243},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.00243},
  doi          = {10.48550/ARXIV.2401.00243},
  eprinttype   = {arXiv},
  eprint       = {2401.00243},
  timestamp    = {Mon, 15 Jan 2024 16:37:16 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-00243.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}