BibTeX record journals/corr/abs-2312-11456

download as .bib file

% arXiv (CoRR) preprint record from the dblp export. The title word is
% "KL-constrained"; only the acronym KL is brace-protected, so no stray
% space appears after the hyphen when the title is typeset.
@article{DBLP:journals/corr/abs-2312-11456,
  author       = {Xiong, Wei and
                  Dong, Hanze and
                  Ye, Chenlu and
                  Zhong, Han and
                  Jiang, Nan and
                  Zhang, Tong},
  title        = {Gibbs Sampling from Human Feedback: {A} Provable {KL}-constrained
                  Framework for {RLHF}},
  journal      = {CoRR},
  volume       = {abs/2312.11456},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.11456},
  doi          = {10.48550/arXiv.2312.11456},
  eprinttype   = {arXiv},
  eprint       = {2312.11456},
  timestamp    = {Tue, 16 Jan 2024 11:57:42 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-11456.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
A service of Schloss Dagstuhl - Leibniz Center for Informatics