BibTeX records: Wei-Ning Hsu

download as .bib file

% arXiv (CoRR) preprint, 2024: XLAVS-R.
@article{DBLP:journals/corr/abs-2403-14402,
  author     = {Han, HyoJung and
                Anwar, Mohamed and
                Pino, Juan and
                Hsu, Wei{-}Ning and
                Carpuat, Marine and
                Shi, Bowen and
                Wang, Changhan},
  title      = {{XLAVS-R:} Cross-Lingual Audio-Visual Speech Representation Learning
                for Noise-Robust Speech Perception},
  journal    = {CoRR},
  volume     = {abs/2403.14402},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2403.14402},
  doi        = {10.48550/ARXIV.2403.14402},
  eprinttype = {arXiv},
  eprint     = {2403.14402},
  timestamp  = {Tue, 09 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-14402.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Findings of ACL 2023 paper.
@inproceedings{DBLP:conf/acl/ChenTYDKCTDSGIP23,
  author    = {Chen, Peng{-}Jen and
               Tran, Kevin and
               Yang, Yilin and
               Du, Jingfei and
               Kao, Justine and
               Chung, Yu{-}An and
               Tomasello, Paden and
               Duquenne, Paul{-}Ambroise and
               Schwenk, Holger and
               Gong, Hongyu and
               Inaguma, Hirofumi and
               Popuri, Sravya and
               Wang, Changhan and
               Pino, Juan and
               Hsu, Wei{-}Ning and
               Lee, Ann},
  editor    = {Rogers, Anna and
               Boyd{-}Graber, Jordan L. and
               Okazaki, Naoaki},
  title     = {Speech-to-Speech Translation for a Real-world Unwritten Language},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023,
               Toronto, Canada, July 9-14, 2023},
  pages     = {4969--4983},
  publisher = {Association for Computational Linguistics},
  year      = {2023},
  url       = {https://doi.org/10.18653/v1/2023.findings-acl.307},
  doi       = {10.18653/V1/2023.FINDINGS-ACL.307},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/ChenTYDKCTDSGIP23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ACL 2023 (Volume 1: Long Papers) paper.
@inproceedings{DBLP:conf/acl/WangICK0HA023,
  author    = {Wang, Changhan and
               Inaguma, Hirofumi and
               Chen, Peng{-}Jen and
               Kulikov, Ilia and
               Tang, Yun and
               Hsu, Wei{-}Ning and
               Auli, Michael and
               Pino, Juan},
  editor    = {Rogers, Anna and
               Boyd{-}Graber, Jordan L. and
               Okazaki, Naoaki},
  title     = {Simple and Effective Unsupervised Speech Translation},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational
               Linguistics (Volume 1: Long Papers), {ACL} 2023, Toronto, Canada,
               July 9-14, 2023},
  pages     = {10771--10784},
  publisher = {Association for Computational Linguistics},
  year      = {2023},
  url       = {https://doi.org/10.18653/v1/2023.acl-long.602},
  doi       = {10.18653/V1/2023.ACL-LONG.602},
  timestamp = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/WangICK0HA023.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ASRU 2023 paper. The model name in the title is spelled as in the CoRR
% record with the same title and authors (DBLP:journals/corr/abs-2302-06419)
% and is brace-protected so sentence-casing styles cannot alter it.
@inproceedings{DBLP:conf/asru/LianBHA23,
  author    = {Lian, Jiachen and
               Baevski, Alexei and
               Hsu, Wei{-}Ning and
               Auli, Michael},
  title     = {{AV-data2vec}: Self-Supervised Learning of Audio-Visual Speech Representations
               with Contextualized Target Representations},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
               2023, Taipei, Taiwan, December 16-20, 2023},
  pages     = {1--8},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/ASRU57964.2023.10389642},
  doi       = {10.1109/ASRU57964.2023.10389642},
  timestamp = {Tue, 13 Feb 2024 21:21:14 +0100},
  biburl    = {https://dblp.org/rec/conf/asru/LianBHA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2023 paper. The mixed-case name "ReVISE" is brace-protected so that
% sentence-casing bibliography styles keep its capitalization.
@inproceedings{DBLP:conf/cvpr/HsuRSDA23,
  author    = {Hsu, Wei{-}Ning and
               Remez, Tal and
               Shi, Bowen and
               Donley, Jacob and
               Adi, Yossi},
  title     = {{ReVISE}: Self-Supervised Speech Resynthesis with Visual Input for Universal
               and Generalized Speech Regeneration},
  booktitle = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
               {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages     = {18796--18806},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/CVPR52729.2023.01802},
  doi       = {10.1109/CVPR52729.2023.01802},
  timestamp = {Tue, 29 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/HsuRSDA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Findings of EMNLP 2023 paper; same title and authors as the CoRR record
% DBLP:journals/corr/abs-2310-08715.
@inproceedings{DBLP:conf/emnlp/ChouCHLBCBA23,
  author    = {Chou, Ju{-}Chieh and
               Chien, Chung{-}Ming and
               Hsu, Wei{-}Ning and
               Livescu, Karen and
               Babu, Arun and
               Conneau, Alexis and
               Baevski, Alexei and
               Auli, Michael},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  title     = {Toward Joint Language Modeling for Speech Units and Text},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP}
               2023, Singapore, December 6-10, 2023},
  pages     = {6582--6593},
  publisher = {Association for Computational Linguistics},
  year      = {2023},
  url       = {https://doi.org/10.18653/v1/2023.findings-emnlp.438},
  doi       = {10.18653/V1/2023.FINDINGS-EMNLP.438},
  timestamp = {Fri, 12 Apr 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/emnlp/ChouCHLBCBA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper.
@inproceedings{DBLP:conf/icassp/DiwanYHTCHM23,
  author    = {Diwan, Anuj and
               Yeh, Ching{-}Feng and
               Hsu, Wei{-}Ning and
               Tomasello, Paden and
               Choi, Eunsol and
               Harwath, David and
               Mohamed, Abdelrahman},
  title     = {Continual Learning for On-Device Speech Recognition Using Disentangled
               Conformers},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing
               {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10095484},
  doi       = {10.1109/ICASSP49357.2023.10095484},
  timestamp = {Sun, 05 Nov 2023 16:51:21 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/DiwanYHTCHM23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper.
@inproceedings{DBLP:conf/icassp/ElkahkyHTNAACDM23,
  author    = {Elkahky, Ali and
               Hsu, Wei{-}Ning and
               Tomasello, Paden and
               Nguyen, Tu Anh and
               Algayres, Robin and
               Adi, Yossi and
               Copet, Jade and
               Dupoux, Emmanuel and
               Mohamed, Abdelrahman},
  title     = {Do Coarser Units Benefit Cluster Prediction-Based Speech Pre-Training?},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing
               {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096788},
  doi       = {10.1109/ICASSP49357.2023.10096788},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/ElkahkyHTNAACDM23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper. "HuBERT" is spelled as in the CoRR record with the same
% title and authors (DBLP:journals/corr/abs-2303-11131) and is brace-protected
% so sentence-casing styles keep its capitalization.
@inproceedings{DBLP:conf/icassp/FazelZarandiH23,
  author    = {Fazel{-}Zarandi, Maryam and
               Hsu, Wei{-}Ning},
  title     = {Cocktail {HuBERT}: Generalized Self-Supervised Pre-Training for Mixture
               and Single-Source Speech},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing
               {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/ICASSP49357.2023.10096630},
  doi       = {10.1109/ICASSP49357.2023.10096630},
  timestamp = {Sun, 05 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/FazelZarandiH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 Workshops paper.
@inproceedings{DBLP:conf/icassp/SanabriaHBA23,
  author    = {Sanabria, Ramon and
               Hsu, Wei{-}Ning and
               Baevski, Alexei and
               Auli, Michael},
  title     = {Measuring the Impact of Domain Factors in Self-Supervised Pre-Training},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing,
               {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/ICASSPW59220.2023.10193184},
  doi       = {10.1109/ICASSPW59220.2023.10193184},
  timestamp = {Mon, 07 Aug 2023 15:56:26 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/SanabriaHBA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR volume 202); same title and authors as the CoRR
% record DBLP:journals/corr/abs-2301-03728.
@inproceedings{DBLP:conf/icml/AghajanyanYCHHZ23,
  author    = {Aghajanyan, Armen and
               Yu, Lili and
               Conneau, Alexis and
               Hsu, Wei{-}Ning and
               Hambardzumyan, Karen and
               Zhang, Susan and
               Roller, Stephen and
               Goyal, Naman and
               Levy, Omer and
               Zettlemoyer, Luke},
  editor    = {Krause, Andreas and
               Brunskill, Emma and
               Cho, Kyunghyun and
               Engelhardt, Barbara and
               Sabato, Sivan and
               Scarlett, Jonathan},
  title     = {Scaling Laws for Generative Mixed-Modal Language Models},
  booktitle = {International Conference on Machine Learning, {ICML} 2023, 23-29 July
               2023, Honolulu, Hawaii, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {265--279},
  publisher = {{PMLR}},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/aghajanyan23a.html},
  timestamp = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/AghajanyanYCHHZ23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR volume 202).
@inproceedings{DBLP:conf/icml/BaevskiBHA23,
  author    = {Baevski, Alexei and
               Babu, Arun and
               Hsu, Wei{-}Ning and
               Auli, Michael},
  editor    = {Krause, Andreas and
               Brunskill, Emma and
               Cho, Kyunghyun and
               Engelhardt, Barbara and
               Sabato, Sivan and
               Scarlett, Jonathan},
  title     = {Efficient Self-supervised Learning with Contextualized Target Representations
               for Vision, Speech and Language},
  booktitle = {International Conference on Machine Learning, {ICML} 2023, 23-29 July
               2023, Honolulu, Hawaii, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {1416--1429},
  publisher = {{PMLR}},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/baevski23a.html},
  timestamp = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/BaevskiBHA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper; same title and authors as the CoRR record
% DBLP:journals/corr/abs-2306-15687. The acronyms in booktitle are
% brace-protected to match the other booktitle fields in this file.
@inproceedings{DBLP:conf/nips/LeVSKSMWMAMH23,
  author    = {Le, Matthew and
               Vyas, Apoorv and
               Shi, Bowen and
               Karrer, Brian and
               Sari, Leda and
               Moritz, Rashel and
               Williamson, Mary and
               Manohar, Vimal and
               Adi, Yossi and
               Mahadeokar, Jay and
               Hsu, Wei{-}Ning},
  editor    = {Oh, Alice and
               Naumann, Tristan and
               Globerson, Amir and
               Saenko, Kate and
               Hardt, Moritz and
               Levine, Sergey},
  title     = {Voicebox: Text-Guided Multilingual Universal Speech Generation at
               Scale},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference
               on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans,
               LA, {USA}, December 10 - 16, 2023},
  year      = {2023},
  url       = {http://papers.nips.cc/paper\_files/paper/2023/hash/2d8911db9ecedf866015091b28946e15-Abstract-Conference.html},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/LeVSKSMWMAMH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper; same title as the CoRR record
% DBLP:journals/corr/abs-2305-10005. "DinoSR" and the booktitle acronyms are
% brace-protected so case-changing styles keep their capitalization.
@inproceedings{DBLP:conf/nips/LiuCAHG23,
  author    = {Liu, Alexander H. and
               Chang, Heng{-}Jui and
               Auli, Michael and
               Hsu, Wei{-}Ning and
               Glass, Jim},
  editor    = {Oh, Alice and
               Naumann, Tristan and
               Globerson, Amir and
               Saenko, Kate and
               Hardt, Moritz and
               Levine, Sergey},
  title     = {{DinoSR}: Self-Distillation and Online Clustering for Self-supervised
               Speech Representation Learning},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference
               on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans,
               LA, {USA}, December 10 - 16, 2023},
  year      = {2023},
  url       = {http://papers.nips.cc/paper\_files/paper/2023/hash/b6404bf461c3c3186bdf5f55756af908-Abstract-Conference.html},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/LiuCAHG23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023.
@article{DBLP:journals/corr/abs-2301-00652,
  author     = {Yeh, Ching{-}Feng and
                Hsu, Wei{-}Ning and
                Tomasello, Paden and
                Mohamed, Abdelrahman},
  title      = {Efficient Speech Representation Learning with Low-Bit Quantization},
  journal    = {CoRR},
  volume     = {abs/2301.00652},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.00652},
  doi        = {10.48550/ARXIV.2301.00652},
  eprinttype = {arXiv},
  eprint     = {2301.00652},
  timestamp  = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-00652.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023; same title and authors as the ICML record
% DBLP:conf/icml/AghajanyanYCHHZ23.
@article{DBLP:journals/corr/abs-2301-03728,
  author     = {Aghajanyan, Armen and
                Yu, Lili and
                Conneau, Alexis and
                Hsu, Wei{-}Ning and
                Hambardzumyan, Karen and
                Zhang, Susan and
                Roller, Stephen and
                Goyal, Naman and
                Levy, Omer and
                Zettlemoyer, Luke},
  title      = {Scaling Laws for Generative Mixed-Modal Language Models},
  journal    = {CoRR},
  volume     = {abs/2301.03728},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.03728},
  doi        = {10.48550/ARXIV.2301.03728},
  eprinttype = {arXiv},
  eprint     = {2301.03728},
  timestamp  = {Thu, 19 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-03728.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023. The mixed-case name "AV-data2vec" is
% brace-protected so sentence-casing styles keep its capitalization.
@article{DBLP:journals/corr/abs-2302-06419,
  author     = {Lian, Jiachen and
                Baevski, Alexei and
                Hsu, Wei{-}Ning and
                Auli, Michael},
  title      = {{AV-data2vec}: Self-supervised Learning of Audio-Visual Speech Representations
                with Contextualized Target Representations},
  journal    = {CoRR},
  volume     = {abs/2302.06419},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.06419},
  doi        = {10.48550/ARXIV.2302.06419},
  eprinttype = {arXiv},
  eprint     = {2302.06419},
  timestamp  = {Mon, 20 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-06419.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023. The mixed-case corpus name "MuAViC" is
% brace-protected so sentence-casing styles keep its capitalization.
@article{DBLP:journals/corr/abs-2303-00628,
  author     = {Anwar, Mohamed and
                Shi, Bowen and
                Goswami, Vedanuj and
                Hsu, Wei{-}Ning and
                Pino, Juan and
                Wang, Changhan},
  title      = {{MuAViC}: {A} Multilingual Audio-Visual Corpus for Robust Speech Recognition
                and Robust Speech-to-Text Translation},
  journal    = {CoRR},
  volume     = {abs/2303.00628},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.00628},
  doi        = {10.48550/ARXIV.2303.00628},
  eprinttype = {arXiv},
  eprint     = {2303.00628},
  timestamp  = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-00628.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023. The mixed-case name "HuBERT" is
% brace-protected so sentence-casing styles keep its capitalization.
@article{DBLP:journals/corr/abs-2303-11131,
  author     = {Fazel{-}Zarandi, Maryam and
                Hsu, Wei{-}Ning},
  title      = {Cocktail {HuBERT}: Generalized Self-Supervised Pre-training for Mixture
                and Single-Source Speech},
  journal    = {CoRR},
  volume     = {abs/2303.11131},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.11131},
  doi        = {10.48550/ARXIV.2303.11131},
  eprinttype = {arXiv},
  eprint     = {2303.11131},
  timestamp  = {Wed, 22 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-11131.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023. The mixed-case name "DinoSR" is
% brace-protected so sentence-casing styles keep its capitalization.
@article{DBLP:journals/corr/abs-2305-10005,
  author     = {Liu, Alexander H. and
                Chang, Heng{-}Jui and
                Auli, Michael and
                Hsu, Wei{-}Ning and
                Glass, James R.},
  title      = {{DinoSR}: Self-Distillation and Online Clustering for Self-supervised
                Speech Representation Learning},
  journal    = {CoRR},
  volume     = {abs/2305.10005},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.10005},
  doi        = {10.48550/ARXIV.2305.10005},
  eprinttype = {arXiv},
  eprint     = {2305.10005},
  timestamp  = {Wed, 24 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-10005.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023. The number in the title is written "1,000+";
% the exported record had a stray space after the thousands separator.
@article{DBLP:journals/corr/abs-2305-13516,
  author     = {Pratap, Vineel and
                Tjandra, Andros and
                Shi, Bowen and
                Tomasello, Paden and
                Babu, Arun and
                Kundu, Sayani and
                Elkahky, Ali and
                Ni, Zhaoheng and
                Vyas, Apoorv and
                Fazel{-}Zarandi, Maryam and
                Baevski, Alexei and
                Adi, Yossi and
                Zhang, Xiaohui and
                Hsu, Wei{-}Ning and
                Conneau, Alexis and
                Auli, Michael},
  title      = {Scaling Speech Technology to 1,000+ Languages},
  journal    = {CoRR},
  volume     = {abs/2305.13516},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.13516},
  doi        = {10.48550/ARXIV.2305.13516},
  eprinttype = {arXiv},
  eprint     = {2305.13516},
  timestamp  = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-13516.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023; same title and authors as the NeurIPS record
% DBLP:conf/nips/LeVSKSMWMAMH23.
@article{DBLP:journals/corr/abs-2306-15687,
  author     = {Le, Matthew and
                Vyas, Apoorv and
                Shi, Bowen and
                Karrer, Brian and
                Sari, Leda and
                Moritz, Rashel and
                Williamson, Mary and
                Manohar, Vimal and
                Adi, Yossi and
                Mahadeokar, Jay and
                Hsu, Wei{-}Ning},
  title      = {Voicebox: Text-Guided Multilingual Universal Speech Generation at
                Scale},
  journal    = {CoRR},
  volume     = {abs/2306.15687},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.15687},
  doi        = {10.48550/ARXIV.2306.15687},
  eprinttype = {arXiv},
  eprint     = {2306.15687},
  timestamp  = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-15687.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023: EXPRESSO.
@article{DBLP:journals/corr/abs-2308-05725,
  author     = {Nguyen, Tu Anh and
                Hsu, Wei{-}Ning and
                D'Avirro, Antony and
                Shi, Bowen and
                Gat, Itai and
                Fazel{-}Zarandi, Maryam and
                Remez, Tal and
                Copet, Jade and
                Synnaeve, Gabriel and
                Hassid, Michael and
                Kreuk, Felix and
                Adi, Yossi and
                Dupoux, Emmanuel},
  title      = {{EXPRESSO:} {A} Benchmark and Analysis of Discrete Expressive Speech
                Resynthesis},
  journal    = {CoRR},
  volume     = {abs/2308.05725},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2308.05725},
  doi        = {10.48550/ARXIV.2308.05725},
  eprinttype = {arXiv},
  eprint     = {2308.05725},
  timestamp  = {Wed, 23 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2308-05725.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023. The acronym "SSL" is brace-protected like
% "TTS" in the same title, so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2309-17020,
  author     = {Hsu, Po{-}Chun and
                Elkahky, Ali and
                Hsu, Wei{-}Ning and
                Adi, Yossi and
                Nguyen, Tu Anh and
                Copet, Jade and
                Dupoux, Emmanuel and
                Lee, Hung{-}yi and
                Mohamed, Abdelrahman},
  title      = {Low-Resource Self-Supervised Learning with {SSL}-Enhanced {TTS}},
  journal    = {CoRR},
  volume     = {abs/2309.17020},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.17020},
  doi        = {10.48550/ARXIV.2309.17020},
  eprinttype = {arXiv},
  eprint     = {2309.17020},
  timestamp  = {Tue, 17 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-17020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023; same title and authors as the Findings of
% EMNLP record DBLP:conf/emnlp/ChouCHLBCBA23.
@article{DBLP:journals/corr/abs-2310-08715,
  author     = {Chou, Ju{-}Chieh and
                Chien, Chung{-}Ming and
                Hsu, Wei{-}Ning and
                Livescu, Karen and
                Babu, Arun and
                Conneau, Alexis and
                Baevski, Alexei and
                Auli, Michael},
  title      = {Toward Joint Language Modeling for Speech Units and Text},
  journal    = {CoRR},
  volume     = {abs/2310.08715},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.08715},
  doi        = {10.48550/ARXIV.2310.08715},
  eprinttype = {arXiv},
  eprint     = {2310.08715},
  timestamp  = {Wed, 25 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-08715.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023.
@article{DBLP:journals/corr/abs-2310-16338,
  author     = {Liu, Alexander H. and
                Le, Matt and
                Vyas, Apoorv and
                Shi, Bowen and
                Tjandra, Andros and
                Hsu, Wei{-}Ning},
  title      = {Generative Pre-training for Speech with Flow Matching},
  journal    = {CoRR},
  volume     = {abs/2310.16338},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.16338},
  doi        = {10.48550/ARXIV.2310.16338},
  eprinttype = {arXiv},
  eprint     = {2310.16338},
  timestamp  = {Tue, 31 Oct 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-16338.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023.
@article{DBLP:journals/corr/abs-2311-02772,
  author     = {Jeon, Sungho and
                Yeh, Ching{-}Feng and
                Inan, Hakan and
                Hsu, Wei{-}Ning and
                Rungta, Rashi and
                Mehdad, Yashar and
                Bikel, Daniel},
  title      = {Attention or Convolution: Transformer Encoders in Audio Language Models
                for Inference Efficiency},
  journal    = {CoRR},
  volume     = {abs/2311.02772},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2311.02772},
  doi        = {10.48550/ARXIV.2311.02772},
  eprinttype = {arXiv},
  eprint     = {2311.02772},
  timestamp  = {Wed, 08 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2311-02772.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint, 2023: Audiobox.
@article{DBLP:journals/corr/abs-2312-15821,
  author     = {Vyas, Apoorv and
                Shi, Bowen and
                Le, Matthew and
                Tjandra, Andros and
                Wu, Yi{-}Chiao and
                Guo, Baishan and
                Zhang, Jiemin and
                Zhang, Xinyue and
                Adkins, Robert and
                Ngan, William and
                Wang, Jeff and
                Cruz, Ivan and
                Akula, Bapi and
                Akinyemi, Akinniyi and
                Ellis, Brian and
                Moritz, Rashel and
                Yungster, Yael and
                Rakotoarison, Alice and
                Tan, Liang and
                Summers, Chris and
                Wood, Carleigh and
                Lane, Joshua and
                Williamson, Mary and
                Hsu, Wei{-}Ning},
  title      = {Audiobox: Unified Audio Generation with Natural Language Prompts},
  journal    = {CoRR},
  volume     = {abs/2312.15821},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.15821},
  doi        = {10.48550/ARXIV.2312.15821},
  eprinttype = {arXiv},
  eprint     = {2312.15821},
  timestamp  = {Tue, 16 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-15821.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ACL 2022 (Volume 1: Long Papers) paper.
@inproceedings{DBLP:conf/acl/TangGDWHGBLMAP22,
  author    = {Tang, Yun and
               Gong, Hongyu and
               Dong, Ning and
               Wang, Changhan and
               Hsu, Wei{-}Ning and
               Gu, Jiatao and
               Baevski, Alexei and
               Li, Xian and
               Mohamed, Abdelrahman and
               Auli, Michael and
               Pino, Juan Miguel},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  title     = {Unified Speech-Text Pre-training for Speech Translation and Recognition},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational
               Linguistics (Volume 1: Long Papers), {ACL} 2022, Dublin, Ireland,
               May 22-27, 2022},
  pages     = {1488--1499},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  url       = {https://doi.org/10.18653/v1/2022.acl-long.105},
  doi       = {10.18653/V1/2022.ACL-LONG.105},
  timestamp = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/TangGDWHGBLMAP22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ACL 2022 (Volume 1: Long Papers) paper.
@inproceedings{DBLP:conf/acl/LeeCWGPMPAHTPH22,
  author    = {Lee, Ann and
               Chen, Peng{-}Jen and
               Wang, Changhan and
               Gu, Jiatao and
               Popuri, Sravya and
               Ma, Xutai and
               Polyak, Adam and
               Adi, Yossi and
               He, Qing and
               Tang, Yun and
               Pino, Juan and
               Hsu, Wei{-}Ning},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  title     = {Direct Speech-to-Speech Translation With Discrete Units},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational
               Linguistics (Volume 1: Long Papers), {ACL} 2022, Dublin, Ireland,
               May 22-27, 2022},
  pages     = {3327--3339},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  url       = {https://doi.org/10.18653/v1/2022.acl-long.235},
  doi       = {10.18653/V1/2022.ACL-LONG.235},
  timestamp = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/LeeCWGPMPAHTPH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ACL 2022 (Volume 1: Long Papers) paper.
@inproceedings{DBLP:conf/acl/KharitonovLPACL22,
  author    = {Kharitonov, Eugene and
               Lee, Ann and
               Polyak, Adam and
               Adi, Yossi and
               Copet, Jade and
               Lakhotia, Kushal and
               Nguyen, Tu Anh and
               Rivi{\`{e}}re, Morgane and
               Mohamed, Abdelrahman and
               Dupoux, Emmanuel and
               Hsu, Wei{-}Ning},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  title     = {Text-Free Prosody-Aware Generative Spoken Language Modeling},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational
               Linguistics (Volume 1: Long Papers), {ACL} 2022, Dublin, Ireland,
               May 22-27, 2022},
  pages     = {8666--8681},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  url       = {https://doi.org/10.18653/v1/2022.acl-long.593},
  doi       = {10.18653/V1/2022.ACL-LONG.593},
  timestamp = {Thu, 06 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/KharitonovLPACL22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/KreukPCKNRHMDA22,
  author       = {Kreuk, Felix and Polyak, Adam and Copet, Jade and Kharitonov, Eugene and
                  Nguyen, Tu Anh and Rivi{\`{e}}re, Morgane and Hsu, Wei{-}Ning and
                  Mohamed, Abdelrahman and Dupoux, Emmanuel and Adi, Yossi},
  editor       = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  title        = {Textless Speech Emotion Conversion using Discrete {\&} Decomposed
                  Representations},
  booktitle    = {Proceedings of the 2022 Conference on Empirical Methods in Natural
                  Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates,
                  December 7-11, 2022},
  pages        = {11200--11214},
  publisher    = {Association for Computational Linguistics},
  year         = {2022},
  doi          = {10.18653/V1/2022.EMNLP-MAIN.769},
  url          = {https://doi.org/10.18653/v1/2022.emnlp-main.769},
  timestamp    = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/KreukPCKNRHMDA22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference record; the CoRR record with the same title and author list is
% DBLP:journals/corr/abs-2201-02184.
@inproceedings{DBLP:conf/iclr/ShiHLM22,
  author       = {Shi, Bowen and Hsu, Wei{-}Ning and Lakhotia, Kushal and
                  Mohamed, Abdelrahman},
  title        = {Learning Audio-Visual Speech Representation by Masked Multimodal Cluster
                  Prediction},
  booktitle    = {The Tenth International Conference on Learning Representations, {ICLR}
                  2022, Virtual Event, April 25-29, 2022},
  publisher    = {OpenReview.net},
  year         = {2022},
  url          = {https://openreview.net/forum?id=Z1Qlm11uOM},
  timestamp    = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/ShiHLM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference record; the CoRR record with the same title and author list is
% DBLP:journals/corr/abs-2202-03555.
@inproceedings{DBLP:conf/icml/BaevskiHXBGA22,
  author       = {Baevski, Alexei and Hsu, Wei{-}Ning and Xu, Qiantong and Babu, Arun and
                  Gu, Jiatao and Auli, Michael},
  editor       = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and
                  Szepesv{\'{a}}ri, Csaba and Niu, Gang and Sabato, Sivan},
  title        = {data2vec: {A} General Framework for Self-supervised Learning in Speech,
                  Vision and Language},
  booktitle    = {International Conference on Machine Learning, {ICML} 2022, 17-23 July
                  2022, Baltimore, Maryland, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {162},
  pages        = {1298--1312},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v162/baevski22a.html},
  timestamp    = {Tue, 12 Jul 2022 17:36:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/BaevskiHXBGA22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference record; the CoRR record with the same title is
% DBLP:journals/corr/abs-2204-02524 (which spells the second author "Cheng-I Jeff Lai").
@inproceedings{DBLP:conf/interspeech/LiuLHABG22,
  author       = {Liu, Alexander H. and Lai, Cheng{-}I and Hsu, Wei{-}Ning and
                  Auli, Michael and Baevski, Alexei and Glass, James R.},
  editor       = {Ko, Hanseok and Hansen, John H. L.},
  title        = {Simple and Effective Unsupervised Speech Synthesis},
  booktitle    = {Interspeech 2022, 23rd Annual Conference of the International Speech
                  Communication Association, Incheon, Korea, 18-22 September 2022},
  pages        = {843--847},
  publisher    = {{ISCA}},
  year         = {2022},
  doi          = {10.21437/INTERSPEECH.2022-11071},
  url          = {https://doi.org/10.21437/Interspeech.2022-11071},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/LiuLHABG22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference record; the CoRR record with the same title and author list is
% DBLP:journals/corr/abs-2201-01763.
@inproceedings{DBLP:conf/interspeech/ShiHM22,
  author       = {Shi, Bowen and Hsu, Wei{-}Ning and Mohamed, Abdelrahman},
  editor       = {Ko, Hanseok and Hansen, John H. L.},
  title        = {Robust Self-Supervised Audio-Visual Speech Recognition},
  booktitle    = {Interspeech 2022, 23rd Annual Conference of the International Speech
                  Communication Association, Incheon, Korea, 18-22 September 2022},
  pages        = {2118--2122},
  publisher    = {{ISCA}},
  year         = {2022},
  doi          = {10.21437/INTERSPEECH.2022-99},
  url          = {https://doi.org/10.21437/Interspeech.2022-99},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/ShiHM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference record; the CoRR record with the same title and author list is
% DBLP:journals/corr/abs-2204-11934.
@inproceedings{DBLP:conf/interspeech/VyasHAB22,
  author       = {Vyas, Apoorv and Hsu, Wei{-}Ning and Auli, Michael and Baevski, Alexei},
  editor       = {Ko, Hanseok and Hansen, John H. L.},
  title        = {On-demand compute reduction with stochastic wav2vec 2.0},
  booktitle    = {Interspeech 2022, 23rd Annual Conference of the International Speech
                  Communication Association, Incheon, Korea, 18-22 September 2022},
  pages        = {3048--3052},
  publisher    = {{ISCA}},
  year         = {2022},
  doi          = {10.21437/INTERSPEECH.2022-10584},
  url          = {https://doi.org/10.21437/Interspeech.2022-10584},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/VyasHAB22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The model name AV-HuBERT is brace-protected in the title so that bibliography
% styles which sentence-case titles do not lowercase it.
@inproceedings{DBLP:conf/interspeech/ShiMH22,
  author       = {Bowen Shi and
                  Abdelrahman Mohamed and
                  Wei{-}Ning Hsu},
  editor       = {Hanseok Ko and
                  John H. L. Hansen},
  title        = {Learning Lip-Based Audio-Visual Speaker Embeddings with {AV-HuBERT}},
  booktitle    = {Interspeech 2022, 23rd Annual Conference of the International Speech
                  Communication Association, Incheon, Korea, 18-22 September 2022},
  pages        = {4785--4789},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-885},
  doi          = {10.21437/INTERSPEECH.2022-885},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/ShiMH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference record; the CoRR record with the same title and author list is
% DBLP:journals/corr/abs-2204-02967.
@inproceedings{DBLP:conf/interspeech/PopuriCWPAGHL22,
  author       = {Popuri, Sravya and Chen, Peng{-}Jen and Wang, Changhan and Pino, Juan and
                  Adi, Yossi and Gu, Jiatao and Hsu, Wei{-}Ning and Lee, Ann},
  editor       = {Ko, Hanseok and Hansen, John H. L.},
  title        = {Enhanced Direct Speech-to-Speech Translation Using Self-supervised
                  Pre-training and Data Augmentation},
  booktitle    = {Interspeech 2022, 23rd Annual Conference of the International Speech
                  Communication Association, Incheon, Korea, 18-22 September 2022},
  pages        = {5195--5199},
  publisher    = {{ISCA}},
  year         = {2022},
  doi          = {10.21437/INTERSPEECH.2022-11032},
  url          = {https://doi.org/10.21437/Interspeech.2022-11032},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/PopuriCWPAGHL22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Editors are given in "von Last, First" form so that the two-word surname
% "Meza Ruiz" is not split: in "First Last" form BibTeX would take only the
% final word as the surname.
@inproceedings{DBLP:conf/naacl/LeeGDSCWPAPGH22,
  author       = {Ann Lee and
                  Hongyu Gong and
                  Paul{-}Ambroise Duquenne and
                  Holger Schwenk and
                  Peng{-}Jen Chen and
                  Changhan Wang and
                  Sravya Popuri and
                  Yossi Adi and
                  Juan Miguel Pino and
                  Jiatao Gu and
                  Wei{-}Ning Hsu},
  editor       = {Carpuat, Marine and
                  de Marneffe, Marie{-}Catherine and
                  Meza Ru{\'{\i}}z, Iv{\'{a}}n Vladimir},
  title        = {Textless Speech-to-Speech Translation on Real Data},
  booktitle    = {Proceedings of the 2022 Conference of the North American Chapter of
                  the Association for Computational Linguistics: Human Language Technologies,
                  {NAACL} 2022, Seattle, WA, United States, July 10-15, 2022},
  pages        = {860--872},
  publisher    = {Association for Computational Linguistics},
  year         = {2022},
  url          = {https://doi.org/10.18653/v1/2022.naacl-main.63},
  doi          = {10.18653/V1/2022.NAACL-MAIN.63},
  timestamp    = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/naacl/LeeGDSCWPAPGH22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The model name u-HuBERT is brace-protected in the title so that bibliography
% styles which sentence-case titles do not lowercase it.
% NOTE(review): four editors are recorded with initials only; expand to full
% given names if the proceedings front matter is at hand.
@inproceedings{DBLP:conf/nips/HsuS22,
  author       = {Wei{-}Ning Hsu and
                  Bowen Shi},
  editor       = {Sanmi Koyejo and
                  S. Mohamed and
                  A. Agarwal and
                  Danielle Belgrave and
                  K. Cho and
                  A. Oh},
  title        = {{u-HuBERT:} Unified Mixed-Modal Speech Pretraining And Zero-Shot Transfer
                  to Unlabeled Modality},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper\_files/paper/2022/hash/853e781cb2af58956ed5c89aa59da3fc-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/HsuS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference record; the CoRR record with the same title (up to capitalisation)
% and author list is DBLP:journals/corr/abs-2204-02492.
% NOTE(review): year is 2022 although the booktitle dates the event
% January 9-12, 2023 -- confirm which year citations should show.
@inproceedings{DBLP:conf/slt/LiuHAB22,
  author       = {Liu, Alexander H. and Hsu, Wei{-}Ning and Auli, Michael and
                  Baevski, Alexei},
  title        = {Towards End-to-End Unsupervised Speech Recognition},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar,
                  January 9-12, 2023},
  pages        = {221--228},
  publisher    = {{IEEE}},
  year         = {2022},
  doi          = {10.1109/SLT54892.2023.10023187},
  url          = {https://doi.org/10.1109/SLT54892.2023.10023187},
  timestamp    = {Mon, 06 Feb 2023 22:19:30 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/LiuHAB22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The acronym STOP is written in capitals and brace-protected, matching the
% CoRR record of the same title (DBLP:journals/corr/abs-2207-10643).
% NOTE(review): year is 2022 although the booktitle dates the event
% January 9-12, 2023 -- confirm which year citations should show.
@inproceedings{DBLP:conf/slt/TomaselloSLHLSECHAANDZM22,
  author       = {Paden Tomasello and
                  Akshat Shrivastava and
                  Daniel Lazar and
                  Po{-}Chun Hsu and
                  Duc Le and
                  Adithya Sagar and
                  Ali Elkahky and
                  Jade Copet and
                  Wei{-}Ning Hsu and
                  Yossi Adi and
                  Robin Algayres and
                  Tu Anh Nguyen and
                  Emmanuel Dupoux and
                  Luke Zettlemoyer and
                  Abdelrahman Mohamed},
  title        = {{STOP:} {A} Dataset for Spoken Task Oriented Semantic Parsing},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar,
                  January 9-12, 2023},
  pages        = {991--998},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/SLT54892.2023.10022703},
  doi          = {10.1109/SLT54892.2023.10022703},
  timestamp    = {Mon, 11 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/TomaselloSLHLSECHAANDZM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title and author
% list is DBLP:conf/interspeech/ShiHM22.
@article{DBLP:journals/corr/abs-2201-01763,
  author       = {Shi, Bowen and Hsu, Wei{-}Ning and Mohamed, Abdelrahman},
  title        = {Robust Self-Supervised Audio-Visual Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2201.01763},
  year         = {2022},
  eprint       = {2201.01763},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2201.01763},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2201-01763.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title and author
% list is DBLP:conf/iclr/ShiHLM22.
@article{DBLP:journals/corr/abs-2201-02184,
  author       = {Shi, Bowen and Hsu, Wei{-}Ning and Lakhotia, Kushal and
                  Mohamed, Abdelrahman},
  title        = {Learning Audio-Visual Speech Representation by Masked Multimodal Cluster
                  Prediction},
  journal      = {CoRR},
  volume       = {abs/2201.02184},
  year         = {2022},
  eprint       = {2201.02184},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2201.02184},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2201-02184.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title and author
% list is DBLP:conf/icml/BaevskiHXBGA22.
@article{DBLP:journals/corr/abs-2202-03555,
  author       = {Baevski, Alexei and Hsu, Wei{-}Ning and Xu, Qiantong and Babu, Arun and
                  Gu, Jiatao and Auli, Michael},
  title        = {data2vec: {A} General Framework for Self-supervised Learning in Speech,
                  Vision and Language},
  journal      = {CoRR},
  volume       = {abs/2202.03555},
  year         = {2022},
  eprint       = {2202.03555},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2202.03555},
  timestamp    = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-03555.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-07359,
  author       = {Kharitonov, Eugene and Copet, Jade and Lakhotia, Kushal and
                  Nguyen, Tu Anh and Tomasello, Paden and Lee, Ann and Elkahky, Ali and
                  Hsu, Wei{-}Ning and Mohamed, Abdelrahman and Dupoux, Emmanuel and
                  Adi, Yossi},
  title        = {textless-lib: a Library for Textless Spoken Language Processing},
  journal      = {CoRR},
  volume       = {abs/2202.07359},
  year         = {2022},
  eprint       = {2202.07359},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2202.07359},
  timestamp    = {Mon, 14 Mar 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-07359.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-00648,
  author       = {Sanabria, Ramon and Hsu, Wei{-}Ning and Baevski, Alexei and
                  Auli, Michael},
  title        = {Measuring the Impact of Individual Domain Factors in Self-Supervised
                  Pre-Training},
  journal      = {CoRR},
  volume       = {abs/2203.00648},
  year         = {2022},
  eprint       = {2203.00648},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2203.00648},
  url          = {https://doi.org/10.48550/arXiv.2203.00648},
  timestamp    = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2203-00648.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-16502,
  author       = {Nguyen, Tu Anh and Kharitonov, Eugene and Copet, Jade and Adi, Yossi and
                  Hsu, Wei{-}Ning and Elkahky, Ali and Tomasello, Paden and
                  Algayres, Robin and Sagot, Beno{\^{\i}}t and Mohamed, Abdelrahman and
                  Dupoux, Emmanuel},
  title        = {Generative Spoken Dialogue Language Modeling},
  journal      = {CoRR},
  volume       = {abs/2203.16502},
  year         = {2022},
  eprint       = {2203.16502},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2203.16502},
  url          = {https://doi.org/10.48550/arXiv.2203.16502},
  timestamp    = {Mon, 04 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2203-16502.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title (up to
% capitalisation) and author list is DBLP:conf/slt/LiuHAB22.
@article{DBLP:journals/corr/abs-2204-02492,
  author       = {Liu, Alexander H. and Hsu, Wei{-}Ning and Auli, Michael and
                  Baevski, Alexei},
  title        = {Towards End-to-end Unsupervised Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2204.02492},
  year         = {2022},
  eprint       = {2204.02492},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2204.02492},
  url          = {https://doi.org/10.48550/arXiv.2204.02492},
  timestamp    = {Tue, 12 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-02492.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title is
% DBLP:conf/interspeech/LiuLHABG22 (which spells the second author "Cheng-I Lai").
@article{DBLP:journals/corr/abs-2204-02524,
  author       = {Liu, Alexander H. and Lai, Cheng{-}I Jeff and Hsu, Wei{-}Ning and
                  Auli, Michael and Baevski, Alexei and Glass, James R.},
  title        = {Simple and Effective Unsupervised Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/2204.02524},
  year         = {2022},
  eprint       = {2204.02524},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2204.02524},
  url          = {https://doi.org/10.48550/arXiv.2204.02524},
  timestamp    = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-02524.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title and author
% list is DBLP:conf/interspeech/PopuriCWPAGHL22.
@article{DBLP:journals/corr/abs-2204-02967,
  author       = {Popuri, Sravya and Chen, Peng{-}Jen and Wang, Changhan and Pino, Juan and
                  Adi, Yossi and Gu, Jiatao and Hsu, Wei{-}Ning and Lee, Ann},
  title        = {Enhanced Direct Speech-to-Speech Translation Using Self-supervised
                  Pre-training and Data Augmentation},
  journal      = {CoRR},
  volume       = {abs/2204.02967},
  year         = {2022},
  eprint       = {2204.02967},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2204.02967},
  url          = {https://doi.org/10.48550/arXiv.2204.02967},
  timestamp    = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-02967.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2204-05409,
  author       = {Tang, Yun and Gong, Hongyu and Dong, Ning and Wang, Changhan and
                  Hsu, Wei{-}Ning and Gu, Jiatao and Baevski, Alexei and Li, Xian and
                  Mohamed, Abdelrahman and Auli, Michael and Pino, Juan Miguel},
  title        = {Unified Speech-Text Pre-training for Speech Translation and Recognition},
  journal      = {CoRR},
  volume       = {abs/2204.05409},
  year         = {2022},
  eprint       = {2204.05409},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2204.05409},
  url          = {https://doi.org/10.48550/arXiv.2204.05409},
  timestamp    = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-05409.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title and author
% list is DBLP:conf/interspeech/VyasHAB22.
@article{DBLP:journals/corr/abs-2204-11934,
  author       = {Vyas, Apoorv and Hsu, Wei{-}Ning and Auli, Michael and Baevski, Alexei},
  title        = {On-demand compute reduction with stochastic wav2vec 2.0},
  journal      = {CoRR},
  volume       = {abs/2204.11934},
  year         = {2022},
  eprint       = {2204.11934},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2204.11934},
  url          = {https://doi.org/10.48550/arXiv.2204.11934},
  timestamp    = {Thu, 28 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-11934.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The model name AV-HuBERT is brace-protected in the title so that bibliography
% styles which sentence-case titles do not lowercase it.
@article{DBLP:journals/corr/abs-2205-07180,
  author       = {Bowen Shi and
                  Abdelrahman Mohamed and
                  Wei{-}Ning Hsu},
  title        = {Learning Lip-Based Audio-Visual Speaker Embeddings with {AV-HuBERT}},
  journal      = {CoRR},
  volume       = {abs/2205.07180},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2205.07180},
  doi          = {10.48550/ARXIV.2205.07180},
  eprinttype   = {arXiv},
  eprint       = {2205.07180},
  timestamp    = {Wed, 18 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2205-07180.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2207-07036,
  author       = {Hsu, Wei{-}Ning and Shi, Bowen},
  title        = {A Single Self-Supervised Model for Many Speech Modalities Enables
                  Zero-Shot Modality Transfer},
  journal      = {CoRR},
  volume       = {abs/2207.07036},
  year         = {2022},
  eprint       = {2207.07036},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2207.07036},
  url          = {https://doi.org/10.48550/arXiv.2207.07036},
  timestamp    = {Tue, 19 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2207-07036.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title (up to
% capitalisation) is DBLP:conf/slt/TomaselloSLHLSECHAANDZM22.
% NOTE(review): the 10th author is "Yossef Mordechay" here but "Yossi Adi" in
% the conference record -- confirm whether these name the same person.
@article{DBLP:journals/corr/abs-2207-10643,
  author       = {Tomasello, Paden and Shrivastava, Akshat and Lazar, Daniel and
                  Hsu, Po{-}Chun and Le, Duc and Sagar, Adithya and Elkahky, Ali and
                  Copet, Jade and Hsu, Wei{-}Ning and Mordechay, Yossef and
                  Algayres, Robin and Nguyen, Tu Anh and Dupoux, Emmanuel and
                  Zettlemoyer, Luke and Mohamed, Abdelrahman},
  title        = {{STOP:} {A} dataset for Spoken Task Oriented Semantic Parsing},
  journal      = {CoRR},
  volume       = {abs/2207.10643},
  year         = {2022},
  eprint       = {2207.10643},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2207.10643},
  url          = {https://doi.org/10.48550/arXiv.2207.10643},
  timestamp    = {Mon, 25 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2207-10643.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-10191,
  author       = {Wang, Changhan and Inaguma, Hirofumi and Chen, Peng{-}Jen and
                  Kulikov, Ilia and Tang, Yun and Hsu, Wei{-}Ning and Auli, Michael and
                  Pino, Juan},
  title        = {Simple and Effective Unsupervised Speech Translation},
  journal      = {CoRR},
  volume       = {abs/2210.10191},
  year         = {2022},
  eprint       = {2210.10191},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2210.10191},
  url          = {https://doi.org/10.48550/arXiv.2210.10191},
  timestamp    = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-10191.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record; the conference record with the same title (up to
% capitalisation) is DBLP:conf/acl/ChenTYDKCTDSGIP23.
@article{DBLP:journals/corr/abs-2211-06474,
  author       = {Chen, Peng{-}Jen and Tran, Kevin and Yang, Yilin and Du, Jingfei and
                  Kao, Justine and Chung, Yu{-}An and Tomasello, Paden and
                  Duquenne, Paul{-}Ambroise and Schwenk, Holger and Gong, Hongyu and
                  Inaguma, Hirofumi and Popuri, Sravya and Wang, Changhan and
                  Pino, Juan Miguel and Hsu, Wei{-}Ning and Lee, Ann},
  title        = {Speech-to-Speech Translation For {A} Real-world Unwritten Language},
  journal      = {CoRR},
  volume       = {abs/2211.06474},
  year         = {2022},
  eprint       = {2211.06474},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2211.06474},
  url          = {https://doi.org/10.48550/arXiv.2211.06474},
  timestamp    = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-06474.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-01393,
  author       = {Diwan, Anuj and Yeh, Ching{-}Feng and Hsu, Wei{-}Ning and
                  Tomasello, Paden and Choi, Eunsol and Harwath, David and
                  Mohamed, Abdelrahman},
  title        = {Continual Learning for On-Device Speech Recognition using Disentangled
                  Conformers},
  journal      = {CoRR},
  volume       = {abs/2212.01393},
  year         = {2022},
  eprint       = {2212.01393},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2212.01393},
  url          = {https://doi.org/10.48550/arXiv.2212.01393},
  timestamp    = {Thu, 08 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-01393.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-07525,
  author       = {Baevski, Alexei and Babu, Arun and Hsu, Wei{-}Ning and Auli, Michael},
  title        = {Efficient Self-supervised Learning with Contextualized Target Representations
                  for Vision, Speech and Language},
  journal      = {CoRR},
  volume       = {abs/2212.07525},
  year         = {2022},
  eprint       = {2212.07525},
  eprinttype   = {arXiv},
  doi          = {10.48550/ARXIV.2212.07525},
  url          = {https://doi.org/10.48550/arXiv.2212.07525},
  timestamp    = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-07525.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The system name ReVISE is brace-protected in the title so that bibliography
% styles which sentence-case titles do not turn it into "Revise".
@article{DBLP:journals/corr/abs-2212-11377,
  author       = {Wei{-}Ning Hsu and
                  Tal Remez and
                  Bowen Shi and
                  Jacob Donley and
                  Yossi Adi},
  title        = {{ReVISE:} Self-Supervised Speech Resynthesis with Visual Input for Universal
                  and Generalized Speech Enhancement},
  journal      = {CoRR},
  volume       = {abs/2212.11377},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.11377},
  doi          = {10.48550/ARXIV.2212.11377},
  eprinttype   = {arXiv},
  eprint       = {2212.11377},
  timestamp    = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-11377.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The model name HuBERT is brace-protected in the title so that bibliography
% styles which sentence-case titles do not turn it into "Hubert".
@article{DBLP:journals/taslp/HsuBTLSM21,
  author       = {Wei{-}Ning Hsu and
                  Benjamin Bolte and
                  Yao{-}Hung Hubert Tsai and
                  Kushal Lakhotia and
                  Ruslan Salakhutdinov and
                  Abdelrahman Mohamed},
  title        = {{HuBERT:} Self-Supervised Speech Representation Learning by Masked Prediction
                  of Hidden Units},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {29},
  pages        = {3451--3460},
  year         = {2021},
  url          = {https://doi.org/10.1109/TASLP.2021.3122291},
  doi          = {10.1109/TASLP.2021.3122291},
  timestamp    = {Wed, 15 Dec 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taslp/HsuBTLSM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE(review): the citation key ends in 20 while year and booktitle say 2021;
% the key is left unchanged because existing citations refer to it.
@inproceedings{DBLP:conf/acl/HsuHMSG20,
  author       = {Hsu, Wei{-}Ning and Harwath, David and Miller, Tyler and
                  Song, Christopher and Glass, James R.},
  editor       = {Zong, Chengqing and Xia, Fei and Li, Wenjie and Navigli, Roberto},
  title        = {Text-Free Image-to-Speech Synthesis Using Learned Segmental Units},
  booktitle    = {Proceedings of the 59th Annual Meeting of the Association for Computational
                  Linguistics and the 11th International Joint Conference on Natural
                  Language Processing, {ACL/IJCNLP} 2021, (Volume 1: Long Papers), Virtual
                  Event, August 1-6, 2021},
  pages        = {5284--5300},
  publisher    = {Association for Computational Linguistics},
  year         = {2021},
  doi          = {10.18653/V1/2021.ACL-LONG.411},
  url          = {https://doi.org/10.18653/v1/2021.acl-long.411},
  timestamp    = {Mon, 09 Aug 2021 16:25:37 +0200},
  biburl       = {https://dblp.org/rec/conf/acl/HsuHMSG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asru/ManoharLXHCSZM21,
  author       = {Manohar, Vimal and Likhomanenko, Tatiana and Xu, Qiantong and
                  Hsu, Wei{-}Ning and Collobert, Ronan and Saraf, Yatharth and
                  Zweig, Geoffrey and Mohamed, Abdelrahman},
  title        = {Kaizen: Continuously Improving Teacher Using Exponential Moving Average
                  for Semi-Supervised Speech Recognition},
  booktitle    = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
                  2021, Cartagena, Colombia, December 13-17, 2021},
  pages        = {518--525},
  publisher    = {{IEEE}},
  year         = {2021},
  doi          = {10.1109/ASRU51503.2021.9688028},
  url          = {https://doi.org/10.1109/ASRU51503.2021.9688028},
  timestamp    = {Wed, 09 Feb 2022 09:03:03 +0100},
  biburl       = {https://dblp.org/rec/conf/asru/ManoharLXHCSZM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Title repaired: the toolkit name was encoded as S{\textbackslash}{\^{}}2, which
% typesets a stray backslash followed by an accent with no base letter. It is
% now written with a literal caret and brace-protected against case changes.
@inproceedings{DBLP:conf/emnlp/WangHAPLCGP21,
  author       = {Changhan Wang and
                  Wei{-}Ning Hsu and
                  Yossi Adi and
                  Adam Polyak and
                  Ann Lee and
                  Peng{-}Jen Chen and
                  Jiatao Gu and
                  Juan Pino},
  editor       = {Heike Adel and
                  Shuming Shi},
  title        = {fairseq {S{\textasciicircum}2:} {A} Scalable and Integrable Speech
                  Synthesis Toolkit},
  booktitle    = {Proceedings of the 2021 Conference on Empirical Methods in Natural
                  Language Processing: System Demonstrations, {EMNLP} 2021, Online and
                  Punta Cana, Dominican Republic, 7-11 November, 2021},
  pages        = {143--152},
  publisher    = {Association for Computational Linguistics},
  year         = {2021},
  url          = {https://doi.org/10.18653/v1/2021.emnlp-demo.17},
  doi          = {10.18653/V1/2021.EMNLP-DEMO.17},
  timestamp    = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/WangHAPLCGP21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: model name written as HuBERT and brace-protected so that
% sentence-casing bibliography styles keep its capitalisation.
@inproceedings{DBLP:conf/icassp/HsuTBSM21,
  author       = {Wei{-}Ning Hsu and
                  Yao{-}Hung Hubert Tsai and
                  Benjamin Bolte and
                  Ruslan Salakhutdinov and
                  Abdelrahman Mohamed},
  title        = {{HuBERT}: How Much Can a Bad Teacher Benefit {ASR} Pre-Training?},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages        = {6533--6537},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ICASSP39728.2021.9414460},
  doi          = {10.1109/ICASSP39728.2021.9414460},
  timestamp    = {Fri, 09 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/HsuTBSM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuSBLXPK0CSA21,
  author       = {Hsu, Wei{-}Ning and
                  Sriram, Anuroop and
                  Baevski, Alexei and
                  Likhomanenko, Tatiana and
                  Xu, Qiantong and
                  Pratap, Vineel and
                  Kahn, Jacob and
                  Lee, Ann and
                  Collobert, Ronan and
                  Synnaeve, Gabriel and
                  Auli, Michael},
  editor       = {Hermansky, Hynek and
                  Cernock{\'{y}}, Honza and
                  Burget, Luk{\'{a}}s and
                  Lamel, Lori and
                  Scharenborg, Odette and
                  Motl{\'{\i}}cek, Petr},
  title        = {Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training},
  booktitle    = {Interspeech 2021, 22nd Annual Conference of the International Speech
                  Communication Association, Brno, Czechia, 30 August - 3 September
                  2021},
  pages        = {721--725},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-236},
  doi          = {10.21437/INTERSPEECH.2021-236},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/HsuSBLXPK0CSA21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/PolyakACKLHMD21,
  author       = {Polyak, Adam and
                  Adi, Yossi and
                  Copet, Jade and
                  Kharitonov, Eugene and
                  Lakhotia, Kushal and
                  Hsu, Wei{-}Ning and
                  Mohamed, Abdelrahman and
                  Dupoux, Emmanuel},
  editor       = {Hermansky, Hynek and
                  Cernock{\'{y}}, Honza and
                  Burget, Luk{\'{a}}s and
                  Lamel, Lori and
                  Scharenborg, Odette and
                  Motl{\'{\i}}cek, Petr},
  title        = {Speech Resynthesis from Discrete Disentangled Self-Supervised Representations},
  booktitle    = {Interspeech 2021, 22nd Annual Conference of the International Speech
                  Communication Association, Brno, Czechia, 30 August - 3 September
                  2021},
  pages        = {3615--3619},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-475},
  doi          = {10.21437/INTERSPEECH.2021-475},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/PolyakACKLHMD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: venue acronym NeurIPS is brace-protected, matching how the other
% conference acronyms in this file are written.
@inproceedings{DBLP:conf/nips/BaevskiHCA21,
  author       = {Alexei Baevski and
                  Wei{-}Ning Hsu and
                  Alexis Conneau and
                  Michael Auli},
  editor       = {Marc'Aurelio Ranzato and
                  Alina Beygelzimer and
                  Yann N. Dauphin and
                  Percy Liang and
                  Jennifer Wortman Vaughan},
  title        = {Unsupervised Speech Recognition},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {27826--27839},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/ea159dc9788ffac311592613b7f71fbb-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/BaevskiHCA21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/Hsu0SH21,
  author       = {Hsu, Wei{-}Ning and
                  Lee, Ann and
                  Synnaeve, Gabriel and
                  Hannun, Awni Y.},
  title        = {Semi-Supervised end-to-end Speech Recognition via Local Prior Matching},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {125--132},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383552},
  doi          = {10.1109/SLT48900.2021.9383552},
  timestamp    = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/Hsu0SH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: author name corrected from the exported misspelling to
% Abdelrahman Mohamed, the spelling used elsewhere in this file.
@article{DBLP:journals/corr/abs-2102-01192,
  author       = {Kushal Lakhotia and
                  Evgeny Kharitonov and
                  Wei{-}Ning Hsu and
                  Yossi Adi and
                  Adam Polyak and
                  Benjamin Bolte and
                  Tu Anh Nguyen and
                  Jade Copet and
                  Alexei Baevski and
                  Abdelrahman Mohamed and
                  Emmanuel Dupoux},
  title        = {Generative Spoken Language Modeling from Raw Audio},
  journal      = {CoRR},
  volume       = {abs/2102.01192},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.01192},
  eprinttype   = {arXiv},
  eprint       = {2102.01192},
  timestamp    = {Tue, 09 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-01192.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-00355,
  author       = {Polyak, Adam and
                  Adi, Yossi and
                  Copet, Jade and
                  Kharitonov, Eugene and
                  Lakhotia, Kushal and
                  Hsu, Wei{-}Ning and
                  Mohamed, Abdelrahman and
                  Dupoux, Emmanuel},
  title        = {Speech Resynthesis from Discrete Disentangled Self-Supervised Representations},
  journal      = {CoRR},
  volume       = {abs/2104.00355},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.00355},
  eprinttype   = {arXiv},
  eprint       = {2104.00355},
  timestamp    = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-00355.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-01027,
  author       = {Hsu, Wei{-}Ning and
                  Sriram, Anuroop and
                  Baevski, Alexei and
                  Likhomanenko, Tatiana and
                  Xu, Qiantong and
                  Pratap, Vineel and
                  Kahn, Jacob and
                  Lee, Ann and
                  Collobert, Ronan and
                  Synnaeve, Gabriel and
                  Auli, Michael},
  title        = {Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training},
  journal      = {CoRR},
  volume       = {abs/2104.01027},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.01027},
  eprinttype   = {arXiv},
  eprint       = {2104.01027},
  timestamp    = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-01027.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2105-11084,
  author       = {Baevski, Alexei and
                  Hsu, Wei{-}Ning and
                  Conneau, Alexis and
                  Auli, Michael},
  title        = {Unsupervised Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2105.11084},
  year         = {2021},
  url          = {https://arxiv.org/abs/2105.11084},
  eprinttype   = {arXiv},
  eprint       = {2105.11084},
  timestamp    = {Tue, 01 Jun 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2105-11084.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: HuBERT is brace-protected so that sentence-casing bibliography
% styles do not lower-case the model name.
@article{DBLP:journals/corr/abs-2106-07447,
  author       = {Wei{-}Ning Hsu and
                  Benjamin Bolte and
                  Yao{-}Hung Hubert Tsai and
                  Kushal Lakhotia and
                  Ruslan Salakhutdinov and
                  Abdelrahman Mohamed},
  title        = {{HuBERT}: Self-Supervised Speech Representation Learning by Masked Prediction
                  of Hidden Units},
  journal      = {CoRR},
  volume       = {abs/2106.07447},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.07447},
  eprinttype   = {arXiv},
  eprint       = {2106.07447},
  timestamp    = {Wed, 16 Jun 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-07447.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-07759,
  author       = {Manohar, Vimal and
                  Likhomanenko, Tatiana and
                  Xu, Qiantong and
                  Hsu, Wei{-}Ning and
                  Collobert, Ronan and
                  Saraf, Yatharth and
                  Zweig, Geoffrey and
                  Mohamed, Abdelrahman},
  title        = {Kaizen: Continuously improving teacher using Exponential Moving Average
                  for semi-supervised speech recognition},
  journal      = {CoRR},
  volume       = {abs/2106.07759},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.07759},
  eprinttype   = {arXiv},
  eprint       = {2106.07759},
  timestamp    = {Thu, 01 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-07759.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2107-05604,
  author       = {Lee, Ann and
                  Chen, Peng{-}Jen and
                  Wang, Changhan and
                  Gu, Jiatao and
                  Ma, Xutai and
                  Polyak, Adam and
                  Adi, Yossi and
                  He, Qing and
                  Tang, Yun and
                  Pino, Juan Miguel and
                  Hsu, Wei{-}Ning},
  title        = {Direct speech-to-speech translation with discrete units},
  journal      = {CoRR},
  volume       = {abs/2107.05604},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.05604},
  eprinttype   = {arXiv},
  eprint       = {2107.05604},
  timestamp    = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-05604.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-03264,
  author       = {Kharitonov, Eugene and
                  Lee, Ann and
                  Polyak, Adam and
                  Adi, Yossi and
                  Copet, Jade and
                  Lakhotia, Kushal and
                  Nguyen, Tu Anh and
                  Rivi{\`{e}}re, Morgane and
                  Mohamed, Abdelrahman and
                  Dupoux, Emmanuel and
                  Hsu, Wei{-}Ning},
  title        = {Text-Free Prosody-Aware Generative Spoken Language Modeling},
  journal      = {CoRR},
  volume       = {abs/2109.03264},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.03264},
  eprinttype   = {arXiv},
  eprint       = {2109.03264},
  timestamp    = {Thu, 06 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-03264.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: title differs from the upstream export, which wrote the superscript 2
% as a text caret; it is now set in math mode and brace-protected.
@article{DBLP:journals/corr/abs-2109-06912,
  author       = {Changhan Wang and
                  Wei{-}Ning Hsu and
                  Yossi Adi and
                  Adam Polyak and
                  Ann Lee and
                  Peng{-}Jen Chen and
                  Jiatao Gu and
                  Juan Miguel Pino},
  title        = {fairseq {S$^2$}: {A} Scalable and Integrable Speech Synthesis Toolkit},
  journal      = {CoRR},
  volume       = {abs/2109.06912},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.06912},
  eprinttype   = {arXiv},
  eprint       = {2109.06912},
  timestamp    = {Thu, 06 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-06912.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: author given name corrected from the exported misspelling to
% Philipp Koehn; confirm against the paper if the upstream record is re-imported.
@article{DBLP:journals/corr/abs-2110-08250,
  author       = {Xutai Ma and
                  Hongyu Gong and
                  Danni Liu and
                  Ann Lee and
                  Yun Tang and
                  Peng{-}Jen Chen and
                  Wei{-}Ning Hsu and
                  Kenneth Heafield and
                  Philipp Koehn and
                  Juan Miguel Pino},
  title        = {Direct simultaneous speech to speech translation},
  journal      = {CoRR},
  volume       = {abs/2110.08250},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.08250},
  eprinttype   = {arXiv},
  eprint       = {2110.08250},
  timestamp    = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-08250.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2111-07402,
  author       = {Kreuk, Felix and
                  Polyak, Adam and
                  Copet, Jade and
                  Kharitonov, Eugene and
                  Nguyen, Tu Anh and
                  Rivi{\`{e}}re, Morgane and
                  Hsu, Wei{-}Ning and
                  Mohamed, Abdelrahman and
                  Dupoux, Emmanuel and
                  Adi, Yossi},
  title        = {Textless Speech Emotion Conversion using Decomposed and Discrete Representations},
  journal      = {CoRR},
  volume       = {abs/2111.07402},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.07402},
  eprinttype   = {arXiv},
  eprint       = {2111.07402},
  timestamp    = {Tue, 16 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-07402.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2112-08352,
  author       = {Lee, Ann and
                  Gong, Hongyu and
                  Duquenne, Paul{-}Ambroise and
                  Schwenk, Holger and
                  Chen, Peng{-}Jen and
                  Wang, Changhan and
                  Popuri, Sravya and
                  Pino, Juan Miguel and
                  Gu, Jiatao and
                  Hsu, Wei{-}Ning},
  title        = {Textless Speech-to-Speech Translation on Real Data},
  journal      = {CoRR},
  volume       = {abs/2112.08352},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.08352},
  eprinttype   = {arXiv},
  eprint       = {2112.08352},
  timestamp    = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-08352.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/HarwathHG20,
  author       = {Harwath, David and
                  Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Learning Hierarchical Discrete Linguistic Units from Visually-Grounded
                  Speech},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=B1elCp4KwH},
  timestamp    = {Thu, 07 May 2020 17:11:47 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/HarwathHG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/GumpHG20,
  author       = {Gump, Michael and
                  Hsu, Wei{-}Ning and
                  Glass, James R.},
  editor       = {Meng, Helen and
                  Xu, Bo and
                  Zheng, Thomas Fang},
  title        = {Unsupervised Methods for Evaluating Speech Representations},
  booktitle    = {Interspeech 2020, 21st Annual Conference of the International Speech
                  Communication Association, Virtual Event, Shanghai, China, 25-29 October
                  2020},
  pages        = {170--174},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-2990},
  doi          = {10.21437/INTERSPEECH.2020-2990},
  timestamp    = {Fri, 29 Jan 2021 17:40:16 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/GumpHG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: the proper noun Markov is brace-protected so that sentence-casing
% bibliography styles do not lower-case it.
@inproceedings{DBLP:conf/interspeech/KhuranaLHCLMG20,
  author       = {Sameer Khurana and
                  Antoine Laurent and
                  Wei{-}Ning Hsu and
                  Jan Chorowski and
                  Adrian Lancucki and
                  Ricard Marxer and
                  James R. Glass},
  editor       = {Helen Meng and
                  Bo Xu and
                  Thomas Fang Zheng},
  title        = {A Convolutional Deep {Markov} Model for Unsupervised Speech Representation
                  Learning},
  booktitle    = {Interspeech 2020, 21st Annual Conference of the International Speech
                  Communication Association, Virtual Event, Shanghai, China, 25-29 October
                  2020},
  pages        = {3790--3794},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-3084},
  doi          = {10.21437/INTERSPEECH.2020-3084},
  timestamp    = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/KhuranaLHCLMG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-10336,
  author       = {Hsu, Wei{-}Ning and
                  Lee, Ann and
                  Synnaeve, Gabriel and
                  Hannun, Awni Y.},
  title        = {Semi-Supervised Speech Recognition via Local Prior Matching},
  journal      = {CoRR},
  volume       = {abs/2002.10336},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.10336},
  eprinttype   = {arXiv},
  eprint       = {2002.10336},
  timestamp    = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-10336.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: the proper noun Markov is brace-protected so that sentence-casing
% bibliography styles do not lower-case it.
@article{DBLP:journals/corr/abs-2006-02547,
  author       = {Sameer Khurana and
                  Antoine Laurent and
                  Wei{-}Ning Hsu and
                  Jan Chorowski and
                  Adrian Lancucki and
                  Ricard Marxer and
                  James R. Glass},
  title        = {A Convolutional Deep {Markov} Model for Unsupervised Speech Representation
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2006.02547},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.02547},
  eprinttype   = {arXiv},
  eprint       = {2006.02547},
  timestamp    = {Tue, 09 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-02547.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-01003,
  author       = {Hannun, Awni Y. and
                  Pratap, Vineel and
                  Kahn, Jacob and
                  Hsu, Wei{-}Ning},
  title        = {Differentiable Weighted Finite-State Transducers},
  journal      = {CoRR},
  volume       = {abs/2010.01003},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.01003},
  eprinttype   = {arXiv},
  eprint       = {2010.01003},
  timestamp    = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-01003.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-15454,
  author       = {Hsu, Wei{-}Ning and
                  Harwath, David and
                  Song, Christopher and
                  Glass, James R.},
  title        = {Text-Free Image-to-Speech Synthesis Using Learned Segmental Units},
  journal      = {CoRR},
  volume       = {abs/2012.15454},
  year         = {2020},
  url          = {https://arxiv.org/abs/2012.15454},
  eprinttype   = {arXiv},
  eprint       = {2012.15454},
  timestamp    = {Fri, 08 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2012-15454.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/HsuZWCWWG19,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Weiss, Ron J. and
                  Chung, Yu{-}An and
                  Wang, Yuxuan and
                  Wu, Yonghui and
                  Glass, James R.},
  title        = {Disentangling Correlated Speaker and Noise for Speech Synthesis via
                  Data Augmentation and Adversarial Factorization},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages        = {5901--5905},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ICASSP.2019.8683561},
  doi          = {10.1109/ICASSP.2019.8683561},
  timestamp    = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/HsuZWCWWG19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChungWHZS19,
  author       = {Chung, Yu{-}An and
                  Wang, Yuxuan and
                  Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Skerry{-}Ryan, R. J.},
  title        = {Semi-supervised Training for Improving Data Efficiency in End-to-end
                  Speech Synthesis},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages        = {6940--6944},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ICASSP.2019.8683862},
  doi          = {10.1109/ICASSP.2019.8683862},
  timestamp    = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ChungWHZS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/HsuZWZWWCJCSNP19,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Weiss, Ron J. and
                  Zen, Heiga and
                  Wu, Yonghui and
                  Wang, Yuxuan and
                  Cao, Yuan and
                  Jia, Ye and
                  Chen, Zhifeng and
                  Shen, Jonathan and
                  Nguyen, Patrick and
                  Pang, Ruoming},
  title        = {Hierarchical Generative Modeling for Controllable Speech Synthesis},
  booktitle    = {7th International Conference on Learning Representations, {ICLR} 2019,
                  New Orleans, LA, USA, May 6-9, 2019},
  publisher    = {OpenReview.net},
  year         = {2019},
  url          = {https://openreview.net/forum?id=rygkk305YQ},
  timestamp    = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/HsuZWZWWCJCSNP19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ChungHTG19,
  author       = {Chung, Yu{-}An and
                  Hsu, Wei{-}Ning and
                  Tang, Hao and
                  Glass, James R.},
  editor       = {Kubin, Gernot and
                  Kacic, Zdravko},
  title        = {An Unsupervised Autoregressive Model for Speech Representation Learning},
  booktitle    = {Interspeech 2019, 20th Annual Conference of the International Speech
                  Communication Association, Graz, Austria, 15-19 September 2019},
  pages        = {146--150},
  publisher    = {{ISCA}},
  year         = {2019},
  url          = {https://doi.org/10.21437/Interspeech.2019-1473},
  doi          = {10.21437/INTERSPEECH.2019-1473},
  timestamp    = {Fri, 29 Jan 2021 17:41:10 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ChungHTG19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuHG19,
  author       = {Hsu, Wei{-}Ning and
                  Harwath, David and
                  Glass, James R.},
  editor       = {Kubin, Gernot and
                  Kacic, Zdravko},
  title        = {Transfer Learning from Audio-Visual Grounding to Speech Recognition},
  booktitle    = {Interspeech 2019, 20th Annual Conference of the International Speech
                  Communication Association, Graz, Austria, 15-19 September 2019},
  pages        = {3242--3246},
  publisher    = {{ISCA}},
  year         = {2019},
  url          = {https://doi.org/10.21437/Interspeech.2019-1227},
  doi          = {10.21437/INTERSPEECH.2019-1227},
  timestamp    = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/HsuHG19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: url scheme normalised to https, matching the newer arXiv records
% in this file; the author list is unchanged from the export.
@article{DBLP:journals/corr/abs-1902-08295,
  author       = {Jonathan Shen and
                  Patrick Nguyen and
                  Yonghui Wu and
                  Zhifeng Chen and
                  Mia Xu Chen and
                  Ye Jia and
                  Anjuli Kannan and
                  Tara N. Sainath and
                  Yuan Cao and
                  Chung{-}Cheng Chiu and
                  Yanzhang He and
                  Jan Chorowski and
                  Smit Hinsu and
                  Stella Laurenzo and
                  James Qin and
                  Orhan Firat and
                  Wolfgang Macherey and
                  Suyog Gupta and
                  Ankur Bapna and
                  Shuyuan Zhang and
                  Ruoming Pang and
                  Ron J. Weiss and
                  Rohit Prabhavalkar and
                  Qiao Liang and
                  Benoit Jacob and
                  Bowen Liang and
                  HyoukJoong Lee and
                  Ciprian Chelba and
                  S{\'{e}}bastien Jean and
                  Bo Li and
                  Melvin Johnson and
                  Rohan Anil and
                  Rajat Tibrewal and
                  Xiaobing Liu and
                  Akiko Eriguchi and
                  Navdeep Jaitly and
                  Naveen Ari and
                  Colin Cherry and
                  Parisa Haghani and
                  Otavio Good and
                  Youlong Cheng and
                  Raziel Alvarez and
                  Isaac Caswell and
                  Wei{-}Ning Hsu and
                  Zongheng Yang and
                  Kuan{-}Chieh Wang and
                  Ekaterina Gonina and
                  Katrin Tomanek and
                  Ben Vanik and
                  Zelin Wu and
                  Llion Jones and
                  Mike Schuster and
                  Yanping Huang and
                  Dehao Chen and
                  Kazuki Irie and
                  George F. Foster and
                  John Richardson and
                  Klaus Macherey and
                  Antoine Bruguier and
                  Heiga Zen and
                  Colin Raffel and
                  Shankar Kumar and
                  Kanishka Rao and
                  David Rybach and
                  Matthew Murray and
                  Vijayaditya Peddinti and
                  Maxim Krikun and
                  Michiel Bacchiani and
                  Thomas B. Jablin and
                  Robert Suderman and
                  Ian Williams and
                  Benjamin Lee and
                  Deepti Bhatia and
                  Justin Carlson and
                  Semih Yavuz and
                  Yu Zhang and
                  Ian McGraw and
                  Max Galkin and
                  Qi Ge and
                  Golan Pundak and
                  Chad Whipkey and
                  Todd Wang and
                  Uri Alon and
                  Dmitry Lepikhin and
                  Ye Tian and
                  Sara Sabour and
                  William Chan and
                  Shubham Toshniwal and
                  Baohua Liao and
                  Michael Nirschl and
                  Pat Rondon},
  title        = {Lingvo: a Modular and Scalable Framework for Sequence-to-Sequence
                  Modeling},
  journal      = {CoRR},
  volume       = {abs/1902.08295},
  year         = {2019},
  url          = {https://arxiv.org/abs/1902.08295},
  eprinttype   = {arXiv},
  eprint       = {1902.08295},
  timestamp    = {Tue, 07 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-08295.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: url scheme normalised to https, matching the newer arXiv records
% in this file.
@article{DBLP:journals/corr/abs-1904-03240,
  author       = {Yu{-}An Chung and
                  Wei{-}Ning Hsu and
                  Hao Tang and
                  James R. Glass},
  title        = {An Unsupervised Autoregressive Model for Speech Representation Learning},
  journal      = {CoRR},
  volume       = {abs/1904.03240},
  year         = {2019},
  url          = {https://arxiv.org/abs/1904.03240},
  eprinttype   = {arXiv},
  eprint       = {1904.03240},
  timestamp    = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-03240.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: url scheme normalised to https, matching the newer arXiv records
% in this file.
@article{DBLP:journals/corr/abs-1907-04355,
  author       = {Wei{-}Ning Hsu and
                  David F. Harwath and
                  James R. Glass},
  title        = {Transfer Learning from Audio-Visual Grounding to Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1907.04355},
  year         = {2019},
  url          = {https://arxiv.org/abs/1907.04355},
  eprinttype   = {arXiv},
  eprint       = {1907.04355},
  timestamp    = {Tue, 23 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-04355.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE: url scheme normalised to https, matching the newer arXiv records
% in this file.
@article{DBLP:journals/corr/abs-1911-09602,
  author       = {David Harwath and
                  Wei{-}Ning Hsu and
                  James R. Glass},
  title        = {Learning Hierarchical Discrete Linguistic Units from Visually-Grounded
                  Speech},
  journal      = {CoRR},
  volume       = {abs/1911.09602},
  year         = {2019},
  url          = {https://arxiv.org/abs/1911.09602},
  eprinttype   = {arXiv},
  eprint       = {1911.09602},
  timestamp    = {Tue, 03 Dec 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-09602.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/HsuG18,
  author       = {Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Extracting Domain Invariant Features by Unsupervised Learning for
                  Robust Automatic Speech Recognition},
  booktitle    = {2018 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages        = {5614--5618},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICASSP.2018.8462037},
  doi          = {10.1109/ICASSP.2018.8462037},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/HsuG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icpr/ZhengWXHG18,
  author       = {Zheng, Siqi and
                  Wang, Jianzong and
                  Xiao, Jing and
                  Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {A Noise-Robust Self-Adaptive Multitarget Speaker Detection System},
  booktitle    = {24th International Conference on Pattern Recognition, {ICPR} 2018,
                  Beijing, China, August 20-24, 2018},
  pages        = {1068--1072},
  publisher    = {{IEEE} Computer Society},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICPR.2018.8545395},
  doi          = {10.1109/ICPR.2018.8545395},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icpr/ZhengWXHG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuG18,
  author       = {Hsu, Wei{-}Ning and
                  Glass, James R.},
  editor       = {Yegnanarayana, B.},
  title        = {Scalable Factorized Hierarchical Variational Autoencoder Training},
  booktitle    = {Interspeech 2018, 19th Annual Conference of the International Speech
                  Communication Association, Hyderabad, India, 2-6 September 2018},
  pages        = {1462--1466},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Interspeech.2018-1034},
  doi          = {10.21437/INTERSPEECH.2018-1034},
  timestamp    = {Fri, 21 May 2021 08:16:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/HsuG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuTG18,
  author       = {Hsu, Wei{-}Ning and
                  Tang, Hao and
                  Glass, James R.},
  editor       = {Yegnanarayana, B.},
  title        = {Unsupervised Adaptation with Interpretable Disentangled Representations
                  for Distant Conversational Speech Recognition},
  booktitle    = {Interspeech 2018, 19th Annual Conference of the International Speech
                  Communication Association, Hyderabad, India, 2-6 September 2018},
  pages        = {1576--1580},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Interspeech.2018-1097},
  doi          = {10.21437/INTERSPEECH.2018-1097},
  timestamp    = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/HsuTG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/TangHGG18,
  author       = {Tang, Hao and
                  Hsu, Wei{-}Ning and
                  Grondin, Fran{\c{c}}ois and
                  Glass, James R.},
  editor       = {Yegnanarayana, B.},
  title        = {A Study of Enhancement, Augmentation and Autoencoder Methods for Domain
                  Adaptation in Distant Speech Recognition},
  booktitle    = {Interspeech 2018, 19th Annual Conference of the International Speech
                  Communication Association, Hyderabad, India, 2-6 September 2018},
  pages        = {2928--2932},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Interspeech.2018-2030},
  doi          = {10.21437/INTERSPEECH.2018-2030},
  timestamp    = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/TangHGG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ShonHG18,
  author       = {Shon, Suwon and
                  Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Unsupervised Representation Learning of Speech for Dialect Identification},
  booktitle    = {2018 {IEEE} Spoken Language Technology Workshop, {SLT} 2018, Athens,
                  Greece, December 18-21, 2018},
  pages        = {105--111},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/SLT.2018.8639650},
  doi          = {10.1109/SLT.2018.8639650},
  timestamp    = {Wed, 16 Oct 2019 14:14:53 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/ShonHG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1803-02551,
  author       = {Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Extracting Domain Invariant Features by Unsupervised Learning for
                  Robust Automatic Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1803.02551},
  year         = {2018},
  url          = {http://arxiv.org/abs/1803.02551},
  eprinttype   = {arXiv},
  eprint       = {1803.02551},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-02551.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1804-03201,
  author       = {Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Scalable Factorized Hierarchical Variational Autoencoder Training},
  journal      = {CoRR},
  volume       = {abs/1804.03201},
  year         = {2018},
  url          = {http://arxiv.org/abs/1804.03201},
  eprinttype   = {arXiv},
  eprint       = {1804.03201},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-03201.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-11264,
  author       = {Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Disentangling by Partitioning: {A} Representation Learning Framework
                  for Multimodal Sensory Data},
  journal      = {CoRR},
  volume       = {abs/1805.11264},
  year         = {2018},
  url          = {http://arxiv.org/abs/1805.11264},
  eprinttype   = {arXiv},
  eprint       = {1805.11264},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1805-11264.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-04841,
  author       = {Tang, Hao and
                  Hsu, Wei{-}Ning and
                  Grondin, Fran{\c{c}}ois and
                  Glass, James R.},
  title        = {A Study of Enhancement, Augmentation, and Autoencoder Methods for
                  Domain Adaptation in Distant Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1806.04841},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.04841},
  eprinttype   = {arXiv},
  eprint       = {1806.04841},
  timestamp    = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-04841.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-04872,
  author       = {Hsu, Wei{-}Ning and
                  Tang, Hao and
                  Glass, James R.},
  title        = {Unsupervised Adaptation with Interpretable Disentangled Representations
                  for Distant Conversational Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1806.04872},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.04872},
  eprinttype   = {arXiv},
  eprint       = {1806.04872},
  timestamp    = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-04872.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1808-10128,
  author       = {Chung, Yu{-}An and
                  Wang, Yuxuan and
                  Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Skerry{-}Ryan, R. J.},
  title        = {Semi-Supervised Training for Improving Data Efficiency in End-to-End
                  Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/1808.10128},
  year         = {2018},
  url          = {http://arxiv.org/abs/1808.10128},
  eprinttype   = {arXiv},
  eprint       = {1808.10128},
  timestamp    = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1808-10128.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-04458,
  author       = {Shon, Suwon and
                  Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Unsupervised Representation Learning of Speech for Dialect Identification},
  journal      = {CoRR},
  volume       = {abs/1809.04458},
  year         = {2018},
  url          = {http://arxiv.org/abs/1809.04458},
  eprinttype   = {arXiv},
  eprint       = {1809.04458},
  timestamp    = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1809-04458.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1810-07217,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Weiss, Ron J. and
                  Zen, Heiga and
                  Wu, Yonghui and
                  Wang, Yuxuan and
                  Cao, Yuan and
                  Jia, Ye and
                  Chen, Zhifeng and
                  Shen, Jonathan and
                  Nguyen, Patrick and
                  Pang, Ruoming},
  title        = {Hierarchical Generative Modeling for Controllable Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/1810.07217},
  year         = {2018},
  url          = {http://arxiv.org/abs/1810.07217},
  eprinttype   = {arXiv},
  eprint       = {1810.07217},
  timestamp    = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1810-07217.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asru/HsuZG17,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  title        = {Unsupervised Domain Adaptation for Robust Speech Recognition via Variational
                  Autoencoder-Based Data Augmentation},
  booktitle    = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop,
                  {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages        = {16--23},
  publisher    = {{IEEE}},
  year         = {2017},
  url          = {https://doi.org/10.1109/ASRU.2017.8268911},
  doi          = {10.1109/ASRU.2017.8268911},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/HsuZG17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asru/NajafianHAG17,
  author       = {Najafian, Maryam and
                  Hsu, Wei{-}Ning and
                  Ali, Ahmed and
                  Glass, James R.},
  title        = {Automatic Speech Recognition of {Arabic} Multi-genre Broadcast Media},
  booktitle    = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop,
                  {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages        = {353--359},
  publisher    = {{IEEE}},
  year         = {2017},
  url          = {https://doi.org/10.1109/ASRU.2017.8268957},
  doi          = {10.1109/ASRU.2017.8268957},
  timestamp    = {Fri, 03 Apr 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/NajafianHAG17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuZG17,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  editor       = {Lacerda, Francisco},
  title        = {Learning Latent Representations for Speech Generation and Transformation},
  booktitle    = {Interspeech 2017, 18th Annual Conference of the International Speech
                  Communication Association, Stockholm, Sweden, August 20-24, 2017},
  pages        = {1273--1277},
  publisher    = {{ISCA}},
  year         = {2017},
  url          = {https://doi.org/10.21437/Interspeech.2017-349},
  doi          = {10.21437/INTERSPEECH.2017-349},
  timestamp    = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/HsuZG17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HsuZG17,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {Unsupervised Learning of Disentangled and Interpretable Representations
                  from Sequential Data},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {1878--1889},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/0a0a0c8aaa00ade50f74a3f0ca981ed7-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/HsuZG17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HsuZG17,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  title        = {Learning Latent Representations for Speech Generation and Transformation},
  journal      = {CoRR},
  volume       = {abs/1704.04222},
  year         = {2017},
  url          = {http://arxiv.org/abs/1704.04222},
  eprinttype   = {arXiv},
  eprint       = {1704.04222},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HsuZG17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HsuZG17aa,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  title        = {Unsupervised Domain Adaptation for Robust Speech Recognition via Variational
                  Autoencoder-Based Data Augmentation},
  journal      = {CoRR},
  volume       = {abs/1707.06265},
  year         = {2017},
  url          = {http://arxiv.org/abs/1707.06265},
  eprinttype   = {arXiv},
  eprint       = {1707.06265},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HsuZG17aa.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1709-07902,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  title        = {Unsupervised Learning of Disentangled and Interpretable Representations
                  from Sequential Data},
  journal      = {CoRR},
  volume       = {abs/1709.07902},
  year         = {2017},
  url          = {http://arxiv.org/abs/1709.07902},
  eprinttype   = {arXiv},
  eprint       = {1709.07902},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1709-07902.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/coling/RomeoMBMBHZMG16,
  author       = {Romeo, Salvatore and
                  Da San Martino, Giovanni and
                  Barr{\'{o}}n{-}Cede{\~{n}}o, Alberto and
                  Moschitti, Alessandro and
                  Belinkov, Yonatan and
                  Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Mohtarami, Mitra and
                  Glass, James R.},
  editor       = {Calzolari, Nicoletta and
                  Matsumoto, Yuji and
                  Prasad, Rashmi},
  title        = {Neural Attention for Learning to Rank Questions in Community Question
                  Answering},
  booktitle    = {{COLING} 2016, 26th International Conference on Computational Linguistics,
                  Proceedings of the Conference: Technical Papers, December 11-16, 2016,
                  Osaka, Japan},
  pages        = {1734--1745},
  publisher    = {{ACL}},
  year         = {2016},
  url          = {https://aclanthology.org/C16-1163/},
  timestamp    = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/coling/RomeoMBMBHZMG16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuZLG16,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Lee, Ann and
                  Glass, James R.},
  editor       = {Morgan, Nelson},
  title        = {Exploiting Depth and Highway Connections in Convolutional Recurrent
                  Deep Neural Networks for Speech Recognition},
  booktitle    = {Interspeech 2016, 17th Annual Conference of the International Speech
                  Communication Association, San Francisco, CA, {USA}, September 8-12,
                  2016},
  pages        = {395--399},
  publisher    = {{ISCA}},
  year         = {2016},
  url          = {https://doi.org/10.21437/Interspeech.2016-515},
  doi          = {10.21437/INTERSPEECH.2016-515},
  timestamp    = {Mon, 26 Jun 2023 16:43:56 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/HsuZLG16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/semeval/MohtaramiBHZLBC16,
  author       = {Mohtarami, Mitra and
                  Belinkov, Yonatan and
                  Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Lei, Tao and
                  Bar, Kfir and
                  Cyphers, Scott and
                  Glass, James R.},
  editor       = {Bethard, Steven and
                  Cer, Daniel M. and
                  Carpuat, Marine and
                  Jurgens, David and
                  Nakov, Preslav and
                  Zesch, Torsten},
  title        = {{SLS} at {SemEval-2016} Task 3: Neural-based Approaches for Ranking
                  in Community Question Answering},
  booktitle    = {Proceedings of the 10th International Workshop on Semantic Evaluation,
                  {SemEval@NAACL-HLT} 2016, San Diego, CA, {USA}, June 16-17, 2016},
  pages        = {828--835},
  publisher    = {Association for Computational Linguistics},
  year         = {2016},
  url          = {https://doi.org/10.18653/v1/s16-1128},
  doi          = {10.18653/V1/S16-1128},
  timestamp    = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/semeval/MohtaramiBHZLBC16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HanaiHG16,
  author       = {Al Hanai, Tuka and
                  Hsu, Wei{-}Ning and
                  Glass, James R.},
  title        = {Development of the {MIT} {ASR} System for the 2016 {Arabic} Multi-genre
                  Broadcast Challenge},
  booktitle    = {2016 {IEEE} Spoken Language Technology Workshop, {SLT} 2016, San Diego,
                  CA, {USA}, December 13-16, 2016},
  pages        = {299--304},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/SLT.2016.7846280},
  doi          = {10.1109/SLT.2016.7846280},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/HanaiHG16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HsuZG16,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  title        = {A Prioritized Grid Long Short-Term Memory {RNN} for Speech Recognition},
  booktitle    = {2016 {IEEE} Spoken Language Technology Workshop, {SLT} 2016, San Diego,
                  CA, {USA}, December 13-16, 2016},
  pages        = {467--473},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/SLT.2016.7846305},
  doi          = {10.1109/SLT.2016.7846305},
  timestamp    = {Thu, 07 Jun 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/HsuZG16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HsuZG16,
  author       = {Hsu, Wei{-}Ning and
                  Zhang, Yu and
                  Glass, James R.},
  title        = {Recurrent Neural Network Encoder with Attention for Community Question
                  Answering},
  journal      = {CoRR},
  volume       = {abs/1603.07044},
  year         = {2016},
  url          = {http://arxiv.org/abs/1603.07044},
  eprinttype   = {arXiv},
  eprint       = {1603.07044},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HsuZG16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/HsuL15,
  author       = {Hsu, Wei{-}Ning and
                  Lin, Hsuan{-}Tien},
  editor       = {Bonet, Blai and
                  Koenig, Sven},
  title        = {Active Learning by Learning},
  booktitle    = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence,
                  January 25-30, 2015, Austin, Texas, {USA}},
  pages        = {2659--2665},
  publisher    = {{AAAI} Press},
  year         = {2015},
  url          = {https://doi.org/10.1609/aaai.v29i1.9597},
  doi          = {10.1609/AAAI.V29I1.9597},
  timestamp    = {Mon, 18 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HsuL15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChungHLL15,
  author       = {Chung, Cheng{-}Tao and
                  Hsu, Wei{-}Ning and
                  Lee, Cheng{-}Yi and
                  Lee, Lin{-}Shan},
  title        = {Enhancing Automatically Discovered Multi-level Acoustic Patterns Considering
                  Context Consistency with Applications in Spoken Term Detection},
  booktitle    = {2015 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia,
                  April 19-24, 2015},
  pages        = {5231--5235},
  publisher    = {{IEEE}},
  year         = {2015},
  url          = {https://doi.org/10.1109/ICASSP.2015.7178969},
  doi          = {10.1109/ICASSP.2015.7178969},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ChungHLL15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ChungHLL15,
  author       = {Chung, Cheng{-}Tao and
                  Hsu, Wei{-}Ning and
                  Lee, Cheng{-}Yi and
                  Lee, Lin{-}Shan},
  title        = {Enhancing Automatically Discovered Multi-level Acoustic Patterns Considering
                  Context Consistency With Applications in Spoken Term Detection},
  journal      = {CoRR},
  volume       = {abs/1509.02217},
  year         = {2015},
  url          = {http://arxiv.org/abs/1509.02217},
  eprinttype   = {arXiv},
  eprint       = {1509.02217},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ChungHLL15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics