Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Wei-Ning Hsu
@article{DBLP:journals/corr/abs-2403-14402,
  author     = {HyoJung Han and Mohamed Anwar and Juan Pino and Wei{-}Ning Hsu and Marine Carpuat and Bowen Shi and Changhan Wang},
  title      = {{XLAVS-R:} Cross-Lingual Audio-Visual Speech Representation Learning for Noise-Robust Speech Perception},
  journal    = {CoRR},
  volume     = {abs/2403.14402},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2403.14402},
  doi        = {10.48550/ARXIV.2403.14402},
  eprinttype = {arXiv},
  eprint     = {2403.14402},
  timestamp  = {Tue, 09 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-14402.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/ChenTYDKCTDSGIP23,
  author     = {Peng{-}Jen Chen and Kevin Tran and Yilin Yang and Jingfei Du and Justine Kao and Yu{-}An Chung and Paden Tomasello and Paul{-}Ambroise Duquenne and Holger Schwenk and Hongyu Gong and Hirofumi Inaguma and Sravya Popuri and Changhan Wang and Juan Pino and Wei{-}Ning Hsu and Ann Lee},
  editor     = {Anna Rogers and Jordan L. Boyd{-}Graber and Naoaki Okazaki},
  title      = {Speech-to-Speech Translation for a Real-world Unwritten Language},
  booktitle  = {Findings of the Association for Computational Linguistics: {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  pages      = {4969--4983},
  publisher  = {Association for Computational Linguistics},
  year       = {2023},
  url        = {https://doi.org/10.18653/v1/2023.findings-acl.307},
  doi        = {10.18653/V1/2023.FINDINGS-ACL.307},
  timestamp  = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/ChenTYDKCTDSGIP23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/WangICK0HA023,
  author     = {Changhan Wang and Hirofumi Inaguma and Peng{-}Jen Chen and Ilia Kulikov and Yun Tang and Wei{-}Ning Hsu and Michael Auli and Juan Pino},
  editor     = {Anna Rogers and Jordan L. Boyd{-}Graber and Naoaki Okazaki},
  title      = {Simple and Effective Unsupervised Speech Translation},
  booktitle  = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), {ACL} 2023, Toronto, Canada, July 9-14, 2023},
  pages      = {10771--10784},
  publisher  = {Association for Computational Linguistics},
  year       = {2023},
  url        = {https://doi.org/10.18653/v1/2023.acl-long.602},
  doi        = {10.18653/V1/2023.ACL-LONG.602},
  timestamp  = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/WangICK0HA023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the model name is spelled "AV-data2vec" and is brace-protected so sentence-casing styles keep it intact.
@inproceedings{DBLP:conf/asru/LianBHA23,
  author     = {Jiachen Lian and Alexei Baevski and Wei{-}Ning Hsu and Michael Auli},
  title      = {{AV-data2vec}: Self-Supervised Learning of Audio-Visual Speech Representations with Contextualized Target Representations},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2023, Taipei, Taiwan, December 16-20, 2023},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ASRU57964.2023.10389642},
  doi        = {10.1109/ASRU57964.2023.10389642},
  timestamp  = {Tue, 13 Feb 2024 21:21:14 +0100},
  biburl     = {https://dblp.org/rec/conf/asru/LianBHA23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the mixed-case system name "ReVISE" is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/cvpr/HsuRSDA23,
  author     = {Wei{-}Ning Hsu and Tal Remez and Bowen Shi and Jacob Donley and Yossi Adi},
  title      = {{ReVISE}: Self-Supervised Speech Resynthesis with Visual Input for Universal and Generalized Speech Regeneration},
  booktitle  = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages      = {18796--18806},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/CVPR52729.2023.01802},
  doi        = {10.1109/CVPR52729.2023.01802},
  timestamp  = {Tue, 29 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cvpr/HsuRSDA23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/ChouCHLBCBA23,
  author     = {Ju{-}Chieh Chou and Chung{-}Ming Chien and Wei{-}Ning Hsu and Karen Livescu and Arun Babu and Alexis Conneau and Alexei Baevski and Michael Auli},
  editor     = {Houda Bouamor and Juan Pino and Kalika Bali},
  title      = {Toward Joint Language Modeling for Speech Units and Text},
  booktitle  = {Findings of the Association for Computational Linguistics: {EMNLP} 2023, Singapore, December 6-10, 2023},
  pages      = {6582--6593},
  publisher  = {Association for Computational Linguistics},
  year       = {2023},
  url        = {https://doi.org/10.18653/v1/2023.findings-emnlp.438},
  doi        = {10.18653/V1/2023.FINDINGS-EMNLP.438},
  timestamp  = {Fri, 12 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/emnlp/ChouCHLBCBA23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/DiwanYHTCHM23,
  author     = {Anuj Diwan and Ching{-}Feng Yeh and Wei{-}Ning Hsu and Paden Tomasello and Eunsol Choi and David Harwath and Abdelrahman Mohamed},
  title      = {Continual Learning for On-Device Speech Recognition Using Disentangled Conformers},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10095484},
  doi        = {10.1109/ICASSP49357.2023.10095484},
  timestamp  = {Sun, 05 Nov 2023 16:51:21 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/DiwanYHTCHM23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ElkahkyHTNAACDM23,
  author     = {Ali Elkahky and Wei{-}Ning Hsu and Paden Tomasello and Tu Anh Nguyen and Robin Algayres and Yossi Adi and Jade Copet and Emmanuel Dupoux and Abdelrahman Mohamed},
  title      = {Do Coarser Units Benefit Cluster Prediction-Based Speech Pre-Training?},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10096788},
  doi        = {10.1109/ICASSP49357.2023.10096788},
  timestamp  = {Sun, 05 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/ElkahkyHTNAACDM23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the model name is spelled "HuBERT" and is brace-protected so sentence-casing styles keep its capitalization.
@inproceedings{DBLP:conf/icassp/FazelZarandiH23,
  author     = {Maryam Fazel{-}Zarandi and Wei{-}Ning Hsu},
  title      = {Cocktail {HuBERT}: Generalized Self-Supervised Pre-Training for Mixture and Single-Source Speech},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10096630},
  doi        = {10.1109/ICASSP49357.2023.10096630},
  timestamp  = {Sun, 05 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/FazelZarandiH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SanabriaHBA23,
  author     = {Ramon Sanabria and Wei{-}Ning Hsu and Alexei Baevski and Michael Auli},
  title      = {Measuring the Impact of Domain Factors in Self-Supervised Pre-Training},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2023 - Workshops, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSPW59220.2023.10193184},
  doi        = {10.1109/ICASSPW59220.2023.10193184},
  timestamp  = {Mon, 07 Aug 2023 15:56:26 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/SanabriaHBA23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/AghajanyanYCHHZ23,
  author     = {Armen Aghajanyan and Lili Yu and Alexis Conneau and Wei{-}Ning Hsu and Karen Hambardzumyan and Susan Zhang and Stephen Roller and Naman Goyal and Omer Levy and Luke Zettlemoyer},
  editor     = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title      = {Scaling Laws for Generative Mixed-Modal Language Models},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {265--279},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/aghajanyan23a.html},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/AghajanyanYCHHZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/BaevskiBHA23,
  author     = {Alexei Baevski and Arun Babu and Wei{-}Ning Hsu and Michael Auli},
  editor     = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title      = {Efficient Self-supervised Learning with Contextualized Target Representations for Vision, Speech and Language},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {1416--1429},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/baevski23a.html},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/BaevskiBHA23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LeVSKSMWMAMH23,
  author     = {Matthew Le and Apoorv Vyas and Bowen Shi and Brian Karrer and Leda Sari and Rashel Moritz and Mary Williamson and Vimal Manohar and Yossi Adi and Jay Mahadeokar and Wei{-}Ning Hsu},
  editor     = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  title      = {Voicebox: Text-Guided Multilingual Universal Speech Generation at Scale},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper\_files/paper/2023/hash/2d8911db9ecedf866015091b28946e15-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/LeVSKSMWMAMH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: "DinoSR" is brace-protected against sentence-casing. Last author uses the form "James R. Glass", matching the CoRR record of this paper.
@inproceedings{DBLP:conf/nips/LiuCAHG23,
  author     = {Alexander H. Liu and Heng{-}Jui Chang and Michael Auli and Wei{-}Ning Hsu and James R. Glass},
  editor     = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  title      = {{DinoSR}: Self-Distillation and Online Clustering for Self-supervised Speech Representation Learning},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper\_files/paper/2023/hash/b6404bf461c3c3186bdf5f55756af908-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/LiuCAHG23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-00652,
  author     = {Ching{-}Feng Yeh and Wei{-}Ning Hsu and Paden Tomasello and Abdelrahman Mohamed},
  title      = {Efficient Speech Representation Learning with Low-Bit Quantization},
  journal    = {CoRR},
  volume     = {abs/2301.00652},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.00652},
  doi        = {10.48550/ARXIV.2301.00652},
  eprinttype = {arXiv},
  eprint     = {2301.00652},
  timestamp  = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-00652.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-03728,
  author     = {Armen Aghajanyan and Lili Yu and Alexis Conneau and Wei{-}Ning Hsu and Karen Hambardzumyan and Susan Zhang and Stephen Roller and Naman Goyal and Omer Levy and Luke Zettlemoyer},
  title      = {Scaling Laws for Generative Mixed-Modal Language Models},
  journal    = {CoRR},
  volume     = {abs/2301.03728},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.03728},
  doi        = {10.48550/ARXIV.2301.03728},
  eprinttype = {arXiv},
  eprint     = {2301.03728},
  timestamp  = {Thu, 19 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-03728.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the model name "AV-data2vec" is brace-protected so sentence-casing styles keep its capitalization.
@article{DBLP:journals/corr/abs-2302-06419,
  author     = {Jiachen Lian and Alexei Baevski and Wei{-}Ning Hsu and Michael Auli},
  title      = {{AV-data2vec}: Self-supervised Learning of Audio-Visual Speech Representations with Contextualized Target Representations},
  journal    = {CoRR},
  volume     = {abs/2302.06419},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.06419},
  doi        = {10.48550/ARXIV.2302.06419},
  eprinttype = {arXiv},
  eprint     = {2302.06419},
  timestamp  = {Mon, 20 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-06419.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the mixed-case corpus name "MuAViC" is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2303-00628,
  author     = {Mohamed Anwar and Bowen Shi and Vedanuj Goswami and Wei{-}Ning Hsu and Juan Pino and Changhan Wang},
  title      = {{MuAViC}: {A} Multilingual Audio-Visual Corpus for Robust Speech Recognition and Robust Speech-to-Text Translation},
  journal    = {CoRR},
  volume     = {abs/2303.00628},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.00628},
  doi        = {10.48550/ARXIV.2303.00628},
  eprinttype = {arXiv},
  eprint     = {2303.00628},
  timestamp  = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-00628.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the model name "HuBERT" is brace-protected so sentence-casing styles keep its capitalization.
@article{DBLP:journals/corr/abs-2303-11131,
  author     = {Maryam Fazel{-}Zarandi and Wei{-}Ning Hsu},
  title      = {Cocktail {HuBERT}: Generalized Self-Supervised Pre-training for Mixture and Single-Source Speech},
  journal    = {CoRR},
  volume     = {abs/2303.11131},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.11131},
  doi        = {10.48550/ARXIV.2303.11131},
  eprinttype = {arXiv},
  eprint     = {2303.11131},
  timestamp  = {Wed, 22 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-11131.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the mixed-case model name "DinoSR" is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2305-10005,
  author     = {Alexander H. Liu and Heng{-}Jui Chang and Michael Auli and Wei{-}Ning Hsu and James R. Glass},
  title      = {{DinoSR}: Self-Distillation and Online Clustering for Self-supervised Speech Representation Learning},
  journal    = {CoRR},
  volume     = {abs/2305.10005},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.10005},
  doi        = {10.48550/ARXIV.2305.10005},
  eprinttype = {arXiv},
  eprint     = {2305.10005},
  timestamp  = {Wed, 24 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-10005.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the number is written "1,000+" with no space after the thousands separator.
@article{DBLP:journals/corr/abs-2305-13516,
  author     = {Vineel Pratap and Andros Tjandra and Bowen Shi and Paden Tomasello and Arun Babu and Sayani Kundu and Ali Elkahky and Zhaoheng Ni and Apoorv Vyas and Maryam Fazel{-}Zarandi and Alexei Baevski and Yossi Adi and Xiaohui Zhang and Wei{-}Ning Hsu and Alexis Conneau and Michael Auli},
  title      = {Scaling Speech Technology to 1,000+ Languages},
  journal    = {CoRR},
  volume     = {abs/2305.13516},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.13516},
  doi        = {10.48550/ARXIV.2305.13516},
  eprinttype = {arXiv},
  eprint     = {2305.13516},
  timestamp  = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-13516.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2306-15687,
  author     = {Matthew Le and Apoorv Vyas and Bowen Shi and Brian Karrer and Leda Sari and Rashel Moritz and Mary Williamson and Vimal Manohar and Yossi Adi and Jay Mahadeokar and Wei{-}Ning Hsu},
  title      = {Voicebox: Text-Guided Multilingual Universal Speech Generation at Scale},
  journal    = {CoRR},
  volume     = {abs/2306.15687},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.15687},
  doi        = {10.48550/ARXIV.2306.15687},
  eprinttype = {arXiv},
  eprint     = {2306.15687},
  timestamp  = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-15687.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2308-05725,
  author     = {Tu Anh Nguyen and Wei{-}Ning Hsu and Antony D'Avirro and Bowen Shi and Itai Gat and Maryam Fazel{-}Zarandi and Tal Remez and Jade Copet and Gabriel Synnaeve and Michael Hassid and Felix Kreuk and Yossi Adi and Emmanuel Dupoux},
  title      = {{EXPRESSO:} {A} Benchmark and Analysis of Discrete Expressive Speech Resynthesis},
  journal    = {CoRR},
  volume     = {abs/2308.05725},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2308.05725},
  doi        = {10.48550/ARXIV.2308.05725},
  eprinttype = {arXiv},
  eprint     = {2308.05725},
  timestamp  = {Wed, 23 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2308-05725.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the acronyms "SSL" and "TTS" are both brace-protected so sentence-casing styles keep them upper-case.
@article{DBLP:journals/corr/abs-2309-17020,
  author     = {Po{-}Chun Hsu and Ali Elkahky and Wei{-}Ning Hsu and Yossi Adi and Tu Anh Nguyen and Jade Copet and Emmanuel Dupoux and Hung{-}yi Lee and Abdelrahman Mohamed},
  title      = {Low-Resource Self-Supervised Learning with {SSL}-Enhanced {TTS}},
  journal    = {CoRR},
  volume     = {abs/2309.17020},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.17020},
  doi        = {10.48550/ARXIV.2309.17020},
  eprinttype = {arXiv},
  eprint     = {2309.17020},
  timestamp  = {Tue, 17 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-17020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2310-08715,
  author     = {Ju{-}Chieh Chou and Chung{-}Ming Chien and Wei{-}Ning Hsu and Karen Livescu and Arun Babu and Alexis Conneau and Alexei Baevski and Michael Auli},
  title      = {Toward Joint Language Modeling for Speech Units and Text},
  journal    = {CoRR},
  volume     = {abs/2310.08715},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.08715},
  doi        = {10.48550/ARXIV.2310.08715},
  eprinttype = {arXiv},
  eprint     = {2310.08715},
  timestamp  = {Wed, 25 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-08715.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Author: "Matthew Le" is used here to match the other records in this file (presumably the same person as "Matt Le"; verify against the paper).
@article{DBLP:journals/corr/abs-2310-16338,
  author     = {Alexander H. Liu and Matthew Le and Apoorv Vyas and Bowen Shi and Andros Tjandra and Wei{-}Ning Hsu},
  title      = {Generative Pre-training for Speech with Flow Matching},
  journal    = {CoRR},
  volume     = {abs/2310.16338},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.16338},
  doi        = {10.48550/ARXIV.2310.16338},
  eprinttype = {arXiv},
  eprint     = {2310.16338},
  timestamp  = {Tue, 31 Oct 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-16338.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2311-02772,
  author     = {Sungho Jeon and Ching{-}Feng Yeh and Hakan Inan and Wei{-}Ning Hsu and Rashi Rungta and Yashar Mehdad and Daniel Bikel},
  title      = {Attention or Convolution: Transformer Encoders in Audio Language Models for Inference Efficiency},
  journal    = {CoRR},
  volume     = {abs/2311.02772},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2311.02772},
  doi        = {10.48550/ARXIV.2311.02772},
  eprinttype = {arXiv},
  eprint     = {2311.02772},
  timestamp  = {Wed, 08 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2311-02772.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2312-15821,
  author     = {Apoorv Vyas and Bowen Shi and Matthew Le and Andros Tjandra and Yi{-}Chiao Wu and Baishan Guo and Jiemin Zhang and Xinyue Zhang and Robert Adkins and William Ngan and Jeff Wang and Ivan Cruz and Bapi Akula and Akinniyi Akinyemi and Brian Ellis and Rashel Moritz and Yael Yungster and Alice Rakotoarison and Liang Tan and Chris Summers and Carleigh Wood and Joshua Lane and Mary Williamson and Wei{-}Ning Hsu},
  title      = {Audiobox: Unified Audio Generation with Natural Language Prompts},
  journal    = {CoRR},
  volume     = {abs/2312.15821},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.15821},
  doi        = {10.48550/ARXIV.2312.15821},
  eprinttype = {arXiv},
  eprint     = {2312.15821},
  timestamp  = {Tue, 16 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-15821.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/TangGDWHGBLMAP22,
  author     = {Yun Tang and Hongyu Gong and Ning Dong and Changhan Wang and Wei{-}Ning Hsu and Jiatao Gu and Alexei Baevski and Xian Li and Abdelrahman Mohamed and Michael Auli and Juan Miguel Pino},
  editor     = {Smaranda Muresan and Preslav Nakov and Aline Villavicencio},
  title      = {Unified Speech-Text Pre-training for Speech Translation and Recognition},
  booktitle  = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), {ACL} 2022, Dublin, Ireland, May 22-27, 2022},
  pages      = {1488--1499},
  publisher  = {Association for Computational Linguistics},
  year       = {2022},
  url        = {https://doi.org/10.18653/v1/2022.acl-long.105},
  doi        = {10.18653/V1/2022.ACL-LONG.105},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/TangGDWHGBLMAP22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/LeeCWGPMPAHTPH22,
  author     = {Ann Lee and Peng{-}Jen Chen and Changhan Wang and Jiatao Gu and Sravya Popuri and Xutai Ma and Adam Polyak and Yossi Adi and Qing He and Yun Tang and Juan Pino and Wei{-}Ning Hsu},
  editor     = {Smaranda Muresan and Preslav Nakov and Aline Villavicencio},
  title      = {Direct Speech-to-Speech Translation With Discrete Units},
  booktitle  = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), {ACL} 2022, Dublin, Ireland, May 22-27, 2022},
  pages      = {3327--3339},
  publisher  = {Association for Computational Linguistics},
  year       = {2022},
  url        = {https://doi.org/10.18653/v1/2022.acl-long.235},
  doi        = {10.18653/V1/2022.ACL-LONG.235},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/LeeCWGPMPAHTPH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/KharitonovLPACL22,
  author     = {Eugene Kharitonov and Ann Lee and Adam Polyak and Yossi Adi and Jade Copet and Kushal Lakhotia and Tu Anh Nguyen and Morgane Rivi{\`{e}}re and Abdelrahman Mohamed and Emmanuel Dupoux and Wei{-}Ning Hsu},
  editor     = {Smaranda Muresan and Preslav Nakov and Aline Villavicencio},
  title      = {Text-Free Prosody-Aware Generative Spoken Language Modeling},
  booktitle  = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), {ACL} 2022, Dublin, Ireland, May 22-27, 2022},
  pages      = {8666--8681},
  publisher  = {Association for Computational Linguistics},
  year       = {2022},
  url        = {https://doi.org/10.18653/v1/2022.acl-long.593},
  doi        = {10.18653/V1/2022.ACL-LONG.593},
  timestamp  = {Thu, 06 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acl/KharitonovLPACL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/KreukPCKNRHMDA22,
  author     = {Felix Kreuk and Adam Polyak and Jade Copet and Eugene Kharitonov and Tu Anh Nguyen and Morgane Rivi{\`{e}}re and Wei{-}Ning Hsu and Abdelrahman Mohamed and Emmanuel Dupoux and Yossi Adi},
  editor     = {Yoav Goldberg and Zornitsa Kozareva and Yue Zhang},
  title      = {Textless Speech Emotion Conversion using Discrete {\&} Decomposed Representations},
  booktitle  = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  pages      = {11200--11214},
  publisher  = {Association for Computational Linguistics},
  year       = {2022},
  url        = {https://doi.org/10.18653/v1/2022.emnlp-main.769},
  doi        = {10.18653/V1/2022.EMNLP-MAIN.769},
  timestamp  = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/emnlp/KreukPCKNRHMDA22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/ShiHLM22,
  author     = {Bowen Shi and Wei{-}Ning Hsu and Kushal Lakhotia and Abdelrahman Mohamed},
  title      = {Learning Audio-Visual Speech Representation by Masked Multimodal Cluster Prediction},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=Z1Qlm11uOM},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/ShiHLM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/BaevskiHXBGA22,
  author     = {Alexei Baevski and Wei{-}Ning Hsu and Qiantong Xu and Arun Babu and Jiatao Gu and Michael Auli},
  editor     = {Kamalika Chaudhuri and Stefanie Jegelka and Le Song and Csaba Szepesv{\'{a}}ri and Gang Niu and Sivan Sabato},
  title      = {data2vec: {A} General Framework for Self-supervised Learning in Speech, Vision and Language},
  booktitle  = {International Conference on Machine Learning, {ICML} 2022, 17-23 July 2022, Baltimore, Maryland, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {1298--1312},
  publisher  = {{PMLR}},
  year       = {2022},
  url        = {https://proceedings.mlr.press/v162/baevski22a.html},
  timestamp  = {Tue, 12 Jul 2022 17:36:52 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/BaevskiHXBGA22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/LiuLHABG22,
  author     = {Alexander H. Liu and Cheng{-}I Lai and Wei{-}Ning Hsu and Michael Auli and Alexei Baevski and James R. Glass},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {Simple and Effective Unsupervised Speech Synthesis},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {843--847},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-11071},
  doi        = {10.21437/INTERSPEECH.2022-11071},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/LiuLHABG22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ShiHM22,
  author     = {Bowen Shi and Wei{-}Ning Hsu and Abdelrahman Mohamed},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {Robust Self-Supervised Audio-Visual Speech Recognition},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {2118--2122},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-99},
  doi        = {10.21437/INTERSPEECH.2022-99},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ShiHM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/VyasHAB22,
  author     = {Apoorv Vyas and Wei{-}Ning Hsu and Michael Auli and Alexei Baevski},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {On-demand compute reduction with stochastic wav2vec 2.0},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {3048--3052},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-10584},
  doi        = {10.21437/INTERSPEECH.2022-10584},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/VyasHAB22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the model name "AV-HuBERT" is brace-protected so sentence-casing styles keep its capitalization.
@inproceedings{DBLP:conf/interspeech/ShiMH22,
  author     = {Bowen Shi and Abdelrahman Mohamed and Wei{-}Ning Hsu},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {Learning Lip-Based Audio-Visual Speaker Embeddings with {AV-HuBERT}},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {4785--4789},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-885},
  doi        = {10.21437/INTERSPEECH.2022-885},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ShiMH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/PopuriCWPAGHL22,
  author     = {Sravya Popuri and Peng{-}Jen Chen and Changhan Wang and Juan Pino and Yossi Adi and Jiatao Gu and Wei{-}Ning Hsu and Ann Lee},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {Enhanced Direct Speech-to-Speech Translation Using Self-supervised Pre-training and Data Augmentation},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {5195--5199},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-11032},
  doi        = {10.21437/INTERSPEECH.2022-11032},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/PopuriCWPAGHL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Editor: the last editor is given in "Last, First" form so the compound surname is parsed as one unit (presumably "Meza Ruiz" is the full family name; verify).
@inproceedings{DBLP:conf/naacl/LeeGDSCWPAPGH22,
  author     = {Ann Lee and Hongyu Gong and Paul{-}Ambroise Duquenne and Holger Schwenk and Peng{-}Jen Chen and Changhan Wang and Sravya Popuri and Yossi Adi and Juan Miguel Pino and Jiatao Gu and Wei{-}Ning Hsu},
  editor     = {Marine Carpuat and Marie{-}Catherine de Marneffe and Meza Ru{\'{\i}}z, Iv{\'{a}}n Vladimir},
  title      = {Textless Speech-to-Speech Translation on Real Data},
  booktitle  = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, {NAACL} 2022, Seattle, WA, United States, July 10-15, 2022},
  pages      = {860--872},
  publisher  = {Association for Computational Linguistics},
  year       = {2022},
  url        = {https://doi.org/10.18653/v1/2022.naacl-main.63},
  doi        = {10.18653/V1/2022.NAACL-MAIN.63},
  timestamp  = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/naacl/LeeGDSCWPAPGH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: "u-HuBERT" is brace-protected against sentence-casing. Editor given names are stored in full so the style decides on abbreviation (expanded forms to be confirmed against the proceedings front matter).
@inproceedings{DBLP:conf/nips/HsuS22,
  author     = {Wei{-}Ning Hsu and Bowen Shi},
  editor     = {Sanmi Koyejo and Shakir Mohamed and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho and Alice Oh},
  title      = {{u-HuBERT}: Unified Mixed-Modal Speech Pretraining And Zero-Shot Transfer to Unlabeled Modality},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper\_files/paper/2022/hash/853e781cb2af58956ed5c89aa59da3fc-Abstract-Conference.html},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/HsuS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/LiuHAB22,
  author     = {Alexander H. Liu and Wei{-}Ning Hsu and Michael Auli and Alexei Baevski},
  title      = {Towards End-to-End Unsupervised Speech Recognition},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar, January 9-12, 2023},
  pages      = {221--228},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/SLT54892.2023.10023187},
  doi        = {10.1109/SLT54892.2023.10023187},
  timestamp  = {Mon, 06 Feb 2023 22:19:30 +0100},
  biburl     = {https://dblp.org/rec/conf/slt/LiuHAB22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% STOP is an acronym (Spoken Task Oriented semantic Parsing); it is upper-cased and braced so styles do not recase it.
@inproceedings{DBLP:conf/slt/TomaselloSLHLSECHAANDZM22,
  author    = {Paden Tomasello and Akshat Shrivastava and Daniel Lazar and Po{-}Chun Hsu and Duc Le and Adithya Sagar and Ali Elkahky and Jade Copet and Wei{-}Ning Hsu and Yossi Adi and Robin Algayres and Tu Anh Nguyen and Emmanuel Dupoux and Luke Zettlemoyer and Abdelrahman Mohamed},
  title     = {{STOP:} {A} Dataset for Spoken Task Oriented Semantic Parsing},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar, January 9-12, 2023},
  pages     = {991--998},
  publisher = {{IEEE}},
  year      = {2022},
  url       = {https://doi.org/10.1109/SLT54892.2023.10022703},
  doi       = {10.1109/SLT54892.2023.10022703},
  timestamp = {Mon, 11 Dec 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/TomaselloSLHLSECHAANDZM22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2201-01763,
  author     = {Bowen Shi and Wei{-}Ning Hsu and Abdelrahman Mohamed},
  title      = {Robust Self-Supervised Audio-Visual Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2201.01763},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.01763},
  eprinttype = {arXiv},
  eprint     = {2201.01763},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-01763.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2201-02184,
  author     = {Bowen Shi and Wei{-}Ning Hsu and Kushal Lakhotia and Abdelrahman Mohamed},
  title      = {Learning Audio-Visual Speech Representation by Masked Multimodal Cluster Prediction},
  journal    = {CoRR},
  volume     = {abs/2201.02184},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.02184},
  eprinttype = {arXiv},
  eprint     = {2201.02184},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-02184.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-03555,
  author     = {Alexei Baevski and Wei{-}Ning Hsu and Qiantong Xu and Arun Babu and Jiatao Gu and Michael Auli},
  title      = {data2vec: {A} General Framework for Self-supervised Learning in Speech, Vision and Language},
  journal    = {CoRR},
  volume     = {abs/2202.03555},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.03555},
  eprinttype = {arXiv},
  eprint     = {2202.03555},
  timestamp  = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-03555.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-07359,
  author     = {Eugene Kharitonov and Jade Copet and Kushal Lakhotia and Tu Anh Nguyen and Paden Tomasello and Ann Lee and Ali Elkahky and Wei{-}Ning Hsu and Abdelrahman Mohamed and Emmanuel Dupoux and Yossi Adi},
  title      = {textless-lib: a Library for Textless Spoken Language Processing},
  journal    = {CoRR},
  volume     = {abs/2202.07359},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.07359},
  eprinttype = {arXiv},
  eprint     = {2202.07359},
  timestamp  = {Mon, 14 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-07359.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-00648,
  author     = {Ramon Sanabria and Wei{-}Ning Hsu and Alexei Baevski and Michael Auli},
  title      = {Measuring the Impact of Individual Domain Factors in Self-Supervised Pre-Training},
  journal    = {CoRR},
  volume     = {abs/2203.00648},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.00648},
  doi        = {10.48550/ARXIV.2203.00648},
  eprinttype = {arXiv},
  eprint     = {2203.00648},
  timestamp  = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-00648.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-16502,
  author     = {Tu Anh Nguyen and Eugene Kharitonov and Jade Copet and Yossi Adi and Wei{-}Ning Hsu and Ali Elkahky and Paden Tomasello and Robin Algayres and Beno{\^{\i}}t Sagot and Abdelrahman Mohamed and Emmanuel Dupoux},
  title      = {Generative Spoken Dialogue Language Modeling},
  journal    = {CoRR},
  volume     = {abs/2203.16502},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.16502},
  doi        = {10.48550/ARXIV.2203.16502},
  eprinttype = {arXiv},
  eprint     = {2203.16502},
  timestamp  = {Mon, 04 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-16502.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2204-02492,
  author     = {Alexander H. Liu and Wei{-}Ning Hsu and Michael Auli and Alexei Baevski},
  title      = {Towards End-to-end Unsupervised Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2204.02492},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.02492},
  doi        = {10.48550/ARXIV.2204.02492},
  eprinttype = {arXiv},
  eprint     = {2204.02492},
  timestamp  = {Tue, 12 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-02492.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2204-02524,
  author     = {Alexander H. Liu and Cheng{-}I Jeff Lai and Wei{-}Ning Hsu and Michael Auli and Alexei Baevski and James R. Glass},
  title      = {Simple and Effective Unsupervised Speech Synthesis},
  journal    = {CoRR},
  volume     = {abs/2204.02524},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.02524},
  doi        = {10.48550/ARXIV.2204.02524},
  eprinttype = {arXiv},
  eprint     = {2204.02524},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-02524.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2204-02967,
  author     = {Sravya Popuri and Peng{-}Jen Chen and Changhan Wang and Juan Pino and Yossi Adi and Jiatao Gu and Wei{-}Ning Hsu and Ann Lee},
  title      = {Enhanced Direct Speech-to-Speech Translation Using Self-supervised Pre-training and Data Augmentation},
  journal    = {CoRR},
  volume     = {abs/2204.02967},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.02967},
  doi        = {10.48550/ARXIV.2204.02967},
  eprinttype = {arXiv},
  eprint     = {2204.02967},
  timestamp  = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-02967.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2204-05409,
  author     = {Yun Tang and Hongyu Gong and Ning Dong and Changhan Wang and Wei{-}Ning Hsu and Jiatao Gu and Alexei Baevski and Xian Li and Abdelrahman Mohamed and Michael Auli and Juan Miguel Pino},
  title      = {Unified Speech-Text Pre-training for Speech Translation and Recognition},
  journal    = {CoRR},
  volume     = {abs/2204.05409},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.05409},
  doi        = {10.48550/ARXIV.2204.05409},
  eprinttype = {arXiv},
  eprint     = {2204.05409},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-05409.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2204-11934,
  author     = {Apoorv Vyas and Wei{-}Ning Hsu and Michael Auli and Alexei Baevski},
  title      = {On-demand compute reduction with stochastic wav2vec 2.0},
  journal    = {CoRR},
  volume     = {abs/2204.11934},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.11934},
  doi        = {10.48550/ARXIV.2204.11934},
  eprinttype = {arXiv},
  eprint     = {2204.11934},
  timestamp  = {Thu, 28 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-11934.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The model name AV-HuBERT is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/corr/abs-2205-07180,
  author     = {Bowen Shi and Abdelrahman Mohamed and Wei{-}Ning Hsu},
  title      = {Learning Lip-Based Audio-Visual Speaker Embeddings with {AV-HuBERT}},
  journal    = {CoRR},
  volume     = {abs/2205.07180},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.07180},
  doi        = {10.48550/ARXIV.2205.07180},
  eprinttype = {arXiv},
  eprint     = {2205.07180},
  timestamp  = {Wed, 18 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-07180.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2207-07036,
  author     = {Wei{-}Ning Hsu and Bowen Shi},
  title      = {A Single Self-Supervised Model for Many Speech Modalities Enables Zero-Shot Modality Transfer},
  journal    = {CoRR},
  volume     = {abs/2207.07036},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2207.07036},
  doi        = {10.48550/ARXIV.2207.07036},
  eprinttype = {arXiv},
  eprint     = {2207.07036},
  timestamp  = {Tue, 19 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-07036.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The tenth author is written "Yossi Adi", the spelling used for this author in the conference version of this paper (DBLP:conf/slt/TomaselloSLHLSECHAANDZM22).
@article{DBLP:journals/corr/abs-2207-10643,
  author     = {Paden Tomasello and Akshat Shrivastava and Daniel Lazar and Po{-}Chun Hsu and Duc Le and Adithya Sagar and Ali Elkahky and Jade Copet and Wei{-}Ning Hsu and Yossi Adi and Robin Algayres and Tu Anh Nguyen and Emmanuel Dupoux and Luke Zettlemoyer and Abdelrahman Mohamed},
  title      = {{STOP:} {A} dataset for Spoken Task Oriented Semantic Parsing},
  journal    = {CoRR},
  volume     = {abs/2207.10643},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2207.10643},
  doi        = {10.48550/ARXIV.2207.10643},
  eprinttype = {arXiv},
  eprint     = {2207.10643},
  timestamp  = {Mon, 25 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-10643.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-10191,
  author     = {Changhan Wang and Hirofumi Inaguma and Peng{-}Jen Chen and Ilia Kulikov and Yun Tang and Wei{-}Ning Hsu and Michael Auli and Juan Pino},
  title      = {Simple and Effective Unsupervised Speech Translation},
  journal    = {CoRR},
  volume     = {abs/2210.10191},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.10191},
  doi        = {10.48550/ARXIV.2210.10191},
  eprinttype = {arXiv},
  eprint     = {2210.10191},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-10191.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2211-06474,
  author     = {Peng{-}Jen Chen and Kevin Tran and Yilin Yang and Jingfei Du and Justine Kao and Yu{-}An Chung and Paden Tomasello and Paul{-}Ambroise Duquenne and Holger Schwenk and Hongyu Gong and Hirofumi Inaguma and Sravya Popuri and Changhan Wang and Juan Miguel Pino and Wei{-}Ning Hsu and Ann Lee},
  title      = {Speech-to-Speech Translation For {A} Real-world Unwritten Language},
  journal    = {CoRR},
  volume     = {abs/2211.06474},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.06474},
  doi        = {10.48550/ARXIV.2211.06474},
  eprinttype = {arXiv},
  eprint     = {2211.06474},
  timestamp  = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-06474.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-01393,
  author     = {Anuj Diwan and Ching{-}Feng Yeh and Wei{-}Ning Hsu and Paden Tomasello and Eunsol Choi and David Harwath and Abdelrahman Mohamed},
  title      = {Continual Learning for On-Device Speech Recognition using Disentangled Conformers},
  journal    = {CoRR},
  volume     = {abs/2212.01393},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2212.01393},
  doi        = {10.48550/ARXIV.2212.01393},
  eprinttype = {arXiv},
  eprint     = {2212.01393},
  timestamp  = {Thu, 08 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-01393.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-07525,
  author     = {Alexei Baevski and Arun Babu and Wei{-}Ning Hsu and Michael Auli},
  title      = {Efficient Self-supervised Learning with Contextualized Target Representations for Vision, Speech and Language},
  journal    = {CoRR},
  volume     = {abs/2212.07525},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2212.07525},
  doi        = {10.48550/ARXIV.2212.07525},
  eprinttype = {arXiv},
  eprint     = {2212.07525},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-07525.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The system name ReVISE is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/corr/abs-2212-11377,
  author     = {Wei{-}Ning Hsu and Tal Remez and Bowen Shi and Jacob Donley and Yossi Adi},
  title      = {{ReVISE:} Self-Supervised Speech Resynthesis with Visual Input for Universal and Generalized Speech Enhancement},
  journal    = {CoRR},
  volume     = {abs/2212.11377},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2212.11377},
  doi        = {10.48550/ARXIV.2212.11377},
  eprinttype = {arXiv},
  eprint     = {2212.11377},
  timestamp  = {Mon, 02 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-11377.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The model name HuBERT is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/taslp/HsuBTLSM21,
  author    = {Wei{-}Ning Hsu and Benjamin Bolte and Yao{-}Hung Hubert Tsai and Kushal Lakhotia and Ruslan Salakhutdinov and Abdelrahman Mohamed},
  title     = {{HuBERT:} Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {29},
  pages     = {3451--3460},
  year      = {2021},
  url       = {https://doi.org/10.1109/TASLP.2021.3122291},
  doi       = {10.1109/TASLP.2021.3122291},
  timestamp = {Wed, 15 Dec 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/taslp/HsuBTLSM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/HsuHMSG20,
  author    = {Wei{-}Ning Hsu and David Harwath and Tyler Miller and Christopher Song and James R. Glass},
  editor    = {Chengqing Zong and Fei Xia and Wenjie Li and Roberto Navigli},
  title     = {Text-Free Image-to-Speech Synthesis Using Learned Segmental Units},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, {ACL/IJCNLP} 2021, (Volume 1: Long Papers), Virtual Event, August 1-6, 2021},
  pages     = {5284--5300},
  publisher = {Association for Computational Linguistics},
  year      = {2021},
  url       = {https://doi.org/10.18653/v1/2021.acl-long.411},
  doi       = {10.18653/V1/2021.ACL-LONG.411},
  timestamp = {Mon, 09 Aug 2021 16:25:37 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/HsuHMSG20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asru/ManoharLXHCSZM21,
  author    = {Vimal Manohar and Tatiana Likhomanenko and Qiantong Xu and Wei{-}Ning Hsu and Ronan Collobert and Yatharth Saraf and Geoffrey Zweig and Abdelrahman Mohamed},
  title     = {Kaizen: Continuously Improving Teacher Using Exponential Moving Average for Semi-Supervised Speech Recognition},
  booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2021, Cartagena, Colombia, December 13-17, 2021},
  pages     = {518--525},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/ASRU51503.2021.9688028},
  doi       = {10.1109/ASRU51503.2021.9688028},
  timestamp = {Wed, 09 Feb 2022 09:03:03 +0100},
  biburl    = {https://dblp.org/rec/conf/asru/ManoharLXHCSZM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The toolkit name is "fairseq S squared"; the superscript is written as braced math so it is not typeset as a literal backslash and caret.
@inproceedings{DBLP:conf/emnlp/WangHAPLCGP21,
  author    = {Changhan Wang and Wei{-}Ning Hsu and Yossi Adi and Adam Polyak and Ann Lee and Peng{-}Jen Chen and Jiatao Gu and Juan Pino},
  editor    = {Heike Adel and Shuming Shi},
  title     = {fairseq {S$^2$}: {A} Scalable and Integrable Speech Synthesis Toolkit},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, {EMNLP} 2021, Online and Punta Cana, Dominican Republic, 7-11 November, 2021},
  pages     = {143--152},
  publisher = {Association for Computational Linguistics},
  year      = {2021},
  url       = {https://doi.org/10.18653/v1/2021.emnlp-demo.17},
  doi       = {10.18653/V1/2021.EMNLP-DEMO.17},
  timestamp = {Wed, 19 Apr 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/emnlp/WangHAPLCGP21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The model name is spelled HuBERT and braced so sentence-casing styles keep its mixed capitalisation.
@inproceedings{DBLP:conf/icassp/HsuTBSM21,
  author    = {Wei{-}Ning Hsu and Yao{-}Hung Hubert Tsai and Benjamin Bolte and Ruslan Salakhutdinov and Abdelrahman Mohamed},
  title     = {{HuBERT:} How Much Can a Bad Teacher Benefit {ASR} Pre-Training?},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages     = {6533--6537},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414460},
  doi       = {10.1109/ICASSP39728.2021.9414460},
  timestamp = {Fri, 09 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/HsuTBSM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuSBLXPK0CSA21,
  author    = {Wei{-}Ning Hsu and Anuroop Sriram and Alexei Baevski and Tatiana Likhomanenko and Qiantong Xu and Vineel Pratap and Jacob Kahn and Ann Lee and Ronan Collobert and Gabriel Synnaeve and Michael Auli},
  editor    = {Hynek Hermansky and Honza Cernock{\'{y}} and Luk{\'{a}}s Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}cek},
  title     = {Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages     = {721--725},
  publisher = {{ISCA}},
  year      = {2021},
  url       = {https://doi.org/10.21437/Interspeech.2021-236},
  doi       = {10.21437/INTERSPEECH.2021-236},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/HsuSBLXPK0CSA21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/PolyakACKLHMD21,
  author    = {Adam Polyak and Yossi Adi and Jade Copet and Eugene Kharitonov and Kushal Lakhotia and Wei{-}Ning Hsu and Abdelrahman Mohamed and Emmanuel Dupoux},
  editor    = {Hynek Hermansky and Honza Cernock{\'{y}} and Luk{\'{a}}s Burget and Lori Lamel and Odette Scharenborg and Petr Motl{\'{\i}}cek},
  title     = {Speech Resynthesis from Discrete Disentangled Self-Supervised Representations},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages     = {3615--3619},
  publisher = {{ISCA}},
  year      = {2021},
  url       = {https://doi.org/10.21437/Interspeech.2021-475},
  doi       = {10.21437/INTERSPEECH.2021-475},
  timestamp = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/PolyakACKLHMD21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BaevskiHCA21,
  author    = {Alexei Baevski and Wei{-}Ning Hsu and Alexis Conneau and Michael Auli},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title     = {Unsupervised Speech Recognition},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {27826--27839},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/ea159dc9788ffac311592613b7f71fbb-Abstract.html},
  timestamp = {Tue, 03 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/BaevskiHCA21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/Hsu0SH21,
  author    = {Wei{-}Ning Hsu and Ann Lee and Gabriel Synnaeve and Awni Y. Hannun},
  title     = {Semi-Supervised end-to-end Speech Recognition via Local Prior Matching},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {125--132},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383552},
  doi       = {10.1109/SLT48900.2021.9383552},
  timestamp = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/Hsu0SH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Author spelling corrected to "Abdelrahman Mohamed", matching every other entry by this author in the file.
@article{DBLP:journals/corr/abs-2102-01192,
  author     = {Kushal Lakhotia and Evgeny Kharitonov and Wei{-}Ning Hsu and Yossi Adi and Adam Polyak and Benjamin Bolte and Tu Anh Nguyen and Jade Copet and Alexei Baevski and Abdelrahman Mohamed and Emmanuel Dupoux},
  title      = {Generative Spoken Language Modeling from Raw Audio},
  journal    = {CoRR},
  volume     = {abs/2102.01192},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.01192},
  eprinttype = {arXiv},
  eprint     = {2102.01192},
  timestamp  = {Tue, 09 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-01192.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-00355,
  author     = {Adam Polyak and Yossi Adi and Jade Copet and Eugene Kharitonov and Kushal Lakhotia and Wei{-}Ning Hsu and Abdelrahman Mohamed and Emmanuel Dupoux},
  title      = {Speech Resynthesis from Discrete Disentangled Self-Supervised Representations},
  journal    = {CoRR},
  volume     = {abs/2104.00355},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.00355},
  eprinttype = {arXiv},
  eprint     = {2104.00355},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-00355.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-01027,
  author     = {Wei{-}Ning Hsu and Anuroop Sriram and Alexei Baevski and Tatiana Likhomanenko and Qiantong Xu and Vineel Pratap and Jacob Kahn and Ann Lee and Ronan Collobert and Gabriel Synnaeve and Michael Auli},
  title      = {Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training},
  journal    = {CoRR},
  volume     = {abs/2104.01027},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.01027},
  eprinttype = {arXiv},
  eprint     = {2104.01027},
  timestamp  = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-01027.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2105-11084,
  author     = {Alexei Baevski and Wei{-}Ning Hsu and Alexis Conneau and Michael Auli},
  title      = {Unsupervised Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2105.11084},
  year       = {2021},
  url        = {https://arxiv.org/abs/2105.11084},
  eprinttype = {arXiv},
  eprint     = {2105.11084},
  timestamp  = {Tue, 01 Jun 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-11084.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The model name HuBERT is braced so sentence-casing styles keep its mixed capitalisation.
@article{DBLP:journals/corr/abs-2106-07447,
  author     = {Wei{-}Ning Hsu and Benjamin Bolte and Yao{-}Hung Hubert Tsai and Kushal Lakhotia and Ruslan Salakhutdinov and Abdelrahman Mohamed},
  title      = {{HuBERT:} Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units},
  journal    = {CoRR},
  volume     = {abs/2106.07447},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.07447},
  eprinttype = {arXiv},
  eprint     = {2106.07447},
  timestamp  = {Wed, 16 Jun 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-07447.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-07759,
  author     = {Vimal Manohar and Tatiana Likhomanenko and Qiantong Xu and Wei{-}Ning Hsu and Ronan Collobert and Yatharth Saraf and Geoffrey Zweig and Abdelrahman Mohamed},
  title      = {Kaizen: Continuously improving teacher using Exponential Moving Average for semi-supervised speech recognition},
  journal    = {CoRR},
  volume     = {abs/2106.07759},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.07759},
  eprinttype = {arXiv},
  eprint     = {2106.07759},
  timestamp  = {Thu, 01 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-07759.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2107-05604,
  author     = {Ann Lee and Peng{-}Jen Chen and Changhan Wang and Jiatao Gu and Xutai Ma and Adam Polyak and Yossi Adi and Qing He and Yun Tang and Juan Miguel Pino and Wei{-}Ning Hsu},
  title      = {Direct speech-to-speech translation with discrete units},
  journal    = {CoRR},
  volume     = {abs/2107.05604},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.05604},
  eprinttype = {arXiv},
  eprint     = {2107.05604},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-05604.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-03264,
  author     = {Eugene Kharitonov and Ann Lee and Adam Polyak and Yossi Adi and Jade Copet and Kushal Lakhotia and Tu Anh Nguyen and Morgane Rivi{\`{e}}re and Abdelrahman Mohamed and Emmanuel Dupoux and Wei{-}Ning Hsu},
  title      = {Text-Free Prosody-Aware Generative Spoken Language Modeling},
  journal    = {CoRR},
  volume     = {abs/2109.03264},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.03264},
  eprinttype = {arXiv},
  eprint     = {2109.03264},
  timestamp  = {Thu, 06 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-03264.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The toolkit name is "fairseq S squared"; the superscript is written as braced math instead of a bare circumflex accent.
@article{DBLP:journals/corr/abs-2109-06912,
  author     = {Changhan Wang and Wei{-}Ning Hsu and Yossi Adi and Adam Polyak and Ann Lee and Peng{-}Jen Chen and Jiatao Gu and Juan Miguel Pino},
  title      = {fairseq {S$^2$}: {A} Scalable and Integrable Speech Synthesis Toolkit},
  journal    = {CoRR},
  volume     = {abs/2109.06912},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.06912},
  eprinttype = {arXiv},
  eprint     = {2109.06912},
  timestamp  = {Thu, 06 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-06912.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Author spelling corrected from "Phillip Koehn" to "Philipp Koehn".
@article{DBLP:journals/corr/abs-2110-08250,
  author     = {Xutai Ma and Hongyu Gong and Danni Liu and Ann Lee and Yun Tang and Peng{-}Jen Chen and Wei{-}Ning Hsu and Kenneth Heafield and Philipp Koehn and Juan Miguel Pino},
  title      = {Direct simultaneous speech to speech translation},
  journal    = {CoRR},
  volume     = {abs/2110.08250},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.08250},
  eprinttype = {arXiv},
  eprint     = {2110.08250},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-08250.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2111-07402,
  author     = {Felix Kreuk and Adam Polyak and Jade Copet and Eugene Kharitonov and Tu Anh Nguyen and Morgane Rivi{\`{e}}re and Wei{-}Ning Hsu and Abdelrahman Mohamed and Emmanuel Dupoux and Yossi Adi},
  title      = {Textless Speech Emotion Conversion using Decomposed and Discrete Representations},
  journal    = {CoRR},
  volume     = {abs/2111.07402},
  year       = {2021},
  url        = {https://arxiv.org/abs/2111.07402},
  eprinttype = {arXiv},
  eprint     = {2111.07402},
  timestamp  = {Tue, 16 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2111-07402.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2112-08352,
  author     = {Ann Lee and Hongyu Gong and Paul{-}Ambroise Duquenne and Holger Schwenk and Peng{-}Jen Chen and Changhan Wang and Sravya Popuri and Juan Miguel Pino and Jiatao Gu and Wei{-}Ning Hsu},
  title      = {Textless Speech-to-Speech Translation on Real Data},
  journal    = {CoRR},
  volume     = {abs/2112.08352},
  year       = {2021},
  url        = {https://arxiv.org/abs/2112.08352},
  eprinttype = {arXiv},
  eprint     = {2112.08352},
  timestamp  = {Tue, 31 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2112-08352.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/HarwathHG20,
  author    = {David Harwath and Wei{-}Ning Hsu and James R. Glass},
  title     = {Learning Hierarchical Discrete Linguistic Units from Visually-Grounded Speech},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=B1elCp4KwH},
  timestamp = {Thu, 07 May 2020 17:11:47 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/HarwathHG20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/GumpHG20,
  author    = {Michael Gump and Wei{-}Ning Hsu and James R. Glass},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  title     = {Unsupervised Methods for Evaluating Speech Representations},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {170--174},
  publisher = {{ISCA}},
  year      = {2020},
  url       = {https://doi.org/10.21437/Interspeech.2020-2990},
  doi       = {10.21437/INTERSPEECH.2020-2990},
  timestamp = {Fri, 29 Jan 2021 17:40:16 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/GumpHG20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% "Markov" is a proper noun and is braced so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/interspeech/KhuranaLHCLMG20,
  author    = {Sameer Khurana and Antoine Laurent and Wei{-}Ning Hsu and Jan Chorowski and Adrian Lancucki and Ricard Marxer and James R. Glass},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  title     = {A Convolutional Deep {Markov} Model for Unsupervised Speech Representation Learning},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {3790--3794},
  publisher = {{ISCA}},
  year      = {2020},
  url       = {https://doi.org/10.21437/Interspeech.2020-3084},
  doi       = {10.21437/INTERSPEECH.2020-3084},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/KhuranaLHCLMG20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-10336,
  author     = {Wei{-}Ning Hsu and Ann Lee and Gabriel Synnaeve and Awni Y. Hannun},
  title      = {Semi-Supervised Speech Recognition via Local Prior Matching},
  journal    = {CoRR},
  volume     = {abs/2002.10336},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.10336},
  eprinttype = {arXiv},
  eprint     = {2002.10336},
  timestamp  = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-10336.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Markov" is a proper noun and is braced so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2006-02547,
  author     = {Sameer Khurana and Antoine Laurent and Wei{-}Ning Hsu and Jan Chorowski and Adrian Lancucki and Ricard Marxer and James R. Glass},
  title      = {A Convolutional Deep {Markov} Model for Unsupervised Speech Representation Learning},
  journal    = {CoRR},
  volume     = {abs/2006.02547},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.02547},
  eprinttype = {arXiv},
  eprint     = {2006.02547},
  timestamp  = {Tue, 09 Jun 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-02547.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-01003,
  author     = {Awni Y. Hannun and Vineel Pratap and Jacob Kahn and Wei{-}Ning Hsu},
  title      = {Differentiable Weighted Finite-State Transducers},
  journal    = {CoRR},
  volume     = {abs/2010.01003},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.01003},
  eprinttype = {arXiv},
  eprint     = {2010.01003},
  timestamp  = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-01003.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-15454,
  author     = {Wei{-}Ning Hsu and David Harwath and Christopher Song and James R. Glass},
  title      = {Text-Free Image-to-Speech Synthesis Using Learned Segmental Units},
  journal    = {CoRR},
  volume     = {abs/2012.15454},
  year       = {2020},
  url        = {https://arxiv.org/abs/2012.15454},
  eprinttype = {arXiv},
  eprint     = {2012.15454},
  timestamp  = {Fri, 08 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-15454.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/HsuZWCWWG19,
  author    = {Wei{-}Ning Hsu and Yu Zhang and Ron J. Weiss and Yu{-}An Chung and Yuxuan Wang and Yonghui Wu and James R. Glass},
  title     = {Disentangling Correlated Speaker and Noise for Speech Synthesis via Data Augmentation and Adversarial Factorization},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages     = {5901--5905},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/ICASSP.2019.8683561},
  doi       = {10.1109/ICASSP.2019.8683561},
  timestamp = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/HsuZWCWWG19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChungWHZS19,
  author     = {Yu{-}An Chung and Yuxuan Wang and Wei{-}Ning Hsu and Yu Zhang and R. J. Skerry{-}Ryan},
  title      = {Semi-supervised Training for Improving Data Efficiency in End-to-end Speech Synthesis},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages      = {6940--6944},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/ICASSP.2019.8683862},
  doi        = {10.1109/ICASSP.2019.8683862},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/ChungWHZS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/HsuZWZWWCJCSNP19,
  author     = {Wei{-}Ning Hsu and Yu Zhang and Ron J. Weiss and Heiga Zen and Yonghui Wu and Yuxuan Wang and Yuan Cao and Ye Jia and Zhifeng Chen and Jonathan Shen and Patrick Nguyen and Ruoming Pang},
  title      = {Hierarchical Generative Modeling for Controllable Speech Synthesis},
  booktitle  = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher  = {OpenReview.net},
  year       = {2019},
  url        = {https://openreview.net/forum?id=rygkk305YQ},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/HsuZWZWWCJCSNP19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ChungHTG19,
  author     = {Yu{-}An Chung and Wei{-}Ning Hsu and Hao Tang and James R. Glass},
  editor     = {Gernot Kubin and Zdravko Kacic},
  title      = {An Unsupervised Autoregressive Model for Speech Representation Learning},
  booktitle  = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages      = {146--150},
  publisher  = {{ISCA}},
  year       = {2019},
  url        = {https://doi.org/10.21437/Interspeech.2019-1473},
  doi        = {10.21437/INTERSPEECH.2019-1473},
  timestamp  = {Fri, 29 Jan 2021 17:41:10 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/ChungHTG19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuHG19,
  author     = {Wei{-}Ning Hsu and David Harwath and James R. Glass},
  editor     = {Gernot Kubin and Zdravko Kacic},
  title      = {Transfer Learning from Audio-Visual Grounding to Speech Recognition},
  booktitle  = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages      = {3242--3246},
  publisher  = {{ISCA}},
  year       = {2019},
  url        = {https://doi.org/10.21437/Interspeech.2019-1227},
  doi        = {10.21437/INTERSPEECH.2019-1227},
  timestamp  = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/HsuHG19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1902-08295,
  author     = {Jonathan Shen and Patrick Nguyen and Yonghui Wu and Zhifeng Chen
                and Mia Xu Chen and Ye Jia and Anjuli Kannan and Tara N. Sainath
                and Yuan Cao and Chung{-}Cheng Chiu and Yanzhang He and Jan Chorowski
                and Smit Hinsu and Stella Laurenzo and James Qin and Orhan Firat
                and Wolfgang Macherey and Suyog Gupta and Ankur Bapna and Shuyuan Zhang
                and Ruoming Pang and Ron J. Weiss and Rohit Prabhavalkar and Qiao Liang
                and Benoit Jacob and Bowen Liang and HyoukJoong Lee and Ciprian Chelba
                and S{\'{e}}bastien Jean and Bo Li and Melvin Johnson and Rohan Anil
                and Rajat Tibrewal and Xiaobing Liu and Akiko Eriguchi and Navdeep Jaitly
                and Naveen Ari and Colin Cherry and Parisa Haghani and Otavio Good
                and Youlong Cheng and Raziel Alvarez and Isaac Caswell and Wei{-}Ning Hsu
                and Zongheng Yang and Kuan{-}Chieh Wang and Ekaterina Gonina and Katrin Tomanek
                and Ben Vanik and Zelin Wu and Llion Jones and Mike Schuster
                and Yanping Huang and Dehao Chen and Kazuki Irie and George F. Foster
                and John Richardson and Klaus Macherey and Antoine Bruguier and Heiga Zen
                and Colin Raffel and Shankar Kumar and Kanishka Rao and David Rybach
                and Matthew Murray and Vijayaditya Peddinti and Maxim Krikun and Michiel Bacchiani
                and Thomas B. Jablin and Robert Suderman and Ian Williams and Benjamin Lee
                and Deepti Bhatia and Justin Carlson and Semih Yavuz and Yu Zhang
                and Ian McGraw and Max Galkin and Qi Ge and Golan Pundak
                and Chad Whipkey and Todd Wang and Uri Alon and Dmitry Lepikhin
                and Ye Tian and Sara Sabour and William Chan and Shubham Toshniwal
                and Baohua Liao and Michael Nirschl and Pat Rondon},
  title      = {Lingvo: a Modular and Scalable Framework for Sequence-to-Sequence Modeling},
  journal    = {CoRR},
  volume     = {abs/1902.08295},
  year       = {2019},
  url        = {http://arxiv.org/abs/1902.08295},
  eprinttype = {arXiv},
  eprint     = {1902.08295},
  timestamp  = {Tue, 07 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1902-08295.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1904-03240,
  author     = {Yu{-}An Chung and Wei{-}Ning Hsu and Hao Tang and James R. Glass},
  title      = {An Unsupervised Autoregressive Model for Speech Representation Learning},
  journal    = {CoRR},
  volume     = {abs/1904.03240},
  year       = {2019},
  url        = {http://arxiv.org/abs/1904.03240},
  eprinttype = {arXiv},
  eprint     = {1904.03240},
  timestamp  = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-03240.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1907-04355,
  author     = {Wei{-}Ning Hsu and David F. Harwath and James R. Glass},
  title      = {Transfer Learning from Audio-Visual Grounding to Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/1907.04355},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.04355},
  eprinttype = {arXiv},
  eprint     = {1907.04355},
  timestamp  = {Tue, 23 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-04355.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-09602,
  author     = {David Harwath and Wei{-}Ning Hsu and James R. Glass},
  title      = {Learning Hierarchical Discrete Linguistic Units from Visually-Grounded Speech},
  journal    = {CoRR},
  volume     = {abs/1911.09602},
  year       = {2019},
  url        = {http://arxiv.org/abs/1911.09602},
  eprinttype = {arXiv},
  eprint     = {1911.09602},
  timestamp  = {Tue, 03 Dec 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-09602.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/HsuG18,
  author     = {Wei{-}Ning Hsu and James R. Glass},
  title      = {Extracting Domain Invariant Features by Unsupervised Learning for Robust Automatic Speech Recognition},
  booktitle  = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages      = {5614--5618},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/ICASSP.2018.8462037},
  doi        = {10.1109/ICASSP.2018.8462037},
  timestamp  = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/HsuG18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icpr/ZhengWXHG18,
  author     = {Siqi Zheng and Jianzong Wang and Jing Xiao and Wei{-}Ning Hsu and James R. Glass},
  title      = {A Noise-Robust Self-Adaptive Multitarget Speaker Detection System},
  booktitle  = {24th International Conference on Pattern Recognition, {ICPR} 2018, Beijing, China, August 20-24, 2018},
  pages      = {1068--1072},
  publisher  = {{IEEE} Computer Society},
  year       = {2018},
  url        = {https://doi.org/10.1109/ICPR.2018.8545395},
  doi        = {10.1109/ICPR.2018.8545395},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icpr/ZhengWXHG18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuG18,
  author     = {Wei{-}Ning Hsu and James R. Glass},
  editor     = {B. Yegnanarayana},
  title      = {Scalable Factorized Hierarchical Variational Autoencoder Training},
  booktitle  = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages      = {1462--1466},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Interspeech.2018-1034},
  doi        = {10.21437/INTERSPEECH.2018-1034},
  timestamp  = {Fri, 21 May 2021 08:16:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/HsuG18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuTG18,
  author     = {Wei{-}Ning Hsu and Hao Tang and James R. Glass},
  editor     = {B. Yegnanarayana},
  title      = {Unsupervised Adaptation with Interpretable Disentangled Representations for Distant Conversational Speech Recognition},
  booktitle  = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages      = {1576--1580},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Interspeech.2018-1097},
  doi        = {10.21437/INTERSPEECH.2018-1097},
  timestamp  = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/HsuTG18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/TangHGG18,
  author     = {Hao Tang and Wei{-}Ning Hsu and Fran{\c{c}}ois Grondin and James R. Glass},
  editor     = {B. Yegnanarayana},
  title      = {A Study of Enhancement, Augmentation and Autoencoder Methods for Domain Adaptation in Distant Speech Recognition},
  booktitle  = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages      = {2928--2932},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Interspeech.2018-2030},
  doi        = {10.21437/INTERSPEECH.2018-2030},
  timestamp  = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/TangHGG18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ShonHG18,
  author     = {Suwon Shon and Wei{-}Ning Hsu and James R. Glass},
  title      = {Unsupervised Representation Learning of Speech for Dialect Identification},
  booktitle  = {2018 {IEEE} Spoken Language Technology Workshop, {SLT} 2018, Athens, Greece, December 18-21, 2018},
  pages      = {105--111},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/SLT.2018.8639650},
  doi        = {10.1109/SLT.2018.8639650},
  timestamp  = {Wed, 16 Oct 2019 14:14:53 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/ShonHG18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1803-02551,
  author     = {Wei{-}Ning Hsu and James R. Glass},
  title      = {Extracting Domain Invariant Features by Unsupervised Learning for Robust Automatic Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/1803.02551},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.02551},
  eprinttype = {arXiv},
  eprint     = {1803.02551},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-02551.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1804-03201,
  author     = {Wei{-}Ning Hsu and James R. Glass},
  title      = {Scalable Factorized Hierarchical Variational Autoencoder Training},
  journal    = {CoRR},
  volume     = {abs/1804.03201},
  year       = {2018},
  url        = {http://arxiv.org/abs/1804.03201},
  eprinttype = {arXiv},
  eprint     = {1804.03201},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-03201.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-11264,
  author     = {Wei{-}Ning Hsu and James R. Glass},
  title      = {Disentangling by Partitioning: {A} Representation Learning Framework for Multimodal Sensory Data},
  journal    = {CoRR},
  volume     = {abs/1805.11264},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.11264},
  eprinttype = {arXiv},
  eprint     = {1805.11264},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-11264.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-04841,
  author     = {Hao Tang and Wei{-}Ning Hsu and Fran{\c{c}}ois Grondin and James R. Glass},
  title      = {A Study of Enhancement, Augmentation, and Autoencoder Methods for Domain Adaptation in Distant Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/1806.04841},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.04841},
  eprinttype = {arXiv},
  eprint     = {1806.04841},
  timestamp  = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-04841.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-04872,
  author     = {Wei{-}Ning Hsu and Hao Tang and James R. Glass},
  title      = {Unsupervised Adaptation with Interpretable Disentangled Representations for Distant Conversational Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/1806.04872},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.04872},
  eprinttype = {arXiv},
  eprint     = {1806.04872},
  timestamp  = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-04872.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1808-10128,
  author     = {Yu{-}An Chung and Yuxuan Wang and Wei{-}Ning Hsu and Yu Zhang and R. J. Skerry{-}Ryan},
  title      = {Semi-Supervised Training for Improving Data Efficiency in End-to-End Speech Synthesis},
  journal    = {CoRR},
  volume     = {abs/1808.10128},
  year       = {2018},
  url        = {http://arxiv.org/abs/1808.10128},
  eprinttype = {arXiv},
  eprint     = {1808.10128},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1808-10128.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-04458,
  author     = {Suwon Shon and Wei{-}Ning Hsu and James R. Glass},
  title      = {Unsupervised Representation Learning of Speech for Dialect Identification},
  journal    = {CoRR},
  volume     = {abs/1809.04458},
  year       = {2018},
  url        = {http://arxiv.org/abs/1809.04458},
  eprinttype = {arXiv},
  eprint     = {1809.04458},
  timestamp  = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1809-04458.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1810-07217,
  author     = {Wei{-}Ning Hsu and Yu Zhang and Ron J. Weiss and Heiga Zen and Yonghui Wu and Yuxuan Wang and Yuan Cao and Ye Jia and Zhifeng Chen and Jonathan Shen and Patrick Nguyen and Ruoming Pang},
  title      = {Hierarchical Generative Modeling for Controllable Speech Synthesis},
  journal    = {CoRR},
  volume     = {abs/1810.07217},
  year       = {2018},
  url        = {http://arxiv.org/abs/1810.07217},
  eprinttype = {arXiv},
  eprint     = {1810.07217},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1810-07217.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title stored in Title Case: styles can downcase it, but cannot restore capitals lost in an all-lowercase title.
@inproceedings{DBLP:conf/asru/HsuZG17,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  title      = {Unsupervised Domain Adaptation for Robust Speech Recognition via Variational Autoencoder-Based Data Augmentation},
  booktitle  = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages      = {16--23},
  publisher  = {{IEEE}},
  year       = {2017},
  url        = {https://doi.org/10.1109/ASRU.2017.8268911},
  doi        = {10.1109/ASRU.2017.8268911},
  timestamp  = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl     = {https://dblp.org/rec/conf/asru/HsuZG17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title stored in Title Case; "Arabic" is braced so sentence-casing styles keep the proper noun capitalized.
@inproceedings{DBLP:conf/asru/NajafianHAG17,
  author     = {Maryam Najafian and Wei{-}Ning Hsu and Ahmed Ali and James R. Glass},
  title      = {Automatic Speech Recognition of {Arabic} Multi-Genre Broadcast Media},
  booktitle  = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages      = {353--359},
  publisher  = {{IEEE}},
  year       = {2017},
  url        = {https://doi.org/10.1109/ASRU.2017.8268957},
  doi        = {10.1109/ASRU.2017.8268957},
  timestamp  = {Fri, 03 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/asru/NajafianHAG17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuZG17,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  editor     = {Francisco Lacerda},
  title      = {Learning Latent Representations for Speech Generation and Transformation},
  booktitle  = {Interspeech 2017, 18th Annual Conference of the International Speech Communication Association, Stockholm, Sweden, August 20-24, 2017},
  pages      = {1273--1277},
  publisher  = {{ISCA}},
  year       = {2017},
  url        = {https://doi.org/10.21437/Interspeech.2017-349},
  doi        = {10.21437/INTERSPEECH.2017-349},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/HsuZG17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HsuZG17,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  editor     = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title      = {Unsupervised Learning of Disentangled and Interpretable Representations from Sequential Data},
  booktitle  = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages      = {1878--1889},
  year       = {2017},
  url        = {https://proceedings.neurips.cc/paper/2017/hash/0a0a0c8aaa00ade50f74a3f0ca981ed7-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/HsuZG17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HsuZG17,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  title      = {Learning Latent Representations for Speech Generation and Transformation},
  journal    = {CoRR},
  volume     = {abs/1704.04222},
  year       = {2017},
  url        = {http://arxiv.org/abs/1704.04222},
  eprinttype = {arXiv},
  eprint     = {1704.04222},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HsuZG17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HsuZG17aa,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  title      = {Unsupervised Domain Adaptation for Robust Speech Recognition via Variational Autoencoder-Based Data Augmentation},
  journal    = {CoRR},
  volume     = {abs/1707.06265},
  year       = {2017},
  url        = {http://arxiv.org/abs/1707.06265},
  eprinttype = {arXiv},
  eprint     = {1707.06265},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HsuZG17aa.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1709-07902,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  title      = {Unsupervised Learning of Disentangled and Interpretable Representations from Sequential Data},
  journal    = {CoRR},
  volume     = {abs/1709.07902},
  year       = {2017},
  url        = {http://arxiv.org/abs/1709.07902},
  eprinttype = {arXiv},
  eprint     = {1709.07902},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1709-07902.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/coling/RomeoMBMBHZMG16,
  author     = {Salvatore Romeo and Giovanni Da San Martino and Alberto Barr{\'{o}}n{-}Cede{\~{n}}o and Alessandro Moschitti and Yonatan Belinkov and Wei{-}Ning Hsu and Yu Zhang and Mitra Mohtarami and James R. Glass},
  editor     = {Nicoletta Calzolari and Yuji Matsumoto and Rashmi Prasad},
  title      = {Neural Attention for Learning to Rank Questions in Community Question Answering},
  booktitle  = {{COLING} 2016, 26th International Conference on Computational Linguistics, Proceedings of the Conference: Technical Papers, December 11-16, 2016, Osaka, Japan},
  pages      = {1734--1745},
  publisher  = {{ACL}},
  year       = {2016},
  url        = {https://aclanthology.org/C16-1163/},
  timestamp  = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/coling/RomeoMBMBHZMG16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HsuZLG16,
  author     = {Wei{-}Ning Hsu and Yu Zhang and Ann Lee and James R. Glass},
  editor     = {Nelson Morgan},
  title      = {Exploiting Depth and Highway Connections in Convolutional Recurrent Deep Neural Networks for Speech Recognition},
  booktitle  = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016},
  pages      = {395--399},
  publisher  = {{ISCA}},
  year       = {2016},
  url        = {https://doi.org/10.21437/Interspeech.2016-515},
  doi        = {10.21437/INTERSPEECH.2016-515},
  timestamp  = {Mon, 26 Jun 2023 16:43:56 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/HsuZLG16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "SemEval-2016" and "Task" are braced so sentence-casing styles do not lowercase the shared-task name.
@inproceedings{DBLP:conf/semeval/MohtaramiBHZLBC16,
  author     = {Mitra Mohtarami and Yonatan Belinkov and Wei{-}Ning Hsu and Yu Zhang and Tao Lei and Kfir Bar and Scott Cyphers and James R. Glass},
  editor     = {Steven Bethard and Daniel M. Cer and Marine Carpuat and David Jurgens and Preslav Nakov and Torsten Zesch},
  title      = {{SLS} at {SemEval-2016} {Task} 3: Neural-based Approaches for Ranking in Community Question Answering},
  booktitle  = {Proceedings of the 10th International Workshop on Semantic Evaluation, SemEval@NAACL-HLT 2016, San Diego, CA, USA, June 16-17, 2016},
  pages      = {828--835},
  publisher  = {The Association for Computer Linguistics},
  year       = {2016},
  url        = {https://doi.org/10.18653/v1/s16-1128},
  doi        = {10.18653/V1/S16-1128},
  timestamp  = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/semeval/MohtaramiBHZLBC16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Arabic" and the challenge name are braced so sentence-casing styles keep their capitals.
@inproceedings{DBLP:conf/slt/HanaiHG16,
  author     = {Tuka Al Hanai and Wei{-}Ning Hsu and James R. Glass},
  title      = {Development of the {MIT} {ASR} system for the 2016 {Arabic} {Multi-genre} {Broadcast} {Challenge}},
  booktitle  = {2016 {IEEE} Spoken Language Technology Workshop, {SLT} 2016, San Diego, CA, USA, December 13-16, 2016},
  pages      = {299--304},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/SLT.2016.7846280},
  doi        = {10.1109/SLT.2016.7846280},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/HanaiHG16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title stored in Title Case: styles can downcase it, but cannot restore capitals lost in an all-lowercase title.
@inproceedings{DBLP:conf/slt/HsuZG16,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  title      = {A Prioritized Grid Long Short-Term Memory {RNN} for Speech Recognition},
  booktitle  = {2016 {IEEE} Spoken Language Technology Workshop, {SLT} 2016, San Diego, CA, USA, December 13-16, 2016},
  pages      = {467--473},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/SLT.2016.7846305},
  doi        = {10.1109/SLT.2016.7846305},
  timestamp  = {Thu, 07 Jun 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/HsuZG16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HsuZG16,
  author     = {Wei{-}Ning Hsu and Yu Zhang and James R. Glass},
  title      = {Recurrent Neural Network Encoder with Attention for Community Question Answering},
  journal    = {CoRR},
  volume     = {abs/1603.07044},
  year       = {2016},
  url        = {http://arxiv.org/abs/1603.07044},
  eprinttype = {arXiv},
  eprint     = {1603.07044},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HsuZG16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/HsuL15,
  author     = {Wei{-}Ning Hsu and Hsuan{-}Tien Lin},
  editor     = {Blai Bonet and Sven Koenig},
  title      = {Active Learning by Learning},
  booktitle  = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence, January 25-30, 2015, Austin, Texas, {USA}},
  pages      = {2659--2665},
  publisher  = {{AAAI} Press},
  year       = {2015},
  url        = {https://doi.org/10.1609/aaai.v29i1.9597},
  doi        = {10.1609/AAAI.V29I1.9597},
  timestamp  = {Mon, 18 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/HsuL15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title stored in Title Case: styles can downcase it, but cannot restore capitals lost in an all-lowercase title.
@inproceedings{DBLP:conf/icassp/ChungHLL15,
  author     = {Cheng{-}Tao Chung and Wei{-}Ning Hsu and Cheng{-}Yi Lee and Lin{-}Shan Lee},
  title      = {Enhancing Automatically Discovered Multi-Level Acoustic Patterns Considering Context Consistency with Applications in Spoken Term Detection},
  booktitle  = {2015 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia, April 19-24, 2015},
  pages      = {5231--5235},
  publisher  = {{IEEE}},
  year       = {2015},
  url        = {https://doi.org/10.1109/ICASSP.2015.7178969},
  doi        = {10.1109/ICASSP.2015.7178969},
  timestamp  = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/ChungHLL15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ChungHLL15,
  author     = {Cheng{-}Tao Chung and Wei{-}Ning Hsu and Cheng{-}Yi Lee and Lin{-}Shan Lee},
  title      = {Enhancing Automatically Discovered Multi-level Acoustic Patterns Considering Context Consistency With Applications in Spoken Term Detection},
  journal    = {CoRR},
  volume     = {abs/1509.02217},
  year       = {2015},
  url        = {http://arxiv.org/abs/1509.02217},
  eprinttype = {arXiv},
  eprint     = {1509.02217},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ChungHLL15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies with your web browser. For more information, see our F.A.Q.