% Exported from a dblp publication search: results for "toc:db/conf/slt/slt2021.bht:",
% downloaded as a .bib file.

@inproceedings{DBLP:conf/slt/0001KDZA21,
  author    = {Li, Shang{-}Wen and
               Krone, Jason and
               Dong, Shuyan and
               Zhang, Yi and
               Al{-}Onaizan, Yaser},
  title     = {Meta Learning to Classify Intent and Slot Labels with Noisy Few Shot Examples},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1004--1011},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383489},
  doi       = {10.1109/SLT48900.2021.9383489},
  timestamp = {Mon, 05 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/0001KDZA21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/0002DLTL21,
  author    = {Chai, Li and
               Du, Jun and
               Liu, Diyuan and
               Tu, Yanhui and
               Lee, Chin{-}Hui},
  title     = {Acoustic Modeling for Multi-Array Conversational Speech Recognition in the {CHiME-6} Challenge},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {912--918},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383628},
  doi       = {10.1109/SLT48900.2021.9383628},
  timestamp = {Thu, 14 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/0002DLTL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/0004HM21,
  author    = {Luo, Yi and
               Han, Cong and
               Mesgarani, Nima},
  title     = {Distortion-Controlled Training for End-to-End Reverberant Speech Separation with Auxiliary Autoencoding Loss},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {825--832},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383494},
  doi       = {10.1109/SLT48900.2021.9383494},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/0004HM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/0007KGLAG21,
  author    = {Kumar, Manoj and
               Kumar, Varun and
               Glaude, Hadrien and
               de Lichy, Cyprien and
               Alok, Aman and
               Gupta, Rahul},
  title     = {{ProtoDA}: Efficient Transfer Learning for Few-Shot Intent Classification},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {966--972},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383495},
  doi       = {10.1109/SLT48900.2021.9383495},
  timestamp = {Thu, 06 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/0007KGLAG21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/AiLWYL21,
  author    = {Ai, Yang and
               Li, Haoyu and
               Wang, Xin and
               Yamagishi, Junichi and
               Ling, Zhen{-}Hua},
  title     = {Denoising-and-Dereverberation Hierarchical Neural Vocoder for Robust Waveform Generation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {477--484},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383611},
  doi       = {10.1109/SLT48900.2021.9383611},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/AiLWYL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/BaharBN21,
  author    = {Bahar, Parnia and
               Brix, Christopher and
               Ney, Hermann},
  title     = {Two-Way Neural Machine Translation: {A} Proof of Concept for Bidirectional Translation Modeling Using a Two-Dimensional Grid},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1065--1070},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383589},
  doi       = {10.1109/SLT48900.2021.9383589},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/BaharBN21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/BaharBSN21,
  author    = {Bahar, Parnia and
               Bieschke, Tobias and
               Schl{\"{u}}ter, Ralf and
               Ney, Hermann},
  title     = {Tight Integrated End-to-End Training for Cascaded Speech Translation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {950--957},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383462},
  doi       = {10.1109/SLT48900.2021.9383462},
  timestamp = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/BaharBSN21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/BairdAMS21,
  author    = {Baird, Alice and
               Amiriparian, Shahin and
               Milling, Manuel and
               Schuller, Bj{\"{o}}rn W.},
  title     = {Emotion Recognition in Public Speaking Scenarios Utilising An {LSTM-RNN} Approach with Attention},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {397--402},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383542},
  doi       = {10.1109/SLT48900.2021.9383542},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/BairdAMS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/CaiL21,
  author    = {Cai, Danwei and
               Li, Ming},
  title     = {Embedding Aggregation for Far-Field Speaker Verification with Distributed Microphone Arrays},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {308--315},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383501},
  doi       = {10.1109/SLT48900.2021.9383501},
  timestamp = {Wed, 12 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/CaiL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/CaoZFM21,
  author    = {Cao, Songjun and
               Zhang, Yike and
               Feng, Xiaobing and
               Ma, Long},
  title     = {Improving Speech Recognition Accuracy of Local {POI} Using Geographical Models},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {180--185},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383538},
  doi       = {10.1109/SLT48900.2021.9383538},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/CaoZFM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChangL0HZH21,
  author    = {Chang, Ting{-}Yun and
               Liu, Yang and
               Gopalakrishnan, Karthik and
               Hedayatnia, Behnam and
               Zhou, Pei and
               Hakkani{-}T{\"{u}}r, Dilek},
  title     = {Go Beyond Plain Fine-Tuning: Improving Pretrained Models for Social Commonsense},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1028--1035},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383453},
  doi       = {10.1109/SLT48900.2021.9383453},
  timestamp = {Thu, 04 Nov 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/ChangL0HZH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChangLLL21,
  author    = {Chang, Heng{-}Jui and
               Liu, Alexander H. and
               Lee, Hung{-}yi and
               Lee, Lin{-}Shan},
  title     = {End-to-End Whispered Speech Recognition with Frequency-Weighted Approaches and Pseudo Whisper Pre-training},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {186--193},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383595},
  doi       = {10.1109/SLT48900.2021.9383595},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ChangLLL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChenK21,
  author    = {Chen, Tianxiang and
               Khoury, Elie},
  title     = {Spoofprint: {A} New Paradigm for Spoofing Attacks Detection},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {538--543},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383572},
  doi       = {10.1109/SLT48900.2021.9383572},
  timestamp = {Thu, 15 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ChenK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChenLL21,
  author    = {Chen, Huan{-}Yu and
               Lin, Yun{-}Shao and
               Lee, Chi{-}Chun},
  title     = {Through the Words of Viewers: Using Comment-Content Entangled Network for Humor Impression Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1058--1064},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383564},
  doi       = {10.1109/SLT48900.2021.9383564},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/ChenLL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChiCWHC0L21,
  author    = {Chi, Po{-}Han and
               Chung, Pei{-}Hung and
               Wu, Tsung{-}Han and
               Hsieh, Chun{-}Cheng and
               Chen, Yen{-}Hao and
               Li, Shang{-}Wen and
               Lee, Hung{-}yi},
  title     = {Audio {ALBERT}: {A} Lite {BERT} for Self-Supervised Learning of Audio Representation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {344--350},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383575},
  doi       = {10.1109/SLT48900.2021.9383575},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/ChiCWHC0L21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChienL21,
  author    = {Chien, Chung{-}Ming and
               Lee, Hung{-}yi},
  title     = {Hierarchical Prosody Modeling for Non-Autoregressive Speech Synthesis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {446--453},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383629},
  doi       = {10.1109/SLT48900.2021.9383629},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ChienL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChiuC21,
  author    = {Chiu, Shih{-}Hsuan and
               Chen, Berlin},
  title     = {Innovative {BERT}-Based Reranking Language Models for Speech Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {266--271},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383557},
  doi       = {10.1109/SLT48900.2021.9383557},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ChiuC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChiuNHPZJPSNCW21,
  author    = {Chiu, Chung{-}Cheng and
               Narayanan, Arun and
               Han, Wei and
               Prabhavalkar, Rohit and
               Zhang, Yu and
               Jaitly, Navdeep and
               Pang, Ruoming and
               Sainath, Tara N. and
               Nguyen, Patrick and
               Cao, Liangliang and
               Wu, Yonghui},
  title     = {{RNN-T} Models Fail to Generalize to Out-of-Domain Audio: Causes and Solutions},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {873--880},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383518},
  doi       = {10.1109/SLT48900.2021.9383518},
  timestamp = {Fri, 07 May 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ChiuNHPZJPSNCW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ChoiJK21,
  author    = {Choi, Yeunju and
               Jung, Youngmoon and
               Kim, Hoirin},
  title     = {Neural {MOS} Prediction for Synthesized Speech Using Multi-Task Learning with Spoofing Detection and Spoofing Type Classification},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {462--469},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383533},
  doi       = {10.1109/SLT48900.2021.9383533},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ChoiJK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/DuLLWQ21,
  author    = {Du, Chenpeng and
               Li, Hao and
               Lu, Yizhou and
               Wang, Lan and
               Qian, Yanmin},
  title     = {Data Augmentation for End-to-End Code-Switching Speech Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {194--200},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383620},
  doi       = {10.1109/SLT48900.2021.9383620},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/DuLLWQ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/DuTX021,
  author    = {Du, Hongqiang and
               Tian, Xiaohai and
               Xie, Lei and
               Li, Haizhou},
  title     = {Optimizing Voice Conversion Network with Cycle Consistency Loss of Speaker Identity},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {507--513},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383567},
  doi       = {10.1109/SLT48900.2021.9383567},
  timestamp = {Fri, 14 May 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/DuTX021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/EppsUK21,
  author    = {Epps, Maya and
               Uribe, Juan and
               Korpusik, Mandy},
  title     = {A New Dataset for Natural Language Understanding of Exercise Logs in a Food and Fitness Spoken Dialogue System},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1071--1078},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383508},
  doi       = {10.1109/SLT48900.2021.9383508},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/EppsUK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/FengBE21,
  author    = {Feng, Yulan and
               Black, Alan W. and
               Esk{\'{e}}nazi, Maxine},
  title     = {Towards Automatic Route Description Unification in Spoken Dialog Systems},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {685--692},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383465},
  doi       = {10.1109/SLT48900.2021.9383465},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/FengBE21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/FuWHXX21,
  author    = {Fu, Yihui and
               Wu, Jian and
               Hu, Yanxin and
               Xing, Mengtao and
               Xie, Lei},
  title     = {{DESNet}: {A} Multi-Channel Network for Simultaneous Speech Dereverberation, Enhancement and Separation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {857--864},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383604},
  doi       = {10.1109/SLT48900.2021.9383604},
  timestamp = {Sat, 18 Feb 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/FuWHXX21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/FuYHWWYZXHBMO21,
  author    = {Fu, Yihui and
               Yao, Zhuoyuan and
               He, Weipeng and
               Wu, Jian and
               Wang, Xiong and
               Yang, Zhanheng and
               Zhang, Shimin and
               Xie, Lei and
               Huang, Dongyan and
               Bu, Hui and
               Motl{\'{\i}}cek, Petr and
               Odobez, Jean{-}Marc},
  title     = {{IEEE} {SLT} 2021 {Alpha-Mini} Speech Challenge: Open Datasets, Tracks, Rules and Baselines},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1101--1108},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383546},
  doi       = {10.1109/SLT48900.2021.9383546},
  timestamp = {Tue, 07 May 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/FuYHWWYZXHBMO21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/GaoLRS21,
  author    = {Gao, Yang and
               Lian, Jiachen and
               Raj, Bhiksha and
               Singh, Rita},
  title     = {Detection and Evaluation of Human and Machine Generated Speech in Spoofing Attacks on Automatic Speaker Verification Systems},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {544--551},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383558},
  doi       = {10.1109/SLT48900.2021.9383558},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/GaoLRS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/GeorgescuMOCB21,
  author    = {Georgescu, Alexandru{-}Lucian and
               Manolache, Cristian and
               Oneata, Dan and
               Cucu, Horia and
               Burileanu, Corneliu},
  title     = {Data-Filtering Methods for Self-Training of Automatic Speech Recognition Systems},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {141--147},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383577},
  doi       = {10.1109/SLT48900.2021.9383577},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/GeorgescuMOCB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/GhorbaniGSL21,
  author    = {Ghorbani, Shahram and
               Gaur, Yashesh and
               Shi, Yu and
               Li, Jinyu},
  title     = {Listen, Look and Deliberate: Visual Context-Aware Speech Recognition Using Pre-Trained Text-Video Representations},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {621--628},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383466},
  doi       = {10.1109/SLT48900.2021.9383466},
  timestamp = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/GhorbaniGSL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/GuoZSHX21,
  author    = {Guo, Haohan and
               Zhang, Shaofei and
               Soong, Frank K. and
               He, Lei and
               Xie, Lei},
  title     = {Conversational End-to-End {TTS} for Voice Agents},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {403--409},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383460},
  doi       = {10.1109/SLT48900.2021.9383460},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/GuoZSHX21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HGDR21,
  author    = {H, Muralikrishna and
               Gupta, Shikha and
               Dinesh, Dileep Aroor and
               Rajan, Padmanabhan},
  title     = {Noise-Robust Spoken Language Identification Using Language Relevance Factor Based Embedding},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {644--651},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383503},
  doi       = {10.1109/SLT48900.2021.9383503},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HGDR21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HoriguchiFN21,
  author    = {Horiguchi, Shota and
               Fujita, Yusuke and
               Nagamatsu, Kenji},
  title     = {Block-Online Guided Source Separation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {236--242},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383510},
  doi       = {10.1109/SLT48900.2021.9383510},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HoriguchiFN21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HossainS21,
  author    = {Hossain, Delowar and
               Sato, Yoshinao},
  title     = {Efficient Corpus Design for Wake-Word Detection},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1094--1100},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383569},
  doi       = {10.1109/SLT48900.2021.9383569},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HossainS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/Hsu0SH21,
  author    = {Hsu, Wei{-}Ning and
               Lee, Ann and
               Synnaeve, Gabriel and
               Hannun, Awni Y.},
  title     = {Semi-Supervised End-to-End Speech Recognition via Local Prior Matching},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {125--132},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383552},
  doi       = {10.1109/SLT48900.2021.9383552},
  timestamp = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/Hsu0SH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HuBPRML21,
  author    = {Hu, Qiong and
               Bleisch, Tobias and
               Petkov, Petko and
               Raitio, Tuomo and
               Marchi, Erik and
               Lakshminarasimhan, Varun},
  title     = {Whispered and {Lombard} Neural Speech Synthesis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {454--461},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383454},
  doi       = {10.1109/SLT48900.2021.9383454},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HuBPRML21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HuPSS21,
  author    = {Hu, Ke and
               Pang, Ruoming and
               Sainath, Tara N. and
               Strohman, Trevor},
  title     = {Transformer Based Deliberation for Two-Pass Speech Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {68--74},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383497},
  doi       = {10.1109/SLT48900.2021.9383497},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HuPSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HuSL21,
  author    = {Hu, Yushi and
               Settle, Shane and
               Livescu, Karen},
  title     = {Acoustic Span Embeddings for Multilingual Query-by-Example Search},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {935--942},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383545},
  doi       = {10.1109/SLT48900.2021.9383545},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HuSL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HuangCPW21,
  author    = {Huang, Yiling and
               Chen, Yutian and
               Pelecanos, Jason and
               Wang, Quan},
  title     = {{Synth2Aug}: Cross-Domain Speaker Recognition with {TTS} Synthesized Speech},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {316--322},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383525},
  doi       = {10.1109/SLT48900.2021.9383525},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HuangCPW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HuangLL21,
  author    = {Huang, Tzu{-}hsien and
               Lin, Jheng{-}Hao and
               Lee, Hung{-}yi},
  title     = {How Far Are We from Robust Voice Conversion: {A} Survey},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {514--521},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383498},
  doi       = {10.1109/SLT48900.2021.9383498},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HuangLL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HuangLLL21,
  author    = {Huang, Chien{-}yu and
               Lin, Yist Y. and
               Lee, Hung{-}yi and
               Lee, Lin{-}Shan},
  title     = {Defending Your Voice: Adversarial Attack on Voice Conversion},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {552--559},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383529},
  doi       = {10.1109/SLT48900.2021.9383529},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HuangLLL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/HuhLHMC21,
  author    = {Huh, Jaesung and
               Lee, Minjae and
               Heo, Heesoo and
               Mun, Seongkyu and
               Chung, Joon Son},
  title     = {Metric Learning for Keyword Spotting},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {133--140},
  doi       = {10.1109/SLT48900.2021.9383571},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383571},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/HuhLHMC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/JacobsMK21,
  author    = {Jacobs, Christiaan and
               Matusevych, Yevgen and
               Kamper, Herman},
  title     = {Acoustic Word Embeddings for Zero-Resource Languages Using Self-Supervised Contrastive Learning and Multilingual Adaptation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {919--926},
  doi       = {10.1109/SLT48900.2021.9383594},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383594},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/JacobsMK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/JayasimhaP21,
  author    = {Jayasimha, Aditya and
               Paramasivam, Periyasamy},
  title     = {Personalizing Speech Start Point and End Point Detection in {ASR} Systems from Speaker Embeddings},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {771--777},
  doi       = {10.1109/SLT48900.2021.9383516},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383516},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/JayasimhaP21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KaiTSK21,
  author    = {Kai, Hiroto and
               Takamichi, Shinnosuke and
               Shiota, Sayaka and
               Kiya, Hitoshi},
  title     = {Lightweight Voice Anonymization Based on Data-Driven Optimization of Cascaded Voice Modification Modules},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {560--566},
  doi       = {10.1109/SLT48900.2021.9383535},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383535},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KaiTSK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KandaCGWMCY21,
  author    = {Kanda, Naoyuki and
               Chang, Xuankai and
               Gaur, Yashesh and
               Wang, Xiaofei and
               Meng, Zhong and
               Chen, Zhuo and
               Yoshioka, Takuya},
  title     = {Investigation of End-to-End Speaker-Attributed {ASR} for Continuous Multi-Talker Recordings},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {809--816},
  doi       = {10.1109/SLT48900.2021.9383600},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383600},
  timestamp = {Wed, 19 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KandaCGWMCY21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KangY021,
  author    = {Kang, Fang and
               Yang, Feiran and
               Yang, Jun},
  title     = {Real-Time Independent Vector Analysis with a Deep-Learning-Based Source Model},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {665--669},
  doi       = {10.1109/SLT48900.2021.9383599},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383599},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KangY021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KanoS021,
  author    = {Kano, Takatomo and
               Sakti, Sakriani and
               Nakamura, Satoshi},
  title     = {Transformer-Based Direct Speech-To-Speech Translation with Transcoder},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {958--965},
  doi       = {10.1109/SLT48900.2021.9383496},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383496},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KanoS021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KhanalJB21,
  author    = {Khanal, Subash and
               Johnson, Michael T. and
               Bozorg, Narjes},
  title     = {Articulatory Comparison of {L1} and {L2} Speech for Mispronunciation Diagnosis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {693--697},
  doi       = {10.1109/SLT48900.2021.9383574},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383574},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KhanalJB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KharePS21,
  author    = {Khare, Aparna and
               Parthasarathy, Srinivas and
               Sundaram, Shiva},
  title     = {Self-Supervised Learning with Cross-Modal Transformers for Emotion Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {381--388},
  doi       = {10.1109/SLT48900.2021.9383618},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383618},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KharePS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KharitonovRSWMD21,
  author    = {Kharitonov, Eugene and
               Rivi{\`{e}}re, Morgane and
               Synnaeve, Gabriel and
               Wolf, Lior and
               Mazar{\'{e}}, Pierre{-}Emmanuel and
               Douze, Matthijs and
               Dupoux, Emmanuel},
  title     = {Data Augmenting Contrastive Learning of Speech Representations in the Time Domain},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {215--222},
  doi       = {10.1109/SLT48900.2021.9383605},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383605},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KharitonovRSWMD21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KimHCL21,
  author    = {Kim, You Jin and
               Heo, Hee Soo and
               Chung, Soo{-}Whan and
               Lee, Bong{-}Jin},
  title     = {End-To-End Lip Synchronisation Based on Pattern Classification},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {598--605},
  doi       = {10.1109/SLT48900.2021.9383616},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383616},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KimHCL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Title: the model name "Unet" is braced so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/slt/KongWWGZWX21,
  author       = {Yuxiang Kong and
                  Jian Wu and
                  Quandong Wang and
                  Peng Gao and
                  Weiji Zhuang and
                  Yujun Wang and
                  Lei Xie},
  title        = {Multi-Channel Automatic Speech Recognition Using Deep Complex {Unet}},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {104--110},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383492},
  doi          = {10.1109/SLT48900.2021.9383492},
  timestamp    = {Sat, 20 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/KongWWGZWX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/Krishnamachari021,
  author    = {Krishnamachari, Suchitra and
               Kumar, Manoj and
               Kim, So Hyun and
               Lord, Catherine and
               Narayanan, Shrikanth},
  title     = {Developing Neural Representations for Robust Child-Adult Diarization},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {590--597},
  doi       = {10.1109/SLT48900.2021.9383488},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383488},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/Krishnamachari021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KwonHHLC21,
  author    = {Kwon, Youngki and
               Heo, Hee Soo and
               Huh, Jaesung and
               Lee, Bong{-}Jin and
               Chung, Joon Son},
  title     = {Look Who's Not Talking},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {567--573},
  doi       = {10.1109/SLT48900.2021.9383502},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383502},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KwonHHLC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KyeCK21,
  author    = {Kye, Seong Min and
               Chung, Joon Son and
               Kim, Hoirin},
  title     = {Supervised Attention for Speaker Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {286--293},
  doi       = {10.1109/SLT48900.2021.9383579},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383579},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KyeCK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/KyeKC21,
  author    = {Kye, Seong Min and
               Kwon, Yoohwan and
               Chung, Joon Son},
  title     = {Cross Attentive Pooling for Speaker Verification},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {294--300},
  doi       = {10.1109/SLT48900.2021.9383565},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383565},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/KyeKC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LamWSY21,
  author    = {Lam, Max W. Y. and
               Wang, Jun and
               Su, Dan and
               Yu, Dong},
  title     = {Effective Low-Cost Time-Domain Audio Separation Using Globally Attentive Locally Recurrent Networks},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {801--808},
  doi       = {10.1109/SLT48900.2021.9383464},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383464},
  timestamp = {Wed, 22 Dec 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/LamWSY21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LeKCMFS21,
  author    = {Le, Duc and
               Keren, Gil and
               Chan, Julian and
               Mahadeokar, Jay and
               Fuegen, Christian and
               Seltzer, Michael L.},
  title     = {Deep Shallow Fusion for {RNN-T} Personalization},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {251--257},
  doi       = {10.1109/SLT48900.2021.9383560},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383560},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LeKCMFS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/Lee21,
  author    = {Lee, Shi{-}wook},
  title     = {Domain Generalization with Triplet Network for Cross-Corpus Speech Emotion Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {389--396},
  doi       = {10.1109/SLT48900.2021.9383534},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383534},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/Lee21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LeiYX21,
  author    = {Lei, Yi and
               Yang, Shan and
               Xie, Lei},
  title     = {Fine-Grained Emotion Strength Transfer, Control and Prediction for Emotional Speech Synthesis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {423--430},
  doi       = {10.1109/SLT48900.2021.9383524},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383524},
  timestamp = {Fri, 05 Nov 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/LeiYX21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Title: the toolkit name "ESPnet-SE" is braced; unprotected, sentence-casing styles render it as "Espnet-se".
@inproceedings{DBLP:conf/slt/Li0ZSCKHHBC021,
  author       = {Chenda Li and
                  Jing Shi and
                  Wangyou Zhang and
                  Aswin Shanmugam Subramanian and
                  Xuankai Chang and
                  Naoyuki Kamo and
                  Moto Hira and
                  Tomoki Hayashi and
                  Christoph B{\"{o}}ddeker and
                  Zhuo Chen and
                  Shinji Watanabe},
  title        = {{ESPnet-SE}: End-To-End Speech Enhancement and Separation Toolkit Designed
                  for {ASR} Integration},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {785--792},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383615},
  doi          = {10.1109/SLT48900.2021.9383615},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/Li0ZSCKHHBC021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/LiAY21,
  author    = {Li, Haoyu and
               Ai, Yang and
               Yamagishi, Junichi},
  title     = {Enhancing Low-Quality Voice Recordings Using Disentangled Channel Factor and Neural Waveform Model},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {734--741},
  doi       = {10.1109/SLT48900.2021.9383507},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383507},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LiAY21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LiK0W21,
  author    = {Li, Qiujia and
               Kreyssig, Florian L. and
               Zhang, Chao and
               Woodland, Philip C.},
  title     = {Discriminative Neural Clustering for Speaker Diarisation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {574--581},
  doi       = {10.1109/SLT48900.2021.9383617},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383617},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LiK0W21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LiLHLYZDKBQ0C21,
  author    = {Li, Chenda and
               Luo, Yi and
               Han, Cong and
               Li, Jinyu and
               Yoshioka, Takuya and
               Zhou, Tianyan and
               Delcroix, Marc and
               Kinoshita, Keisuke and
               B{\"{o}}ddeker, Christoph and
               Qian, Yanmin and
               Watanabe, Shinji and
               Chen, Zhuo},
  title     = {Dual-Path {RNN} for Long Recording Speech Separation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {865--872},
  doi       = {10.1109/SLT48900.2021.9383514},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383514},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/LiLHLYZDKBQ0C21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LiOLH21,
  author    = {Li, Song and
               Ouyang, Beibei and
               Li, Lin and
               Hong, Qingyang},
  title     = {Lightspeech: Lightweight Non-Autoregressive Multi-Speaker Text-To-Speech},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {499--506},
  doi       = {10.1109/SLT48900.2021.9383562},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383562},
  timestamp = {Mon, 26 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LiOLH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Title: "end-to-end" is capitalized as "End-to-End" so the stored title is uniformly in Title Case.
@inproceedings{DBLP:conf/slt/LiSH21,
  author       = {Ruizhi Li and
                  Gregory Sell and
                  Hynek Hermansky},
  title        = {Two-Stage Augmentation and Adaptive {CTC} Fusion for Improved Robustness
                  of Multi-Stream End-to-End {ASR}},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {229--235},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383621},
  doi          = {10.1109/SLT48900.2021.9383621},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/LiSH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/LiZD21,
  author    = {Li, Mohan and
               Zorila, Catalin and
               Doddipatla, Rama},
  title     = {Transformer-Based Online Speech Recognition with Decoder-end Adaptive Computation Steps},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {1--7},
  doi       = {10.1109/SLT48900.2021.9383613},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383613},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LiZD21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LiZLH21,
  author    = {Li, Zheng and
               Zhao, Miao and
               Li, Lin and
               Hong, Qingyang},
  title     = {Multi-Feature Learning with Canonical Correlation Analysis Constraint for Text-Independent Speaker Verification},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {330--337},
  doi       = {10.1109/SLT48900.2021.9383541},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383541},
  timestamp = {Mon, 26 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LiZLH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Title: the method name "Specaugment" is braced so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/slt/LiZZL21,
  author       = {Xinwei Li and
                  Yuanyuan Zhang and
                  Xiaodan Zhuang and
                  Daben Liu},
  title        = {Frame-Level {Specaugment} for Deep Convolutional Neural Networks in
                  Hybrid {ASR} Systems},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {209--214},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383626},
  doi          = {10.1109/SLT48900.2021.9383626},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/LiZZL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Title: the proper noun "English" is braced; unprotected, sentence-casing styles render it as "english".
@inproceedings{DBLP:conf/slt/LinWDF21,
  author       = {Binghuai Lin and
                  Liyuan Wang and
                  Hongwei Ding and
                  Xiaoli Feng},
  title        = {Improving {L2} {English} Rhythm Evaluation with Automatic Sentence Stress
                  Detection},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {713--719},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383455},
  doi          = {10.1109/SLT48900.2021.9383455},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/LinWDF21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/Liu0LKSZ21,
  author    = {Liu, Chunxi and
               Zhang, Frank and
               Le, Duc and
               Kim, Suyoun and
               Saraf, Yatharth and
               Zweig, Geoffrey},
  title     = {Improving {RNN} Transducer Based {ASR} with Auxiliary Tasks},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {172--179},
  doi       = {10.1109/SLT48900.2021.9383548},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383548},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/Liu0LKSZ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LiuHWYY0M21,
  author    = {Liu, Liangqi and
               Hu, Jiankun and
               Wu, Zhiyong and
               Yang, Song and
               Yang, Songfan and
               Jia, Jia and
               Meng, Helen},
  title     = {Controllable Emphatic Speech Synthesis based on Forward Attention for Expressive Speech Synthesis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {410--414},
  doi       = {10.1109/SLT48900.2021.9383537},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383537},
  timestamp = {Sat, 08 May 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LiuHWYY0M21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LiuYXWZC021,
  author    = {Liu, Jianming and
               Yu, Meng and
               Xu, Yong and
               Weng, Chao and
               Zhang, Shi{-}Xiong and
               Chen, Lianwu and
               Yu, Dong},
  title     = {Neural Mask based Multi-channel Convolutional Beamforming for Joint Dereverberation, Echo Cancellation and Denoising},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {766--770},
  doi       = {10.1109/SLT48900.2021.9383519},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383519},
  timestamp = {Mon, 26 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LiuYXWZC021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/LuLZZ21,
  author    = {Lu, Zexin and
               Li, Jing and
               Zhang, Yingyi and
               Zhang, Haisong},
  title     = {Getting Your Conversation on Track: Estimation of Residual Life for Conversations},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {1036--1043},
  doi       = {10.1109/SLT48900.2021.9383544},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383544},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LuLZZ21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Title: the model name "Multi-Quartznet" is braced; unprotected, sentence-casing styles render it as "Multi-quartznet".
@inproceedings{DBLP:conf/slt/LuoWCJX21,
  author       = {Jian Luo and
                  Jianzong Wang and
                  Ning Cheng and
                  Guilin Jiang and
                  Jing Xiao},
  title        = {{Multi-Quartznet}: Multi-Resolution Convolution for Speech Recognition
                  with Multi-Layer Feature Fusion},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {82--88},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383532},
  doi          = {10.1109/SLT48900.2021.9383532},
  timestamp    = {Wed, 14 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/LuoWCJX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/LuoWCJX21a,
  author    = {Luo, Jian and
               Wang, Jianzong and
               Cheng, Ning and
               Jiang, Guilin and
               Xiao, Jing},
  title     = {End-To-End Silent Speech Recognition with Acoustic Sensing},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {606--612},
  doi       = {10.1109/SLT48900.2021.9383622},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383622},
  timestamp = {Wed, 14 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/LuoWCJX21a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Title: "end-to-end" is capitalized as "End-to-End" so the stored title is uniformly in Title Case.
@inproceedings{DBLP:conf/slt/LuoZLX21,
  author       = {Haoneng Luo and
                  Shiliang Zhang and
                  Ming Lei and
                  Lei Xie},
  title        = {Simplified Self-Attention for Transformer-Based End-to-End Speech
                  Recognition},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {75--81},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383581},
  doi          = {10.1109/SLT48900.2021.9383581},
  timestamp    = {Fri, 09 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/LuoZLX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/MacaryTER21,
  author    = {Macary, Manon and
               Tahon, Marie and
               Est{\`{e}}ve, Yannick and
               Rousseau, Anthony},
  title     = {On the Use of Self-Supervised Pre-Trained Acoustic and Linguistic Features for Continuous Speech Emotion Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {373--380},
  doi       = {10.1109/SLT48900.2021.9383456},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383456},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/MacaryTER21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/MahadeokarSLKSL21,
  author    = {Mahadeokar, Jay and
               Shangguan, Yuan and
               Le, Duc and
               Keren, Gil and
               Su, Hang and
               Le, Thong and
               Yeh, Ching{-}Feng and
               Fuegen, Christian and
               Seltzer, Michael L.},
  title     = {Alignment Restricted Streaming Recurrent Neural Network Transducer},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  publisher = {{IEEE}},
  year      = 2021,
  pages     = {52--59},
  doi       = {10.1109/SLT48900.2021.9383606},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383606},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/MahadeokarSLKSL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/MariniVCZSFDDF21,
  author    = {Marini, Marco and
               Vigan{\`{o}}, Mauro and
               Corbo, Massimo and
               Zettin, Marina and
               Simoncini, Gloria and
               Fattori, Bruno and
               D'Anna, Clelia and
               Donati, Massimiliano and
               Fanucci, Luca},
  title     = {{IDEA}: An {Italian} Dysarthric Speech Database},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1086--1093},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383467},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383467},
  timestamp = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/MariniVCZSFDDF21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/MasumuraMITTO21,
  author    = {Masumura, Ryo and
               Makishima, Naoki and
               Ihori, Mana and
               Takashima, Akihiko and
               Tanaka, Tomohiro and
               Orihashi, Shota},
  title     = {Large-Context Conversational Representation Learning: Self-Supervised Learning For Conversational Documents},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1012--1019},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383584},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383584},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/MasumuraMITTO21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/MeghananiSR21,
  author    = {Meghanani, Amit and
               Anoop, Chandran Savithri and
               Ramakrishnan, A. G.},
  title     = {An Exploration of Log-Mel Spectrogram and {MFCC} Features for {Alzheimer's} Dementia Recognition from Spontaneous Speech},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {670--677},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383491},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383491},
  timestamp = {Mon, 25 Dec 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/MeghananiSR21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/MengPSGKLCZLG21,
  author    = {Meng, Zhong and
               Parthasarathy, Sarangarajan and
               Sun, Eric and
               Gaur, Yashesh and
               Kanda, Naoyuki and
               Lu, Liang and
               Chen, Xie and
               Zhao, Rui and
               Li, Jinyu and
               Gong, Yifan},
  title     = {Internal Language Model Estimation for Domain-Adaptive End-to-End Speech Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {243--250},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383515},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383515},
  timestamp = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/MengPSGKLCZLG21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/MeyerXF21,
  author    = {Meyer, Patrick and
               Xu, Ziyi and
               Fingscheidt, Tim},
  title     = {Improving Convolutional Recurrent Neural Networks for Speech Emotion Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {365--372},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383513},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383513},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/MeyerXF21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/NakadaiFT21,
  author    = {Nakadai, Kazuhiro and
               Fukumoto, Yosuke and
               Takeda, Ryu},
  title     = {Investigation of Node Pruning Criteria for Neural Networks Model Compression with Non-Linear Function and Non-Uniform Network Topology},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {117--124},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383593},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383593},
  timestamp = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/NakadaiFT21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/NamazifarTH21,
  author    = {Namazifar, Mahdi and
               T{\"{u}}r, G{\"{o}}khan and
               Hakkani{-}T{\"{u}}r, Dilek},
  title     = {Warped Language Models for Noise Robust Language Understanding},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {981--988},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383493},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383493},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/NamazifarTH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/NeumannV21,
  author    = {Neumann, Michael and
               Vu, Ngoc Thang},
  title     = {Investigations on audiovisual emotion recognition in noisy conditions},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {358--364},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383588},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383588},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/NeumannV21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/NiXYWZYM21,
  author    = {Ni, Zhaoheng and
               Xu, Yong and
               Yu, Meng and
               Wu, Bo and
               Zhang, Shi{-}Xiong and
               Yu, Dong and
               Mandel, Michael I.},
  title     = {{WPD++}: An Improved Neural Beamformer for Simultaneous Speech Separation and Dereverberation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {817--824},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383528},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383528},
  timestamp = {Fri, 23 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/NiXYWZYM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/OneataCSC21,
  author    = {Oneata, Dan and
               Caranica, Alexandru and
               Stan, Adriana and
               Cucu, Horia},
  title     = {An Evaluation of Word-Level Confidence Estimation for End-to-End Automatic Speech Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {258--265},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383570},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383570},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/OneataCSC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/PandeyLWS21,
  author    = {Pandey, Ashutosh and
               Liu, Chunxi and
               Wang, Yun and
               Saraf, Yatharth},
  title     = {Dual Application of Speech Enhancement for Automatic Speech Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {223--228},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383624},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383624},
  timestamp = {Tue, 30 Nov 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/PandeyLWS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ParkKS21,
  author    = {Park, Jinhwan and
               Kim, Chanwoo and
               Sung, Wonyong},
  title     = {Convolution-Based Attention Model With Positional Encoding For Streaming Speech Recognition On Embedded Devices},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {30--37},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383583},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383583},
  timestamp = {Mon, 24 Apr 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ParkKS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ParkR21,
  author    = {Park, Su Ji and
               Rozet, Alan},
  title     = {Film Quality Prediction Using Acoustic, Prosodic and Lexical Cues},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {678--684},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383509},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383509},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ParkR21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ParthasarathyS21,
  author    = {Parthasarathy, Srinivas and
               Sundaram, Shiva},
  title     = {Detecting Expressions with Multimodal Transformers},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {636--643},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383573},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383573},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ParthasarathyS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/PellegriniZM21,
  author    = {Pellegrini, Thomas and
               Zimmer, Romain and
               Masquelier, Timoth{\'{e}}e},
  title     = {Low-Activity Supervised Convolutional Spiking Neural Networks Applied to Speech Commands Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {97--103},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383587},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383587},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/PellegriniZM21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/PraveenPKRB21,
  author    = {Praveen, Kiran and
               Pandey, Abhishek and
               Kumar, Deepak and
               Rath, Shakti Prasad and
               Bapat, Sandip Shriram},
  title     = {Dynamically Weighted Ensemble Models for Automatic Speech Recognition},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {111--116},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383463},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383463},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/PraveenPKRB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/RajDCEHH0DYLKLW21,
  author    = {Raj, Desh and
               Denisov, Pavel and
               Chen, Zhuo and
               Erdogan, Hakan and
               Huang, Zili and
               He, Maokui and
               Watanabe, Shinji and
               Du, Jun and
               Yoshioka, Takuya and
               Luo, Yi and
               Kanda, Naoyuki and
               Li, Jinyu and
               Wisdom, Scott and
               Hershey, John R.},
  title     = {Integration of Speech Separation, Diarization, and Recognition for Multi-Speaker Meetings: System Description, Comparison, and Analysis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {897--904},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383556},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383556},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/RajDCEHH0DYLKLW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/RajGH0PSK21,
  author    = {Raj, Desh and
               Garc{\'{\i}}a{-}Perera, Leibny Paola and
               Huang, Zili and
               Watanabe, Shinji and
               Povey, Daniel and
               Stolcke, Andreas and
               Khudanpur, Sanjeev},
  title     = {{DOVER-Lap}: {A} Method for Combining Overlap-Aware Diarization Outputs},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {881--888},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383490},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383490},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/RajGH0PSK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/RajHK21,
  author    = {Raj, Desh and
               Huang, Zili and
               Khudanpur, Sanjeev},
  title     = {Multi-Class Spectral Clustering with Overlaps for Speaker Diarization},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {582--589},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383602},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383602},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/RajHK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/RaoPK21,
  author    = {Rao, Hrishikesh and
               Phatak, Kedar and
               Khoury, Elie},
  title     = {Improving Speaker Recognition with Quality Indicators},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {338--343},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383627},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383627},
  timestamp = {Thu, 15 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/RaoPK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/RibeiroSZEWRR21,
  author    = {Ribeiro, Manuel Sam and
               Sanger, Jennifer and
               Zhang, Jing{-}Xuan and
               Eshky, Aciel and
               Wrench, Alan and
               Richmond, Korin and
               Renals, Steve},
  title     = {{TaL}: {A} Synchronised Multi-Speaker Corpus of Ultrasound Tongue Imaging, Audio, and Lip Videos},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1109--1116},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383619},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383619},
  timestamp = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/RibeiroSZEWRR21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/RiviereD21,
  author    = {Rivi{\`{e}}re, Morgane and
               Dupoux, Emmanuel},
  title     = {Towards Unsupervised Learning of Speech Features in the Wild},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {156--163},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383461},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383461},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/RiviereD21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/RutowskiSHLOC21,
  author    = {Rutowski, Tomek and
               Shriberg, Elizabeth and
               Harati, Amir and
               Lu, Yang and
               Oliveira, Ricardo and
               Chlebek, Piotr},
  title     = {Cross-Demographic Portability of Deep {NLP}-Based Depression Models},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1052--1057},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383609},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383609},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/RutowskiSHLOC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/SaekiMKOK21,
  author    = {Saeki, Mao and
               Matsuyama, Yoichi and
               Kobashikawa, Satoshi and
               Ogawa, Tetsuji and
               Kobayashi, Tetsunori},
  title     = {Analysis of Multimodal Features for Speaking Proficiency Scoring in an Interview Dialogue},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {629--635},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383590},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383590},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/SaekiMKOK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/SahidullahSVLSK21,
  author    = {Sahidullah, Md. and
               Sarkar, Achintya Kumar and
               Vestman, Ville and
               Liu, Xuechen and
               Serizel, Romain and
               Kinnunen, Tomi and
               Tan, Zheng{-}Hua and
               Vincent, Emmanuel},
  title     = {{UIAI} System for Short-Duration Speaker Verification Challenge 2020},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {323--329},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383596},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383596},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/SahidullahSVLSK21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/SantosAWX21,
  author    = {Santos, Timothy Israel and
               Abel, Andrew and
               Wilson, Nick and
               Xu, Yan},
  title     = {Speaker-Independent Visual Speech Recognition with the {Inception V3} Model},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {613--620},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383540},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383540},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/SantosAWX21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/SatoOKDNA21,
  author    = {Sato, Hiroshi and
               Ochiai, Tsubasa and
               Kinoshita, Keisuke and
               Delcroix, Marc and
               Nakatani, Tomohiro and
               Araki, Shoko},
  title     = {Multimodal Attention Fusion for Target Speaker Extraction},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {778--784},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383539},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383539},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/slt/SatoOKDNA21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/SemenzinHSKC21,
  author    = {Semenzin, Chiara and
               Hamrick, Lisa and
               Seidl, Amanda and
               Kelleher, Bridgette and
               Cristi{\`{a}}, Alejandrina},
  title     = {Towards Large-Scale Data Annotation of Audio from Wearables: Validating {Zooniverse} Annotations of Infant Vocalization Types},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {1079--1085},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383511},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383511},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/SemenzinHSKC21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/SenAGV21,
  author    = {Sen, Bipasha and
               Agarwal, Aditya and
               Ganesh, Mirishkar Sai and
               Vuppala, Anil Kumar},
  title     = {Reed: An Approach Towards Quickly Bootstrapping Multilingual Acoustic Models},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {272--279},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383457},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383457},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/SenAGV21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/SenayIH21,
  author    = {Senay, Gr{\'{e}}gory and
               Idrissi, Badr Youbi and
               Haziza, Marine},
  title     = {{VirAAL}: Virtual Adversarial Active Learning for {NLU}},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {973--980},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383576},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383576},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/SenayIH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShechtmanFH21,
  author    = {Shechtman, Slava and
               Fernandez, Raul and
               Haws, David},
  title     = {Supervised and unsupervised approaches for controlling narrow lexical focus in sequence-to-sequence speech synthesis},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {431--437},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383591},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383591},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShechtmanFH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShekarBH21,
  author    = {Shekar, Ram C. M. C. and
               Belitz, Chelzy and
               Hansen, John H. L.},
  title     = {Development of {CNN}-Based Cochlear Implant and Normal Hearing Sound Recognition Models Using Natural and Auralized Environmental Audio},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {728--733},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383550},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383550},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShekarBH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShenYSMS21,
  author    = {Shen, Yang and
               Yasukagawa, Ayano and
               Saito, Daisuke and
               Minematsu, Nobuaki and
               Saito, Kazuya},
  title     = {Optimized Prediction of Fluency of {L2} {English} Based on Interpretable Network Using Quantity of Phonation and Quality of Pronunciation},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {698--704},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383458},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383458},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShenYSMS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShiCTL0H21,
  author    = {Shi, Ying and
               Chen, Haolin and
               Tang, Zhiyuan and
               Li, Lantian and
               Wang, Dong and
               Han, Jiqing},
  title     = {Can We Trust Deep Speech Prior?},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {742--749},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383499},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383499},
  timestamp = {Thu, 26 Oct 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShiCTL0H21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShiH21,
  author    = {Shi, Yanpei and
               Hain, Thomas},
  title     = {Contextual Joint Factor Acoustic Embeddings},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {750--757},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383592},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383592},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShiH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShiH21a,
  author    = {Shi, Yanpei and
               Hain, Thomas},
  title     = {Supervised Speaker Embedding De-Mixing in Two-Speaker Environment},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {758--765},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383580},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383580},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShiH21a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShiSL21,
  author    = {Shi, Bowen and
               Settle, Shane and
               Livescu, Karen},
  title     = {Whole-Word Segmental Speech Recognition with Acoustic Word Embeddings},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {164--171},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383578},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383578},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShiSL21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/ShibataZS21,
  author    = {Shibata, Hayato and
               Zhang, Mingxin and
               Shinozaki, Takahiro},
  title     = {Unsupervised Acoustic-to-Articulatory Inversion Neural Network Learning Based on Deterministic Policy Gradient},
  booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {530--537},
  publisher = {{IEEE}},
  year      = {2021},
  doi       = {10.1109/SLT48900.2021.9383554},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383554},
  timestamp = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/slt/ShibataZS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/slt/Shivakumar0GN21,
  author       = {Shivakumar, Prashanth Gurunath and
                  Kumar, Naveen and
                  Georgiou, Panayiotis G. and
                  Narayanan, Shrikanth},
  title        = {{RNN} Based Incremental Online Spoken Language Understanding},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {989--996},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383614},
  doi          = {10.1109/SLT48900.2021.9383614},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/Shivakumar0GN21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/SongYHKKK21,
  author       = {Song, Eunwoo and
                  Yamamoto, Ryuichi and
                  Hwang, Min{-}Jae and
                  Kim, Jin{-}Seob and
                  Kwon, Ohsung and
                  Kim, Jae{-}Min},
  title        = {Improved Parallel {WaveGAN} Vocoder with Perceptually Weighted Spectrogram
                  Loss},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {470--476},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383549},
  doi          = {10.1109/SLT48900.2021.9383549},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/SongYHKKK21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/SongZWH21,
  author       = {Song, Minguang and
                  Zhao, Yunxin and
                  Wang, Shaojun and
                  Han, Mei},
  title        = {Word Similarity Based Label Smoothing in {RNNLM} Training for {ASR}},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {280--285},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383598},
  doi          = {10.1109/SLT48900.2021.9383598},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/SongZWH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/StadenK21,
  author       = {van Staden, Lisa and
                  Kamper, Herman},
  title        = {A Comparison of Self-Supervised Speech Representations As Input Features
                  For Unsupervised Acoustic Word Embeddings},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {927--934},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383625},
  doi          = {10.1109/SLT48900.2021.9383625},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/StadenK21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/SterpuSH21,
  author       = {Sterpu, George and
                  Saam, Christian and
                  Harte, Naomi},
  title        = {Learning to Count Words in Fluent Speech Enables Online Speech Recognition},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {38--45},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383563},
  doi          = {10.1109/SLT48900.2021.9383563},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/SterpuSH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/SuL21,
  author       = {Su, Bo{-}Hao and
                  Lee, Chi{-}Chun},
  title        = {A Conditional Cycle Emotion {GAN} for Cross Corpus Speech Emotion Recognition},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {351--357},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383512},
  doi          = {10.1109/SLT48900.2021.9383512},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/SuL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/SudroD0P21,
  author       = {Sudro, Protima Nomo and
                  Das, Rohan Kumar and
                  Sinha, Rohit and
                  Prasanna, S. R. Mahadeva},
  title        = {Enhancing the Intelligibility of Cleft Lip and Palate Speech Using
                  Cycle-Consistent Adversarial Networks},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {720--727},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383543},
  doi          = {10.1109/SLT48900.2021.9383543},
  timestamp    = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/SudroD0P21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/SunWCPZKX21,
  author       = {Sun, Aolan and
                  Wang, Jianzong and
                  Cheng, Ning and
                  Peng, Huayi and
                  Zeng, Zhen and
                  Kong, Lingwei and
                  Xiao, Jing},
  title        = {{GraphPB}: Graphical Representations of Prosody Boundary in Speech Synthesis},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {438--445},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383530},
  doi          = {10.1109/SLT48900.2021.9383530},
  timestamp    = {Wed, 14 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/SunWCPZKX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/SwarupCSTAG21,
  author       = {Swarup, Prakhar and
                  Chakrabarty, Debmalya and
                  Sapru, Ashtosh and
                  Tulsiani, Hitesh and
                  Arsikere, Harish and
                  Garimella, Sri},
  title        = {Efficient Large Scale Semi-Supervised Learning for {CTC} Based Acoustic
                  Models},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {148--155},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383536},
  doi          = {10.1109/SLT48900.2021.9383536},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/SwarupCSTAG21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/TakashimaF0HGN21,
  author       = {Takashima, Yuki and
                  Fujita, Yusuke and
                  Watanabe, Shinji and
                  Horiguchi, Shota and
                  Garc{\'{\i}}a, Paola and
                  Nagamatsu, Kenji},
  title        = {End-to-End Speaker Diarization Conditioned on Speech Activity and
                  Overlap Detection},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {849--856},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383555},
  doi          = {10.1109/SLT48900.2021.9383555},
  timestamp    = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/TakashimaF0HGN21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/TakatsuOMHFK21,
  author       = {Takatsu, Hiroaki and
                  Okuda, Mayu and
                  Matsuyama, Yoichi and
                  Honda, Hiroshi and
                  Fujie, Shinya and
                  Kobayashi, Tetsunori},
  title        = {Personalized Extractive Summarization for a News Dialogue System},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {1044--1051},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383568},
  doi          = {10.1109/SLT48900.2021.9383568},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/TakatsuOMHFK21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/TongHMJPCRM21,
  author       = {Tong, Xiaosu and
                  Huang, Che{-}Wei and
                  Mallidi, Sri Harish and
                  Joseph, Shaun and
                  Pareek, Sonal and
                  Chandak, Chander and
                  Rastrow, Ariya and
                  Maas, Roland},
  title        = {Streaming {ResLSTM} with Causal Mean Aggregation for Device-Directed
                  Utterance Detection},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {659--664},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383607},
  doi          = {10.1109/SLT48900.2021.9383607},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/TongHMJPCRM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/TsunooK021,
  author       = {Tsunoo, Emiru and
                  Kashiwagi, Yosuke and
                  Watanabe, Shinji},
  title        = {Streaming Transformer {ASR} With Blockwise Synchronous Beam Search},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {22--29},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383517},
  doi          = {10.1109/SLT48900.2021.9383517},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/TsunooK021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/UnluA21,
  author       = {{\"{U}}nl{\"{u}}, Merve and
                  Arisoy, Ebru},
  title        = {Uncertainty-Aware Representations for Spoken Question Answering},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {943--949},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383547},
  doi          = {10.1109/SLT48900.2021.9383547},
  timestamp    = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/UnluA21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ValkA21,
  author       = {Valk, J{\"{o}}rgen and
                  Alum{\"{a}}e, Tanel},
  title        = {{VoxLingua107}: {A} Dataset for Spoken Language Recognition},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {652--658},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383459},
  doi          = {10.1109/SLT48900.2021.9383459},
  timestamp    = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/ValkA21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/WangEQM21,
  author       = {Wang, Xinhao and
                  Evanini, Keelan and
                  Qian, Yao and
                  Mulholland, Matthew},
  title        = {Automated Scoring of Spontaneous Speech from Young Learners of {English}
                  Using Transformers},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {705--712},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383553},
  doi          = {10.1109/SLT48900.2021.9383553},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/WangEQM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/WangEWWR0CH21,
  author       = {Wang, Zhong{-}Qiu and
                  Erdogan, Hakan and
                  Wisdom, Scott and
                  Wilson, Kevin W. and
                  Raj, Desh and
                  Watanabe, Shinji and
                  Chen, Zhuo and
                  Hershey, John R.},
  title        = {Sequential Multi-Frame Neural Beamforming for Speech Separation and
                  Enhancement},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {905--911},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383522},
  doi          = {10.1109/SLT48900.2021.9383522},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/WangEWWR0CH21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/WangKGCMY21,
  author       = {Wang, Xiaofei and
                  Kanda, Naoyuki and
                  Gaur, Yashesh and
                  Chen, Zhuo and
                  Meng, Zhong and
                  Yoshioka, Takuya},
  title        = {Exploring End-to-End Multi-Channel {ASR} with Bias Information for
                  Meeting Transcription},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {833--840},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383500},
  doi          = {10.1109/SLT48900.2021.9383500},
  timestamp    = {Wed, 19 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/WangKGCMY21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/WangYSX21,
  author       = {Wang, Xiong and
                  Yao, Zhuoyuan and
                  Shi, Xian and
                  Xie, Lei},
  title        = {Cascade {RNN-Transducer}: Syllable Based Streaming On-Device {Mandarin}
                  Speech Recognition with a Syllable-To-Character Converter},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {15--21},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383506},
  doi          = {10.1109/SLT48900.2021.9383506},
  timestamp    = {Tue, 29 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/WangYSX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/Wangh21,
  author       = {Wang, Pu and
                  Van hamme, Hugo},
  title        = {A Light Transformer For Speech-To-Intent Applications},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {997--1003},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383559},
  doi          = {10.1109/SLT48900.2021.9383559},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/Wangh21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/WuS021,
  author       = {Wu, Bin and
                  Sakti, Sakriani and
                  Nakamura, Satoshi},
  title        = {Incorporating Discriminative {DPGMM} Posteriorgrams for Low-Resource
                  {ASR}},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {201--208},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383597},
  doi          = {10.1109/SLT48900.2021.9383597},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/WuS021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/XueHF0GN21,
  author       = {Xue, Yawen and
                  Horiguchi, Shota and
                  Fujita, Yusuke and
                  Watanabe, Shinji and
                  Garc{\'{\i}}a, Paola and
                  Nagamatsu, Kenji},
  title        = {Online End-To-End Neural Diarization with Speaker-Tracing Buffer},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {841--848},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383523},
  doi          = {10.1109/SLT48900.2021.9383523},
  timestamp    = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/XueHF0GN21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/XueYLXL21,
  author       = {Xue, Heyang and
                  Yang, Shan and
                  Lei, Yi and
                  Xie, Lei and
                  Li, Xiulin},
  title        = {{Learn2Sing}: Target Speaker Singing Voice Synthesis by Learning from
                  a Singing Teacher},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {522--529},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383585},
  doi          = {10.1109/SLT48900.2021.9383585},
  timestamp    = {Fri, 05 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/XueYLXL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/YangYLF0X21,
  author       = {Yang, Geng and
                  Yang, Shan and
                  Liu, Kai and
                  Fang, Peng and
                  Chen, Wei and
                  Xie, Lei},
  title        = {Multi-Band {MelGAN}: Faster Waveform Generation For High-Quality Text-To-Speech},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {492--498},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383551},
  doi          = {10.1109/SLT48900.2021.9383551},
  timestamp    = {Fri, 05 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/YangYLF0X21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/YehWSW0CS21,
  author       = {Yeh, Ching{-}Feng and
                  Wang, Yongqiang and
                  Shi, Yangyang and
                  Wu, Chunyang and
                  Zhang, Frank and
                  Chan, Julian and
                  Seltzer, Michael L.},
  title        = {Streaming Attention-Based Models with Augmented Memory for End-To-End
                  Speech Recognition},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {8--14},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383504},
  doi          = {10.1109/SLT48900.2021.9383504},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/YehWSW0CS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/YuYWAXOLLM21,
  author       = {Yu, Fan and
                  Yao, Zhuoyuan and
                  Wang, Xiong and
                  An, Keyu and
                  Xie, Lei and
                  Ou, Zhijian and
                  Liu, Bo and
                  Li, Xiulin and
                  Miao, Guanqiong},
  title        = {The {SLT} 2021 Children Speech Recognition Challenge: Open Datasets,
                  Rules and Baselines},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {1117--1123},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383608},
  doi          = {10.1109/SLT48900.2021.9383608},
  timestamp    = {Tue, 29 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/YuYWAXOLLM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZengWCX21,
  author       = {Zeng, Zhen and
                  Wang, Jianzong and
                  Cheng, Ning and
                  Xiao, Jing},
  title        = {{MelGlow}: Efficient Waveform Generative Network Based On Location-Variable
                  Convolution},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {485--491},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383603},
  doi          = {10.1109/SLT48900.2021.9383603},
  timestamp    = {Wed, 14 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/ZengWCX21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/Zhang0LSCPLYPSZ21,
  author       = {Zhang, Xiaohui and
                  Zhang, Frank and
                  Liu, Chunxi and
                  Schubert, Kjell and
                  Chan, Julian and
                  Prakash, Pradyot and
                  Liu, Jun and
                  Yeh, Ching{-}Feng and
                  Peng, Fuchun and
                  Saraf, Yatharth and
                  Zweig, Geoffrey},
  title        = {Benchmarking {LF-MMI}, {CTC} And {RNN-T} Criteria For Streaming {ASR}},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {46--51},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383623},
  doi          = {10.1109/SLT48900.2021.9383623},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/Zhang0LSCPLYPSZ21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZhangL0R21,
  author       = {Zhang, Shucong and
                  Loweimi, Erfan and
                  Bell, Peter and
                  Renals, Steve},
  title        = {On The Usefulness of Self-Attention for Automatic Speech Recognition
                  with Transformers},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {89--96},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383521},
  doi          = {10.1109/SLT48900.2021.9383521},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/ZhangL0R21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZhengAO21,
  author       = {Zheng, Huahuan and
                  An, Keyu and
                  Ou, Zhijian},
  title        = {Efficient Neural Architecture Search for End-to-End Speech Recognition
                  Via Straight-Through Gradients},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {60--67},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383527},
  doi          = {10.1109/SLT48900.2021.9383527},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/ZhengAO21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZhouCHYL21,
  author       = {Zhou, Zhengyu and
                  Choi, In Gyu and
                  He, Yongliang and
                  Yadav, Vikas and
                  Lee, Chin{-}Hui},
  title        = {Using Paralinguistic Information to Disambiguate User Intentions for
                  Distinguishing Phrase Structure and Sarcasm in Spoken Dialog Systems},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {1020--1027},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383505},
  doi          = {10.1109/SLT48900.2021.9383505},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/ZhouCHYL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZhouS021,
  author       = {Zhou, Kun and
                  Sisman, Berrak and
                  Li, Haizhou},
  title        = {{VAW-GAN} For Disentanglement And Recomposition Of Emotional Elements
                  In Speech},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {415--422},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383526},
  doi          = {10.1109/SLT48900.2021.9383526},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/ZhouS021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZhouZW21,
  author       = {Zhou, Tianyan and
                  Zhao, Yong and
                  Wu, Jian},
  title        = {{ResNeXt} and {Res2Net} Structures for Speaker Verification},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {301--307},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383531},
  doi          = {10.1109/SLT48900.2021.9383531},
  timestamp    = {Sat, 20 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/ZhouZW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZmolikovaDBNC21,
  author       = {Zmol{\'{\i}}kov{\'{a}}, Katerina and
                  Delcroix, Marc and
                  Burget, Luk{\'{a}}s and
                  Nakatani, Tomohiro and
                  Cernock{\'{y}}, Jan Honza},
  title        = {Integration of Variational Autoencoder and Spatial Clustering for
                  Adaptive Multi-Channel Neural Speech Separation},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {889--896},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383612},
  doi          = {10.1109/SLT48900.2021.9383612},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/ZmolikovaDBNC21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/ZorilaLD21,
  author       = {Zorila, Catalin and
                  Li, Mohan and
                  Doddipatla, Rama},
  title        = {An Investigation into the Multi-channel Time Domain Speaker Extraction
                  Network},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {793--800},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383582},
  doi          = {10.1109/SLT48900.2021.9383582},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/ZorilaLD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@proceedings{DBLP:conf/slt/2021,
  title        = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021,
                  Shenzhen, China, January 19-22, 2021},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021},
  doi          = {10.1109/SLT48900.2021},
  isbn         = {978-1-7281-7066-4},
  timestamp    = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/2021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics