Stop the war!
Остановите войну!
for scientists:
default search action
Search dblp for Publications
export results for "audio-visual feature"
@article{DBLP:journals/ipm/MotamediKSEBT24,
  author     = {Elham Motamedi and Danial Khosh Kholgh and Sorush Saghari and Mehdi Elahi and Francesco Barile and Marko Tkalcic},
  title      = {Predicting movies' eudaimonic and hedonic scores: {A} machine learning approach using metadata, audio and visual features},
  journal    = {Inf. Process. Manag.},
  volume     = {61},
  number     = {2},
  pages      = {103610},
  year       = {2024},
  url        = {https://doi.org/10.1016/j.ipm.2023.103610},
  doi        = {10.1016/J.IPM.2023.103610},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ipm/MotamediKSEBT24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: QoE and WebRTC are brace-protected so sentence-casing styles keep their capitalisation.
@article{DBLP:journals/tomccap/BingolPFA24,
  author     = {G{\"{u}}lnaziye Bing{\"{o}}l and Simone Porcu and Alessandro Floris and Luigi Atzori},
  title      = {{QoE} Estimation of {WebRTC}-based Audio-visual Conversations from Facial and Speech Features},
  journal    = {{ACM} Trans. Multim. Comput. Commun. Appl.},
  volume     = {20},
  number     = {5},
  pages      = {130:1--130:23},
  year       = {2024},
  url        = {https://doi.org/10.1145/3638251},
  doi        = {10.1145/3638251},
  timestamp  = {Fri, 22 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tomccap/BingolPFA24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/mmm/TanGZ24,
  author     = {Sze An Peter Tan and Guangyu Gao and Jia Zhao},
  editor     = {Stevan Rudinac and Alan Hanjalic and Cynthia C. S. Liem and Marcel Worring and Bj{\"{o}}rn {\TH}{\'{o}}r J{\'{o}}nsson and Bei Liu and Yoko Yamakata},
  title      = {Audio-Visual Segmentation by Leveraging Multi-scaled Features Learning},
  booktitle  = {MultiMedia Modeling - 30th International Conference, {MMM} 2024, Amsterdam, The Netherlands, January 29 - February 2, 2024, Proceedings, Part {II}},
  series     = {Lecture Notes in Computer Science},
  volume     = {14555},
  pages      = {156--169},
  publisher  = {Springer},
  year       = {2024},
  url        = {https://doi.org/10.1007/978-3-031-53308-2\_12},
  doi        = {10.1007/978-3-031-53308-2\_12},
  timestamp  = {Sat, 16 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/mmm/TanGZ24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: the model name AV-HuBERT is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2401-17796,
  author     = {Xueyuan Chen and Yuejiao Wang and Xixin Wu and Disong Wang and Zhiyong Wu and Xunying Liu and Helen Meng},
  title      = {Exploiting Audio-Visual Features with Pretrained {AV-HuBERT} for Multi-Modal Dysarthric Speech Reconstruction},
  journal    = {CoRR},
  volume     = {abs/2401.17796},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2401.17796},
  doi        = {10.48550/ARXIV.2401.17796},
  eprinttype = {arXiv},
  eprint     = {2401.17796},
  timestamp  = {Wed, 07 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-17796.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ijon/LiZ23,
  author     = {Yangke Li and Xinman Zhang},
  title      = {Lip landmark-based audio-visual speech enhancement with multimodal feature fusion network},
  journal    = {Neurocomputing},
  volume     = {549},
  pages      = {126432},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.neucom.2023.126432},
  doi        = {10.1016/J.NEUCOM.2023.126432},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijon/LiZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jifs/ZhaoZZL23,
  author     = {Yiming Zhao and Hongdong Zhao and Xuezhi Zhang and Weina Liu},
  title      = {Vehicle classification based on audio-visual feature fusion with low-quality images and noise},
  journal    = {J. Intell. Fuzzy Syst.},
  volume     = {45},
  number     = {5},
  pages      = {8931--8944},
  year       = {2023},
  url        = {https://doi.org/10.3233/jifs-232812},
  doi        = {10.3233/JIFS-232812},
  timestamp  = {Mon, 18 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jifs/ZhaoZZL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/mta/KihalH23,
  author     = {Marouane Kihal and Lamia Hamza},
  title      = {Robust multimedia spam filtering based on visual, textual, and audio deep features and random forest},
  journal    = {Multim. Tools Appl.},
  volume     = {82},
  number     = {26},
  pages      = {40819--40837},
  year       = {2023},
  url        = {https://doi.org/10.1007/s11042-023-15170-x},
  doi        = {10.1007/S11042-023-15170-X},
  timestamp  = {Thu, 09 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/mta/KihalH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/mta/MistryBK23,
  author     = {Yogita D. Mistry and Gajanan K. Birajdar and Archana M. Khodke},
  title      = {Time-frequency visual representation and texture features for audio applications: a comprehensive review, recent trends, and challenges},
  journal    = {Multim. Tools Appl.},
  volume     = {82},
  number     = {23},
  pages      = {36143--36177},
  year       = {2023},
  url        = {https://doi.org/10.1007/s11042-023-14734-1},
  doi        = {10.1007/S11042-023-14734-1},
  timestamp  = {Sat, 14 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/mta/MistryBK23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/sensors/LiFSLZ23,
  author     = {Guizhu Li and Min Fu and Mengnan Sun and Xuefeng Liu and Bing Zheng},
  title      = {A Facial Feature and Lip Movement Enhanced Audio-Visual Speech Separation Model},
  journal    = {Sensors},
  volume     = {23},
  number     = {21},
  pages      = {8770},
  year       = {2023},
  url        = {https://doi.org/10.3390/s23218770},
  doi        = {10.3390/S23218770},
  timestamp  = {Sat, 20 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/sensors/LiFSLZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: 3D is brace-protected so sentence-casing styles do not render it as lowercase.
@inproceedings{DBLP:conf/apsipa/ChungHWZSL23,
  author     = {Yu{-}Ching Chung and Ji{-}Yan Han and Bo{-}Sin Wang and Wei{-}Zhong Zheng and Kung{-}Yao Shen and Ying{-}Hui Lai},
  title      = {An Audio-Visual Speech Enhancement System Based on {3D} Image Features: An Application in Hearing Aids},
  booktitle  = {Asia Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} {ASC} 2023, Taipei, Taiwan, October 31 - Nov. 3, 2023},
  pages      = {1131--1137},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/APSIPAASC58517.2023.10317139},
  doi        = {10.1109/APSIPAASC58517.2023.10317139},
  timestamp  = {Sat, 02 Dec 2023 14:05:45 +0100},
  biburl     = {https://dblp.org/rec/conf/apsipa/ChungHWZSL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/dagm/MerceaHKA23,
  author     = {Otniel{-}Bogdan Mercea and Thomas Hummel and A. Sophia Koepke and Zeynep Akata},
  editor     = {Ullrich K{\"{o}}the and Carsten Rother},
  title      = {Text-to-Feature Diffusion for Audio-Visual Few-Shot Learning},
  booktitle  = {Pattern Recognition - 45th {DAGM} German Conference, {DAGM} {GCPR} 2023, Heidelberg, Germany, September 19-22, 2023, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {14264},
  pages      = {491--507},
  publisher  = {Springer},
  year       = {2023},
  url        = {https://doi.org/10.1007/978-3-031-54605-1\_32},
  doi        = {10.1007/978-3-031-54605-1\_32},
  timestamp  = {Sat, 16 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/dagm/MerceaHKA23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/embc/SinghTKG23,
  author     = {Prerna Singh and Ayush Tripathi and Lalan Kumar and Tapan Kumar Gandhi},
  title      = {Brain Connectivity Features-based Age Group Classification using Temporal Asynchrony Audio-Visual Integration Task},
  booktitle  = {45th Annual International Conference of the {IEEE} Engineering in Medicine {\&} Biology Society, {EMBC} 2023, Sydney, Australia, July 24-27, 2023},
  pages      = {1--4},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/EMBC40787.2023.10341177},
  doi        = {10.1109/EMBC40787.2023.10341177},
  timestamp  = {Thu, 11 Jan 2024 15:01:18 +0100},
  biburl     = {https://dblp.org/rec/conf/embc/SinghTKG23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/eusipco/MezzaSS23,
  author     = {Alessandro Ilic Mezza and Paolo Sani and Augusto Sarti},
  title      = {Automatic {TV} Genre Classification Based on Visually-Conditioned Deep Audio Features},
  booktitle  = {31st European Signal Processing Conference, {EUSIPCO} 2023, Helsinki, Finland, September 4-8, 2023},
  pages      = {166--170},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.23919/EUSIPCO58844.2023.10289723},
  doi        = {10.23919/EUSIPCO58844.2023.10289723},
  timestamp  = {Mon, 06 Nov 2023 12:35:15 +0100},
  biburl     = {https://dblp.org/rec/conf/eusipco/MezzaSS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/hci/SetoA23,
  author     = {Kazuki Seto and Yumi Asahi},
  editor     = {Hirohiko Mori and Yumi Asahi},
  title      = {Sound Logo to Increase {TV} Advertising Effectiveness Based on Audio-Visual Features},
  booktitle  = {Human Interface and the Management of Information - Thematic Area, {HIMI} 2023, Held as Part of the 25th {HCI} International Conference, {HCII} 2023, Copenhagen, Denmark, July 23-28, 2023, Proceedings, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {14015},
  pages      = {136--151},
  publisher  = {Springer},
  year       = {2023},
  url        = {https://doi.org/10.1007/978-3-031-35132-7\_10},
  doi        = {10.1007/978-3-031-35132-7\_10},
  timestamp  = {Thu, 13 Jul 2023 10:09:58 +0200},
  biburl     = {https://dblp.org/rec/conf/hci/SetoA23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icassp/ChenZZSZL23,
  author     = {Hongbo Chen and Dongchen Zhu and Guanghui Zhang and Wenjun Shi and Xiaolin Zhang and Jiamao Li},
  title      = {{CM-CS:} Cross-Modal Common-Specific Feature Learning For Audio-Visual Video Parsing},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10097072},
  doi        = {10.1109/ICASSP49357.2023.10097072},
  timestamp  = {Sun, 05 Nov 2023 16:51:21 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/ChenZZSZL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icassp/JiangCDWL23,
  author     = {Ya Jiang and Hang Chen and Jun Du and Qing Wang and Chin{-}Hui Lee},
  title      = {Incorporating Lip Features into Audio-Visual Multi-Speaker {DOA} Estimation by Gated Fusion},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10095549},
  doi        = {10.1109/ICASSP49357.2023.10095549},
  timestamp  = {Sun, 05 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/JiangCDWL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icassp/XuWZYWGFD23,
  author     = {Haitao Xu and Liangfa Wei and Jie Zhang and Jianming Yang and Yannan Wang and Tian Gao and Xin Fang and Li{-}Rong Dai},
  title      = {A Multi-Scale Feature Aggregation Based Lightweight Network for Audio-Visual Speech Enhancement},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10096565},
  doi        = {10.1109/ICASSP49357.2023.10096565},
  timestamp  = {Tue, 23 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/XuWZYWGFD23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icmi/LiLLZTZZ23,
  author     = {Sunan Li and Hailun Lian and Cheng Lu and Yan Zhao and Chuangao Tang and Yuan Zong and Wenming Zheng},
  editor     = {Elisabeth Andr{\'{e}} and Mohamed Chetouani and Dominique Vaufreydaz and Gale M. Lucas and Tanja Schultz and Louis{-}Philippe Morency and Alessandro Vinciarelli},
  title      = {Audio-Visual Group-based Emotion Recognition using Local and Global Feature Aggregation based Multi-Task Learning},
  booktitle  = {Proceedings of the 25th International Conference on Multimodal Interaction, {ICMI} 2023, Paris, France, October 9-13, 2023},
  pages      = {741--745},
  publisher  = {{ACM}},
  year       = {2023},
  url        = {https://doi.org/10.1145/3577190.3616544},
  doi        = {10.1145/3577190.3616544},
  timestamp  = {Mon, 05 Feb 2024 20:29:19 +0100},
  biburl     = {https://dblp.org/rec/conf/icmi/LiLLZTZZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/micad/BhattacharyaP23,
  author     = {Moinak Bhattacharya and Prateek Prasanna},
  editor     = {Khan M. Iftekharuddin and Weijie Chen},
  title      = {Audio-visual feature fusion for improved thoracic disease classification},
  booktitle  = {Medical Imaging 2023: Computer-Aided Diagnosis, San Diego, CA, USA, February 19-23, 2023},
  series     = {{SPIE} Proceedings},
  volume     = {12465},
  publisher  = {{SPIE}},
  year       = {2023},
  url        = {https://doi.org/10.1117/12.2654571},
  doi        = {10.1117/12.2654571},
  timestamp  = {Tue, 19 Mar 2024 12:50:04 +0100},
  biburl     = {https://dblp.org/rec/conf/micad/BhattacharyaP23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: the proper noun Mandarin is brace-protected so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/smc/WangYGLW23,
  author     = {Jinxin Wang and Chao Yang and Zhongwen Guo and Xiaomei Li and Weigang Wang},
  title      = {An End-to-End {Mandarin} Audio-Visual Speech Recognition Model with a Feature Enhancement Module},
  booktitle  = {{IEEE} International Conference on Systems, Man, and Cybernetics, {SMC} 2023, Honolulu, Oahu, HI, USA, October 1-4, 2023},
  pages      = {572--577},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/SMC53992.2023.10394108},
  doi        = {10.1109/SMC53992.2023.10394108},
  timestamp  = {Sun, 03 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/smc/WangYGLW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/specom/NandakishorP23,
  author     = {Salam Nandakishor and Debadatta Pati},
  editor     = {Alexey Karpov and K. Samudravijaya and K. T. Deepak and Rajesh M. Hegde and Shyam S. Agrawal and S. R. Mahadeva Prasanna},
  title      = {Improvement of Audio-Visual Keyword Spotting System Accuracy Using Excitation Source Feature},
  booktitle  = {Speech and Computer - 25th International Conference, {SPECOM} 2023, Dharwad, India, November 29 - December 2, 2023, Proceedings, Part {II}},
  series     = {Lecture Notes in Computer Science},
  volume     = {14339},
  pages      = {344--356},
  publisher  = {Springer},
  year       = {2023},
  url        = {https://doi.org/10.1007/978-3-031-48312-7\_28},
  doi        = {10.1007/978-3-031-48312-7\_28},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/specom/NandakishorP23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2304-06315,
  author     = {Prerna Singh and Ayush Tripathi and Lalan Kumar and Tapan Kumar Gandhi},
  title      = {Brain Connectivity Features-based Age Group Classification using Temporal Asynchrony Audio-Visual Integration Task},
  journal    = {CoRR},
  volume     = {abs/2304.06315},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2304.06315},
  doi        = {10.48550/ARXIV.2304.06315},
  eprinttype = {arXiv},
  eprint     = {2304.06315},
  timestamp  = {Thu, 20 Apr 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2304-06315.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2307-04760,
  author     = {Sagnik Majumder and Ziad Al{-}Halah and Kristen Grauman},
  title      = {Learning Spatial Features from Audio-Visual Correspondence in Egocentric Videos},
  journal    = {CoRR},
  volume     = {abs/2307.04760},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.04760},
  doi        = {10.48550/ARXIV.2307.04760},
  eprinttype = {arXiv},
  eprint     = {2307.04760},
  timestamp  = {Mon, 24 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-04760.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2309-03869,
  author     = {Otniel{-}Bogdan Mercea and Thomas Hummel and A. Sophia Koepke and Zeynep Akata},
  title      = {Text-to-feature diffusion for audio-visual few-shot learning},
  journal    = {CoRR},
  volume     = {abs/2309.03869},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.03869},
  doi        = {10.48550/ARXIV.2309.03869},
  eprinttype = {arXiv},
  eprint     = {2309.03869},
  timestamp  = {Tue, 12 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-03869.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: the system name AV2Wav is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2309-08030,
  author     = {Ju{-}Chieh Chou and Chung{-}Ming Chien and Karen Livescu},
  title      = {{AV2Wav}: Diffusion-Based Re-synthesis from Continuous Self-supervised Features for Audio-Visual Speech Enhancement},
  journal    = {CoRR},
  volume     = {abs/2309.08030},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.08030},
  doi        = {10.48550/ARXIV.2309.08030},
  eprinttype = {arXiv},
  eprint     = {2309.08030},
  timestamp  = {Tue, 26 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-08030.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2310-03456,
  author     = {Edward Fish and Jon Weinbren and Andrew Gilbert},
  title      = {Multi-Resolution Audio-Visual Feature Fusion for Temporal Action Localization},
  journal    = {CoRR},
  volume     = {abs/2310.03456},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.03456},
  doi        = {10.48550/ARXIV.2310.03456},
  eprinttype = {arXiv},
  eprint     = {2310.03456},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-03456.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2310-03827,
  author     = {Sneha Muppalla and Shan Jia and Siwei Lyu},
  title      = {Integrating Audio-Visual Features for Multimodal Deepfake Detection},
  journal    = {CoRR},
  volume     = {abs/2310.03827},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.03827},
  doi        = {10.48550/ARXIV.2310.03827},
  eprinttype = {arXiv},
  eprint     = {2310.03827},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-03827.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: the tool name AFExplorer is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/vi/WangSWMZL22,
  author     = {Lei Wang and Guodao Sun and Yunchao Wang and Ji Ma and Xiaomin Zhao and Ronghua Liang},
  title      = {{AFExplorer}: Visual analysis and interactive selection of audio features},
  journal    = {Vis. Informatics},
  volume     = {6},
  number     = {1},
  pages      = {47--55},
  year       = {2022},
  url        = {https://doi.org/10.1016/j.visinf.2022.02.003},
  doi        = {10.1016/J.VISINF.2022.02.003},
  timestamp  = {Thu, 02 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/vi/WangSWMZL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ialp/LuDLZZSWY22,
  author     = {Shangjun Lu and Xiaoxia Du and Juan Liu and Yu{-}Mei Zhang and Shaofeng Zhao and Rongfeng Su and Lan Wang and Nan Yan},
  editor     = {Rong Tong and Yanfeng Lu and Minghui Dong and Wengao Gong and Haizhou Li},
  title      = {A New Method for Predicting Severity Level of Dysarthric Speech Based on Joint Feature-Sample Selection using Audio-Visual Data},
  booktitle  = {International Conference on Asian Language Processing, {IALP} 2022, Singapore, October 27-28, 2022},
  pages      = {190--195},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/IALP57159.2022.9961300},
  doi        = {10.1109/IALP57159.2022.9961300},
  timestamp  = {Fri, 09 Dec 2022 16:21:50 +0100},
  biburl     = {https://dblp.org/rec/conf/ialp/LuDLZZSWY22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/interspeech/HongKYR22,
  author     = {Joanna Hong and Minsu Kim and Daehun Yoo and Yong Man Ro},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {Visual Context-driven Audio Feature Enhancement for Robust End-to-End Audio-Visual Speech Recognition},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {2838--2842},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-11311},
  doi        = {10.21437/INTERSPEECH.2022-11311},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/HongKYR22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/interspeech/WeiHYLD22,
  author     = {Jie Wei and Guanyu Hu and Xinyu Yang and Anh Tuan Luu and Yizhuo Dong},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {Audio-Visual Domain Adaptation Feature Fusion for Speech Emotion Recognition},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {1988--1992},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-703},
  doi        = {10.21437/INTERSPEECH.2022-703},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/WeiHYLD22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/mwscas/FanarasTAM22,
  author     = {Konstantinos Fanaras and Antonios Tragoudaras and Charalampos Antoniadis and Yehia Massoud},
  title      = {Audio-visual Speaker Diarization: Improved Voice Activity Detection with {CNN} based Feature Extraction},
  booktitle  = {65th {IEEE} International Midwest Symposium on Circuits and Systems, {MWSCAS} 2022, Fukuoka, Japan, August 7-10, 2022},
  pages      = {1--4},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/MWSCAS54063.2022.9859533},
  doi        = {10.1109/MWSCAS54063.2022.9859533},
  timestamp  = {Mon, 01 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/mwscas/FanarasTAM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/sdf/GeeromsAKKVM22,
  author     = {Warre Geeroms and Gianni Allebosch and Stijn Kindt and Loubna Kadri and Peter Veelaert and Nilesh Madhu},
  title      = {Audio-Visual Active Speaker Identification: {A} comparison of dense image-based features and sparse facial landmark-based features},
  booktitle  = {Sensor Data Fusion: Trends, Solutions, Applications, {SDF} 2022, Bonn, Germany, October 12-14, 2022},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/SDF55338.2022.9931697},
  doi        = {10.1109/SDF55338.2022.9931697},
  timestamp  = {Fri, 18 Nov 2022 20:51:17 +0100},
  biburl     = {https://dblp.org/rec/conf/sdf/GeeromsAKKVM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2203-02655,
  author     = {Junwen Xiong and Peng Zhang and Lei Xie and Wei Huang and Yufei Zha and Yanning Zhang},
  title      = {Audio-visual speech separation based on joint feature representation with cross-modal attention},
  journal    = {CoRR},
  volume     = {abs/2203.02655},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.02655},
  doi        = {10.48550/ARXIV.2203.02655},
  eprinttype = {arXiv},
  eprint     = {2203.02655},
  timestamp  = {Fri, 10 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-02655.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2203-15183,
  author     = {Jialu Li and Mark Hasegawa{-}Johnson and Nancy L. McElwain},
  title      = {Visualizations of Complex Sequences of Family-Infant Vocalizations Using Bag-of-Audio-Words Approach Based on Wav2vec 2.0 Features},
  journal    = {CoRR},
  volume     = {abs/2203.15183},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.15183},
  doi        = {10.48550/ARXIV.2203.15183},
  eprinttype = {arXiv},
  eprint     = {2203.15183},
  timestamp  = {Tue, 05 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-15183.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2207-06020,
  author     = {Joanna Hong and Minsu Kim and Daehun Yoo and Yong Man Ro},
  title      = {Visual Context-driven Audio Feature Enhancement for Robust End-to-End Audio-Visual Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2207.06020},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2207.06020},
  doi        = {10.48550/ARXIV.2207.06020},
  eprinttype = {arXiv},
  eprint     = {2207.06020},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-06020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/access/IshikawaHS21,
  author     = {Reina Ishikawa and Ryo Hachiuma and Hideo Saito},
  title      = {Self-Supervised Audio-Visual Feature Learning for Single-Modal Incremental Terrain Type Clustering},
  journal    = {{IEEE} Access},
  volume     = {9},
  pages      = {64346--64357},
  year       = {2021},
  url        = {https://doi.org/10.1109/ACCESS.2021.3075582},
  doi        = {10.1109/ACCESS.2021.3075582},
  timestamp  = {Sun, 16 May 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/access/IshikawaHS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/eetct/MehtaKK21,
  author     = {Pooja Mehta and Sahil Kaswan and Jaspreet Kaur},
  title      = {An Enhanced {ANN-HMM} based classification of video recordings with the aid of audio-visual feature extraction},
  journal    = {{EAI} Endorsed Trans. Creative Technol.},
  volume     = {8},
  number     = {28},
  pages      = {e1},
  year       = {2021},
  url        = {https://doi.org/10.4108/eai.31-3-2021.169172},
  doi        = {10.4108/EAI.31-3-2021.169172},
  timestamp  = {Mon, 25 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/eetct/MehtaKK21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/entropy/WilkesVM21,
  author     = {Ben Wilkes and Igor Vatolkin and Heinrich M{\"{u}}ller},
  title      = {Statistical and Visual Analysis of Audio, Text, and Image Features for Multi-Modal Music Genre Recognition},
  journal    = {Entropy},
  volume     = {23},
  number     = {11},
  pages      = {1502},
  year       = {2021},
  url        = {https://doi.org/10.3390/e23111502},
  doi        = {10.3390/E23111502},
  timestamp  = {Wed, 15 Dec 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/entropy/WilkesVM21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/fi/IsobeTHGN21,
  author     = {Shinnosuke Isobe and Satoshi Tamura and Satoru Hayamizu and Yuuto Gotoh and Masaki Nose},
  title      = {Multi-Angle Lipreading with Angle Classification-Based Feature Extraction and Its Application to Audio-Visual Speech Recognition},
  journal    = {Future Internet},
  volume     = {13},
  number     = {7},
  pages      = {182},
  year       = {2021},
  url        = {https://doi.org/10.3390/fi13070182},
  doi        = {10.3390/FI13070182},
  timestamp  = {Thu, 12 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/fi/IsobeTHGN21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ijcini/HandaAK21,
  author     = {Anand Handa and Rashi Agarwal and Narendra Kohli},
  title      = {Audio-Visual Emotion Recognition System Using Multi-Modal Features},
  journal    = {Int. J. Cogn. Informatics Nat. Intell.},
  volume     = {15},
  number     = {4},
  pages      = {1--14},
  year       = {2021},
  url        = {https://doi.org/10.4018/ijcini.20211001.oa34},
  doi        = {10.4018/IJCINI.20211001.OA34},
  timestamp  = {Tue, 28 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ijcini/HandaAK21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/sensors/QuanMN21,
  author     = {Jingyu Quan and Yoshihiro Miyake and Takayuki Nozawa},
  title      = {Incorporating Interpersonal Synchronization Features for Automatic Emotion Recognition from Visual and Audio Data during Communication},
  journal    = {Sensors},
  volume     = {21},
  number     = {16},
  pages      = {5317},
  year       = {2021},
  url        = {https://doi.org/10.3390/s21165317},
  doi        = {10.3390/S21165317},
  timestamp  = {Wed, 01 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/sensors/QuanMN21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/sivp/DebnathR21,
  author     = {Saswati Debnath and Pinki Roy},
  title      = {Appearance and shape-based hybrid visual feature extraction: toward audio-visual automatic speech recognition},
  journal    = {Signal Image Video Process.},
  volume     = {15},
  number     = {1},
  pages      = {25--32},
  year       = {2021},
  url        = {https://doi.org/10.1007/s11760-020-01717-0},
  doi        = {10.1007/S11760-020-01717-0},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/sivp/DebnathR21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/speech/FarhoudiS21,
  author     = {Zeinab Farhoudi and Saeed Setayeshi},
  title      = {Fusion of deep learning features with mixture of brain emotional learning for audio-visual emotion recognition},
  journal    = {Speech Commun.},
  volume     = {127},
  pages      = {92--103},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.specom.2020.12.001},
  doi        = {10.1016/J.SPECOM.2020.12.001},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/speech/FarhoudiS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/cascon/ZhouGMSRY21,
  author     = {Jingmin Zhou and Adam Gariba and Vida Movahedi and Mariah Martin Shein and Andre Rosa and Ruiqi Yu},
  editor     = {Vio Onut and Farhana H. Zulkernine},
  title      = {Multi-label video categorization using visual and audio transcript features},
  booktitle  = {{CASCON} '21: Proceedings of the 31st Annual International Conference on Computer Science and Software Engineering, Toronto, Ontario, Canada, November 22 - 25, 2021},
  pages      = {23--32},
  publisher  = {{ACM}},
  year       = {2021},
  url        = {https://dl.acm.org/doi/10.5555/3507788.3507793},
  doi        = {10.5555/3507788.3507793},
  timestamp  = {Fri, 29 Jul 2022 16:56:38 +0200},
  biburl     = {https://dblp.org/rec/conf/cascon/ZhouGMSRY21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/cis/Wang21,
  author     = {Yufei Wang},
  title      = {Efficient Audio-Visual Speaker Recognition Via Deep Multi-Modal Feature Fusion},
  booktitle  = {17th International Conference on Computational Intelligence and Security {CIS} 2021, Chengdu, China, November 19-22, 2021},
  pages      = {99--103},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/CIS54983.2021.00029},
  doi        = {10.1109/CIS54983.2021.00029},
  timestamp  = {Wed, 16 Feb 2022 17:26:48 +0100},
  biburl     = {https://dblp.org/rec/conf/cis/Wang21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icassp/ShetuCH21,
  author     = {Shrishti Saha Shetu and Soumitro Chakrabarty and Emanu{\"{e}}l Anco Peter Habets},
  title      = {An Empirical Study of Visual Features for {DNN} Based Audio-Visual Speech Enhancement in Multi-Talker Environments},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages      = {8418--8422},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/ICASSP39728.2021.9414000},
  doi        = {10.1109/ICASSP39728.2021.9414000},
  timestamp  = {Fri, 09 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/ShetuCH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcnn/ChengTQJ21,
  author     = {Huijie Cheng and Yun Tie and Lin Qi and Cong Jin},
  title      = {Context-Aware Based Visual-Audio Feature Fusion for Emotion Recognition},
  booktitle  = {International Joint Conference on Neural Networks, {IJCNN} 2021, Shenzhen, China, July 18-22, 2021},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/IJCNN52387.2021.9533473},
  doi        = {10.1109/IJCNN52387.2021.9533473},
  timestamp  = {Thu, 17 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ijcnn/ChengTQJ21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcnn/ZhangXSHQX21,
  author     = {Peng Zhang and Jiaming Xu and Jing Shi and Yunzhe Hao and Lei Qin and Bo Xu},
  title      = {Audio-Visual Speech Separation with Visual Features Enhanced by Adversarial Training},
  booktitle  = {International Joint Conference on Neural Networks, {IJCNN} 2021, Shenzhen, China, July 18-22, 2021},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/IJCNN52387.2021.9533660},
  doi        = {10.1109/IJCNN52387.2021.9533660},
  timestamp  = {Wed, 29 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcnn/ZhangXSHQX21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mm/TaoPDQS021,
  author     = {Ruijie Tao and Zexu Pan and Rohan Kumar Das and Xinyuan Qian and Mike Zheng Shou and Haizhou Li},
  editor     = {Heng Tao Shen and Yueting Zhuang and John R. Smith and Yang Yang and Pablo C{\'{e}}sar and Florian Metze and Balakrishnan Prabhakaran},
  title      = {Is Someone Speaking?: Exploring Long-term Temporal Features for Audio-visual Active Speaker Detection},
  booktitle  = {{MM} '21: {ACM} Multimedia Conference, Virtual Event, China, October 20 - 24, 2021},
  pages      = {3927--3935},
  publisher  = {{ACM}},
  year       = {2021},
  url        = {https://doi.org/10.1145/3474085.3475587},
  doi        = {10.1145/3474085.3475587},
  timestamp  = {Mon, 22 Apr 2024 21:24:20 +0200},
  biburl     = {https://dblp.org/rec/conf/mm/TaoPDQS021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/visapp/HuMOM21,
  author     = {Feiyan Hu and Eva Mohedano and Noel E. O'Connor and Kevin McGuinness},
  editor     = {Giovanni Maria Farinella and Petia Radeva and Jos{\'{e}} Braz and Kadi Bouatouch},
  title      = {Temporal Bilinear Encoding Network of Audio-visual Features at Low Sampling Rates},
  booktitle  = {Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications, {VISIGRAPP} 2021, Volume 5: VISAPP, Online Streaming, February 8-10, 2021},
  pages      = {637--644},
  publisher  = {{SCITEPRESS}},
  year       = {2021},
  url        = {https://doi.org/10.5220/0010337306370644},
  doi        = {10.5220/0010337306370644},
  timestamp  = {Tue, 06 Jun 2023 14:58:00 +0200},
  biburl     = {https://dblp.org/rec/conf/visapp/HuMOM21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The braces around AVGZSLNet keep the coined model name's capitalisation
% under bibliography styles that convert titles to sentence case.
@inproceedings{DBLP:conf/wacv/MazumderSPN21,
  author     = {Pratik Mazumder and Pravendra Singh and Kranti Kumar Parida and Vinay P. Namboodiri},
  title      = {{AVGZSLNet}: Audio-Visual Generalized Zero-Shot Learning by Reconstructing Label Features from Multi-Modal Embeddings},
  booktitle  = {{IEEE} Winter Conference on Applications of Computer Vision, {WACV} 2021, Waikoloa, HI, USA, January 3-8, 2021},
  pages      = {3089--3098},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/WACV48630.2021.00313},
  doi        = {10.1109/WACV48630.2021.00313},
  timestamp  = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/wacv/MazumderSPN21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2101-05975,
  author     = {Xinmeng Xu and Dongxiang Xu and Jie Jia and Yang Wang and Binbin Chen},
  title      = {{MFFCN:} Multi-layer Feature Fusion Convolution Network for Audio-visual Speech Enhancement},
  journal    = {CoRR},
  volume     = {abs/2101.05975},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.05975},
  eprinttype = {arXiv},
  eprint     = {2101.05975},
  timestamp  = {Fri, 20 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-05975.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2101-06268,
  author     = {Xinmeng Xu and Yang Wang and Dongxiang Xu and Cong Zhang and Yiyuan Peng and Jie Jia and Binbin Chen},
  title      = {Attentional Multi-layer Feature Fusion Convolution Network for Audio-visual Speech Enhancement},
  journal    = {CoRR},
  volume     = {abs/2101.06268},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.06268},
  eprinttype = {arXiv},
  eprint     = {2101.06268},
  timestamp  = {Fri, 20 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-06268.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2103-14189,
  author     = {Taylan K. Sen and Gazi Naven and Luke Gerstner and Daryl Bagley and Raiyan Abdul Baten and Wasifur Rahman and Md. Kamrul Hasan and Kurtis Glenn Haut and Abdullah Al Mamun and Samiha Samrose and Anne Solbu and R. Eric Barnes and Mark G. Frank and Ehsan Hoque},
  title      = {{DBATES:} DataBase of Audio features, Text, and visual Expressions in competitive debate Speeches},
  journal    = {CoRR},
  volume     = {abs/2103.14189},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.14189},
  eprinttype = {arXiv},
  eprint     = {2103.14189},
  timestamp  = {Thu, 17 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-14189.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2107-06592,
  author     = {Ruijie Tao and Zexu Pan and Rohan Kumar Das and Xinyuan Qian and Mike Zheng Shou and Haizhou Li},
  title      = {Is Someone Speaking? Exploring Long-term Temporal Features for Audio-visual Active Speaker Detection},
  journal    = {CoRR},
  volume     = {abs/2107.06592},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.06592},
  eprinttype = {arXiv},
  eprint     = {2107.06592},
  timestamp  = {Thu, 22 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-06592.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/computers/FeradovMG20,
  author     = {Firgan Feradov and Iosif Mporas and Todor Ganchev},
  title      = {Evaluation of Features in Detection of Dislike Responses to Audio-Visual Stimuli from {EEG} Signals},
  journal    = {Comput.},
  volume     = {9},
  number     = {2},
  pages      = {33},
  year       = {2020},
  url        = {https://doi.org/10.3390/computers9020033},
  doi        = {10.3390/COMPUTERS9020033},
  timestamp  = {Sat, 05 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/computers/FeradovMG20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijon/HaoCLWX20,
  author     = {Man Hao and Weihua Cao and Zhentao Liu and Min Wu and Peng Xiao},
  title      = {Visual-audio emotion recognition based on multi-task and ensemble learning with multiple features},
  journal    = {Neurocomputing},
  volume     = {391},
  pages      = {42--51},
  year       = {2020},
  url        = {https://doi.org/10.1016/j.neucom.2020.01.048},
  doi        = {10.1016/J.NEUCOM.2020.01.048},
  timestamp  = {Fri, 22 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijon/HaoCLWX20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijon/WangWH20a,
  author     = {Zhan Wang and Lizhi Wang and Hua Huang},
  title      = {Joint low rank embedded multiple features learning for audio-visual emotion recognition},
  journal    = {Neurocomputing},
  volume     = {388},
  pages      = {324--333},
  year       = {2020},
  url        = {https://doi.org/10.1016/j.neucom.2020.01.017},
  doi        = {10.1016/J.NEUCOM.2020.01.017},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ijon/WangWH20a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/npl/KhanSAT20,
  author     = {Abdullah Aman Khan and Jie Shao and Waqar Ali and Saifullah Tumrani},
  title      = {Content-Aware Summarization of Broadcast Sports Videos: An Audio-Visual Feature Extraction Approach},
  journal    = {Neural Process. Lett.},
  volume     = {52},
  number     = {3},
  pages      = {1945--1968},
  year       = {2020},
  url        = {https://doi.org/10.1007/s11063-020-10200-3},
  doi        = {10.1007/S11063-020-10200-3},
  timestamp  = {Tue, 28 Mar 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/npl/KhanSAT20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/sensors/JamilFRUBFM20,
  author     = {Sonain Jamil and Fawad and MuhibUr Rahman and Amin Ullah and Salman Badnava and Masoud Forsat and Seyed Sajad Mirjavadi},
  title      = {Malicious {UAV} Detection Using Integrated Audio and Visual Features for Public Safety Applications},
  journal    = {Sensors},
  volume     = {20},
  number     = {14},
  pages      = {3923},
  year       = {2020},
  url        = {https://doi.org/10.3390/s20143923},
  doi        = {10.3390/S20143923},
  timestamp  = {Thu, 13 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/sensors/JamilFRUBFM20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Mandarin" is a proper noun; the braces stop sentence-casing bibliography
% styles from lowercasing it.
@article{DBLP:journals/taslp/SuLWY20,
  author     = {Rongfeng Su and Xunying Liu and Lan Wang and Jingzhou Yang},
  title      = {Cross-Domain Deep Visual Feature Generation for {Mandarin} Audio-Visual Speech Recognition},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {28},
  pages      = {185--197},
  year       = {2020},
  url        = {https://doi.org/10.1109/TASLP.2019.2950602},
  doi        = {10.1109/TASLP.2019.2950602},
  timestamp  = {Fri, 13 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/taslp/SuLWY20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fgr/HormannMKR20,
  author     = {Stefan H{\"{o}}rmann and Abdul Moiz and Martin Knoche and Gerhard Rigoll},
  title      = {Attention Fusion for Audio-Visual Person Verification Using Multi-Scale Features},
  booktitle  = {15th {IEEE} International Conference on Automatic Face and Gesture Recognition, {FG} 2020, Buenos Aires, Argentina, November 16-20, 2020},
  pages      = {281--285},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/FG47880.2020.00074},
  doi        = {10.1109/FG47880.2020.00074},
  timestamp  = {Tue, 18 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/fgr/HormannMKR20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icip/LeeL20,
  author     = {Hansol Lee and Gyemin Lee},
  title      = {Hierarchical Model For Long-Length Video Summarization With Adversarially Enhanced Audio/Visual Features},
  booktitle  = {{IEEE} International Conference on Image Processing, {ICIP} 2020, Abu Dhabi, United Arab Emirates, October 25-28, 2020},
  pages      = {723--727},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/ICIP40778.2020.9190636},
  doi        = {10.1109/ICIP40778.2020.9190636},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icip/LeeL20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Mandarin" is a proper noun; the braces stop sentence-casing bibliography
% styles from lowercasing it.
@inproceedings{DBLP:conf/icip/LiuCS20,
  author     = {Hong Liu and Zhengyan Chen and Wei Shi},
  title      = {Robust Audio-Visual {Mandarin} Speech Recognition Based On Adaptive Decision Fusion And Tone Features},
  booktitle  = {{IEEE} International Conference on Image Processing, {ICIP} 2020, Abu Dhabi, United Arab Emirates, October 25-28, 2020},
  pages      = {1381--1385},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/ICIP40778.2020.9190894},
  doi        = {10.1109/ICIP40778.2020.9190894},
  timestamp  = {Wed, 08 Dec 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icip/LiuCS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icpr/IshikawaHKS20,
  author     = {Reina Ishikawa and Ryo Hachiuma and Akiyoshi Kurobe and Hideo Saito},
  title      = {Single-modal Incremental Terrain Clustering from Self-Supervised Audio-Visual Feature Learning},
  booktitle  = {25th International Conference on Pattern Recognition, {ICPR} 2020, Virtual Event / Milan, Italy, January 10-15, 2021},
  pages      = {9399--9406},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/ICPR48806.2021.9412638},
  doi        = {10.1109/ICPR48806.2021.9412638},
  timestamp  = {Fri, 07 May 2021 08:42:33 +0200},
  biburl     = {https://dblp.org/rec/conf/icpr/IshikawaHKS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icpr/LiuXY20,
  author     = {Hong Liu and Wanlu Xu and Bing Yang},
  title      = {Audio-Visual Speech Recognition Using {A} Two-Step Feature Fusion Strategy},
  booktitle  = {25th International Conference on Pattern Recognition, {ICPR} 2020, Virtual Event / Milan, Italy, January 10-15, 2021},
  pages      = {1896--1903},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/ICPR48806.2021.9412454},
  doi        = {10.1109/ICPR48806.2021.9412454},
  timestamp  = {Tue, 14 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icpr/LiuXY20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mapr/VoNDNDNNL020,
  author     = {Hung{-}Quoc Vo and Dung{-}Minh Nguyen and Tien Do and Vinh{-}Tiep Nguyen and Vu{-}Minh{-}Hieu Dang and Nhat{-}Duy Nguyen and Thanh Duc Ngo and Duy{-}Dinh Le and Shin'ichi Satoh},
  title      = {Searching For Desired Person Doing Desired Action based on Visual and Audio Feature in Large Scale Video Database},
  booktitle  = {International Conference on Multimedia Analysis and Pattern Recognition, {MAPR} 2020, Hanoi, Vietnam, October 8-9, 2020},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/MAPR49794.2020.9237781},
  doi        = {10.1109/MAPR49794.2020.9237781},
  timestamp  = {Wed, 03 Nov 2021 16:47:39 +0100},
  biburl     = {https://dblp.org/rec/conf/mapr/VoNDNDNNL020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mmm/GuoZYFH020,
  author     = {Xiaona Guo and Wei Zhong and Long Ye and Li Fang and Yan Heng and Qin Zhang},
  editor     = {Yong Man Ro and Wen{-}Huang Cheng and Junmo Kim and Wei{-}Ta Chu and Peng Cui and Jung{-}Woo Choi and Min{-}Chun Hu and Wesley De Neve},
  title      = {Global Affective Video Content Regression Based on Complementary Audio-Visual Features},
  booktitle  = {MultiMedia Modeling - 26th International Conference, {MMM} 2020, Daejeon, South Korea, January 5-8, 2020, Proceedings, Part {II}},
  series     = {Lecture Notes in Computer Science},
  volume     = {11962},
  pages      = {540--550},
  publisher  = {Springer},
  year       = {2020},
  url        = {https://doi.org/10.1007/978-3-030-37734-2\_44},
  doi        = {10.1007/978-3-030-37734-2\_44},
  timestamp  = {Mon, 09 Nov 2020 15:46:42 +0100},
  biburl     = {https://dblp.org/rec/conf/mmm/GuoZYFH020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/qomex/MartinezHF20,
  author     = {Helard Becerra Martinez and Andrew Hines and Myl{\`{e}}ne C. Q. Farias},
  title      = {How Deep is Your Encoder: An Analysis of Features Descriptors for an Autoencoder-Based Audio-Visual Quality Metric},
  booktitle  = {Twelfth International Conference on Quality of Multimedia Experience, QoMEX 2020, Athlone, Ireland, May 26-28, 2020},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/QoMEX48832.2020.9123142},
  doi        = {10.1109/QOMEX48832.2020.9123142},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/qomex/MartinezHF20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/visapp/ChebbiJ20,
  author     = {Safa Chebbi and Sofia Ben Jebara},
  editor     = {Giovanni Maria Farinella and Petia Radeva and Jos{\'{e}} Braz},
  title      = {An Audio-Visual based Feature Level Fusion Approach Applied to Deception Detection},
  booktitle  = {Proceedings of the 15th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications, {VISIGRAPP} 2020, Volume 4: VISAPP, Valletta, Malta, February 27-29, 2020},
  pages      = {197--205},
  publisher  = {{SCITEPRESS}},
  year       = {2020},
  url        = {https://doi.org/10.5220/0008896201970205},
  doi        = {10.5220/0008896201970205},
  timestamp  = {Thu, 16 Apr 2020 15:04:56 +0200},
  biburl     = {https://dblp.org/rec/conf/visapp/ChebbiJ20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2003-11100,
  author     = {Helard Becerra Martinez and Andrew Hines and Myl{\`{e}}ne C. Q. Farias},
  title      = {How deep is your encoder: an analysis of features descriptors for an autoencoder-based audio-visual quality metric},
  journal    = {CoRR},
  volume     = {abs/2003.11100},
  year       = {2020},
  url        = {https://arxiv.org/abs/2003.11100},
  eprinttype = {arXiv},
  eprint     = {2003.11100},
  timestamp  = {Wed, 01 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2003-11100.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2004-12031,
  author     = {Zakaria Aldeneh and Anushree Prasanna Kumar and Barry{-}John Theobald and Erik Marchi and Sachin Kajarekar and Devang Naik and Ahmed Hussen Abdelaziz},
  title      = {Self-supervised Learning of Visual Speech Features with Audiovisual Speech Enhancement},
  journal    = {CoRR},
  volume     = {abs/2004.12031},
  year       = {2020},
  url        = {https://arxiv.org/abs/2004.12031},
  eprinttype = {arXiv},
  eprint     = {2004.12031},
  timestamp  = {Tue, 28 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2004-12031.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The braces around AVGZSLNet keep the coined model name's capitalisation
% under bibliography styles that convert titles to sentence case.
@article{DBLP:journals/corr/abs-2005-13402,
  author     = {Pratik Mazumder and Pravendra Singh and Kranti Kumar Parida and Vinay P. Namboodiri},
  title      = {{AVGZSLNet}: Audio-Visual Generalized Zero-Shot Learning by Reconstructing Label Features from Multi-Modal Embeddings},
  journal    = {CoRR},
  volume     = {abs/2005.13402},
  year       = {2020},
  url        = {https://arxiv.org/abs/2005.13402},
  eprinttype = {arXiv},
  eprint     = {2005.13402},
  timestamp  = {Thu, 28 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-13402.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-04359,
  author     = {Shrishti Saha Shetu and Soumitro Chakrabarty and Emanu{\"{e}}l Anco Peter Habets},
  title      = {An Empirical Study of Visual Features for {DNN} based Audio-Visual Speech Enhancement in Multi-talker Environments},
  journal    = {CoRR},
  volume     = {abs/2011.04359},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.04359},
  eprinttype = {arXiv},
  eprint     = {2011.04359},
  timestamp  = {Thu, 12 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-04359.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-10283,
  author     = {Feiyan Hu and Eva Mohedano and Noel E. O'Connor and Kevin McGuinness},
  title      = {Temporal Bilinear Encoding Network of Audio-Visual Features at Low Sampling Rates},
  journal    = {CoRR},
  volume     = {abs/2012.10283},
  year       = {2020},
  url        = {https://arxiv.org/abs/2012.10283},
  eprinttype = {arXiv},
  eprint     = {2012.10283},
  timestamp  = {Mon, 04 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-10283.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/dsp/SaudiKA19,
  author     = {Ali S. Saudi and Mahmoud I. Khalil and Hazem M. Abbas},
  title      = {Improved features and dynamic stream weight adaption for robust Audio-Visual Speech Recognition framework},
  journal    = {Digit. Signal Process.},
  volume     = {89},
  pages      = {17--29},
  year       = {2019},
  url        = {https://doi.org/10.1016/j.dsp.2019.02.016},
  doi        = {10.1016/J.DSP.2019.02.016},
  timestamp  = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/dsp/SaudiKA19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ieicet/NaranchimegZA19,
  author     = {Bold Naranchimeg and Chao Zhang and Takuya Akashi},
  title      = {Cross-Domain Deep Feature Combination for Bird Species Classification with Audio-Visual Data},
  journal    = {{IEICE} Trans. Inf. Syst.},
  volume     = {102-D},
  number     = {10},
  pages      = {2033--2042},
  year       = {2019},
  url        = {https://doi.org/10.1587/transinf.2018EDP7383},
  doi        = {10.1587/TRANSINF.2018EDP7383},
  timestamp  = {Thu, 23 Jun 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ieicet/NaranchimegZA19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/iet-ipr/JavedIMMA19,
  author     = {Ali Javed and Aun Irtaza and Hafiz Malik and Muhammad Tariq Mahmood and Syed Muhammad Adnan Shah},
  title      = {Multimodal framework based on audio-visual features for summarisation of cricket videos},
  journal    = {{IET} Image Process.},
  volume     = {13},
  number     = {4},
  pages      = {615--622},
  year       = {2019},
  url        = {https://doi.org/10.1049/iet-ipr.2018.5589},
  doi        = {10.1049/IET-IPR.2018.5589},
  timestamp  = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/iet-ipr/JavedIMMA19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/mta/DhirajBG19,
  author     = {Dhiraj and Rohit Biswas and Nischay Ghattamaraju},
  title      = {An effective analysis of deep learning based approaches for audio based feature extraction and its visualization},
  journal    = {Multim. Tools Appl.},
  volume     = {78},
  number     = {17},
  pages      = {23949--23972},
  year       = {2019},
  url        = {https://doi.org/10.1007/s11042-018-6706-x},
  doi        = {10.1007/S11042-018-6706-X},
  timestamp  = {Mon, 11 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/mta/DhirajBG19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/globalsip/HaiderPAL19,
  author     = {Fasih Haider and Senja Pollak and Pierre Albert and Saturnino Luz},
  title      = {Extracting Audio-Visual Features for Emotion Recognition Through Active Feature Selection},
  booktitle  = {2019 {IEEE} Global Conference on Signal and Information Processing, GlobalSIP 2019, Ottawa, ON, Canada, November 11-14, 2019},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/GlobalSIP45357.2019.8969360},
  doi        = {10.1109/GLOBALSIP45357.2019.8969360},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/globalsip/HaiderPAL19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/HoriA0WHCMCLDEB19,
  author     = {Chiori Hori and Huda AlAmri and Jue Wang and Gordon Wichern and Takaaki Hori and Anoop Cherian and Tim K. Marks and Vincent Cartillier and Raphael Gontijo Lopes and Abhishek Das and Irfan Essa and Dhruv Batra and Devi Parikh},
  title      = {End-to-end Audio Visual Scene-aware Dialog Using Multimodal Attention-based Video Features},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages      = {2352--2356},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/ICASSP.2019.8682583},
  doi        = {10.1109/ICASSP.2019.8682583},
  timestamp  = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/HoriA0WHCMCLDEB19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The braces around GAN keep the acronym upper-case under bibliography
% styles that convert titles to sentence case.
@inproceedings{DBLP:conf/iccvw/LeeL19,
  author     = {Hansol Lee and Gyemin Lee},
  title      = {Summarizing Long-Length Videos with {GAN}-Enhanced Audio/Visual Features},
  booktitle  = {2019 {IEEE/CVF} International Conference on Computer Vision Workshops, {ICCV} Workshops 2019, Seoul, Korea (South), October 27-28, 2019},
  pages      = {3727--3731},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/ICCVW.2019.00462},
  doi        = {10.1109/ICCVW.2019.00462},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iccvw/LeeL19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iftc/GuoZYF019,
  author     = {Xiaona Guo and Wei Zhong and Long Ye and Li Fang and Qin Zhang},
  editor     = {Guangtao Zhai and Jun Zhou and Hua Yang and Ping An and Xiaokang Yang},
  title      = {Affective Video Content Analysis Based on Two Compact Audio-Visual Features},
  booktitle  = {Digital {TV} and Wireless Multimedia Communication - 16th International Forum, {IFTC} 2019, Shanghai, China, September 19-20, 2019, Revised Selected Papers},
  series     = {Communications in Computer and Information Science},
  volume     = {1181},
  pages      = {355--364},
  publisher  = {Springer},
  year       = {2019},
  url        = {https://doi.org/10.1007/978-981-15-3341-9\_29},
  doi        = {10.1007/978-981-15-3341-9\_29},
  timestamp  = {Wed, 11 Mar 2020 14:23:07 +0100},
  biburl     = {https://dblp.org/rec/conf/iftc/GuoZYF019.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/KumarOSHN19,
  author     = {Shachi H. Kumar and Eda Okur and Saurav Sahay and Jonathan Huang and Lama Nachman},
  title      = {Leveraging Topics and Audio Features with Multimodal Attention for Audio Visual Scene-Aware Dialog},
  booktitle  = {Visually Grounded Interaction and Language (ViGIL), NeurIPS 2019 Workshop, Vancouver, Canada, December 13, 2019},
  year       = {2019},
  url        = {https://vigilworkshop.github.io/static/papers/39.pdf},
  timestamp  = {Thu, 12 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/KumarOSHN19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The arXiv link uses the https scheme, like the newer CoRR entries in this file.
@article{DBLP:journals/corr/abs-1912-10131,
  author     = {Shachi H. Kumar and Eda Okur and Saurav Sahay and Jonathan Huang and Lama Nachman},
  title      = {Leveraging Topics and Audio Features with Multimodal Attention for Audio Visual Scene-Aware Dialog},
  journal    = {CoRR},
  volume     = {abs/1912.10131},
  year       = {2019},
  url        = {https://arxiv.org/abs/1912.10131},
  eprinttype = {arXiv},
  eprint     = {1912.10131},
  timestamp  = {Fri, 03 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-10131.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The arXiv link uses the https scheme, like the newer CoRR entries in this file.
@article{DBLP:journals/corr/abs-1912-10132,
  author     = {Shachi H. Kumar and Eda Okur and Saurav Sahay and Jonathan Huang and Lama Nachman},
  title      = {Exploring Context, Attention and Audio Features for Audio Visual Scene-Aware Dialog},
  journal    = {CoRR},
  volume     = {abs/1912.10132},
  year       = {2019},
  url        = {https://arxiv.org/abs/1912.10132},
  eprinttype = {arXiv},
  eprint     = {1912.10132},
  timestamp  = {Fri, 03 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-10132.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/dsp/RahmaniAS18,
  author     = {Mohammad Hasan Rahmani and Farshad Almasganj and Seyyed Ali Seyyedsalehi},
  title      = {Audio-visual feature fusion via deep neural networks for automatic speech recognition},
  journal    = {Digit. Signal Process.},
  volume     = {82},
  pages      = {54--63},
  year       = {2018},
  url        = {https://doi.org/10.1016/j.dsp.2018.06.004},
  doi        = {10.1016/J.DSP.2018.06.004},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/dsp/RahmaniAS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcsv/ZhangZHGT18,
  author     = {Shiqing Zhang and Shiliang Zhang and Tiejun Huang and Wen Gao and Qi Tian},
  title      = {Learning Affective Features With a Hybrid Deep Model for Audio-Visual Emotion Recognition},
  journal    = {{IEEE} Trans. Circuits Syst. Video Technol.},
  volume     = {28},
  number     = {10},
  pages      = {3030--3043},
  year       = {2018},
  url        = {https://doi.org/10.1109/TCSVT.2017.2719043},
  doi        = {10.1109/TCSVT.2017.2719043},
  timestamp  = {Thu, 02 Dec 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tcsv/ZhangZHGT18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tmm/BeyanCBM18,
  author     = {Cigdem Beyan and Francesca Capozzi and Cristina Becchio and Vittorio Murino},
  title      = {Prediction of the Leadership Style of an Emergent Leader Using Audio and Visual Nonverbal Features},
  journal    = {{IEEE} Trans. Multim.},
  volume     = {20},
  number     = {2},
  pages      = {441--456},
  year       = {2018},
  url        = {https://doi.org/10.1109/TMM.2017.2740062},
  doi        = {10.1109/TMM.2017.2740062},
  timestamp  = {Thu, 01 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmm/BeyanCBM18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eccv/OwensE18,
  author     = {Andrew Owens and Alexei A. Efros},
  editor     = {Vittorio Ferrari and Martial Hebert and Cristian Sminchisescu and Yair Weiss},
  title      = {Audio-Visual Scene Analysis with Self-Supervised Multisensory Features},
  booktitle  = {Computer Vision - {ECCV} 2018 - 15th European Conference, Munich, Germany, September 8-14, 2018, Proceedings, Part {VI}},
  series     = {Lecture Notes in Computer Science},
  volume     = {11210},
  pages      = {639--658},
  publisher  = {Springer},
  year       = {2018},
  url        = {https://doi.org/10.1007/978-3-030-01231-1\_39},
  doi        = {10.1007/978-3-030-01231-1\_39},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/eccv/OwensE18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/gcce/HaruyamaTOH18,
  author     = {Tomoki Haruyama and Sho Takahashi and Takahiro Ogawa and Miki Haseyama},
  title      = {Estimation of Important Scenes in Soccer Videos Based on Collaborative Use of Audio-Visual {CNN} Features},
  booktitle  = {{IEEE} 7th Global Conference on Consumer Electronics, {GCCE} 2018, Nara, Japan, October 9-12, 2018},
  pages      = {710--711},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/GCCE.2018.8574727},
  doi        = {10.1109/GCCE.2018.8574727},
  timestamp  = {Wed, 12 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/gcce/HaruyamaTOH18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The braces around EmoTour keep the system name's camel-case spelling
% under bibliography styles that convert titles to sentence case.
@inproceedings{DBLP:conf/huc/MatsudaFTAYM18,
  author     = {Yuki Matsuda and Dmitrii Fedotov and Yuta Takahashi and Yutaka Arakawa and Keiichi Yasumoto and Wolfgang Minker},
  title      = {{EmoTour}: Multimodal Emotion Recognition using Physiological and Audio-Visual Features},
  booktitle  = {Proceedings of the 2018 {ACM} International Joint Conference and 2018 International Symposium on Pervasive and Ubiquitous Computing and Wearable Computers, UbiComp/ISWC 2018 Adjunct, Singapore, October 08-12, 2018},
  pages      = {946--951},
  publisher  = {{ACM}},
  year       = {2018},
  url        = {https://doi.org/10.1145/3267305.3267687},
  doi        = {10.1145/3267305.3267687},
  timestamp  = {Mon, 28 Aug 2023 21:17:48 +0200},
  biburl     = {https://dblp.org/rec/conf/huc/MatsudaFTAYM18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/AkhtarBF18,
  author     = {Zahid Akhtar and Stefany Bedoya and Tiago H. Falk},
  title      = {Improved Audio-Visual Laughter Detection Via Multi-Scale Multi-Resolution Image Texture Features and Classifier Fusion},
  booktitle  = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages      = {3106--3110},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/ICASSP.2018.8461611},
  doi        = {10.1109/ICASSP.2018.8461611},
  timestamp  = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/AkhtarBF18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SuLW18,
  author     = {Rongfeng Su and Xunying Liu and Lan Wang},
  editor     = {B. Yegnanarayana},
  title      = {Semi-supervised Cross-domain Visual Feature Learning for Audio-Visual Broadcast Speech Transcription},
  booktitle  = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages      = {3509--3513},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Interspeech.2018-1063},
  doi        = {10.21437/INTERSPEECH.2018-1063},
  timestamp  = {Fri, 21 May 2021 08:16:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/SuLW18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/TamuraHEHT18,
  author     = {Satoshi Tamura and Kento Horio and Hajime Endo and Satoru Hayamizu and Tomoki Toda},
  editor     = {B. Yegnanarayana},
  title      = {Audio-visual Voice Conversion Using Deep Canonical Correlation Analysis for Deep Bottleneck Features},
  booktitle  = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages      = {2469--2473},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Interspeech.2018-2286},
  doi        = {10.21437/INTERSPEECH.2018-2286},
  timestamp  = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/TamuraHEHT18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/prcv/HouWW18,
  author     = {Congcong Hou and Xiaoyu Wu and Ge Wang},
  editor     = {Jian{-}Huang Lai and Cheng{-}Lin Liu and Xilin Chen and Jie Zhou and Tieniu Tan and Nanning Zheng and Hongbin Zha},
  title      = {End-to-End Bloody Video Recognition by Audio-Visual Feature Fusion},
  booktitle  = {Pattern Recognition and Computer Vision - First Chinese Conference, {PRCV} 2018, Guangzhou, China, November 23-26, 2018, Proceedings, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {11256},
  pages      = {501--510},
  publisher  = {Springer},
  year       = {2018},
  url        = {https://doi.org/10.1007/978-3-030-03398-9\_43},
  doi        = {10.1007/978-3-030-03398-9\_43},
  timestamp  = {Fri, 03 Dec 2021 12:20:53 +0100},
  biburl     = {https://dblp.org/rec/conf/prcv/HouWW18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1804-03641,
  author     = {Andrew Owens and Alexei A. Efros},
  title      = {Audio-Visual Scene Analysis with Self-Supervised Multisensory Features},
  journal    = {CoRR},
  volume     = {abs/1804.03641},
  year       = {2018},
  url        = {http://arxiv.org/abs/1804.03641},
  eprinttype = {arXiv},
  eprint     = {1804.03641},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-03641.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-00625,
  author     = {Didan Deng and Yuqian Zhou and Jimin Pi and Bertram E. Shi},
  title      = {Multimodal Utterance-level Affect Analysis using Visual, Audio and Text Features},
  journal    = {CoRR},
  volume     = {abs/1805.00625},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.00625},
  eprinttype = {arXiv},
  eprint     = {1805.00625},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-00625.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-08409,
  author     = {Chiori Hori and Huda AlAmri and Jue Wang and Gordon Wichern and Takaaki Hori and Anoop Cherian and Tim K. Marks and Vincent Cartillier and Raphael Gontijo Lopes and Abhishek Das and Irfan Essa and Dhruv Batra and Devi Parikh},
  title      = {End-to-End Audio Visual Scene-Aware Dialog using Multimodal Attention-Based Video Features},
  journal    = {CoRR},
  volume     = {abs/1806.08409},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.08409},
  eprinttype = {arXiv},
  eprint     = {1806.08409},
  timestamp  = {Thu, 31 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-08409.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1807-00612,
  author     = {Mehmet Ali Arabaci and Fatih {\"{O}}zkan and Elif S{\"{u}}rer and Peter Jancovic and Alptekin Temizel},
  title      = {Multi-modal Egocentric Activity Recognition using Audio-Visual Features},
  journal    = {CoRR},
  volume     = {abs/1807.00612},
  year       = {2018},
  url        = {http://arxiv.org/abs/1807.00612},
  eprinttype = {arXiv},
  eprint     = {1807.00612},
  timestamp  = {Mon, 17 May 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1807-00612.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-10199,
  author     = {Bold Naranchimeg and Chao Zhang and Takuya Akashi},
  title      = {Cross-domain Deep Feature Combination for Bird Species Classification with Audio-visual Data},
  journal    = {CoRR},
  volume     = {abs/1811.10199},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.10199},
  eprinttype = {arXiv},
  eprint     = {1811.10199},
  timestamp  = {Tue, 14 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-10199.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-08407,
  author     = {Shachi H. Kumar and Eda Okur and Saurav Sahay and Juan Jose Alvarado Leanos and Jonathan Huang and Lama Nachman},
  title      = {Context, Attention and Audio Feature Explorations for Audio Visual Scene-Aware Dialog},
  journal    = {CoRR},
  volume     = {abs/1812.08407},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.08407},
  eprinttype = {arXiv},
  eprint     = {1812.08407},
  timestamp  = {Wed, 02 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-08407.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/computation/PapakostasSGSSM17,
  author    = {Michalis Papakostas and Evaggelos Spyrou and Theodoros Giannakopoulos and Giorgos Siantikos and Dimitrios Sgouropoulos and Phivos Mylonas and Fillia Makedon},
  title     = {Deep Visual Attributes vs. Hand-Crafted Audio Features on Multidomain Speech Emotion Recognition},
  journal   = {Comput.},
  volume    = {5},
  number    = {2},
  pages     = {26},
  year      = {2017},
  url       = {https://doi.org/10.3390/computation5020026},
  doi       = {10.3390/COMPUTATION5020026},
  timestamp = {Thu, 01 Oct 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/computation/PapakostasSGSSM17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jrm/NakadaiK17,
  author    = {Kazuhiro Nakadai and Tomoaki Koiwa},
  title     = {Psychologically-Inspired Audio-Visual Speech Recognition Using Coarse Speech Recognition and Missing Feature Theory},
  journal   = {J. Robotics Mechatronics},
  volume    = {29},
  number    = {1},
  pages     = {105--113},
  year      = {2017},
  url       = {https://doi.org/10.20965/jrm.2017.p0105},
  doi       = {10.20965/JRM.2017.P0105},
  timestamp = {Wed, 01 Apr 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jrm/NakadaiK17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/mta/GharavianBS17,
  author    = {Davood Gharavian and Mahdi Bejani and Mansour Sheikhan},
  title     = {Audio-visual emotion recognition using {FCBF} feature selection method and particle swarm optimization for fuzzy {ARTMAP} neural networks},
  journal   = {Multim. Tools Appl.},
  volume    = {76},
  number    = {2},
  pages     = {2331--2352},
  year      = {2017},
  url       = {https://doi.org/10.1007/s11042-015-3180-6},
  doi       = {10.1007/S11042-015-3180-6},
  timestamp = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/mta/GharavianBS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Nanni et al., Pattern Recognit. Lett. 88 (2017): combining visual and acoustic features for audio classification.
% The fourth author carries a "Jr." suffix. Written as "First Last Jr.", BibTeX would take "Jr." as the
% surname, so that one name uses the "Last, Jr., First" form (surname Silla, matching the S in the key).
@article{DBLP:journals/prl/NanniCLSB17,
  author    = {Loris Nanni and Yandre M. G. Costa and Diego Rafael Lucio and Silla, Jr., Carlos Nascimento and Sheryl Brahnam},
  title     = {Combining visual and acoustic features for audio classification tasks},
  journal   = {Pattern Recognit. Lett.},
  volume    = {88},
  pages     = {49--56},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.patrec.2017.01.013},
  doi       = {10.1016/J.PATREC.2017.01.013},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/prl/NanniCLSB17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/speech/SuiTB17,
  author    = {Chao Sui and Roberto Togneri and Mohammed Bennamoun},
  title     = {A cascade gray-stereo visual feature extraction method for visual and audio-visual speech recognition},
  journal   = {Speech Commun.},
  volume    = {90},
  pages     = {26--38},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.specom.2017.01.005},
  doi       = {10.1016/J.SPECOM.2017.01.005},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/speech/SuiTB17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ccbr/LiuLFZD17,
  author    = {Yu{-}Hang Liu and Xin Liu and Wentao Fan and Bineng Zhong and Ji{-}Xiang Du},
  editor    = {Jie Zhou and Yunhong Wang and Zhenan Sun and Yong Xu and Linlin Shen and Jianjiang Feng and Shiguang Shan and Yu Qiao and Zhenhua Guo and Shiqi Yu},
  title     = {Efficient Audio-Visual Speaker Recognition via Deep Heterogeneous Feature Fusion},
  booktitle = {Biometric Recognition - 12th Chinese Conference, {CCBR} 2017, Shenzhen, China, October 28-29, 2017, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {10568},
  pages     = {575--583},
  publisher = {Springer},
  year      = {2017},
  url       = {https://doi.org/10.1007/978-3-319-69923-3\_62},
  doi       = {10.1007/978-3-319-69923-3\_62},
  timestamp = {Tue, 04 Oct 2022 18:09:04 +0200},
  biburl    = {https://dblp.org/rec/conf/ccbr/LiuLFZD17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Mendat et al., CISS 2017: audio-visual beamforming with a spherical microphone array and an omni-camera.
% "Eigenmike" is a product name; it is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{DBLP:conf/ciss/MendatWRNA17,
  author    = {Daniel R. Mendat and James E. West and Sudarshan Ramenahalli and Ernst Niebur and Andreas G. Andreou},
  title     = {Audio-Visual beamforming with the {Eigenmike} microphone array an omni-camera and cognitive auditory features},
  booktitle = {51st Annual Conference on Information Sciences and Systems, {CISS} 2017, Baltimore, MD, USA, March 22-24, 2017},
  pages     = {1--4},
  publisher = {{IEEE}},
  year      = {2017},
  url       = {https://doi.org/10.1109/CISS.2017.7926180},
  doi       = {10.1109/CISS.2017.7926180},
  timestamp = {Sun, 25 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ciss/MendatWRNA17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/vcip/BokshiTBH17,
  author    = {Mangona Bokshi and Fei Tao and Carlos Busso and John H. L. Hansen},
  title     = {Assessment and classification of singing quality based on audio-visual features},
  booktitle = {2017 {IEEE} Visual Communications and Image Processing, {VCIP} 2017, St. Petersburg, FL, USA, December 10-13, 2017},
  pages     = {1--4},
  publisher = {{IEEE}},
  year      = {2017},
  url       = {https://doi.org/10.1109/VCIP.2017.8305078},
  doi       = {10.1109/VCIP.2017.8305078},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/vcip/BokshiTBH17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijst/BiswasSC16,
  author    = {Astik Biswas and Prakash Kumar Sahu and Mahesh Chandra},
  title     = {Multiple cameras audio visual speech recognition using active appearance model visual features in car environment},
  journal   = {Int. J. Speech Technol.},
  volume    = {19},
  number    = {1},
  pages     = {159--171},
  year      = {2016},
  url       = {https://doi.org/10.1007/s10772-016-9332-x},
  doi       = {10.1007/S10772-016-9332-X},
  timestamp = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ijst/BiswasSC16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Subramaniam et al., ECCV 2016 Workshops (LNCS 9915): bi-modal first impressions recognition.
% The publisher field was missing; LNCS volumes are published by Springer, as in the other LNCS entries of this file.
@inproceedings{DBLP:conf/eccv/SubramaniamPMBM16,
  author    = {Arulkumar Subramaniam and Vismay Patel and Ashish Mishra and Prashanth Balasubramanian and Anurag Mittal},
  editor    = {Gang Hua and Herv{\'{e}} J{\'{e}}gou},
  title     = {Bi-modal First Impressions Recognition Using Temporally Ordered Deep Audio and Stochastic Visual Features},
  booktitle = {Computer Vision - {ECCV} 2016 Workshops - Amsterdam, The Netherlands, October 8-10 and 15-16, 2016, Proceedings, Part {III}},
  series    = {Lecture Notes in Computer Science},
  volume    = {9915},
  pages     = {337--348},
  publisher = {Springer},
  year      = {2016},
  url       = {https://doi.org/10.1007/978-3-319-49409-8\_27},
  doi       = {10.1007/978-3-319-49409-8\_27},
  timestamp = {Sun, 02 Jun 2019 21:17:49 +0200},
  biburl    = {https://dblp.org/rec/conf/eccv/SubramaniamPMBM16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icwsm/PereiraPPBD16,
  author    = {Mois{\'{e}}s Henrique Ramos Pereira and Fl{\'{a}}vio Luis Cardeal P{\'{a}}dua and Adriano C{\'{e}}sar Machado Pereira and Fabr{\'{\i}}cio Benevenuto and Daniel Hasan Dalip},
  title     = {Fusing Audio, Textual, and Visual Features for Sentiment Analysis of News Videos},
  booktitle = {Proceedings of the Tenth International Conference on Web and Social Media, Cologne, Germany, May 17-20, 2016},
  pages     = {659--662},
  publisher = {{AAAI} Press},
  year      = {2016},
  url       = {http://www.aaai.org/ocs/index.php/ICWSM/ICWSM16/paper/view/13144},
  timestamp = {Fri, 05 Feb 2021 11:07:46 +0100},
  biburl    = {https://dblp.org/rec/conf/icwsm/PereiraPPBD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/Heckmann16,
  author    = {Martin Heckmann},
  editor    = {Nelson Morgan},
  title     = {Feature-Level Decision Fusion for Audio-Visual Word Prominence Detection},
  booktitle = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016},
  pages     = {575--579},
  publisher = {{ISCA}},
  year      = {2016},
  url       = {https://doi.org/10.21437/Interspeech.2016-163},
  doi       = {10.21437/INTERSPEECH.2016-163},
  timestamp = {Mon, 26 Jun 2023 16:43:56 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/Heckmann16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/TakashimaATAMON16,
  author    = {Yuki Takashima and Ryo Aihara and Tetsuya Takiguchi and Yasuo Ariki and Nobuyuki Mitani and Kiyohiro Omori and Kaoru Nakazono},
  editor    = {Nelson Morgan},
  title     = {Audio-Visual Speech Recognition Using Bimodal-Trained Bottleneck Features for a Person with Severe Hearing Loss},
  booktitle = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016},
  pages     = {277--281},
  publisher = {{ISCA}},
  year      = {2016},
  url       = {https://doi.org/10.21437/Interspeech.2016-721},
  doi       = {10.21437/INTERSPEECH.2016-721},
  timestamp = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/TakashimaATAMON16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mldm/MuhammadD16,
  author    = {Atta Muhammad and Sher Muhammad Daudpota},
  editor    = {Petra Perner},
  title     = {Content Based Identification of Talk Show Videos Using Audio Visual Features},
  booktitle = {Machine Learning and Data Mining in Pattern Recognition - 12th International Conference, {MLDM} 2016, New York, NY, USA, July 16-21, 2016, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {9729},
  pages     = {267--283},
  publisher = {Springer},
  year      = {2016},
  url       = {https://doi.org/10.1007/978-3-319-41920-6\_20},
  doi       = {10.1007/978-3-319-41920-6\_20},
  timestamp = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/mldm/MuhammadD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sccc/LucioMC16,
  author    = {Diego Rafael Lucio and Yandre Maldonado e Gomes da Costa},
  editor    = {Claudio Cubillos and Hern{\'{a}}n Astudillo},
  title     = {Bird species classification using visual and acoustic features extracted from audio signal},
  booktitle = {35th International Conference of the Chilean Computer Science Society, {SCCC} 2016, Valpara{\'{\i}}so, Chile, October 10-14, 2016},
  pages     = {1--12},
  publisher = {{IEEE}},
  year      = {2016},
  url       = {https://doi.org/10.1109/SCCC.2016.7836063},
  doi       = {10.1109/SCCC.2016.7836063},
  timestamp = {Wed, 16 Oct 2019 14:14:56 +0200},
  biburl    = {https://dblp.org/rec/conf/sccc/LucioMC16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sigcse/DokeP16,
  author    = {Abhay Doke and Niranjan Pedanekar},
  editor    = {Carl Alphonce and Jodi L. Tims and Michael E. Caspersen and Stephen H. Edwards},
  title     = {Lights, Camera, but No Action: Exploring Affective Audio-Visual Features of Educational Videos (Abstract Only)},
  booktitle = {Proceedings of the 47th {ACM} Technical Symposium on Computing Science Education, {SIGCSE} 2016, Memphis, TN, USA, March 02 - 05, 2016},
  pages     = {686},
  publisher = {{ACM}},
  year      = {2016},
  url       = {https://doi.org/10.1145/2839509.2850535},
  doi       = {10.1145/2839509.2850535},
  timestamp = {Mon, 13 Dec 2021 09:32:31 +0100},
  biburl    = {https://dblp.org/rec/conf/sigcse/DokeP16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tsp/PalecekC16,
  author    = {Karel Palecek and Josef Chaloupka},
  title     = {Depth-based features in audio-visual speech recognition},
  booktitle = {39th International Conference on Telecommunications and Signal Processing, {TSP} 2016, Vienna, Austria, June 27-29, 2016},
  pages     = {303--306},
  publisher = {{IEEE}},
  year      = {2016},
  url       = {https://doi.org/10.1109/TSP.2016.7760884},
  doi       = {10.1109/TSP.2016.7760884},
  timestamp = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/tsp/PalecekC16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/PereiraPPBD16,
  author     = {Mois{\'{e}}s H. R. Pereira and Fl{\'{a}}vio L. C. P{\'{a}}dua and Adriano C. M. Pereira and Fabr{\'{\i}}cio Benevenuto and Daniel Hasan Dalip},
  title      = {Fusing Audio, Textual and Visual Features for Sentiment Analysis of News Videos},
  journal    = {CoRR},
  volume     = {abs/1604.02612},
  year       = {2016},
  url        = {http://arxiv.org/abs/1604.02612},
  eprinttype = {arXiv},
  eprint     = {1604.02612},
  timestamp  = {Mon, 12 Aug 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PereiraPPBD16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SubramaniamPMBM16,
  author     = {Arulkumar Subramaniam and Vismay Patel and Ashish Mishra and Prashanth Balasubramanian and Anurag Mittal},
  title      = {Bi-modal First Impressions Recognition using Temporally Ordered Deep Audio and Stochastic Visual Features},
  journal    = {CoRR},
  volume     = {abs/1610.10048},
  year       = {2016},
  url        = {http://arxiv.org/abs/1610.10048},
  eprinttype = {arXiv},
  eprint     = {1610.10048},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SubramaniamPMBM16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Borde et al., Int. J. Speech Technol. 18(2), 2015: isolated-word recognition for audio-visual speech recognition.
% "Zernike" is a proper noun; it is brace-protected like the MFCC acronym next to it so styles keep its capital.
@article{DBLP:journals/ijst/BordeVMY15,
  author    = {Prashant L. Borde and Amarsinh Varpe and Ramesh R. Manza and Pravin L. Yannawar},
  title     = {Recognition of isolated words using {Zernike} and {MFCC} features for audio visual speech recognition},
  journal   = {Int. J. Speech Technol.},
  volume    = {18},
  number    = {2},
  pages     = {167--175},
  year      = {2015},
  url       = {https://doi.org/10.1007/s10772-014-9257-1},
  doi       = {10.1007/S10772-014-9257-1},
  timestamp = {Mon, 08 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ijst/BordeVMY15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Han et al., IEEE Trans. Affect. Comput. 6(4), 2015: arousal recognition from audio-visual features and brain response.
% The acronym FMRI is brace-protected so sentence-casing styles do not turn it into "fmri".
@article{DBLP:journals/taffco/HanJH0L15,
  author    = {Junwei Han and Xiang Ji and Xintao Hu and Lei Guo and Tianming Liu},
  title     = {Arousal Recognition Using Audio-Visual Features and {FMRI}-Based Brain Response},
  journal   = {{IEEE} Trans. Affect. Comput.},
  volume    = {6},
  number    = {4},
  pages     = {337--347},
  year      = {2015},
  url       = {https://doi.org/10.1109/TAFFC.2015.2411280},
  doi       = {10.1109/TAFFC.2015.2411280},
  timestamp = {Mon, 19 Feb 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/taffco/HanJH0L15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/apsipa/TamuraNKOITH15,
  author    = {Satoshi Tamura and Hiroshi Ninomiya and Norihide Kitaoka and Shin Osuga and Yurie Iribe and Kazuya Takeda and Satoru Hayamizu},
  title     = {Audio-visual speech recognition using deep bottleneck features and high-performance lipreading},
  booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} 2015, Hong Kong, December 16-19, 2015},
  pages     = {575--582},
  publisher = {{IEEE}},
  year      = {2015},
  url       = {https://doi.org/10.1109/APSIPA.2015.7415335},
  doi       = {10.1109/APSIPA.2015.7415335},
  timestamp = {Wed, 16 Oct 2019 14:14:55 +0200},
  biburl    = {https://dblp.org/rec/conf/apsipa/TamuraNKOITH15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Castro et al., CAIP 2015 (LNCS 9256): audio-visual feature fusion for gait recognition.
% The booktitle had no separator between the conference date and "Proceedings"; a comma is added to match
% the other LNCS booktitles in this file.
@inproceedings{DBLP:conf/caip/CastroMG15,
  author    = {Francisco M. Castro and Manuel J. Mar{\'{\i}}n{-}Jim{\'{e}}nez and Nicol{\'{a}}s Guil},
  editor    = {George Azzopardi and Nicolai Petkov},
  title     = {Empirical Study of Audio-Visual Features Fusion for Gait Recognition},
  booktitle = {Computer Analysis of Images and Patterns - 16th International Conference, {CAIP} 2015, Valletta, Malta, September 2-4, 2015, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {9256},
  pages     = {727--739},
  publisher = {Springer},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-23192-1\_61},
  doi       = {10.1007/978-3-319-23192-1\_61},
  timestamp = {Tue, 14 May 2019 10:00:53 +0200},
  biburl    = {https://dblp.org/rec/conf/caip/CastroMG15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cores/ForczmanskiM15,
  author    = {Pawel Forczmanski and Tomasz Maka},
  editor    = {Robert Burduk and Konrad Jackowski and Marek Kurzynski and Michal Wozniak and Andrzej Zolnierek},
  title     = {Investigating Combinations of Visual Audio Features and Distance Metrics in the Problem of Audio Classification},
  booktitle = {Proceedings of the 9th International Conference on Computer Recognition Systems {CORES} 2015, Wroclaw, Poland, 25-27 May 2015},
  series    = {Advances in Intelligent Systems and Computing},
  volume    = {403},
  pages     = {733--744},
  publisher = {Springer},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-26227-7\_69},
  doi       = {10.1007/978-3-319-26227-7\_69},
  timestamp = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cores/ForczmanskiM15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Schindler and Rauber, ECIR 2015 (LNCS 9022): audio-visual music genre classification via colour features.
% The publisher field was missing; LNCS volumes are published by Springer, as in the other LNCS entries of this file.
@inproceedings{DBLP:conf/ecir/SchindlerR15,
  author    = {Alexander Schindler and Andreas Rauber},
  editor    = {Allan Hanbury and Gabriella Kazai and Andreas Rauber and Norbert Fuhr},
  title     = {An Audio-Visual Approach to Music Genre Classification through Affective Color Features},
  booktitle = {Advances in Information Retrieval - 37th European Conference on {IR} Research, {ECIR} 2015, Vienna, Austria, March 29 - April 2, 2015. Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {9022},
  pages     = {61--67},
  publisher = {Springer},
  year      = {2015},
  url       = {https://doi.org/10.1007/978-3-319-16354-3\_8},
  doi       = {10.1007/978-3-319-16354-3\_8},
  timestamp = {Tue, 14 May 2019 10:00:37 +0200},
  biburl    = {https://dblp.org/rec/conf/ecir/SchindlerR15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdsp/TahirCMDDMT15,
  author    = {Yasir Tahir and Debsubhra Chakraborty and Tomasz Maszczyk and Shoko Dauwels and Justin Dauwels and Nadia Magnenat{-}Thalmann and Daniel Thalmann},
  title     = {Real-time sociometrics from audio-visual features for two-person dialogs},
  booktitle = {2015 {IEEE} International Conference on Digital Signal Processing, {DSP} 2015, Singapore, July 21-24, 2015},
  pages     = {823--827},
  publisher = {{IEEE}},
  year      = {2015},
  url       = {https://doi.org/10.1109/ICDSP.2015.7251991},
  doi       = {10.1109/ICDSP.2015.7251991},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icdsp/TahirCMDDMT15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icinco/SidorovSIM15,
  author    = {Maxim Sidorov and Evgenii Sopov and Ilia Ivanov and Wolfgang Minker},
  editor    = {Joaquim Filipe and Kurosh Madani and Oleg Yu. Gusikhin and Jurek Z. Sasiadek},
  title     = {Feature and Decision Level Audio-visual Data Fusion in Emotion Recognition Problem},
  booktitle = {{ICINCO} 2015 - Proceedings of the 12th International Conference on Informatics in Control, Automation and Robotics, Volume 2, Colmar, Alsace, France, 21-23 July, 2015},
  pages     = {246--251},
  publisher = {SciTePress},
  year      = {2015},
  url       = {https://doi.org/10.5220/0005527002460251},
  doi       = {10.5220/0005527002460251},
  timestamp = {Mon, 09 Aug 2021 17:01:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icinco/SidorovSIM15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/CornuM15,
  author    = {Thomas Le Cornu and Ben Milner},
  title     = {Reconstructing intelligible audio speech from visual speech features},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {3355--3359},
  publisher = {{ISCA}},
  year      = {2015},
  url       = {https://doi.org/10.21437/Interspeech.2015-139},
  doi       = {10.21437/INTERSPEECH.2015-139},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/CornuM15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/NinomiyaKTIT15,
  author    = {Hiroshi Ninomiya and Norihide Kitaoka and Satoshi Tamura and Yurie Iribe and Kazuya Takeda},
  title     = {Integration of deep bottleneck features for audio-visual speech recognition},
  booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015},
  pages     = {563--567},
  publisher = {{ISCA}},
  year      = {2015},
  url       = {https://doi.org/10.21437/Interspeech.2015-204},
  doi       = {10.21437/INTERSPEECH.2015-204},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/NinomiyaKTIT15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Nishi et al., MediaEval 2015 working notes (CEUR-WS vol. 1436): audio features plus visual i-vectors for person discovery.
% The camel-case benchmark name MediaEval is brace-protected in the title so sentence-casing styles keep it intact.
% No pages field: this record carries only the CEUR-WS paper URL.
@inproceedings{DBLP:conf/mediaeval/NishiIS15,
  author    = {Fumito Nishi and Nakamasa Inoue and Koichi Shinoda},
  editor    = {Martha A. Larson and Bogdan Ionescu and Mats Sj{\"{o}}berg and Xavier Anguera and Johann Poignant and Michael Riegler and Maria Eskevich and Claudia Hauff and Richard F. E. Sutcliffe and Gareth J. F. Jones and Yi{-}Hsuan Yang and Mohammad Soleymani and Symeon Papadopoulos},
  title     = {Combining Audio Features and Visual I-Vector @ {MediaEval} 2015 Multimodal Person Discovery in Broadcast {TV}},
  booktitle = {Working Notes Proceedings of the MediaEval 2015 Workshop, Wurzen, Germany, September 14-15, 2015},
  series    = {{CEUR} Workshop Proceedings},
  volume    = {1436},
  publisher = {CEUR-WS.org},
  year      = {2015},
  url       = {https://ceur-ws.org/Vol-1436/Paper39.pdf},
  timestamp = {Fri, 10 Mar 2023 16:22:12 +0100},
  biburl    = {https://dblp.org/rec/conf/mediaeval/NishiIS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Biswas et al., ReTIS 2015: audio-visual phoneme recognition on the VidTIMIT corpus.
% The corpus name VidTIMIT is brace-protected; otherwise sentence-casing styles render it as "Vidtimit".
@inproceedings{DBLP:conf/retis/BiswasSBC15,
  author    = {Astik Biswas and Prakash Kumar Sahu and Anirban Bhowmick and Mahesh Chandra},
  title     = {{VidTIMIT} audio visual phoneme recognition using {AAM} visual features and human auditory motivated acoustic wavelet features},
  booktitle = {2nd {IEEE} International Conference on Recent Trends in Information Systems, ReTIS 2015, Kolkata, India, July 9-11, 2015},
  pages     = {428--433},
  publisher = {{IEEE}},
  year      = {2015},
  url       = {https://doi.org/10.1109/ReTIS.2015.7232917},
  doi       = {10.1109/RETIS.2015.7232917},
  timestamp = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/retis/BiswasSBC15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Islam and Sobhan, Appl. Comput. Intell. Soft Comput. 2014: feature-fusion audio-visual speaker identification.
% "Markov" is a proper noun; it is brace-protected so sentence-casing styles keep its capital.
@article{DBLP:journals/acisc/IslamS14,
  author    = {Md. Rabiul Islam and Md. Abdus Sobhan},
  title     = {Feature Fusion Based Audio-Visual Speaker Identification Using Hidden {Markov} Model under Different Lighting Variations},
  journal   = {Appl. Comput. Intell. Soft Comput.},
  volume    = {2014},
  pages     = {831830:1--831830:7},
  year      = {2014},
  url       = {https://doi.org/10.1155/2014/831830},
  doi       = {10.1155/2014/831830},
  timestamp = {Wed, 22 Jul 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/acisc/IslamS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/fgcs/LiuYXT14,
  author    = {Yizhi Liu and Ying Yang and Hongtao Xie and Sheng Tang},
  title     = {Fusing audio vocabulary with visual features for pornographic video detection},
  journal   = {Future Gener. Comput. Syst.},
  volume    = {31},
  pages     = {69--76},
  year      = {2014},
  url       = {https://doi.org/10.1016/j.future.2012.08.012},
  doi       = {10.1016/J.FUTURE.2012.08.012},
  timestamp = {Wed, 19 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/fgcs/LiuYXT14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/isci/Rinaldi14,
  author    = {Antonio M. Rinaldi},
  title     = {A multimedia ontology model based on linguistic properties and audio-visual features},
  journal   = {Inf. Sci.},
  volume    = {277},
  pages     = {234--246},
  year      = {2014},
  url       = {https://doi.org/10.1016/j.ins.2014.02.017},
  doi       = {10.1016/J.INS.2014.02.017},
  timestamp = {Tue, 06 Jun 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/isci/Rinaldi14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cse/LiXLZB14,
  author    = {Chen Li and Yuxiang Xie and Xidao Luan and Kaichao Zhang and Liang Bai},
  editor    = {Xingang Liu and Didier El Baz and Ching{-}Hsien Hsu and Kai Kang and Weifeng Chen},
  title     = {Automatic Movie Summarization Based on the Visual-Audio Features},
  booktitle = {17th {IEEE} International Conference on Computational Science and Engineering, {CSE} 2014, Chengdu, China, December 19-21, 2014},
  pages     = {1758--1761},
  publisher = {{IEEE} Computer Society},
  year      = {2014},
  url       = {https://doi.org/10.1109/CSE.2014.322},
  doi       = {10.1109/CSE.2014.322},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/cse/LiXLZB14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/euvip/MilaniCTTA14,
  author    = {Simone Milani and Luca Cuccovillo and Marco Tagliasacchi and Stefano Tubaro and Patrick Aichroth},
  title     = {Video camera identification using audio-visual features},
  booktitle = {5th European Workshop on Visual Information Processing, {EUVIP} 2014, Villetaneuse, Paris, France, December 10-12, 2014},
  pages     = {1--6},
  publisher = {{IEEE}},
  year      = {2014},
  url       = {https://doi.org/10.1109/EUVIP.2014.7018382},
  doi       = {10.1109/EUVIP.2014.7018382},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/euvip/MilaniCTTA14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SawadaTTH14,
  author    = {Kohei Sawada and Masanori Takehara and Satoshi Tamura and Satoru Hayamizu},
  title     = {Audio-visual voice conversion using noise-robust features},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  pages     = {7899--7903},
  publisher = {{IEEE}},
  year      = {2014},
  url       = {https://doi.org/10.1109/ICASSP.2014.6855138},
  doi       = {10.1109/ICASSP.2014.6855138},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/SawadaTTH14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icpram/KacheleGZMS14,
  author    = {Markus K{\"{a}}chele and Michael Glodek and Dimitri Zharkov and Sascha Meudt and Friedhelm Schwenker},
  editor    = {Maria De Marsico and Antoine Tabbone and Ana L. N. Fred},
  title     = {Fusion of Audio-visual Features using Hierarchical Classifier Systems for the Recognition of Affective States and the State of Depression},
  booktitle = {{ICPRAM} 2014 - Proceedings of the 3rd International Conference on Pattern Recognition Applications and Methods, ESEO, Angers, Loire Valley, France, 6-8 March, 2014},
  pages     = {671--678},
  publisher = {SciTePress},
  year      = {2014},
  url       = {https://doi.org/10.5220/0004828606710678},
  doi       = {10.5220/0004828606710678},
  timestamp = {Tue, 23 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icpram/KacheleGZMS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/intcompsymp/ZengC14,
  author    = {Yi{-}Chong Zeng and Wen{-}Tsung Chang},
  editor    = {William Cheng{-}Chung Chu and Han{-}Chieh Chao and Stephen Jenn{-}Hwa Yang},
  title     = {Fast Seriation of Multiple Homogeneous-content Videos Using Audio-visual Features},
  booktitle = {Intelligent Systems and Applications - Proceedings of the International Computer Symposium {(ICS)} held at Taichung, Taiwan, December 12-14, 2014},
  series    = {Frontiers in Artificial Intelligence and Applications},
  volume    = {274},
  pages     = {1157--1166},
  publisher = {{IOS} Press},
  year      = {2014},
  url       = {https://doi.org/10.3233/978-1-61499-484-8-1157},
  doi       = {10.3233/978-1-61499-484-8-1157},
  timestamp = {Sun, 21 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/intcompsymp/ZengC14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Fernandez Martinez et al., SLAM 2014 workshop (ISCA): viewers' perception classification of car commercials.
% The first author has a two-word surname (reflected in the citation key); it is written in "Last, First" form
% so BibTeX does not treat the first surname as a given name. "Youtube" is brace-protected against sentence casing.
@inproceedings{DBLP:conf/interspeech/Fernandez-Martinez14,
  author    = {Fern{\'{a}}ndez Mart{\'{\i}}nez, Fernando and Alejandro Hern{\'{a}}ndez{-}Garc{\'{\i}}a and Ascensi{\'{o}}n Gallardo{-}Antol{\'{\i}}n and Fernando D{\'{\i}}az{-}de{-}Mar{\'{\i}}a},
  title     = {Combining audio-visual features for viewers' perception classification of {Youtube} car commercials},
  booktitle = {2nd International Workshop on Speech, Language and Audio in Multimedia, {SLAM} 2014, Penang, Malaysia, September 11-12, 2014},
  pages     = {14--18},
  publisher = {{ISCA}},
  year      = {2014},
  url       = {http://www.isca-speech.org/archive/slam\_2014/slm4\_014.html},
  timestamp = {Tue, 11 Jul 2023 11:45:03 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/Fernandez-Martinez14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/isccsp/IbrahimM14,
  author    = {Mohd Zamri Ibrahim and David J. Mulvaney},
  title     = {A lip geometry approach for feature-fusion based audio-visual speech recognition},
  booktitle = {6th International Symposium on Communications, Control and Signal Processing, {ISCCSP} 2014, Athens, Greece, May 21-23, 2014},
  pages     = {644--647},
  publisher = {{IEEE}},
  year      = {2014},
  url       = {https://doi.org/10.1109/ISCCSP.2014.6877957},
  doi       = {10.1109/ISCCSP.2014.6877957},
  timestamp = {Sun, 17 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/isccsp/IbrahimM14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Title: the acronyms {INAOE-BUAP} and {AVEC} are braced so sentence-casing styles keep them uppercase.
@inproceedings{DBLP:conf/mm/EspinosaEPMAR14,
  author     = {Humberto P{\'{e}}rez Espinosa and Hugo Jair Escalante and Luis Villase{\~{n}}or Pineda and Manuel Montes{-}y{-}G{\'{o}}mez and David Pinto Avenda{\~{n}}o and Ver{\'{o}}nica Reyes{-}Meza},
  editor     = {Michel F. Valstar and Bj{\"{o}}rn W. Schuller and Jarek Krajewski and Roddy Cowie and Maja Pantic},
  title      = {Fusing Affective Dimensions and Audio-Visual Features from Segmented Video for Depression Recognition: {INAOE-BUAP}'s Participation at {AVEC}'14 Challenge},
  booktitle  = {Proceedings of the 4th International Workshop on Audio/Visual Emotion Challenge, {AVEC} '14, Orlando, Florida, USA, November 7, 2014},
  pages      = {49--55},
  publisher  = {{ACM}},
  year       = {2014},
  url        = {https://doi.org/10.1145/2661806.2661815},
  doi        = {10.1145/2661806.2661815},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/mm/EspinosaEPMAR14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nime/Tsiros14,
  author     = {Augoustinos Tsiros},
  editor     = {Baptiste Caramiaux and Koray Tahiroglu and Rebecca Fiebrink and Atau Tanaka},
  title      = {Evaluating the Perceived Similarity Between Audio-Visual Features Using Corpus-Based Concatenative Synthesis},
  booktitle  = {14th International Conference on New Interfaces for Musical Expression, {NIME} 2014, London, United Kingdom, June 30 - July 4, 2014},
  pages      = {421--426},
  publisher  = {nime.org},
  year       = {2014},
  url        = {https://doi.org/10.5281/zenodo.1178965},
  doi        = {10.5281/ZENODO.1178965},
  timestamp  = {Tue, 04 Apr 2023 16:52:05 +0200},
  biburl     = {https://dblp.org/rec/conf/nime/Tsiros14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/noms/RainerT14,
  author     = {Benjamin Rainer and Christian Timmerer},
  title      = {A subjective evaluation using crowdsourcing of Adaptive Media Playout utilizing audio-visual content features},
  booktitle  = {2014 {IEEE} Network Operations and Management Symposium, {NOMS} 2014, Krakow, Poland, May 5-9, 2014},
  pages      = {1--7},
  publisher  = {{IEEE}},
  year       = {2014},
  url        = {https://doi.org/10.1109/NOMS.2014.6838406},
  doi        = {10.1109/NOMS.2014.6838406},
  timestamp  = {Wed, 16 Oct 2019 14:14:54 +0200},
  biburl     = {https://dblp.org/rec/conf/noms/RainerT14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the proper noun {Zernike} is braced so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/BordeaVMY14,
  author     = {Prashant L. Borde and Amarsinh Varpe and Ramesh R. Manza and Pravin L. Yannawar},
  title      = {Recognition of Isolated Words using {Zernike} and {MFCC} features for Audio Visual Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/1407.1165},
  year       = {2014},
  url        = {http://arxiv.org/abs/1407.1165},
  eprinttype = {arXiv},
  eprint     = {1407.1165},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/BordeaVMY14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/avsp/BarbulescuHBR13,
  author     = {Adela Barbulescu and Thomas Hueber and G{\'{e}}rard Bailly and R{\'{e}}mi Ronfard},
  editor     = {Slim Ouni and Fr{\'{e}}d{\'{e}}ric Berthommier and Alexandra Jesse},
  title      = {Audio-visual speaker conversion using prosody features},
  booktitle  = {Auditory-Visual Speech Processing, {AVSP} 2013, Annecy, France, August 29 - September 1, 2013},
  pages      = {11--16},
  publisher  = {{ISCA}},
  year       = {2013},
  url        = {http://www.isca-speech.org/archive/avsp13/av13\_011.html},
  timestamp  = {Tue, 16 Nov 2021 11:36:14 +0100},
  biburl     = {https://dblp.org/rec/conf/avsp/BarbulescuHBR13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/avsp/ShenTH13,
  author     = {Peng Shen and Satoshi Tamura and Satoru Hayamizu},
  editor     = {Slim Ouni and Fr{\'{e}}d{\'{e}}ric Berthommier and Alexandra Jesse},
  title      = {Audio-visual interaction in sparse representation features for noise robust audio-visual speech recognition},
  booktitle  = {Auditory-Visual Speech Processing, {AVSP} 2013, Annecy, France, August 29 - September 1, 2013},
  pages      = {43--48},
  publisher  = {{ISCA}},
  year       = {2013},
  url        = {http://www.isca-speech.org/archive/avsp13/av13\_043.html},
  timestamp  = {Tue, 16 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/avsp/ShenTH13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/WangZH13,
  author     = {Tao Wang and Zhigang Zhu and Riad I. Hammoud},
  title      = {Audio-Visual Feature Fusion for Vehicles Classification in a Surveillance System},
  booktitle  = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} Workshops 2013, Portland, OR, USA, June 23-28, 2013},
  pages      = {381--386},
  publisher  = {{IEEE} Computer Society},
  year       = {2013},
  url        = {https://doi.org/10.1109/CVPRW.2013.64},
  doi        = {10.1109/CVPRW.2013.64},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/cvpr/WangZH13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dev/FirminoT13,
  author     = {Emiliano Firmino and Mauro Te{\'{o}}filo},
  editor     = {William D. Tucker and Antoine B. Bagula and Margaret Martonosi and Bhaskaran Raman},
  title      = {Visually impaired navigation assistant for emerging market using tactile floor, feature phone and audio descriptions},
  booktitle  = {Annual Symposium on Computing for Development, {ACM} DEV-4, Cape Town, South Africa - December 06 - 07, 2013},
  pages      = {20:1--20:2},
  publisher  = {{ACM}},
  year       = {2013},
  url        = {https://doi.org/10.1145/2537052.2537072},
  doi        = {10.1145/2537052.2537072},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/dev/FirminoT13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icmcs/SayedelahlAK13,
  author     = {Aya Sayedelahl and Rodrigo Araujo and Mohamed S. Kamel},
  title      = {Audio-visual feature-decision level fusion for spontaneous emotion estimation in speech conversations},
  booktitle  = {2013 {IEEE} International Conference on Multimedia and Expo Workshops, San Jose, CA, USA, July 15-19, 2013},
  pages      = {1--6},
  publisher  = {{IEEE} Computer Society},
  year       = {2013},
  url        = {https://doi.org/10.1109/ICMEW.2013.6618372},
  doi        = {10.1109/ICMEW.2013.6618372},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icmcs/SayedelahlAK13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/KhanM13,
  author     = {Faheem Khan and Ben Milner},
  editor     = {Fr{\'{e}}d{\'{e}}ric Bimbot and Christophe Cerisara and C{\'{e}}cile Fougeron and Guillaume Gravier and Lori Lamel and Fran{\c{c}}ois Pellegrino and Pascal Perrier},
  title      = {Speaker separation using visual speech features and single-channel audio},
  booktitle  = {{INTERSPEECH} 2013, 14th Annual Conference of the International Speech Communication Association, Lyon, France, August 25-29, 2013},
  pages      = {3264--3268},
  publisher  = {{ISCA}},
  year       = {2013},
  url        = {https://doi.org/10.21437/Interspeech.2013-723},
  doi        = {10.21437/INTERSPEECH.2013-723},
  timestamp  = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/KhanM13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the proper noun {McGurk} is capitalised and braced so sentence-casing styles keep it.
@inproceedings{DBLP:conf/interspeech/TiippanaTVV13,
  author     = {Kaisa Tiippana and Mikko Tiainen and Lari Vainio and Martti Vainio},
  editor     = {Fr{\'{e}}d{\'{e}}ric Bimbot and Christophe Cerisara and C{\'{e}}cile Fougeron and Guillaume Gravier and Lori Lamel and Fran{\c{c}}ois Pellegrino and Pascal Perrier},
  title      = {Acoustic and visual phonetic features in the {McGurk} effect - an audiovisual speech illusion},
  booktitle  = {{INTERSPEECH} 2013, 14th Annual Conference of the International Speech Communication Association, Lyon, France, August 25-29, 2013},
  pages      = {1634--1638},
  publisher  = {{ISCA}},
  year       = {2013},
  url        = {https://doi.org/10.21437/Interspeech.2013-424},
  doi        = {10.21437/INTERSPEECH.2013-424},
  timestamp  = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/TiippanaTVV13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iscas/ZhangZX13,
  author     = {Bo Zhang and Jiancheng Zou and Bo Xu},
  title      = {Context-dependent audio-visual and temporal features fusion for {TV} commercial detection},
  booktitle  = {2013 {IEEE} International Symposium on Circuits and Systems (ISCAS2013), Beijing, China, May 19-23, 2013},
  pages      = {5--8},
  publisher  = {{IEEE}},
  year       = {2013},
  url        = {https://doi.org/10.1109/ISCAS.2013.6571768},
  doi        = {10.1109/ISCAS.2013.6571768},
  timestamp  = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl     = {https://dblp.org/rec/conf/iscas/ZhangZX13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mir/GodinNW13,
  author     = {Fr{\'{e}}deric Godin and Wesley De Neve and Rik Van de Walle},
  editor     = {Ramesh C. Jain and Balakrishnan Prabhakaran and Marcel Worring and John R. Smith and Tat{-}Seng Chua},
  title      = {Towards fusion of collective knowledge and audio-visual content features for annotating broadcast video},
  booktitle  = {International Conference on Multimedia Retrieval, ICMR'13, Dallas, TX, USA, April 16-19, 2013},
  pages      = {329--332},
  publisher  = {{ACM}},
  year       = {2013},
  url        = {https://doi.org/10.1145/2461466.2461530},
  doi        = {10.1145/2461466.2461530},
  timestamp  = {Mon, 22 Apr 2024 21:24:20 +0200},
  biburl     = {https://dblp.org/rec/conf/mir/GodinNW13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mm/RudovicPP13,
  author     = {Ognjen Rudovic and Stavros Petridis and Maja Pantic},
  editor     = {Alejandro Jaimes and Nicu Sebe and Nozha Boujemaa and Daniel Gatica{-}Perez and David A. Shamma and Marcel Worring and Roger Zimmermann},
  title      = {Bimodal log-linear regression for fusion of audio and visual features},
  booktitle  = {{ACM} Multimedia Conference, {MM} '13, Barcelona, Spain, October 21-25, 2013},
  pages      = {789--792},
  publisher  = {{ACM}},
  year       = {2013},
  url        = {https://doi.org/10.1145/2502081.2502207},
  doi        = {10.1145/2502081.2502207},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/mm/RudovicPP13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mmm/ZhangFX13,
  author     = {Bo Zhang and Bailan Feng and Bo Xu},
  editor     = {Shipeng Li and Abdulmotaleb El{-}Saddik and Meng Wang and Tao Mei and Nicu Sebe and Shuicheng Yan and Richang Hong and Cathal Gurrin},
  title      = {Fusion of Audio-Visual Features and Statistical Property for Commercial Segmentation},
  booktitle  = {Advances in Multimedia Modeling, 19th International Conference, {MMM} 2013, Huangshan, China, January 7-9, 2013, Proceedings, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {7732},
  pages      = {250--260},
  publisher  = {Springer},
  year       = {2013},
  url        = {https://doi.org/10.1007/978-3-642-35725-1\_23},
  doi        = {10.1007/978-3-642-35725-1\_23},
  timestamp  = {Mon, 23 Nov 2020 15:58:17 +0100},
  biburl     = {https://dblp.org/rec/conf/mmm/ZhangFX13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal: volume 20 (2012) with a TASL DOI belongs to IEEE Transactions on Audio, Speech, and
% Language Processing, so the abbreviated name is given for that title rather than its predecessor.
@article{DBLP:journals/taslp/NewmanC12,
  author     = {Jacob L. Newman and Stephen J. Cox},
  title      = {Language Identification Using Visual Features},
  journal    = {{IEEE} Trans. Audio Speech Lang. Process.},
  volume     = {20},
  number     = {7},
  pages      = {1936--1947},
  year       = {2012},
  url        = {https://doi.org/10.1109/TASL.2012.2191956},
  doi        = {10.1109/TASL.2012.2191956},
  timestamp  = {Sun, 17 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taslp/NewmanC12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Booktitle: conference acronym spelled {ACSSC}, matching the citation key and the DOI.
@inproceedings{DBLP:conf/acssc/SuiTHB12,
  author     = {Chao Sui and Roberto Togneri and Serajul Haque and Mohammed Bennamoun},
  editor     = {Michael B. Matthews},
  title      = {Discrimination comparison between audio and visual features},
  booktitle  = {Conference Record of the Forty Sixth Asilomar Conference on Signals, Systems and Computers, {ACSSC} 2012, Pacific Grove, CA, USA, November 4-7, 2012},
  pages      = {1609--1612},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ACSSC.2012.6489302},
  doi        = {10.1109/ACSSC.2012.6489302},
  timestamp  = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/acssc/SuiTHB12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/apsipa/ShenTH12,
  author     = {Peng Shen and Satoshi Tamura and Satoru Hayamizu},
  title      = {Feature reconstruction using sparse imputation for noise robust audio-visual speech recognition},
  booktitle  = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} 2012, Hollywood, CA, USA, December 3-6, 2012},
  pages      = {1--4},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://ieeexplore.ieee.org/document/6411773/},
  timestamp  = {Sun, 08 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/apsipa/ShenTH12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iconip/AlmaadeedAA12,
  author     = {Noor Almaadeed and Amar Aggoun and Abbes Amira},
  editor     = {Tingwen Huang and Zhigang Zeng and Chuandong Li and Chi{-}Sing Leung},
  title      = {Audio-Visual Feature Fusion for Speaker Identification},
  booktitle  = {Neural Information Processing - 19th International Conference, {ICONIP} 2012, Doha, Qatar, November 12-15, 2012, Proceedings, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {7663},
  pages      = {56--67},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-642-34475-6\_8},
  doi        = {10.1007/978-3-642-34475-6\_8},
  timestamp  = {Tue, 30 Jun 2020 11:04:50 +0200},
  biburl     = {https://dblp.org/rec/conf/iconip/AlmaadeedAA12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/KashiwagiSMH12,
  author     = {Yosuke Kashiwagi and Masayuki Suzuki and Nobuaki Minematsu and Keikichi Hirose},
  title      = {Audio-visual feature integration based on piecewise linear transformation for noise robust automatic speech recognition},
  booktitle  = {2012 {IEEE} Spoken Language Technology Workshop (SLT), Miami, FL, USA, December 2-5, 2012},
  pages      = {149--152},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/SLT.2012.6424213},
  doi        = {10.1109/SLT.2012.6424213},
  timestamp  = {Wed, 16 Oct 2019 14:14:53 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/KashiwagiSMH12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: {iTV} is braced so case-changing styles preserve its mixed capitalisation.
@inproceedings{DBLP:conf/centeris/OliveiraAA11,
  author     = {Rita Oliveira and Jorge Ferraz de Abreu and Ana Margarida Pisco Almeida},
  editor     = {Maria Manuela Cruz{-}Cunha and Jo{\~{a}}o Varaj{\~{a}}o and Philip Powell and Ricardo Martinho},
  title      = {An {iTV} Audio Description Service: Suggesting Requirements and Features for Visually Impaired Users},
  booktitle  = {ENTERprise Information Systems - International Conference, {CENTERIS} 2011, Vilamoura, Portugal, October 5-7, 2011, Proceedings, Part {III}},
  series     = {Communications in Computer and Information Science},
  volume     = {221},
  pages      = {59--68},
  publisher  = {Springer},
  year       = {2011},
  url        = {https://doi.org/10.1007/978-3-642-24352-3\_7},
  doi        = {10.1007/978-3-642-24352-3\_7},
  timestamp  = {Tue, 23 Apr 2024 23:18:35 +0200},
  biburl     = {https://dblp.org/rec/conf/centeris/OliveiraAA11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cikm/IslamABR11,
  author     = {Muhammad Asiful Islam and Faisal Ahmed and Yevgen Borodin and I. V. Ramakrishnan},
  editor     = {Craig Macdonald and Iadh Ounis and Ian Ruthven},
  title      = {Tightly coupling visual and linguistic features for enriching audio-based web browsing experience},
  booktitle  = {Proceedings of the 20th {ACM} Conference on Information and Knowledge Management, {CIKM} 2011, Glasgow, United Kingdom, October 24-28, 2011},
  pages      = {2085--2088},
  publisher  = {{ACM}},
  year       = {2011},
  url        = {https://doi.org/10.1145/2063576.2063896},
  doi        = {10.1145/2063576.2063896},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/cikm/IslamABR11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eusipco/LiuNWJC11,
  author     = {Qingju Liu and Syed Mohsen Naqvi and Wenwu Wang and Philip J. B. Jackson and Jonathon A. Chambers},
  title      = {Robust feature selection for scaling ambiguity reduction in audio-visual convolutive {BSS}},
  booktitle  = {Proceedings of the 19th European Signal Processing Conference, {EUSIPCO} 2011, Barcelona, Spain, August 29 - Sept. 2, 2011},
  pages      = {1060--1064},
  publisher  = {{IEEE}},
  year       = {2011},
  url        = {https://ieeexplore.ieee.org/document/7074127/},
  timestamp  = {Thu, 05 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/eusipco/LiuNWJC11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChandrasekharSR11,
  author     = {Vijay Chandrasekhar and Mehmet Emre Sargin and David A. Ross},
  title      = {Automatic Language Identification in music videos with low level audio and visual features},
  booktitle  = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2011, May 22-27, 2011, Prague Congress Center, Prague, Czech Republic},
  pages      = {5724--5727},
  publisher  = {{IEEE}},
  year       = {2011},
  url        = {https://doi.org/10.1109/ICASSP.2011.5947660},
  doi        = {10.1109/ICASSP.2011.5947660},
  timestamp  = {Tue, 23 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/ChandrasekharSR11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/maics/SchlittenhartWSI11,
  author     = {Isaac Schlittenhart and Jason Winters and Kyle Springer and Atsushi Inoue},
  editor     = {Sofia Visa and Atsushi Inoue and Anca L. Ralescu},
  title      = {Toward Robust Features for Remote Audio-Visual Classroom},
  booktitle  = {Proceedings of The 22nd Midwest Artificial Intelligence and Cognitive Science Conference 2011, Cincinnati, Ohio, USA, April 16-17, 2011},
  series     = {{CEUR} Workshop Proceedings},
  volume     = {710},
  pages      = {202--207},
  publisher  = {CEUR-WS.org},
  year       = {2011},
  url        = {https://ceur-ws.org/Vol-710/paper38.pdf},
  timestamp  = {Fri, 10 Mar 2023 16:22:19 +0100},
  biburl     = {https://dblp.org/rec/conf/maics/SchlittenhartWSI11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: {MediaEval} is braced so sentence-casing styles keep its internal capital.
@inproceedings{DBLP:conf/mediaeval/AcarSA11,
  author     = {Esra Acar and Stephan Spiegel and Sahin Albayrak},
  editor     = {Martha A. Larson and Adam Rae and Claire{-}H{\'{e}}l{\`{e}}ne Demarty and Christoph Kofler and Florian Metze and Rapha{\"{e}}l Troncy and Vasileios Mezaris and Gareth J. F. Jones},
  title      = {{MediaEval} 2011 Affect Task: Violent Scene Detection combining audio and visual Features with {SVM}},
  booktitle  = {Working Notes Proceedings of the MediaEval 2011 Workshop, Santa Croce in Fossabanda, Pisa, Italy, September 1-2, 2011},
  series     = {{CEUR} Workshop Proceedings},
  volume     = {807},
  publisher  = {CEUR-WS.org},
  year       = {2011},
  url        = {https://ceur-ws.org/Vol-807/acar\_TUB\_Violence\_me11wn.pdf},
  timestamp  = {Fri, 10 Mar 2023 16:22:12 +0100},
  biburl     = {https://dblp.org/rec/conf/mediaeval/AcarSA11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/psivt/KomaiAT11,
  author     = {Yuto Komai and Yasuo Ariki and Tetsuya Takiguchi},
  editor     = {Yo{-}Sung Ho},
  title      = {Audio-Visual Speech Recognition Based on {AAM} Parameter and Phoneme Analysis of Visual Feature},
  booktitle  = {Advances in Image and Video Technology - 5th Pacific Rim Symposium, {PSIVT} 2011, Gwangju, South Korea, November 20-23, 2011, Proceedings, Part {I}},
  series     = {Lecture Notes in Computer Science},
  volume     = {7087},
  pages      = {97--108},
  publisher  = {Springer},
  year       = {2011},
  url        = {https://doi.org/10.1007/978-3-642-25367-6\_9},
  doi        = {10.1007/978-3-642-25367-6\_9},
  timestamp  = {Tue, 14 May 2019 10:00:47 +0200},
  biburl     = {https://dblp.org/rec/conf/psivt/KomaiAT11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/trecvid/PereraOLKBLMLMH11,
  author     = {A. G. Amitha Perera and Sangmin Oh and Matthew J. Leotta and Ilseo Kim and Byungki Byun and Chin{-}Hui Lee and Scott McCloskey and Jingchen Liu and Ben Miller and Zhi Feng Huang and Arash Vahdat and Weilong Yang and Greg Mori and Kevin Tang and Daphne Koller and Li Fei{-}Fei and Kang Li and Gang Chen and Jason J. Corso and Yun Fu and Rohini K. Srihari},
  editor     = {Paul Over and George Awad and Jonathan G. Fiscus and Brian Antonishek and Martial Michel and Alan F. Smeaton and Wessel Kraaij and Georges Qu{\'{e}}not},
  title      = {{GENIE} {TRECVID} 2011 Multimedia Event Detection: Late-Fusion Approaches to Combine Multiple Audio-Visual features},
  booktitle  = {2011 {TREC} Video Retrieval Evaluation, {TRECVID} 2011, Gaithersburg, MD, USA, December 5-7, 2011},
  publisher  = {National Institute of Standards and Technology {(NIST)}},
  year       = {2011},
  url        = {https://www-nlpir.nist.gov/projects/tvpubs/tv11.papers/genie.pdf},
  timestamp  = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/trecvid/PereraOLKBLMLMH11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/trustcom/LiuWZT11,
  author     = {Yizhi Liu and Xiangdong Wang and Yongdong Zhang and Sheng Tang},
  title      = {Fusing Audio-Words with Visual Features for Pornographic Video Detection},
  booktitle  = {{IEEE} 10th International Conference on Trust, Security and Privacy in Computing and Communications, TrustCom 2011, Changsha, China, 16-18 November, 2011},
  pages      = {1488--1493},
  publisher  = {{IEEE} Computer Society},
  year       = {2011},
  url        = {https://doi.org/10.1109/TrustCom.2011.205},
  doi        = {10.1109/TRUSTCOM.2011.205},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/trustcom/LiuWZT11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/csl/DeanS10,
  author     = {David Dean and Sridha Sridharan},
  title      = {Dynamic visual features for audio-visual speaker verification},
  journal    = {Comput. Speech Lang.},
  volume     = {24},
  number     = {2},
  pages      = {136--149},
  year       = {2010},
  url        = {https://doi.org/10.1016/j.csl.2009.03.007},
  doi        = {10.1016/J.CSL.2009.03.007},
  timestamp  = {Thu, 20 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/csl/DeanS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijsc/ShahHN10,
  author     = {Dhaval Shah and Kyu Jeong Han and Shrikanth S. Narayanan},
  title      = {Robust Multimodal Person Recognition Using Low-Complexity Audio-Visual Feature Fusion Approaches},
  journal    = {Int. J. Semantic Comput.},
  volume     = {4},
  number     = {2},
  pages      = {155--179},
  year       = {2010},
  url        = {https://doi.org/10.1142/S1793351X10000985},
  doi        = {10.1142/S1793351X10000985},
  timestamp  = {Fri, 03 Jul 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijsc/ShahHN10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tmm/ShresthaBWS10,
  author     = {Prarthana Shrestha and Mauro Barbieri and Hans Weda and Dragan Sekulovski},
  title      = {Synchronization of Multiple Camera Videos Using Audio-Visual Features},
  journal    = {{IEEE} Trans. Multim.},
  volume     = {12},
  number     = {1},
  pages      = {79--92},
  year       = {2010},
  url        = {https://doi.org/10.1109/TMM.2009.2036285},
  doi        = {10.1109/TMM.2009.2036285},
  timestamp  = {Thu, 01 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tmm/ShresthaBWS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/vlsisp/MuneesawangGA10,
  author     = {Paisarn Muneesawang and Ling Guan and Tahir Amin},
  title      = {A New Learning Algorithm for the Fusion of Adaptive Audio-Visual Features for the Retrieval and Classification of Movie Clips},
  journal    = {J. Signal Process. Syst.},
  volume     = {59},
  number     = {2},
  pages      = {177--188},
  year       = {2010},
  url        = {https://doi.org/10.1007/s11265-008-0290-7},
  doi        = {10.1007/S11265-008-0290-7},
  timestamp  = {Thu, 12 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/vlsisp/MuneesawangGA10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/PetridisAP10,
  author     = {Stavros Petridis and Ali Asghar and Maja Pantic},
  title      = {Classifying laughter and speech using audio-visual feature prediction},
  booktitle  = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2010, 14-19 March 2010, Sheraton Dallas Hotel, Dallas, Texas, {USA}},
  pages      = {5254--5257},
  publisher  = {{IEEE}},
  year       = {2010},
  url        = {https://doi.org/10.1109/ICASSP.2010.5494992},
  doi        = {10.1109/ICASSP.2010.5494992},
  timestamp  = {Fri, 19 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/PetridisAP10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icmcs/BoyarAASCA10,
  author     = {Mujdat Boyar and {\"{O}}zg{\"{u}}r Alan and Samet Akpinar and Orkunt Sabuncu and Nihan K. {\c{C}}i{\c{c}}ekli and Ferda Nur Alpaslan},
  title      = {Event boundary detection using audio-visual features and web-casting texts with imprecise time information},
  booktitle  = {Proceedings of the 2010 {IEEE} International Conference on Multimedia and Expo, {ICME} 2010, 19-23 July 2010, Singapore},
  pages      = {578--583},
  publisher  = {{IEEE} Computer Society},
  year       = {2010},
  url        = {https://doi.org/10.1109/ICME.2010.5583864},
  doi        = {10.1109/ICME.2010.5583864},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icmcs/BoyarAASCA10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Title: the acronym {GMM} is braced so sentence-casing styles keep it uppercase.
@inproceedings{DBLP:conf/interspeech/ButkoN10,
  author     = {Taras Butko and Climent Nadeu},
  editor     = {Takao Kobayashi and Keikichi Hirose and Satoshi Nakamura},
  title      = {A fast one-pass-training feature selection technique for {GMM}-based acoustic event detection with audio-visual data},
  booktitle  = {{INTERSPEECH} 2010, 11th Annual Conference of the International Speech Communication Association, Makuhari, Chiba, Japan, September 26-30, 2010},
  pages      = {2338--2341},
  publisher  = {{ISCA}},
  year       = {2010},
  url        = {https://doi.org/10.21437/Interspeech.2010-640},
  doi        = {10.21437/INTERSPEECH.2010-640},
  timestamp  = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ButkoN10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Booktitle: the conference date range is written as "November 29 - December 3, 2010".
@inproceedings{DBLP:conf/iscslp/JiangWWSV10,
  author     = {Dongmei Jiang and Peng Wu and Fengna Wang and Hichem Sahli and Werner Verhelst},
  title      = {Audio visual speech recognition based on multi-stream {DBN} models with Articulatory Features},
  booktitle  = {7th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2010, November 29 - December 3, 2010, Tainan {\&} Sun Moon Lake, Taiwan},
  pages      = {190--193},
  publisher  = {{IEEE}},
  year       = {2010},
  url        = {https://doi.org/10.1109/ISCSLP.2010.5684915},
  doi        = {10.1109/ISCSLP.2010.5684915},
  timestamp  = {Wed, 16 Oct 2019 14:14:48 +0200},
  biburl     = {https://dblp.org/rec/conf/iscslp/JiangWWSV10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sac/RoyM10,
  author     = {Anindya Roy and S{\'{e}}bastien Marcel},
  editor     = {Sung Y. Shin and Sascha Ossowski and Michael Schumacher and Mathew J. Palakal and Chih{-}Cheng Hung},
  title      = {Visual processing-inspired fern-audio features for noise-robust speaker verification},
  booktitle  = {Proceedings of the 2010 {ACM} Symposium on Applied Computing (SAC), Sierre, Switzerland, March 22-26, 2010},
  pages      = {1491--1495},
  publisher  = {{ACM}},
  year       = {2010},
  url        = {https://doi.org/10.1145/1774088.1774407},
  doi        = {10.1145/1774088.1774407},
  timestamp  = {Sun, 02 Jun 2019 21:18:37 +0200},
  biburl     = {https://dblp.org/rec/conf/sac/RoyM10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@phdthesis{DBLP:phd/ch/Gurban09,
  author     = {Mihai Gurban},
  title      = {Multimodal feature extraction and fusion for audio-visual speech recognition},
  school     = {EPFL, Switzerland},
  year       = {2009},
  url        = {https://doi.org/10.5075/epfl-thesis-4292},
  doi        = {10.5075/EPFL-THESIS-4292},
  timestamp  = {Fri, 29 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/phd/ch/Gurban09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tsp/GurbanT09,
  author     = {Mihai Gurban and Jean{-}Philippe Thiran},
  title      = {Information theoretic feature extraction for audio-visual speech recognition},
  journal    = {{IEEE} Trans. Signal Process.},
  volume     = {57},
  number     = {12},
  pages      = {4765--4776},
  year       = {2009},
  url        = {https://doi.org/10.1109/TSP.2009.2026513},
  doi        = {10.1109/TSP.2009.2026513},
  timestamp  = {Tue, 10 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tsp/GurbanT09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Booktitle: the province is spelled "Shaanxi" (Xi'an is the capital of Shaanxi, not Shanxi).
@inproceedings{DBLP:conf/icig/JiangLRSV09,
  author     = {Dongmei Jiang and Peizhen Liu and Ilse Ravyse and Hichem Sahli and Werner Verhelst},
  title      = {Video Realistic Mouth Animation Based on an Audio Visual {DBN} Model with Articulatory Features and Constrained Asynchrony},
  booktitle  = {Proceedings of the Fifth International Conference on Image and Graphics, {ICIG} 2009, Xi'an, Shaanxi, China, 20-23 September 2009},
  pages      = {658--662},
  publisher  = {{IEEE} Computer Society},
  year       = {2009},
  url        = {https://doi.org/10.1109/ICIG.2009.51},
  doi        = {10.1109/ICIG.2009.51},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icig/JiangLRSV09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Booktitle: the province is spelled "Shaanxi" (Xi'an is the capital of Shaanxi, not Shanxi).
@inproceedings{DBLP:conf/icig/ZhuYL09,
  author     = {Songhao Zhu and Junchi Yan and Yuncai Liu},
  title      = {Improving Semantic Scene Categorization by Exploiting Audio-Visual Features},
  booktitle  = {Proceedings of the Fifth International Conference on Image and Graphics, {ICIG} 2009, Xi'an, Shaanxi, China, 20-23 September 2009},
  pages      = {435--440},
  publisher  = {{IEEE} Computer Society},
  year       = {2009},
  url        = {https://doi.org/10.1109/ICIG.2009.17},
  doi        = {10.1109/ICIG.2009.17},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icig/ZhuYL09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icip/WuZX09,
  author     = {Guanyong Wu and Jie Zhu and Haihua Xu},
  title      = {A hybrid visual feature extraction method for audio-visual speech recognition},
  booktitle  = {Proceedings of the International Conference on Image Processing, {ICIP} 2009, 7-10 November 2009, Cairo, Egypt},
  pages      = {1829--1832},
  publisher  = {{IEEE}},
  year       = {2009},
  url        = {https://doi.org/10.1109/ICIP.2009.5413573},
  doi        = {10.1109/ICIP.2009.5413573},
  timestamp  = {Thu, 19 Dec 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icip/WuZX09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/AlmajaiM09,
  author     = {Ibrahim Almajai and Ben Milner},
  title      = {Enhancing audio speech using visual speech features},
  booktitle  = {{INTERSPEECH} 2009, 10th Annual Conference of the International Speech Communication Association, Brighton, United Kingdom, September 6-10, 2009},
  pages      = {1959--1962},
  publisher  = {{ISCA}},
  year       = {2009},
  url        = {https://doi.org/10.21437/Interspeech.2009-576},
  doi        = {10.21437/INTERSPEECH.2009-576},
  timestamp  = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/AlmajaiM09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mmsp/DasB09,
  author     = {Amitava Das and Vaibhav Bedia},
  title      = {Audio-visual person authentication with multiple face-profiles and compressed-feature-dynamics signatures of spoken passwords},
  booktitle  = {2009 {IEEE} International Workshop on Multimedia Signal Processing, {MMSP} '09, Rio de Janeiro, Brazil, October 5-7, 2009},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2009},
  url        = {https://doi.org/10.1109/MMSP.2009.5293273},
  doi        = {10.1109/MMSP.2009.5293273},
  timestamp  = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl     = {https://dblp.org/rec/conf/mmsp/DasB09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun Mandarin is brace-protected in the title so that sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/smc/PaoLWL09,
  author    = {Tsang{-}Long Pao and Wen{-}Yuan Liao and Tsan{-}Nung Wu and Ching{-}Yi Lin},
  title     = {Automatic Visual Feature Extraction for {Mandarin} Audio-Visual Speech Recognition},
  booktitle = {Proceedings of the {IEEE} International Conference on Systems, Man and Cybernetics, San Antonio, TX, USA, 11-14 October 2009},
  pages     = {2936--2940},
  publisher = {{IEEE}},
  year      = {2009},
  url       = {https://doi.org/10.1109/ICSMC.2009.5346011},
  doi       = {10.1109/ICSMC.2009.5346011},
  timestamp = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl    = {https://dblp.org/rec/conf/smc/PaoLWL09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ieicet/DendaNY08,
  author    = {Yuki Denda and Takanobu Nishiura and Yoichi Yamashita},
  title     = {Omnidirectional Audio-Visual Talker Localization Based on Dynamic Fusion of Audio-Visual Features Using Validity and Reliability Criteria},
  journal   = {{IEICE} Trans. Inf. Syst.},
  volume    = {91-D},
  number    = {3},
  pages     = {598--606},
  year      = {2008},
  url       = {https://doi.org/10.1093/ietisy/e91-d.3.598},
  doi       = {10.1093/IETISY/E91-D.3.598},
  timestamp = {Sat, 11 Apr 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ieicet/DendaNY08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/avsp/HaqJE08,
  author    = {Sanaul Haq and Philip J. B. Jackson and James D. Edge},
  editor    = {Roland G{\"{o}}cke and Patrick Lucey and Simon Lucey},
  title     = {Audio-visual feature selection and reduction for emotion classification},
  booktitle = {International Conference on Auditory-Visual Speech Processing 2008, Moreton Island, Queensland, Australia, September 26-29, 2008},
  pages     = {185--190},
  publisher = {{ISCA}},
  year      = {2008},
  url       = {http://www.isca-speech.org/archive\_open/avsp08/av08\_185.html},
  timestamp = {Wed, 10 Feb 2021 22:00:47 +0100},
  biburl    = {https://dblp.org/rec/conf/avsp/HaqJE08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eusipco/AlmajaiM08,
  author    = {Ibrahim Almajai and Ben P. Milner},
  title     = {Using audio-visual features for robust voice activity detection in clean and noisy speech},
  booktitle = {2008 16th European Signal Processing Conference, {EUSIPCO} 2008, Lausanne, Switzerland, August 25-29, 2008},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2008},
  url       = {https://ieeexplore.ieee.org/document/7080692/},
  timestamp = {Mon, 09 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/eusipco/AlmajaiM08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eusipco/DemirEYT08,
  author    = {Yasemin Demir and Engin Erzin and Y{\"{u}}cel Yemez and A. Murat Tekalp},
  title     = {Evaluation of audio features for audio-visual analysis of dance figures},
  booktitle = {2008 16th European Signal Processing Conference, {EUSIPCO} 2008, Lausanne, Switzerland, August 25-29, 2008},
  pages     = {1--4},
  publisher = {{IEEE}},
  year      = {2008},
  url       = {https://ieeexplore.ieee.org/document/7080401/},
  timestamp = {Mon, 09 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/eusipco/DemirEYT08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icip/TerrySK08,
  author    = {Louis H. Terry and Derek J. Shiell and Aggelos K. Katsaggelos},
  title     = {Feature space video stream consistency estimation for dynamic stream weighting in audio-visual speech recognition},
  booktitle = {Proceedings of the International Conference on Image Processing, {ICIP} 2008, October 12-15, 2008, San Diego, California, {USA}},
  pages     = {1316--1319},
  publisher = {{IEEE}},
  year      = {2008},
  url       = {https://doi.org/10.1109/ICIP.2008.4712005},
  doi       = {10.1109/ICIP.2008.4712005},
  timestamp = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icip/TerrySK08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icvgip/DasMT08,
  author    = {Amitava Das and Ohil K. Manyam and Makarand Tapaswi},
  title     = {Audio-Visual Person Authentication with Multiple Visualized-Speech Features and Multiple Face Profiles},
  booktitle = {Sixth Indian Conference on Computer Vision, Graphics {\&} Image Processing, {ICVGIP} 2008, Bhubaneswar, India, 16-19 December 2008},
  pages     = {39--46},
  publisher = {{IEEE} Computer Society},
  year      = {2008},
  url       = {https://doi.org/10.1109/ICVGIP.2008.106},
  doi       = {10.1109/ICVGIP.2008.106},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icvgip/DasMT08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ism/MowerMN08,
  author    = {Emily Mower and Maja J. Mataric and Shrikanth S. Narayanan},
  title     = {Selection of Emotionally Salient Audio-Visual Features for Modeling Human Evaluations of Synthetic Character Emotion Displays},
  booktitle = {Tenth {IEEE} International Symposium on Multimedia (ISM2008), December 15-17, 2008, Berkeley, California, {USA}},
  pages     = {190--195},
  publisher = {{IEEE} Computer Society},
  year      = {2008},
  url       = {https://doi.org/10.1109/ISM.2008.78},
  doi       = {10.1109/ISM.2008.78},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ism/MowerMN08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:series/asc/Chaloupka08,
  author    = {Josef Chaloupka},
  editor    = {Marek Kurzynski and Edward Puchala and Michal Wozniak and Andrzej Zolnierek},
  title     = {Extraction of the Visual Features from the Audio-Visual Speech Signal and the Utilization of These Features for the Speaker Identification},
  booktitle = {Computer Recognition Systems 2},
  series    = {Advances in Soft Computing},
  volume    = {45},
  pages     = {413--420},
  publisher = {Springer},
  year      = {2008},
  url       = {https://doi.org/10.1007/978-3-540-75175-5\_52},
  doi       = {10.1007/978-3-540-75175-5\_52},
  timestamp = {Thu, 07 Nov 2019 15:14:22 +0100},
  biburl    = {https://dblp.org/rec/series/asc/Chaloupka08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmui/ChituRWW07,
  author    = {Alin Gavril Chitu and L{\'{e}}on J. M. Rothkrantz and Pascal Wiggers and Jacek C. Wojdel},
  title     = {Comparison between different feature extraction techniques for audio-visual speech recognition},
  journal   = {J. Multimodal User Interfaces},
  volume    = {1},
  number    = {1},
  pages     = {7--20},
  year      = {2007},
  url       = {https://doi.org/10.1007/BF02884428},
  doi       = {10.1007/BF02884428},
  timestamp = {Sun, 28 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmui/ChituRWW07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tce/OtsukaSKD07,
  author    = {Isao Otsuka and Hidetsugu Suginohara and Yoshiaki Kusunoki and Ajay Divakaran},
  title     = {Detection of music segment boundaries using audio-visual features for a personal video recorder},
  journal   = {{IEEE} Trans. Consumer Electron.},
  volume    = {53},
  number    = {1},
  pages     = {150--154},
  year      = {2007},
  url       = {https://doi.org/10.1109/TCE.2007.339517},
  doi       = {10.1109/TCE.2007.339517},
  timestamp = {Thu, 09 Jul 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tce/OtsukaSKD07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aiprf/HuangZM07,
  author    = {Lin Huang and Hanqi Zhuang and Salvatore D. Morgera},
  editor    = {Dimitris A. Karras and Chunping Li and Zoran Majkic and S. R. Mahadeva Prasanna},
  title     = {Audio-visual Based Person Recognition with Fusion at Feature Level},
  booktitle = {International Conference on Artificial Intelligence and Pattern Recognition, AIPR-07, Orlando, Florida, USA, July 9-12, 2007},
  pages     = {249--254},
  publisher = {{ISRST}},
  year      = {2007},
  timestamp = {Wed, 07 Aug 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aiprf/HuangZM07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/avsp/GanMY07,
  author    = {Tian Gan and Wolfgang Menzel and Shiqiang Yang},
  editor    = {Jean Vroomen and Marc Swerts and Emiel Krahmer},
  title     = {An audio-visual speech recognition framework based on articulatory features},
  booktitle = {Auditory-Visual Speech Processing 2007, {AVSP} 2007, Hilvarenbeek, The Netherlands, August 31 - September 3, 2007},
  pages     = {1},
  publisher = {{ISCA}},
  year      = {2007},
  url       = {http://www.isca-speech.org/archive\_open/avsp07/av07\_P01.html},
  timestamp = {Wed, 10 Feb 2021 22:00:49 +0100},
  biburl    = {https://dblp.org/rec/conf/avsp/GanMY07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/avss/KlausnerTLER07,
  author    = {Andreas Klausner and Allan Tengg and Christian Leistner and Stefan Erb and Bernhard Rinner},
  title     = {An audio-visual sensor fusion approach for feature based vehicle identification},
  booktitle = {Fourth {IEEE} International Conference on Advanced Video and Signal Based Surveillance, {AVSS} 2007, 5-7 September, 2007, Queen Mary, University of London, London, United Kingdom},
  pages     = {111--116},
  publisher = {{IEEE} Computer Society},
  year      = {2007},
  url       = {https://doi.org/10.1109/AVSS.2007.4425295},
  doi       = {10.1109/AVSS.2007.4425295},
  timestamp = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/avss/KlausnerTLER07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/clear/BernardinGS07,
  author    = {Keni Bernardin and Tobias Gehrig and Rainer Stiefelhagen},
  editor    = {Rainer Stiefelhagen and Rachel Bowers and Jonathan G. Fiscus},
  title     = {Multi-level Particle Filter Fusion of Features and Cues for Audio-Visual Person Tracking},
  booktitle = {Multimodal Technologies for Perception of Humans, International Evaluation Workshops {CLEAR} 2007 and {RT} 2007, Baltimore, MD, USA, May 8-11, 2007, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {4625},
  pages     = {70--81},
  publisher = {Springer},
  year      = {2007},
  url       = {https://doi.org/10.1007/978-3-540-68585-2\_5},
  doi       = {10.1007/978-3-540-68585-2\_5},
  timestamp = {Tue, 14 May 2019 10:00:42 +0200},
  biburl    = {https://dblp.org/rec/conf/clear/BernardinGS07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eusipco/CarbonerasGT07,
  author    = {Andres Valles Carboneras and Mihai Gurban and Jean{-}Philippe Thiran},
  title     = {Low-dimensional motion features for audio-visual speech recognition},
  booktitle = {15th European Signal Processing Conference, {EUSIPCO} 2007, Poznan, Poland, September 3-7, 2007},
  pages     = {297--301},
  publisher = {{IEEE}},
  year      = {2007},
  url       = {https://ieeexplore.ieee.org/document/7098812/},
  timestamp = {Mon, 09 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/eusipco/CarbonerasGT07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/LivescuCHKBBKLYBDWFMS07,
  author    = {Karen Livescu and {\"{O}}zg{\"{u}}r {\c{C}}etin and Mark Hasegawa{-}Johnson and Simon King and Chris D. Bartels and Nash M. Borges and Arthur Kantor and Partha Lal and Lisa Yung and Ari Bezman and Stephen Dawson{-}Haggerty and Bronwyn Woods and Joe Frankel and Mathew Magimai{-}Doss and Kate Saenko},
  title     = {Articulatory Feature-Based Methods for Acoustic and Audio-Visual Speech Recognition: Summary from the 2006 {JHU} Summer workshop},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2007, Honolulu, Hawaii, USA, April 15-20, 2007},
  pages     = {621--624},
  publisher = {{IEEE}},
  year      = {2007},
  url       = {https://doi.org/10.1109/ICASSP.2007.366989},
  doi       = {10.1109/ICASSP.2007.366989},
  timestamp = {Mon, 29 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/LivescuCHKBBKLYBDWFMS07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/DendaNY07,
  author    = {Yuki Denda and Takanobu Nishiura and Yoichi Yamashita},
  title     = {Omnidirectional audio-visual talker localizer with dynamic feature fusion based on validity and reliability criteria},
  booktitle = {{INTERSPEECH} 2007, 8th Annual Conference of the International Speech Communication Association, Antwerp, Belgium, August 27-31, 2007},
  pages     = {726--729},
  publisher = {{ISCA}},
  year      = {2007},
  url       = {https://doi.org/10.21437/Interspeech.2007-300},
  doi       = {10.21437/INTERSPEECH.2007-300},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/DendaNY07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The acronym 2DPCA is brace-protected in the title so that sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/interspeech/WuZ07,
  author    = {Guanyong Wu and Jie Zhu},
  title     = {An extension {2DPCA} based visual feature extraction method for audio-visual speech recognition},
  booktitle = {{INTERSPEECH} 2007, 8th Annual Conference of the International Speech Communication Association, Antwerp, Belgium, August 27-31, 2007},
  pages     = {714--717},
  publisher = {{ISCA}},
  year      = {2007},
  url       = {https://doi.org/10.21437/Interspeech.2007-297},
  doi       = {10.21437/INTERSPEECH.2007-297},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/WuZ07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/KoiwaNI07,
  author    = {Tomoaki Koiwa and Kazuhiro Nakadai and Jun{-}ichi Imura},
  title     = {Coarse speech recognition by audio-visual integration based on missing feature theory},
  booktitle = {2007 {IEEE/RSJ} International Conference on Intelligent Robots and Systems, October 29 - November 2, 2007, Sheraton Hotel and Marina, San Diego, California, {USA}},
  pages     = {1751--1756},
  publisher = {{IEEE}},
  year      = {2007},
  url       = {https://doi.org/10.1109/IROS.2007.4399300},
  doi       = {10.1109/IROS.2007.4399300},
  timestamp = {Fri, 27 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iros/KoiwaNI07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mmsp/DrugmanGT07,
  author    = {Thomas Drugman and Mihai Gurban and Jean{-}Philippe Thiran},
  title     = {Relevant Feature Selection for Audio-Visual Speech Recognition},
  booktitle = {{IEEE} 9th Workshop on Multimedia Signal Processing, {MMSP} 2007, Chania, Crete, Greece, October 1-3, 2007},
  pages     = {179--182},
  publisher = {{IEEE}},
  year      = {2007},
  url       = {https://doi.org/10.1109/MMSP.2007.4412847},
  doi       = {10.1109/MMSP.2007.4412847},
  timestamp = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl    = {https://dblp.org/rec/conf/mmsp/DrugmanGT07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/premi/ChettyW07,
  author    = {Girija Chetty and Michael Wagner},
  editor    = {Ashish Ghosh and Rajat K. De and Sankar K. Pal},
  title     = {Audio Visual Speaker Verification Based on Hybrid Fusion of Cross Modal Features},
  booktitle = {Pattern Recognition and Machine Intelligence, Second International Conference, PReMI 2007, Kolkata, India, December 18-22, 2007, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {4815},
  pages     = {469--478},
  publisher = {Springer},
  year      = {2007},
  url       = {https://doi.org/10.1007/978-3-540-77046-6\_58},
  doi       = {10.1007/978-3-540-77046-6\_58},
  timestamp = {Tue, 14 May 2019 10:00:41 +0200},
  biburl    = {https://dblp.org/rec/conf/premi/ChettyW07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icb/WuCM06,
  author    = {Zhiyong Wu and Lianhong Cai and Helen M. Meng},
  editor    = {David Zhang and Anil K. Jain},
  title     = {Multi-level Fusion of Audio and Visual Features for Speaker Identification},
  booktitle = {Advances in Biometrics, International Conference, {ICB} 2006, Hong Kong, China, January 5-7, 2006, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {3832},
  pages     = {493--499},
  publisher = {Springer},
  year      = {2006},
  url       = {https://doi.org/10.1007/11608288\_66},
  doi       = {10.1007/11608288\_66},
  timestamp = {Sat, 08 May 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icb/WuCM06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/AlmajaiMD06,
  author    = {Ibrahim Almajai and Ben Milner and Jonathan Darch},
  title     = {Analysis of correlation between audio and visual speech features for clean audio feature prediction in noise},
  booktitle = {{INTERSPEECH} 2006 - ICSLP, Ninth International Conference on Spoken Language Processing, Pittsburgh, PA, USA, September 17-21, 2006},
  publisher = {{ISCA}},
  year      = {2006},
  url       = {https://doi.org/10.21437/Interspeech.2006-619},
  doi       = {10.21437/INTERSPEECH.2006-619},
  timestamp = {Thu, 22 Jun 2023 16:42:16 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/AlmajaiMD06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mlmi/Al-HamesHSR06,
  author    = {Marc A. Al{-}Hames and Benedikt H{\"{o}}rnler and Christoph Scheuermann and Gerhard Rigoll},
  editor    = {Steve Renals and Samy Bengio and Jonathan G. Fiscus},
  title     = {Using Audio, Visual, and Lexical Features in a Multi-modal Virtual Meeting Director},
  booktitle = {Machine Learning for Multimodal Interaction, Third International Workshop, {MLMI} 2006, Bethesda, MD, USA, May 1-4, 2006, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {4299},
  pages     = {63--74},
  publisher = {Springer},
  year      = {2006},
  url       = {https://doi.org/10.1007/11965152\_6},
  doi       = {10.1007/11965152\_6},
  timestamp = {Tue, 31 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/mlmi/Al-HamesHSR06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mmsp/KepesiNPGGJ06,
  author    = {Mari{\'{a}}n K{\'{e}}pesi and Michael Neffe and Tuan Van Pham and Michael Grabner and Helmut Grabner and Andreas Juffinger},
  title     = {Audio-Visual Feature Extraction for Semi-Automatic Annotation of Meetings},
  booktitle = {{IEEE} 8th Workshop on Multimedia Signal Processing, {MMSP} 2006, Victoria, BC, Canada, October 3-6, 2006},
  pages     = {207--211},
  publisher = {{IEEE}},
  year      = {2006},
  url       = {https://doi.org/10.1109/MMSP.2006.285298},
  doi       = {10.1109/MMSP.2006.285298},
  timestamp = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl    = {https://dblp.org/rec/conf/mmsp/KepesiNPGGJ06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcsv/SadlierO05,
  author    = {David A. Sadlier and Noel E. O'Connor},
  title     = {Event detection in field sports video using audio-visual features and a support vector Machine},
  journal   = {{IEEE} Trans. Circuits Syst. Video Technol.},
  volume    = {15},
  number    = {10},
  pages     = {1225--1233},
  year      = {2005},
  url       = {https://doi.org/10.1109/TCSVT.2005.854237},
  doi       = {10.1109/TCSVT.2005.854237},
  timestamp = {Tue, 25 Aug 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tcsv/SadlierO05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eusipco/SarginEYT05,
  author    = {Mehmet Emre Sargin and Engin Erzin and Y{\"{u}}cel Yemez and A. Murat Tekalp},
  title     = {Lip feature extraction based on audio-visual correlation},
  booktitle = {13th European Signal Processing Conference, {EUSIPCO} 2005, Antalya, Turkey, September 4-8, 2005},
  pages     = {1--4},
  publisher = {{IEEE}},
  year      = {2005},
  url       = {https://ieeexplore.ieee.org/document/7077967/},
  timestamp = {Mon, 09 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/eusipco/SarginEYT05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChangMC05,
  author    = {Shih{-}Fu Chang and R. Manmatha and Tat{-}Seng Chua},
  title     = {Combining text and audio-visual features in video indexing},
  booktitle = {2005 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} '05, Philadelphia, Pennsylvania, USA, March 18-23, 2005},
  pages     = {1005--1008},
  publisher = {{IEEE}},
  year      = {2005},
  url       = {https://doi.org/10.1109/ICASSP.2005.1416476},
  doi       = {10.1109/ICASSP.2005.1416476},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ChangMC05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The acronym HMM is brace-protected in the title so that sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/icmcs/HuangMV05,
  author    = {Jing Huang and Etienne Marcheret and Karthik Visweswariah},
  title     = {Rapid Feature Space Speaker Adaptation for Multi-Stream {HMM}-Based Audio-Visual Speech Recognition},
  booktitle = {Proceedings of the 2005 {IEEE} International Conference on Multimedia and Expo, {ICME} 2005, July 6-9, 2005, Amsterdam, The Netherlands},
  pages     = {338--341},
  publisher = {{IEEE} Computer Society},
  year      = {2005},
  url       = {https://doi.org/10.1109/ICME.2005.1521429},
  doi       = {10.1109/ICME.2005.1521429},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icmcs/HuangMV05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icnc/KimRK05,
  author    = {Myung{-}Won Kim and Joung Woo Ryu and Eun Ju Kim},
  editor    = {Lipo Wang and Ke Chen and Yew{-}Soon Ong},
  title     = {Speech Recognition by Integrating Audio, Visual and Contextual Features Based on Neural Networks},
  booktitle = {Advances in Natural Computation, First International Conference, {ICNC} 2005, Changsha, China, August 27-29, 2005, Proceedings, Part {II}},
  series    = {Lecture Notes in Computer Science},
  volume    = {3611},
  pages     = {155--164},
  publisher = {Springer},
  year      = {2005},
  url       = {https://doi.org/10.1007/11539117\_25},
  doi       = {10.1007/11539117\_25},
  timestamp = {Sun, 02 Jun 2019 21:14:27 +0200},
  biburl    = {https://dblp.org/rec/conf/icnc/KimRK05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The mixed-case acronym fMPE is brace-protected in the title so that case-changing styles leave it intact.
@inproceedings{DBLP:conf/interspeech/HuangP05,
  author    = {Jing Huang and Daniel Povey},
  title     = {Discriminatively trained features using {fMPE} for multi-stream audio-visual speech recognition},
  booktitle = {{INTERSPEECH} 2005 - Eurospeech, 9th European Conference on Speech Communication and Technology, Lisbon, Portugal, September 4-8, 2005},
  pages     = {777--780},
  publisher = {{ISCA}},
  year      = {2005},
  url       = {https://doi.org/10.21437/Interspeech.2005-361},
  doi       = {10.21437/INTERSPEECH.2005-361},
  timestamp = {Thu, 22 Jun 2023 16:42:16 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/HuangP05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HuangV05,
  author    = {Jing Huang and Karthik Visweswariah},
  title     = {Improving lip-reading with feature space transforms for multi-stream audio-visual speech recognition},
  booktitle = {{INTERSPEECH} 2005 - Eurospeech, 9th European Conference on Speech Communication and Technology, Lisbon, Portugal, September 4-8, 2005},
  pages     = {1221--1224},
  publisher = {{ISCA}},
  year      = {2005},
  url       = {https://doi.org/10.21437/Interspeech.2005-373},
  doi       = {10.21437/INTERSPEECH.2005-373},
  timestamp = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/HuangV05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/isspa/LewisP05,
  author    = {Trent W. Lewis and David M. W. Powers},
  title     = {Distinctive feature fusion for improved audio-visual phoneme recognition},
  booktitle = {Proceedings of the Eighth International Symposium on Signal Processing and Its Applications, {ISSPA} 2005, 28-31 August 2005, Sydney, Australia},
  pages     = {62--65},
  publisher = {{IEEE}},
  year      = {2005},
  url       = {https://doi.org/10.1109/ISSPA.2005.1580196},
  doi       = {10.1109/ISSPA.2005.1580196},
  timestamp = {Wed, 16 Oct 2019 14:14:56 +0200},
  biburl    = {https://dblp.org/rec/conf/isspa/LewisP05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tsmc/KaynakZCSJC04,
  author    = {Mustafa Nazmi Kaynak and Qi Zhi and Adrian David Cheok and Kuntal Sengupta and Jian Zhang and Chi Chung Ko},
  title     = {Analysis of lip geometric features for audio-visual speech recognition},
  journal   = {{IEEE} Trans. Syst. Man Cybern. Part {A}},
  volume    = {34},
  number    = {4},
  pages     = {564--570},
  year      = {2004},
  url       = {https://doi.org/10.1109/TSMCA.2004.826274},
  doi       = {10.1109/TSMCA.2004.826274},
  timestamp = {Mon, 25 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tsmc/KaynakZCSJC04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/AleksicK04,
  author    = {Petar S. Aleksic and Aggelos K. Katsaggelos},
  title     = {Comparison of low- and high-level visual features for audio-visual continuous automatic speech recognition},
  booktitle = {2004 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2004, Montreal, Quebec, Canada, May 17-21, 2004},
  pages     = {917--920},
  publisher = {{IEEE}},
  year      = {2004},
  url       = {https://doi.org/10.1109/ICASSP.2004.1327261},
  doi       = {10.1109/ICASSP.2004.1327261},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/AleksicK04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/JiangPNIN04,
  author    = {Jintao Jiang and Gerasimos Potamianos and Harriet J. Nock and Giridharan Iyengar and Chalapathy Neti},
  title     = {Improved face and feature finding for audio-visual speech recognition in visually challenging environments},
  booktitle = {2004 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2004, Montreal, Quebec, Canada, May 17-21, 2004},
  pages     = {873--876},
  publisher = {{IEEE}},
  year      = {2004},
  url       = {https://doi.org/10.1109/ICASSP.2004.1327250},
  doi       = {10.1109/ICASSP.2004.1327250},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/JiangPNIN04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mir/XuC04,
  author    = {Huaxin Xu and Tat{-}Seng Chua},
  editor    = {Michael S. Lew and Nicu Sebe and Chabane Djeraba},
  title     = {The fusion of audio-visual features and external knowledge for event detection in team sports video},
  booktitle = {Proceedings of the 6th {ACM} {SIGMM} International Workshop on Multimedia Information Retrieval, {MIR} 2004, October 15-16, 2004, New York, NY, {USA}},
  pages     = {127--134},
  publisher = {{ACM}},
  year      = {2004},
  url       = {https://doi.org/10.1145/1026711.1026733},
  doi       = {10.1145/1026711.1026733},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/mir/XuC04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/webi/Liu04,
  author    = {Huayong Liu},
  title     = {Content-Based {TV} Sports Video Retrieval Based on Audio-Visual Features and Text Information},
  booktitle = {2004 {IEEE/WIC/ACM} International Conference on Web Intelligence {(WI} 2004), 20-24 September 2004, Beijing, China},
  pages     = {481--484},
  publisher = {{IEEE} Computer Society},
  year      = {2004},
  url       = {https://doi.org/10.1109/WI.2004.10107},
  doi       = {10.1109/WI.2004.10107},
  timestamp = {Thu, 23 Mar 2023 14:30:18 +0100},
  biburl    = {https://dblp.org/rec/conf/webi/Liu04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jvcir/WangDVCS03,
  author    = {Hualu Wang and Ajay Divakaran and Anthony Vetro and Shih{-}Fu Chang and Huifang Sun},
  title     = {Survey of compressed-domain features used in audio-visual indexing and analysis},
  journal   = {J. Vis. Commun. Image Represent.},
  volume    = {14},
  number    = {2},
  pages     = {150--183},
  year      = {2003},
  url       = {https://doi.org/10.1016/S1047-3203(03)00019-1},
  doi       = {10.1016/S1047-3203(03)00019-1},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/jvcir/WangDVCS03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Proceedings title corrected: Biometric was misspelled as Biometrie and the space after the suspended hyphen in Audio- and was missing.
@inproceedings{DBLP:conf/avbpa/FoxR03,
  author    = {Niall A. Fox and Richard B. Reilly},
  editor    = {Josef Kittler and Mark S. Nixon},
  title     = {Audio-Visual Speaker Identification Based on the Use of Dynamic Audio and Visual Features},
  booktitle = {Audio- and Video-Based Biometric Person Authentication, 4th International Conference, {AVBPA} 2003, Guildford, UK, June 9-11, 2003 Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {2688},
  pages     = {743--751},
  publisher = {Springer},
  year      = {2003},
  url       = {https://doi.org/10.1007/3-540-44887-X\_86},
  doi       = {10.1007/3-540-44887-X\_86},
  timestamp = {Mon, 15 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/avbpa/FoxR03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icip/SuganoINY03,
  author    = {Masaru Sugano and Roger Isaksson and Yasuyuki Nakajima and Hiromasa Yanagihara},
  title     = {Shot genre classification using compressed audio-visual features},
  booktitle = {Proceedings of the 2003 International Conference on Image Processing, {ICIP} 2003, Barcelona, Catalonia, Spain, September 14-18, 2003},
  pages     = {17--20},
  publisher = {{IEEE}},
  year      = {2003},
  url       = {https://doi.org/10.1109/ICIP.2003.1246605},
  doi       = {10.1109/ICIP.2003.1246605},
  timestamp = {Tue, 02 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icip/SuganoINY03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/spieVIP/LeeYK03,
  author    = {Shih{-}Hung Lee and Chia{-}Hung Yeh and C.{-}C. Jay Kuo},
  editor    = {Zia{-}ur Rahman and Robert A. Schowengerdt and Stephen E. Reichenbach},
  title     = {Robust {TV} commercial detection based on audiovisual features},
  booktitle = {Visual Information Processing XII, Orlando, FL, USA, April 21, 2003},
  series    = {{SPIE} Proceedings},
  volume    = {5108},
  pages     = {147--158},
  publisher = {{SPIE}},
  year      = {2003},
  url       = {https://doi.org/10.1117/12.486775},
  doi       = {10.1117/12.486775},
  timestamp = {Fri, 06 May 2022 13:45:23 +0200},
  biburl    = {https://dblp.org/rec/conf/spieVIP/LeeYK03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ejasp/AleksicWWK02,
  author    = {Petar S. Aleksic and Jay J. Williams and Zhilin Wu and Aggelos K. Katsaggelos},
  title     = {Audio-Visual Speech Recognition Using {MPEG-4} Compliant Visual Features},
  journal   = {{EURASIP} J. Adv. Signal Process.},
  volume    = {2002},
  number    = {11},
  pages     = {1213--1227},
  year      = {2002},
  url       = {https://doi.org/10.1155/S1110865702206162},
  doi       = {10.1155/S1110865702206162},
  timestamp = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ejasp/AleksicWWK02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/civr/KimCKK02,
  author    = {Kyungsu Kim and Junho Choi and Namjung Kim and Pankoo Kim},
  editor    = {Michael S. Lew and Nicu Sebe and John P. Eakins},
  title     = {Extracting Semantic Information from Basketball Video Based on Audio-Visual Features},
  booktitle = {Image and Video Retrieval, International Conference, {CIVR} 2002, London, UK, July 18-19, 2002, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {2383},
  pages     = {278--288},
  publisher = {Springer},
  year      = {2002},
  url       = {https://doi.org/10.1007/3-540-45479-9\_30},
  doi       = {10.1007/3-540-45479-9\_30},
  timestamp = {Wed, 16 Jun 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/civr/KimCKK02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/GoeckePN02,
  author    = {Roland Goecke and Gerasimos Potamianos and Chalapathy Neti},
  title     = {Noisy audio feature enhancement using audio-visual speech data},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2002, May 13-17 2002, Orlando, Florida, {USA}},
  pages     = {2025--2028},
  publisher = {{IEEE}},
  year      = {2002},
  url       = {https://doi.org/10.1109/ICASSP.2002.5745030},
  doi       = {10.1109/ICASSP.2002.5745030},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/GoeckePN02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icip/AleksicWWK02,
  author    = {Petar S. Aleksic and Jay J. Williams and Zhilin Wu and Aggelos K. Katsaggelos},
  title     = {Audio-visual continuous speech recognition using {MPEG-4} compliant visual features},
  booktitle = {Proceedings of the 2002 International Conference on Image Processing, {ICIP} 2002, Rochester, New York, USA, September 22-25, 2002},
  pages     = {960--963},
  publisher = {{IEEE}},
  year      = {2002},
  url       = {https://doi.org/10.1109/ICIP.2002.1038187},
  doi       = {10.1109/ICIP.2002.1038187},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icip/AleksicWWK02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICPR 2002: Rasheed, Shah.
@inproceedings{DBLP:conf/icpr/RasheedS02,
  author    = {Zeeshan Rasheed and Mubarak Shah},
  title     = {Movie Genre Classification By Exploiting Audio-Visual Features Of Previews},
  booktitle = {16th International Conference on Pattern Recognition, {ICPR} 2002, Quebec, Canada, August 11-15, 2002},
  pages     = {1086--1089},
  publisher = {{IEEE} Computer Society},
  year      = {2002},
  url       = {https://doi.org/10.1109/ICPR.2002.1048494},
  doi       = {10.1109/ICPR.2002.1048494},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icpr/RasheedS02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICSLP / INTERSPEECH 2002: Heckmann, Kroschel, Savariaux, Berthommier.
% The acronym DCT is brace-protected in the title so that styles which apply
% sentence casing do not lowercase it.
@inproceedings{DBLP:conf/interspeech/HeckmannKSB02,
  author    = {Martin Heckmann and Kristian Kroschel and Christophe Savariaux and Fr{\'{e}}d{\'{e}}ric Berthommier},
  editor    = {John H. L. Hansen and Bryan L. Pellom},
  title     = {{DCT}-based video features for audio-visual speech recognition},
  booktitle = {7th International Conference on Spoken Language Processing, {ICSLP2002} - {INTERSPEECH} 2002, Denver, Colorado, USA, September 16-20, 2002},
  pages     = {1925--1928},
  publisher = {{ISCA}},
  year      = {2002},
  url       = {https://doi.org/10.21437/ICSLP.2002-434},
  doi       = {10.21437/ICSLP.2002-434},
  timestamp = {Thu, 22 Jun 2023 16:42:18 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/HeckmannKSB02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% PhD thesis, University of Illinois Urbana-Champaign, 2001: Naphade.
% NOTE(review): the title spelling is normalised to Probabilistic (the exported
% record read Probablistic); confirm against the record behind the hdl URL.
@phdthesis{DBLP:phd/us/Naphade01,
  author    = {Milind R. Naphade},
  title     = {A Probabilistic Framework for Mapping Audio-Visual Features to High-Level Semantics in Terms of Concepts and Context},
  school    = {University of Illinois Urbana-Champaign, {USA}},
  year      = {2001},
  url       = {https://hdl.handle.net/2142/80716},
  timestamp = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/phd/us/Naphade01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICASSP 2001: Potamianos, Luettin, Neti.
@inproceedings{DBLP:conf/icassp/PotamianosLN01,
  author    = {Gerasimos Potamianos and Juergen Luettin and Chalapathy Neti},
  title     = {Hierarchical discriminant features for audio-visual {LVCSR}},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2001, 7-11 May, 2001, Salt Palace Convention Center, Salt Lake City, Utah, USA, Proceedings},
  pages     = {165--168},
  publisher = {{IEEE}},
  year      = {2001},
  url       = {https://doi.org/10.1109/ICASSP.2001.940793},
  doi       = {10.1109/ICASSP.2001.940793},
  timestamp = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/PotamianosLN01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICME 2001: Matthews, Potamianos, Neti, Luettin.
% NOTE(review): this record carries no pages field.
@inproceedings{DBLP:conf/icmcs/MatthewsPNL01,
  author    = {Iain A. Matthews and Gerasimos Potamianos and Chalapathy Neti and Juergen Luettin},
  title     = {A Comparison Of Model And Transform-Based Visual Features For Audio-Visual {LVCSR}},
  booktitle = {Proceedings of the 2001 {IEEE} International Conference on Multimedia and Expo, {ICME} 2001, August 22-25, 2001, Tokyo, Japan},
  publisher = {{IEEE} Computer Society},
  year      = {2001},
  url       = {https://doi.org/10.1109/ICME.2001.1237849},
  doi       = {10.1109/ICME.2001.1237849},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icmcs/MatthewsPNL01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICME 2001: Yoshitaka, Miyake.
% NOTE(review): this record carries no pages field.
@inproceedings{DBLP:conf/icmcs/YoshitakaM01,
  author    = {Atsuo Yoshitaka and Masato Miyake},
  title     = {Scene Detection by Audio-Visual Features},
  booktitle = {Proceedings of the 2001 {IEEE} International Conference on Multimedia and Expo, {ICME} 2001, August 22-25, 2001, Tokyo, Japan},
  publisher = {{IEEE} Computer Society},
  year      = {2001},
  url       = {https://doi.org/10.1109/ICME.2001.1237652},
  doi       = {10.1109/ICME.2001.1237652},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icmcs/YoshitakaM01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, MMSP 2001: Chan.
% The acronym HMM is brace-protected in the title so that styles which apply
% sentence casing keep it fully capitalised instead of rendering Hmm-based.
@inproceedings{DBLP:conf/mmsp/Chan01,
  author    = {Michael T. Chan},
  editor    = {Jean{-}Luc Dugelay and Kenneth Rose},
  title     = {{HMM}-based audio-visual speech recognition integrating geometric and appearance-based visual features},
  booktitle = {Fourth {IEEE} Workshop on Multimedia Signal Processing, {MMSP} 2001, Cannes, France, October 3-5, 2001},
  pages     = {9--14},
  publisher = {{IEEE}},
  year      = {2001},
  url       = {https://doi.org/10.1109/MMSP.2001.962703},
  doi       = {10.1109/MMSP.2001.962703},
  timestamp = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl    = {https://dblp.org/rec/conf/mmsp/Chan01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, Storage and Retrieval for Media Databases 2001 (SPIE Proceedings volume 4315): Li, Wei, Sethi, Dimitrova.
@inproceedings{DBLP:conf/spieSR/LiWSD01,
  author    = {Dongge Li and Gang Wei and Ishwar K. Sethi and Nevenka Dimitrova},
  editor    = {Minerva M. Yeung and Chung{-}Sheng Li and Rainer Lienhart},
  title     = {Fusion of visual and audio features for person identification in real video},
  booktitle = {Storage and Retrieval for Media Databases 2001, San Jose, CA, USA, January 24, 2001},
  series    = {{SPIE} Proceedings},
  volume    = {4315},
  pages     = {180--187},
  publisher = {{SPIE}},
  year      = {2001},
  url       = {https://doi.org/10.1117/12.410926},
  doi       = {10.1117/12.410926},
  timestamp = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/spieSR/LiWSD01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Journal article, IEEE Trans. Speech Audio Process. 8(4), 2000: Watanabe, Tomishige, Nakatake.
@article{DBLP:journals/taslp/WatanabeTN00,
  author    = {Akira Watanabe and Shingo Tomishige and Masahiro Nakatake},
  title     = {Speech visualization by integrating features for the hearing impaired},
  journal   = {{IEEE} Trans. Speech Audio Process.},
  volume    = {8},
  number    = {4},
  pages     = {454--466},
  year      = {2000},
  url       = {https://doi.org/10.1109/89.848226},
  doi       = {10.1109/89.848226},
  timestamp = {Sun, 17 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/taslp/WatanabeTN00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICIP 2000: Pan, Liang, Huang.
@inproceedings{DBLP:conf/icip/PanLH00,
  author    = {Hao Pan and Zhi{-}Pei Liang and Thomas S. Huang},
  title     = {Fusing Audio and Visual Features of Speech},
  booktitle = {Proceedings of the 2000 International Conference on Image Processing, {ICIP} 2000, Vancouver, BC, Canada, September 10-13, 2000},
  pages     = {214--217},
  publisher = {{IEEE}},
  year      = {2000},
  url       = {https://doi.org/10.1109/ICIP.2000.899333},
  doi       = {10.1109/ICIP.2000.899333},
  timestamp = {Fri, 12 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icip/PanLH00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICME 2000: Pan, Liang, Huang.
@inproceedings{DBLP:conf/icmcs/PanLH00,
  author    = {Hao Pan and Zhi{-}Pei Liang and Thomas S. Huang},
  title     = {A New Approach to Integrate Audio and Visual Features of Speech},
  booktitle = {2000 {IEEE} International Conference on Multimedia and Expo, {ICME} 2000, New York, NY, USA, July 30 - August 2, 2000},
  pages     = {1093--1096},
  publisher = {{IEEE} Computer Society},
  year      = {2000},
  url       = {https://doi.org/10.1109/ICME.2000.871551},
  doi       = {10.1109/ICME.2000.871551},
  timestamp = {Fri, 12 Jan 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icmcs/PanLH00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, AVSP '99: Niyogi, Petajan, Zhong.
% NOTE(review): pages holds the single value 16 and the url ends in av99_016,
% so this is presumably a paper number rather than a page range -- confirm.
% This record has a url but no doi field.
@inproceedings{DBLP:conf/avsp/NiyogiPZ99,
  author    = {Partha Niyogi and Eric Petajan and Jialin Zhong},
  editor    = {Dominic W. Massaro},
  title     = {Feature based representation for audio-visual speech recognition},
  booktitle = {Auditory-Visual Speech Processing, {AVSP} '99, Santa Cruz, CA, USA, August 7-10, 1999},
  pages     = {16},
  publisher = {{ISCA}},
  year      = {1999},
  url       = {http://www.isca-speech.org/archive\_open/avsp99/av99\_016.html},
  timestamp = {Wed, 10 Feb 2021 22:00:51 +0100},
  biburl    = {https://dblp.org/rec/conf/avsp/NiyogiPZ99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICIP-98: Pavlovic.
% The conference acronym ICIP-98 is brace-protected in booktitle, matching how
% the other proceedings entries in this file brace their acronyms, so a style
% that recases booktitle cannot lowercase it.
@inproceedings{DBLP:conf/icip/Pavlovic98,
  author    = {Vladimir Pavlovic},
  title     = {Multimodal Tracking and Classification of Audio-Visual Features},
  booktitle = {Proceedings of the 1998 {IEEE} International Conference on Image Processing, {ICIP-98}, Chicago, Illinois, USA, October 4-7, 1998},
  pages     = {343--347},
  publisher = {{IEEE} Computer Society},
  year      = {1998},
  url       = {https://doi.org/10.1109/ICIP.1998.723492},
  doi       = {10.1109/ICIP.1998.723492},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icip/Pavlovic98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information, see our F.A.Q.