@comment{Exported from the dblp publication search: results for "Xizhou Zhu", downloaded as a .bib file.}


@inproceedings{DBLP:conf/cvpr/ChenWW0CXZZZLLL24,
  author       = {Chen, Zhe and
                  Wu, Jiannan and
                  Wang, Wenhai and
                  Su, Weijie and
                  Chen, Guo and
                  Xing, Sen and
                  Zhong, Muyan and
                  Zhang, Qinglong and
                  Zhu, Xizhou and
                  Lu, Lewei and
                  Li, Bin and
                  Luo, Ping and
                  Lu, Tong and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {{InternVL}: Scaling up Vision Foundation Models and Aligning for
                  Generic Visual-Linguistic Tasks},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2024, Seattle, WA, USA, June 16-22, 2024},
  pages        = {24185--24198},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/CVPR52733.2024.02283},
  doi          = {10.1109/CVPR52733.2024.02283},
  timestamp    = {Mon, 07 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/ChenWW0CXZZZLLL24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/Li0WZZQWLLD24,
  author       = {Li, Hao and
                  Yang, Xue and
                  Wang, Zhaokai and
                  Zhu, Xizhou and
                  Zhou, Jie and
                  Qiao, Yu and
                  Wang, Xiaogang and
                  Li, Hongsheng and
                  Lu, Lewei and
                  Dai, Jifeng},
  title        = {Auto {MC-Reward}: Automated Dense Reward Design with Large Language
                  Models for {Minecraft}},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2024, Seattle, WA, USA, June 16-22, 2024},
  pages        = {16426--16435},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/CVPR52733.2024.01554},
  doi          = {10.1109/CVPR52733.2024.01554},
  timestamp    = {Mon, 07 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/Li0WZZQWLLD24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/XiongLCWZLWL00L24,
  author       = {Xiong, Yuwen and
                  Li, Zhiqi and
                  Chen, Yuntao and
                  Wang, Feng and
                  Zhu, Xizhou and
                  Luo, Jiapeng and
                  Wang, Wenhai and
                  Lu, Tong and
                  Li, Hongsheng and
                  Qiao, Yu and
                  Lu, Lewei and
                  Zhou, Jie and
                  Dai, Jifeng},
  title        = {Efficient Deformable {ConvNets}: Rethinking Dynamic and Sparse Operator
                  for Vision Applications},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2024, Seattle, WA, USA, June 16-22, 2024},
  pages        = {5652--5661},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/CVPR52733.2024.00540},
  doi          = {10.1109/CVPR52733.2024.00540},
  timestamp    = {Fri, 04 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/XiongLCWZLWL00L24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/TianTD0LL00HZ24,
  author       = {Tian, Changyao and
                  Tao, Chenxin and
                  Dai, Jifeng and
                  Li, Hao and
                  Li, Ziheng and
                  Lu, Lewei and
                  Wang, Xiaogang and
                  Li, Hongsheng and
                  Huang, Gao and
                  Zhu, Xizhou},
  title        = {{ADDP:} Learning General Representations for Image Recognition and
                  Generation with Alternating Denoising Diffusion Process},
  booktitle    = {The Twelfth International Conference on Learning Representations,
                  {ICLR} 2024, Vienna, Austria, May 7-11, 2024},
  publisher    = {OpenReview.net},
  year         = {2024},
  url          = {https://openreview.net/forum?id=cMPm8YFXZe},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/TianTD0LL00HZ24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/Wang0LWHXCLZ0CL24,
  author       = {Wang, Weiyun and
                  Shi, Min and
                  Li, Qingyun and
                  Wang, Wenhai and
                  Huang, Zhenhang and
                  Xing, Linjie and
                  Chen, Zhe and
                  Li, Hao and
                  Zhu, Xizhou and
                  Cao, Zhiguo and
                  Chen, Yushi and
                  Lu, Tong and
                  Dai, Jifeng and
                  Qiao, Yu},
  title        = {The All-Seeing Project: Towards Panoptic Visual Recognition and Understanding
                  of the Open World},
  booktitle    = {The Twelfth International Conference on Learning Representations,
                  {ICLR} 2024, Vienna, Austria, May 7-11, 2024},
  publisher    = {OpenReview.net},
  year         = {2024},
  url          = {https://openreview.net/forum?id=c2R7ajodcI},
  timestamp    = {Thu, 08 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/Wang0LWHXCLZ0CL24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2401-06197,
  author       = {Xiong, Yuwen and
                  Li, Zhiqi and
                  Chen, Yuntao and
                  Wang, Feng and
                  Zhu, Xizhou and
                  Luo, Jiapeng and
                  Wang, Wenhai and
                  Lu, Tong and
                  Li, Hongsheng and
                  Qiao, Yu and
                  Lu, Lewei and
                  Zhou, Jie and
                  Dai, Jifeng},
  title        = {Efficient Deformable {ConvNets}: Rethinking Dynamic and Sparse Operator
                  for Vision Applications},
  journal      = {CoRR},
  volume       = {abs/2401.06197},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.06197},
  doi          = {10.48550/ARXIV.2401.06197},
  eprinttype   = {arXiv},
  eprint       = {2401.06197},
  timestamp    = {Thu, 25 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-06197.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2401-08232,
  author       = {Zhang, Chongzhi and
                  Zhang, Mingyuan and
                  Teng, Zhiyang and
                  Li, Jiayi and
                  Zhu, Xizhou and
                  Lu, Lewei and
                  Liu, Ziwei and
                  Sun, Aixin},
  title        = {Multi-scale {2D} Temporal Map Diffusion Models for Natural Language
                  Video Localization},
  journal      = {CoRR},
  volume       = {abs/2401.08232},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.08232},
  doi          = {10.48550/ARXIV.2401.08232},
  eprinttype   = {arXiv},
  eprint       = {2401.08232},
  timestamp    = {Thu, 01 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-08232.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2401-10208,
  author       = {Tian, Changyao and
                  Zhu, Xizhou and
                  Xiong, Yuwen and
                  Wang, Weiyun and
                  Chen, Zhe and
                  Wang, Wenhai and
                  Chen, Yuntao and
                  Lu, Lewei and
                  Lu, Tong and
                  Zhou, Jie and
                  Li, Hongsheng and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {{MM-Interleaved}: Interleaved Image-Text Generative Modeling via
                  Multi-modal Feature Synchronizer},
  journal      = {CoRR},
  volume       = {abs/2401.10208},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.10208},
  doi          = {10.48550/ARXIV.2401.10208},
  eprinttype   = {arXiv},
  eprint       = {2401.10208},
  timestamp    = {Sun, 21 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-10208.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2402-19474,
  author       = {Wang, Weiyun and
                  Ren, Yiming and
                  Luo, Haowen and
                  Li, Tiantong and
                  Yan, Chenxiang and
                  Chen, Zhe and
                  Wang, Wenhai and
                  Li, Qingyun and
                  Lu, Lewei and
                  Zhu, Xizhou and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {The All-Seeing Project {V2:} Towards General Relation Comprehension
                  of the Open World},
  journal      = {CoRR},
  volume       = {abs/2402.19474},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2402.19474},
  doi          = {10.48550/ARXIV.2402.19474},
  eprinttype   = {arXiv},
  eprint       = {2402.19474},
  timestamp    = {Mon, 03 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2402-19474.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2403-02308,
  author       = {Duan, Yuchen and
                  Wang, Weiyun and
                  Chen, Zhe and
                  Zhu, Xizhou and
                  Lu, Lewei and
                  Lu, Tong and
                  Qiao, Yu and
                  Li, Hongsheng and
                  Dai, Jifeng and
                  Wang, Wenhai},
  title        = {{Vision-RWKV}: Efficient and Scalable Visual Perception with {RWKV}-Like
                  Architectures},
  journal      = {CoRR},
  volume       = {abs/2403.02308},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2403.02308},
  doi          = {10.48550/ARXIV.2403.02308},
  eprinttype   = {arXiv},
  eprint       = {2403.02308},
  timestamp    = {Fri, 19 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-02308.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2404-16821,
  author       = {Chen, Zhe and
                  Wang, Weiyun and
                  Tian, Hao and
                  Ye, Shenglong and
                  Gao, Zhangwei and
                  Cui, Erfei and
                  Tong, Wenwen and
                  Hu, Kongzhi and
                  Luo, Jiapeng and
                  Ma, Zheng and
                  Ma, Ji and
                  Wang, Jiaqi and
                  Dong, Xiaoyi and
                  Yan, Hang and
                  Guo, Hewei and
                  He, Conghui and
                  Shi, Botian and
                  Jin, Zhenjiang and
                  Xu, Chao and
                  Wang, Bin and
                  Wei, Xingjian and
                  Li, Wei and
                  Zhang, Wenjian and
                  Zhang, Bo and
                  Cai, Pinlong and
                  Wen, Licheng and
                  Yan, Xiangchao and
                  Dou, Min and
                  Lu, Lewei and
                  Zhu, Xizhou and
                  Lu, Tong and
                  Lin, Dahua and
                  Qiao, Yu and
                  Dai, Jifeng and
                  Wang, Wenhai},
  title        = {How Far Are We to {GPT-4V}? {Closing} the Gap to Commercial Multimodal
                  Models with Open-Source Suites},
  journal      = {CoRR},
  volume       = {abs/2404.16821},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2404.16821},
  doi          = {10.48550/ARXIV.2404.16821},
  eprinttype   = {arXiv},
  eprint       = {2404.16821},
  timestamp    = {Sat, 17 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2404-16821.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-04330,
  author       = {Zhu, Xizhou and
                  Yang, Xue and
                  Wang, Zhaokai and
                  Li, Hao and
                  Dou, Wenhan and
                  Ge, Junqi and
                  Lu, Lewei and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {Parameter-Inverted Image Pyramid Networks},
  journal      = {CoRR},
  volume       = {abs/2406.04330},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.04330},
  doi          = {10.48550/ARXIV.2406.04330},
  eprinttype   = {arXiv},
  eprint       = {2406.04330},
  timestamp    = {Tue, 06 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-04330.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-04342,
  author       = {Tao, Chenxin and
                  Zhu, Xizhou and
                  Su, Shiqian and
                  Lu, Lewei and
                  Tian, Changyao and
                  Luo, Xuan and
                  Huang, Gao and
                  Li, Hongsheng and
                  Qiao, Yu and
                  Zhou, Jie and
                  Dai, Jifeng},
  title        = {Learning {1D} Causal Visual Representation with De-focus Attention Networks},
  journal      = {CoRR},
  volume       = {abs/2406.04342},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.04342},
  doi          = {10.48550/ARXIV.2406.04342},
  eprinttype   = {arXiv},
  eprint       = {2406.04342},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-04342.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-07230,
  author       = {Wang, Weiyun and
                  Zhang, Shuibo and
                  Ren, Yiming and
                  Duan, Yuchen and
                  Li, Tiantong and
                  Liu, Shuo and
                  Hu, Mengkang and
                  Chen, Zhe and
                  Zhang, Kaipeng and
                  Lu, Lewei and
                  Zhu, Xizhou and
                  Luo, Ping and
                  Qiao, Yu and
                  Dai, Jifeng and
                  Shao, Wenqi and
                  Wang, Wenhai},
  title        = {Needle In A Multimodal Haystack},
  journal      = {CoRR},
  volume       = {abs/2406.07230},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.07230},
  doi          = {10.48550/ARXIV.2406.07230},
  eprinttype   = {arXiv},
  eprint       = {2406.07230},
  timestamp    = {Mon, 22 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-07230.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-07543,
  author       = {Yang, Chenyu and
                  Zhu, Xizhou and
                  Zhu, Jinguo and
                  Su, Weijie and
                  Wang, Junjie and
                  Dong, Xuan and
                  Wang, Wenhai and
                  Lu, Lewei and
                  Li, Bin and
                  Zhou, Jie and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {Vision Model Pre-training on Interleaved Image-Text Data via Latent
                  Compression Learning},
  journal      = {CoRR},
  volume       = {abs/2406.07543},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.07543},
  doi          = {10.48550/ARXIV.2406.07543},
  eprinttype   = {arXiv},
  eprint       = {2406.07543},
  timestamp    = {Mon, 29 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-07543.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-08394,
  author       = {Wu, Jiannan and
                  Zhong, Muyan and
                  Xing, Sen and
                  Lai, Zeqiang and
                  Liu, Zhaoyang and
                  Wang, Wenhai and
                  Chen, Zhe and
                  Zhu, Xizhou and
                  Lu, Lewei and
                  Lu, Tong and
                  Luo, Ping and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {{VisionLLM} v2: An End-to-End Generalist Multimodal Large Language Model
                  for Hundreds of Vision-Language Tasks},
  journal      = {CoRR},
  volume       = {abs/2406.08394},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.08394},
  doi          = {10.48550/ARXIV.2406.08394},
  eprinttype   = {arXiv},
  eprint       = {2406.08394},
  timestamp    = {Tue, 23 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-08394.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-08418,
  author       = {Li, Qingyun and
                  Chen, Zhe and
                  Wang, Weiyun and
                  Wang, Wenhai and
                  Ye, Shenglong and
                  Jin, Zhenjiang and
                  Chen, Guanzhou and
                  He, Yinan and
                  Gao, Zhangwei and
                  Cui, Erfei and
                  Yu, Jiashuo and
                  Tian, Hao and
                  Zhou, Jiasheng and
                  Xu, Chao and
                  Wang, Bin and
                  Wei, Xingjian and
                  Li, Wei and
                  Zhang, Wenjian and
                  Zhang, Bo and
                  Cai, Pinlong and
                  Wen, Licheng and
                  Yan, Xiangchao and
                  Li, Zhenxiang and
                  Chu, Pei and
                  Wang, Yi and
                  Dou, Min and
                  Tian, Changyao and
                  Zhu, Xizhou and
                  Lu, Lewei and
                  Chen, Yushi and
                  He, Junjun and
                  Tu, Zhongying and
                  Lu, Tong and
                  Wang, Yali and
                  Wang, Limin and
                  Lin, Dahua and
                  Qiao, Yu and
                  Shi, Botian and
                  He, Conghui and
                  Dai, Jifeng},
  title        = {{OmniCorpus}: {A} Unified Multimodal Corpus of 10 Billion-Level Images
                  Interleaved with Text},
  journal      = {CoRR},
  volume       = {abs/2406.08418},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.08418},
  doi          = {10.48550/ARXIV.2406.08418},
  eprinttype   = {arXiv},
  eprint       = {2406.08418},
  timestamp    = {Mon, 02 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-08418.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2407-06597,
  author       = {Liang, Renjie and
                  Li, Li and
                  Zhang, Chongzhi and
                  Wang, Jing and
                  Zhu, Xizhou and
                  Sun, Aixin},
  title        = {{TVR-Ranking}: {A} Dataset for Ranked Video Moment Retrieval with
                  Imprecise Queries},
  journal      = {CoRR},
  volume       = {abs/2407.06597},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2407.06597},
  doi          = {10.48550/ARXIV.2407.06597},
  eprinttype   = {arXiv},
  eprint       = {2407.06597},
  timestamp    = {Sun, 18 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2407-06597.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2407-15838,
  author       = {Liu, Yangzhou and
                  Cao, Yue and
                  Gao, Zhangwei and
                  Wang, Weiyun and
                  Chen, Zhe and
                  Wang, Wenhai and
                  Tian, Hao and
                  Lu, Lewei and
                  Zhu, Xizhou and
                  Lu, Tong and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {{MMInstruct}: {A} High-Quality Multi-Modal Instruction Tuning Dataset
                  with Extensive Diversity},
  journal      = {CoRR},
  volume       = {abs/2407.15838},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2407.15838},
  doi          = {10.48550/ARXIV.2407.15838},
  eprinttype   = {arXiv},
  eprint       = {2407.15838},
  timestamp    = {Mon, 19 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2407-15838.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2408-02718,
  author       = {Meng, Fanqing and
                  Wang, Jin and
                  Li, Chuanhao and
                  Lu, Quanfeng and
                  Tian, Hao and
                  Liao, Jiaqi and
                  Zhu, Xizhou and
                  Dai, Jifeng and
                  Qiao, Yu and
                  Luo, Ping and
                  Zhang, Kaipeng and
                  Shao, Wenqi},
  title        = {{MMIU:} Multimodal Multi-image Understanding for Evaluating Large
                  Vision-Language Models},
  journal      = {CoRR},
  volume       = {abs/2408.02718},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2408.02718},
  doi          = {10.48550/ARXIV.2408.02718},
  eprinttype   = {arXiv},
  eprint       = {2408.02718},
  timestamp    = {Thu, 12 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2408-02718.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/0002ZTLLHQWZD23,
  author       = {Su, Weijie and
                  Zhu, Xizhou and
                  Tao, Chenxin and
                  Lu, Lewei and
                  Li, Bin and
                  Huang, Gao and
                  Qiao, Yu and
                  Wang, Xiaogang and
                  Zhou, Jie and
                  Dai, Jifeng},
  title        = {Towards All-in-One Pre-Training via Maximizing Multi-Modal Mutual
                  Information},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages        = {15888--15899},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/CVPR52729.2023.01525},
  doi          = {10.1109/CVPR52729.2023.01525},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/0002ZTLLHQWZD23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/HuYCLSZCDLWLJLD23,
  author       = {Hu, Yihan and
                  Yang, Jiazhi and
                  Chen, Li and
                  Li, Keyu and
                  Sima, Chonghao and
                  Zhu, Xizhou and
                  Chai, Siqi and
                  Du, Senyao and
                  Lin, Tianwei and
                  Wang, Wenhai and
                  Lu, Lewei and
                  Jia, Xiaosong and
                  Liu, Qiang and
                  Dai, Jifeng and
                  Qiao, Yu and
                  Li, Hongyang},
  title        = {Planning-oriented Autonomous Driving},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages        = {17853--17862},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/CVPR52729.2023.01712},
  doi          = {10.1109/CVPR52729.2023.01712},
  timestamp    = {Tue, 13 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/HuYCLSZCDLWLJLD23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/LiZJZLYWQWWD23,
  author       = {Li, Hao and
                  Zhu, Jinguo and
                  Jiang, Xiaohu and
                  Zhu, Xizhou and
                  Li, Hongsheng and
                  Yuan, Chun and
                  Wang, Xiaohua and
                  Qiao, Yu and
                  Wang, Xiaogang and
                  Wang, Wenhai and
                  Dai, Jifeng},
  title        = {{Uni-Perceiver} v2: {A} Generalist Model for Large-Scale Vision and
                  Vision-Language Tasks},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages        = {2691--2700},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/CVPR52729.2023.00264},
  doi          = {10.1109/CVPR52729.2023.00264},
  timestamp    = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/LiZJZLYWQWWD23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/TaoZ0HLZ00D23,
  author       = {Tao, Chenxin and
                  Zhu, Xizhou and
                  Su, Weijie and
                  Huang, Gao and
                  Li, Bin and
                  Zhou, Jie and
                  Qiao, Yu and
                  Wang, Xiaogang and
                  Dai, Jifeng},
  title        = {Siamese Image Modeling for Self-Supervised Vision Representation Learning},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages        = {2132--2141},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/CVPR52729.2023.00212},
  doi          = {10.1109/CVPR52729.2023.00212},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/TaoZ0HLZ00D23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/WangDCHLZHLLLWQ23,
  author       = {Wang, Wenhai and
                  Dai, Jifeng and
                  Chen, Zhe and
                  Huang, Zhenhang and
                  Li, Zhiqi and
                  Zhu, Xizhou and
                  Hu, Xiaowei and
                  Lu, Tong and
                  Lu, Lewei and
                  Li, Hongsheng and
                  Wang, Xiaogang and
                  Qiao, Yu},
  title        = {{InternImage}: Exploring Large-Scale Vision Foundation Models with
                  Deformable Convolutions},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages        = {14408--14419},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/CVPR52729.2023.01385},
  doi          = {10.1109/CVPR52729.2023.01385},
  timestamp    = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/WangDCHLZHLLLWQ23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cvpr/YangCTTZZHLQLZD23,
  author       = {Yang, Chenyu and
                  Chen, Yuntao and
                  Tian, Hao and
                  Tao, Chenxin and
                  Zhu, Xizhou and
                  Zhang, Zhaoxiang and
                  Huang, Gao and
                  Li, Hongyang and
                  Qiao, Yu and
                  Lu, Lewei and
                  Zhou, Jie and
                  Dai, Jifeng},
  title        = {{BEVFormer} v2: Adapting Modern Image Backbones to Bird's-Eye-View
                  Recognition via Perspective Supervision},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages        = {17830--17839},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/CVPR52729.2023.01710},
  doi          = {10.1109/CVPR52729.2023.01710},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/YangCTTZZHLQLZD23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WangCCWZZLLZQD23,
  author       = {Wang, Wenhai and
                  Chen, Zhe and
                  Chen, Xiaokang and
                  Wu, Jiannan and
                  Zhu, Xizhou and
                  Zeng, Gang and
                  Luo, Ping and
                  Lu, Tong and
                  Zhou, Jie and
                  Qiao, Yu and
                  Dai, Jifeng},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {{VisionLLM}: Large Language Model is also an Open-Ended Decoder for
                  Vision-Centric Tasks},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans,
                  LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/c1f7b1ed763e9c75e4db74b49b76db5f-Abstract-Conference.html},
  timestamp    = {Thu, 25 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/WangCCWZZLLZQD23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2305-05662,
  author       = {Liu, Zhaoyang and
                  He, Yinan and
                  Wang, Wenhai and
                  Wang, Weiyun and
                  Wang, Yi and
                  Chen, Shoufa and
                  Zhang, Qinglong and
                  Lai, Zeqiang and
                  Yang, Yang and
                  Li, Qingyun and
                  Yu, Jiashuo and
                  Li, Kunchang and
                  Chen, Zhe and
                  Yang, Xue and
                  Zhu, Xizhou and
                  Wang, Yali and
                  Wang, Limin and
                  Luo, Ping and
                  Dai, Jifeng and
                  Qiao, Yu},
  title        = {{InternGPT}: Solving Vision-Centric Tasks by Interacting with Chatbots
                  Beyond Language},
  journal      = {CoRR},
  volume       = {abs/2305.05662},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.05662},
  doi          = {10.48550/ARXIV.2305.05662},
  eprinttype   = {arXiv},
  eprint       = {2305.05662},
  timestamp    = {Mon, 02 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-05662.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2305-11175,
  author       = {Wang, Wenhai and
                  Chen, Zhe and
                  Chen, Xiaokang and
                  Wu, Jiannan and
                  Zhu, Xizhou and
                  Zeng, Gang and
                  Luo, Ping and
                  Lu, Tong and
                  Zhou, Jie and
                  Qiao, Yu and
                  Dai, Jifeng},
  title        = {{VisionLLM}: Large Language Model is also an Open-Ended Decoder for
                  Vision-Centric Tasks},
  journal      = {CoRR},
  volume       = {abs/2305.11175},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.11175},
  doi          = {10.48550/ARXIV.2305.11175},
  eprinttype   = {arXiv},
  eprint       = {2305.11175},
  timestamp    = {Mon, 03 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-11175.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2305.17144. The proper noun Minecraft is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2305-17144,
  author       = {Xizhou Zhu and
                  Yuntao Chen and
                  Hao Tian and
                  Chenxin Tao and
                  Weijie Su and
                  Chenyu Yang and
                  Gao Huang and
                  Bin Li and
                  Lewei Lu and
                  Xiaogang Wang and
                  Yu Qiao and
                  Zhaoxiang Zhang and
                  Jifeng Dai},
  title        = {Ghost in the {Minecraft}: Generally Capable Agents for Open-World Environments
                  via Large Language Models with Text-based Knowledge and Memory},
  journal      = {CoRR},
  volume       = {abs/2305.17144},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.17144},
  doi          = {10.48550/ARXIV.2305.17144},
  eprinttype   = {arXiv},
  eprint       = {2305.17144},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-17144.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2306.05423. Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2306-05423,
  author       = {Tian, Changyao and
                  Tao, Chenxin and
                  Dai, Jifeng and
                  Li, Hao and
                  Li, Ziheng and
                  Lu, Lewei and
                  Wang, Xiaogang and
                  Li, Hongsheng and
                  Huang, Gao and
                  Zhu, Xizhou},
  title        = {{ADDP:} Learning General Representations for Image Recognition and
                  Generation with Alternating Denoising Diffusion Process},
  journal      = {CoRR},
  volume       = {abs/2306.05423},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2306.05423},
  doi          = {10.48550/ARXIV.2306.05423},
  eprinttype   = {arXiv},
  eprint       = {2306.05423},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-05423.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2308.01907. The project name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2308-01907,
  author       = {Weiyun Wang and
                  Min Shi and
                  Qingyun Li and
                  Wenhai Wang and
                  Zhenhang Huang and
                  Linjie Xing and
                  Zhe Chen and
                  Hao Li and
                  Xizhou Zhu and
                  Zhiguo Cao and
                  Yushi Chen and
                  Tong Lu and
                  Jifeng Dai and
                  Yu Qiao},
  title        = {The {All-Seeing} {Project}: Towards Panoptic Visual Recognition and Understanding
                  of the Open World},
  journal      = {CoRR},
  volume       = {abs/2308.01907},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2308.01907},
  doi          = {10.48550/ARXIV.2308.01907},
  eprinttype   = {arXiv},
  eprint       = {2308.01907},
  timestamp    = {Tue, 06 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2308-01907.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2310.07653. The system name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2310-07653,
  author       = {Zeqiang Lai and
                  Xizhou Zhu and
                  Jifeng Dai and
                  Yu Qiao and
                  Wenhai Wang},
  title        = {{Mini-DALLE3}: Interactive Text to Image by Prompting Large Language
                  Models},
  journal      = {CoRR},
  volume       = {abs/2310.07653},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.07653},
  doi          = {10.48550/ARXIV.2310.07653},
  eprinttype   = {arXiv},
  eprint       = {2310.07653},
  timestamp    = {Wed, 24 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-07653.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2310.17796. The system name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2310-17796,
  author       = {Zhaoyang Liu and
                  Zeqiang Lai and
                  Zhangwei Gao and
                  Erfei Cui and
                  Zhiheng Li and
                  Xizhou Zhu and
                  Lewei Lu and
                  Qifeng Chen and
                  Yu Qiao and
                  Jifeng Dai and
                  Wenhai Wang},
  title        = {{ControlLLM}: Augment Language Models with Tools by Searching on Graphs},
  journal      = {CoRR},
  volume       = {abs/2310.17796},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.17796},
  doi          = {10.48550/ARXIV.2310.17796},
  eprinttype   = {arXiv},
  eprint       = {2310.17796},
  timestamp    = {Mon, 03 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-17796.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2312.09238; same title and authors as the CVPR 2024 record DBLP:conf/cvpr/Li0WZZQWLLD24.
% The method name and Minecraft are brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2312-09238,
  author       = {Hao Li and
                  Xue Yang and
                  Zhaokai Wang and
                  Xizhou Zhu and
                  Jie Zhou and
                  Yu Qiao and
                  Xiaogang Wang and
                  Hongsheng Li and
                  Lewei Lu and
                  Jifeng Dai},
  title        = {{Auto MC-Reward}: Automated Dense Reward Design with Large Language
                  Models for {Minecraft}},
  journal      = {CoRR},
  volume       = {abs/2312.09238},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.09238},
  doi          = {10.48550/ARXIV.2312.09238},
  eprinttype   = {arXiv},
  eprint       = {2312.09238},
  timestamp    = {Tue, 06 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-09238.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2312.09245. The model name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2312-09245,
  author       = {Wenhai Wang and
                  Jiangwei Xie and
                  Chuanyang Hu and
                  Haoming Zou and
                  Jianan Fan and
                  Wenwen Tong and
                  Yang Wen and
                  Silei Wu and
                  Hanming Deng and
                  Zhiqi Li and
                  Hao Tian and
                  Lewei Lu and
                  Xizhou Zhu and
                  Xiaogang Wang and
                  Yu Qiao and
                  Jifeng Dai},
  title        = {{DriveMLM}: Aligning Multi-Modal Large Language Models with Behavioral
                  Planning States for Autonomous Driving},
  journal      = {CoRR},
  volume       = {abs/2312.09245},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.09245},
  doi          = {10.48550/ARXIV.2312.09245},
  eprinttype   = {arXiv},
  eprint       = {2312.09245},
  timestamp    = {Wed, 24 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-09245.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2312.14238; same authors as the CVPR 2024 record DBLP:conf/cvpr/ChenWW0CXZZZLLL24.
% The model name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2312-14238,
  author       = {Zhe Chen and
                  Jiannan Wu and
                  Wenhai Wang and
                  Weijie Su and
                  Guo Chen and
                  Sen Xing and
                  Muyan Zhong and
                  Qinglong Zhang and
                  Xizhou Zhu and
                  Lewei Lu and
                  Bin Li and
                  Ping Luo and
                  Tong Lu and
                  Yu Qiao and
                  Jifeng Dai},
  title        = {{InternVL}: Scaling up Vision Foundation Models and Aligning for Generic
                  Visual-Linguistic Tasks},
  journal      = {CoRR},
  volume       = {abs/2312.14238},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.14238},
  doi          = {10.48550/ARXIV.2312.14238},
  eprinttype   = {arXiv},
  eprint       = {2312.14238},
  timestamp    = {Thu, 25 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-14238.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2022 paper; the arXiv preprint is DBLP:journals/corr/abs-2103-14026.
% The method name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/cvpr/LiFDLHZ22,
  author       = {Hao Li and
                  Tianwen Fu and
                  Jifeng Dai and
                  Hongsheng Li and
                  Gao Huang and
                  Xizhou Zhu},
  title        = {{AutoLoss-Zero}: Searching Loss Functions from Scratch for Generic Tasks},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2022, New Orleans, LA, USA, June 18-24, 2022},
  pages        = {999--1008},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/CVPR52688.2022.00108},
  doi          = {10.1109/CVPR52688.2022.00108},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/LiFDLHZ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2022 paper; the arXiv preprint is DBLP:journals/corr/abs-2112-05141.
% The proper adjective Siamese is brace-protected against sentence-casing.
@inproceedings{DBLP:conf/cvpr/TaoWZDSHD22,
  author       = {Chenxin Tao and
                  Honghui Wang and
                  Xizhou Zhu and
                  Jiahua Dong and
                  Shiji Song and
                  Gao Huang and
                  Jifeng Dai},
  title        = {Exploring the Equivalence of {Siamese} Self-Supervised Learning via
                  {A} Unified Gradient Framework},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2022, New Orleans, LA, USA, June 18-24, 2022},
  pages        = {14411--14420},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/CVPR52688.2022.01403},
  doi          = {10.1109/CVPR52688.2022.01403},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/TaoWZDSHD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2022 paper; the arXiv preprint is DBLP:journals/corr/abs-2112-01522 (its author list has one extra name).
% The model name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/cvpr/ZhuZLWLWD22,
  author       = {Xizhou Zhu and
                  Jinguo Zhu and
                  Hao Li and
                  Xiaoshi Wu and
                  Hongsheng Li and
                  Xiaohua Wang and
                  Jifeng Dai},
  title        = {{Uni-Perceiver}: Pre-training Unified Architecture for Generic Perception
                  for Zero-shot and Few-shot Tasks},
  booktitle    = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2022, New Orleans, LA, USA, June 18-24, 2022},
  pages        = {16783--16794},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/CVPR52688.2022.01630},
  doi          = {10.1109/CVPR52688.2022.01630},
  timestamp    = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/ZhuZLWLWD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ECCV 2022 paper (LNCS 13685); the arXiv preprint is DBLP:journals/corr/abs-2111-13579.
% Names are written in the unambiguous Last, First form.
@inproceedings{DBLP:conf/eccv/TianWZDQ22,
  author       = {Tian, Changyao and
                  Wang, Wenhai and
                  Zhu, Xizhou and
                  Dai, Jifeng and
                  Qiao, Yu},
  editor       = {Avidan, Shai and
                  Brostow, Gabriel J. and
                  Ciss{\'{e}}, Moustapha and
                  Farinella, Giovanni Maria and
                  Hassner, Tal},
  title        = {{VL-LTR:} Learning Class-wise Visual-Linguistic Representation for
                  Long-Tailed Visual Recognition},
  booktitle    = {Computer Vision - {ECCV} 2022 - 17th European Conference, Tel Aviv,
                  Israel, October 23-27, 2022, Proceedings, Part {XXV}},
  series       = {Lecture Notes in Computer Science},
  volume       = {13685},
  pages        = {73--91},
  publisher    = {Springer},
  year         = {2022},
  url          = {https://doi.org/10.1007/978-3-031-19806-9\_5},
  doi          = {10.1007/978-3-031-19806-9\_5},
  timestamp    = {Mon, 03 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/eccv/TianWZDQ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ECCV 2022 paper (LNCS 13665); the arXiv preprint is DBLP:journals/corr/abs-2203-08713.
% The title uses an inline multiplication sign, not a superscript; DeciWatch, 2D and 3D are brace-protected against sentence-casing.
@inproceedings{DBLP:conf/eccv/ZengJYGZDX22,
  author       = {Ailing Zeng and
                  Xuan Ju and
                  Lei Yang and
                  Ruiyuan Gao and
                  Xizhou Zhu and
                  Bo Dai and
                  Qiang Xu},
  editor       = {Shai Avidan and
                  Gabriel J. Brostow and
                  Moustapha Ciss{\'{e}} and
                  Giovanni Maria Farinella and
                  Tal Hassner},
  title        = {{DeciWatch}: {A} Simple Baseline for 10{\(\times\)} Efficient {2D} and
                  {3D} Pose Estimation},
  booktitle    = {Computer Vision - {ECCV} 2022 - 17th European Conference, Tel Aviv,
                  Israel, October 23-27, 2022, Proceedings, Part {V}},
  series       = {Lecture Notes in Computer Science},
  volume       = {13665},
  pages        = {607--624},
  publisher    = {Springer},
  year         = {2022},
  url          = {https://doi.org/10.1007/978-3-031-20065-6\_35},
  doi          = {10.1007/978-3-031-20065-6\_35},
  timestamp    = {Tue, 04 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/eccv/ZengJYGZDX22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper; the arXiv preprint is DBLP:journals/corr/abs-2206-04674.
% Editor given names are spelled out (styles abbreviate as needed); the model name and MoEs are brace-protected against sentence-casing.
@inproceedings{DBLP:conf/nips/ZhuZWWLWD22,
  author       = {Jinguo Zhu and
                  Xizhou Zhu and
                  Wenhai Wang and
                  Xiaohua Wang and
                  Hongsheng Li and
                  Xiaogang Wang and
                  Jifeng Dai},
  editor       = {Sanmi Koyejo and
                  Shakir Mohamed and
                  Alekh Agarwal and
                  Danielle Belgrave and
                  Kyunghyun Cho and
                  Alice Oh},
  title        = {{Uni-Perceiver-MoE}: Learning Sparse Generalist Models with Conditional
                  {MoEs}},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper\_files/paper/2022/hash/11fc8c98b46d4cbdfe8157267228f7d7-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ZhuZWWLWD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2203.08713 of the ECCV 2022 paper DBLP:conf/eccv/ZengJYGZDX22.
% DeciWatch, 2D and 3D are brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-2203-08713,
  author       = {Ailing Zeng and
                  Xuan Ju and
                  Lei Yang and
                  Ruiyuan Gao and
                  Xizhou Zhu and
                  Bo Dai and
                  Qiang Xu},
  title        = {{DeciWatch}: {A} Simple Baseline for 10x Efficient {2D} and {3D} Pose Estimation},
  journal      = {CoRR},
  volume       = {abs/2203.08713},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2203.08713},
  doi          = {10.48550/ARXIV.2203.08713},
  eprinttype   = {arXiv},
  eprint       = {2203.08713},
  timestamp    = {Tue, 04 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2203-08713.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2206.01204. Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2206-01204,
  author       = {Tao, Chenxin and
                  Zhu, Xizhou and
                  Huang, Gao and
                  Qiao, Yu and
                  Wang, Xiaogang and
                  Dai, Jifeng},
  title        = {Siamese Image Modeling for Self-Supervised Vision Representation Learning},
  journal      = {CoRR},
  volume       = {abs/2206.01204},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.01204},
  doi          = {10.48550/ARXIV.2206.01204},
  eprinttype   = {arXiv},
  eprint       = {2206.01204},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-01204.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2206.04674 of the NeurIPS 2022 paper DBLP:conf/nips/ZhuZWWLWD22.
% The model name and MoEs are brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-2206-04674,
  author       = {Jinguo Zhu and
                  Xizhou Zhu and
                  Wenhai Wang and
                  Xiaohua Wang and
                  Hongsheng Li and
                  Xiaogang Wang and
                  Jifeng Dai},
  title        = {{Uni-Perceiver-MoE}: Learning Sparse Generalist Models with Conditional
                  {MoEs}},
  journal      = {CoRR},
  volume       = {abs/2206.04674},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.04674},
  doi          = {10.48550/ARXIV.2206.04674},
  eprinttype   = {arXiv},
  eprint       = {2206.04674},
  timestamp    = {Fri, 03 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-04674.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2209.05324. Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2209-05324,
  author       = {Li, Hongyang and
                  Sima, Chonghao and
                  Dai, Jifeng and
                  Wang, Wenhai and
                  Lu, Lewei and
                  Wang, Huijie and
                  Xie, Enze and
                  Li, Zhiqi and
                  Deng, Hanming and
                  Tian, Hao and
                  Zhu, Xizhou and
                  Chen, Li and
                  Gao, Yulu and
                  Geng, Xiangwei and
                  Zeng, Jia and
                  Li, Yang and
                  Yang, Jiazhi and
                  Jia, Xiaosong and
                  Yu, Bohan and
                  Qiao, Yu and
                  Lin, Dahua and
                  Liu, Si and
                  Yan, Junchi and
                  Shi, Jianping and
                  Luo, Ping},
  title        = {Delving into the Devils of Bird's-eye-view Perception: {A} Review,
                  Evaluation and Recipe},
  journal      = {CoRR},
  volume       = {abs/2209.05324},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2209.05324},
  doi          = {10.48550/ARXIV.2209.05324},
  eprinttype   = {arXiv},
  eprint       = {2209.05324},
  timestamp    = {Wed, 14 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2209-05324.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2211.05778. The model name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2211-05778,
  author       = {Wenhai Wang and
                  Jifeng Dai and
                  Zhe Chen and
                  Zhenhang Huang and
                  Zhiqi Li and
                  Xizhou Zhu and
                  Xiaowei Hu and
                  Tong Lu and
                  Lewei Lu and
                  Hongsheng Li and
                  Xiaogang Wang and
                  Yu Qiao},
  title        = {{InternImage}: Exploring Large-Scale Vision Foundation Models with Deformable
                  Convolutions},
  journal      = {CoRR},
  volume       = {abs/2211.05778},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.05778},
  doi          = {10.48550/ARXIV.2211.05778},
  eprinttype   = {arXiv},
  eprint       = {2211.05778},
  timestamp    = {Mon, 03 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-05778.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2211.05781. Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2211-05781,
  author       = {Dai, Jifeng and
                  Shi, Min and
                  Wang, Weiyun and
                  Wu, Sitong and
                  Xing, Linjie and
                  Wang, Wenhai and
                  Zhu, Xizhou and
                  Lu, Lewei and
                  Zhou, Jie and
                  Wang, Xiaogang and
                  Qiao, Yu and
                  Hu, Xiaowei},
  title        = {Demystify Transformers {\&} Convolutions in Modern Image Deep
                  Networks},
  journal      = {CoRR},
  volume       = {abs/2211.05781},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.05781},
  doi          = {10.48550/ARXIV.2211.05781},
  eprinttype   = {arXiv},
  eprint       = {2211.05781},
  timestamp    = {Mon, 29 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-05781.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2211.09807. Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2211-09807,
  author       = {Su, Weijie and
                  Zhu, Xizhou and
                  Tao, Chenxin and
                  Lu, Lewei and
                  Li, Bin and
                  Huang, Gao and
                  Qiao, Yu and
                  Wang, Xiaogang and
                  Zhou, Jie and
                  Dai, Jifeng},
  title        = {Towards All-in-one Pre-training via Maximizing Multi-modal Mutual
                  Information},
  journal      = {CoRR},
  volume       = {abs/2211.09807},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.09807},
  doi          = {10.48550/ARXIV.2211.09807},
  eprinttype   = {arXiv},
  eprint       = {2211.09807},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-09807.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2211.09808. The model name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2211-09808,
  author       = {Hao Li and
                  Jinguo Zhu and
                  Xiaohu Jiang and
                  Xizhou Zhu and
                  Hongsheng Li and
                  Chun Yuan and
                  Xiaohua Wang and
                  Yu Qiao and
                  Xiaogang Wang and
                  Wenhai Wang and
                  Jifeng Dai},
  title        = {{Uni-Perceiver} v2: {A} Generalist Model for Large-Scale Vision and
                  Vision-Language Tasks},
  journal      = {CoRR},
  volume       = {abs/2211.09808},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.09808},
  doi          = {10.48550/ARXIV.2211.09808},
  eprinttype   = {arXiv},
  eprint       = {2211.09808},
  timestamp    = {Mon, 03 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-09808.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2211.10439. The model name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2211-10439,
  author       = {Chenyu Yang and
                  Yuntao Chen and
                  Hao Tian and
                  Chenxin Tao and
                  Xizhou Zhu and
                  Zhaoxiang Zhang and
                  Gao Huang and
                  Hongyang Li and
                  Yu Qiao and
                  Lewei Lu and
                  Jie Zhou and
                  Jifeng Dai},
  title        = {{BEVFormer} v2: Adapting Modern Image Backbones to Bird's-Eye-View Recognition
                  via Perspective Supervision},
  journal      = {CoRR},
  volume       = {abs/2211.10439},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.10439},
  doi          = {10.48550/ARXIV.2211.10439},
  eprinttype   = {arXiv},
  eprint       = {2211.10439},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-10439.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2212.10156. Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2212-10156,
  author       = {Hu, Yihan and
                  Yang, Jiazhi and
                  Chen, Li and
                  Li, Keyu and
                  Sima, Chonghao and
                  Zhu, Xizhou and
                  Chai, Siqi and
                  Du, Senyao and
                  Lin, Tianwei and
                  Wang, Wenhai and
                  Lu, Lewei and
                  Jia, Xiaosong and
                  Liu, Qiang and
                  Dai, Jifeng and
                  Qiao, Yu and
                  Li, Hongyang},
  title        = {Goal-oriented Autonomous Driving},
  journal      = {CoRR},
  volume       = {abs/2212.10156},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.10156},
  doi          = {10.48550/ARXIV.2212.10156},
  eprinttype   = {arXiv},
  eprint       = {2212.10156},
  timestamp    = {Tue, 13 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-10156.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2021 paper. Names are written in the unambiguous Last, First form.
@inproceedings{DBLP:conf/cvpr/TianCDZZ21,
  author       = {Tian, Hao and
                  Chen, Yuntao and
                  Dai, Jifeng and
                  Zhang, Zhaoxiang and
                  Zhu, Xizhou},
  title        = {Unsupervised Object Detection With {LIDAR} Clues},
  booktitle    = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR}
                  2021, virtual, June 19-25, 2021},
  pages        = {5962--5972},
  publisher    = {Computer Vision Foundation / {IEEE}},
  year         = {2021},
  url          = {https://openaccess.thecvf.com/content/CVPR2021/html/Tian\_Unsupervised\_Object\_Detection\_With\_LIDAR\_Clues\_CVPR\_2021\_paper.html},
  doi          = {10.1109/CVPR46437.2021.00590},
  timestamp    = {Tue, 26 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/cvpr/TianCDZZ21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2021 paper. The method name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/iclr/LiTZWHD21,
  author       = {Hao Li and
                  Chenxin Tao and
                  Xizhou Zhu and
                  Xiaogang Wang and
                  Gao Huang and
                  Jifeng Dai},
  title        = {{Auto Seg-Loss}: Searching Metric Surrogates for Semantic Segmentation},
  booktitle    = {9th International Conference on Learning Representations, {ICLR} 2021,
                  Virtual Event, Austria, May 3-7, 2021},
  publisher    = {OpenReview.net},
  year         = {2021},
  url          = {https://openreview.net/forum?id=MJAqnaC2vO1},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/LiTZWHD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2021 paper. Names are written in the unambiguous Last, First form.
@inproceedings{DBLP:conf/iclr/ZhuSLLWD21,
  author       = {Zhu, Xizhou and
                  Su, Weijie and
                  Lu, Lewei and
                  Li, Bin and
                  Wang, Xiaogang and
                  Dai, Jifeng},
  title        = {Deformable {DETR:} Deformable Transformers for End-to-End Object Detection},
  booktitle    = {9th International Conference on Learning Representations, {ICLR} 2021,
                  Virtual Event, Austria, May 3-7, 2021},
  publisher    = {OpenReview.net},
  year         = {2021},
  url          = {https://openreview.net/forum?id=gZ9hCDWe6ke},
  timestamp    = {Tue, 15 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iclr/ZhuSLLWD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2021 paper; the arXiv preprint is DBLP:journals/corr/abs-2112-05138.
% Names are written in Last, First form, keeping the same first/last split BibTeX derived from the original spelling.
@inproceedings{DBLP:conf/nips/TaoLZHLD21,
  author       = {Tao, Chenxin and
                  Li, Zizhang and
                  Zhu, Xizhou and
                  Huang, Gao and
                  Liu, Yong and
                  Dai, Jifeng},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {Searching Parameterized {AP} Loss for Object Detection},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, NeurIPS 2021, December
                  6-14, 2021, virtual},
  pages        = {22021--22033},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/b9009beb804fa097c04d226a8ba5102e-Abstract.html},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/TaoLZHLD21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2103.14026 of the CVPR 2022 paper DBLP:conf/cvpr/LiFDLHZ22.
% The method name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2103-14026,
  author       = {Hao Li and
                  Tianwen Fu and
                  Jifeng Dai and
                  Hongsheng Li and
                  Gao Huang and
                  Xizhou Zhu},
  title        = {{AutoLoss-Zero}: Searching Loss Functions from Scratch for Generic Tasks},
  journal      = {CoRR},
  volume       = {abs/2103.14026},
  year         = {2021},
  url          = {https://arxiv.org/abs/2103.14026},
  eprinttype   = {arXiv},
  eprint       = {2103.14026},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2103-14026.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2107.01151. Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2107-01151,
  author       = {Wang, Haiyang and
                  Wang, Wenguan and
                  Zhu, Xizhou and
                  Dai, Jifeng and
                  Wang, Liwei},
  title        = {Collaborative Visual Navigation},
  journal      = {CoRR},
  volume       = {abs/2107.01151},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.01151},
  eprinttype   = {arXiv},
  eprint       = {2107.01151},
  timestamp    = {Tue, 12 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-01151.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2111.13579 of the ECCV 2022 paper DBLP:conf/eccv/TianWZDQ22 (this list has one extra author).
% Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2111-13579,
  author       = {Tian, Changyao and
                  Wang, Wenhai and
                  Zhu, Xizhou and
                  Wang, Xiaogang and
                  Dai, Jifeng and
                  Qiao, Yu},
  title        = {{VL-LTR:} Learning Class-wise Visual-Linguistic Representation for
                  Long-Tailed Visual Recognition},
  journal      = {CoRR},
  volume       = {abs/2111.13579},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.13579},
  eprinttype   = {arXiv},
  eprint       = {2111.13579},
  timestamp    = {Mon, 03 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-13579.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2112.01522 of the CVPR 2022 paper DBLP:conf/cvpr/ZhuZLWLWD22 (this list has one extra author).
% The model name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2112-01522,
  author       = {Xizhou Zhu and
                  Jinguo Zhu and
                  Hao Li and
                  Xiaoshi Wu and
                  Xiaogang Wang and
                  Hongsheng Li and
                  Xiaohua Wang and
                  Jifeng Dai},
  title        = {{Uni-Perceiver}: Pre-training Unified Architecture for Generic Perception
                  for Zero-shot and Few-shot Tasks},
  journal      = {CoRR},
  volume       = {abs/2112.01522},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.01522},
  eprinttype   = {arXiv},
  eprint       = {2112.01522},
  timestamp    = {Tue, 06 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-01522.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) preprint 2112.05138 of the NeurIPS 2021 paper DBLP:conf/nips/TaoLZHLD21.
% Names are written in the unambiguous Last, First form.
@article{DBLP:journals/corr/abs-2112-05138,
  author       = {Tao, Chenxin and
                  Li, Zizhang and
                  Zhu, Xizhou and
                  Huang, Gao and
                  Liu, Yong and
                  Dai, Jifeng},
  title        = {Searching Parameterized {AP} Loss for Object Detection},
  journal      = {CoRR},
  volume       = {abs/2112.05138},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.05138},
  eprinttype   = {arXiv},
  eprint       = {2112.05138},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-05138.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2112.05141. "Siamese" is protected as a proper adjective; the
% article "A" is left unbraced so sentence-casing styles may lowercase it.
@article{DBLP:journals/corr/abs-2112-05141,
  author       = {Chenxin Tao and
                  Honghui Wang and
                  Xizhou Zhu and
                  Jiahua Dong and
                  Shiji Song and
                  Gao Huang and
                  Jifeng Dai},
  title        = {Exploring the Equivalence of {Siamese} Self-Supervised Learning via
                  A Unified Gradient Framework},
  journal      = {CoRR},
  volume       = {abs/2112.05141},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.05141},
  eprinttype   = {arXiv},
  eprint       = {2112.05141},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-05141.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ECCV 2020 paper (LNCS 12346). The DOI is stored bare and unescaped: doi/url
% are verbatim fields, so a backslash before "_" would become part of the link.
@inproceedings{DBLP:conf/eccv/XieZZHL20,
  author       = {Zhenda Xie and
                  Zheng Zhang and
                  Xizhou Zhu and
                  Gao Huang and
                  Stephen Lin},
  editor       = {Andrea Vedaldi and
                  Horst Bischof and
                  Thomas Brox and
                  Jan{-}Michael Frahm},
  title        = {Spatially Adaptive Inference with Stochastic Feature Sampling and
                  Interpolation},
  booktitle    = {Computer Vision - {ECCV} 2020 - 16th European Conference, Glasgow,
                  UK, August 23-28, 2020, Proceedings, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {12346},
  pages        = {531--548},
  publisher    = {Springer},
  year         = {2020},
  url          = {https://doi.org/10.1007/978-3-030-58452-8_31},
  doi          = {10.1007/978-3-030-58452-8_31},
  timestamp    = {Thu, 22 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/eccv/XieZZHL20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2020 paper hosted on OpenReview; this record carries no pages or DOI.
@inproceedings{DBLP:conf/iclr/GaoZLD20,
  author    = {Gao, Hang and Zhu, Xizhou and Lin, Stephen and Dai, Jifeng},
  title     = {Deformable Kernels: Adapting Effective Receptive Fields
               for Object Deformation},
  booktitle = {8th International Conference on Learning Representations,
               {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=SkxSv6VFvS},
  timestamp = {Thu, 19 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/GaoZLD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICLR 2020 paper hosted on OpenReview; this record carries no pages or DOI.
@inproceedings{DBLP:conf/iclr/SuZCLLWD20,
  author    = {Su, Weijie and Zhu, Xizhou and Cao, Yue and Li, Bin and
               Lu, Lewei and Wei, Furu and Dai, Jifeng},
  title     = {{VL-BERT:} Pre-training of Generic Visual-Linguistic Representations},
  booktitle = {8th International Conference on Learning Representations,
               {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=SygXPaEYvH},
  timestamp = {Tue, 12 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/SuZCLLWD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2003.08866. Same title and author list as
% DBLP:conf/eccv/XieZZHL20 (likely its preprint); cite only one of the two.
@article{DBLP:journals/corr/abs-2003-08866,
  author     = {Xie, Zhenda and Zhang, Zheng and Zhu, Xizhou and
                Huang, Gao and Lin, Stephen},
  title      = {Spatially Adaptive Inference with Stochastic Feature Sampling
                and Interpolation},
  journal    = {CoRR},
  volume     = {abs/2003.08866},
  year       = {2020},
  url        = {https://arxiv.org/abs/2003.08866},
  eprinttype = {arXiv},
  eprint     = {2003.08866},
  timestamp  = {Thu, 22 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2003-08866.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2010.04159 (CoRR record).
@article{DBLP:journals/corr/abs-2010-04159,
  author     = {Zhu, Xizhou and Su, Weijie and Lu, Lewei and
                Li, Bin and Wang, Xiaogang and Dai, Jifeng},
  title      = {Deformable {DETR:} Deformable Transformers for End-to-End
                Object Detection},
  journal    = {CoRR},
  volume     = {abs/2010.04159},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.04159},
  eprinttype = {arXiv},
  eprint     = {2010.04159},
  timestamp  = {Tue, 15 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-04159.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2010.07930. The method name is brace-protected so that
% sentence-casing styles do not render it as "Auto seg-loss".
@article{DBLP:journals/corr/abs-2010-07930,
  author       = {Hao Li and
                  Chenxin Tao and
                  Xizhou Zhu and
                  Xiaogang Wang and
                  Gao Huang and
                  Jifeng Dai},
  title        = {{Auto Seg-Loss:} Searching Metric Surrogates for Semantic Segmentation},
  journal      = {CoRR},
  volume       = {abs/2010.07930},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.07930},
  eprinttype   = {arXiv},
  eprint       = {2010.07930},
  timestamp    = {Tue, 20 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-07930.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2011.12953. "LiDAR" is brace-protected so that
% sentence-casing styles do not flatten it to "lidar".
@article{DBLP:journals/corr/abs-2011-12953,
  author       = {Hao Tian and
                  Yuntao Chen and
                  Jifeng Dai and
                  Zhaoxiang Zhang and
                  Xizhou Zhu},
  title        = {Unsupervised Object Detection with {LiDAR} Clues},
  journal      = {CoRR},
  volume       = {abs/2011.12953},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.12953},
  eprinttype   = {arXiv},
  eprint       = {2011.12953},
  timestamp    = {Tue, 26 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-12953.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2019 paper. "ConvNets" is brace-protected against sentence casing.
% The open-access URL is stored raw (https, no backslash-escaped underscores)
% because url is a verbatim field.
@inproceedings{DBLP:conf/cvpr/ZhuHLD19,
  author       = {Xizhou Zhu and
                  Han Hu and
                  Stephen Lin and
                  Jifeng Dai},
  title        = {Deformable {ConvNets} {V2:} More Deformable, Better Results},
  booktitle    = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR}
                  2019, Long Beach, CA, USA, June 16-20, 2019},
  pages        = {9308--9316},
  publisher    = {Computer Vision Foundation / {IEEE}},
  year         = {2019},
  url          = {https://openaccess.thecvf.com/content_CVPR_2019/html/Zhu_Deformable_ConvNets_V2_More_Deformable_Better_Results_CVPR_2019_paper.html},
  doi          = {10.1109/CVPR.2019.00953},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/ZhuHLD19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICCV 2019 paper (IEEE), identified by DOI.
@inproceedings{DBLP:conf/iccv/ZhuCZLD19,
  author    = {Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and
               Lin, Stephen and Dai, Jifeng},
  title     = {An Empirical Study of Spatial Attention Mechanisms in Deep Networks},
  booktitle = {2019 {IEEE/CVF} International Conference on Computer Vision,
               {ICCV} 2019, Seoul, Korea (South), October 27 - November 2, 2019},
  pages     = {6687--6696},
  publisher = {{IEEE}},
  year      = {2019},
  url       = {https://doi.org/10.1109/ICCV.2019.00679},
  doi       = {10.1109/ICCV.2019.00679},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iccv/ZhuCZLD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1904.05873. Same title and author list as
% DBLP:conf/iccv/ZhuCZLD19 (likely its preprint); cite only one of the two.
@article{DBLP:journals/corr/abs-1904-05873,
  author       = {Xizhou Zhu and
                  Dazhi Cheng and
                  Zheng Zhang and
                  Stephen Lin and
                  Jifeng Dai},
  title        = {An Empirical Study of Spatial Attention Mechanisms in Deep Networks},
  journal      = {CoRR},
  volume       = {abs/1904.05873},
  year         = {2019},
  url          = {https://arxiv.org/abs/1904.05873},
  eprinttype   = {arXiv},
  eprint       = {1904.05873},
  timestamp    = {Thu, 19 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-05873.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1908.08530. Same title and author list as
% DBLP:conf/iclr/SuZCLLWD20 (likely its preprint); cite only one of the two.
@article{DBLP:journals/corr/abs-1908-08530,
  author       = {Weijie Su and
                  Xizhou Zhu and
                  Yue Cao and
                  Bin Li and
                  Lewei Lu and
                  Furu Wei and
                  Jifeng Dai},
  title        = {{VL-BERT:} Pre-training of Generic Visual-Linguistic Representations},
  journal      = {CoRR},
  volume       = {abs/1908.08530},
  year         = {2019},
  url          = {https://arxiv.org/abs/1908.08530},
  eprinttype   = {arXiv},
  eprint       = {1908.08530},
  timestamp    = {Tue, 12 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-08530.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1910.02940. Same title as DBLP:conf/iclr/GaoZLD20 (likely its
% preprint). NOTE(review): the third author is "Steve Lin" here and
% "Stephen Lin" in that entry; left as exported, confirm before normalising.
@article{DBLP:journals/corr/abs-1910-02940,
  author       = {Hang Gao and
                  Xizhou Zhu and
                  Steve Lin and
                  Jifeng Dai},
  title        = {Deformable Kernels: Adapting Effective Receptive Fields for Object
                  Deformation},
  journal      = {CoRR},
  volume       = {abs/1910.02940},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.02940},
  eprinttype   = {arXiv},
  eprint       = {1910.02940},
  timestamp    = {Fri, 06 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-02940.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2018 paper. The open-access URL is stored raw (https, no
% backslash-escaped underscores) because url is a verbatim field.
@inproceedings{DBLP:conf/cvpr/ZhuDYW18,
  author       = {Xizhou Zhu and
                  Jifeng Dai and
                  Lu Yuan and
                  Yichen Wei},
  title        = {Towards High Performance Video Object Detection},
  booktitle    = {2018 {IEEE} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2018, Salt Lake City, UT, USA, June 18-22, 2018},
  pages        = {7210--7218},
  publisher    = {Computer Vision Foundation / {IEEE} Computer Society},
  year         = {2018},
  url          = {https://openaccess.thecvf.com/content_cvpr_2018/html/Zhu_Towards_High_Performance_CVPR_2018_paper.html},
  doi          = {10.1109/CVPR.2018.00753},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/ZhuDYW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1804.05830 (CoRR record).
@article{DBLP:journals/corr/abs-1804-05830,
  author       = {Xizhou Zhu and
                  Jifeng Dai and
                  Xingchi Zhu and
                  Yichen Wei and
                  Lu Yuan},
  title        = {Towards High Performance Video Object Detection for Mobiles},
  journal      = {CoRR},
  volume       = {abs/1804.05830},
  year         = {2018},
  url          = {https://arxiv.org/abs/1804.05830},
  eprinttype   = {arXiv},
  eprint       = {1804.05830},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-05830.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1811.11167 (CoRR record).
@article{DBLP:journals/corr/abs-1811-11167,
  author       = {Zheng Zhang and
                  Dazhi Cheng and
                  Xizhou Zhu and
                  Stephen Lin and
                  Jifeng Dai},
  title        = {Integrated Object Detection and Tracking with Tracklet-Conditioned
                  Detection},
  journal      = {CoRR},
  volume       = {abs/1811.11167},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.11167},
  eprinttype   = {arXiv},
  eprint       = {1811.11167},
  timestamp    = {Thu, 19 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-11167.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1811.11168. Same authors and (up to the case of "v2") same
% title as DBLP:conf/cvpr/ZhuHLD19 (likely its preprint); cite only one.
% "ConvNets" is brace-protected against sentence casing.
@article{DBLP:journals/corr/abs-1811-11168,
  author       = {Xizhou Zhu and
                  Han Hu and
                  Stephen Lin and
                  Jifeng Dai},
  title        = {Deformable {ConvNets} v2: More Deformable, Better Results},
  journal      = {CoRR},
  volume       = {abs/1811.11168},
  year         = {2018},
  url          = {https://arxiv.org/abs/1811.11168},
  eprinttype   = {arXiv},
  eprint       = {1811.11168},
  timestamp    = {Mon, 05 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-11168.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2017 paper. The URL uses the canonical doi.org resolver, like the other
% DOI links in this file, instead of a publisher-specific resolver host.
@inproceedings{DBLP:conf/cvpr/ZhuXDYW17,
  author       = {Xizhou Zhu and
                  Yuwen Xiong and
                  Jifeng Dai and
                  Lu Yuan and
                  Yichen Wei},
  title        = {Deep Feature Flow for Video Recognition},
  booktitle    = {2017 {IEEE} Conference on Computer Vision and Pattern Recognition,
                  {CVPR} 2017, Honolulu, HI, USA, July 21-26, 2017},
  pages        = {4141--4150},
  publisher    = {{IEEE} Computer Society},
  year         = {2017},
  url          = {https://doi.org/10.1109/CVPR.2017.441},
  doi          = {10.1109/CVPR.2017.441},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cvpr/ZhuXDYW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICCV 2017 paper (IEEE Computer Society), identified by DOI.
@inproceedings{DBLP:conf/iccv/ZhuWDYW17,
  author    = {Zhu, Xizhou and Wang, Yujie and Dai, Jifeng and
               Yuan, Lu and Wei, Yichen},
  title     = {Flow-Guided Feature Aggregation for Video Object Detection},
  booktitle = {{IEEE} International Conference on Computer Vision, {ICCV} 2017,
               Venice, Italy, October 22-29, 2017},
  pages     = {408--417},
  publisher = {{IEEE} Computer Society},
  year      = {2017},
  url       = {https://doi.org/10.1109/ICCV.2017.52},
  doi       = {10.1109/ICCV.2017.52},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iccv/ZhuWDYW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1703.10025. Same title and author list as
% DBLP:conf/iccv/ZhuWDYW17 (likely its preprint); cite only one of the two.
@article{DBLP:journals/corr/ZhuWDYW17,
  author       = {Xizhou Zhu and
                  Yujie Wang and
                  Jifeng Dai and
                  Lu Yuan and
                  Yichen Wei},
  title        = {Flow-Guided Feature Aggregation for Video Object Detection},
  journal      = {CoRR},
  volume       = {abs/1703.10025},
  year         = {2017},
  url          = {https://arxiv.org/abs/1703.10025},
  eprinttype   = {arXiv},
  eprint       = {1703.10025},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ZhuWDYW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1711.11577. Same title and author list as
% DBLP:conf/cvpr/ZhuDYW18 (likely its preprint); cite only one of the two.
@article{DBLP:journals/corr/abs-1711-11577,
  author       = {Xizhou Zhu and
                  Jifeng Dai and
                  Lu Yuan and
                  Yichen Wei},
  title        = {Towards High Performance Video Object Detection},
  journal      = {CoRR},
  volume       = {abs/1711.11577},
  year         = {2017},
  url          = {https://arxiv.org/abs/1711.11577},
  eprinttype   = {arXiv},
  eprint       = {1711.11577},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1711-11577.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (TVCG 22(1)). The journal name is stored in full; abbreviation
% is left to the bibliography style.
@article{DBLP:journals/tvcg/LiuLZLWP16,
  author       = {Mengchen Liu and
                  Shixia Liu and
                  Xizhou Zhu and
                  Qinying Liao and
                  Furu Wei and
                  Shimei Pan},
  title        = {An Uncertainty-Aware Approach for Exploratory Microblog Retrieval},
  journal      = {{IEEE} Transactions on Visualization and Computer Graphics},
  volume       = {22},
  number       = {1},
  pages        = {250--259},
  year         = {2016},
  url          = {https://doi.org/10.1109/TVCG.2015.2467554},
  doi          = {10.1109/TVCG.2015.2467554},
  timestamp    = {Sun, 06 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tvcg/LiuLZLWP16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1611.07715. Same title and author list as
% DBLP:conf/cvpr/ZhuXDYW17 (likely its preprint); cite only one of the two.
@article{DBLP:journals/corr/ZhuXDYW16,
  author       = {Xizhou Zhu and
                  Yuwen Xiong and
                  Jifeng Dai and
                  Lu Yuan and
                  Yichen Wei},
  title        = {Deep Feature Flow for Video Recognition},
  journal      = {CoRR},
  volume       = {abs/1611.07715},
  year         = {2016},
  url          = {https://arxiv.org/abs/1611.07715},
  eprinttype   = {arXiv},
  eprint       = {1611.07715},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/ZhuXDYW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PCM 2015 paper (LNCS 9314). NOTE(review): the matching author is
% "Xizhou Zhuang", not "Xizhou Zhu"; presumably a prefix hit of the search.
% "3D" is brace-protected against sentence casing; the DOI is stored unescaped.
@inproceedings{DBLP:conf/pcm/ChengZWHI15,
  author       = {Xina Cheng and
                  Xizhou Zhuang and
                  Yuan Wang and
                  Masaaki Honda and
                  Takeshi Ikenaga},
  editor       = {Yo{-}Sung Ho and
                  Jitao Sang and
                  Yong Man Ro and
                  Junmo Kim and
                  Fei Wu},
  title        = {Particle Filter with Ball Size Adaptive Tracking Window and Ball Feature
                  Likelihood Model for Ball's {3D} Position Tracking in Volleyball Analysis},
  booktitle    = {Advances in Multimedia Information Processing - {PCM} 2015 - 16th
                  Pacific-Rim Conference on Multimedia, Gwangju, South Korea, September
                  16-18, 2015, Proceedings, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {9314},
  pages        = {203--211},
  publisher    = {Springer},
  year         = {2015},
  url          = {https://doi.org/10.1007/978-3-319-24075-6_20},
  doi          = {10.1007/978-3-319-24075-6_20},
  timestamp    = {Tue, 14 May 2019 10:00:54 +0200},
  biburl       = {https://dblp.org/rec/conf/pcm/ChengZWHI15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PCM 2015 paper (LNCS 9314). NOTE(review): the matching author is
% "Xizhou Zhuang", not "Xizhou Zhu"; presumably a prefix hit of the search.
% "3D" is brace-protected against sentence casing; the DOI is stored unescaped.
@inproceedings{DBLP:conf/pcm/ZhuangCHHI15,
  author       = {Xizhou Zhuang and
                  Xina Cheng and
                  Shu{-}Yi Huang and
                  Masaaki Honda and
                  Takeshi Ikenaga},
  editor       = {Yo{-}Sung Ho and
                  Jitao Sang and
                  Yong Man Ro and
                  Junmo Kim and
                  Fei Wu},
  title        = {Motion Vector and Players' Features Based Particle Filter for Volleyball
                  Players Tracking in {3D} Space},
  booktitle    = {Advances in Multimedia Information Processing - {PCM} 2015 - 16th
                  Pacific-Rim Conference on Multimedia, Gwangju, South Korea, September
                  16-18, 2015, Proceedings, Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {9314},
  pages        = {393--401},
  publisher    = {Springer},
  year         = {2015},
  url          = {https://doi.org/10.1007/978-3-319-24075-6_38},
  doi          = {10.1007/978-3-319-24075-6_38},
  timestamp    = {Sun, 21 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/pcm/ZhuangCHHI15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1512.04038. Same title and author list as
% DBLP:journals/tvcg/LiuLZLWP16 (likely its preprint); cite only one of the two.
@article{DBLP:journals/corr/LiuLZLWP15,
  author       = {Mengchen Liu and
                  Shixia Liu and
                  Xizhou Zhu and
                  Qinying Liao and
                  Furu Wei and
                  Shimei Pan},
  title        = {An Uncertainty-Aware Approach for Exploratory Microblog Retrieval},
  journal      = {CoRR},
  volume       = {abs/1512.04038},
  year         = {2015},
  url          = {https://arxiv.org/abs/1512.04038},
  eprinttype   = {arXiv},
  eprint       = {1512.04038},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/LiuLZLWP15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% APSIPA 2014 paper. NOTE(review): the matching author is "Xizhou Zhuang",
% not "Xizhou Zhu"; presumably a prefix hit of the search.
% The title is stored in Title Case like the other entries, so styles that do
% not recase titles render it consistently.
@inproceedings{DBLP:conf/apsipa/ChengSZI14,
  author       = {Xina Cheng and
                  Yuhi Shiina and
                  Xizhou Zhuang and
                  Takeshi Ikenaga},
  title        = {Player Tracking Using Prediction After Intersection Based Particle
                  Filter for Volleyball Match Video},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} 2014, Chiang Mai, Thailand, December
                  9-12, 2014},
  pages        = {1--4},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/APSIPA.2014.7041650},
  doi          = {10.1109/APSIPA.2014.7041650},
  timestamp    = {Wed, 16 Oct 2019 14:14:55 +0200},
  biburl       = {https://dblp.org/rec/conf/apsipa/ChengSZI14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}