% dblp publication search: results for "html document",
% exported from https://dblp.org and downloaded as a .bib file.

@article{DBLP:journals/jips/HwangPS23,
  author     = {Hwang, Hyun Cheon and Park, Ji Su and Shon, Jin Gon},
  title      = {Design and Implementation of the Document {HTML} System for Preserving Content Integrity},
  journal    = {J. Inf. Process. Syst.},
  volume     = {19},
  number     = {3},
  pages      = {334--346},
  year       = {2023},
  url        = {https://doi.org/10.3745/JIPS.04.0276},
  doi        = {10.3745/JIPS.04.0276},
  timestamp  = {Tue, 08 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jips/HwangPS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2201-10608,
  author     = {Deng, Xiang and Shiralkar, Prashant and Lockard, Colin and Huang, Binxuan and Sun, Huan},
  title      = {{DOM-LM:} Learning Generalizable Representations for {HTML} Documents},
  journal    = {CoRR},
  volume     = {abs/2201.10608},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.10608},
  eprinttype = {arXiv},
  eprint     = {2201.10608},
  timestamp  = {Wed, 31 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-10608.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
% The proper noun in the title is braced so sentence-casing styles keep it.
@inproceedings{DBLP:conf/sai/BakaoukasB21,
  author     = {Bakaoukas, Nikolaos G. and Bakaoukas, Anastasios G.},
  editor     = {Arai, Kohei},
  title      = {A Novel Three-Way Merge Algorithm for {HTML/XML} Documents Using a Hidden {Markov} Model},
  booktitle  = {Intelligent Computing - Proceedings of the 2021 Computing Conference, Volume 1, {SAI} 2021, Virtual Event, 15-16 July, 2021},
  series     = {Lecture Notes in Networks and Systems},
  volume     = {283},
  pages      = {75--101},
  publisher  = {Springer},
  year       = {2021},
  url        = {https://doi.org/10.1007/978-3-030-80119-9_3},
  doi        = {10.1007/978-3-030-80119-9_3},
  timestamp  = {Tue, 21 Feb 2023 10:40:01 +0100},
  biburl     = {https://dblp.org/rec/conf/sai/BakaoukasB21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2112-00962,
  author     = {Douraki, Majid Jaberi and Dinani, Soudabeh Taghian and Gedara, Nuwan Indika Millagaha and Xu, Xuan and Richards, Emily and Maunsell, Fiona and Zad, Nader and Tell, Lisa Ann},
  title      = {Large-Scale Data Mining of Rapid Residue Detection Assay Data From {HTML} and {PDF} Documents: Improving Data Access and Visualization for Veterinarians},
  journal    = {CoRR},
  volume     = {abs/2112.00962},
  year       = {2021},
  url        = {https://arxiv.org/abs/2112.00962},
  eprinttype = {arXiv},
  eprint     = {2112.00962},
  timestamp  = {Tue, 07 Dec 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2112-00962.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
% The system name in the title is braced so sentence-casing styles keep it.
@inproceedings{DBLP:conf/nbis/MaruyamaONYK20,
  author     = {Maruyama, Koki and Ohtaki, Yasuhiro and Niibori, Michitoshi and Yonekura, Tatsuhiro and Kamada, Masaru},
  editor     = {Barolli, Leonard and Li, Kin Fun and Enokido, Tomoya and Takizawa, Makoto},
  title      = {Another {HTML5} Implementation of {Web-Com} for Embedding Chalk Annotations and Talk Voices into {HTML} Documents},
  booktitle  = {Advances in Networked-Based Information Systems - The 23rd International Conference on Network-Based Information Systems, NBiS 2020, Victoria, BC, Canada, 31 August - 2 September 2020},
  series     = {Advances in Intelligent Systems and Computing},
  volume     = {1264},
  pages      = {560--566},
  publisher  = {Springer},
  year       = {2020},
  url        = {https://doi.org/10.1007/978-3-030-57811-4_56},
  doi        = {10.1007/978-3-030-57811-4_56},
  timestamp  = {Fri, 21 Aug 2020 15:16:47 +0200},
  biburl     = {https://dblp.org/rec/conf/nbis/MaruyamaONYK20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
@inproceedings{DBLP:conf/sai/BakaoukasB20,
  author     = {Bakaoukas, Anastasios G. and Bakaoukas, Nikolaos G.},
  editor     = {Arai, Kohei and Kapoor, Supriya and Bhatia, Rahul},
  title      = {A Top-Down Three-Way Merge Algorithm for {HTML/XML} Documents},
  booktitle  = {Intelligent Computing - Proceedings of the 2020 Computing Conference, Volume 1, {SAI} 2020, London, UK, 16-17 July 2020},
  series     = {Advances in Intelligent Systems and Computing},
  volume     = {1228},
  pages      = {75--96},
  publisher  = {Springer},
  year       = {2020},
  url        = {https://doi.org/10.1007/978-3-030-52249-0_6},
  doi        = {10.1007/978-3-030-52249-0_6},
  timestamp  = {Tue, 07 Jul 2020 14:30:18 +0200},
  biburl     = {https://dblp.org/rec/conf/sai/BakaoukasB20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/vl/SchmollVSSC18,
  author     = {Schmoll, Steven and Vishwanath, Anith and Siddiqui, Mohammad Ammar and Subbaiah, Boppaiah Koothanda and Chua, Caslon},
  editor     = {Cunha, J{\'{a}}come and Fernandes, Jo{\~{a}}o Paulo and Kelleher, Caitlin and Engels, Gregor and Mendes, Jorge},
  title      = {{HTML} Document Error Detector and Visualiser for Novice Programmers},
  booktitle  = {2018 {IEEE} Symposium on Visual Languages and Human-Centric Computing, {VL/HCC} 2018, Lisbon, Portugal, October 1-4, 2018},
  pages      = {291--292},
  publisher  = {{IEEE} Computer Society},
  year       = {2018},
  url        = {https://doi.org/10.1109/VLHCC.2018.8506569},
  doi        = {10.1109/VLHCC.2018.8506569},
  timestamp  = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/vl/SchmollVSSC18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The arXiv abstract link uses https, as the newer CoRR records in this file do.
@article{DBLP:journals/corr/abs-1801-08928,
  author     = {Yang, Jinqiu and Wittern, Erik and Ying, Annie T. T. and Dolby, Julian and Tan, Lin},
  title      = {Automatically Extracting Web {API} Specifications from {HTML} Documentation},
  journal    = {CoRR},
  volume     = {abs/1801.08928},
  year       = {2018},
  url        = {https://arxiv.org/abs/1801.08928},
  eprinttype = {arXiv},
  eprint     = {1801.08928},
  timestamp  = {Sun, 23 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1801-08928.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
@inproceedings{DBLP:conf/icsoc/CaoFB17,
  author     = {Cao, Hanyang and Falleri, Jean{-}R{\'{e}}my and Blanc, Xavier},
  editor     = {Maximilien, E. Michael and Vallecillo, Antonio and Wang, Jianmin and Oriol, Marc},
  title      = {Automated Generation of {REST} {API} Specification from Plain {HTML} Documentation},
  booktitle  = {Service-Oriented Computing - 15th International Conference, {ICSOC} 2017, Malaga, Spain, November 13-16, 2017, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {10601},
  pages      = {453--461},
  publisher  = {Springer},
  year       = {2017},
  url        = {https://doi.org/10.1007/978-3-319-69035-3_32},
  doi        = {10.1007/978-3-319-69035-3_32},
  timestamp  = {Tue, 12 Sep 2023 07:57:22 +0200},
  biburl     = {https://dblp.org/rec/conf/icsoc/CaoFB17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/lht/VallezPCBR15,
  author     = {Vallez, Mari and Pedraza{-}Jimenez, Rafael and Codina, Llu{\'{\i}}s and Blanco, Sa{\'{u}}l and Rovira, Crist{\`{o}}fol},
  title      = {A semi-automatic indexing system based on embedded information in {HTML} documents},
  journal    = {Libr. Hi Tech},
  volume     = {33},
  number     = {2},
  pages      = {195--210},
  year       = {2015},
  url        = {https://doi.org/10.1108/LHT-12-2014-0114},
  doi        = {10.1108/LHT-12-2014-0114},
  timestamp  = {Sat, 05 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/lht/VallezPCBR15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/pvldb/ManabeT15,
  author     = {Manabe, Tomohiro and Tajima, Keishi},
  title      = {Extracting Logical Hierarchical Structure of {HTML} Documents Based on Headings},
  journal    = {Proc. {VLDB} Endow.},
  volume     = {8},
  number     = {12},
  pages      = {1606--1617},
  year       = {2015},
  url        = {http://www.vldb.org/pvldb/vol8/p1606-manabe.pdf},
  doi        = {10.14778/2824032.2824058},
  timestamp  = {Sat, 25 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/pvldb/ManabeT15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
@inproceedings{DBLP:conf/airs/FaghaniHM15,
  author     = {Faghani, Shabanali and Hadian, Ali and Minaei{-}Bidgoli, Behrouz},
  editor     = {Zuccon, Guido and Geva, Shlomo and Joho, Hideo and Scholer, Falk and Sun, Aixin and Zhang, Peng},
  title      = {Charset Encoding Detection of {HTML} Documents - {A} Practical Experience},
  booktitle  = {Information Retrieval Technology - 11th Asia Information Retrieval Societies Conference, {AIRS} 2015, Brisbane, QLD, Australia, December 2-4, 2015. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {9460},
  pages      = {215--226},
  publisher  = {Springer},
  year       = {2015},
  url        = {https://doi.org/10.1007/978-3-319-28940-3_17},
  doi        = {10.1007/978-3-319-28940-3_17},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/airs/FaghaniHM15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icitcs/PrompilaL15,
  author     = {Prompila, Ekarat and Limpiyakorn, Yachai},
  title      = {Automating Documentation of {HTML} Prototype},
  booktitle  = {5th International Conference on {IT} Convergence and Security, {ICITCS} 2015, Kuala Lumpur, Malaysia, August 24-27, 2015},
  pages      = {1--5},
  publisher  = {{IEEE} Computer Society},
  year       = {2015},
  url        = {https://doi.org/10.1109/ICITCS.2015.7292955},
  doi        = {10.1109/ICITCS.2015.7292955},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icitcs/PrompilaL15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
% Mixed-case words in the title are braced so sentence-casing styles keep them.
@inproceedings{DBLP:conf/mkm/NakashoS15,
  author     = {Nakasho, Kazuhisa and Shidama, Yasunari},
  editor     = {Kerber, Manfred and Carette, Jacques and Kaliszyk, Cezary and Rabe, Florian and Sorge, Volker},
  title      = {Documentation Generator Focusing on Symbols for the {HTML-ized} {Mizar} Library},
  booktitle  = {Intelligent Computer Mathematics - International Conference, {CICM} 2015, Washington, DC, USA, July 13-17, 2015, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {9150},
  pages      = {343--347},
  publisher  = {Springer},
  year       = {2015},
  url        = {https://doi.org/10.1007/978-3-319-20615-8_25},
  doi        = {10.1007/978-3-319-20615-8_25},
  timestamp  = {Fri, 20 Nov 2020 16:08:54 +0100},
  biburl     = {https://dblp.org/rec/conf/mkm/NakashoS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv record with the same authors and title as DBLP:conf/mkm/NakashoS15;
% presumably the preprint of that paper, so cite only one of the two.
% Mixed-case words in the title are braced so sentence-casing styles keep them.
@article{DBLP:journals/corr/NakashoS15,
  author     = {Nakasho, Kazuhisa and Shidama, Yasunari},
  title      = {Documentation Generator Focusing on Symbols for the {HTML-ized} {Mizar} Library},
  journal    = {CoRR},
  volume     = {abs/1505.01577},
  year       = {2015},
  url        = {https://arxiv.org/abs/1505.01577},
  eprinttype = {arXiv},
  eprint     = {1505.01577},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/NakashoS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The mixed-case word in the title is braced so sentence-casing styles keep it.
@article{DBLP:journals/ijcistudies/ShanmugavadivuS14,
  author     = {Shanmugavadivu, P. and Sumathy, P. and Vadivel, A.},
  title      = {Ranking images in web documents based on {HTML} {TAGs} for image retrieval from {WWW}},
  journal    = {Int. J. Comput. Intell. Stud.},
  volume     = {3},
  number     = {2/3},
  pages      = {176--195},
  year       = {2014},
  url        = {https://doi.org/10.1504/IJCISTUDIES.2014.062730},
  doi        = {10.1504/IJCISTUDIES.2014.062730},
  timestamp  = {Wed, 01 Apr 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijcistudies/ShanmugavadivuS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/das/NagySE14,
  author     = {Nagy, George and Seth, Sharad C. and Embley, David W.},
  editor     = {Ramel, Jean{-}Yves and Liwicki, Marcus and Ogier, Jean{-}Marc and Kise, Koichi and Smith, Ray},
  title      = {End-to-End Conversion of {HTML} Tables for Populating a Relational Database},
  booktitle  = {11th {IAPR} International Workshop on Document Analysis Systems, {DAS} 2014, Tours, France, April 7-10, 2014},
  pages      = {222--226},
  publisher  = {{IEEE} Computer Society},
  year       = {2014},
  url        = {https://doi.org/10.1109/DAS.2014.9},
  doi        = {10.1109/DAS.2014.9},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/das/NagySE14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@phdthesis{DBLP:phd/basesearch/Mohammadzadeh13,
  author     = {Mohammadzadeh, Hadi},
  title      = {Improving Retrieval Accuracy in Main Content Extraction from {HTML} Web Documents},
  school     = {Leipzig University, Germany},
  year       = {2013},
  url        = {https://nbn-resolving.org/urn:nbn:de:bsz:15-qucosa-130500},
  urn        = {urn:nbn:de:bsz:15-qucosa-130500},
  timestamp  = {Wed, 13 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/phd/basesearch/Mohammadzadeh13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Mixed-case words in the title are braced so sentence-casing styles keep them.
@inproceedings{DBLP:conf/doceng/FerilliER13,
  author     = {Ferilli, Stefano and Esposito, Floriana and Redavid, Domenico},
  editor     = {Marinai, Simone and Marriott, Kim},
  title      = {{Hi-Fi} {HTML} rendering of multi-format documents in {DoMinUS}},
  booktitle  = {{ACM} Symposium on Document Engineering 2013, DocEng '13, Florence, Italy, September 10-13, 2013},
  pages      = {173--176},
  publisher  = {{ACM}},
  year       = {2013},
  url        = {https://doi.org/10.1145/2494266.2494272},
  doi        = {10.1145/2494266.2494272},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/doceng/FerilliER13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/doceng/SilvaSM13,
  author     = {Silva, Esdras Caleb Oliveira and dos Santos, Joel Andr{\'{e}} Ferreira and Muchaluat{-}Saade, D{\'{e}}bora C.},
  editor     = {Marinai, Simone and Marriott, Kim},
  title      = {{NCL4WEB:} translating {NCL} applications to {HTML5} web pages},
  booktitle  = {{ACM} Symposium on Document Engineering 2013, DocEng '13, Florence, Italy, September 10-13, 2013},
  pages      = {253--262},
  publisher  = {{ACM}},
  year       = {2013},
  url        = {https://doi.org/10.1145/2494266.2494273},
  doi        = {10.1145/2494266.2494273},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/doceng/SilvaSM13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/eisic/GoslinH13,
  author     = {Goslin, Kyle and Hofmann, Markus},
  title      = {Cross Domain Assessment of Document to {HTML} Conversion Tools to Quantify Text and Structural Loss during Document Analysis},
  booktitle  = {2013 European Intelligence and Security Informatics Conference, Uppsala, Sweden, August 12-14, 2013},
  pages      = {100--105},
  publisher  = {{IEEE}},
  year       = {2013},
  url        = {https://doi.org/10.1109/EISIC.2013.22},
  doi        = {10.1109/EISIC.2013.22},
  timestamp  = {Thu, 08 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/eisic/GoslinH13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
@inproceedings{DBLP:conf/sas/KimDS13,
  author     = {Kim, Hyunha and Doh, Kyung{-}Goo and Schmidt, David A.},
  editor     = {Logozzo, Francesco and F{\"{a}}hndrich, Manuel},
  title      = {Static Validation of Dynamically Generated {HTML} Documents Based on Abstract Parsing and Semantic Processing},
  booktitle  = {Static Analysis - 20th International Symposium, {SAS} 2013, Seattle, WA, USA, June 20-22, 2013. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {7935},
  pages      = {194--214},
  publisher  = {Springer},
  year       = {2013},
  url        = {https://doi.org/10.1007/978-3-642-38856-9_12},
  doi        = {10.1007/978-3-642-38856-9_12},
  timestamp  = {Tue, 14 May 2019 10:00:52 +0200},
  biburl     = {https://dblp.org/rec/conf/sas/KimDS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/fskd/ZengFSH12,
  author     = {Zeng, Jun and Flanagan, Brendan and Sakai, Toshihiko and Hirokawa, Sachio},
  title      = {Extraction of relevant components using shallow structure of {HTML} documents},
  booktitle  = {9th International Conference on Fuzzy Systems and Knowledge Discovery, {FSKD} 2012, 29-31 May 2012, Chongqing, China},
  pages      = {1186--1190},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/FSKD.2012.6234295},
  doi        = {10.1109/FSKD.2012.6234295},
  timestamp  = {Wed, 16 Oct 2019 14:14:57 +0200},
  biburl     = {https://dblp.org/rec/conf/fskd/ZengFSH12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icaart/ZarradDZ12,
  author     = {Zarrad, Rim and Doggaz, Narjes and Zagrouba, Ezzeddine},
  editor     = {Filipe, Joaquim and Fred, Ana L. N.},
  title      = {Concepts Extraction based on {HTML} Documents Structure},
  booktitle  = {{ICAART} 2012 - Proceedings of the 4th International Conference on Agents and Artificial Intelligence, Volume 1 - Artificial Intelligence, Vilamoura, Algarve, Portugal, 6-8 February, 2012},
  pages      = {503--506},
  publisher  = {SciTePress},
  year       = {2012},
  timestamp  = {Thu, 03 May 2012 17:45:15 +0200},
  biburl     = {https://dblp.org/rec/conf/icaart/ZarradDZ12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iticse/Kurmas12b,
  author     = {Kurmas, Zachary},
  editor     = {Lapidot, Tami and Gal{-}Ezer, Judith and Caspersen, Michael E. and Hazzan, Orit},
  title      = {Kielce: configurable {HTML} course documents},
  booktitle  = {Annual Conference on Innovation and Technology in Computer Science Education, ITiCSE '12, Haifa, Israel, July 3-5, 2012},
  pages      = {403},
  publisher  = {{ACM}},
  year       = {2012},
  url        = {https://doi.org/10.1145/2325296.2325419},
  doi        = {10.1145/2325296.2325419},
  timestamp  = {Wed, 10 Mar 2021 13:17:16 +0100},
  biburl     = {https://dblp.org/rec/conf/iticse/Kurmas12b.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The arXiv abstract link uses https, as the newer CoRR records in this file do.
@article{DBLP:journals/corr/abs-1204-0186,
  author     = {Bassil, Youssef and Semaan, Paul},
  title      = {Semantic-Sensitive Web Information Retrieval Model for {HTML} Documents},
  journal    = {CoRR},
  volume     = {abs/1204.0186},
  year       = {2012},
  url        = {https://arxiv.org/abs/1204.0186},
  eprinttype = {arXiv},
  eprint     = {1204.0186},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1204-0186.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/cyberc/ZhuYWSRY11,
  author     = {Zhu, Yanxu and Yin, Gang and Wang, Huaimin and Shi, Dianxi and Rao, Xiang and Yuan, Lin},
  title      = {Efficient Approach for Repeated Patterns Mining Based on Indent Shape of {HTML} Documents},
  booktitle  = {2011 International Conference on Cyber-Enabled Distributed Computing and Knowledge Discovery, CyberC 2011, Beijing, China, October 10-12, 2011},
  pages      = {32--39},
  publisher  = {{IEEE} Computer Society},
  year       = {2011},
  url        = {https://doi.org/10.1109/CyberC.2011.15},
  doi        = {10.1109/CYBERC.2011.15},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/cyberc/ZhuYWSRY11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Names are in "Last, First" form. For the final author this matters: written
% as "First ... Last", the lowercase "da" is parsed as a von particle and
% everything after it becomes the surname.
@inproceedings{DBLP:conf/doceng/BultermanGCMP11,
  author     = {Bulterman, Dick C. A. and Guimar{\~{a}}es, Rodrigo Laiola and C{\'{e}}sar, Pablo and Munson, Ethan V. and Pimentel, Maria da Gra{\c{c}}a Campos},
  editor     = {Hardy, Matthew R. B. and Tompa, Frank Wm.},
  title      = {Multimedia document processing in an {HTML5} world},
  booktitle  = {Proceedings of the 2011 {ACM} Symposium on Document Engineering, Mountain View, CA, USA, September 19-22, 2011},
  pages      = {273--274},
  publisher  = {{ACM}},
  year       = {2011},
  url        = {https://doi.org/10.1145/2034691.2034747},
  doi        = {10.1145/2034691.2034747},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/doceng/BultermanGCMP11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/doceng/CazenaveQR11,
  author     = {Cazenave, Fabien and Quint, Vincent and Roisin, C{\'{e}}cile},
  editor     = {Hardy, Matthew R. B. and Tompa, Frank Wm.},
  title      = {Timesheets.js: when {SMIL} meets {HTML5} and {CSS3}},
  booktitle  = {Proceedings of the 2011 {ACM} Symposium on Document Engineering, Mountain View, CA, USA, September 19-22, 2011},
  pages      = {43--52},
  publisher  = {{ACM}},
  year       = {2011},
  url        = {https://doi.org/10.1145/2034691.2034700},
  doi        = {10.1145/2034691.2034700},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/doceng/CazenaveQR11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The acronym compound in the title is braced so sentence-casing styles keep it.
@inproceedings{DBLP:conf/doceng/Tseng11,
  author     = {Tseng, Lendle Chun{-}Hsiung},
  editor     = {Hardy, Matthew R. B. and Tompa, Frank Wm.},
  title      = {Developer-friendly annotation-based {HTML-to-XML} transformation technology},
  booktitle  = {Proceedings of the 2011 {ACM} Symposium on Document Engineering, Mountain View, CA, USA, September 19-22, 2011},
  pages      = {73--76},
  publisher  = {{ACM}},
  year       = {2011},
  url        = {https://doi.org/10.1145/2034691.2034707},
  doi        = {10.1145/2034691.2034707},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/doceng/Tseng11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ijdar/ZouLT10,
  author     = {Zou, Jie and Le, Daniel X. and Thoma, George R.},
  title      = {Locating and parsing bibliographic references in {HTML} medical articles},
  journal    = {Int. J. Document Anal. Recognit.},
  volume     = {13},
  number     = {2},
  pages      = {107--119},
  year       = {2010},
  url        = {https://doi.org/10.1007/s10032-009-0105-9},
  doi        = {10.1007/S10032-009-0105-9},
  timestamp  = {Thu, 13 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ijdar/ZouLT10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The product name in the title is braced so sentence-casing styles keep it.
@inproceedings{DBLP:conf/doceng/Piotrowski10,
  author     = {Piotrowski, Michael},
  editor     = {Antonacopoulos, Apostolos and Gormish, Michael J. and Ingold, Rolf},
  title      = {Document conversion for cultural heritage texts: {FrameMaker} to {HTML} revisited},
  booktitle  = {Proceedings of the 2010 {ACM} Symposium on Document Engineering, Manchester, United Kingdom, September 21-24, 2010},
  pages      = {223--226},
  publisher  = {{ACM}},
  year       = {2010},
  url        = {https://doi.org/10.1145/1860559.1860608},
  doi        = {10.1145/1860559.1860608},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/doceng/Piotrowski10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/rjour/PauH09,
  author     = {Pau, Gr{\'{e}}goire and Huber, Wolfgang},
  title      = {The hwriter package: Composing {HTML} documents with {R} objects},
  journal    = {R J.},
  volume     = {1},
  number     = {1},
  pages      = {22},
  year       = {2009},
  url        = {https://doi.org/10.32614/rj-2009-009},
  doi        = {10.32614/RJ-2009-009},
  timestamp  = {Tue, 27 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/rjour/PauH09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/compsystech/RozinajovaH09,
  author     = {Rozinajov{\'{a}}, Viera and Hluch{\'{y}}, Ondrej},
  editor     = {Rachev, Boris and Smrikarov, Angel},
  title      = {One approach to {HTML} wrappers creation: using Document Object Model tree},
  booktitle  = {Proceedings of the 2009 International Conference on Computer Systems and Technologies and Workshop for PhD Students in Computing, CompSysTech 2009, Rousse, Bulgaria, June 18-19, 2009},
  pages      = {41},
  publisher  = {{ACM}},
  year       = {2009},
  url        = {https://doi.org/10.1145/1731740.1731785},
  doi        = {10.1145/1731740.1731785},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/compsystech/RozinajovaH09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The misspelled word in the conference name is corrected.
% NOTE(review): the official CORIA name may use the singular "Information"; confirm.
@inproceedings{DBLP:conf/coria/WaszakLB09,
  author     = {Waszak, Thierry and de Loupy, Claude and Bellot, Patrice},
  title      = {Identification et structuration hi{\'{e}}rarchique des titres dans les documents {HTML}},
  booktitle  = {COnf{\'{e}}rence en Recherche d'Informations et Applications - {CORIA} 2009, 6th French Information Retrieval Conference, Presqu'{\^{\i}}le de Giens, France, May 5-7, 2009. Proceedings},
  pages      = {285--299},
  publisher  = {{LSIS-USTV}},
  year       = {2009},
  url        = {https://doi.org/10.24348/coria.2009.285},
  doi        = {10.24348/CORIA.2009.285},
  timestamp  = {Wed, 25 Sep 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/coria/WaszakLB09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% doi and url hold the raw identifier (plain underscore): both are verbatim
% fields, so an escaped underscore would leak into the resolved link.
@inproceedings{DBLP:conf/hci/HenschenL09,
  author     = {Henschen, Lawrence J. and Lee, Julia C.},
  editor     = {Stephanidis, Constantine},
  title      = {Using Semantic-Level Tags in {HTML/XML} Documents},
  booktitle  = {Universal Access in Human-Computer Interaction. Applications and Services, 5th International Conference, {UAHCI} 2009, Held as Part of {HCI} International 2009, San Diego, CA, USA, July 19-24, 2009. Proceedings, Part {III}},
  series     = {Lecture Notes in Computer Science},
  volume     = {5616},
  pages      = {683--692},
  publisher  = {Springer},
  year       = {2009},
  url        = {https://doi.org/10.1007/978-3-642-02713-0_72},
  doi        = {10.1007/978-3-642-02713-0_72},
  timestamp  = {Tue, 14 May 2019 10:00:43 +0200},
  biburl     = {https://dblp.org/rec/conf/hci/HenschenL09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iih-msp/LeeC09b,
  author     = {Lee, Chin{-}Feng and Chen, Hsing{-}Ling},
  editor     = {Pan, Jeng{-}Shyang and Chen, Yen{-}Wei and Jain, Lakhmi C.},
  title      = {Data Concealment Scheme for {HTML} Documents Based on Color Code},
  booktitle  = {Fifth International Conference on Intelligent Information Hiding and Multimedia Signal Processing {(IIH-MSP} 2009), Kyoto, Japan, 12-14 September, 2009, Proceedings},
  pages      = {632--635},
  publisher  = {{IEEE} Computer Society},
  year       = {2009},
  url        = {https://doi.org/10.1109/IIH-MSP.2009.19},
  doi        = {10.1109/IIH-MSP.2009.19},
  timestamp  = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iih-msp/LeeC09b.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@phdthesis{DBLP:books/daglib/0022691,
  author       = {Thomas Gottron},
  title        = {Content extraction - identifying the main content in {HTML} documents},
  school       = {University of Mainz, Germany},
  year         = {2008},
  url          = {http://ubm.opus.hbz-nrw.de/volltexte/2009/1859/index.html},
  urn          = {urn:nbn:de:hebis:77-18591},
  timestamp    = {Sat, 17 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/books/daglib/0022691.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cleiej/MenesesR08,
  author       = {Esteban Meneses and
                  Oldemar Rodr{\'{\i}}guez{-}Rojas},
  title        = {Measuring Contribution of {HTML} Features in Web Document Clustering},
  journal      = {{CLEI} Electron. J.},
  volume       = {11},
  number       = {2},
  year         = {2008},
  url          = {https://doi.org/10.19153/cleiej.11.2.7},
  doi          = {10.19153/CLEIEJ.11.2.7},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/cleiej/MenesesR08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tsmc/AshrafOA08,
  author       = {Fatima Ashraf and
                  Tansel {\"{O}}zyer and
                  Reda Alhajj},
  title        = {Employing Clustering Techniques for Automatic Information Extraction
                  From {HTML} Documents},
  journal      = {{IEEE} Trans. Syst. Man Cybern. Part {C}},
  volume       = {38},
  number       = {5},
  pages        = {660--673},
  year         = {2008},
  url          = {https://doi.org/10.1109/TSMCC.2008.923882},
  doi          = {10.1109/TSMCC.2008.923882},
  timestamp    = {Thu, 21 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tsmc/AshrafOA08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/IEEEscc/YangZT08,
  author       = {Stephen J. H. Yang and
                  Jia Zhang and
                  Stella T. C. Tsai},
  title        = {An Automatic Semantic Segment Detection Service for {HTML} Documents},
  booktitle    = {2008 {IEEE} International Conference on Services Computing {(SCC}
                  2008), 8-11 July 2008, Honolulu, Hawaii, {USA}},
  pages        = {210--217},
  publisher    = {{IEEE} Computer Society},
  year         = {2008},
  url          = {https://doi.org/10.1109/SCC.2008.155},
  doi          = {10.1109/SCC.2008.155},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/IEEEscc/YangZT08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iv/JernRAY08,
  author       = {Mikael Jern and
                  Jakob Rogstadius and
                  Tobias {\AA}str{\"{o}}m and
                  Anders Ynnerman},
  title        = {Visual Analytics Presentation Tools Applied in {HTML} Documents},
  booktitle    = {12th International Conference on Information Visualisation, {IV} 2008,
                  8-11 July 2008, London, {UK}},
  pages        = {200--207},
  publisher    = {{IEEE} Computer Society},
  year         = {2008},
  url          = {https://doi.org/10.1109/IV.2008.22},
  doi          = {10.1109/IV.2008.22},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iv/JernRAY08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cas/HaCohen-KernerSKF07,
  author       = {Yaakov HaCohen{-}Kerner and
                  Ittay Stern and
                  David Korkus and
                  Erick Fredj},
  title        = {Automatic Machine Learning of Keyphrase Extraction from Short {HTML}
                  Documents Written in Hebrew},
  journal      = {Cybern. Syst.},
  volume       = {38},
  number       = {1},
  pages        = {1--21},
  year         = {2007},
  url          = {https://doi.org/10.1080/01969720600998546},
  doi          = {10.1080/01969720600998546},
  timestamp    = {Thu, 13 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/cas/HaCohen-KernerSKF07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/assets/KeysersRB07,
  author       = {Daniel Keysers and
                  Marius Renn and
                  Thomas M. Breuel},
  editor       = {Enrico Pontelli and
                  Shari Trewin},
  title        = {Improving accessibility of {HTML} documents by generating image-tags
                  in a proxy},
  booktitle    = {Proceedings of the 9th International {ACM} {SIGACCESS} Conference
                  on Computers and Accessibility, {ASSETS} 2007, Tempe, Arizona, USA,
                  October 15-17, 2007},
  pages        = {249--250},
  publisher    = {{ACM}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1296843.1296896},
  doi          = {10.1145/1296843.1296896},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/assets/KeysersRB07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/doceng/WildeC07,
  author       = {Erik Wilde and
                  Philippe C. Cattin},
  editor       = {Peter R. King and
                  Steven J. Simske},
  title        = {Presenting in {HTML}},
  booktitle    = {Proceedings of the 2007 {ACM} Symposium on Document Engineering, Winnipeg,
                  Manitoba, Canada, August 28-31, 2007},
  pages        = {35--36},
  publisher    = {{ACM}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1284420.1284432},
  doi          = {10.1145/1284420.1284432},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/doceng/WildeC07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/doceng/ZouLT07,
  author       = {Jie Zou and
                  Daniel X. Le and
                  George R. Thoma},
  editor       = {Peter R. King and
                  Steven J. Simske},
  title        = {Structure and content analysis for {HTML} medical articles: a hidden
                  {Markov} model approach},
  booktitle    = {Proceedings of the 2007 {ACM} Symposium on Document Engineering, Winnipeg,
                  Manitoba, Canada, August 28-31, 2007},
  pages        = {199--201},
  publisher    = {{ACM}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1284420.1284468},
  doi          = {10.1145/1284420.1284468},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/doceng/ZouLT07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/KajiK07,
  author       = {Nobuhiro Kaji and
                  Masaru Kitsuregawa},
  editor       = {Jason Eisner},
  title        = {Building Lexicon for Sentiment Analysis from Massive Collection of
                  {HTML} Documents},
  booktitle    = {EMNLP-CoNLL 2007, Proceedings of the 2007 Joint Conference on Empirical
                  Methods in Natural Language Processing and Computational Natural Language
                  Learning, June 28-30, 2007, Prague, Czech Republic},
  pages        = {1075--1083},
  publisher    = {{ACL}},
  year         = {2007},
  url          = {https://aclanthology.org/D07-1115/},
  timestamp    = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/KajiK07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/f-egc/WisniewskiG07,
  author       = {Guillaume Wisniewski and
                  Patrick Gallinari},
  editor       = {Monique Noirhomme{-}Fraiture and
                  Gilles Venturini},
  title        = {R{\'{e}}-ordonnancement pour l'apprentissage de transformations
                  de documents {HTML}},
  booktitle    = {Extraction et gestion des connaissances (EGC'2007), Actes des cinqui{\`{e}}mes
                  journ{\'{e}}es Extraction et Gestion des Connaissances, Namur,
                  Belgique, 23-26 janvier 2007, 2 Volumes},
  series       = {Revue des Nouvelles Technologies de l'Information},
  volume       = {{RNTI-E-9}},
  pages        = {727--738},
  publisher    = {C{\'{e}}padu{\`{e}}s-{\'{E}}ditions},
  year         = {2007},
  url          = {http://editions-rnti.fr/?inprocid=1001461},
  timestamp    = {Thu, 24 Apr 2014 17:37:27 +0200},
  biburl       = {https://dblp.org/rec/conf/f-egc/WisniewskiG07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdar/Burget07,
  author       = {Radek Burget},
  title        = {Layout Based Information Extraction from {HTML} Documents},
  booktitle    = {9th International Conference on Document Analysis and Recognition
                  {(ICDAR} 2007), 23-26 September, Curitiba, Paran{\'{a}}, Brazil},
  pages        = {624--628},
  publisher    = {{IEEE} Computer Society},
  year         = {2007},
  url          = {https://doi.org/10.1109/ICDAR.2007.4376990},
  doi          = {10.1109/ICDAR.2007.4376990},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icdar/Burget07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/www/Alabi07,
  author       = {Kehinde Alabi},
  editor       = {Carey L. Williamson and
                  Mary Ellen Zurko and
                  Peter F. Patel{-}Schneider and
                  Prashant J. Shenoy},
  title        = {Generation, documentation and presentation of mathematical equations
                  and symbolic scientific expressions using pure {HTML} and {CSS}},
  booktitle    = {Proceedings of the 16th International Conference on World Wide Web,
                  {WWW} 2007, Banff, Alberta, Canada, May 8-12, 2007},
  pages        = {1321--1322},
  publisher    = {{ACM}},
  year         = {2007},
  url          = {https://doi.org/10.1145/1242572.1242830},
  doi          = {10.1145/1242572.1242830},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/www/Alabi07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/cn/SankaCC06,
  author       = {Anoop Sanka and
                  Shravan Chamakura and
                  Sharma Chakravarthy},
  title        = {A dataflow approach to efficient change detection of {HTML/XML} documents
                  in {WebVigiL}},
  journal      = {Comput. Networks},
  volume       = {50},
  number       = {10},
  pages        = {1547--1563},
  year         = {2006},
  url          = {https://doi.org/10.1016/j.comnet.2005.10.016},
  doi          = {10.1016/J.COMNET.2005.10.016},
  timestamp    = {Wed, 19 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/cn/SankaCC06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ACMicec/SimonLB06,
  author       = {Kai Simon and
                  Georg Lausen and
                  Harold Boley},
  editor       = {Mark S. Fox and
                  Bruce Spencer},
  title        = {From {HTML} documents to web tables and rules},
  booktitle    = {Proceedings of the 8th International Conference on Electronic Commerce:
                  The new e-commerce - Innovations for Conquering Current Barriers,
                  Obstacles and Limitations to Conducting Successful Business on the
                  Internet, 2006, Fredericton, New Brunswick, Canada, August 13-16,
                  2006},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {156},
  pages        = {125--131},
  publisher    = {{ACM}},
  year         = {2006},
  url          = {https://doi.org/10.1145/1151454.1151484},
  doi          = {10.1145/1151454.1151484},
  timestamp    = {Tue, 06 Apr 2021 12:12:56 +0200},
  biburl       = {https://dblp.org/rec/conf/ACMicec/SimonLB06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/KajiK06,
  author       = {Nobuhiro Kaji and
                  Masaru Kitsuregawa},
  editor       = {Nicoletta Calzolari and
                  Claire Cardie and
                  Pierre Isabelle},
  title        = {Automatic Construction of Polarity-Tagged Corpus from {HTML} Documents},
  booktitle    = {{ACL} 2006, 21st International Conference on Computational Linguistics
                  and 44th Annual Meeting of the Association for Computational Linguistics,
                  Proceedings of the Conference, Sydney, Australia, 17-21 July 2006},
  publisher    = {The Association for Computational Linguistics},
  year         = {2006},
  url          = {https://aclanthology.org/P06-2059/},
  timestamp    = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/acl/KajiK06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/doceng/LeveringC06,
  author       = {Ryan Levering and
                  Michal Cutler},
  editor       = {Dick C. A. Bulterman and
                  David F. Brailsford},
  title        = {The portrait of a common {HTML} web page},
  booktitle    = {Proceedings of the 2006 {ACM} Symposium on Document Engineering, Amsterdam,
                  The Netherlands, October 10-13, 2006},
  pages        = {198--204},
  publisher    = {{ACM}},
  year         = {2006},
  url          = {https://doi.org/10.1145/1166160.1166213},
  doi          = {10.1145/1166160.1166213},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/doceng/LeveringC06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdim/BhattiA06,
  author       = {Muhammad Afzal Bhatti and
                  Adeel Ahmad},
  title        = {{PDF} to {HTML} Conversion: Having a Usable Web Document},
  booktitle    = {First {IEEE} International Conference on Digital Information Management
                  (ICDIM), December 6-8, 2006, Christ College, Bangalore, India, Proceedings},
  pages        = {289--293},
  publisher    = {{IEEE}},
  year         = {2006},
  url          = {https://doi.org/10.1109/ICDIM.2007.369212},
  doi          = {10.1109/ICDIM.2007.369212},
  timestamp    = {Tue, 04 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icdim/BhattiA06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/www/NannoO06,
  author       = {Tomoyuki Nanno and
                  Manabu Okumura},
  editor       = {Les Carr and
                  David De Roure and
                  Arun Iyengar and
                  Carole A. Goble and
                  Michael Dahlin},
  title        = {{HTML2RSS:} automatic generation of {RSS} feed based on structure
                  analysis of {HTML} document},
  booktitle    = {Proceedings of the 15th international conference on World Wide Web,
                  {WWW} 2006, Edinburgh, Scotland, UK, May 23-26, 2006},
  pages        = {1061--1062},
  publisher    = {{ACM}},
  year         = {2006},
  url          = {https://doi.org/10.1145/1135777.1136015},
  doi          = {10.1145/1135777.1136015},
  timestamp    = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/www/NannoO06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/www/NannoO06a,
  author       = {Tomoyuki Nanno and
                  Manabu Okumura},
  editor       = {Les Carr and
                  David De Roure and
                  Arun Iyengar and
                  Carole A. Goble and
                  Michael Dahlin},
  title        = {{HTML2RSS:} automatic generation of {RSS} feed based on structure
                  analysis of {HTML} document},
  booktitle    = {Proceedings of the 15th international conference on World Wide Web,
                  {WWW} 2006, Edinburgh, Scotland, UK, May 23-26, 2006},
  pages        = {1075--1076},
  publisher    = {{ACM}},
  year         = {2006},
  url          = {https://doi.org/10.1145/1135777.1136022},
  doi          = {10.1145/1135777.1136022},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/www/NannoO06a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ieicet/NakanoTTIY05,
  author       = {Yuusuke Nakano and
                  Koji Tsukada and
                  Saeko Takagi and
                  Kei Iwasaki and
                  Fujiichi Yoshimoto},
  title        = {{InCom}: Support System for Informal Communication in {3D} Virtual Worlds
                  Generated from {HTML} Documents},
  journal      = {{IEICE} Trans. Inf. Syst.},
  volume       = {88-D},
  number       = {5},
  pages        = {872--879},
  year         = {2005},
  url          = {https://doi.org/10.1093/ietisy/e88-d.5.872},
  doi          = {10.1093/IETISY/E88-D.5.872},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ieicet/NakanoTTIY05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jucs/Kolbitsch05,
  author       = {Josef Kolbitsch},
  title        = {Fine-Grained Transclusions of Multimedia Documents in {HTML}},
  journal      = {J. Univers. Comput. Sci.},
  volume       = {11},
  number       = {6},
  pages        = {926--943},
  year         = {2005},
  url          = {https://doi.org/10.3217/jucs-011-06-0926},
  doi          = {10.3217/JUCS-011-06-0926},
  timestamp    = {Thu, 07 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jucs/Kolbitsch05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ldvf/Rehm05,
  author       = {Georg Rehm},
  title        = {Language-Independent Text Parsing of Arbitrary {HTML}-Documents. Towards
                  {A} Foundation For Web Genre Identification},
  journal      = {{LDV} Forum},
  volume       = {20},
  number       = {2},
  pages        = {53--74},
  year         = {2005},
  url          = {http://www.jlcl.org/2005\_Heft2/Georg\_Rehm.pdf},
  timestamp    = {Mon, 19 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ldvf/Rehm05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/soco/PereiraMP05,
  author       = {Ricardo A. Marques Pereira and
                  Andrea Molinari and
                  Gabriella Pasi},
  title        = {Contextual weighted representations and indexing models for the retrieval
                  of {HTML} documents},
  journal      = {Soft Comput.},
  volume       = {9},
  number       = {7},
  pages        = {481--492},
  year         = {2005},
  url          = {https://doi.org/10.1007/s00500-004-0361-z},
  doi          = {10.1007/S00500-004-0361-Z},
  timestamp    = {Sat, 19 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/soco/PereiraMP05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/www/ChenM05,
  author       = {Huamin Chen and
                  Prasant Mohapatra},
  title        = {A Context-Aware {HTML/XML} Document Transmission Process for Mobile
                  Wireless Clients},
  journal      = {World Wide Web},
  volume       = {8},
  number       = {4},
  pages        = {439--461},
  year         = {2005},
  url          = {https://doi.org/10.1007/s11280-005-1314-x},
  doi          = {10.1007/S11280-005-1314-X},
  timestamp    = {Sat, 20 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/www/ChenM05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/www/GuptaKGCS05,
  author       = {Suhit Gupta and
                  Gail E. Kaiser and
                  Peter Grimm and
                  Michael F. Chiang and
                  Justin Starren},
  title        = {Automating Content Extraction of {HTML} Documents},
  journal      = {World Wide Web},
  volume       = {8},
  number       = {2},
  pages        = {179--224},
  year         = {2005},
  url          = {https://doi.org/10.1007/s11280-004-4873-3},
  doi          = {10.1007/S11280-004-4873-3},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/www/GuptaKGCS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/amia/RoemerRF05,
  author       = {Lorrie K. Roemer and
                  Roberto A. Rocha and
                  Guilherme Del Fiol},
  title        = {Integration of {HTML} Documents into an {XML}-Based Knowledge Repository},
  booktitle    = {{AMIA} 2005, American Medical Informatics Association Annual Symposium,
                  Washington, DC, USA, October 22-26, 2005},
  publisher    = {{AMIA}},
  year         = {2005},
  url          = {https://knowledge.amia.org/amia-55142-a2005a-1.613296/t-003-1.615259/f-001-1.615260/a-394-1.615497/a-395-1.615494},
  timestamp    = {Wed, 17 Apr 2024 11:48:22 +0200},
  biburl       = {https://dblp.org/rec/conf/amia/RoemerRF05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/clihc/KirschningR05,
  author       = {Ingrid Kirschning and
                  Joaqu{\'{\i}}n O. Rueda},
  editor       = {Maria Cec{\'{\i}}lia Calani Baranauskas and
                  Oscar Mayora{-}Ibarra},
  title        = {Animated agents and {TTS} for {HTML} documents},
  booktitle    = {{CLIHC} '05, Proceedings of the 2005 Latin American Conference on
                  Human-Computer Interaction, Cuernavaca, Mexico, October 23-26, 2005},
  pages        = {148--154},
  publisher    = {{ACM}},
  year         = {2005},
  url          = {https://doi.org/10.1145/1111360.1111375},
  doi          = {10.1145/1111360.1111375},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/clihc/KirschningR05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/grc/YerraN05,
  author       = {Rajiv Yerra and
                  Yiu{-}Kai Ng},
  editor       = {Xiaohua Hu and
                  Qing Liu and
                  Andrzej Skowron and
                  Tsau Young Lin and
                  Ronald R. Yager and
                  Bo Zhang},
  title        = {Detecting similar {HTML} documents using a fuzzy set information retrieval
                  approach},
  booktitle    = {2005 {IEEE} International Conference on Granular Computing, Beijing,
                  China, July 25-27, 2005},
  pages        = {693--699},
  publisher    = {{IEEE}},
  year         = {2005},
  url          = {https://doi.org/10.1109/GRC.2005.1547380},
  doi          = {10.1109/GRC.2005.1547380},
  timestamp    = {Fri, 09 Apr 2021 17:11:12 +0200},
  biburl       = {https://dblp.org/rec/conf/grc/YerraN05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icde/MukherjeeRS05,
  author       = {Saikat Mukherjee and
                  I. V. Ramakrishnan and
                  Amarjeet Singh},
  editor       = {Karl Aberer and
                  Michael J. Franklin and
                  Shojiro Nishio},
  title        = {Bootstrapping Semantic Annotations for Content-Rich {HTML} Documents},
  booktitle    = {Proceedings of the 21st International Conference on Data Engineering,
                  {ICDE} 2005, 5-8 April 2005, Tokyo, Japan},
  pages        = {583--593},
  publisher    = {{IEEE} Computer Society},
  year         = {2005},
  url          = {https://doi.org/10.1109/ICDE.2005.28},
  doi          = {10.1109/ICDE.2005.28},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icde/MukherjeeRS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ideas/LimN05,
  author       = {Seung Jin Lim and
                  Yiu{-}Kai Ng},
  editor       = {Bipin C. Desai and
                  Gottfried Vossen},
  title        = {Categorizing and Extracting Information from Multilingual {HTML} Documents},
  booktitle    = {Ninth International Database Engineering and Applications Symposium
                  {(IDEAS} 2005), 25-27 July 2005, Montreal, Canada},
  pages        = {415--422},
  publisher    = {{IEEE} Computer Society},
  year         = {2005},
  url          = {https://doi.org/10.1109/IDEAS.2005.15},
  doi          = {10.1109/IDEAS.2005.15},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ideas/LimN05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sigir/HuXSHSCL05,
  author       = {Yunhua Hu and
                  Guomao Xin and
                  Ruihua Song and
                  Guoping Hu and
                  Shuming Shi and
                  Yunbo Cao and
                  Hang Li},
  editor       = {Ricardo A. Baeza{-}Yates and
                  Nivio Ziviani and
                  Gary Marchionini and
                  Alistair Moffat and
                  John Tait},
  title        = {Title extraction from bodies of {HTML} documents and its application
                  to web page retrieval},
  booktitle    = {{SIGIR} 2005: Proceedings of the 28th Annual International {ACM} {SIGIR}
                  Conference on Research and Development in Information Retrieval, Salvador,
                  Brazil, August 15-19, 2005},
  pages        = {250--257},
  publisher    = {{ACM}},
  year         = {2005},
  url          = {https://doi.org/10.1145/1076034.1076079},
  doi          = {10.1145/1076034.1076079},
  timestamp    = {Wed, 02 Mar 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/sigir/HuXSHSCL05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/www/KruplHG05,
  author       = {Bernhard Kr{\"{u}}pl and
                  Marcus Herzog and
                  Wolfgang Gatterbauer},
  editor       = {Allan Ellis and
                  Tatsuya Hagino},
  title        = {Using visual cues for extraction of tabular data from arbitrary {HTML}
                  documents},
  booktitle    = {Proceedings of the 14th international conference on World Wide Web,
                  {WWW} 2005, Chiba, Japan, May 10-14, 2005 - Special interest tracks
                  and posters},
  pages        = {1000--1001},
  publisher    = {{ACM}},
  year         = {2005},
  url          = {https://doi.org/10.1145/1062745.1062838},
  doi          = {10.1145/1062745.1062838},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/www/KruplHG05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/dn/DenoyerG04,
  author       = {Ludovic Denoyer and
                  Patrick Gallinari},
  title        = {Un mod{\`{e}}le de mixture de mod{\`{e}}les g{\'{e}}n{\'{e}}ratifs
                  pour les documents structur{\'{e}}s multim{\'{e}}dias. Application
                  {\`{a}} la classification de documents {XML} et {HTML}},
  journal      = {Document Num{\'{e}}rique},
  volume       = {8},
  number       = {3},
  pages        = {35--54},
  year         = {2004},
  url          = {http://dn.revuesonline.com/article.jsp?articleId=5188},
  timestamp    = {Sun, 24 Nov 2013 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/dn/DenoyerG04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijcpol/ZhangN04,
  author       = {Lihua Zhang and
                  Yiu{-}Kai Ng},
  title        = {A Query Engine for Retrieving Information from Chinese {HTML} Documents},
  journal      = {Int. J. Comput. Process. Orient. Lang.},
  volume       = {17},
  number       = {3},
  pages        = {135--164},
  year         = {2004},
  url          = {https://doi.org/10.1142/S0219427904001085},
  doi          = {10.1142/S0219427904001085},
  timestamp    = {Mon, 11 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijcpol/ZhangN04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/dexaw/Burget04,
  author       = {Radek Burget},
  title        = {Hierarchies in {HTML} Documents: Linking Text to Concepts},
  booktitle    = {15th International Workshop on Database and Expert Systems Applications
                  {(DEXA} 2004), with CD-ROM, 30 August - 3 September 2004, Zaragoza,
                  Spain},
  pages        = {186--190},
  publisher    = {{IEEE} Computer Society},
  year         = {2004},
  url          = {https://doi.org/10.1109/DEXA.2004.1333471},
  doi          = {10.1109/DEXA.2004.1333471},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/dexaw/Burget04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iadis/BenoitN04,
  author       = {Beno{\^{\i}}t Encelle and
                  Nadine Baptiste{-}Jessel},
  editor       = {Pedro T. Isa{\'{\i}}as and
                  Nitya Karmakar and
                  Lu{\'{\i}}s E. T. Rodrigues and
                  Patr{\'{\i}}cia Barbosa},
  title        = {Using the concept of user policies for improving {HTML} documents
                  accessibility},
  booktitle    = {Proceedings of the {IADIS} International Conference WWW/Internet 2004,
                  Madrid, Spain, 2 Volumes},
  pages        = {835--839},
  publisher    = {{IADIS}},
  year         = {2004},
  timestamp    = {Wed, 01 Apr 2015 20:06:19 +0200},
  biburl       = {https://dblp.org/rec/conf/iadis/BenoitN04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iadis/SinkaC04,
  author       = {Mark P. Sinka and
                  David Corne},
  editor       = {Pedro T. Isa{\'{\i}}as and
                  Nitya Karmakar and
                  Lu{\'{\i}}s E. T. Rodrigues and
                  Patr{\'{\i}}cia Barbosa},
  title        = {Measuring Effectiveness of Text-Decorated {HTML} Tags in Web Document
                  Clustering},
  booktitle    = {Proceedings of the {IADIS} International Conference WWW/Internet 2004,
                  Madrid, Spain, 2 Volumes},
  pages        = {707--714},
  publisher    = {{IADIS}},
  year         = {2004},
  timestamp    = {Mon, 03 Jan 2005 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iadis/SinkaC04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icchp/AnnamalaiGP04,
  author       = {Narayan Annamalai and
                  Gopal Gupta and
                  B. Prabhakaran},
  editor       = {Joachim Klaus and
                  Klaus Miesenberger and
                  Wolfgang L. Zagler and
                  Dominique Burger},
  title        = {Accessing Documents via Audio: An Extensible Transcoder for {HTML}
                  to {VoiceXML} Conversion},
  booktitle    = {Computers Helping People with Special Needs, 9th International Conference,
                  {ICCHP} 2004, Paris, France, July 7-9, 2004, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {3118},
  pages        = {339--346},
  publisher    = {Springer},
  year         = {2004},
  url          = {https://doi.org/10.1007/978-3-540-27817-7\_51},
  doi          = {10.1007/978-3-540-27817-7\_51},
  timestamp    = {Wed, 26 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icchp/AnnamalaiGP04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/saint/LimN04,
  author = {Seung Jin Lim and Yiu{-}Kai Ng},
  title = {Change Discovery of Hierarchically Structured, Order-Sensitive Data in {HTML/XML} Documents},
  booktitle = {2004 Symposium on Applications and the Internet {(SAINT} 2004), 26-30 January 2004, Tokyo, Japan},
  pages = {178--187},
  publisher = {{IEEE} Computer Society},
  year = {2004},
  url = {https://doi.org/10.1109/SAINT.2004.1266114},
  doi = {10.1109/SAINT.2004.1266114},
  timestamp = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/saint/LimN04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/www/SankaCC04,
  author       = {Anoop Sanka and
                  Shravan Chamakura and
                  Sharma Chakravarthy},
  editor       = {Mark Levene and
                  Alexandra Poulovassilis},
  title        = {A Dataflow Approach To Efficient Change Detection of {HTML/XML} Documents
                  in {WebVigiL}},
  booktitle    = {Proceedings of the Third International Workshop on Web Dynamics, WebDyn@WWW
                  2004, New York, NY, USA, May 18, 2004},
  series       = {{CEUR} Workshop Proceedings},
  volume       = {703},
  pages        = {76--85},
  publisher    = {CEUR-WS.org},
  year         = {2004},
  url          = {https://ceur-ws.org/Vol-703/paper8.pdf},
  timestamp    = {Fri, 10 Mar 2023 16:22:22 +0100},
  biburl       = {https://dblp.org/rec/conf/www/SankaCC04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/apin/KimZ03,
  author = {Sun Kim and Byoung{-}Tak Zhang},
  title = {Genetic Mining of {HTML} Structures for Effective Web-Document Retrieval},
  journal = {Appl. Intell.},
  volume = {18},
  number = {3},
  pages = {243--256},
  year = {2003},
  url = {https://doi.org/10.1023/A:1023293820057},
  doi = {10.1023/A:1023293820057},
  timestamp = {Wed, 17 May 2017 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/apin/KimZ03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/bncod/PandrangiJSC03,
  author       = {Naveen Pandrangi and
                  Jyoti Jacob and
                  Anoop Sanka and
                  Sharma Chakravarthy},
  editor       = {Anne E. James and
                  Brian Lings and
                  Muhammad Younas},
  title        = {{WebVigiL}: User Profile-Based Change Detection for {HTML/XML} Documents},
  booktitle    = {New Horizons in Information Management, 20th British National Conference
                  on Databases, {BNCOD} 20, Coventry, UK, July 15-17, 2003, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {2712},
  pages        = {38--57},
  publisher    = {Springer},
  year         = {2003},
  url          = {https://doi.org/10.1007/3-540-45073-4\_5},
  doi          = {10.1007/3-540-45073-4\_5},
  timestamp    = {Tue, 14 May 2019 10:00:50 +0200},
  biburl       = {https://dblp.org/rec/conf/bncod/PandrangiJSC03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cmg/Wingfield03,
  author = {Patricia E. Wingfield},
  title = {No More Downloading - Using {SAS/ODS} to Create Graphs and {HTML} Documents for {OS/390} Systems},
  booktitle = {29th International Computer Measurement Group Conference, December 7-12, 2003, Dallas, Texas, USA, Proceedings},
  pages = {469--475},
  publisher = {Computer Measurement Group},
  year = {2003},
  url = {http://www.cmg.org/?s2member\_file\_download=/proceedings/2003/3189.pdf},
  timestamp = {Wed, 17 Jul 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/cmg/Wingfield03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ecir/CeciM03,
  author       = {Michelangelo Ceci and
                  Donato Malerba},
  editor       = {Fabrizio Sebastiani},
  title        = {Hierarchical Classification of {HTML} Documents with {WebClassII}},
  booktitle    = {Advances in Information Retrieval, 25th European Conference on {IR}
                  Research, {ECIR} 2003, Pisa, Italy, April 14-16, 2003, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {2633},
  pages        = {57--72},
  publisher    = {Springer},
  year         = {2003},
  url          = {https://doi.org/10.1007/3-540-36618-0\_5},
  doi          = {10.1007/3-540-36618-0\_5},
  timestamp    = {Tue, 14 May 2019 10:00:37 +0200},
  biburl       = {https://dblp.org/rec/conf/ecir/CeciM03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iadis/HyuseinP03,
  author = {Byurhan Hyusein and Ahmed Patel},
  title = {Significance of {HTML} Tags for Document Indexing and Retrieval},
  booktitle = {Proceedings of the {IADIS} International Conference WWW/Internet 2003, {ICWI} 2003, Algarve, Portugal, November 5-8, 2003},
  pages = {817--820},
  publisher = {{IADIS}},
  year = {2003},
  timestamp = {Thu, 18 Mar 2004 15:27:46 +0100},
  biburl = {https://dblp.org/rec/conf/iadis/HyuseinP03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icdar/MukherjeeYTR03,
  author = {Saikat Mukherjee and Guizhen Yang and Wenfang Tan and I. V. Ramakrishnan},
  title = {Automatic Discovery of Semantic Structures in {HTML} Documents},
  booktitle = {7th International Conference on Document Analysis and Recognition {(ICDAR} 2003), 2-Volume Set, 3-6 August 2003, Edinburgh, Scotland, {UK}},
  pages = {245--249},
  publisher = {{IEEE} Computer Society},
  year = {2003},
  url = {https://doi.org/10.1109/ICDAR.2003.1227667},
  doi = {10.1109/ICDAR.2003.1227667},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/icdar/MukherjeeYTR03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/pakdd/YunLP03,
  author = {Bo{-}Hyun Yun and Myungeun Lim and Soo{-}Hyun Park},
  editor = {Kyu{-}Young Whang and Jongwoo Jeon and Kyuseok Shim and Jaideep Srivastava},
  title = {An Integrated System of Mining {HTML} Texts and Filtering Structured Documents},
  booktitle = {Advances in Knowledge Discovery and Data Mining, 7th Pacific-Asia Conference, {PAKDD} 2003, Seoul, Korea, April 30 - May 2, 2003, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {2637},
  pages = {350--355},
  publisher = {Springer},
  year = {2003},
  url = {https://doi.org/10.1007/3-540-36175-8\_34},
  doi = {10.1007/3-540-36175-8\_34},
  timestamp = {Tue, 14 May 2019 10:00:51 +0200},
  biburl = {https://dblp.org/rec/conf/pakdd/YunLP03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/pris/KerlochPG03,
  author       = {Fr{\'{e}}d{\'{e}}ric Kerloch and
                  Thierry Paquet and
                  Patrick Gallinari},
  editor       = {Jean{-}Marc Ogier and
                  {\'{E}}ric Trupin},
  title        = {Using Structure For Information Extraction In {HTML} Documents},
  booktitle    = {Pattern Recognition in Information Systems, Proceedings of the 3rd
                  International Workshop on Pattern Recognition in Information Systems,
                  {PRIS} 2003, In conjunction with {ICEIS} 2003, Angers, France, April
                  2003},
  pages        = {173--178},
  publisher    = {{ICEIS} Press},
  year         = {2003},
  timestamp    = {Mon, 01 Sep 2003 14:36:20 +0200},
  biburl       = {https://dblp.org/rec/conf/pris/KerlochPG03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sac/MolinariPP03,
  author = {Andrea Molinari and Ricardo A. Marques Pereira and Gabriella Pasi},
  editor = {Gary B. Lamont and Hisham Haddad and George A. Papadopoulos and Brajendra Panda},
  title = {An Indexing Model of {HTML} Documents},
  booktitle = {Proceedings of the 2003 {ACM} Symposium on Applied Computing (SAC), March 9-12, 2003, Melbourne, FL, {USA}},
  pages = {834--840},
  publisher = {{ACM}},
  year = {2003},
  url = {https://doi.org/10.1145/952532.952697},
  doi = {10.1145/952532.952697},
  timestamp = {Tue, 06 Nov 2018 11:06:45 +0100},
  biburl = {https://dblp.org/rec/conf/sac/MolinariPP03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/semweb/MukherjeeYR03,
  author = {Saikat Mukherjee and Guizhen Yang and I. V. Ramakrishnan},
  editor = {Dieter Fensel and Katia P. Sycara and John Mylopoulos},
  title = {Automatic Annotation of Content-Rich {HTML} Documents: Structural and Semantic Analysis},
  booktitle = {The Semantic Web - {ISWC} 2003, Second International Semantic Web Conference, Sanibel Island, FL, USA, October 20-23, 2003, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {2870},
  pages = {533--549},
  publisher = {Springer},
  year = {2003},
  url = {https://doi.org/10.1007/978-3-540-39718-2\_34},
  doi = {10.1007/978-3-540-39718-2\_34},
  timestamp = {Tue, 07 Sep 2021 13:48:16 +0200},
  biburl = {https://dblp.org/rec/conf/semweb/MukherjeeYR03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/wirn/PilatoVVCS03,
  author = {Giovanni Pilato and Salvatore Vitabile and Giorgio Vassallo and Vincenzo Conti and Filippo Sorbello},
  editor = {Bruno Apolloni and Maria Marinaro and Roberto Tagliaferri},
  title = {A Concurrent Neural Classifier for {HTML} Documents Retrieval},
  booktitle = {Neural Nets, 14th Italian Workshop on Neural Nets, {WIRN} {VIETRI} 2003, Vietri sul Mare, Italy, June 4-7, 2003, Revised Papers},
  series = {Lecture Notes in Computer Science},
  volume = {2859},
  pages = {210--217},
  publisher = {Springer},
  year = {2003},
  url = {https://doi.org/10.1007/978-3-540-45216-4\_24},
  doi = {10.1007/978-3-540-45216-4\_24},
  timestamp = {Sun, 02 Jun 2019 21:17:04 +0200},
  biburl = {https://dblp.org/rec/conf/wirn/PilatoVVCS03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/www/GuptaKNG03,
  author       = {Suhit Gupta and
                  Gail E. Kaiser and
                  David Neistadt and
                  Peter Grimm},
  editor       = {Guszt{\'{a}}v Hencsey and
                  Bebo White and
                  Yih{-}Farn Robin Chen and
                  L{\'{a}}szl{\'{o}} Kov{\'{a}}cs and
                  Steve Lawrence},
  title        = {{DOM}-based content extraction of {HTML} documents},
  booktitle    = {Proceedings of the Twelfth International World Wide Web Conference,
                  {WWW} 2003, Budapest, Hungary, May 20-24, 2003},
  pages        = {207--214},
  publisher    = {{ACM}},
  year         = {2003},
  url          = {https://doi.org/10.1145/775152.775182},
  doi          = {10.1145/775152.775182},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/www/GuptaKNG03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/www/KellyJP03,
  author       = {Brian Kelly and
                  Pete Johnston and
                  Andy Powell},
  editor       = {Irwin King and
                  Tam{\'{a}}s M{\'{a}}ray},
  title        = {Approaches to Validation of {Dublin Core} Metadata Embedded in {(X)HTML}
                  Documents},
  booktitle    = {Proceedings of the Twelfth International World Wide Web Conference
                  - Posters, {WWW} 2003, Budapest, Hungary, May 20-24, 2003},
  year         = {2003},
  url          = {http://www2003.org/cdrom/papers/poster/p109/p109-kelly.html},
  timestamp    = {Wed, 17 Jul 2013 16:59:51 +0200},
  biburl       = {https://dblp.org/rec/conf/www/KellyJP03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:books/ws/03/0001H03,
  author = {Yalin Wang and Jianying Hu},
  editor = {Apostolos Antonacopoulos and Jianying Hu},
  title = {Automatic Table Detection in {HTML} Documents},
  booktitle = {Web Document Analysis},
  series = {Series in Machine Perception and Artificial Intelligence},
  volume = {55},
  pages = {135--154},
  publisher = {World Scientific},
  year = {2003},
  url = {https://doi.org/10.1142/9789812775375\_0008},
  doi = {10.1142/9789812775375\_0008},
  timestamp = {Mon, 26 Aug 2019 18:32:49 +0200},
  biburl = {https://dblp.org/rec/books/ws/03/0001H03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@incollection{DBLP:books/ws/03/AlamHR03,
  author = {Hassan Alam and Rachmat Hartono and A. F. R. Rahman},
  editor = {Apostolos Antonacopoulos and Jianying Hu},
  title = {Extraction and Management of Content from {HTML} Documents},
  booktitle = {Web Document Analysis},
  series = {Series in Machine Perception and Artificial Intelligence},
  volume = {55},
  pages = {95--111},
  publisher = {World Scientific},
  year = {2003},
  url = {https://doi.org/10.1142/9789812775375\_0006},
  doi = {10.1142/9789812775375\_0006},
  timestamp = {Mon, 26 Aug 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/books/ws/03/AlamHR03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@incollection{DBLP:books/ws/03/YangCZ03,
  author       = {Yudong Yang and
                  Yu Chen and
                  HongJiang Zhang},
  editor       = {Apostolos Antonacopoulos and
                  Jianying Hu},
  title        = {{HTML} Page Analysis based on Visual cues},
  booktitle    = {Web Document Analysis},
  series       = {Series in Machine Perception and Artificial Intelligence},
  volume       = {55},
  pages        = {113--131},
  publisher    = {World Scientific},
  year         = {2003},
  url          = {https://doi.org/10.1142/9789812775375\_0007},
  doi          = {10.1142/9789812775375\_0007},
  timestamp    = {Mon, 26 Aug 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/books/ws/03/YangCZ03.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jfp/Thiemann02,
  author       = {Peter Thiemann},
  title        = {A typed representation for {HTML} and {XML} documents in {Haskell}},
  journal      = {J. Funct. Program.},
  volume       = {12},
  number       = {4{\&}5},
  pages        = {435--468},
  year         = {2002},
  url          = {https://doi.org/10.1017/S0956796802004392},
  doi          = {10.1017/S0956796802004392},
  timestamp    = {Mon, 23 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jfp/Thiemann02.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/das/WangH02,
  author = {Yalin Wang and Jianying Hu},
  editor = {Daniel P. Lopresti and Jianying Hu and Ramanujan S. Kashi},
  title = {Detecting Tables in {HTML} Documents},
  booktitle = {Document Analysis Systems V, 5th International Workshop, {DAS} 2002, Princeton, NJ, USA, August 19-21, 2002, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {2423},
  pages = {249--260},
  publisher = {Springer},
  year = {2002},
  url = {https://doi.org/10.1007/3-540-45869-7\_29},
  doi = {10.1007/3-540-45869-7\_29},
  timestamp = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/das/WangH02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/dexa/Liu02,
  author = {Mengchi Liu},
  editor = {Abdelkader Hameurlain and Rosine Cicchetti and Roland Traunm{\"{u}}ller},
  title = {Capturing Semantics in {HTML} Documents},
  booktitle = {Database and Expert Systems Applications, 13th International Conference, {DEXA} 2002, Aix-en-Provence, France, September 2-6, 2002, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {2453},
  pages = {103--112},
  publisher = {Springer},
  year = {2002},
  url = {https://doi.org/10.1007/3-540-46146-9\_11},
  doi = {10.1007/3-540-46146-9\_11},
  timestamp = {Tue, 14 May 2019 10:00:46 +0200},
  biburl = {https://dblp.org/rec/conf/dexa/Liu02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/edbtw/MalerbaEC02,
  author = {Donato Malerba and Floriana Esposito and Michelangelo Ceci},
  editor = {Akmal B. Chaudhri and Rainer Unland and Chabane Djeraba and Wolfgang Lindner},
  title = {Mining {HTML} Pages to Support Document Sharing in a Cooperative System},
  booktitle = {XML-Based Data Management and Multimedia Engineering - {EDBT} 2002 Workshops, {EDBT} 2002 Workshops XMLDM, MDDE, and YRWS, Prague, Czech Republic, March 24-28, 2002, Revised Papers},
  series = {Lecture Notes in Computer Science},
  volume = {2490},
  pages = {420--434},
  publisher = {Springer},
  year = {2002},
  url = {https://doi.org/10.1007/3-540-36128-6\_25},
  doi = {10.1007/3-540-36128-6\_25},
  timestamp = {Tue, 14 May 2019 10:00:54 +0200},
  biburl = {https://dblp.org/rec/conf/edbtw/MalerbaEC02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iadis/HashemiFVT02,
  author = {Ray R. Hashemi and Charles Wesley Ford and Tim Vamprooyen and John R. Talburt},
  title = {Extraction of Features with Unstructured Representation from {HTML} Documents},
  booktitle = {Proceedings of the {IADIS} International Conference WWW/Internet 2002, {ICWI} 2002, Lisbon, Portugal, November 13-15, 2002},
  pages = {47--53},
  publisher = {{IADIS}},
  year = {2002},
  timestamp = {Thu, 30 Jun 2005 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/iadis/HashemiFVT02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ideal/UmeharaIN02,
  author = {Masayuki Umehara and Koji Iwanuma and Hidetomo Nabeshima},
  editor = {Hujun Yin and Nigel M. Allinson and Richard T. Freeman and John A. Keane and Simon J. Hubbard},
  title = {A Case-Based Recognition of Semantic Structures in {HTML} Documents},
  booktitle = {Intelligent Data Engineering and Automated Learning - {IDEAL} 2002, Third International Conference, Manchester, UK, August 12-14, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {2412},
  pages = {141--147},
  publisher = {Springer},
  year = {2002},
  url = {https://doi.org/10.1007/3-540-45675-9\_24},
  doi = {10.1007/3-540-45675-9\_24},
  timestamp = {Tue, 14 May 2019 10:00:45 +0200},
  biburl = {https://dblp.org/rec/conf/ideal/UmeharaIN02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/www/CohenHJ02,
  author       = {William W. Cohen and
                  Matthew Hurst and
                  Lee S. Jensen},
  editor       = {David Lassner and
                  De Roure, David and
                  Arun Iyengar},
  title        = {A flexible learning system for wrapping tables and lists in {HTML}
                  documents},
  booktitle    = {Proceedings of the Eleventh International World Wide Web Conference,
                  {WWW} 2002, May 7-11, 2002, Honolulu, Hawaii, {USA}},
  pages        = {232--241},
  publisher    = {{ACM}},
  year         = {2002},
  url          = {https://doi.org/10.1145/511446.511477},
  doi          = {10.1145/511446.511477},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/www/CohenHJ02.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/amia/DubeyC01,
  author = {Anil K. Dubey and Henry C. Chueh},
  title = {Using {XML} Metadata to Enable the Automatic Generation and Processing of {HTML} Forms from {XML} Documents},
  booktitle = {{AMIA} 2001, American Medical Informatics Association Annual Symposium, Washington, DC, USA, November 3-7, 2001},
  publisher = {{AMIA}},
  year = {2001},
  url = {https://knowledge.amia.org/amia-55142-a2001a-1.597057/t-002-1.598852/f-001-1.598853/a-251-1.599384/a-252-1.599381},
  timestamp = {Wed, 17 Apr 2024 11:48:39 +0200},
  biburl = {https://dblp.org/rec/conf/amia/DubeyC01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/flairs/SakamotoMAA01,
  author = {Hiroshi Sakamoto and Yoshitsugu Murakami and Hiroki Arimura and Setsuo Arikawa},
  editor = {Ingrid Russell and John F. Kolen},
  title = {Extracting Partial Structures from {HTML} Documents},
  booktitle = {Proceedings of the Fourteenth International Florida Artificial Intelligence Research Society Conference, May 21-23, 2001, Key West, Florida, {USA}},
  pages = {264--268},
  publisher = {{AAAI} Press},
  year = {2001},
  url = {http://www.aaai.org/Library/FLAIRS/2001/flairs01-051.php},
  timestamp = {Wed, 26 Oct 2022 08:35:34 +0200},
  biburl = {https://dblp.org/rec/conf/flairs/SakamotoMAA01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icdar/DiligentiGMS01,
  author       = {Michelangelo Diligenti and
                  Marco Gori and
                  Marco Maggini and
                  Franco Scarselli},
  title        = {Classification of {HTML} Documents by Hidden Tree-{Markov} Models},
  booktitle    = {6th International Conference on Document Analysis and Recognition
                  {(ICDAR} 2001), 10-13 September 2001, Seattle, WA, {USA}},
  pages        = {849--853},
  publisher    = {{IEEE} Computer Society},
  year         = {2001},
  url          = {https://doi.org/10.1109/ICDAR.2001.953907},
  doi          = {10.1109/ICDAR.2001.953907},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icdar/DiligentiGMS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icde/LimN01,
  author = {Seung Jin Lim and Yiu{-}Kai Ng},
  editor = {Dimitrios Georgakopoulos and Alexander Buchmann},
  title = {An Automated Change Detection Algorithm for {HTML} Documents Based on Semantic Hierarchies},
  booktitle = {Proceedings of the 17th International Conference on Data Engineering, April 2-6, 2001, Heidelberg, Germany},
  pages = {303--312},
  publisher = {{IEEE} Computer Society},
  year = {2001},
  url = {https://doi.org/10.1109/ICDE.2001.914842},
  doi = {10.1109/ICDE.2001.914842},
  timestamp = {Fri, 15 Mar 2024 12:30:44 +0100},
  biburl = {https://dblp.org/rec/conf/icde/LimN01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/widm/Rapela01,
  author       = {Joaquin Rapela},
  editor       = {Roger H. L. Chiang and
                  Ee{-}Peng Lim},
  title        = {Automatically combining ranking heuristics for {HTML} documents},
  booktitle    = {3rd International Workshop on Web Information and Data Management
                  {(WIDM} 2001), Friday, 9 November 2001, In Conjunction with {ACM}
                  {CIKM} 2001, Doubletree Hotel Atlanta-Buckhead, Atlanta, Georgia,
                  {USA}},
  pages        = {61--67},
  publisher    = {{ACM}},
  year         = {2001},
  url          = {https://doi.org/10.1145/502932.502945},
  doi          = {10.1145/502932.502945},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/widm/Rapela01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/woa/GarelliF01,
  author       = {Francesco Garelli and
                  Carlo Ferrari},
  editor       = {Andrea Omicini and
                  Mirko Viroli},
  title        = {Object Oriented Mapping for {HTML} Documents},
  booktitle    = {{WOA} 2001: Dagli Oggetti agli Agenti. 2nd AI*IA/TABOO Joint Workshop
                  ``From Objects to Agents'': Evolutive Trends of Software Systems, 4-5
                  September 2001, Modena, Italy},
  pages        = {68--73},
  publisher    = {Pitagora Editrice Bologna},
  year         = {2001},
  timestamp    = {Mon, 17 Jan 2005 15:56:23 +0100},
  biburl       = {https://dblp.org/rec/conf/woa/GarelliF01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cl/LimN00,
  author = {Seung Jin Lim and Yiu{-}Kai Ng},
  editor = {John W. Lloyd and Ver{\'{o}}nica Dahl and Ulrich Furbach and Manfred Kerber and Kung{-}Kiu Lau and Catuscia Palamidessi and Lu{\'{\i}}s Moniz Pereira and Yehoshua Sagiv and Peter J. Stuckey},
  title = {A Heuristic Approach for Converting {HTML} Documents to {XML} Documents},
  booktitle = {Computational Logic - {CL} 2000, First International Conference, London, UK, 24-28 July, 2000, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {1861},
  pages = {1182--1196},
  publisher = {Springer},
  year = {2000},
  url = {https://doi.org/10.1007/3-540-44957-4\_79},
  doi = {10.1007/3-540-44957-4\_79},
  timestamp = {Tue, 14 May 2019 10:00:42 +0200},
  biburl = {https://dblp.org/rec/conf/cl/LimN00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ecis/MotheRRZ00,
  author = {Josiane Mothe and Franck Ravat and Farshad Riahi and Gilles Zurfluh},
  editor = {Hans Robert Hansen and Martin Bichler and Harald Mahrer},
  title = {Structuration and Enrichment of {HTML} Documents in order to Build a Specific Information Warehouse},
  booktitle = {Proceedings of the 8th European Conference on Information Systems, Trends in Information and Communication Systems for the 21st Century, {ECIS} 2000, Vienna, Austria, July 3-5, 2000},
  pages = {386--395},
  year = {2000},
  url = {http://aisel.aisnet.org/ecis2000/112},
  timestamp = {Mon, 05 Dec 2016 15:14:00 +0100},
  biburl = {https://dblp.org/rec/conf/ecis/MotheRRZ00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icmcs/FerriGGP00,
  author = {Fernando Ferri and Cristina Ghiselli and Patrizia Grifoni and Marco Padula},
  title = {Toward a Retrieval of {HTML} Documents using a Semantic Approach},
  booktitle = {2000 {IEEE} International Conference on Multimedia and Expo, {ICME} 2000, New York, NY, USA, July 30 - August 2, 2000},
  pages = {1571--1574},
  publisher = {{IEEE} Computer Society},
  year = {2000},
  url = {https://doi.org/10.1109/ICME.2000.871069},
  doi = {10.1109/ICME.2000.871069},
  timestamp = {Mon, 09 Aug 2021 14:54:02 +0200},
  biburl = {https://dblp.org/rec/conf/icmcs/FerriGGP00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/pricai/KimZ00,
  author = {Sun Kim and Byoung{-}Tak Zhang},
  editor = {Ah{-}Hwee Tan and Philip S. Yu},
  title = {Web-Document Retrieval by Genetic Learning of Importance Factors for {HTML} Tags},
  booktitle = {Proceedings of the International Workshop on Text and Web Mining, Melbourne, Australia, August 2000},
  pages = {13--23},
  year = {2000},
  url = {http://textmining.krdl.org.sg/PRICAI2000/web-retrieval.pdf},
  timestamp = {Wed, 27 Jun 2018 13:48:41 +0200},
  biburl = {https://dblp.org/rec/conf/pricai/KimZ00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/sigdoc/Green00b,
  author       = {Robin Green},
  editor       = {Susan B. Jones and
                  Beth Weise Moeller and
                  Michael Priestley and
                  Bernadette Longo},
  title        = {Documentation meets version control: an automated backup system for
                  {HTML}-based help},
  booktitle    = {Proceedings of {IEEE} Professional Communication Society International
                  Professional Communication Conference and Proceedings of the 18th
                  Annual {ACM} International Conference on Computer Documentation: Technology
                  {\&} Teamwork, Cambridge, Massachusetts, USA, September 24-27,
                  2000},
  pages        = {541--548},
  publisher    = {{IEEE}},
  year         = {2000},
  url          = {https://dl.acm.org/citation.cfm?id=504878},
  timestamp    = {Tue, 27 Nov 2018 10:40:38 +0100},
  biburl       = {https://dblp.org/rec/conf/sigdoc/Green00b.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Morrison -- J. Inf. Law Technol. 1999(1), on copyright in HTML documents.
% NOTE: the proper noun {Xanadu} is brace-protected so that sentence-casing
% bibliography styles keep its capital letter.
@article{DBLP:journals/jilt/Morrison99,
  author       = {Alex Morrison},
  title        = {Hijack on the road to {Xanadu}: The Infringement of Copyright in {HTML}
                  Documents via Networked Computers and the Legitimacy of Browsing Hypermedia
                  Documents},
  journal      = {J. Inf. Law Technol.},
  volume       = {1999},
  number       = {1},
  year         = {1999},
  url          = {http://elj.warwick.ac.uk/jilt/99-1/morrison.html},
  timestamp    = {Wed, 31 Mar 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jilt/Morrison99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Polcicova -- ADBIS'99 short paper on recommending HTML documents.
% NOTE: {HTML} is brace-protected in the title so that sentence-casing
% bibliography styles do not lower-case the acronym.
% NOTE(review): "IS-2000" in the publisher field looks like a typo for the
% postal code prefix "SI-2000" (Maribor, Slovenia) -- confirm before changing.
@inproceedings{DBLP:conf/adbis/Polcicova99,
  author       = {Gabriela Polcicov{\'{a}}},
  editor       = {Johann Eder and
                  Ivan Rozman and
                  Tatjana Welzer},
  title        = {Recommending {HTML}-documents using Features Guided Automated Collaborative
                  Filtering},
  booktitle    = {Advances in Databases and Information Systems, Third East European
                  Conference, ADBIS'99, Maribor, Slovenia, September 13-16, 1999, Proceedings
                  of Short Papers},
  pages        = {81--87},
  publisher    = {Institute of Informatics, Faculty of Electrical Engineering and Computer
                  Science, Smetanova 17, {IS-2000} Maribor, Slovenia},
  year         = {1999},
  timestamp    = {Tue, 02 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/adbis/Polcicova99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Lim, Ng -- DASFAA 1999 paper presenting the WebView tool.
% NOTE: the camel-case tool name {WebView} is brace-protected so that
% sentence-casing bibliography styles do not turn it into "Webview".
@inproceedings{DBLP:conf/dasfaa/LimN99,
  author       = {Seung Jin Lim and
                  Yiu{-}Kai Ng},
  editor       = {Arbee L. P. Chen and
                  Frederick H. Lochovsky},
  title        = {{WebView}: {A} Tool for Retrieving Internal Structures and Extracting
                  Information from {HTML} Documents},
  booktitle    = {Database Systems for Advanced Applications, Proceedings of the Sixth
                  International Conference on Database Systems for Advanced Applications
                  (DASFAA), April 19-21, Hsinchu, Taiwan},
  pages        = {71--80},
  publisher    = {{IEEE} Computer Society},
  year         = {1999},
  url          = {https://doi.org/10.1109/DASFAA.1999.765738},
  doi          = {10.1109/DASFAA.1999.765738},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/dasfaa/LimN99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Kwak, Kim, Lee -- IC-AI '99 (Las Vegas), Volume 1.
@inproceedings{DBLP:conf/icai/KwakKL99,
  author    = {Kwak, Ju{-}hyun and Kim, Keecheon and Lee, Chang{-}Hoon},
  editor    = {Arabnia, Hamid R.},
  title     = {Advanced User Profile Agent Using Structure Analysis
               of {HTML} Document},
  booktitle = {Proceedings of the International Conference on Artificial Intelligence, {IC-AI} '99,
               June 28 - July 1, 1999, Las Vegas, Nevada, USA, Volume 1},
  pages     = {319--323},
  publisher = {{CSREA} Press},
  year      = {1999},
  timestamp = {Fri, 18 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icai/KwakKL99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Pacifici, Youssef -- IMSA 1999 (Nassau, The Bahamas).
@inproceedings{DBLP:conf/imsa/PacificiY99,
  author    = {Pacifici, Giovanni and Youssef, Alaa},
  editor    = {Furht, Borko},
  title     = {Synchronous Annotation of Shared {HTML} Documents},
  booktitle = {Internet, Multimedia Systems and Applications, {IMSA} 1999,
               October 18-21, 1999, Nassau, The Bahamas},
  pages     = {275--279},
  publisher = {{IASTED/ACTA} Press},
  year      = {1999},
  timestamp = {Sun, 28 Jul 2019 13:40:03 +0200},
  biburl    = {https://dblp.org/rec/conf/imsa/PacificiY99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Farhat, Bellik -- INTERACT '99 paper presenting SeeWeb.
% NOTE: the camel-case system name {SeeWeb} is brace-protected so that
% sentence-casing bibliography styles do not turn it into "Seeweb".
@inproceedings{DBLP:conf/interact/FarhatB99,
  author       = {Siwar Farhat and
                  Yacine Bellik},
  editor       = {M. Angela Sasse and
                  Chris W. Johnson},
  title        = {{SeeWeb}: Dynamic Improvement of the Accessibility of {HTML} Documents
                  for Blind Persons},
  booktitle    = {Human-Computer Interaction {INTERACT} '99: {IFIP} {TC13} International
                  Conference on Human-Computer Interaction, Edinburgh, UK, 30th August-3rd
                  September 1999},
  pages        = {256--262},
  publisher    = {{IOS} Press},
  year         = {1999},
  timestamp    = {Mon, 19 Sep 2016 17:00:14 +0200},
  biburl       = {https://dblp.org/rec/conf/interact/FarhatB99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Ahrens, Lecompte -- SIGDOC 1999 (New Orleans).
@inproceedings{DBLP:conf/sigdoc/AhrensL99,
  author    = {Ahrens, Virginie and Lecompte, Val{\'{e}}rie},
  editor    = {Johnson{-}Eilola, Johndan and Selber, Stuart A.},
  title     = {Paper to {HTML} - an automatic, seamless process
               for documentation production},
  booktitle = {Proceedings of the 17th annual international conference on Documentation, {SIGDOC} 1999,
               New Orleans, Louisiana, USA, September 12-14, 1999},
  pages     = {138--143},
  publisher = {{ACM}},
  year      = {1999},
  doi       = {10.1145/318372.318571},
  url       = {https://doi.org/10.1145/318372.318571},
  timestamp = {Tue, 06 Nov 2018 11:07:46 +0100},
  biburl    = {https://dblp.org/rec/conf/sigdoc/AhrensL99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Sahuguet, Azavant -- WebDB 1999 informal proceedings.
% NOTE: the url previously contained "\&\#126;", a LaTeX-escaped HTML character
% reference for the tilde (character 126); it is restored to the literal "~"
% so that the link points at the intended user directory.
@inproceedings{DBLP:conf/webdb/SahuguetA99,
  author       = {Arnaud Sahuguet and
                  Fabien Azavant},
  editor       = {Sophie Cluet and
                  Tova Milo},
  title        = {Web Ecology: Recycling {HTML} Pages as {XML} Documents Using {W4F}},
  booktitle    = {{ACM} {SIGMOD} Workshop on The Web and Databases, WebDB 1999, Philadelphia,
                  Pennsylvania, USA, June 3-4, 1999. Informal Proceedings},
  pages        = {31--36},
  publisher    = {{INRIA}},
  year         = {1999},
  url          = {http://www-rocq.inria.fr/~cluet/WEBDB/sahuguet.ps},
  timestamp    = {Wed, 08 Feb 2017 16:53:29 +0100},
  biburl       = {https://dblp.org/rec/conf/webdb/SahuguetA99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Palme, Hopmann, Shelness -- RFC 2557 (1999).
@article{DBLP:journals/rfc/rfc2557,
  author    = {Palme, Jacob and Hopmann, Alex and Shelness, Nick},
  title     = {{MIME} Encapsulation of Aggregate Documents,
               such as {HTML} {(MHTML)}},
  journal   = {{RFC}},
  volume    = {2557},
  pages     = {1--28},
  year      = {1999},
  doi       = {10.17487/RFC2557},
  url       = {https://doi.org/10.17487/RFC2557},
  timestamp = {Tue, 14 May 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/rfc/rfc2557.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Ruiz, Favela -- CRIWG '98 (Fourth International Workshop on Groupware).
@inproceedings{DBLP:conf/criwg/RuizF98,
  author    = {Ruiz, D. C. and Favela, Jes{\'{u}}s},
  title     = {Collaborative Review and Edition of {HTML} Documents},
  booktitle = {{CRIWG} '98, Fourth International Workshop on Groupware,
               B{\'{u}}zios, Rio de Janeiro, Brazil, September 9-11, 1998, Proceedings},
  pages     = {113--127},
  year      = {1998},
  timestamp = {Thu, 24 Mar 2016 12:03:44 +0100},
  biburl    = {https://dblp.org/rec/conf/criwg/RuizF98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Sugimoto et al. -- ECDL '98, LNCS 1513.
@inproceedings{DBLP:conf/ercimdl/SugimotoMDONT98,
  author    = {Sugimoto, Shigeo and Maeda, Akira and Dartois, Myriam and
               Ohta, Jun and Nakao, Shigetaka and Sakaguchi, Tetsuo and
               Tabata, Koichi},
  editor    = {Nikolaou, Christos and Stephanidis, Constantine},
  title     = {Experimental Studies on an Applet-based Document Viewer
               for Multilingual {WWW} Documents - Functional Extension of
               and Lessons Learned from Multilingual {HTML}},
  booktitle = {Research and Advanced Technology for Digital Libraries,
               Second European Conference, {ECDL} '98, Heraklion, Crete, Greece,
               September 21-23, 1998, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1513},
  pages     = {199--214},
  publisher = {Springer},
  year      = {1998},
  doi       = {10.1007/3-540-49653-X\_13},
  url       = {https://doi.org/10.1007/3-540-49653-X\_13},
  timestamp = {Mon, 28 Aug 2023 21:17:44 +0200},
  biburl    = {https://dblp.org/rec/conf/ercimdl/SugimotoMDONT98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Lim, Ng -- FODO'98 (Kobe, Japan).
@inproceedings{DBLP:conf/fodo/LimN98,
  author    = {Lim, Seung Jin and Ng, Yiu{-}Kai},
  editor    = {Tanaka, Katsumi and Ghandeharizadeh, Shahram},
  title     = {Constructing Hierarchical Information Structures of
               Sub-Page Level {HTML} Documents},
  booktitle = {The 5th International Conference of Foundations of Data Organization (FODO'98),
               Kobe, Japan, November 12-13, 1998},
  pages     = {66--75},
  year      = {1998},
  timestamp = {Tue, 27 Jul 2004 12:53:36 +0200},
  biburl    = {https://dblp.org/rec/conf/fodo/LimN98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Ciancarini, Mascolo, Vitali -- ZUM '98, LNCS 1493.
@inproceedings{DBLP:conf/zum/CiancariniMV98,
  author    = {Ciancarini, Paolo and Mascolo, Cecilia and Vitali, Fabio},
  editor    = {Bowen, Jonathan P. and Fett, Andreas and Hinchey, Michael G.},
  title     = {Visualizing {Z} Notation in {HTML} Documents},
  booktitle = {{ZUM} '98: The {Z} Formal Specification Notation,
               11th International Conference of {Z} Users, Berlin, Germany,
               September 24-26, 1998, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1493},
  pages     = {81--95},
  publisher = {Springer},
  year      = {1998},
  doi       = {10.1007/978-3-540-49676-2\_7},
  url       = {https://doi.org/10.1007/978-3-540-49676-2\_7},
  timestamp = {Tue, 14 May 2019 10:00:53 +0200},
  biburl    = {https://dblp.org/rec/conf/zum/CiancariniMV98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Hsu, Yih -- AAAI 97 / IAAI 97 (Providence, Rhode Island).
@inproceedings{DBLP:conf/aaai/HsuY97,
  author    = {Hsu, Jane Yung{-}jen and Yih, Wen{-}tau},
  editor    = {Kuipers, Benjamin and Webber, Bonnie L.},
  title     = {Template-Based Information Mining from {HTML} Documents},
  booktitle = {Proceedings of the Fourteenth National Conference on Artificial Intelligence and
               Ninth Innovative Applications of Artificial Intelligence Conference, {AAAI} 97,
               {IAAI} 97, July 27-31, 1997, Providence, Rhode Island, {USA}},
  pages     = {256--262},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {1997},
  url       = {http://www.aaai.org/Library/AAAI/1997/aaai97-040.php},
  timestamp = {Tue, 05 Sep 2023 08:53:09 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HsuY97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Tayeb-bey, Saidi -- BSDIA'97, LNCS 1339.
@inproceedings{DBLP:conf/bsdia/Tayeb-beyS97,
  author    = {Tayeb{-}bey, Souad and Saidi, Alexander S.},
  editor    = {Murshed, Nabeel A. and Bortolozzi, Fl{\'{a}}vio},
  title     = {Grammatical Formalism for Document Understanding System:
               From Documents towards {HTML} Text},
  booktitle = {Advances in Document Image Analysis, First Brazilian Symposium, BSDIA'97,
               Curitiba, Brazil, November 2-5, 1997, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1339},
  pages     = {165--175},
  publisher = {Springer},
  year      = {1997},
  doi       = {10.1007/3-540-63791-5\_12},
  url       = {https://doi.org/10.1007/3-540-63791-5\_12},
  timestamp = {Tue, 14 May 2019 10:00:53 +0200},
  biburl    = {https://dblp.org/rec/conf/bsdia/Tayeb-beyS97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Hong, Srihari -- ICDAR '97 (Ulm, Germany).
% NOTE: the mixed-case term {OCRed} is brace-protected so that sentence-casing
% bibliography styles do not lower-case it to "ocred".
@inproceedings{DBLP:conf/icdar/HongS97,
  author       = {Tao Hong and
                  Sargur N. Srihari},
  title        = {Representing {OCRed} documents in {HTML}},
  booktitle    = {4th International Conference Document Analysis and Recognition {(ICDAR}
                  '97), 2-Volume Set, August 18-20, 1997, Ulm, Germany, Proceedings},
  pages        = {831--835},
  publisher    = {{IEEE} Computer Society},
  year         = {1997},
  url          = {https://doi.org/10.1109/ICDAR.1997.620628},
  doi          = {10.1109/ICDAR.1997.620628},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icdar/HongS97.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Priestley -- SIGDOC 1997 (Salt Lake City).
@inproceedings{DBLP:conf/sigdoc/Priestley97,
  author    = {Priestley, Michael},
  editor    = {Smart, Karl L.},
  title     = {Navigation Issues in Hypertext: Documenting Complex
               Hierarchies with {HTML} Frames},
  booktitle = {The 15th Annual International Conference of Computer Documentation: Crossroads in
               Communication, {SIGDOC} 1997, Salt Lake City, Utah, USA, October 19-22, 1997},
  pages     = {223--235},
  publisher = {{ACM}},
  year      = {1997},
  doi       = {10.1145/263367.263396},
  url       = {https://doi.org/10.1145/263367.263396},
  timestamp = {Tue, 06 Nov 2018 11:07:46 +0100},
  biburl    = {https://dblp.org/rec/conf/sigdoc/Priestley97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Cutler, Shih, Meng -- USITS'97 (Monterey, California).
@inproceedings{DBLP:conf/usits/CutlerSM97,
  author    = {Cutler, Michal and Shih, Yungming and Meng, Weiyi},
  title     = {Using the Structure of {HTML} Documents to Improve Retrieval},
  booktitle = {1st {USENIX} Symposium on Internet Technologies and Systems,
               USITS'97, Monterey, California, USA, December 8-11, 1997},
  publisher = {{USENIX}},
  year      = {1997},
  url       = {http://www.usenix.org/publications/library/proceedings/usits97/cutler.html},
  timestamp = {Tue, 02 Feb 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/usits/CutlerSM97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Douglis, Haro, Rabinovich -- USITS'97 (Monterey, California).
@inproceedings{DBLP:conf/usits/DouglisHR97,
  author    = {Douglis, Fred and Haro, Antonio and Rabinovich, Michael},
  title     = {{HPP:} {HTML} Macro-Preprocessing to Support Dynamic
               Document Caching},
  booktitle = {1st {USENIX} Symposium on Internet Technologies and Systems,
               USITS'97, Monterey, California, USA, December 8-11, 1997},
  publisher = {{USENIX}},
  year      = {1997},
  url       = {http://www.usenix.org/publications/library/proceedings/usits97/douglis\_hpp.html},
  timestamp = {Tue, 02 Feb 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/usits/DouglisHR97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Palme, Hopmann -- RFC 2110 (1997).
@article{DBLP:journals/rfc/rfc2110,
  author    = {Palme, Jacob and Hopmann, Alex},
  title     = {{MIME} E-mail Encapsulation of Aggregate Documents,
               such as {HTML} {(MHTML)}},
  journal   = {{RFC}},
  volume    = {2110},
  pages     = {1--19},
  year      = {1997},
  doi       = {10.17487/RFC2110},
  url       = {https://doi.org/10.17487/RFC2110},
  timestamp = {Tue, 14 May 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/rfc/rfc2110.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Bonhomme, Roisin -- Comput. Networks 28(7-11), 1996.
% NOTE: the issue range is written with a double hyphen so that it is typeset
% as an en-dash range, matching the convention already used in the pages field.
@article{DBLP:journals/cn/BonhommeR96,
  author       = {St{\'{e}}phane Bonhomme and
                  C{\'{e}}cile Roisin},
  title        = {Interactively Restructuring {HTML} Documents},
  journal      = {Comput. Networks},
  volume       = {28},
  number       = {7--11},
  pages        = {1075--1084},
  year         = {1996},
  url          = {https://doi.org/10.1016/0169-7552(96)00042-6},
  doi          = {10.1016/0169-7552(96)00042-6},
  timestamp    = {Wed, 19 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/cn/BonhommeR96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Lim -- Comput. Networks ISDN Syst. 28, 1995.
@article{DBLP:journals/cn/Lim95,
  author    = {Lim, Jong{-}Gyun},
  title     = {Using Coollists to Index {HTML} Documents in the Web},
  journal   = {Comput. Networks {ISDN} Syst.},
  volume    = {28},
  number    = {1{\&}2},
  pages     = {147--154},
  year      = {1995},
  doi       = {10.1016/0169-7552(95)00114-0},
  url       = {https://doi.org/10.1016/0169-7552(95)00114-0},
  timestamp = {Thu, 14 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/cn/Lim95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Raggett -- Comput. Networks ISDN Syst. 27(2), 1994.
% NOTE: the reviewed format is named "HTML+"; the title previously split it
% into "{HTML} +". It is now one brace-protected token so the name stays
% intact and upper-case under sentence-casing styles.
% NOTE(review): verify the exact title against the publisher record via the doi.
@article{DBLP:journals/cn/Raggett94,
  author       = {David Raggett},
  title        = {A Review of the {HTML+} Document Format},
  journal      = {Comput. Networks {ISDN} Syst.},
  volume       = {27},
  number       = {2},
  pages        = {135--145},
  year         = {1994},
  url          = {https://doi.org/10.1016/0169-7552(94)90127-9},
  doi          = {10.1016/0169-7552(94)90127-9},
  timestamp    = {Thu, 14 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/cn/Raggett94.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% A service of Schloss Dagstuhl - Leibniz Center for Informatics