% Bibliography: web page template / boilerplate detection, page segmentation,
% and content extraction.
%
% Conventions used in this file:
%   - Citation keys are built from the authors' surnames plus a two-digit year.
%   - Field values are brace-delimited; accented letters are written as brace
%     groups (for example {\"u}) so BibTeX treats each one as a single letter.
%   - Braces around capitals in titles and booktitles protect them from
%     style-driven recasing.

@inproceedings{BarR02,
  author    = {Ziv Bar-Yossef and Sridhar Rajagopalan},
  year      = {2002},
  title     = {{T}emplate detection via data mining and its applications},
  booktitle = {{P}roceedings of the 11th {I}nternational {C}onference on {W}orld {W}ide {W}eb ({WWW}'02)},
  publisher = {{ACM}},
  address   = {{N}ew {Y}ork, {NY}, {USA}},
  pages     = {580--591},
  doi       = {10.1145/511446.511522},
}

@inproceedings{BurR09,
  author    = {Radek Burget and Ivana Rudolfova},
  year      = {2009},
  title     = {{W}eb {P}age {E}lement {C}lassification {B}ased on {V}isual {F}eatures},
  booktitle = {{P}roceedings of the 1st {A}sian {C}onference on {I}ntelligent {I}nformation and {D}atabase {S}ystems ({ACIIDS}'09)},
  publisher = {{IEEE} {C}omputer {S}ociety},
  address   = {{W}ashington, {DC}, {USA}},
  pages     = {67--72},
  doi       = {10.1109/ACIIDS.2009.71},
}

@inproceedings{Cha01,
  author    = {Soumen Chakrabarti},
  year      = {2001},
  title     = {{I}ntegrating the {D}ocument {O}bject {M}odel with hyperlinks for enhanced topic distillation and information extraction},
  booktitle = {{P}roceedings of the 10th {I}nternational {C}onference on {W}orld {W}ide {W}eb ({WWW}'01)},
  publisher = {{ACM}},
  address   = {{N}ew {Y}ork, {NY}, {USA}},
  pages     = {211--220},
  doi       = {10.1145/371920.372054},
}

@inproceedings{FerZBB08,
  author    = {Adriano Ferraresi and Eros Zanchetta and Marco Baroni and Silvia Bernardini},
  year      = {2008},
  title     = {{I}ntroducing and evaluating {ukWaC}, a very large web-derived corpus of english},
  booktitle = {{P}roceedings of the 4th {W}eb as {C}orpus {W}orkshop ({WAC}-4)},
  pages     = {47--54},
}

@inproceedings{GibPT05,
  author    = {David Gibson and Kunal Punera and Andrew Tomkins},
  year      = {2005},
  title     = {{T}he volume and evolution of web page templates},
  editor    = {Allan Ellis and Tatsuya Hagino},
  booktitle = {{P}roceedings of the 14th {I}nternational {C}onference on {W}orld {W}ide {W}eb ({WWW}'05)},
  publisher = {{ACM}},
  pages     = {830--839},
  doi       = {10.1145/1062745.1062763},
}

% NOTE(review): no pages field is recorded for this article -- TODO confirm and add.
@article{KadD12,
  author  = {Vidya Kadam and Prakash R. Devale},
  year    = {2012},
  title   = {A Methodology for Template Extraction from Heterogeneous Web Pages},
  journal = {Indian Journal of Computer Science and Engineering ({IJCSE})},
  volume  = {3},
  number  = {3},
}

@inproceedings{Koh09,
  author    = {Christian Kohlsch{\"u}tter},
  year      = {2009},
  title     = {{A} densitometric analysis of web template content},
  editor    = {Juan Quemada and Gonzalo Le{\'o}n and Yo{\"e}lle S. Maarek and Wolfgang Nejdl},
  booktitle = {{P}roceedings of the 18th {I}nternational {C}onference on {W}orld {W}ide {W}eb ({WWW}'09)},
  publisher = {{ACM}},
  pages     = {1165--1166},
  doi       = {10.1145/1526709.1526909},
}

@inproceedings{KohFN10,
  author    = {Christian Kohlsch{\"u}tter and Peter Fankhauser and Wolfgang Nejdl},
  year      = {2010},
  title     = {{B}oilerplate detection using shallow text features},
  editor    = {Brian D. Davison and Torsten Suel and Nick Craswell and Bing Liu},
  booktitle = {{P}roceedings of the 3rd {I}nternational {C}onference on {W}eb {S}earch and {W}eb {D}ata {M}ining ({WSDM}'10)},
  publisher = {{ACM}},
  pages     = {441--450},
  doi       = {10.1145/1718487.1718542},
}

@inproceedings{KohN08,
  author    = {Christian Kohlsch{\"u}tter and Wolfgang Nejdl},
  year      = {2008},
  title     = {{A} densitometric approach to web page segmentation},
  editor    = {James G. Shanahan and Sihem Amer-Yahia and Ioana Manolescu and Yi Zhang and David A. Evans and Aleksander Kolcz and Key-Sun Choi and Abdur Chowdhury},
  booktitle = {{P}roceedings of the 17th {ACM} {C}onference on {I}nformation and {K}nowledge {M}anagement ({CIKM}'08)},
  publisher = {{ACM}},
  pages     = {1173--1182},
  doi       = {10.1145/1458082.1458237},
}

@inproceedings{NguNPB09,
  author    = {Dat Quoc Nguyen and Dai Quoc Nguyen and Son Bao Pham and The Duy Bui},
  year      = {2009},
  title     = {A Fast Template-Based Approach to Automatically Identify Primary Text Content of a Web Page},
  booktitle = {Proceedings of the 2009 International Conference on Knowledge and Systems Engineering},
  series    = {KSE 2009},
  publisher = {IEEE Computer Society},
  pages     = {232--236},
  doi       = {10.1109/KSE.2009.39},
}

% The first author is given in "Last, First" form so the surname is Reis,
% matching the key prefix; in "First von Last" order the lowercase "de" would
% start the von part and the surname would become "Castro Reis".
@inproceedings{ReiGSL04,
  author    = {Reis, Davi de Castro and Paulo Braz Golgher and Altigran Soares Silva and Alberto Henrique Frade Laender},
  year      = {2004},
  title     = {{A}utomatic web news extraction using tree edit distance},
  booktitle = {{P}roceedings of the 13th {I}nternational {C}onference on {W}orld {W}ide {W}eb ({WWW}'04)},
  publisher = {{ACM}},
  address   = {{N}ew {Y}ork, {NY}, {USA}},
  pages     = {502--511},
  doi       = {10.1145/988672.988740},
}

@inproceedings{RowlandsTW09,
  author    = {Tom Rowlands and Paul Thomas and Stephen Wan},
  year      = {2009},
  title     = {Web indexing on a diet: Template removal with the sandwich algorithm},
  booktitle = {Proceedings of the 14th Australasian Document Computing Symposium},
  url       = {http://es.csiro.au/adcs2009/proceedings/poster-presentation/06-rowlands.pdf},
}

@inproceedings{VieSPMCF06,
  author    = {Karane Vieira and Altigran S. da Silva and Nick Pinto and Edleno S. de Moura and Jo{\~a}o M. B. Cavalcanti and Juliana Freire},
  year      = {2006},
  title     = {{A} fast and robust method for web page template detection and removal},
  booktitle = {{P}roceedings of the 15th {ACM} {I}nternational {C}onference on {I}nformation and {K}nowledge {M}anagement ({CIKM}'06)},
  publisher = {{ACM}},
  address   = {{N}ew {Y}ork, {NY}, {USA}},
  pages     = {258--267},
  doi       = {10.1145/1183614.1183654},
}

@inproceedings{WenHH10,
  author    = {Tim Weninger and William Henry Hsu and Jiawei Han},
  year      = {2010},
  title     = {{CETR}: {C}ontent {E}xtraction via {T}ag {R}atios},
  editor    = {Michael Rappa and Paul Jones and Juliana Freire and Soumen Chakrabarti},
  booktitle = {{P}roceedings of the 19th {I}nternational {C}onference on {W}orld {W}ide {W}eb ({WWW}'10)},
  publisher = {{ACM}},
  pages     = {971--980},
  doi       = {10.1145/1772690.1772789},
}

@inproceedings{YiLL03,
  author    = {Lan Yi and Bing Liu and Xiaoli Li},
  year      = {2003},
  title     = {{E}liminating noisy information in Web pages for data mining},
  booktitle = {{P}roceedings of the 9th {ACM} {SIGKDD} {I}nternational {C}onference on {K}nowledge {D}iscovery and {D}ata mining ({KDD}'03)},
  publisher = {{ACM}},
  address   = {{N}ew {Y}ork, {NY}, {USA}},
  pages     = {296--305},
  doi       = {10.1145/956750.956785},
}