% Conference paper on the CLS / CLS Close frequent-subgraph mining algorithms.
% The acronym CLS is brace-protected in the title so sentence-casing styles
% do not lowercase it. No series field is given: the exported value was a
% verbatim copy of booktitle and would print the proceedings title twice.
@inproceedings{3e3d637208514fde855a8b98b6cc2be8,
  author    = {Gaol, {Ford Lumban} and Widjaja, {Belawati H.}},
  title     = {{CLS} and {CLS} close: The scalable method for mining the semi structured data set},
  booktitle = {Innovations and Advanced Techniques in Systems, Computing Sciences and Software Engineering},
  pages     = {186--191},
  year      = {2008},
  doi       = {10.1007/978-1-4020-8735-6_35},
  isbn      = {9781402087349},
  language  = {English},
  keywords  = {Canonical label, Closed pattern, Cls code, Frequent pattern, Graph mining},
  abstract  = {Semistructured pattern can be formally modeled as Graph Pattern. The most important problem to be solved in mining large semi structured dataset is the scalability of the method. With the successful development of efficient and scalable algorithms for mining frequent itemsets and sequences, it is natural to extend the scope of study to a more general pattern mining problem: mining frequent semistructured patterns or graph patterns. In this paper, we extend the methodology of pattern-growth and develop a novel algorithm called CLS (Canonical Labeling System), which discovers frequent connected subgraphs efficiently using either depth-first search or breadth-first search strategy. A novel canonical labeling system and search order are devised to support efficient pattern growth. CLS has advantages of simplicity and efficiency over other methods since it combines pattern growing and pattern checking into one procedure. Based on CLS, we develop CLS Close to mine closed frequent graphs, which not only eliminates redundant patterns but also substantially increases the efficiency of mining, especially in the presence of large graph patterns.},
  note      = {2007 International Conference on Systems, Computing Sciences and Software Engineering, SCSS 2007, Part of the International Joint Conferences on Computer, Information, and Systems Sciences, and Engineering, CISSE 2007 ; Conference date: 03-12-2007 Through 12-12-2007},
}