From 8fce28cde4e422993148e1fca011cae7ea230c16 Mon Sep 17 00:00:00 2001
From: Luca Padovani
Date: Mon, 8 Nov 2004 21:04:12 +0000
Subject: [PATCH] * added .cc program (and equivalent .xsl stylesheet) for collecting statistics about XML files

---
 helm/papers/use_case/stats/Makefile  |   4 +
 helm/papers/use_case/stats/stats.cc  | 141 +++++++++++++++++++++++++++
 helm/papers/use_case/stats/stats.xsl | 130 +++++++++++++++++++++++++
 3 files changed, 275 insertions(+)
 create mode 100644 helm/papers/use_case/stats/Makefile
 create mode 100644 helm/papers/use_case/stats/stats.cc
 create mode 100644 helm/papers/use_case/stats/stats.xsl

diff --git a/helm/papers/use_case/stats/Makefile b/helm/papers/use_case/stats/Makefile
new file mode 100644
index 000000000..fa5f5e2e9
--- /dev/null
+++ b/helm/papers/use_case/stats/Makefile
@@ -0,0 +1,4 @@
+
+stats: stats.cc
+	g++ -o $@ `pkg-config gdome2-cpp-smart --cflags --libs` $<
+
diff --git a/helm/papers/use_case/stats/stats.cc b/helm/papers/use_case/stats/stats.cc
new file mode 100644
index 000000000..9892e12b6
--- /dev/null
+++ b/helm/papers/use_case/stats/stats.cc
@@ -0,0 +1,141 @@
+
+// stats: collects structural statistics about an XML document loaded
+// through the GdomeSmartDOM bindings and prints them on standard output.
+//
+// NOTE(review): the two original #include lines carried no header names in
+// the copy of this patch that was reviewed; the list below names the
+// headers the code visibly needs -- confirm GdomeSmartDOM.hh against the
+// gdome2-cpp-smart package used by the Makefile.
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include <GdomeSmartDOM.hh>
+
+namespace DOM = GdomeSmartDOM;
+
+// Counters filled in by visit() and reported by print_results().  They have
+// static storage duration, so they all start at zero.
+unsigned n_elements;          // element nodes visited
+unsigned n_leaf_elements;     // element nodes without any child node
+unsigned n_text_nodes;        // text nodes visited
+unsigned n_blank_text_nodes;  // text nodes for which is_blank() holds
+unsigned n_attributes;        // attributes summed over all elements
+unsigned max_attributes;      // largest attribute count on one element
+unsigned max_depth;           // deepest level reached (root call is 0)
+unsigned max_children;        // largest number of children of one node
+
+// Returns true when s is empty or made only of XML white space (space,
+// tab, line feed, carriage return).
+//
+// The characters are compared explicitly instead of going through
+// isblank(): in the default locale that function accepts only space and
+// tab, so the line breaks of indented documents would make every such text
+// node count as non-blank, and calling it with a negative char (any byte
+// >= 0x80 where char is signed) is undefined behaviour.
+bool
+is_blank(const std::string& s)
+{
+  for (std::string::size_type i = 0; i < s.length(); i++)
+    {
+      const char c = s[i];
+      if (c != ' ' && c != '\t' && c != '\n' && c != '\r') return false;
+    }
+  return true;
+}
+
+// Recursively visits the subtree rooted at node and updates the global
+// counters.
+//
+// node  - node to visit; must not be null (checked with assert).
+// depth - level of node; main() passes 0 for the document node and each
+//         recursive call passes depth + 1 for the children.
+void
+visit(DOM::Node node, unsigned depth)
+{
+  assert(node);
+
+  max_depth = std::max(max_depth, depth);
+
+  switch (node.get_nodeType())
+    {
+    case DOM::Node::ELEMENT_NODE:
+      {
+        n_elements++;
+        const unsigned n_attrs = node.get_attributes().get_length();
+        n_attributes += n_attrs;
+        max_attributes = std::max(max_attributes, n_attrs);
+        if (!node.get_firstChild()) n_leaf_elements++;
+      }
+      break;
+    case DOM::Node::TEXT_NODE:
+      n_text_nodes++;
+      if (is_blank(node.get_nodeValue())) n_blank_text_nodes++;
+      break;
+    case DOM::Node::ATTRIBUTE_NODE:
+      break;
+    }
+
+  // Children are walked for every node type, so max_children also takes
+  // the document node itself into account.
+  unsigned n_children = 0;
+  for (DOM::Node p = node.get_firstChild(); p; p = p.get_nextSibling())
+    {
+      visit(p, depth + 1);
+      n_children++;
+    }
+  max_children = std::max(max_children, n_children);
+}
+
+// Prints the collected counters on standard output, one item per line.
+//
+// URI - document the statistics refer to; not used by the body below.
+//
+// NOTE(review): every string literal here is empty or a single space in
+// the reviewed copy of this patch; they presumably carried the XML markup
+// surrounding each number -- confirm against the repository before relying
+// on the output format.  They are reproduced exactly as found.
+void
+print_results(const std::string& URI)
+{
+  std::cout << "" << std::endl;
+  std::cout << " " << max_depth << "" << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << " " << n_elements << "" << std::endl;
+  std::cout << " " << n_leaf_elements << "" << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << " " << n_text_nodes << "" << std::endl;
+  std::cout << " " << n_blank_text_nodes << "" << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << " " << n_attributes << "" << std::endl;
+  std::cout << " " << max_attributes << "" << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << " " << max_children << "" << std::endl;
+  std::cout << " " << std::endl;
+  std::cout << "" << std::endl;
+}
+
+// Entry point: loads the document named by the first argument, gathers
+// the statistics and prints them.  Returns 1 when no argument is given.
+int
+main(int argc, char* argv[])
+{
+  // Without this check argv[1] is a null pointer when no argument is
+  // given, and it would be handed to the parser and to std::string.
+  if (argc < 2)
+    {
+      std::cerr << "usage: " << (argc > 0 ? argv[0] : "stats")
+                << " URI" << std::endl;
+      return 1;
+    }
+
+  DOM::DOMImplementation di;
+  DOM::Document doc = di.createDocumentFromURI(argv[1]);
+  visit(doc, 0);
+  print_results(argv[1]);
+  return 0;
+}
diff --git a/helm/papers/use_case/stats/stats.xsl b/helm/papers/use_case/stats/stats.xsl
new file mode 100644
index 000000000..ea643f082
--- /dev/null
+++ b/helm/papers/use_case/stats/stats.xsl
@@ -0,0 +1,130 @@
+
+
+
+
+
+
+
+
+
+
+ Computing max depth...
+
+
+
+ Computing number of nodes...
+
+
+
+
+
+
+
+
+
+ Computing number of leaves...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-- 
2.39.2