+++ /dev/null
-
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <ext/hash_map>

#include <GdomeSmartDOM.hh>
-namespace stdx = __gnu_cxx;
-
-namespace DOM = GdomeSmartDOM;
-
-int n_elements;
-int n_leaf_elements;
-int n_text_nodes;
-int n_blank_text_nodes;
-int n_attributes;
-int max_attributes;
-int max_children;
-stdx::hash_map<int,int> depths;
-std::vector<int> widths;
-
/**
 * Tells whether a string consists solely of whitespace characters.
 *
 * @param s  the string to inspect
 * @return   true if every character of s is classified as whitespace by
 *           isspace() (so an empty string counts as blank); false as soon
 *           as one non-whitespace character is found
 */
bool
is_blank(const std::string& s)
{
  // Index with the string's own unsigned size type so the loop bound is
  // compared without a signed/unsigned mismatch.
  for (std::string::size_type i = 0; i < s.length(); i++)
    {
      // isspace() is only defined for values representable as unsigned char
      // (or EOF).  A plain char can be negative, e.g. for the bytes of a
      // multi-byte UTF-8 sequence, so convert before classifying.
      if (!isspace(static_cast<unsigned char>(s[i]))) return false;
    }
  return true;
}
-
-void
-add_depth(int depth)
-{
- stdx::hash_map<int,int>::iterator p = depths.find(depth);
- if (p != depths.end())
- p->second++;
- else
- depths[depth] = 1;
-}
-
-void
-visit(DOM::Node node, int depth)
-{
- assert(node);
-
- add_depth(depth);
-
- switch (node.get_nodeType())
- {
- case DOM::Node::ELEMENT_NODE:
- {
- n_elements++;
- const int n_attrs = node.get_attributes().get_length();
- n_attributes += n_attrs;
- max_attributes = std::max(max_attributes, n_attrs);
- if (!node.get_firstChild()) n_leaf_elements++;
- }
- break;
- case DOM::Node::TEXT_NODE:
- n_text_nodes++;
- if (is_blank(node.get_nodeValue())) n_blank_text_nodes++;
- break;
- case DOM::Node::ATTRIBUTE_NODE:
- break;
- }
-
- int n_children = 0;
- for (DOM::Node p = node.get_firstChild(); p; p = p.get_nextSibling())
- {
- visit(p, depth + 1);
- n_children++;
- }
- max_children = std::max(max_children, n_children);
-
- if (node.get_firstChild())
- widths.push_back(n_children);
-}
-
-void
-print_results(const std::string& URI, long size)
-{
- int n_depths = 0;
- int tot_depth = 0;
- int max_depth = 0;
- for (stdx::hash_map<int,int>::const_iterator p = depths.begin(); p != depths.end(); p++)
- {
- n_depths += p->second;
- tot_depth += p->first * p->second;
- max_depth = std::max(max_depth, p->first);
- }
-
- int tot_width = 0;
- for (std::vector<int>::const_iterator p = widths.begin(); p != widths.end(); p++)
- tot_width += *p;
-
- std::cout << "<stats for=\"" << URI << "\">" << std::endl;
- std::cout << " <size>" << size << "</size>" << std::endl;
- std::cout << " <depth>" << std::endl;
- std::cout << " <max>" << max_depth << "</max>" << std::endl;
- std::cout << " <leaf-avg>" << tot_depth / ((double) n_depths) << "</leaf-avg>" << std::endl;
- std::cout << " </depth>" << std::endl;
- std::cout << " <width>" << std::endl;
- std::cout << " <max>" << max_children << "</max>" << std::endl;
- std::cout << " <inner-avg>" << tot_width / ((double) widths.size()) << "</inner-avg>" << std::endl;
- std::cout << " </width>" << std::endl;
- std::cout << " <elements>" << std::endl;
- std::cout << " <total>" << n_elements << "</total>" << std::endl;
- std::cout << " <leaf>" << n_leaf_elements << "</leaf>" << std::endl;
- std::cout << " </elements>" << std::endl;
- std::cout << " <text-nodes>" << std::endl;
- std::cout << " <total>" << n_text_nodes << "</total>" << std::endl;
- std::cout << " <blank>" << n_blank_text_nodes << "</blank>" << std::endl;
- std::cout << " </text-nodes>" << std::endl;
- std::cout << " <attributes>" << std::endl;
- std::cout << " <total>" << n_attributes << "</total>" << std::endl;
- std::cout << " <max>" << max_attributes << "</max>" << std::endl;
- std::cout << " </attributes>" << std::endl;
- std::cout << "</stats>" << std::endl;
-}
-
-int
-main(int argc, char* argv[])
-{
- if (argc != 3) {
- std::cerr << "Usage: stats <URI> <size>" << std::endl;
- return -1;
- }
-
- DOM::DOMImplementation di;
- DOM::Document doc = di.createDocumentFromURI(argv[1]);
- visit(doc, 0);
- print_results(argv[1], atoi(argv[2]));
-}