#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

#include <ext/hash_map>

#include <GdomeSmartDOM.hh>

namespace DOM = GdomeSmartDOM;
// ---------------------------------------------------------------------------
// Statistics gathered by visit() and reported by print_results().
// All counters start at zero.
// ---------------------------------------------------------------------------

// Reported as <elements><total>.
// NOTE(review): n_elements and max_depth are used by visit()/print_results()
// but had no visible declaration; they are declared here so the file is
// self-consistent. Drop them if they are already declared elsewhere.
unsigned n_elements = 0;
// Element nodes without any child node.
unsigned n_leaf_elements = 0;
// Reported as <text-nodes><total>.
unsigned n_text_nodes = 0;
// Text nodes whose value satisfies is_blank().
unsigned n_blank_text_nodes = 0;
// Sum of the attribute counts of all visited elements.
unsigned n_attributes = 0;
// Largest attribute count seen on a single element.
unsigned max_attributes = 0;
// Largest depth value passed to visit().
unsigned max_depth = 0;
// Largest number of children counted for a single node.
unsigned max_children = 0;

// Depth of every childless node, one entry per node.
// This used to be declared as std::hash_map<unsigned,unsigned>, but visit()
// appends to it with push_back() and print_results() walks it with a
// std::vector<unsigned>::const_iterator, so a vector is the type that the
// rest of the file actually requires.
std::vector<unsigned> depths;
// Child counts recorded by visit(); averaged into <inner-avg>.
std::vector<unsigned> widths;

// Returns true when every character of `s` is a blank character as defined
// by isblank() (space and horizontal tab in the "C" locale). An empty string
// is considered blank.
//
// NOTE(review): newlines and carriage returns are NOT blank for isblank(), so
// whitespace-only text nodes that contain line breaks are not counted as
// blank. Confirm whether isspace() semantics were intended.
bool
is_blank(const std::string& s)
{
  for (std::string::size_type i = 0; i < s.length(); ++i)
    {
      // The <cctype> classifiers have undefined behaviour for negative
      // arguments, which plain char produces for bytes >= 0x80 (e.g. UTF-8).
      if (!isblank(static_cast<unsigned char>(s[i]))) return false;
    }
  return true;
}

// Records the depth of one leaf node by appending it to `depths`.
//
// NOTE(review): the previous body looked up `depth` in a hash_map (apparently
// keeping a per-depth counter); it was rewritten to match the sequence
// semantics that visit() and print_results() rely on.
void
add_depth(unsigned depth)
{
  depths.push_back(depth);
}
40 visit(DOM::Node node, unsigned depth)
44 max_depth = std::max(max_depth, depth);
46 switch (node.get_nodeType())
48 case DOM::Node::ELEMENT_NODE:
51 const unsigned n_attrs = node.get_attributes().get_length();
52 n_attributes += n_attrs;
53 max_attributes = std::max(max_attributes, n_attrs);
54 if (!node.get_firstChild()) n_leaf_elements++;
57 case DOM::Node::TEXT_NODE:
59 if (is_blank(node.get_nodeValue())) n_blank_text_nodes++;
61 case DOM::Node::ATTRIBUTE_NODE:
65 unsigned n_children = 0;
66 for (DOM::Node p = node.get_firstChild(); p; p = p.get_nextSibling())
71 max_children = std::max(max_children, n_children);
73 if (!node.get_firstChild())
74 depths.push_back(depth);
76 widths.push_back(n_children);
80 print_results(const std::string& URI)
82 unsigned tot_depth = 0;
83 for (std::vector<unsigned>::const_iterator p = depths.begin(); p != depths.end(); p++)
86 unsigned tot_width = 0;
87 for (std::vector<unsigned>::const_iterator p = widths.begin(); p != widths.end(); p++)
90 std::cout << "<stats for=\"" << URI << "\">" << std::endl;
91 std::cout << " <depth>" << std::endl;
92 std::cout << " <max>" << max_depth << "</max>" << std::endl;
93 std::cout << " <leaf-avg>" << tot_depth / ((double) depths.size()) << "</leaf-avg>" << std::endl;
94 std::cout << " </depth>" << std::endl;
95 std::cout << " <width>" << std::endl;
96 std::cout << " <max>" << max_children << "</max>" << std::endl;
97 std::cout << " <inner-avg>" << tot_width / ((double) widths.size()) << "</inner-avg>" << std::endl;
98 std::cout << " </width>" << std::endl;
99 std::cout << " <elements>" << std::endl;
100 std::cout << " <total>" << n_elements << "</total>" << std::endl;
101 std::cout << " <leaf>" << n_leaf_elements << "</leaf>" << std::endl;
102 std::cout << " </elements>" << std::endl;
103 std::cout << " <text-nodes>" << std::endl;
104 std::cout << " <total>" << n_text_nodes << "</total>" << std::endl;
105 std::cout << " <blank>" << n_blank_text_nodes << "</blank>" << std::endl;
106 std::cout << " </text-nodes>" << std::endl;
107 std::cout << " <attributes>" << std::endl;
108 std::cout << " <total>" << n_attributes << "</total>" << std::endl;
109 std::cout << " <max>" << max_attributes << "</max>" << std::endl;
110 std::cout << " </attributes>" << std::endl;
111 std::cout << "</stats>" << std::endl;
115 main(int argc, char* argv[])
118 std::cerr << "Usage: stats <URI>" << std::endl;
122 DOM::DOMImplementation di;
123 DOM::Document doc = di.createDocumentFromURI(argv[1]);
125 print_results(argv[1]);