4 #include <GdomeSmartDOM.hh>
6 #include <ext/hash_map>
// Short alias for the GNU extension namespace that supplies hash_map.
7 namespace stdx = __gnu_cxx;
// Short alias for the GdomeSmartDOM binding used for all DOM access in this file.
9 namespace DOM = GdomeSmartDOM;
// Number of text nodes whose value is_blank() reports as blank (updated in visit).
14 int n_blank_text_nodes;
// Histogram keyed by depth: print_results reads each key as a depth and each
// value as the number of times that depth was recorded.
18 stdx::hash_map<int,int> depths;
// One entry per visited node that has a first child; visit pushes the
// n_children value it computed for that node.
19 std::vector<int> widths;
// Returns true when every character of `s` is whitespace according to
// isspace() in the current C locale; an empty string is therefore blank.
bool
is_blank(const std::string& s)
{
  for (std::string::size_type i = 0; i < s.length(); i++)
    // isspace() has undefined behaviour for negative arguments other than
    // EOF, so non-ASCII bytes (e.g. UTF-8 text) must be passed as
    // unsigned char rather than as a possibly-signed plain char.
    if (!isspace(static_cast<unsigned char>(s[i]))) return false;
  return true;
}
// Fragment of a definition whose header is not visible in this view.
// Looks up the entry for `depth` in the depths histogram.
32 stdx::hash_map<int,int>::iterator p = depths.find(depth);
// True when an entry for this depth already exists. The branch bodies are not
// visible here -- presumably the existing count is incremented and a missing
// entry is created (TODO confirm).
33 if (p != depths.end())
// Visits `node`, located at `depth`, and updates the file-level statistics
// counters. Only part of the body is visible in this view, so the notes below
// describe the visible statements only.
40 visit(DOM::Node node, int depth)
// Dispatch on the DOM node type.
46 switch (node.get_nodeType())
48 case DOM::Node::ELEMENT_NODE:
// Attribute count of this element: added to the running total and compared
// against the largest count seen so far.
51 const int n_attrs = node.get_attributes().get_length();
52 n_attributes += n_attrs;
53 max_attributes = std::max(max_attributes, n_attrs);
// An element without a first child is counted as a leaf element.
54 if (!node.get_firstChild()) n_leaf_elements++;
57 case DOM::Node::TEXT_NODE:
// Count text nodes whose value is_blank() reports as blank.
59 if (is_blank(node.get_nodeValue())) n_blank_text_nodes++;
61 case DOM::Node::ATTRIBUTE_NODE:
// Walk the child list through the firstChild/nextSibling chain. The loop body
// is not visible here -- presumably it recurses into each child and counts
// n_children (TODO confirm).
66 for (DOM::Node p = node.get_firstChild(); p; p = p.get_nextSibling())
// Track the largest child count seen for any node.
71 max_children = std::max(max_children, n_children);
// Only nodes that have at least one child contribute a width sample.
73 if (node.get_firstChild())
74 widths.push_back(n_children);
// Writes the collected statistics to std::cout as a <stats> XML fragment.
// `URI` is emitted as the value of the `for` attribute and `size` as the
// content of <size>. The declarations of the local accumulators (n_depths,
// tot_depth, max_depth, tot_width) are not visible in this view.
78 print_results(const std::string& URI, long size)
// Fold the depth histogram: key = depth, value = number of occurrences.
83 for (stdx::hash_map<int,int>::const_iterator p = depths.begin(); p != depths.end(); p++)
// Total number of recorded depth samples.
85 n_depths += p->second;
// Sum of depths weighted by their occurrence counts.
86 tot_depth += p->first * p->second;
// Largest depth key present in the histogram.
87 max_depth = std::max(max_depth, p->first);
// Iterate over the width samples; the loop body is not visible here --
// presumably it accumulates tot_width (TODO confirm).
91 for (std::vector<int>::const_iterator p = widths.begin(); p != widths.end(); p++)
// NOTE(review): URI is inserted into the attribute value as-is, with no XML
// escaping visible here -- confirm callers never pass '"', '<' or '&'.
94 std::cout << "<stats for=\"" << URI << "\">" << std::endl;
95 std::cout << " <size>" << size << "</size>" << std::endl;
96 std::cout << " <depth>" << std::endl;
97 std::cout << " <max>" << max_depth << "</max>" << std::endl;
// Average of the recorded depths, computed in floating point.
// NOTE(review): n_depths is zero when the histogram is empty -- confirm that
// a non-finite value in the output is acceptable.
98 std::cout << " <leaf-avg>" << tot_depth / ((double) n_depths) << "</leaf-avg>" << std::endl;
99 std::cout << " </depth>" << std::endl;
100 std::cout << " <width>" << std::endl;
101 std::cout << " <max>" << max_children << "</max>" << std::endl;
// Average over the width samples, computed in floating point.
// NOTE(review): widths.size() is zero when no node had children -- same
// concern as for <leaf-avg>.
102 std::cout << " <inner-avg>" << tot_width / ((double) widths.size()) << "</inner-avg>" << std::endl;
103 std::cout << " </width>" << std::endl;
104 std::cout << " <elements>" << std::endl;
105 std::cout << " <total>" << n_elements << "</total>" << std::endl;
106 std::cout << " <leaf>" << n_leaf_elements << "</leaf>" << std::endl;
107 std::cout << " </elements>" << std::endl;
108 std::cout << " <text-nodes>" << std::endl;
109 std::cout << " <total>" << n_text_nodes << "</total>" << std::endl;
110 std::cout << " <blank>" << n_blank_text_nodes << "</blank>" << std::endl;
111 std::cout << " </text-nodes>" << std::endl;
112 std::cout << " <attributes>" << std::endl;
113 std::cout << " <total>" << n_attributes << "</total>" << std::endl;
114 std::cout << " <max>" << max_attributes << "</max>" << std::endl;
115 std::cout << " </attributes>" << std::endl;
116 std::cout << "</stats>" << std::endl;
// Program entry point. Per the usage message it expects two arguments: the
// URI of the document to analyse and a size value that is echoed in the
// report. The argument check, the traversal call and the return statements
// are not visible in this view.
120 main(int argc, char* argv[])
// Usage text written to the error stream.
123 std::cerr << "Usage: stats <URI> <size>" << std::endl;
// Load the document named by the first argument through the DOM binding.
127 DOM::DOMImplementation di;
128 DOM::Document doc = di.createDocumentFromURI(argv[1]);
// Emit the report.
// NOTE(review): atoi() yields an int and returns 0 for non-numeric input
// without reporting an error, while print_results takes a long -- confirm
// this is acceptable for the sizes passed in.
130 print_results(argv[1], atoi(argv[2]));