#include <cassert>
#include <GdomeSmartDOM.hh>
+#include <ext/hash_map>
+namespace stdx = __gnu_cxx;
+
namespace DOM = GdomeSmartDOM;
-unsigned n_elements;
-unsigned n_leaf_elements;
-unsigned n_text_nodes;
-unsigned n_blank_text_nodes;
-unsigned n_attributes;
-unsigned max_attributes;
-unsigned max_depth;
-unsigned max_children;
-std::vector<unsigned> depths;
-std::vector<unsigned> widths;
+// Global tallies filled in while visit() walks the document.
+// Number of ELEMENT_NODE nodes visited.
+int n_elements;
+// Number of visited elements that have no first child.
+int n_leaf_elements;
+// NOTE(review): the code updating the two text counters is not part of this
+// patch; presumably they count text nodes and whitespace-only text nodes.
+int n_text_nodes;
+int n_blank_text_nodes;
+// Sum of the attribute-list lengths of all visited elements.
+int n_attributes;
+// Largest attribute-list length found on a single element.
+int max_attributes;
+// Largest n_children value computed by visit().
+int max_children;
+// Histogram maintained by add_depth(): depth -> number of nodes visited at
+// that depth.
+stdx::hash_map<int,int> depths;
+// One n_children entry for every visited node that has a first child.
+std::vector<int> widths;
+// Returns true when every character of s is whitespace according to
+// isspace(); an empty string is reported as blank.
bool
is_blank(const std::string& s)
{
for (int i = 0; i < s.length(); i++)
- if (!isblank(s[i])) return false;
+ // The <cctype> classifiers are undefined for negative arguments other
+ // than EOF, so convert the (possibly signed) char to unsigned char first.
+ if (!isspace(static_cast<unsigned char>(s[i]))) return false;
return true;
}
+// Records one more node at the given depth in the global depths histogram.
void
-visit(DOM::Node node, unsigned depth)
+add_depth(int depth)
+{
+ // operator[] inserts a zero-initialised entry the first time a depth is
+ // seen, so a single increment covers both the new and the existing case.
+ ++depths[depth];
+}
+
+void
+visit(DOM::Node node, int depth)
{
assert(node);
- max_depth = std::max(max_depth, depth);
+ add_depth(depth);
switch (node.get_nodeType())
{
case DOM::Node::ELEMENT_NODE:
{
n_elements++;
- const unsigned n_attrs = node.get_attributes().get_length();
+ const int n_attrs = node.get_attributes().get_length();
n_attributes += n_attrs;
max_attributes = std::max(max_attributes, n_attrs);
if (!node.get_firstChild()) n_leaf_elements++;
break;
}
- unsigned n_children = 0;
+ int n_children = 0;
for (DOM::Node p = node.get_firstChild(); p; p = p.get_nextSibling())
{
visit(p, depth + 1);
}
max_children = std::max(max_children, n_children);
- if (!node.get_firstChild())
- depths.push_back(depth);
- else
+ if (node.get_firstChild())
widths.push_back(n_children);
}
void
-print_results(const std::string& URI)
+print_results(const std::string& URI, long size)
{
- unsigned tot_depth = 0;
- for (std::vector<unsigned>::const_iterator p = depths.begin(); p != depths.end(); p++)
- tot_depth += *p;
-
- unsigned tot_width = 0;
- for (std::vector<unsigned>::const_iterator p = widths.begin(); p != widths.end(); p++)
- tot_width += *p;
+ int n_depths = 0;
+ int tot_depth = 0;
+ int max_depth = 0;
+ for (stdx::hash_map<int,int>::const_iterator p = depths.begin(); p != depths.end(); p++)
+ {
+ n_depths += p->second;
+ tot_depth += p->first * p->second;
+ max_depth = std::max(max_depth, p->first);
+ }
+
+ int tot_width = 0;
+ for (std::vector<int>::const_iterator p = widths.begin(); p != widths.end(); p++)
+ tot_width += *p;
std::cout << "<stats for=\"" << URI << "\">" << std::endl;
+ std::cout << " <size>" << size << "</size>" << std::endl;
std::cout << " <depth>" << std::endl;
std::cout << " <max>" << max_depth << "</max>" << std::endl;
- std::cout << " <leaf-avg>" << tot_depth / ((double) depths.size()) << "</leaf-avg>" << std::endl;
+ std::cout << " <leaf-avg>" << tot_depth / ((double) n_depths) << "</leaf-avg>" << std::endl;
std::cout << " </depth>" << std::endl;
std::cout << " <width>" << std::endl;
std::cout << " <max>" << max_children << "</max>" << std::endl;
+// Entry point: stats <URI> <size>.  Builds the document from the URI in
+// argv[1], walks it with visit(), then prints the statistics together with
+// the size given in argv[2].  Returns -1 when the argument count is wrong.
int
main(int argc, char* argv[])
{
- if (argc != 2) {
- std::cerr << "Usage: stats <URI>" << std::endl;
+ if (argc != 3) {
+ std::cerr << "Usage: stats <URI> <size>" << std::endl;
return -1;
}
DOM::DOMImplementation di;
DOM::Document doc = di.createDocumentFromURI(argv[1]);
visit(doc, 0);
- print_results(argv[1]);
+ // print_results() takes a long: strtol covers the whole long range and has
+ // defined behaviour on overflow, whereas atoi is limited to int and is
+ // undefined for out-of-range input.
+ print_results(argv[1], strtol(argv[2], 0, 10));
}