import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.tidy.Tidy; import java.io.*; import java.net.*; class TidyExample { public static void main(String[] args) throws IOException { if(args.length!=1) { System.out.println("This program requires a URL as parameter."); return; } // The following lines creates an instance of jtidy and prevents it // from writing most of its error and warning messages. The messages that // cannot be prevented are redirected to the file "parselog" Tidy t=new Tidy(); t.setMakeClean(true); t.setQuiet(true); t.setOnlyErrors(true); t.setShowWarnings(false); t.setErrout(new PrintWriter(new FileWriter("parselog"))); // Download and parse: URL u=new URL(args[0]); URLConnection uc=u.openConnection(); Document d=t.parseDOM(uc.getInputStream(), null); // Get the root node of the tree: Node root=d; // Get the body node in an ugly way: Node html=root.getLastChild(); Node body=html.getLastChild(); // Print the contents of the body node: NodeList children = body.getChildNodes(); if(children==null) return; int len = children.getLength(); for(int i=0; i