import javax.servlet.http.*; import javax.servlet.*; import java.io.*; import java.net.*; import org.w3c.tidy.*; import org.w3c.dom.*; import javax.xml.transform.*; import javax.xml.transform.stream.*; import javax.xml.transform.dom.*; /** *
* Title:
* Description:
* Copyright: Copyright (c) 2004
* Company:
* @author not attributable
* @version 1.0
*/
public class Html2Dom extends HttpServlet {

    public Html2Dom() {
    }

    /**
     * Prints a textual description of a single DOM node to {@code so}.
     *
     * <p>Document nodes print their name and the name of their root element,
     * then their children are walked via {@link #processChildNodes}. Text and
     * CDATA nodes print their value unless it is empty after trimming. All
     * other node types are ignored here; element nodes are reported by
     * {@link #processChildNodes}, which knows their position among siblings.
     *
     * @param currentNode the node to describe
     * @param p           path of the parent element, used as prefix for child paths
     * @param so          writer receiving the dump
     */
    public void processNode(org.w3c.dom.Node currentNode, String p, PrintWriter so) {
        switch (currentNode.getNodeType()) {

            // process a Document node
            case org.w3c.dom.Node.DOCUMENT_NODE: {
                Document doc = (Document) currentNode;
                // An empty document has no root element; avoid a NullPointerException.
                org.w3c.dom.Node root = doc.getDocumentElement();
                String rootName = (root == null) ? "(none)" : root.getNodeName();
                so.println("Document node: " + doc.getNodeName()
                        + "\nRoot element: " + rootName);
                processChildNodes(doc.getChildNodes(), p, so);
                break;
            }

            // process a text node and a CDATA section
            case org.w3c.dom.Node.CDATA_SECTION_NODE:
            case org.w3c.dom.Node.TEXT_NODE: {
                Text text = (Text) currentNode;
                String value = text.getNodeValue();
                if (value != null && value.trim().length() > 0) {
                    so.println("\tText: " + value + "\n");
                }
                break;
            }

            default:
                break;
        }
    }

    /**
     * Walks a list of sibling nodes and prints each of them to {@code so}.
     *
     * <p>Element nodes are printed with a path of the form
     * {@code parentPath/name[index]}, where {@code index} is the 1-based
     * position of the element among preceding siblings with the same node
     * name, followed by their attributes and, recursively, their children.
     * Every other node type is delegated to {@link #processNode}.
     *
     * @param children sibling nodes to process; {@code null} is treated as empty
     * @param p        path of the parent element
     * @param so       writer receiving the dump
     */
    public void processChildNodes(NodeList children, String p, PrintWriter so) {
        if (children == null) {
            return;
        }

        int count = children.getLength();
        for (int i = 0; i < count; i++) {
            org.w3c.dom.Node child = children.item(i);

            if (child.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
                processNode(child, p, so);
                continue;
            }

            // 1-based index among earlier siblings that share this element name.
            String nn = child.getNodeName();
            int index = 1;
            for (int j = 0; j < i; j++) {
                org.w3c.dom.Node earlier = children.item(j);
                if (earlier.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE
                        && earlier.getNodeName().equals(nn)) {
                    index++;
                }
            }

            String path = p + "/" + nn + "[" + index + "]";
            so.println("\nElement node: " + path + "\n");

            NamedNodeMap attributeNodes = child.getAttributes();
            for (int a = 0; a < attributeNodes.getLength(); a++) {
                Attr attribute = (Attr) attributeNodes.item(a);
                so.println("\tAttribute: " + attribute.getNodeName()
                        + " ; Value = " + attribute.getNodeValue() + "\n");
            }

            processChildNodes(child.getChildNodes(), path, so);
        }
    }

    /**
     * Fetches the HTML page named by the {@code sourceURL} request parameter,
     * parses it into a DOM with Tidy and writes a result to the response.
     *
     * <p>If {@code xslt1URL} is absent or empty, a textual dump of the DOM is
     * written. Otherwise the DOM is transformed with the stylesheet at
     * {@code xslt1URL}, then (if {@code xslt2URL} is present and non-empty)
     * with the stylesheet at {@code xslt2URL}, and the result is serialized
     * to the response output stream.
     *
     * @param req  the request carrying {@code sourceURL}, {@code xslt1URL}, {@code xslt2URL}
     * @param resp the response to write to
     * @throws ServletException if a stylesheet cannot be compiled or a transformation fails
     * @throws IOException      if a URL is malformed or cannot be read, or the response cannot be written
     */
    protected void doGet(HttpServletRequest req, HttpServletResponse resp)
            throws ServletException, IOException {
        // get the parameters
        String sourceUrlStr = req.getParameter("sourceURL");
        String xslt1UrlStr = req.getParameter("xslt1URL");
        String xslt2UrlStr = req.getParameter("xslt2URL");

        // NOTE(review): all three URLs come straight from the request and are
        // fetched server-side with no scheme or host restriction. Confirm this
        // servlet is only reachable by trusted callers, or add an allow-list.

        if (isAbsent(sourceUrlStr)) {
            resp.sendError(HttpServletResponse.SC_BAD_REQUEST,
                    "Missing required parameter: sourceURL");
            return;
        }

        // parse the html file into dom
        Document doc = parseHtml(sourceUrlStr);

        // no stylesheet requested: print out the DOM
        if (isAbsent(xslt1UrlStr)) {
            processNode(doc, "", resp.getWriter());
            return;
        }

        try {
            TransformerFactory factory = TransformerFactory.newInstance();
            // identity transformer used to serialize the final result
            Transformer t = factory.newTransformer();

            // transformation with xslt1
            DOMResult result1 = new DOMResult();
            loadStylesheet(factory, xslt1UrlStr).transform(new DOMSource(doc), result1);
            org.w3c.dom.Node result = result1.getNode();

            // optional transformation with xslt2
            if (!isAbsent(xslt2UrlStr)) {
                DOMResult result2 = new DOMResult();
                loadStylesheet(factory, xslt2UrlStr).transform(new DOMSource(result), result2);
                result = result2.getNode();
            }

            t.transform(new DOMSource(result), new StreamResult(resp.getOutputStream()));
        } catch (TransformerException ex1) {
            // Surface the failure instead of returning an empty response.
            throw new ServletException("XSLT transformation failed", ex1);
        }
    }

    /** Returns true when a request parameter was omitted or sent empty. */
    private static boolean isAbsent(String value) {
        return value == null || value.length() == 0;
    }

    /** Downloads the page at the given URL and parses it into a DOM with Tidy. */
    private Document parseHtml(String sourceUrlStr) throws IOException {
        URLConnection cn = new URL(sourceUrlStr).openConnection();

        Tidy tidy = new Tidy();
        tidy.setIndentContent(true);
        tidy.setXHTML(true);
        tidy.setWraplen(Integer.MAX_VALUE);

        InputStream in = cn.getInputStream();
        try {
            return tidy.parseDOM(in, null);
        } finally {
            in.close();
        }
    }

    /** Downloads the stylesheet at the given URL and compiles it into a Transformer. */
    private Transformer loadStylesheet(TransformerFactory factory, String xsltUrlStr)
            throws IOException, TransformerException {
        InputStream in = new URL(xsltUrlStr).openConnection().getInputStream();
        try {
            return factory.newTransformer(new StreamSource(in));
        } finally {
            in.close();
        }
    }
}