package de.kompf.javaxml;

import java.io.*;
import java.net.URL;
import java.util.*;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.*;
import javax.xml.xpath.*;

import org.w3c.dom.*;
import org.xml.sax.SAXException;

/**
 * Sample code showing how to use the XPath API to extract information from
 * XML data.
 * 
 * @author Kompf
 * 
 */
public class XPathReader {

  /** Parser feature controlling whether external general entities are resolved. */
  private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES =
      "http://xml.org/sax/features/external-general-entities";

  /** Parser feature controlling whether external parameter entities are resolved. */
  private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES =
      "http://xml.org/sax/features/external-parameter-entities";

  /** Parser feature controlling whether an external DTD is loaded. */
  private static final String FEATURE_LOAD_EXTERNAL_DTD =
      "http://apache.org/xml/features/nonvalidating/load-external-dtd";

  /**
   * Evaluate an XML input stream using the given xpath.
   * <p>
   * The document is parsed without namespace awareness, so element names are
   * matched literally. The stream is not closed by this method; that is the
   * responsibility of the caller.
   * 
   * @param in The XML input stream.
   * @param xpathExpr The xpath expression - must not contain any namespace
   * prefixes. It must evaluate to a node set.
   * @param result A collection to append the results to. The node value of
   * each matching node is appended; node types that have no node value (for
   * example elements) contribute <code>null</code>.
   * @throws ParserConfigurationException If the parser cannot be configured.
   * @throws SAXException If the input is not well-formed XML.
   * @throws IOException If reading the stream fails.
   * @throws XPathExpressionException If the xpath expression is invalid or
   * cannot be evaluated to a node set.
   */
  void eval(InputStream in, String xpathExpr, Collection<String> result)
      throws ParserConfigurationException, SAXException, IOException,
      XPathExpressionException {
    // namespace awareness must be off, otherwise unprefixed steps would not
    // match elements living in a default namespace
    Document doc = newDocumentBuilder(false).parse(in);

    XPath xpath = XPathFactory.newInstance().newXPath();
    collectNodeValues(doc, xpath, xpathExpr, result);
  }

  /**
   * Evaluate an XML input stream using the given xpath. This implementation is
   * namespace aware.
   * <p>
   * The stream is not closed by this method; that is the responsibility of the
   * caller.
   * 
   * @param in The XML input stream.
   * @param xpathExpr The xpath expression - may contain namespace prefixes. It
   * must evaluate to a node set.
   * @param nsCtx The namespace context to resolve the namespace prefixes from
   * the xpath expression.
   * @param result A collection to append the results to. The node value of
   * each matching node is appended; node types that have no node value (for
   * example elements) contribute <code>null</code>.
   * @throws ParserConfigurationException If the parser cannot be configured.
   * @throws SAXException If the input is not well-formed XML.
   * @throws IOException If reading the stream fails.
   * @throws XPathExpressionException If the xpath expression is invalid or
   * cannot be evaluated to a node set.
   */
  void evalNamespaceAware(InputStream in, String xpathExpr,
      NamespaceContext nsCtx, Collection<String> result)
      throws ParserConfigurationException, SAXException, IOException,
      XPathExpressionException {
    // namespace awareness must be on so that prefixed steps can be matched
    // against the namespace URIs of the parsed elements
    Document doc = newDocumentBuilder(true).parse(in);

    XPath xpath = XPathFactory.newInstance().newXPath();
    xpath.setNamespaceContext(nsCtx);
    collectNodeValues(doc, xpath, xpathExpr, result);
  }

  /**
   * Create a DOM parser that does not resolve external entities or DTDs.
   * 
   * @param namespaceAware Whether the parser should be namespace aware.
   * @return A new document builder.
   * @throws ParserConfigurationException If the parser does not support the
   * requested configuration.
   */
  private static DocumentBuilder newDocumentBuilder(boolean namespaceAware)
      throws ParserConfigurationException {
    DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
    docFactory.setNamespaceAware(namespaceAware);
    // The input may come from the network: never let the document pull in
    // local files or remote resources through external entities (XXE).
    docFactory.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
    docFactory.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
    docFactory.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
    return docFactory.newDocumentBuilder();
  }

  /**
   * Evaluate the xpath expression against the document and append the node
   * value of every matching node to the result collection.
   */
  private static void collectNodeValues(Document doc, XPath xpath,
      String xpathExpr, Collection<String> result)
      throws XPathExpressionException {
    XPathExpression expr = xpath.compile(xpathExpr);
    NodeList nodeList = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
    for (int i = 0; i < nodeList.getLength(); ++i) {
      result.add(nodeList.item(i).getNodeValue());
    }
  }

  /**
   * Open the URL, evaluate the xpath (not namespace aware) and always close
   * the stream afterwards.
   */
  private void evalUrl(URL url, String xpathExpr, Collection<String> result)
      throws Exception {
    InputStream in = url.openStream();
    try {
      eval(in, xpathExpr, result);
    } finally {
      in.close();
    }
  }

  /**
   * Print the result, one entry per line.
   * 
   * @param result The collection of results.
   * @param out The print stream to use.
   */
  void printResult(Collection<String> result, PrintStream out) {
    for (String name : result) {
      out.println(name);
    }
  }

  /**
   * Read the titles of entries from the Heise news feed and print them to
   * standard output.
   * 
   * @throws Exception If the feed cannot be read or evaluated.
   */
  void readHeiseFeed() throws Exception {
    URL heiseFeed = new URL("http://www.heise.de/newsticker/heise-atom.xml");
    Collection<String> result = new ArrayList<String>();
    evalUrl(heiseFeed, "//entry/title/text()", result);
    printResult(result, System.out);
  }

  /**
   * Read the titles of entries from the Heise news feed. This is basically the
   * same as {@link #readHeiseFeed()} but is aware of the namespace of the
   * atom feed.
   * 
   * @throws Exception If the feed cannot be read or evaluated.
   */
  void readHeiseFeedNamespaceAware() throws Exception {
    URL heiseFeed = new URL("http://www.heise.de/newsticker/heise-atom.xml");
    Collection<String> result = new ArrayList<String>();
    InputStream in = heiseFeed.openStream();
    try {
      evalNamespaceAware(in, "//ns:entry/ns:title/text()",
          new SimpleNamespaceContext("ns", "http://www.w3.org/2005/Atom"),
          result);
    } finally {
      in.close();
    }
    printResult(result, System.out);
  }

  /**
   * Read the titles of entries from the Twitter public time line. This is
   * basically the same as {@link #readHeiseFeed()} but uses another URL.
   * 
   * @throws Exception If the feed cannot be read or evaluated.
   */
  void readTwitterPublicTimeLine() throws Exception {
    URL twitterFeed = new URL(
        "http://api.twitter.com/1/statuses/public_timeline.atom");
    Collection<String> result = new ArrayList<String>();
    evalUrl(twitterFeed, "//entry/title/text()", result);
    printResult(result, System.out);
  }

  /**
   * Read all names of 'way' elements from an OSM XML file. The names are
   * kept sorted and unique using a TreeSet.
   * 
   * @throws Exception If the file cannot be read or evaluated.
   */
  void readOsmWayNames() throws Exception {
    // OSM path names
    URL osmUrl = new File("md.osm.xml").toURI().toURL();
    Collection<String> result = new TreeSet<String>();
    evalUrl(osmUrl, "/osm/way/tag[@k='name']/@v", result);
    printResult(result, System.out);
  }

  /**
   * MAIN.
   * 
   * @param args ignored.
   * @throws Exception If an error occurs.
   */
  public static void main(String[] args) throws Exception {
    XPathReader xPathReader = new XPathReader();
    // Heise News
    xPathReader.readHeiseFeed();
    // Heise News namespace aware
    //xPathReader.readHeiseFeedNamespaceAware();
    // Twitter public time line
    //xPathReader.readTwitterPublicTimeLine();
    // OSM path names
    //xPathReader.readOsmWayNames();
  }

  /**
   * Minimal namespace context that binds exactly one prefix to one namespace
   * URI. In addition, the reserved prefixes <code>xml</code> and
   * <code>xmlns</code> are resolved as required by the
   * {@link NamespaceContext} contract.
   */
  static class SimpleNamespaceContext implements NamespaceContext {
    private final String prefix;
    private final String uri;

    /**
     * @param prefix The prefix to bind, must not be <code>null</code>.
     * @param uri The namespace URI the prefix is bound to, must not be
     * <code>null</code>.
     * @throws IllegalArgumentException If one of the arguments is
     * <code>null</code>.
     */
    public SimpleNamespaceContext(String prefix, String uri) {
      if (prefix == null) {
        throw new IllegalArgumentException("prefix must not be null");
      }
      if (uri == null) {
        throw new IllegalArgumentException("uri must not be null");
      }
      this.prefix = prefix;
      this.uri = uri;
    }

    /**
     * @return The bound URI for the configured prefix, the reserved URIs for
     * <code>xml</code> and <code>xmlns</code>, otherwise
     * {@link XMLConstants#NULL_NS_URI}.
     * @throws IllegalArgumentException If the prefix is <code>null</code>.
     */
    public String getNamespaceURI(String prefix) {
      if (prefix == null) {
        throw new IllegalArgumentException("prefix must not be null");
      }
      // the configured binding is checked first so it always wins
      if (this.prefix.equals(prefix)) {
        return uri;
      }
      if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
        return XMLConstants.XML_NS_URI;
      }
      if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
        return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
      }
      return XMLConstants.NULL_NS_URI;
    }

    /**
     * @return The prefix bound to the namespace URI or <code>null</code> if
     * the URI is unknown.
     * @throws IllegalArgumentException If the namespace URI is
     * <code>null</code>.
     */
    public String getPrefix(String namespaceURI) {
      if (namespaceURI == null) {
        throw new IllegalArgumentException("namespaceURI must not be null");
      }
      if (uri.equals(namespaceURI)) {
        return prefix;
      }
      if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
        return XMLConstants.XML_NS_PREFIX;
      }
      if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
        return XMLConstants.XMLNS_ATTRIBUTE;
      }
      return null;
    }

    /**
     * @return A read-only iterator over the prefixes bound to the namespace
     * URI; empty if the URI is unknown.
     * @throws IllegalArgumentException If the namespace URI is
     * <code>null</code>.
     */
    public Iterator<String> getPrefixes(String namespaceURI) {
      // getPrefix performs the null check and the lookup; at most one prefix
      // can ever be bound to a URI in this context
      String bound = getPrefix(namespaceURI);
      if (bound == null) {
        return Collections.<String> emptyList().iterator();
      }
      return Collections.singletonList(bound).iterator();
    }
  }
}

