Page MenuHomePhabricator (Chris)

No OneTemporary

Size
16 KB
Referenced Files
None
Subscribers
None
diff --git a/xml-designpattern-casestudy/README.md b/xml-designpattern-casestudy/README.md
new file mode 100644
index 0000000..36e2a3a
--- /dev/null
+++ b/xml-designpattern-casestudy/README.md
@@ -0,0 +1,6 @@
+
+### About ###
+
+Examples to show the use of various design patterns in XML parsing. All classes are executable and process input XML data in different ways; see the comments in the source code for details.
+
+The class `xmlcasestudy.CreateLargeXMLFile` must be run first to create the (large) input data file `emails.xml` required by some of the examples.
\ No newline at end of file
diff --git a/xml-designpattern-casestudy/emails-small.xml b/xml-designpattern-casestudy/emails-small.xml
new file mode 100644
index 0000000..069c261
--- /dev/null
+++ b/xml-designpattern-casestudy/emails-small.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0"?>
+<emails>
+ <email id="1">
+ <to>
+ <emailaddress>test@test.com</emailaddress>
+ <displayname>test</displayname>
+ </to>
+ <from>
+ <emailaddress>jens.dietrich@sample.com</emailaddress>
+ <displayname>Jens Dietrich</displayname>
+ </from>
+ <subject>some subject 1</subject>
+ <body>
+ some body 1
+ </body>
+ </email>
+ <email id="2">
+ <to>
+ <emailaddress>test@test.com</emailaddress>
+ <displayname>test</displayname>
+ </to>
+ <from>
+ <emailaddress>max.dietrich@sample.com</emailaddress>
+ <displayname>Max Dietrich</displayname>
+ </from>
+ <subject>some subject 2</subject>
+ <body>
+ some body 2
+ </body>
+ </email>
+ <email id="3">
+ <to>
+ <emailaddress>test@test.com</emailaddress>
+ <displayname>test</displayname>
+ </to>
+ <from>
+ <emailaddress>some.body@else.com</emailaddress>
+ <displayname>somebody else</displayname>
+ </from>
+ <subject>some subject 1</subject>
+ <body>
+ some body 1
+ </body>
+ </email>
+</emails>
\ No newline at end of file
diff --git a/xml-designpattern-casestudy/pom.xml b/xml-designpattern-casestudy/pom.xml
new file mode 100644
index 0000000..0817f48
--- /dev/null
+++ b/xml-designpattern-casestudy/pom.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <groupId>oopexamples</groupId>
+ <artifactId>xml-designpattern-casestudy</artifactId>
+ <version>1.0-SNAPSHOT</version>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>1.8</source>
+ <target>1.8</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <configuration>
+ <outputDirectory>
+ ${project.build.directory}/dependencies
+ </outputDirectory>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <version>3.7</version>
+ </dependency>
+ </dependencies>
+</project>
\ No newline at end of file
diff --git a/xml-designpattern-casestudy/src/main/java/xmlcasestudy/CreateLargeXMLFile.java b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/CreateLargeXMLFile.java
new file mode 100644
index 0000000..3441f58
--- /dev/null
+++ b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/CreateLargeXMLFile.java
@@ -0,0 +1,98 @@
+package xmlcasestudy;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+import java.util.Random;
+
+/**
+ * Utility to create a large XML file with random data for experiments.
+ * @author jens dietrich
+ */
+public class CreateLargeXMLFile {
+
+ public static final int SIZE = 500_000; // 1,000,000 creates a ca 1 GB file
+ public static final Random random = new Random();
+
+ public static void main(String[] args) throws Exception {
+
+ File xml = new File("emails.xml");
+
+ try (PrintWriter out = new PrintWriter(new FileWriter(xml))) {
+ out.println("<?xml version=\"1.0\"?>");
+ out.println("<emails>");
+
+
+ for (int i=0;i<SIZE;i++) {
+ out.println("\t<email id=\"" + (i+1) + "\">");
+ for (int j=0;j<random.nextInt(5);j++) {
+ out.println("\t\t<to>");
+ out.println("\t\t\t<emailaddress>" + randomEmail() + "</emailaddress>" );
+ out.println("\t\t\t<displayname>" + randomName() + "</displayname>" );
+ out.println("\t\t</to>");
+ }
+ for (int j=0;j<random.nextInt(5);j++) {
+ out.println("\t\t<cc>");
+ out.println("\t\t\t<emailaddress>" + randomEmail() + "</emailaddress>" );
+ out.println("\t\t\t<displayname>" + randomName() + "</displayname>" );
+ out.println("\t\t</cc>");
+ }
+ for (int j=0;j<random.nextInt(5);j++) {
+ out.println("\t\t<bcc>");
+ out.println("\t\t\t<emailaddress>" + randomEmail() + "</emailaddress>" );
+ out.println("\t\t\t<displayname>" + randomName() + "</displayname>" );
+ out.println("\t\t</bcc>");
+ }
+
+ // put in special from to be queried later
+ if (i==(SIZE/2)) {
+ out.println("\t\t<from>");
+ out.println("\t\t\t<emailaddress>jens@server.com</emailaddress>");
+ out.println("\t\t\t<displayname>Jens Dietrich</displayname>");
+ out.println("\t\t</from>");
+ }
+ else {
+ out.println("\t\t<from>");
+ out.println("\t\t\t<emailaddress>" + randomEmail() + "</emailaddress>");
+ out.println("\t\t\t<displayname>" + randomName() + "</displayname>");
+ out.println("\t\t</from>");
+ }
+
+ out.println("\t\t<subject>" + randomSubject() + "</subject>" );
+ out.println("\t\t<body>");
+ out.println(randomBody());
+ out.println("\t\t</body>");
+ out.println("\t</email>");
+ }
+
+ out.println("</emails>");
+ }
+
+ }
+
+ private static String randomSubject() {
+ return RandomStringUtils.random(30,true,false);
+ }
+ private static String randomBody() {
+ StringBuilder b = new StringBuilder();
+ for (int i=0;i<random.nextInt(30);i++) {
+ b.append("\t\t\t");
+ b.append(RandomStringUtils.random(80,true,false));
+ b.append("\n");
+ }
+ return b.toString();
+ }
+
+ private static String randomEmail() {
+ return RandomStringUtils.random(5,true,false) + "." +
+ RandomStringUtils.random(5,true,false) + "@" +
+ RandomStringUtils.random(5,true,false) + ".com";
+ }
+
+ private static String randomName() {
+ return RandomStringUtils.random(5,true,false) + " " +
+ RandomStringUtils.random(5,true,false);
+ }
+
+}
diff --git a/xml-designpattern-casestudy/src/main/java/xmlcasestudy/FindEmailWithXPath.java b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/FindEmailWithXPath.java
new file mode 100644
index 0000000..7852fdd
--- /dev/null
+++ b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/FindEmailWithXPath.java
@@ -0,0 +1,35 @@
+package xmlcasestudy;
+
+import org.w3c.dom.Attr;
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import static javax.xml.xpath.XPathConstants.*;
+import javax.xml.xpath.XPathFactory;
+import java.io.File;
+
+/**
+ * Find a particular email using xpath.
+ * @author jens dietrich
+ */
+public class FindEmailWithXPath {
+
+ public static void main(String[] args) throws Exception {
+ File xml = new File("emails-small.xml");
+
+ DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+ DocumentBuilder db = dbf.newDocumentBuilder();
+ Document document = db.parse(xml);
+
+ XPath xpath = XPathFactory.newInstance().newXPath();
+ String expression = "/emails/email[from/emailaddress='jens.dietrich@sample.com']/@id";
+ NodeList elements = (NodeList) xpath.evaluate(expression, document,NODESET);
+
+ for (int i=0;i<elements.getLength();i++) {
+ Attr attribute = (Attr)elements.item(i);
+ System.out.println("Email found, id is: " + attribute.getValue());
+ }
+ }
+}
diff --git a/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithDOMAndCountEmails.java b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithDOMAndCountEmails.java
new file mode 100644
index 0000000..7e1b5bf
--- /dev/null
+++ b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithDOMAndCountEmails.java
@@ -0,0 +1,36 @@
+package xmlcasestudy;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import java.io.File;
+
+
+/**
+ * Count the emails in a large XML file using the DOM representation.
+ * @author jens dietrich
+ */
+public class ParseWithDOMAndCountEmails {
+
+    public static void main(String[] args) throws Exception {
+        File xml = new File("emails.xml");
+
+        long t1 = System.currentTimeMillis();
+
+        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+        Document document = db.parse(xml);
+
+        Element root = document.getDocumentElement();
+        // the default factory is not namespace-aware, so getLocalName() is null here: use the tag name
+        assert root.getTagName().equals("emails");
+        // node lists may be computed lazily: take the length inside the timed section
+        int count = root.getElementsByTagName("email").getLength();
+
+        long t2 = System.currentTimeMillis();
+
+        System.out.println("Email count is " + count);
+        System.out.println(".. this took " + (t2-t1) + " ms");
+    }
+}
diff --git a/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithDOMAndFindEmail.java b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithDOMAndFindEmail.java
new file mode 100644
index 0000000..589a76f
--- /dev/null
+++ b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithDOMAndFindEmail.java
@@ -0,0 +1,43 @@
+package xmlcasestudy;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import java.io.File;
+
+/**
+ * Find a particular email in a small XML file by navigating its DOM representation.
+ * @author jens dietrich
+ */
+public class ParseWithDOMAndFindEmail {
+
+    public static void main(String[] args) throws Exception {
+        File xml = new File("emails-small.xml");
+
+        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+        DocumentBuilder db = dbf.newDocumentBuilder();
+        Document document = db.parse(xml);
+
+        Element root = document.getDocumentElement();
+        // the default factory is not namespace-aware, so getLocalName() is null here: use the tag name
+        assert root.getTagName().equals("emails");
+        NodeList children = root.getElementsByTagName("email");
+        for (int i=0;i<children.getLength();i++) {
+            Element emailElement = (Element)children.item(i);
+
+            // item(0) is null when an element is missing: skip emails without a sender address
+            Element fromElement = (Element)emailElement.getElementsByTagName("from").item(0);
+            Element addressElement = fromElement == null ? null
+                    : (Element)fromElement.getElementsByTagName("emailaddress").item(0);
+            if (addressElement == null) {
+                continue;
+            }
+            String address = addressElement.getTextContent();
+            if (address.equals("jens.dietrich@sample.com")) {
+                System.out.println("Email found, id is: " + emailElement.getAttribute("id"));
+            }
+        }
+    }
+}
diff --git a/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithSAXAndCountEmails.java b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithSAXAndCountEmails.java
new file mode 100644
index 0000000..61502d9
--- /dev/null
+++ b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithSAXAndCountEmails.java
@@ -0,0 +1,42 @@
+package xmlcasestudy;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import java.io.File;
+
+/**
+ * Streams a large XML file through a SAX parser and reports how many emails it contains.
+ * @author jens dietrich
+ */
+public class ParseWithSAXAndCountEmails {
+
+    /** SAX callback that tallies every start tag whose qualified name is "email". */
+    static class EmailCounter extends DefaultHandler {
+        public int count = 0;
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
+            if (!qName.equals("email")) {
+                return;
+            }
+            count++;
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        File input = new File("emails.xml");
+        long started = System.currentTimeMillis();
+
+        EmailCounter handler = new EmailCounter();
+        SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
+        parser.parse(input, handler);
+
+        long elapsed = System.currentTimeMillis() - started;
+
+        System.out.println("Email count is " + handler.count);
+        System.out.println(".. this took " + elapsed + " ms");
+    }
+}
diff --git a/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithStAXAndCountEmails.java b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithStAXAndCountEmails.java
new file mode 100644
index 0000000..b06174a
--- /dev/null
+++ b/xml-designpattern-casestudy/src/main/java/xmlcasestudy/ParseWithStAXAndCountEmails.java
@@ -0,0 +1,59 @@
+package xmlcasestudy;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.Reader;
+import static javax.xml.stream.XMLStreamConstants.START_ELEMENT;
+
+
+/**
+ * Count the emails in a large XML file using a StAX parser.
+ * @author jens dietrich
+ */
+public class ParseWithStAXAndCountEmails {
+
+    /** SAX-style counter of "email" start tags; the main method of this class does not use it. */
+    static class EmailCounter extends DefaultHandler {
+        int count = 0;
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
+            if (qName.equals("email")) {
+                count = count+1;
+            }
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        // NOTE(review): startup delay outside the timed section, presumably to attach a profiler - confirm
+        Thread.sleep(10_000);
+
+        File xml = new File("emails.xml");
+        long t1 = System.currentTimeMillis();
+        int count = 0;
+
+        // hand the parser raw bytes so it detects the encoding itself, and always release the file
+        try (java.io.InputStream in = new java.io.FileInputStream(xml)) {
+            XMLStreamReader streamReader = XMLInputFactory.newInstance().createXMLStreamReader(in);
+            try {
+                while (streamReader.hasNext()) {
+                    if (START_ELEMENT == streamReader.next() && streamReader.getLocalName().equals("email")) {
+                        count = count + 1;
+                    }
+                }
+            } finally {
+                streamReader.close(); // frees parser resources; the stream is closed by the outer try
+            }
+        }
+
+        long t2 = System.currentTimeMillis();
+
+        System.out.println("Email count is " + count);
+        System.out.println(".. this took " + (t2-t1) + " ms");
+    }
+}

File Metadata

Mime Type
text/x-diff
Expires
Wed, Sep 10, 3:16 PM (9 h, 46 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
42603
Default Alt Text
(16 KB)

Event Timeline