View Javadoc
1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.lang.xml;
5   
6   import java.io.ByteArrayOutputStream;
7   import java.io.PrintStream;
8   import java.io.StringReader;
9   import java.io.UnsupportedEncodingException;
10  import java.util.Iterator;
11  import java.util.Locale;
12  
13  import net.sourceforge.pmd.lang.LanguageRegistry;
14  import net.sourceforge.pmd.lang.LanguageVersionHandler;
15  import net.sourceforge.pmd.lang.Parser;
16  import net.sourceforge.pmd.lang.ParserOptions;
17  import net.sourceforge.pmd.lang.ast.Node;
18  import net.sourceforge.pmd.lang.ast.xpath.Attribute;
19  import net.sourceforge.pmd.lang.xml.ast.XmlNode;
20  import net.sourceforge.pmd.lang.xml.ast.XmlParser;
21  import net.sourceforge.pmd.util.StringUtil;
22  
23  import org.junit.Assert;
24  import org.junit.Test;
25  
26  /**
27   * Unit test for the {@link XmlParser}.
28   */
29  public class XmlParserTest {
30  
31      private static final String XML_TEST =
32              "<?xml version=\"1.0\"?>\n" +
33              "<!DOCTYPE rootElement\n" +
34              "[\n" +
35              "<!ELEMENT rootElement (child1,child2)>\n" +
36              "<!ELEMENT child1 (#PCDATA)>\n" +
37              "<!ATTLIST child1 test CDATA #REQUIRED>\n" +
38              "<!ELEMENT child2 (#PCDATA)>\n" +
39              "\n" +
40              "<!ENTITY pmd \"Copyright: PMD\">\n" +
41              "]\n" +
42              ">\n" +
43              "<rootElement>\n" +
44              "    <!-- that's a comment -->\n" +
45              "    <child1 test=\"1\">entity: &pmd;\n" +
46              "    </child1>\n" +
47              "    <child2>\n" +
48              "      <![CDATA[ cdata section ]]>\n" +
49              "    </child2>\n" +
50              "</rootElement>";
51  
52      private static final String XML_NAMESPACE_TEST =
53              "<?xml version=\"1.0\"?>\n" + 
54              "<pmd:rootElement xmlns:pmd=\"http://pmd.sf.net\">\n" + 
55              "    <!-- that's a comment -->\n" + 
56              "    <pmd:child1 test=\"1\">entity: &amp;\n" + 
57              "    </pmd:child1>\n" + 
58              "    <pmd:child2>\n" + 
59              "      <![CDATA[ cdata section ]]>\n" + 
60              "    </pmd:child2>\n" + 
61              "</pmd:rootElement>";
62  
63      private static final String XML_INVALID_WITH_DTD =
64              "<?xml version=\"1.0\"?>\n" +
65              "<!DOCTYPE rootElement\n" +
66              "[\n" +
67              "<!ELEMENT rootElement (child)>\n" +
68              "<!ELEMENT child (#PCDATA)>\n" +
69              "]\n" +
70              ">\n" +
71              "<rootElement>\n" +
72              "  <invalidChild></invalidChild>\n" +
73              "</rootElement>";
74  
75      /**
76       * See bug #1054:
77       * XML Rules ever report a line -1 and not the line/column where the error occurs
78       * @throws Exception any error
79       */
80      @Test
81      public void testLineNumbers() throws Exception {
82          LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
83          Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
84          Node document = parser.parse(null, new StringReader(XML_TEST));
85  
86          assertNode(document, "document", 2);
87          assertLineNumbers(document, 1, 1, 19, 14);
88          Node dtdElement = document.jjtGetChild(0);
89          assertNode(dtdElement, "rootElement", 0);
90          assertLineNumbers(dtdElement, 2, 1, 11, 1);
91          Node rootElement = document.jjtGetChild(1);
92          assertNode(rootElement, "rootElement", 7);
93          assertLineNumbers(rootElement, 12, 1, 19, 14);
94          assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
95          assertLineNumbers(rootElement.jjtGetChild(0), 12, 14, 13, 4);
96          assertNode(rootElement.jjtGetChild(1), "comment", 0);
97          assertLineNumbers(rootElement.jjtGetChild(1), 13, 5, 13, 29);
98          assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
99          assertLineNumbers(rootElement.jjtGetChild(2), 13, 30, 14, 4);
100         Node child1 = rootElement.jjtGetChild(3);
101         assertNode(child1, "child1", 1, "test", "1");
102         assertLineNumbers(child1, 14, 5, 15, 13);
103         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
104         assertLineNumbers(child1.jjtGetChild(0), 14, 22, 15, 4);
105         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
106         assertLineNumbers(rootElement.jjtGetChild(4), 15, 14, 16, 4);
107         Node child2 = rootElement.jjtGetChild(5);
108         assertNode(child2, "child2", 3);
109         assertLineNumbers(child2, 16, 5, 18, 13);
110         assertTextNode(child2.jjtGetChild(0), "\\n      ");
111         assertLineNumbers(child2.jjtGetChild(0), 16, 13, 17, 6);
112         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
113         assertLineNumbers(child2.jjtGetChild(1), 17, 7, 17, 33);
114         assertTextNode(child2.jjtGetChild(2), "\\n    ");
115         assertLineNumbers(child2.jjtGetChild(2), 17, 34, 18, 4);
116         assertTextNode(rootElement.jjtGetChild(6), "\\n");
117         assertLineNumbers(rootElement.jjtGetChild(6), 18, 14, 18, 14);
118     }
119 
120     /**
121      * Verifies the default parsing behavior of the XML parser.
122      */
123     @Test
124     public void testDefaultParsing() {
125         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
126         Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
127         Node document = parser.parse(null, new StringReader(XML_TEST));
128 
129         assertNode(document, "document", 2);
130         Node dtdElement = document.jjtGetChild(0);
131         assertNode(dtdElement, "rootElement", 0);
132         Node rootElement = document.jjtGetChild(1);
133         assertNode(rootElement, "rootElement", 7);
134         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
135         assertNode(rootElement.jjtGetChild(1), "comment", 0);
136         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
137         Node child1 = rootElement.jjtGetChild(3);
138         assertNode(child1, "child1", 1, "test", "1");
139         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
140         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
141         Node child2 = rootElement.jjtGetChild(5);
142         assertNode(child2, "child2", 3);
143         assertTextNode(child2.jjtGetChild(0), "\\n      ");
144         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
145         assertTextNode(child2.jjtGetChild(2), "\\n    ");
146         assertTextNode(rootElement.jjtGetChild(6), "\\n");
147     }
148 
149     /**
150      * Verifies the parsing behavior of the XML parser with coalescing enabled.
151      */
152     @Test
153     public void testParsingCoalescingEnabled() {
154         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
155         XmlParserOptions parserOptions = new XmlParserOptions();
156         parserOptions.setCoalescing(true);
157         Parser parser = xmlVersionHandler.getParser(parserOptions);
158         Node document = parser.parse(null, new StringReader(XML_TEST));
159 
160         assertNode(document, "document", 2);
161         Node dtdElement = document.jjtGetChild(0);
162         assertNode(dtdElement, "rootElement", 0);
163         Node rootElement = document.jjtGetChild(1);
164         assertNode(rootElement, "rootElement", 7);
165         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
166         assertNode(rootElement.jjtGetChild(1), "comment", 0);
167         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
168         Node child1 = rootElement.jjtGetChild(3);
169         assertNode(child1, "child1", 1, "test", "1");
170         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
171         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
172         Node child2 = rootElement.jjtGetChild(5);
173         assertNode(child2, "child2", 1);
174         assertTextNode(child2.jjtGetChild(0), "\\n       cdata section \\n    ");
175         assertTextNode(rootElement.jjtGetChild(6), "\\n");
176     }
177 
178     /**
179      * Verifies the parsing behavior of the XML parser if entities are not expanded.
180      */
181     @Test
182     public void testParsingDoNotExpandEntities() {
183         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
184         XmlParserOptions parserOptions = new XmlParserOptions();
185         parserOptions.setExpandEntityReferences(false);
186         Parser parser = xmlVersionHandler.getParser(parserOptions);
187         Node document = parser.parse(null, new StringReader(XML_TEST));
188 
189         assertNode(document, "document", 2);
190         Node dtdElement = document.jjtGetChild(0);
191         assertNode(dtdElement, "rootElement", 0);
192         Node rootElement = document.jjtGetChild(1);
193         assertNode(rootElement, "rootElement", 7);
194         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
195         assertNode(rootElement.jjtGetChild(1), "comment", 0);
196         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
197         Node child1 = rootElement.jjtGetChild(3);
198         assertNode(child1, "child1", 3, "test", "1");
199         assertTextNode(child1.jjtGetChild(0), "entity: ");
200         assertNode(child1.jjtGetChild(1), "pmd", 0);
201         assertTextNode(child1.jjtGetChild(2), "Copyright: PMD\\n    ");
202         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
203         Node child2 = rootElement.jjtGetChild(5);
204         assertNode(child2, "child2", 3);
205         assertTextNode(child2.jjtGetChild(0), "\\n      ");
206         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
207         assertTextNode(child2.jjtGetChild(2), "\\n    ");
208         assertTextNode(rootElement.jjtGetChild(6), "\\n");
209     }
210 
211     /**
212      * Verifies the parsing behavior of the XML parser if ignoring comments.
213      */
214     @Test
215     public void testParsingIgnoreComments() {
216         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
217         XmlParserOptions parserOptions = new XmlParserOptions();
218         parserOptions.setIgnoringComments(true);
219         Parser parser = xmlVersionHandler.getParser(parserOptions);
220         Node document = parser.parse(null, new StringReader(XML_TEST));
221 
222         assertNode(document, "document", 2);
223         Node dtdElement = document.jjtGetChild(0);
224         assertNode(dtdElement, "rootElement", 0);
225         Node rootElement = document.jjtGetChild(1);
226         assertNode(rootElement, "rootElement", 5);
227         assertTextNode(rootElement.jjtGetChild(0), "\\n    \\n    ");
228         Node child1 = rootElement.jjtGetChild(1);
229         assertNode(child1, "child1", 1, "test", "1");
230         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
231         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
232         Node child2 = rootElement.jjtGetChild(3);
233         assertNode(child2, "child2", 3);
234         assertTextNode(child2.jjtGetChild(0), "\\n      ");
235         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
236         assertTextNode(child2.jjtGetChild(2), "\\n    ");
237         assertTextNode(rootElement.jjtGetChild(4), "\\n");
238     }
239 
240     /**
241      * Verifies the parsing behavior of the XML parser if ignoring whitespaces in elements.
242      */
243     @Test
244     public void testParsingIgnoreElementContentWhitespace() {
245         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
246         XmlParserOptions parserOptions = new XmlParserOptions();
247         parserOptions.setIgnoringElementContentWhitespace(true);
248         Parser parser = xmlVersionHandler.getParser(parserOptions);
249         Node document = parser.parse(null, new StringReader(XML_TEST));
250 
251         assertNode(document, "document", 2);
252         Node dtdElement = document.jjtGetChild(0);
253         assertNode(dtdElement, "rootElement", 0);
254         Node rootElement = document.jjtGetChild(1);
255         assertNode(rootElement, "rootElement", 3);
256         assertNode(rootElement.jjtGetChild(0), "comment", 0);
257         Node child1 = rootElement.jjtGetChild(1);
258         assertNode(child1, "child1", 1, "test", "1");
259         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
260         Node child2 = rootElement.jjtGetChild(2);
261         assertNode(child2, "child2", 3);
262         assertTextNode(child2.jjtGetChild(0), "\\n      ");
263         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
264         assertTextNode(child2.jjtGetChild(2), "\\n    ");
265     }
266 
267     /**
268      * Verifies the default parsing behavior of the XML parser with namespaces.
269      */
270     @Test
271     public void testDefaultParsingNamespaces() {
272         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
273         Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
274         Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST));
275 
276         assertNode(document, "document", 1);
277         Node rootElement = document.jjtGetChild(0);
278         assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net");
279         Assert.assertEquals("http://pmd.sf.net", ((XmlNode)rootElement).getNode().getNamespaceURI());
280         Assert.assertEquals("pmd", ((XmlNode)rootElement).getNode().getPrefix());
281         Assert.assertEquals("rootElement", ((XmlNode)rootElement).getNode().getLocalName());
282         Assert.assertEquals("pmd:rootElement", ((XmlNode)rootElement).getNode().getNodeName());
283         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
284         assertNode(rootElement.jjtGetChild(1), "comment", 0);
285         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
286         Node child1 = rootElement.jjtGetChild(3);
287         assertNode(child1, "pmd:child1", 1, "test", "1");
288         assertTextNode(child1.jjtGetChild(0), "entity: &\\n    ");
289         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
290         Node child2 = rootElement.jjtGetChild(5);
291         assertNode(child2, "pmd:child2", 3);
292         assertTextNode(child2.jjtGetChild(0), "\\n      ");
293         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
294         assertTextNode(child2.jjtGetChild(2), "\\n    ");
295         assertTextNode(rootElement.jjtGetChild(6), "\\n");
296     }
297 
298     /**
299      * Verifies the default parsing behavior of the XML parser with namespaces but not namespace aware.
300      */
301     @Test
302     public void testParsingNotNamespaceAware() {
303         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
304         XmlParserOptions parserOptions = new XmlParserOptions();
305         parserOptions.setNamespaceAware(false);
306         Parser parser = xmlVersionHandler.getParser(parserOptions);
307         Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST));
308 
309         assertNode(document, "document", 1);
310         Node rootElement = document.jjtGetChild(0);
311         assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net");
312         Assert.assertNull(((XmlNode)rootElement).getNode().getNamespaceURI());
313         Assert.assertNull(((XmlNode)rootElement).getNode().getPrefix());
314         Assert.assertNull(((XmlNode)rootElement).getNode().getLocalName());
315         Assert.assertEquals("pmd:rootElement", ((XmlNode)rootElement).getNode().getNodeName());
316         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
317         assertNode(rootElement.jjtGetChild(1), "comment", 0);
318         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
319         Node child1 = rootElement.jjtGetChild(3);
320         assertNode(child1, "pmd:child1", 1, "test", "1");
321         assertTextNode(child1.jjtGetChild(0), "entity: &\\n    ");
322         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
323         Node child2 = rootElement.jjtGetChild(5);
324         assertNode(child2, "pmd:child2", 3);
325         assertTextNode(child2.jjtGetChild(0), "\\n      ");
326         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
327         assertTextNode(child2.jjtGetChild(2), "\\n    ");
328         assertTextNode(rootElement.jjtGetChild(6), "\\n");
329     }
330 
331     /**
332      * Verifies the parsing behavior of the XML parser with validation on.
333      * @throws UnsupportedEncodingException error
334      */
335     @Test
336     public void testParsingWithValidation() throws UnsupportedEncodingException {
337         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
338         XmlParserOptions parserOptions = new XmlParserOptions();
339         parserOptions.setValidating(true);
340         Parser parser = xmlVersionHandler.getParser(parserOptions);
341         PrintStream oldErr = System.err;
342         Locale oldLocale = Locale.getDefault();
343         try {
344             ByteArrayOutputStream bos = new ByteArrayOutputStream();
345             System.setErr(new PrintStream(bos));
346             Locale.setDefault(Locale.ENGLISH);
347             Node document = parser.parse(null, new StringReader(XML_INVALID_WITH_DTD));
348             Assert.assertNotNull(document);
349             String output = bos.toString("UTF-8");
350             Assert.assertTrue(output.contains("Element type \"invalidChild\" must be declared."));
351             Assert.assertTrue(output.contains("The content of element type \"rootElement\" must match \"(child)\"."));
352             Assert.assertEquals(2, document.jjtGetNumChildren());
353             Assert.assertEquals("invalidChild", String.valueOf(document.jjtGetChild(1).jjtGetChild(1)));
354         } finally {
355             System.setErr(oldErr);
356             Locale.setDefault(oldLocale);
357         }
358     }
359 
360     @Test
361     public void testWithProcessingInstructions() {
362         String xml = "<?xml version=\"1.0\"?><?mypi?><!DOCTYPE testDoc [<!ENTITY myentity \"e\">]><!--Comment--><foo abc=\"abc\"><bar>TEXT</bar><![CDATA[cdata!]]>&gt;&myentity;&lt;</foo>";
363         LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
364         XmlParserOptions options = (XmlParserOptions)xmlVersionHandler.getDefaultParserOptions();
365         options.setExpandEntityReferences(false);
366         Parser parser = xmlVersionHandler.getParser(options);
367         Node document = parser.parse(null, new StringReader(xml));
368         Assert.assertNotNull(document);
369         assertNode(document.jjtGetChild(0), "mypi", 0);
370         assertLineNumbers(document.jjtGetChild(0), 1, 22, 1, 29);
371     }
372 
373     /**
374      * Asserts a single node inclusive attributes.
375      * @param node the node
376      * @param toString the to String representation to expect
377      * @param childs number of childs
378      * @param atts attributes - each object pair forms one attribute: first name, then value.
379      */
380     private void assertNode(Node node, String toString, int childs, Object ... atts) {
381         Assert.assertEquals(toString, String.valueOf(node));
382         Assert.assertEquals(childs, node.jjtGetNumChildren());
383         Iterator<Attribute> attributeIterator = ((XmlNode)node).getAttributeIterator();
384         if (atts != null) {
385             for (int i = 0; i < atts.length; i += 2) {
386                 Assert.assertTrue(attributeIterator.hasNext());
387                 String name = String.valueOf(atts[i]);
388                 Object value = atts[i + 1];
389                 Attribute attribute = attributeIterator.next();
390                 Assert.assertEquals(name, attribute.getName());
391                 Assert.assertEquals(value, attribute.getValue());
392             }
393         }
394         Assert.assertFalse(attributeIterator.hasNext());
395     }
396 
397     /**
398      * Assert a single text node.
399      * @param node the node to check
400      * @param text the text to expect
401      */
402     private void assertTextNode(Node node, String text) {
403         assertTextNode(node, text, "text");
404     }
405 
406     /**
407      * Assert a single text node.
408      *
409      * @param node the node to check
410      * @param text the text to expect
411      * @param toString the to string representation
412      */
413     private void assertTextNode(Node node, String text, String toString) {
414         Assert.assertEquals(toString, String.valueOf(node));
415         Assert.assertEquals(0, node.jjtGetNumChildren());
416         Assert.assertEquals(text, StringUtil.escapeWhitespace(node.getImage()));
417         Iterator<Attribute> attributeIterator = ((XmlNode)node).getAttributeIterator();
418         Assert.assertTrue(attributeIterator.hasNext());
419         Attribute attribute = attributeIterator.next();
420         Assert.assertEquals("Image", attribute.getName());
421         Assert.assertEquals(text, StringUtil.escapeWhitespace(attribute.getValue()));
422         Assert.assertFalse(attributeIterator.hasNext());
423     }
424 
425     /**
426      * Assert the line numbers of a node.
427      *
428      * @param node the node
429      * @param beginLine the begin line
430      * @param beginColumn the begin column
431      * @param endLine the end line
432      * @param endColumn the end column
433      */
434     private void assertLineNumbers(Node node, int beginLine, int beginColumn, int endLine, int endColumn) {
435         Assert.assertEquals("begin line wrong", beginLine, node.getBeginLine());
436         Assert.assertEquals("begin column wrong", beginColumn, node.getBeginColumn());
437         Assert.assertEquals("end line wrong", endLine, node.getEndLine());
438         Assert.assertEquals("end column wrong", endColumn, node.getEndColumn());
439     }
440 }