1
2
3
4 package net.sourceforge.pmd.lang.xml;
5
6 import java.io.ByteArrayOutputStream;
7 import java.io.PrintStream;
8 import java.io.StringReader;
9 import java.io.UnsupportedEncodingException;
10 import java.util.Iterator;
11 import java.util.Locale;
12
13 import net.sourceforge.pmd.lang.LanguageRegistry;
14 import net.sourceforge.pmd.lang.LanguageVersionHandler;
15 import net.sourceforge.pmd.lang.Parser;
16 import net.sourceforge.pmd.lang.ParserOptions;
17 import net.sourceforge.pmd.lang.ast.Node;
18 import net.sourceforge.pmd.lang.ast.xpath.Attribute;
19 import net.sourceforge.pmd.lang.xml.ast.XmlNode;
20 import net.sourceforge.pmd.lang.xml.ast.XmlParser;
21 import net.sourceforge.pmd.util.StringUtil;
22
23 import org.junit.Assert;
24 import org.junit.Test;
25
26
27
28
29 public class XmlParserTest {
30
31 private static final String XML_TEST =
32 "<?xml version=\"1.0\"?>\n" +
33 "<!DOCTYPE rootElement\n" +
34 "[\n" +
35 "<!ELEMENT rootElement (child1,child2)>\n" +
36 "<!ELEMENT child1 (#PCDATA)>\n" +
37 "<!ATTLIST child1 test CDATA #REQUIRED>\n" +
38 "<!ELEMENT child2 (#PCDATA)>\n" +
39 "\n" +
40 "<!ENTITY pmd \"Copyright: PMD\">\n" +
41 "]\n" +
42 ">\n" +
43 "<rootElement>\n" +
44 " <!-- that's a comment -->\n" +
45 " <child1 test=\"1\">entity: &pmd;\n" +
46 " </child1>\n" +
47 " <child2>\n" +
48 " <![CDATA[ cdata section ]]>\n" +
49 " </child2>\n" +
50 "</rootElement>";
51
52 private static final String XML_NAMESPACE_TEST =
53 "<?xml version=\"1.0\"?>\n" +
54 "<pmd:rootElement xmlns:pmd=\"http://pmd.sf.net\">\n" +
55 " <!-- that's a comment -->\n" +
56 " <pmd:child1 test=\"1\">entity: &\n" +
57 " </pmd:child1>\n" +
58 " <pmd:child2>\n" +
59 " <![CDATA[ cdata section ]]>\n" +
60 " </pmd:child2>\n" +
61 "</pmd:rootElement>";
62
63 private static final String XML_INVALID_WITH_DTD =
64 "<?xml version=\"1.0\"?>\n" +
65 "<!DOCTYPE rootElement\n" +
66 "[\n" +
67 "<!ELEMENT rootElement (child)>\n" +
68 "<!ELEMENT child (#PCDATA)>\n" +
69 "]\n" +
70 ">\n" +
71 "<rootElement>\n" +
72 " <invalidChild></invalidChild>\n" +
73 "</rootElement>";
74
75
76
77
78
79
80 @Test
81 public void testLineNumbers() throws Exception {
82 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
83 Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
84 Node document = parser.parse(null, new StringReader(XML_TEST));
85
86 assertNode(document, "document", 2);
87 assertLineNumbers(document, 1, 1, 19, 14);
88 Node dtdElement = document.jjtGetChild(0);
89 assertNode(dtdElement, "rootElement", 0);
90 assertLineNumbers(dtdElement, 2, 1, 11, 1);
91 Node rootElement = document.jjtGetChild(1);
92 assertNode(rootElement, "rootElement", 7);
93 assertLineNumbers(rootElement, 12, 1, 19, 14);
94 assertTextNode(rootElement.jjtGetChild(0), "\\n ");
95 assertLineNumbers(rootElement.jjtGetChild(0), 12, 14, 13, 4);
96 assertNode(rootElement.jjtGetChild(1), "comment", 0);
97 assertLineNumbers(rootElement.jjtGetChild(1), 13, 5, 13, 29);
98 assertTextNode(rootElement.jjtGetChild(2), "\\n ");
99 assertLineNumbers(rootElement.jjtGetChild(2), 13, 30, 14, 4);
100 Node child1 = rootElement.jjtGetChild(3);
101 assertNode(child1, "child1", 1, "test", "1");
102 assertLineNumbers(child1, 14, 5, 15, 13);
103 assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n ");
104 assertLineNumbers(child1.jjtGetChild(0), 14, 22, 15, 4);
105 assertTextNode(rootElement.jjtGetChild(4), "\\n ");
106 assertLineNumbers(rootElement.jjtGetChild(4), 15, 14, 16, 4);
107 Node child2 = rootElement.jjtGetChild(5);
108 assertNode(child2, "child2", 3);
109 assertLineNumbers(child2, 16, 5, 18, 13);
110 assertTextNode(child2.jjtGetChild(0), "\\n ");
111 assertLineNumbers(child2.jjtGetChild(0), 16, 13, 17, 6);
112 assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
113 assertLineNumbers(child2.jjtGetChild(1), 17, 7, 17, 33);
114 assertTextNode(child2.jjtGetChild(2), "\\n ");
115 assertLineNumbers(child2.jjtGetChild(2), 17, 34, 18, 4);
116 assertTextNode(rootElement.jjtGetChild(6), "\\n");
117 assertLineNumbers(rootElement.jjtGetChild(6), 18, 14, 18, 14);
118 }
119
120
121
122
123 @Test
124 public void testDefaultParsing() {
125 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
126 Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
127 Node document = parser.parse(null, new StringReader(XML_TEST));
128
129 assertNode(document, "document", 2);
130 Node dtdElement = document.jjtGetChild(0);
131 assertNode(dtdElement, "rootElement", 0);
132 Node rootElement = document.jjtGetChild(1);
133 assertNode(rootElement, "rootElement", 7);
134 assertTextNode(rootElement.jjtGetChild(0), "\\n ");
135 assertNode(rootElement.jjtGetChild(1), "comment", 0);
136 assertTextNode(rootElement.jjtGetChild(2), "\\n ");
137 Node child1 = rootElement.jjtGetChild(3);
138 assertNode(child1, "child1", 1, "test", "1");
139 assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n ");
140 assertTextNode(rootElement.jjtGetChild(4), "\\n ");
141 Node child2 = rootElement.jjtGetChild(5);
142 assertNode(child2, "child2", 3);
143 assertTextNode(child2.jjtGetChild(0), "\\n ");
144 assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
145 assertTextNode(child2.jjtGetChild(2), "\\n ");
146 assertTextNode(rootElement.jjtGetChild(6), "\\n");
147 }
148
149
150
151
152 @Test
153 public void testParsingCoalescingEnabled() {
154 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
155 XmlParserOptions parserOptions = new XmlParserOptions();
156 parserOptions.setCoalescing(true);
157 Parser parser = xmlVersionHandler.getParser(parserOptions);
158 Node document = parser.parse(null, new StringReader(XML_TEST));
159
160 assertNode(document, "document", 2);
161 Node dtdElement = document.jjtGetChild(0);
162 assertNode(dtdElement, "rootElement", 0);
163 Node rootElement = document.jjtGetChild(1);
164 assertNode(rootElement, "rootElement", 7);
165 assertTextNode(rootElement.jjtGetChild(0), "\\n ");
166 assertNode(rootElement.jjtGetChild(1), "comment", 0);
167 assertTextNode(rootElement.jjtGetChild(2), "\\n ");
168 Node child1 = rootElement.jjtGetChild(3);
169 assertNode(child1, "child1", 1, "test", "1");
170 assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n ");
171 assertTextNode(rootElement.jjtGetChild(4), "\\n ");
172 Node child2 = rootElement.jjtGetChild(5);
173 assertNode(child2, "child2", 1);
174 assertTextNode(child2.jjtGetChild(0), "\\n cdata section \\n ");
175 assertTextNode(rootElement.jjtGetChild(6), "\\n");
176 }
177
178
179
180
181 @Test
182 public void testParsingDoNotExpandEntities() {
183 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
184 XmlParserOptions parserOptions = new XmlParserOptions();
185 parserOptions.setExpandEntityReferences(false);
186 Parser parser = xmlVersionHandler.getParser(parserOptions);
187 Node document = parser.parse(null, new StringReader(XML_TEST));
188
189 assertNode(document, "document", 2);
190 Node dtdElement = document.jjtGetChild(0);
191 assertNode(dtdElement, "rootElement", 0);
192 Node rootElement = document.jjtGetChild(1);
193 assertNode(rootElement, "rootElement", 7);
194 assertTextNode(rootElement.jjtGetChild(0), "\\n ");
195 assertNode(rootElement.jjtGetChild(1), "comment", 0);
196 assertTextNode(rootElement.jjtGetChild(2), "\\n ");
197 Node child1 = rootElement.jjtGetChild(3);
198 assertNode(child1, "child1", 3, "test", "1");
199 assertTextNode(child1.jjtGetChild(0), "entity: ");
200 assertNode(child1.jjtGetChild(1), "pmd", 0);
201 assertTextNode(child1.jjtGetChild(2), "Copyright: PMD\\n ");
202 assertTextNode(rootElement.jjtGetChild(4), "\\n ");
203 Node child2 = rootElement.jjtGetChild(5);
204 assertNode(child2, "child2", 3);
205 assertTextNode(child2.jjtGetChild(0), "\\n ");
206 assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
207 assertTextNode(child2.jjtGetChild(2), "\\n ");
208 assertTextNode(rootElement.jjtGetChild(6), "\\n");
209 }
210
211
212
213
214 @Test
215 public void testParsingIgnoreComments() {
216 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
217 XmlParserOptions parserOptions = new XmlParserOptions();
218 parserOptions.setIgnoringComments(true);
219 Parser parser = xmlVersionHandler.getParser(parserOptions);
220 Node document = parser.parse(null, new StringReader(XML_TEST));
221
222 assertNode(document, "document", 2);
223 Node dtdElement = document.jjtGetChild(0);
224 assertNode(dtdElement, "rootElement", 0);
225 Node rootElement = document.jjtGetChild(1);
226 assertNode(rootElement, "rootElement", 5);
227 assertTextNode(rootElement.jjtGetChild(0), "\\n \\n ");
228 Node child1 = rootElement.jjtGetChild(1);
229 assertNode(child1, "child1", 1, "test", "1");
230 assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n ");
231 assertTextNode(rootElement.jjtGetChild(2), "\\n ");
232 Node child2 = rootElement.jjtGetChild(3);
233 assertNode(child2, "child2", 3);
234 assertTextNode(child2.jjtGetChild(0), "\\n ");
235 assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
236 assertTextNode(child2.jjtGetChild(2), "\\n ");
237 assertTextNode(rootElement.jjtGetChild(4), "\\n");
238 }
239
240
241
242
243 @Test
244 public void testParsingIgnoreElementContentWhitespace() {
245 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
246 XmlParserOptions parserOptions = new XmlParserOptions();
247 parserOptions.setIgnoringElementContentWhitespace(true);
248 Parser parser = xmlVersionHandler.getParser(parserOptions);
249 Node document = parser.parse(null, new StringReader(XML_TEST));
250
251 assertNode(document, "document", 2);
252 Node dtdElement = document.jjtGetChild(0);
253 assertNode(dtdElement, "rootElement", 0);
254 Node rootElement = document.jjtGetChild(1);
255 assertNode(rootElement, "rootElement", 3);
256 assertNode(rootElement.jjtGetChild(0), "comment", 0);
257 Node child1 = rootElement.jjtGetChild(1);
258 assertNode(child1, "child1", 1, "test", "1");
259 assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n ");
260 Node child2 = rootElement.jjtGetChild(2);
261 assertNode(child2, "child2", 3);
262 assertTextNode(child2.jjtGetChild(0), "\\n ");
263 assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
264 assertTextNode(child2.jjtGetChild(2), "\\n ");
265 }
266
267
268
269
270 @Test
271 public void testDefaultParsingNamespaces() {
272 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
273 Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
274 Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST));
275
276 assertNode(document, "document", 1);
277 Node rootElement = document.jjtGetChild(0);
278 assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net");
279 Assert.assertEquals("http://pmd.sf.net", ((XmlNode)rootElement).getNode().getNamespaceURI());
280 Assert.assertEquals("pmd", ((XmlNode)rootElement).getNode().getPrefix());
281 Assert.assertEquals("rootElement", ((XmlNode)rootElement).getNode().getLocalName());
282 Assert.assertEquals("pmd:rootElement", ((XmlNode)rootElement).getNode().getNodeName());
283 assertTextNode(rootElement.jjtGetChild(0), "\\n ");
284 assertNode(rootElement.jjtGetChild(1), "comment", 0);
285 assertTextNode(rootElement.jjtGetChild(2), "\\n ");
286 Node child1 = rootElement.jjtGetChild(3);
287 assertNode(child1, "pmd:child1", 1, "test", "1");
288 assertTextNode(child1.jjtGetChild(0), "entity: &\\n ");
289 assertTextNode(rootElement.jjtGetChild(4), "\\n ");
290 Node child2 = rootElement.jjtGetChild(5);
291 assertNode(child2, "pmd:child2", 3);
292 assertTextNode(child2.jjtGetChild(0), "\\n ");
293 assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
294 assertTextNode(child2.jjtGetChild(2), "\\n ");
295 assertTextNode(rootElement.jjtGetChild(6), "\\n");
296 }
297
298
299
300
301 @Test
302 public void testParsingNotNamespaceAware() {
303 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
304 XmlParserOptions parserOptions = new XmlParserOptions();
305 parserOptions.setNamespaceAware(false);
306 Parser parser = xmlVersionHandler.getParser(parserOptions);
307 Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST));
308
309 assertNode(document, "document", 1);
310 Node rootElement = document.jjtGetChild(0);
311 assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net");
312 Assert.assertNull(((XmlNode)rootElement).getNode().getNamespaceURI());
313 Assert.assertNull(((XmlNode)rootElement).getNode().getPrefix());
314 Assert.assertNull(((XmlNode)rootElement).getNode().getLocalName());
315 Assert.assertEquals("pmd:rootElement", ((XmlNode)rootElement).getNode().getNodeName());
316 assertTextNode(rootElement.jjtGetChild(0), "\\n ");
317 assertNode(rootElement.jjtGetChild(1), "comment", 0);
318 assertTextNode(rootElement.jjtGetChild(2), "\\n ");
319 Node child1 = rootElement.jjtGetChild(3);
320 assertNode(child1, "pmd:child1", 1, "test", "1");
321 assertTextNode(child1.jjtGetChild(0), "entity: &\\n ");
322 assertTextNode(rootElement.jjtGetChild(4), "\\n ");
323 Node child2 = rootElement.jjtGetChild(5);
324 assertNode(child2, "pmd:child2", 3);
325 assertTextNode(child2.jjtGetChild(0), "\\n ");
326 assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
327 assertTextNode(child2.jjtGetChild(2), "\\n ");
328 assertTextNode(rootElement.jjtGetChild(6), "\\n");
329 }
330
331
332
333
334
335 @Test
336 public void testParsingWithValidation() throws UnsupportedEncodingException {
337 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
338 XmlParserOptions parserOptions = new XmlParserOptions();
339 parserOptions.setValidating(true);
340 Parser parser = xmlVersionHandler.getParser(parserOptions);
341 PrintStream oldErr = System.err;
342 Locale oldLocale = Locale.getDefault();
343 try {
344 ByteArrayOutputStream bos = new ByteArrayOutputStream();
345 System.setErr(new PrintStream(bos));
346 Locale.setDefault(Locale.ENGLISH);
347 Node document = parser.parse(null, new StringReader(XML_INVALID_WITH_DTD));
348 Assert.assertNotNull(document);
349 String output = bos.toString("UTF-8");
350 Assert.assertTrue(output.contains("Element type \"invalidChild\" must be declared."));
351 Assert.assertTrue(output.contains("The content of element type \"rootElement\" must match \"(child)\"."));
352 Assert.assertEquals(2, document.jjtGetNumChildren());
353 Assert.assertEquals("invalidChild", String.valueOf(document.jjtGetChild(1).jjtGetChild(1)));
354 } finally {
355 System.setErr(oldErr);
356 Locale.setDefault(oldLocale);
357 }
358 }
359
360 @Test
361 public void testWithProcessingInstructions() {
362 String xml = "<?xml version=\"1.0\"?><?mypi?><!DOCTYPE testDoc [<!ENTITY myentity \"e\">]><!--Comment--><foo abc=\"abc\"><bar>TEXT</bar><![CDATA[cdata!]]>>&myentity;<</foo>";
363 LanguageVersionHandler xmlVersionHandler = LanguageRegistry.getLanguage(XmlLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler();
364 XmlParserOptions options = (XmlParserOptions)xmlVersionHandler.getDefaultParserOptions();
365 options.setExpandEntityReferences(false);
366 Parser parser = xmlVersionHandler.getParser(options);
367 Node document = parser.parse(null, new StringReader(xml));
368 Assert.assertNotNull(document);
369 assertNode(document.jjtGetChild(0), "mypi", 0);
370 assertLineNumbers(document.jjtGetChild(0), 1, 22, 1, 29);
371 }
372
373
374
375
376
377
378
379
380 private void assertNode(Node node, String toString, int childs, Object ... atts) {
381 Assert.assertEquals(toString, String.valueOf(node));
382 Assert.assertEquals(childs, node.jjtGetNumChildren());
383 Iterator<Attribute> attributeIterator = ((XmlNode)node).getAttributeIterator();
384 if (atts != null) {
385 for (int i = 0; i < atts.length; i += 2) {
386 Assert.assertTrue(attributeIterator.hasNext());
387 String name = String.valueOf(atts[i]);
388 Object value = atts[i + 1];
389 Attribute attribute = attributeIterator.next();
390 Assert.assertEquals(name, attribute.getName());
391 Assert.assertEquals(value, attribute.getValue());
392 }
393 }
394 Assert.assertFalse(attributeIterator.hasNext());
395 }
396
397
398
399
400
401
402 private void assertTextNode(Node node, String text) {
403 assertTextNode(node, text, "text");
404 }
405
406
407
408
409
410
411
412
413 private void assertTextNode(Node node, String text, String toString) {
414 Assert.assertEquals(toString, String.valueOf(node));
415 Assert.assertEquals(0, node.jjtGetNumChildren());
416 Assert.assertEquals(text, StringUtil.escapeWhitespace(node.getImage()));
417 Iterator<Attribute> attributeIterator = ((XmlNode)node).getAttributeIterator();
418 Assert.assertTrue(attributeIterator.hasNext());
419 Attribute attribute = attributeIterator.next();
420 Assert.assertEquals("Image", attribute.getName());
421 Assert.assertEquals(text, StringUtil.escapeWhitespace(attribute.getValue()));
422 Assert.assertFalse(attributeIterator.hasNext());
423 }
424
425
426
427
428
429
430
431
432
433
434 private void assertLineNumbers(Node node, int beginLine, int beginColumn, int endLine, int endColumn) {
435 Assert.assertEquals("begin line wrong", beginLine, node.getBeginLine());
436 Assert.assertEquals("begin column wrong", beginColumn, node.getBeginColumn());
437 Assert.assertEquals("end line wrong", endLine, node.getEndLine());
438 Assert.assertEquals("end column wrong", endColumn, node.getEndColumn());
439 }
440 }