1
2
3
4 package net.sourceforge.pmd.lang.xml.ast;
5
6 import java.util.Map;
7 import java.util.TreeMap;
8 import java.util.regex.Matcher;
9 import java.util.regex.Pattern;
10
11 import org.apache.commons.lang3.StringUtils;
12 import org.w3c.dom.Document;
13 import org.w3c.dom.DocumentType;
14 import org.w3c.dom.NamedNodeMap;
15 import org.w3c.dom.Node;
16 import org.w3c.dom.NodeList;
17 import org.w3c.dom.ProcessingInstruction;
18
19
20
21
22 class DOMLineNumbers {
23 private final Document document;
24 private final String xmlString;
25 private Map<Integer, Integer> lines;
26
27 public DOMLineNumbers(Document document, String xmlString) {
28 this.document = document;
29 this.xmlString = xmlString;
30 }
31
32 public void determine() {
33 calculateLinesMap();
34 determineLocation(document, 0);
35 }
36 private int determineLocation(Node n, int index) {
37 int nextIndex = index;
38 if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
39 nextIndex = xmlString.indexOf("<!DOCTYPE", nextIndex);
40 } else if (n.getNodeType() == Node.COMMENT_NODE) {
41 nextIndex = xmlString.indexOf("<!--", nextIndex);
42 } else if (n.getNodeType() == Node.ELEMENT_NODE) {
43 nextIndex = xmlString.indexOf("<" + n.getNodeName(), nextIndex);
44 } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
45 nextIndex = xmlString.indexOf("<![CDATA[", nextIndex);
46 } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
47 ProcessingInstruction pi = (ProcessingInstruction)n;
48 nextIndex = xmlString.indexOf("<?" + pi.getTarget(), nextIndex);
49 } else if (n.getNodeType() == Node.TEXT_NODE) {
50 String te = unexpandEntities(n, n.getNodeValue());
51 int newIndex = xmlString.indexOf(te, nextIndex);
52 if (newIndex > 0) {
53 nextIndex = newIndex;
54 }
55 } else if (n.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
56 nextIndex = xmlString.indexOf("&" + n.getNodeName() + ";", nextIndex);
57 }
58 setBeginLocation(n, nextIndex);
59 if (n.hasChildNodes()) {
60 NodeList childs = n.getChildNodes();
61 for (int i = 0; i < childs.getLength(); i++) {
62 nextIndex = determineLocation(childs.item(i), nextIndex);
63 }
64 }
65 if (n.getNodeType() == Node.ELEMENT_NODE) {
66 nextIndex += 2 + n.getNodeName().length() + 1;
67 } else if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
68 Node nextSibling = n.getNextSibling();
69 if (nextSibling.getNodeType() == Node.ELEMENT_NODE) {
70 nextIndex = xmlString.indexOf("<" + nextSibling.getNodeName(), nextIndex) - 1;
71 } else if (nextSibling.getNodeType() == Node.COMMENT_NODE) {
72 nextIndex = xmlString.indexOf("<!--", nextIndex);
73 } else {
74 nextIndex = xmlString.indexOf(">", nextIndex);
75 }
76 } else if (n.getNodeType() == Node.COMMENT_NODE) {
77 nextIndex += 4 + 3;
78 nextIndex += n.getNodeValue().length();
79 } else if (n.getNodeType() == Node.TEXT_NODE) {
80 String te = unexpandEntities(n, n.getNodeValue());
81 nextIndex += te.length();
82 } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
83 nextIndex += "<![CDATA[".length() + n.getNodeValue().length() + "]]>".length();
84 } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
85 ProcessingInstruction pi = (ProcessingInstruction)n;
86 nextIndex += "<?".length() + pi.getTarget().length() + "?>".length() + pi.getData().length();
87 }
88 setEndLocation(n, nextIndex - 1);
89 return nextIndex;
90 }
91
92 private String unexpandEntities(Node n, String te) {
93 String result = te;
94 DocumentType doctype = n.getOwnerDocument().getDoctype();
95
96 result = result.replaceAll(Matcher.quoteReplacement("&"), "&");
97 result = result.replaceAll(Matcher.quoteReplacement("<"), "<");
98 result = result.replaceAll(Matcher.quoteReplacement(">"), ">");
99 result = result.replaceAll(Matcher.quoteReplacement("\""), """);
100 result = result.replaceAll(Matcher.quoteReplacement("'"), "'");
101
102 if (doctype != null) {
103 NamedNodeMap entities = doctype.getEntities();
104 String internalSubset = doctype.getInternalSubset();
105 if (internalSubset == null) {
106 internalSubset = "";
107 }
108 for (int i = 0; i < entities.getLength(); i++) {
109 Node item = entities.item(i);
110 String entityName = item.getNodeName();
111 Node firstChild = item.getFirstChild();
112 if (firstChild != null) {
113 result = result.replaceAll(Matcher.quoteReplacement(firstChild.getNodeValue()), "&" + entityName + ";");
114 } else {
115 Matcher m = Pattern.compile(Matcher.quoteReplacement("<!ENTITY " + entityName + " ") + "[']([^']*)[']>").matcher(internalSubset);
116 if (m.find()) {
117 result = result.replaceAll(Matcher.quoteReplacement(m.group(1)), "&" + entityName + ";");
118 }
119 }
120 }
121 }
122 return result;
123 }
124 private void setBeginLocation(Node n, int index) {
125 if (n != null) {
126 n.setUserData(XmlNode.BEGIN_LINE, toLine(index), null);
127 n.setUserData(XmlNode.BEGIN_COLUMN, toColumn(index), null);
128 }
129 }
130 private void setEndLocation(Node n, int index) {
131 if (n != null) {
132 n.setUserData(XmlNode.END_LINE, toLine(index), null);
133 n.setUserData(XmlNode.END_COLUMN, toColumn(index), null);
134 }
135 }
136
137 private void calculateLinesMap() {
138 lines = new TreeMap<Integer, Integer>();
139 int index = -1;
140 int count = StringUtils.countMatches(xmlString, "\n");
141 for (int line = 1; line <= count; line++) {
142 lines.put(line, index + 1);
143 index = xmlString.indexOf("\n", index + 1);
144 }
145 lines.put(count + 1, index + 1);
146 }
147
148 private int toLine(int index) {
149 int line = 1;
150 for (Map.Entry<Integer, Integer> e : lines.entrySet()) {
151 line = e.getKey();
152 if (e.getValue() > index) {
153 line--;
154 break;
155 }
156 }
157 return line;
158 }
159 private int toColumn(int index) {
160 int line = toLine(index);
161 Integer lineStart = lines.get(line);
162 if (lineStart == null) {
163 lineStart = lines.get(lines.size() - 1);
164 }
165 int column = index - lineStart;
166 return column + 1;
167 }
168
169 }