1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.BufferedReader;
7 import java.io.CharArrayReader;
8 import java.io.IOException;
9
10 import org.apache.commons.io.IOUtils;
11
12
13
14
15
16
17 public class CsTokenizer implements Tokenizer {
18
19 @Override
20 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
21 BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString()
22 .toCharArray()));
23 try {
24 int ic = reader.read();
25 int line = 1;
26 char c;
27 StringBuilder b;
28 while (ic != -1) {
29 c = (char) ic;
30 switch (c) {
31
32 case '\n':
33 line++;
34 ic = reader.read();
35 break;
36
37
38 case ' ':
39 case '\t':
40 case '\r':
41 ic = reader.read();
42 break;
43
44
45 case ';':
46 ic = reader.read();
47 break;
48
49
50 case '<':
51 case '>':
52 ic = reader.read();
53 if (ic == '=') {
54 tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
55 ic = reader.read();
56 } else if (ic == c) {
57 ic = reader.read();
58 if (ic == '=') {
59 tokenEntries.add(new TokenEntry(c + c + "=", sourceCode
60 .getFileName(), line));
61 ic = reader.read();
62 } else {
63 tokenEntries.add(new TokenEntry(String.valueOf(c) + c, sourceCode
64 .getFileName(), line));
65 }
66 } else {
67 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
68 }
69 break;
70
71
72 case '=':
73 case '&':
74 case '|':
75 case '+':
76 case '-':
77 ic = reader.read();
78 if (ic == '=' || ic == c) {
79 tokenEntries.add(new TokenEntry(c + String.valueOf((char) ic), sourceCode
80 .getFileName(), line));
81 ic = reader.read();
82 } else {
83 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
84 }
85 break;
86
87
88 case '!':
89 case '*':
90 case '%':
91 case '^':
92 case '~':
93 ic = reader.read();
94 if (ic == '=') {
95 tokenEntries.add(new TokenEntry(c + "=", sourceCode.getFileName(), line));
96 ic = reader.read();
97 } else {
98 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
99 }
100 break;
101
102
103 case '"':
104 case '\'':
105 int beginLine = line;
106 b = new StringBuilder();
107 b.append(c);
108 while ((ic = reader.read()) != c) {
109 if (ic == -1) {
110 break;
111 }
112 b.append((char) ic);
113 if (ic == '\\') {
114 int next = reader.read();
115 if (next != -1) {
116 b.append((char) next);
117
118 if (next == '\n') {
119 line++;
120 }
121 }
122 } else if (ic == '\n') {
123 line++;
124 }
125 }
126 if (ic != -1) {
127 b.append((char) ic);
128 }
129 tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), beginLine));
130 ic = reader.read();
131 break;
132
133
134 case '/':
135 switch (c = (char) (ic = reader.read())) {
136 case '*':
137
138 int state = 1;
139 b = new StringBuilder();
140 b.append("/*");
141
142 while ((ic = reader.read()) != -1) {
143 c = (char) ic;
144 b.append(c);
145
146 if (c == '\n') {
147 line++;
148 }
149
150 if (state == 1) {
151 if (c == '*') {
152 state = 2;
153 }
154 } else {
155 if (c == '/') {
156 ic = reader.read();
157 break;
158 } else if (c != '*') {
159 state = 1;
160 }
161 }
162 }
163
164
165
166 break;
167
168 case '/':
169 b = new StringBuilder();
170 b.append("//");
171 while ((ic = reader.read()) != '\n') {
172 if (ic == -1) {
173 break;
174 }
175 b.append((char) ic);
176 }
177
178
179
180 break;
181
182 case '=':
183 tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
184 ic = reader.read();
185 break;
186
187 default:
188 tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
189 break;
190 }
191 break;
192
193 default:
194
195 if (Character.isJavaIdentifierStart(c)) {
196 b = new StringBuilder();
197 do {
198 b.append(c);
199 c = (char) (ic = reader.read());
200 } while (Character.isJavaIdentifierPart(c));
201 tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
202 }
203
204 else if (Character.isDigit(c) || c == '.') {
205 b = new StringBuilder();
206 do {
207 b.append(c);
208 if (c == 'e' || c == 'E') {
209 c = (char) (ic = reader.read());
210 if ("1234567890-".indexOf(c) == -1) {
211 break;
212 }
213 b.append(c);
214 }
215 c = (char) (ic = reader.read());
216 } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
217
218 tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
219 }
220
221 else {
222 tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
223 ic = reader.read();
224 break;
225 }
226 }
227 }
228 } catch (IOException e) {
229 e.printStackTrace();
230 } finally {
231 IOUtils.closeQuietly(reader);
232 tokenEntries.add(TokenEntry.getEOF());
233 }
234 }
235 }