1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.BufferedReader;
7 import java.io.CharArrayReader;
8 import java.util.NoSuchElementException;
9 import java.util.StringTokenizer;
10
11 import org.apache.commons.io.IOUtils;
12
13
14
15
16
17
18 public class AnyTokenizer implements Tokenizer {
19 public static final String TOKENS = " \t!#$%^&*(){}-=+<>/\\`~;:";
20
21 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
22 StringBuilder sb = sourceCode.getCodeBuffer();
23 BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
24 try {
25 int lineNumber = 1;
26 String line = reader.readLine();
27 while (line != null) {
28 StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
29 try {
30 String token = tokenizer.nextToken();
31 while (token != null) {
32 if (!token.equals(" ") && !token.equals("\t")) {
33 tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
34 }
35 token = tokenizer.nextToken();
36 }
37 } catch (NoSuchElementException ex) {
38
39 }
40
41 line = reader.readLine();
42 lineNumber++;
43 }
44 } catch (Exception ex) {
45 ex.printStackTrace();
46 } finally {
47 IOUtils.closeQuietly(reader);
48 tokenEntries.add(TokenEntry.getEOF());
49 }
50 }
51 }