1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.StringReader;
7 import java.util.Properties;
8
9 import net.sourceforge.pmd.lang.LanguageRegistry;
10 import net.sourceforge.pmd.lang.LanguageVersionHandler;
11 import net.sourceforge.pmd.lang.TokenManager;
12 import net.sourceforge.pmd.lang.java.JavaLanguageModule;
13 import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
14 import net.sourceforge.pmd.lang.java.ast.Token;
15
16 public class JavaTokenizer implements Tokenizer {
17
18 public static final String CPD_START = "\"CPD-START\"";
19 public static final String CPD_END = "\"CPD-END\"";
20
21 private boolean ignoreAnnotations;
22 private boolean ignoreLiterals;
23 private boolean ignoreIdentifiers;
24
25 public void setProperties(Properties properties) {
26 ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
27 ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
28 ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
29 }
30
31 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
32 StringBuilder stringBuilder = sourceCode.getCodeBuffer();
33
34
35 LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME).getVersion("1.4").getLanguageVersionHandler();
36 String fileName = sourceCode.getFileName();
37 TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()).getTokenManager(
38 fileName, new StringReader(stringBuilder.toString()));
39 Token currentToken = (Token) tokenMgr.getNextToken();
40
41 TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
42
43 while (currentToken.image.length() > 0) {
44 discarder.updateState(currentToken);
45
46 if (discarder.isDiscarding()) {
47 currentToken = (Token) tokenMgr.getNextToken();
48 continue;
49 }
50
51 processToken(tokenEntries, fileName, currentToken);
52 currentToken = (Token) tokenMgr.getNextToken();
53 }
54 tokenEntries.add(TokenEntry.getEOF());
55 }
56
57 private void processToken(Tokens tokenEntries, String fileName, Token currentToken) {
58 String image = currentToken.image;
59 if (ignoreLiterals
60 && (currentToken.kind == JavaParserConstants.STRING_LITERAL
61 || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
62 || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
63 || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
64 image = String.valueOf(currentToken.kind);
65 }
66 if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
67 image = String.valueOf(currentToken.kind);
68 }
69 tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
70 }
71
72 public void setIgnoreLiterals(boolean ignore) {
73 this.ignoreLiterals = ignore;
74 }
75
76 public void setIgnoreIdentifiers(boolean ignore) {
77 this.ignoreIdentifiers = ignore;
78 }
79
80 public void setIgnoreAnnotations(boolean ignoreAnnotations) {
81 this.ignoreAnnotations = ignoreAnnotations;
82 }
83
84
85
86
87
88
89
90
91
92
93 private static class TokenDiscarder {
94 private boolean isAnnotation = false;
95 private boolean nextTokenEndsAnnotation = false;
96 private int annotationStack = 0;
97
98 private boolean discardingSemicolon = false;
99 private boolean discardingKeywords = false;
100 private boolean discardingSuppressing = false;
101 private boolean discardingAnnotations = false;
102 private boolean ignoreAnnotations = false;
103
104 public TokenDiscarder(boolean ignoreAnnotations) {
105 this.ignoreAnnotations = ignoreAnnotations;
106 }
107
108 public void updateState(Token currentToken) {
109 detectAnnotations(currentToken);
110
111 skipSemicolon(currentToken);
112 skipPackageAndImport(currentToken);
113 skipCPDSuppression(currentToken);
114 if (ignoreAnnotations) {
115 skipAnnotations();
116 }
117 }
118
119 public void skipPackageAndImport(Token currentToken) {
120 if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
121 discardingKeywords = true;
122 } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
123 discardingKeywords = false;
124 }
125 }
126
127 public void skipSemicolon(Token currentToken) {
128 if (currentToken.kind == JavaParserConstants.SEMICOLON) {
129 discardingSemicolon = true;
130 } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
131 discardingSemicolon = false;
132 }
133 }
134
135 public void skipCPDSuppression(Token currentToken) {
136
137 if (isAnnotation) {
138 if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_START.equals(currentToken.image)) {
139 discardingSuppressing = true;
140 } else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_END.equals(currentToken.image)) {
141 discardingSuppressing = false;
142 }
143 }
144 }
145
146 public void skipAnnotations() {
147 if (!discardingAnnotations && isAnnotation) {
148 discardingAnnotations = true;
149 } else if (discardingAnnotations && !isAnnotation) {
150 discardingAnnotations = false;
151 }
152 }
153
154 public boolean isDiscarding() {
155 boolean result = discardingSemicolon || discardingKeywords || discardingAnnotations || discardingSuppressing;
156 return result;
157 }
158
159 public void detectAnnotations(Token currentToken) {
160 if (isAnnotation && nextTokenEndsAnnotation) {
161 isAnnotation = false;
162 nextTokenEndsAnnotation = false;
163 }
164 if (isAnnotation) {
165 if (currentToken.kind == JavaParserConstants.LPAREN) {
166 annotationStack++;
167 } else if (currentToken.kind == JavaParserConstants.RPAREN) {
168 annotationStack--;
169 if (annotationStack == 0) {
170 nextTokenEndsAnnotation = true;
171 }
172 } else if (annotationStack == 0 && currentToken.kind != JavaParserConstants.IDENTIFIER && currentToken.kind != JavaParserConstants.LPAREN) {
173 isAnnotation = false;
174 }
175 }
176 if (currentToken.kind == JavaParserConstants.AT) {
177 isAnnotation = true;
178 }
179 }
180 }
181 }