1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.Reader;
7 import java.io.StringReader;
8
9 import net.sourceforge.pmd.lang.LanguageRegistry;
10 import net.sourceforge.pmd.lang.LanguageVersionHandler;
11 import net.sourceforge.pmd.lang.TokenManager;
12 import net.sourceforge.pmd.lang.ast.TokenMgrError;
13 import net.sourceforge.pmd.lang.python.PythonLanguageModule;
14 import net.sourceforge.pmd.lang.python.ast.Token;
15 import net.sourceforge.pmd.util.IOUtil;
16
17 import org.apache.commons.io.IOUtils;
18
19
20
21
22 public class PythonTokenizer implements Tokenizer {
23
24 @Override
25 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
26 StringBuilder buffer = sourceCode.getCodeBuffer();
27 Reader reader = null;
28 try {
29 LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PythonLanguageModule.NAME)
30 .getDefaultVersion().getLanguageVersionHandler();
31 reader = new StringReader(buffer.toString());
32 reader = IOUtil.skipBOM(reader);
33 TokenManager tokenManager = languageVersionHandler.getParser(
34 languageVersionHandler.getDefaultParserOptions()).getTokenManager(sourceCode.getFileName(), reader);
35 Token currentToken = (Token) tokenManager.getNextToken();
36 while (currentToken.image.length() > 0) {
37 tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
38 currentToken = (Token) tokenManager.getNextToken();
39 }
40 tokenEntries.add(TokenEntry.getEOF());
41 System.err.println("Added " + sourceCode);
42 } catch (TokenMgrError err) {
43 err.printStackTrace();
44 System.err.println("Skipping " + sourceCode + " due to parse error");
45 tokenEntries.add(TokenEntry.getEOF());
46 } finally {
47 IOUtils.closeQuietly(reader);
48 }
49 }
50 }