diff options
Diffstat (limited to 'src/parser/Python3LexerBase.java')
-rw-r--r-- | src/parser/Python3LexerBase.java | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/src/parser/Python3LexerBase.java b/src/parser/Python3LexerBase.java new file mode 100644 index 0000000..e2246a3 --- /dev/null +++ b/src/parser/Python3LexerBase.java @@ -0,0 +1,151 @@ +package com.clp.project.parser; + +import java.util.ArrayDeque; +import java.util.Deque; +import org.antlr.v4.runtime.*; + +abstract class Python3LexerBase extends Lexer { + // A queue where extra tokens are pushed on (see the NEWLINE lexer rule). + private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>(); + // The stack that keeps track of the indentation level. + private Deque<Integer> indents = new ArrayDeque<>(); + // The amount of opened braces, brackets and parenthesis. + private int opened = 0; + // The most recently produced token. + private Token lastToken = null; + + protected Python3LexerBase(CharStream input) { + super(input); + } + + @Override + public void emit(Token t) { + super.setToken(t); + tokens.offer(t); + } + + @Override + public Token nextToken() { + // Check if the end-of-file is ahead and there are still some DEDENTS expected. + if (_input.LA(1) == EOF && !this.indents.isEmpty()) { + // Remove any trailing EOF tokens from our buffer. + for (int i = tokens.size() - 1; i >= 0; i--) { + if (tokens.get(i).getType() == EOF) { + tokens.remove(i); + } + } + + // First emit an extra line break that serves as the end of the statement. + this.emit(commonToken(Python3Lexer.NEWLINE, "\n")); + + // Now emit as much DEDENT tokens as needed. + while (!indents.isEmpty()) { + this.emit(createDedent()); + indents.pop(); + } + + // Put the EOF back on the token stream. + this.emit(commonToken(Python3Lexer.EOF, "<EOF>")); + } + + Token next = super.nextToken(); + + if (next.getChannel() == Token.DEFAULT_CHANNEL) { + // Keep track of the last token on the default channel. + this.lastToken = next; + } + + return tokens.isEmpty() ? next : tokens.poll(); + } + + private Token createDedent() { + CommonToken dedent = commonToken(Python3Lexer.DEDENT, ""); + dedent.setLine(this.lastToken.getLine()); + return dedent; + } + + private CommonToken commonToken(int type, String text) { + int stop = this.getCharIndex() - 1; + int start = text.isEmpty() ? stop : stop - text.length() + 1; + return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop); + } + + // Calculates the indentation of the provided spaces, taking the + // following rules into account: + // + // "Tabs are replaced (from left to right) by one to eight spaces + // such that the total number of characters up to and including + // the replacement is a multiple of eight [...]" + // + // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation + static int getIndentationCount(String spaces) { + int count = 0; + for (char ch : spaces.toCharArray()) { + switch (ch) { + case '\t': + count += 8 - (count % 8); + break; + default: + // A normal space char. + count++; + } + } + + return count; + } + + boolean atStartOfInput() { + return super.getCharPositionInLine() == 0 && super.getLine() == 1; + } + + void openBrace() { + this.opened++; + } + + void closeBrace() { + this.opened--; + } + + void onNewLine() { + String newLine = getText().replaceAll("[^\r\n\f]+", ""); + String spaces = getText().replaceAll("[\r\n\f]+", ""); + + // Strip newlines inside open clauses except if we are near EOF. We keep + // NEWLINEs near EOF to + // satisfy the final newline needed by the single_put rule used by the REPL. + int next = _input.LA(1); + int nextnext = _input.LA(2); + if (opened > 0 + || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) { + // If we're inside a list or on a blank line, ignore all indents, + // dedents and line breaks. + skip(); + } else { + emit(commonToken(Python3Lexer.NEWLINE, newLine)); + int indent = getIndentationCount(spaces); + int previous = indents.isEmpty() ? 0 : indents.peek(); + if (indent == previous) { + // skip indents of the same size as the present indent-size + skip(); + } else if (indent > previous) { + indents.push(indent); + emit(commonToken(Python3Lexer.INDENT, spaces)); + } else { + // Possibly emit more than 1 DEDENT token. + while (!indents.isEmpty() && indents.peek() > indent) { + this.emit(createDedent()); + indents.pop(); + } + } + } + } + + @Override + public void reset() { + tokens = new java.util.LinkedList<>(); + indents = new ArrayDeque<>(); + opened = 0; + lastToken = null; + super.reset(); + } +} |