summaryrefslogtreecommitdiff
path: root/src/parser/Python3LexerBase.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser/Python3LexerBase.java')
-rw-r--r--src/parser/Python3LexerBase.java151
1 files changed, 151 insertions, 0 deletions
diff --git a/src/parser/Python3LexerBase.java b/src/parser/Python3LexerBase.java
new file mode 100644
index 0000000..e2246a3
--- /dev/null
+++ b/src/parser/Python3LexerBase.java
@@ -0,0 +1,151 @@
+package com.clp.project.parser;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import org.antlr.v4.runtime.*;
+
+abstract class Python3LexerBase extends Lexer {
+  // Tokens queued by emit() that nextToken() has not handed out yet.
+  private java.util.LinkedList<Token> pending = new java.util.LinkedList<>();
+  // Indentation widths of the blocks currently open, innermost on top.
+  private Deque<Integer> indentStack = new ArrayDeque<>();
+  // Number of openBrace() calls not yet matched by closeBrace().
+  private int openBrackets = 0;
+  // Most recent default-channel token obtained from super.nextToken().
+  private Token lastDefaultToken = null;
+
+  protected Python3LexerBase(CharStream input) {
+    super(input);
+  }
+
+  /** Makes {@code t} the lexer's current token and appends it to the pending queue. */
+  @Override
+  public void emit(Token t) {
+    super.setToken(t);
+    pending.offer(t);
+  }
+
+  /** Returns the oldest queued token, or the newly lexed one when the queue is empty. */
+  @Override
+  public Token nextToken() {
+    if (_input.LA(1) == EOF && !indentStack.isEmpty()) {
+      closeOpenBlocksAtEof();
+    }
+    Token lexed = super.nextToken();
+    if (lexed.getChannel() == Token.DEFAULT_CHANNEL) {
+      // Remembered so that later DEDENT tokens can copy its line number.
+      lastDefaultToken = lexed;
+    }
+    return pending.isEmpty() ? lexed : pending.poll();
+  }
+
+  // Queues NEWLINE, one DEDENT per open block and a single EOF to terminate the input.
+  private void closeOpenBlocksAtEof() {
+    // Drop every EOF already queued; one is queued again at the end.
+    pending.removeIf(queued -> queued.getType() == EOF);
+    // The extra line break serves as the end of the last statement.
+    emit(syntheticToken(Python3Lexer.NEWLINE, "\n"));
+    while (!indentStack.isEmpty()) {
+      emit(newDedent());
+      indentStack.pop();
+    }
+    emit(syntheticToken(Python3Lexer.EOF, "<EOF>"));
+  }
+
+  // Builds a DEDENT token that carries the line of the last default-channel token.
+  private Token newDedent() {
+    CommonToken dedent = syntheticToken(Python3Lexer.DEDENT, "");
+    dedent.setLine(lastDefaultToken.getLine());
+    return dedent;
+  }
+
+  // Creates a default-channel token ending just before the current character index; the
+  // length of text only decides how far back its start index lies.
+  private CommonToken syntheticToken(int type, String text) {
+    int stop = getCharIndex() - 1;
+    int length = text.length();
+    int start = length == 0 ? stop : stop - length + 1;
+    return new CommonToken(_tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
+  }
+
+  /**
+   * Computes the indentation width of {@code spaces}: a tab advances the width to the next
+   * multiple of eight, any other character adds one. The tab rule is quoted from
+   * https://docs.python.org/3.1/reference/lexical_analysis.html#indentation:
+   *
+   * <p>"Tabs are replaced (from left to right) by one to eight spaces such that the total
+   * number of characters up to and including the replacement is a multiple of eight [...]"
+   */
+  static int getIndentationCount(String spaces) {
+    int width = 0;
+    for (int i = 0; i < spaces.length(); i++) {
+      if (spaces.charAt(i) == '\t') {
+        width += 8 - (width % 8);
+      } else {
+        width++;
+      }
+    }
+    return width;
+  }
+
+  /** Returns true when the base lexer reports line 1 and char position 0. */
+  boolean atStartOfInput() {
+    return super.getLine() == 1 && super.getCharPositionInLine() == 0;
+  }
+
+  /** Counts one more open bracket. */
+  void openBrace() {
+    openBrackets++;
+  }
+
+  /** Counts one open bracket fewer. */
+  void closeBrace() {
+    openBrackets--;
+  }
+
+  /**
+   * Splits the current token text into line-break characters and the rest, then either skips
+   * the match or queues a NEWLINE plus the INDENT or DEDENT tokens the new width calls for.
+   */
+  void onNewLine() {
+    String newLine = getText().replaceAll("[^\r\n\f]+", "");
+    String spaces = getText().replaceAll("[\r\n\f]+", "");
+    int next = _input.LA(1);
+    int nextnext = _input.LA(2);
+    boolean lineBreakOrComment = next == '\r' || next == '\n' || next == '\f' || next == '#';
+    // Inside brackets, and on blank or comment-only lines, nothing is produced. Near EOF
+    // (nextnext == -1) the NEWLINE is kept: the original note says it is needed by the
+    // "single_put" rule used by the REPL (presumably the grammar's single_input rule).
+    if (openBrackets > 0 || (nextnext != -1 && lineBreakOrComment)) {
+      skip();
+      return;
+    }
+
+    emit(syntheticToken(Python3Lexer.NEWLINE, newLine));
+    int indent = getIndentationCount(spaces);
+    int previous = indentStack.isEmpty() ? 0 : indentStack.peek();
+    if (indent > previous) {
+      indentStack.push(indent);
+      emit(syntheticToken(Python3Lexer.INDENT, spaces));
+    } else if (indent < previous) {
+      // Close every block indented deeper than the new line.
+      while (!indentStack.isEmpty() && indentStack.peek() > indent) {
+        emit(newDedent());
+        indentStack.pop();
+      }
+    } else {
+      // Same width as the current block: no INDENT or DEDENT, just skip().
+      skip();
+    }
+  }
+
+  /** Restores the indentation-tracking state to its initial values, then resets the lexer. */
+  @Override
+  public void reset() {
+    lastDefaultToken = null;
+    openBrackets = 0;
+    indentStack = new ArrayDeque<>();
+    pending = new java.util.LinkedList<>();
+    super.reset();
+  }
+}