diff options
author | Santo Cariotti <santo@dcariotti.me> | 2024-06-04 14:11:32 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-04 14:11:32 +0200 |
commit | 1c8761901b26c0be4d61f3aed5ec0495a558a0e7 (patch) | |
tree | be3177edeaf93cbd57e49f7067418bc8ef0dfdd3 /src/Python3LexerBase.java | |
parent | 663b99a971f8dd7f7776fe3647f24ce728b3d434 (diff) |
Refactor using package `com.clp.project` (#1)
Co-authored-by: geno <gabriele.genovese2@studio.unibo.it>
Diffstat (limited to 'src/Python3LexerBase.java')
-rw-r--r-- | src/Python3LexerBase.java | 149 |
1 files changed, 0 insertions, 149 deletions
diff --git a/src/Python3LexerBase.java b/src/Python3LexerBase.java deleted file mode 100644 index 407e624..0000000 --- a/src/Python3LexerBase.java +++ /dev/null @@ -1,149 +0,0 @@ -import java.util.ArrayDeque; -import java.util.Deque; -import org.antlr.v4.runtime.*; - -abstract class Python3LexerBase extends Lexer { - // A queue where extra tokens are pushed on (see the NEWLINE lexer rule). - private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>(); - // The stack that keeps track of the indentation level. - private Deque<Integer> indents = new ArrayDeque<>(); - // The amount of opened braces, brackets and parenthesis. - private int opened = 0; - // The most recently produced token. - private Token lastToken = null; - - protected Python3LexerBase(CharStream input) { - super(input); - } - - @Override - public void emit(Token t) { - super.setToken(t); - tokens.offer(t); - } - - @Override - public Token nextToken() { - // Check if the end-of-file is ahead and there are still some DEDENTS expected. - if (_input.LA(1) == EOF && !this.indents.isEmpty()) { - // Remove any trailing EOF tokens from our buffer. - for (int i = tokens.size() - 1; i >= 0; i--) { - if (tokens.get(i).getType() == EOF) { - tokens.remove(i); - } - } - - // First emit an extra line break that serves as the end of the statement. - this.emit(commonToken(Python3Lexer.NEWLINE, "\n")); - - // Now emit as much DEDENT tokens as needed. - while (!indents.isEmpty()) { - this.emit(createDedent()); - indents.pop(); - } - - // Put the EOF back on the token stream. - this.emit(commonToken(Python3Lexer.EOF, "<EOF>")); - } - - Token next = super.nextToken(); - - if (next.getChannel() == Token.DEFAULT_CHANNEL) { - // Keep track of the last token on the default channel. - this.lastToken = next; - } - - return tokens.isEmpty() ? next : tokens.poll(); - } - - private Token createDedent() { - CommonToken dedent = commonToken(Python3Lexer.DEDENT, ""); - dedent.setLine(this.lastToken.getLine()); - return dedent; - } - - private CommonToken commonToken(int type, String text) { - int stop = this.getCharIndex() - 1; - int start = text.isEmpty() ? stop : stop - text.length() + 1; - return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop); - } - - // Calculates the indentation of the provided spaces, taking the - // following rules into account: - // - // "Tabs are replaced (from left to right) by one to eight spaces - // such that the total number of characters up to and including - // the replacement is a multiple of eight [...]" - // - // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation - static int getIndentationCount(String spaces) { - int count = 0; - for (char ch : spaces.toCharArray()) { - switch (ch) { - case '\t': - count += 8 - (count % 8); - break; - default: - // A normal space char. - count++; - } - } - - return count; - } - - boolean atStartOfInput() { - return super.getCharPositionInLine() == 0 && super.getLine() == 1; - } - - void openBrace() { - this.opened++; - } - - void closeBrace() { - this.opened--; - } - - void onNewLine() { - String newLine = getText().replaceAll("[^\r\n\f]+", ""); - String spaces = getText().replaceAll("[\r\n\f]+", ""); - - // Strip newlines inside open clauses except if we are near EOF. We keep - // NEWLINEs near EOF to - // satisfy the final newline needed by the single_put rule used by the REPL. - int next = _input.LA(1); - int nextnext = _input.LA(2); - if (opened > 0 - || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) { - // If we're inside a list or on a blank line, ignore all indents, - // dedents and line breaks. - skip(); - } else { - emit(commonToken(Python3Lexer.NEWLINE, newLine)); - int indent = getIndentationCount(spaces); - int previous = indents.isEmpty() ? 0 : indents.peek(); - if (indent == previous) { - // skip indents of the same size as the present indent-size - skip(); - } else if (indent > previous) { - indents.push(indent); - emit(commonToken(Python3Lexer.INDENT, spaces)); - } else { - // Possibly emit more than 1 DEDENT token. - while (!indents.isEmpty() && indents.peek() > indent) { - this.emit(createDedent()); - indents.pop(); - } - } - } - } - - @Override - public void reset() { - tokens = new java.util.LinkedList<>(); - indents = new ArrayDeque<>(); - opened = 0; - lastToken = null; - super.reset(); - } -} |