summary refs log tree commit diff
path: root/src/Python3LexerBase.java
diff options
context:
space:
mode:
author Santo Cariotti <santo@dcariotti.me> 2024-06-04 14:11:32 +0200
committer GitHub <noreply@github.com> 2024-06-04 14:11:32 +0200
commit 1c8761901b26c0be4d61f3aed5ec0495a558a0e7 (patch)
tree be3177edeaf93cbd57e49f7067418bc8ef0dfdd3 /src/Python3LexerBase.java
parent 663b99a971f8dd7f7776fe3647f24ce728b3d434 (diff)
Refactor using package `com.clp.project` (#1)
Co-authored-by: geno <gabriele.genovese2@studio.unibo.it>
Diffstat (limited to 'src/Python3LexerBase.java')
-rw-r--r-- src/Python3LexerBase.java 149
1 files changed, 0 insertions, 149 deletions
diff --git a/src/Python3LexerBase.java b/src/Python3LexerBase.java
deleted file mode 100644
index 407e624..0000000
--- a/src/Python3LexerBase.java
+++ /dev/null
@@ -1,149 +0,0 @@
-import java.util.ArrayDeque;
-import java.util.Deque;
-import org.antlr.v4.runtime.*;
-
-abstract class Python3LexerBase extends Lexer {
- // A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
- private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
- // The stack that keeps track of the indentation level.
- private Deque<Integer> indents = new ArrayDeque<>();
- // The number of currently open braces, brackets and parentheses.
- private int opened = 0;
- // The most recently produced token.
- private Token lastToken = null;
-
- protected Python3LexerBase(CharStream input) {
- super(input);
- }
-
- @Override
- public void emit(Token t) {
- super.setToken(t);
- tokens.offer(t);
- }
-
- @Override
- public Token nextToken() {
- // Check if the end-of-file is ahead and there are still some DEDENTS expected.
- if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
- // Remove any trailing EOF tokens from our buffer.
- for (int i = tokens.size() - 1; i >= 0; i--) {
- if (tokens.get(i).getType() == EOF) {
- tokens.remove(i);
- }
- }
-
- // First emit an extra line break that serves as the end of the statement.
- this.emit(commonToken(Python3Lexer.NEWLINE, "\n"));
-
- // Now emit as many DEDENT tokens as needed.
- while (!indents.isEmpty()) {
- this.emit(createDedent());
- indents.pop();
- }
-
- // Put the EOF back on the token stream.
- this.emit(commonToken(Python3Lexer.EOF, "<EOF>"));
- }
-
- Token next = super.nextToken();
-
- if (next.getChannel() == Token.DEFAULT_CHANNEL) {
- // Keep track of the last token on the default channel.
- this.lastToken = next;
- }
-
- return tokens.isEmpty() ? next : tokens.poll();
- }
-
- private Token createDedent() {
- CommonToken dedent = commonToken(Python3Lexer.DEDENT, "");
- dedent.setLine(this.lastToken.getLine());
- return dedent;
- }
-
- private CommonToken commonToken(int type, String text) {
- int stop = this.getCharIndex() - 1;
- int start = text.isEmpty() ? stop : stop - text.length() + 1;
- return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
- }
-
- // Calculates the indentation of the provided spaces, taking the
- // following rules into account:
- //
- // "Tabs are replaced (from left to right) by one to eight spaces
- // such that the total number of characters up to and including
- // the replacement is a multiple of eight [...]"
- //
- // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
- static int getIndentationCount(String spaces) {
- int count = 0;
- for (char ch : spaces.toCharArray()) {
- switch (ch) {
- case '\t':
- count += 8 - (count % 8);
- break;
- default:
- // A normal space char.
- count++;
- }
- }
-
- return count;
- }
-
- boolean atStartOfInput() {
- return super.getCharPositionInLine() == 0 && super.getLine() == 1;
- }
-
- void openBrace() {
- this.opened++;
- }
-
- void closeBrace() {
- this.opened--;
- }
-
- void onNewLine() {
- String newLine = getText().replaceAll("[^\r\n\f]+", "");
- String spaces = getText().replaceAll("[\r\n\f]+", "");
-
- // Strip newlines inside open clauses, except when we are near EOF.
- // We keep NEWLINEs near EOF to satisfy the final newline required
- // by the single_input rule used by the REPL.
- int next = _input.LA(1);
- int nextnext = _input.LA(2);
- if (opened > 0
- || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) {
- // If we're inside a list or on a blank line, ignore all indents,
- // dedents and line breaks.
- skip();
- } else {
- emit(commonToken(Python3Lexer.NEWLINE, newLine));
- int indent = getIndentationCount(spaces);
- int previous = indents.isEmpty() ? 0 : indents.peek();
- if (indent == previous) {
- // skip indents of the same size as the present indent-size
- skip();
- } else if (indent > previous) {
- indents.push(indent);
- emit(commonToken(Python3Lexer.INDENT, spaces));
- } else {
- // Possibly emit more than 1 DEDENT token.
- while (!indents.isEmpty() && indents.peek() > indent) {
- this.emit(createDedent());
- indents.pop();
- }
- }
- }
- }
-
- @Override
- public void reset() {
- tokens = new java.util.LinkedList<>();
- indents = new ArrayDeque<>();
- opened = 0;
- lastToken = null;
- super.reset();
- }
-}