1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
package com.clp.project.parser;
import java.util.ArrayDeque;
import java.util.Deque;
import org.antlr.v4.runtime.*;
abstract class Python3LexerBase extends Lexer {
// A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
// The stack that keeps track of the indentation level.
private Deque<Integer> indents = new ArrayDeque<>();
// The amount of opened braces, brackets and parenthesis.
private int opened = 0;
// The most recently produced token.
private Token lastToken = null;
protected Python3LexerBase(CharStream input) {
super(input);
}
@Override
public void emit(Token t) {
super.setToken(t);
tokens.offer(t);
}
@Override
public Token nextToken() {
// Check if the end-of-file is ahead and there are still some DEDENTS expected.
if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
// Remove any trailing EOF tokens from our buffer.
for (int i = tokens.size() - 1; i >= 0; i--) {
if (tokens.get(i).getType() == EOF) {
tokens.remove(i);
}
}
// First emit an extra line break that serves as the end of the statement.
this.emit(commonToken(Python3Lexer.NEWLINE, "\n"));
// Now emit as much DEDENT tokens as needed.
while (!indents.isEmpty()) {
this.emit(createDedent());
indents.pop();
}
// Put the EOF back on the token stream.
this.emit(commonToken(Python3Lexer.EOF, "<EOF>"));
}
Token next = super.nextToken();
if (next.getChannel() == Token.DEFAULT_CHANNEL) {
// Keep track of the last token on the default channel.
this.lastToken = next;
}
return tokens.isEmpty() ? next : tokens.poll();
}
private Token createDedent() {
CommonToken dedent = commonToken(Python3Lexer.DEDENT, "");
dedent.setLine(this.lastToken.getLine());
return dedent;
}
private CommonToken commonToken(int type, String text) {
int stop = this.getCharIndex() - 1;
int start = text.isEmpty() ? stop : stop - text.length() + 1;
return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
}
// Calculates the indentation of the provided spaces, taking the
// following rules into account:
//
// "Tabs are replaced (from left to right) by one to eight spaces
// such that the total number of characters up to and including
// the replacement is a multiple of eight [...]"
//
// -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
static int getIndentationCount(String spaces) {
int count = 0;
for (char ch : spaces.toCharArray()) {
switch (ch) {
case '\t':
count += 8 - (count % 8);
break;
default:
// A normal space char.
count++;
}
}
return count;
}
boolean atStartOfInput() {
return super.getCharPositionInLine() == 0 && super.getLine() == 1;
}
void openBrace() {
this.opened++;
}
void closeBrace() {
this.opened--;
}
void onNewLine() {
String newLine = getText().replaceAll("[^\r\n\f]+", "");
String spaces = getText().replaceAll("[\r\n\f]+", "");
// Strip newlines inside open clauses except if we are near EOF. We keep
// NEWLINEs near EOF to
// satisfy the final newline needed by the single_put rule used by the REPL.
int next = _input.LA(1);
int nextnext = _input.LA(2);
if (opened > 0
|| (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) {
// If we're inside a list or on a blank line, ignore all indents,
// dedents and line breaks.
skip();
} else {
emit(commonToken(Python3Lexer.NEWLINE, newLine));
int indent = getIndentationCount(spaces);
int previous = indents.isEmpty() ? 0 : indents.peek();
if (indent == previous) {
// skip indents of the same size as the present indent-size
skip();
} else if (indent > previous) {
indents.push(indent);
emit(commonToken(Python3Lexer.INDENT, spaces));
} else {
// Possibly emit more than 1 DEDENT token.
while (!indents.isEmpty() && indents.peek() > indent) {
this.emit(createDedent());
indents.pop();
}
}
}
}
@Override
public void reset() {
tokens = new java.util.LinkedList<>();
indents = new ArrayDeque<>();
opened = 0;
lastToken = null;
super.reset();
}
}
|