424 lines
12 KiB
Plaintext
424 lines
12 KiB
Plaintext
|
package slangc.parser;
|
||
|
|
||
|
public class Scan {
|
||
|
// Language definition supplied to the constructor; innerTokenAt queries it for
// comment prefixes, string/char terminators, numeric prefixes, operators and keywords.
private final Language language;
|
||
|
// The source being tokenised; SourceWalker instances are created over it by the scan* methods.
private final Source source;
|
||
|
// Taken from source.getEncoding() in the constructor; used to classify characters
// (spaces, newlines, digits, name characters, numeric suffixes).
private final SourceEncoding encoding;
|
||
|
// Backing array for the collected tokens; only the first ntokens slots are in use.
// append() grows it when full, reset() replaces it with a smaller array.
private Token[] tokens = new Token[1000];
|
||
|
// Number of tokens currently stored in the tokens array.
int ntokens = 0;
|
||
|
|
||
|
public void reset() {
|
||
|
tokens = new Token[20];
|
||
|
ntokens = 0;
|
||
|
}
|
||
|
|
||
|
/**
 * Creates a scanner over the given source.
 *
 * @param language the language definition consulted while tokenising
 * @param source   the source to tokenise; its encoding is read here and kept for
 *                 character classification
 */
public Scan(Language language, Source source) {
    this.source = source;
    this.language = language;
    this.encoding = source.getEncoding();
}
|
||
|
|
||
|
public Language getLanguage() {
|
||
|
return language;
|
||
|
}
|
||
|
|
||
|
public Source getSource() {
|
||
|
return source;
|
||
|
}
|
||
|
|
||
|
public int countTokens() {
|
||
|
return ntokens; //tokens.length;
|
||
|
}
|
||
|
|
||
|
public Token getToken(int index) {
|
||
|
if (index < 0 || index >= ntokens /*tokens.length*/) {
|
||
|
return null;
|
||
|
} else {
|
||
|
return tokens[index];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public int scanAll() {
|
||
|
SourceWalker w = new SourceWalker(source);
|
||
|
Token t;
|
||
|
int i = 0;
|
||
|
do {
|
||
|
t = tokenAt(w);
|
||
|
append(t);
|
||
|
i++;
|
||
|
} while (!t.is(TokenType.END_OF_FILE));
|
||
|
return i;
|
||
|
}
|
||
|
|
||
|
// When true, innerTokenAt returns null instead of a token for line comments and for
// properly terminated long comments (unterminated long comments are still returned).
// scanQuick sets it to true; scanPedantic sets it to false.
boolean nullifyComments = false;
|
||
|
public int scanQuick() {
|
||
|
nullifyComments = true;
|
||
|
SourceWalker w = new SourceWalker(source);
|
||
|
Token t;
|
||
|
int i = 0;
|
||
|
do {
|
||
|
t = tokenAt(w);
|
||
|
if (t == null || t.is(TokenType.LINE_COMMENT) || t.is(TokenType.LONG_COMMENT)) {
|
||
|
// Ignore these
|
||
|
} else {
|
||
|
append(t);
|
||
|
i++;
|
||
|
}
|
||
|
} while (t == null || !t.is(TokenType.END_OF_FILE));
|
||
|
return i;
|
||
|
}
|
||
|
|
||
|
public int scanPedantic() {
|
||
|
nullifyComments = false;
|
||
|
SourceWalker w = new SourceWalker(source);
|
||
|
Token t;
|
||
|
Token precedingNonComment = null;
|
||
|
Token[] precedingComments = new Token[] {};
|
||
|
int i = 0;
|
||
|
do {
|
||
|
t = tokenAt(w);
|
||
|
if (t.is(TokenType.LINE_COMMENT) || t.is(TokenType.LONG_COMMENT)) {
|
||
|
if (precedingNonComment != null && precedingNonComment.getSnippet().getStart().getLineCount() == t.getSnippet().getStart().getLineCount()) {
|
||
|
precedingNonComment.annotate(new CommentAnnotation(t));
|
||
|
} else {
|
||
|
Token[] nprecoms = new Token[precedingComments.length + 1];
|
||
|
for (int j = 0; j < precedingComments.length; j++) {
|
||
|
nprecoms[j] = precedingComments[j];
|
||
|
}
|
||
|
nprecoms[nprecoms.length - 1] = t;
|
||
|
precedingComments = nprecoms;
|
||
|
}
|
||
|
} else {
|
||
|
precedingNonComment = t;
|
||
|
if (precedingComments.length != 0) {
|
||
|
for (int j = 0; j < precedingComments.length; j++) {
|
||
|
precedingNonComment.annotate(new CommentAnnotation(precedingComments[j]));
|
||
|
}
|
||
|
precedingComments = new Token[] {};
|
||
|
}
|
||
|
append(t);
|
||
|
i++;
|
||
|
}
|
||
|
} while (!t.is(TokenType.END_OF_FILE));
|
||
|
return i;
|
||
|
}
|
||
|
|
||
|
public void append(Token t) {
|
||
|
if (ntokens >= tokens.length) {
|
||
|
Token[] narr = new Token[tokens.length * 2 + 1];
|
||
|
for (int i = 0; i < tokens.length; i++) {
|
||
|
narr[i] = tokens[i];
|
||
|
}
|
||
|
tokens = narr;
|
||
|
}
|
||
|
tokens[ntokens] = t;
|
||
|
t.setOwner(this);
|
||
|
ntokens++;
|
||
|
/*
|
||
|
Token[] ntokens = new Token[tokens.length + 1];
|
||
|
for (int i = 0; i < tokens.length; i++) {
|
||
|
ntokens[i] = tokens[i];
|
||
|
}
|
||
|
ntokens[ntokens.length - 1] = t;
|
||
|
t.setOwner(this);
|
||
|
tokens = ntokens;
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
public /*static */void skipSpaces(SourceWalker walker) {
|
||
|
while (!walker.isAtEnd() && encoding.isSpace(walker.getSource().getCharacter(walker.getIndex()))) {
|
||
|
walker.advance();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public Token tokenAt(SourceWalker walker) {
|
||
|
Token t = innerTokenAt(walker);
|
||
|
if (t != null) {
|
||
|
t.setOwner(this);
|
||
|
}
|
||
|
return t;
|
||
|
}
|
||
|
public Token innerTokenAt(SourceWalker walker) {
|
||
|
//assert walker.getSource() == source;
|
||
|
//position = position.clone();
|
||
|
skipSpaces(walker);
|
||
|
if (walker.isAtEnd()) {
|
||
|
return new Token(TokenType.END_OF_FILE, new SourceSnippet(walker.getPosition(), 0));
|
||
|
}
|
||
|
|
||
|
if (source.matches(walker.getIndex(), language.getLineCommentPrefixes())) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
|
||
|
int i = 0;
|
||
|
while (!walker.isAtEnd() && !encoding.isNewline(walker.getSource().getCharacter(walker.getIndex()))) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
if (nullifyComments) return null;
|
||
|
return new Token(TokenType.LINE_COMMENT, new SourceSnippet(startPosition, i));
|
||
|
} else if (source.matches(walker.getIndex(), language.getLongCommentTerminators())) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
String end = source.match(walker.getIndex(), language.getLongCommentTerminators());
|
||
|
|
||
|
int i = 0;
|
||
|
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end)) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
boolean unterminated = false;
|
||
|
|
||
|
if (walker.isAtEnd()) {
|
||
|
unterminated = true;
|
||
|
} else {
|
||
|
walker.advance(end.intsLength());
|
||
|
i += end.intsLength();
|
||
|
}
|
||
|
if (nullifyComments && !unterminated) return null;
|
||
|
Token result = new Token(TokenType.LONG_COMMENT, new SourceSnippet(startPosition, i));
|
||
|
if (unterminated) {
|
||
|
result.annotate(ErrorType.UNTERMINATED_TOKEN);
|
||
|
}
|
||
|
return result;
|
||
|
} else if (source.matches(walker.getIndex(), language.getStringTerminators())) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
String end = source.match(walker.getIndex(), language.getStringTerminators());
|
||
|
|
||
|
walker.advance(); // Must read at least the quote or it will likely match as end too!
|
||
|
int i = 1;
|
||
|
|
||
|
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) && !source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) {
|
||
|
if (source.matches(walker.getIndex(), language.getStringEscapes())) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
boolean unterminated = false;
|
||
|
boolean badEscape = false;
|
||
|
|
||
|
if (walker.isAtEnd()) {
|
||
|
unterminated = true;
|
||
|
} else if (source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) {
|
||
|
badEscape = true;
|
||
|
} else {
|
||
|
walker.advance(end.intsLength());
|
||
|
i += end.intsLength();
|
||
|
}
|
||
|
|
||
|
Token result = new Token(TokenType.STRING, new SourceSnippet(startPosition, i));
|
||
|
if (unterminated) {
|
||
|
result.annotate(ErrorType.UNTERMINATED_TOKEN);
|
||
|
}
|
||
|
if (badEscape) {
|
||
|
result.annotate(ErrorType.BAD_ESCAPE);
|
||
|
}
|
||
|
return result;
|
||
|
} else if (source.matches(walker.getIndex(), language.getCharTerminators())) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
String end = source.match(walker.getIndex(), language.getCharTerminators());
|
||
|
|
||
|
walker.advance(); // Must read at least the quote or it will likely match as end too!
|
||
|
int i = 1;
|
||
|
|
||
|
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) /*&& !sourceFile.matches(walker.getIndex(), language.getStringForbiddenEscapes())*/) {
|
||
|
if (source.matches(walker.getIndex(), language.getStringEscapes())) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
boolean unterminated = false;
|
||
|
boolean badEscape = false;
|
||
|
|
||
|
if (walker.isAtEnd()) {
|
||
|
unterminated = true;
|
||
|
} else if (source.matches(walker.getIndex(), language.getCharForbiddenEscapes())) {
|
||
|
badEscape = true;
|
||
|
} else {
|
||
|
walker.advance(end.intsLength());
|
||
|
i += end.intsLength();
|
||
|
}
|
||
|
|
||
|
Token result = new Token(TokenType.CHAR, new SourceSnippet(startPosition, i));
|
||
|
if (unterminated) {
|
||
|
result.annotate(ErrorType.UNTERMINATED_TOKEN);
|
||
|
}
|
||
|
if (badEscape) {
|
||
|
result.annotate(ErrorType.BAD_ESCAPE);
|
||
|
}
|
||
|
return result;
|
||
|
} /*else if (sourceFile.matches(walker.getIndex(), language.getHexPrefixes())) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
|
||
|
String prefix = sourceFile.match(walker.getIndex(), language.getHexPrefixes());
|
||
|
|
||
|
int i = prefix.intsLength();
|
||
|
walker.advance(i);
|
||
|
|
||
|
while (encoding.isDigitOrUnderscore(sourceFile.getCharacter(walker.getIndex()), 16)) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
while (encoding.isIntegerSuffix(sourceFile.getCharacter(walker.getIndex()))) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
return new Token(TokenType.HEX_INTEGER, new SourceSnippet(startPosition, i));
|
||
|
}*/ else if (source.matches(walker.getIndex(), language.getHexPrefixes())
|
||
|
|| source.matches(walker.getIndex(), language.getBinPrefixes())
|
||
|
|| encoding.isDigit(source.getCharacter(walker.getIndex()))
|
||
|
// Also check obscure case where float starts with a dot
|
||
|
|| (encoding.isDecimalPoint(source.getCharacter(walker.getIndex())) && encoding.isDigit(source.getCharacter(walker.getIndex() + 1)))) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
String prefix = "";
|
||
|
int i = 0;
|
||
|
boolean isFloat = false;
|
||
|
int radix = 10;
|
||
|
|
||
|
if (source.matches(walker.getIndex(), language.getHexPrefixes())) {
|
||
|
prefix = source.match(walker.getIndex(), language.getHexPrefixes());
|
||
|
|
||
|
i = prefix.intsLength();
|
||
|
walker.advance(i);
|
||
|
|
||
|
radix = 16;
|
||
|
} else if (source.matches(walker.getIndex(), language.getBinPrefixes())) {
|
||
|
prefix = source.match(walker.getIndex(), language.getBinPrefixes());
|
||
|
|
||
|
i = prefix.intsLength();
|
||
|
walker.advance(i);
|
||
|
|
||
|
radix = 2;
|
||
|
} else {
|
||
|
if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) {
|
||
|
isFloat = true;
|
||
|
}
|
||
|
walker.advance();
|
||
|
i = 1;
|
||
|
}
|
||
|
|
||
|
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) {
|
||
|
isFloat = true;
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (encoding.isExponentSymbol(source.getCharacter(walker.getIndex()))) {
|
||
|
isFloat = true; // Allow floats to be made using exponent alone without decimal point
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
if (encoding.isSign(source.getCharacter(walker.getIndex()))) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
while (encoding.isFloatSuffix(source.getCharacter(walker.getIndex()))) {
|
||
|
isFloat = true; // Allow floats to be made just by adding a float suffix
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
if (!isFloat) {
|
||
|
while (encoding.isIntegerSuffix(source.getCharacter(walker.getIndex()))) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
TokenType t;
|
||
|
if (isFloat) {
|
||
|
switch(radix) {
|
||
|
case 2:
|
||
|
t = TokenType.BIN_FLOAT;
|
||
|
break;
|
||
|
case 10:
|
||
|
t = TokenType.DEC_FLOAT;
|
||
|
break;
|
||
|
case 16:
|
||
|
t = TokenType.HEX_FLOAT;
|
||
|
break;
|
||
|
default:
|
||
|
throw new Error("INTERNAL ERROR: Bad base number: " + radix);
|
||
|
}
|
||
|
} else {
|
||
|
switch(radix) {
|
||
|
case 2:
|
||
|
t = TokenType.BIN_INTEGER;
|
||
|
break;
|
||
|
case 10:
|
||
|
t = TokenType.DEC_INTEGER;
|
||
|
break;
|
||
|
case 16:
|
||
|
t = TokenType.HEX_INTEGER;
|
||
|
break;
|
||
|
default:
|
||
|
throw new Error("INTERNAL ERROR: Bad base number: " + radix);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return new Token(t, new SourceSnippet(startPosition, i));
|
||
|
} else if (source.matches(walker.getIndex(), language.getOperators())) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
|
||
|
String op = source.match(walker.getIndex(), language.getOperators());
|
||
|
|
||
|
walker.advance(op.intsLength());
|
||
|
|
||
|
return new Token(TokenType.OPERATOR, new SourceSnippet(startPosition, op.intsLength()));
|
||
|
} else if (encoding.isValidNameHead(source.getCharacter(walker.getIndex()))) {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
|
||
|
walker.advance();
|
||
|
int i = 1;
|
||
|
|
||
|
int c;
|
||
|
|
||
|
while (((c = source.getCharacter(walker.getIndex())) >= 'a' && c <= 'z') || encoding.isValidNameTail(c)) {
|
||
|
walker.advance();
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
SourceSnippet s = new SourceSnippet(startPosition, i);
|
||
|
String n = s.getSource();
|
||
|
|
||
|
String[] kws = language.getKeywords();
|
||
|
for (int j = 0; j < kws.length; j++) {
|
||
|
if (n == kws[j]) {
|
||
|
return new Token(TokenType.KEYWORD, s);
|
||
|
}
|
||
|
}
|
||
|
return new Token(TokenType.NAME, s);
|
||
|
} else {
|
||
|
SourcePosition startPosition = walker.getPosition();
|
||
|
|
||
|
//throw new Error("Unexpected character #" + startPosition.getSource().getCharacter(startPosition.getIndex()));
|
||
|
|
||
|
walker.advance();
|
||
|
return new Token(TokenType.UNEXPECTED_CHARACTER, new SourceSnippet(startPosition, 1));
|
||
|
}
|
||
|
}
|
||
|
}
|