slcom/slangc/parser/Scan.sauce

424 lines
12 KiB
Plaintext

package slangc.parser;
public class Scan {
// Language definition consulted for comment, string, char, number-prefix, operator and keyword tables.
private final Language language;
// Source text being tokenised.
private final Source source;
// Character classifier obtained from source.getEncoding() in the constructor.
private final SourceEncoding encoding;
// Token storage; only the first ntokens entries are in use. append() enlarges the array when it is full.
private Token[] tokens = new Token[1000];
// Number of tokens currently stored in the tokens array.
int ntokens = 0;
/**
 * Discards every stored token and replaces the backing array with a fresh 20-slot one.
 * The nullifyComments flag is left as it is.
 */
public void reset() {
    ntokens = 0;
    tokens = new Token[20];
}
/**
 * Creates a scanner over the given source, remembering the encoding the source reports.
 *
 * @param language language definition used while tokenising
 * @param source   text to tokenise; its getEncoding() result is cached here
 */
public Scan(Language language, Source source) {
    this.source = source;
    this.language = language;
    this.encoding = source.getEncoding();
}
/**
 * @return the language definition this scanner was constructed with
 */
public Language getLanguage() {
    return this.language;
}
/**
 * @return the source this scanner was constructed with
 */
public Source getSource() {
    return this.source;
}
/**
 * @return the number of tokens stored so far (not the capacity of the backing array)
 */
public int countTokens() {
    return this.ntokens;
}
/**
 * Looks up a stored token by position.
 *
 * @param index zero-based position among the stored tokens
 * @return the token at that position, or null when index is negative or not below countTokens()
 */
public Token getToken(int index) {
    boolean inRange = index >= 0 && index < ntokens;
    return inRange ? tokens[index] : null;
}
/**
 * Tokenises the whole source and appends every token, comments included, in source order.
 * Scanning stops once the END_OF_FILE token has been appended.
 *
 * @return the number of tokens appended by this call, including the END_OF_FILE token
 */
public int scanAll() {
    // An earlier scanQuick() leaves nullifyComments set, which makes tokenAt() return null for
    // comments. This loop appends and inspects every result unconditionally, so the flag has to
    // be cleared first (scanPedantic() does the same) or the first comment causes a null dereference.
    nullifyComments = false;
    SourceWalker w = new SourceWalker(source);
    Token t;
    int i = 0;
    do {
        t = tokenAt(w);
        append(t);
        i++;
    } while (!t.is(TokenType.END_OF_FILE));
    return i;
}
// When true, innerTokenAt() returns null instead of a token for line comments and for terminated
// long comments. Set by scanQuick() and cleared by scanPedantic().
boolean nullifyComments = false;
/**
 * Tokenises the whole source while discarding comments.
 *
 * Turns on nullifyComments, so most comments come back from tokenAt() as null and are skipped.
 * Any LINE_COMMENT or LONG_COMMENT token that is still returned is dropped as well.
 * Scanning stops once the END_OF_FILE token has been appended.
 *
 * @return the number of tokens appended by this call, including the END_OF_FILE token
 */
public int scanQuick() {
    nullifyComments = true;
    SourceWalker walker = new SourceWalker(source);
    int appended = 0;
    while (true) {
        Token next = tokenAt(walker);
        if (next == null) {
            // A nullified comment: nothing to store, keep scanning.
            continue;
        }
        boolean isComment = next.is(TokenType.LINE_COMMENT) || next.is(TokenType.LONG_COMMENT);
        if (!isComment) {
            append(next);
            appended++;
        }
        if (next.is(TokenType.END_OF_FILE)) {
            break;
        }
    }
    return appended;
}
/**
 * Tokenises the whole source, keeping comments as annotations on neighbouring tokens instead of
 * storing them as tokens of their own.
 *
 * A comment whose start position reports the same getLineCount() as the most recent non-comment
 * token is attached to that token. Every other comment is held back and attached to the next
 * non-comment token (which may be END_OF_FILE). Only non-comment tokens are appended.
 *
 * @return the number of tokens appended by this call, including the END_OF_FILE token
 */
public int scanPedantic() {
    nullifyComments = false;
    SourceWalker walker = new SourceWalker(source);
    Token lastCode = null;
    Token[] pending = new Token[0];
    int appended = 0;
    Token current;
    do {
        current = tokenAt(walker);
        boolean isComment = current.is(TokenType.LINE_COMMENT) || current.is(TokenType.LONG_COMMENT);
        if (!isComment) {
            lastCode = current;
            // Hand over the comments that were waiting for a token to follow them.
            for (int k = 0; k < pending.length; k++) {
                lastCode.annotate(new CommentAnnotation(pending[k]));
            }
            if (pending.length != 0) {
                pending = new Token[0];
            }
            append(current);
            appended++;
        } else if (lastCode != null
                && lastCode.getSnippet().getStart().getLineCount() == current.getSnippet().getStart().getLineCount()) {
            // Trailing comment: belongs to the token it shares a line with.
            lastCode.annotate(new CommentAnnotation(current));
        } else {
            // Leading comment: remember it until the next non-comment token shows up.
            Token[] grown = new Token[pending.length + 1];
            int k = 0;
            while (k < pending.length) {
                grown[k] = pending[k];
                k++;
            }
            grown[grown.length - 1] = current;
            pending = grown;
        }
    } while (!current.is(TokenType.END_OF_FILE));
    return appended;
}
/**
 * Stores a token after the ones already held and marks this scan as its owner.
 *
 * When the backing array is full it is replaced by one of size 2 * length + 1 and the existing
 * entries are copied across.
 *
 * @param t token to store
 */
public void append(Token t) {
    int capacity = tokens.length;
    if (ntokens >= capacity) {
        Token[] grown = new Token[capacity * 2 + 1];
        int k = 0;
        while (k < capacity) {
            grown[k] = tokens[k];
            k++;
        }
        tokens = grown;
    }
    tokens[ntokens] = t;
    t.setOwner(this);
    ntokens++;
}
/**
 * Advances the walker until it is at the end or at a character for which encoding.isSpace is false.
 *
 * @param walker cursor to advance; characters are read from walker.getSource()
 */
public void skipSpaces(SourceWalker walker) {
    while (true) {
        if (walker.isAtEnd()) {
            return;
        }
        if (!encoding.isSpace(walker.getSource().getCharacter(walker.getIndex()))) {
            return;
        }
        walker.advance();
    }
}
/**
 * Reads the next token via innerTokenAt() and, when one is produced, marks this scan as its owner.
 *
 * @param walker cursor positioned where scanning should start
 * @return the token read, or null when innerTokenAt() returned null
 */
public Token tokenAt(SourceWalker walker) {
    Token result = innerTokenAt(walker);
    if (result == null) {
        return null;
    }
    result.setOwner(this);
    return result;
}
/**
 * Reads one token starting at the walker's current position and advances the walker past it.
 *
 * Characters accepted by encoding.isSpace are skipped first. The alternatives are then tried in
 * this order: end of input, line comment, long comment, string literal, character literal,
 * number, operator, name/keyword. Anything else becomes a one-character UNEXPECTED_CHARACTER token.
 *
 * Unlike tokenAt(), this method does not call setOwner on the result.
 *
 * @param walker cursor to read from and advance. Matching is done against this scan's own source
 *               at walker.getIndex(), so the walker is assumed to iterate that same source
 *               (see the commented-out assert below).
 * @return the token read, or null when nullifyComments is set and the text consumed was a line
 *         comment or a terminated long comment
 */
public Token innerTokenAt(SourceWalker walker) {
//assert walker.getSource() == source;
//position = position.clone();
skipSpaces(walker);
// Nothing left after the skipped spaces: report a zero-length END_OF_FILE token at this position.
if (walker.isAtEnd()) {
return new Token(TokenType.END_OF_FILE, new SourceSnippet(walker.getPosition(), 0));
}
if (source.matches(walker.getIndex(), language.getLineCommentPrefixes())) {
// Line comment: consume up to, but not including, the next character accepted by isNewline.
SourcePosition startPosition = walker.getPosition();
int i = 0;
while (!walker.isAtEnd() && !encoding.isNewline(walker.getSource().getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
// In nullify mode the comment is consumed but no token is created for it.
if (nullifyComments) return null;
return new Token(TokenType.LINE_COMMENT, new SourceSnippet(startPosition, i));
} else if (source.matches(walker.getIndex(), language.getLongCommentTerminators())) {
// Long comment: the string returned by match() is what the loop below searches for as the closer.
SourcePosition startPosition = walker.getPosition();
String end = source.match(walker.getIndex(), language.getLongCommentTerminators());
int i = 0;
// NOTE(review): the search starts at the opening position without skipping the opener first
// (the string branch below does skip it) - confirm an opener that overlaps the closer,
// such as "/*/", cannot end the comment early.
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end)) {
walker.advance();
i++;
}
boolean unterminated = false;
if (walker.isAtEnd()) {
unterminated = true;
} else {
// Include the closer in the token.
walker.advance(end.intsLength());
i += end.intsLength();
}
// An unterminated comment is returned even in nullify mode, carrying UNTERMINATED_TOKEN.
if (nullifyComments && !unterminated) return null;
Token result = new Token(TokenType.LONG_COMMENT, new SourceSnippet(startPosition, i));
if (unterminated) {
result.annotate(ErrorType.UNTERMINATED_TOKEN);
}
return result;
} else if (source.matches(walker.getIndex(), language.getStringTerminators())) {
// String literal: the terminator matched here is also the one that must close the literal.
SourcePosition startPosition = walker.getPosition();
String end = source.match(walker.getIndex(), language.getStringTerminators());
walker.advance(); // Must read at least the quote or it will likely match as end too!
// i counts the characters of the token; 1 accounts for the character just consumed.
int i = 1;
// Stop at the closing terminator or at anything matching getStringForbiddenEscapes.
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) && !source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) {
// On an escape, consume one extra character so that the character following it is not
// tested against the terminator (presumably so an escaped quote cannot end the literal).
// NOTE(review): this extra advance is not guarded by isAtEnd - confirm advance() is safe
// when the escape is the last character of the source.
if (source.matches(walker.getIndex(), language.getStringEscapes())) {
walker.advance();
i++;
}
walker.advance();
i++;
}
boolean unterminated = false;
boolean badEscape = false;
if (walker.isAtEnd()) {
unterminated = true;
} else if (source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) {
// The token ends just before the forbidden sequence, which is not consumed here.
badEscape = true;
} else {
// Include the closing terminator in the token.
walker.advance(end.intsLength());
i += end.intsLength();
}
Token result = new Token(TokenType.STRING, new SourceSnippet(startPosition, i));
if (unterminated) {
result.annotate(ErrorType.UNTERMINATED_TOKEN);
}
if (badEscape) {
result.annotate(ErrorType.BAD_ESCAPE);
}
return result;
} else if (source.matches(walker.getIndex(), language.getCharTerminators())) {
// Character literal: same shape as the string branch, using getCharTerminators.
// Escapes are still recognised through getStringEscapes.
SourcePosition startPosition = walker.getPosition();
String end = source.match(walker.getIndex(), language.getCharTerminators());
walker.advance(); // Must read at least the quote or it will likely match as end too!
int i = 1;
// NOTE(review): unlike the string loop, this loop does not stop at forbidden escapes (that
// condition is commented out). It only exits at the end of input or at the closing
// terminator, so the getCharForbiddenEscapes check after the loop can only succeed when the
// closing position itself matches a forbidden escape - confirm this is intended.
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) /*&& !sourceFile.matches(walker.getIndex(), language.getStringForbiddenEscapes())*/) {
if (source.matches(walker.getIndex(), language.getStringEscapes())) {
walker.advance();
i++;
}
walker.advance();
i++;
}
boolean unterminated = false;
boolean badEscape = false;
if (walker.isAtEnd()) {
unterminated = true;
} else if (source.matches(walker.getIndex(), language.getCharForbiddenEscapes())) {
badEscape = true;
} else {
walker.advance(end.intsLength());
i += end.intsLength();
}
Token result = new Token(TokenType.CHAR, new SourceSnippet(startPosition, i));
if (unterminated) {
result.annotate(ErrorType.UNTERMINATED_TOKEN);
}
if (badEscape) {
result.annotate(ErrorType.BAD_ESCAPE);
}
return result;
} /*else if (sourceFile.matches(walker.getIndex(), language.getHexPrefixes())) {
SourcePosition startPosition = walker.getPosition();
String prefix = sourceFile.match(walker.getIndex(), language.getHexPrefixes());
int i = prefix.intsLength();
walker.advance(i);
while (encoding.isDigitOrUnderscore(sourceFile.getCharacter(walker.getIndex()), 16)) {
walker.advance();
i++;
}
while (encoding.isIntegerSuffix(sourceFile.getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
return new Token(TokenType.HEX_INTEGER, new SourceSnippet(startPosition, i));
}*/ else if (source.matches(walker.getIndex(), language.getHexPrefixes())
|| source.matches(walker.getIndex(), language.getBinPrefixes())
|| encoding.isDigit(source.getCharacter(walker.getIndex()))
// Also check obscure case where float starts with a dot
|| (encoding.isDecimalPoint(source.getCharacter(walker.getIndex())) && encoding.isDigit(source.getCharacter(walker.getIndex() + 1)))) {
// Number: a hex prefix, a binary prefix, a digit, or a decimal point followed by a digit.
// NOTE(review): the loops in this branch call getCharacter without an isAtEnd check -
// presumably getCharacter yields a value that matches none of the classes once past the
// end of the source; confirm.
SourcePosition startPosition = walker.getPosition();
String prefix = "";
int i = 0;
boolean isFloat = false;
int radix = 10;
if (source.matches(walker.getIndex(), language.getHexPrefixes())) {
// Skip the whole prefix; digits that follow are validated in base 16.
prefix = source.match(walker.getIndex(), language.getHexPrefixes());
i = prefix.intsLength();
walker.advance(i);
radix = 16;
} else if (source.matches(walker.getIndex(), language.getBinPrefixes())) {
// Skip the whole prefix; digits that follow are validated in base 2.
prefix = source.match(walker.getIndex(), language.getBinPrefixes());
i = prefix.intsLength();
walker.advance(i);
radix = 2;
} else {
// Decimal: consume the first character, which is either a digit or the leading decimal point.
if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) {
isFloat = true;
}
walker.advance();
i = 1;
}
// Integer part (or, after a leading decimal point, the fraction digits).
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
walker.advance();
i++;
}
// Optional decimal point followed by more digits, validated against the same radix.
if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) {
isFloat = true;
walker.advance();
i++;
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
walker.advance();
i++;
}
}
// Optional exponent: symbol, optional sign, then digits validated against the same radix.
if (encoding.isExponentSymbol(source.getCharacter(walker.getIndex()))) {
isFloat = true; // Allow floats to be made using exponent alone without decimal point
walker.advance();
i++;
if (encoding.isSign(source.getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
walker.advance();
i++;
}
}
// Float suffixes are consumed first; integer suffixes are only looked for if the literal is
// still not a float at this point.
while (encoding.isFloatSuffix(source.getCharacter(walker.getIndex()))) {
isFloat = true; // Allow floats to be made just by adding a float suffix
walker.advance();
i++;
}
if (!isFloat) {
while (encoding.isIntegerSuffix(source.getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
}
// Pick the token type from the radix and from whether the literal turned out to be a float.
TokenType t;
if (isFloat) {
switch(radix) {
case 2:
t = TokenType.BIN_FLOAT;
break;
case 10:
t = TokenType.DEC_FLOAT;
break;
case 16:
t = TokenType.HEX_FLOAT;
break;
default:
throw new Error("INTERNAL ERROR: Bad base number: " + radix);
}
} else {
switch(radix) {
case 2:
t = TokenType.BIN_INTEGER;
break;
case 10:
t = TokenType.DEC_INTEGER;
break;
case 16:
t = TokenType.HEX_INTEGER;
break;
default:
throw new Error("INTERNAL ERROR: Bad base number: " + radix);
}
}
return new Token(t, new SourceSnippet(startPosition, i));
} else if (source.matches(walker.getIndex(), language.getOperators())) {
// Operator: the token covers exactly the operator string that match() reports here.
SourcePosition startPosition = walker.getPosition();
String op = source.match(walker.getIndex(), language.getOperators());
walker.advance(op.intsLength());
return new Token(TokenType.OPERATOR, new SourceSnippet(startPosition, op.intsLength()));
} else if (encoding.isValidNameHead(source.getCharacter(walker.getIndex()))) {
// Name or keyword: one head character, then as many tail characters as follow.
SourcePosition startPosition = walker.getPosition();
walker.advance();
int i = 1;
int c;
// The 'a'..'z' range test runs before isValidNameTail (presumably a fast path for the common case).
while (((c = source.getCharacter(walker.getIndex())) >= 'a' && c <= 'z') || encoding.isValidNameTail(c)) {
walker.advance();
i++;
}
SourceSnippet s = new SourceSnippet(startPosition, i);
String n = s.getSource();
String[] kws = language.getKeywords();
// NOTE(review): the text is compared to each keyword with ==. In plain Java that is a
// reference comparison - confirm this dialect compares string contents (or that both sides
// are interned); otherwise keywords would be reported as NAME tokens.
for (int j = 0; j < kws.length; j++) {
if (n == kws[j]) {
return new Token(TokenType.KEYWORD, s);
}
}
return new Token(TokenType.NAME, s);
} else {
// Nothing matched: consume a single character so scanning can continue, and flag it.
SourcePosition startPosition = walker.getPosition();
//throw new Error("Unexpected character #" + startPosition.getSource().getCharacter(startPosition.getIndex()));
walker.advance();
return new Token(TokenType.UNEXPECTED_CHARACTER, new SourceSnippet(startPosition, 1));
}
}
}