package slangc.parser; public class Scan { private final Language language; private final Source source; private final SourceEncoding encoding; private Token[] tokens = new Token[1000]; int ntokens = 0; public void reset() { tokens = new Token[20]; ntokens = 0; } public Scan(Language language, Source source) { this.language = language; this.source = source; encoding = source.getEncoding(); //Log.line("Encoding=" + source.getEncoding()); } public Language getLanguage() { return language; } public Source getSource() { return source; } public int countTokens() { return ntokens; //tokens.length; } public Token getToken(int index) { if (index < 0 || index >= ntokens /*tokens.length*/) { return null; } else { return tokens[index]; } } public int scanAll() { SourceWalker w = new SourceWalker(source); Token t; int i = 0; do { t = tokenAt(w); append(t); i++; } while (!t.is(TokenType.END_OF_FILE)); return i; } boolean nullifyComments = false; public int scanQuick() { nullifyComments = true; SourceWalker w = new SourceWalker(source); Token t; int i = 0; do { t = tokenAt(w); if (t == null || t.is(TokenType.LINE_COMMENT) || t.is(TokenType.LONG_COMMENT)) { // Ignore these } else { append(t); i++; } } while (t == null || !t.is(TokenType.END_OF_FILE)); return i; } public int scanPedantic() { nullifyComments = false; SourceWalker w = new SourceWalker(source); Token t; Token precedingNonComment = null; Token[] precedingComments = new Token[] {}; int i = 0; do { t = tokenAt(w); if (t.is(TokenType.LINE_COMMENT) || t.is(TokenType.LONG_COMMENT)) { if (precedingNonComment != null && precedingNonComment.getSnippet().getStart().getLineCount() == t.getSnippet().getStart().getLineCount()) { precedingNonComment.annotate(new CommentAnnotation(t)); } else { Token[] nprecoms = new Token[precedingComments.length + 1]; for (int j = 0; j < precedingComments.length; j++) { nprecoms[j] = precedingComments[j]; } nprecoms[nprecoms.length - 1] = t; precedingComments = nprecoms; } } else { precedingNonComment = t; if (precedingComments.length != 0) { for (int j = 0; j < precedingComments.length; j++) { precedingNonComment.annotate(new CommentAnnotation(precedingComments[j])); } precedingComments = new Token[] {}; } append(t); i++; } } while (!t.is(TokenType.END_OF_FILE)); return i; } public void append(Token t) { if (ntokens >= tokens.length) { Token[] narr = new Token[tokens.length * 2 + 1]; for (int i = 0; i < tokens.length; i++) { narr[i] = tokens[i]; } tokens = narr; } tokens[ntokens] = t; t.setOwner(this); ntokens++; /* Token[] ntokens = new Token[tokens.length + 1]; for (int i = 0; i < tokens.length; i++) { ntokens[i] = tokens[i]; } ntokens[ntokens.length - 1] = t; t.setOwner(this); tokens = ntokens; */ } public /*static */void skipSpaces(SourceWalker walker) { while (!walker.isAtEnd() && encoding.isSpace(walker.getSource().getCharacter(walker.getIndex()))) { walker.advance(); } } public Token tokenAt(SourceWalker walker) { Token t = innerTokenAt(walker); if (t != null) { t.setOwner(this); } return t; } public Token innerTokenAt(SourceWalker walker) { //assert walker.getSource() == source; //position = position.clone(); skipSpaces(walker); if (walker.isAtEnd()) { return new Token(TokenType.END_OF_FILE, new SourceSnippet(walker.getPosition(), 0)); } if (source.matches(walker.getIndex(), language.getLineCommentPrefixes())) { SourcePosition startPosition = walker.getPosition(); int i = 0; while (!walker.isAtEnd() && !encoding.isNewline(walker.getSource().getCharacter(walker.getIndex()))) { walker.advance(); i++; } if (nullifyComments) return null; return new Token(TokenType.LINE_COMMENT, new SourceSnippet(startPosition, i)); } else if (source.matches(walker.getIndex(), language.getLongCommentTerminators())) { SourcePosition startPosition = walker.getPosition(); String end = source.match(walker.getIndex(), language.getLongCommentTerminators()); int i = 0; while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end)) { walker.advance(); i++; } boolean unterminated = false; if (walker.isAtEnd()) { unterminated = true; } else { walker.advance(end.intsLength()); i += end.intsLength(); } if (nullifyComments && !unterminated) return null; Token result = new Token(TokenType.LONG_COMMENT, new SourceSnippet(startPosition, i)); if (unterminated) { result.annotate(ErrorType.UNTERMINATED_TOKEN); } return result; } else if (source.matches(walker.getIndex(), language.getStringTerminators())) { SourcePosition startPosition = walker.getPosition(); String end = source.match(walker.getIndex(), language.getStringTerminators()); walker.advance(); // Must read at least the quote or it will likely match as end too! int i = 1; while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) && !source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) { if (source.matches(walker.getIndex(), language.getStringEscapes())) { walker.advance(); i++; } walker.advance(); i++; } boolean unterminated = false; boolean badEscape = false; if (walker.isAtEnd()) { unterminated = true; } else if (source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) { badEscape = true; } else { walker.advance(end.intsLength()); i += end.intsLength(); } Token result = new Token(TokenType.STRING, new SourceSnippet(startPosition, i)); if (unterminated) { result.annotate(ErrorType.UNTERMINATED_TOKEN); } if (badEscape) { result.annotate(ErrorType.BAD_ESCAPE); } return result; } else if (source.matches(walker.getIndex(), language.getCharTerminators())) { SourcePosition startPosition = walker.getPosition(); String end = source.match(walker.getIndex(), language.getCharTerminators()); walker.advance(); // Must read at least the quote or it will likely match as end too! int i = 1; while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) /*&& !sourceFile.matches(walker.getIndex(), language.getStringForbiddenEscapes())*/) { if (source.matches(walker.getIndex(), language.getStringEscapes())) { walker.advance(); i++; } walker.advance(); i++; } boolean unterminated = false; boolean badEscape = false; if (walker.isAtEnd()) { unterminated = true; } else if (source.matches(walker.getIndex(), language.getCharForbiddenEscapes())) { badEscape = true; } else { walker.advance(end.intsLength()); i += end.intsLength(); } Token result = new Token(TokenType.CHAR, new SourceSnippet(startPosition, i)); if (unterminated) { result.annotate(ErrorType.UNTERMINATED_TOKEN); } if (badEscape) { result.annotate(ErrorType.BAD_ESCAPE); } return result; } /*else if (sourceFile.matches(walker.getIndex(), language.getHexPrefixes())) { SourcePosition startPosition = walker.getPosition(); String prefix = sourceFile.match(walker.getIndex(), language.getHexPrefixes()); int i = prefix.intsLength(); walker.advance(i); while (encoding.isDigitOrUnderscore(sourceFile.getCharacter(walker.getIndex()), 16)) { walker.advance(); i++; } while (encoding.isIntegerSuffix(sourceFile.getCharacter(walker.getIndex()))) { walker.advance(); i++; } return new Token(TokenType.HEX_INTEGER, new SourceSnippet(startPosition, i)); }*/ else if (source.matches(walker.getIndex(), language.getHexPrefixes()) || source.matches(walker.getIndex(), language.getBinPrefixes()) || encoding.isDigit(source.getCharacter(walker.getIndex())) // Also check obscure case where float starts with a dot || (encoding.isDecimalPoint(source.getCharacter(walker.getIndex())) && encoding.isDigit(source.getCharacter(walker.getIndex() + 1)))) { SourcePosition startPosition = walker.getPosition(); String prefix = ""; int i = 0; boolean isFloat = false; int radix = 10; if (source.matches(walker.getIndex(), language.getHexPrefixes())) { prefix = source.match(walker.getIndex(), language.getHexPrefixes()); i = prefix.intsLength(); walker.advance(i); radix = 16; } else if (source.matches(walker.getIndex(), language.getBinPrefixes())) { prefix = source.match(walker.getIndex(), language.getBinPrefixes()); i = prefix.intsLength(); walker.advance(i); radix = 2; } else { if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) { isFloat = true; } walker.advance(); i = 1; } while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) { walker.advance(); i++; } if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) { isFloat = true; walker.advance(); i++; while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) { walker.advance(); i++; } } if (encoding.isExponentSymbol(source.getCharacter(walker.getIndex()))) { isFloat = true; // Allow floats to be made using exponent alone without decimal point walker.advance(); i++; if (encoding.isSign(source.getCharacter(walker.getIndex()))) { walker.advance(); i++; } while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) { walker.advance(); i++; } } while (encoding.isFloatSuffix(source.getCharacter(walker.getIndex()))) { isFloat = true; // Allow floats to be made just by adding a float suffix walker.advance(); i++; } if (!isFloat) { while (encoding.isIntegerSuffix(source.getCharacter(walker.getIndex()))) { walker.advance(); i++; } } TokenType t; if (isFloat) { switch(radix) { case 2: t = TokenType.BIN_FLOAT; break; case 10: t = TokenType.DEC_FLOAT; break; case 16: t = TokenType.HEX_FLOAT; break; default: throw new Error("INTERNAL ERROR: Bad base number: " + radix); } } else { switch(radix) { case 2: t = TokenType.BIN_INTEGER; break; case 10: t = TokenType.DEC_INTEGER; break; case 16: t = TokenType.HEX_INTEGER; break; default: throw new Error("INTERNAL ERROR: Bad base number: " + radix); } } return new Token(t, new SourceSnippet(startPosition, i)); } else if (source.matches(walker.getIndex(), language.getOperators())) { SourcePosition startPosition = walker.getPosition(); String op = source.match(walker.getIndex(), language.getOperators()); walker.advance(op.intsLength()); return new Token(TokenType.OPERATOR, new SourceSnippet(startPosition, op.intsLength())); } else if (encoding.isValidNameHead(source.getCharacter(walker.getIndex()))) { SourcePosition startPosition = walker.getPosition(); walker.advance(); int i = 1; int c; while (((c = source.getCharacter(walker.getIndex())) >= 'a' && c <= 'z') || encoding.isValidNameTail(c)) { walker.advance(); i++; } SourceSnippet s = new SourceSnippet(startPosition, i); String n = s.getSource(); String[] kws = language.getKeywords(); for (int j = 0; j < kws.length; j++) { if (n == kws[j]) { return new Token(TokenType.KEYWORD, s); } } return new Token(TokenType.NAME, s); } else { SourcePosition startPosition = walker.getPosition(); //throw new Error("Unexpected character #" + startPosition.getSource().getCharacter(startPosition.getIndex())); walker.advance(); return new Token(TokenType.UNEXPECTED_CHARACTER, new SourceSnippet(startPosition, 1)); } } }