Initial commit of main compiler sources (or should I say ... SAUCES!)

2025-06-08 23:58:21 +10:00
parent 60c566025c
commit 06f2613083
214 changed files with 22210 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
package slangc.parser;
public abstract class Annotation extends Node {
public Annotation() {
// TODO Auto-generated constructor stub
}
@Override
public final NodeType getNodeType() {
return NodeType.ANNOTATION;
}
public abstract AnnotationType getAnnotationType();
}

View File

@@ -0,0 +1,9 @@
package slangc.parser;
public enum AnnotationType {
ERROR,
WARNING,
NOTE,
COMMENT,
LOCATION
}

122
slangc/parser/Branch.sauce Normal file
View File

@@ -0,0 +1,122 @@
package slangc.parser;
import slang.streams.SyncOutput;
import slang.streams.SyncInput;
public final class Branch extends Node {
private NodeType type;
private Node[] subnodes;
public Branch(NodeType type, Node... subnodes ) {
this.type = type;
this.subnodes = subnodes;
}
Branch(NodeType type, int nnodes) {
this.type = type;
subnodes = new Node[nnodes];
}
@Override
public NodeType getNodeType() {
return type;
}
private static byte[] outbuffer = new byte[7];
public static Branch loadBinary(Scan sc, NodeType typ, SyncInput<byte> inp) {
inp.readBuffer(outbuffer, 0, 1);
int len = ((int) outbuffer[0]) & 0xFF;
if (len == 255) {
inp.readBuffer(outbuffer, 0, 4);
len = Token.decInt(outbuffer, 0); // Long form: read the real length (don't shadow the outer len, or it would stay 255)
//Log.line("Type " + typ.name() + " length " + len);
if (len < 0 || len > 1000) {
Log.line("WARNING: Unusual length in binary cache");
}
}
Branch b = new Branch(typ, len);
for (int i = 0; i < len; i++) {
b.subnodes[i] = Node.loadBinary(sc,inp);
}
return b;
}
public void dumpBinary(SyncOutput<byte> o) {
if (type.value > 1000) {
throw new Error("Too many node types for this simple format!");
}
outbuffer[0] = (byte) (type.value >> 8);
outbuffer[1] = (byte) (type.value);
if (subnodes.length > 200) {
outbuffer[2] = (byte) 255;
Token.encInt(outbuffer, 3, subnodes.length);
o.writeBuffer(outbuffer, 0, 7);
} else {
outbuffer[2] = (byte) subnodes.length;
o.writeBuffer(outbuffer, 0, 3);
}
for (int i = 0; i < subnodes.length; i++) {
if (subnodes[i] == null) {
//while(true){}
throw new Error("Can't encode nulls in parse dumps");
}
subnodes[i].dumpBinary(o);
}
//throw new Error("Can't dump AST nodes of type " + Type.of(this).fullName());
}
public void append(Node n) {
Node[] nsubnodes = new Node[subnodes.length + 1];
for (int i = 0; i < subnodes.length; i++) {
nsubnodes[i] = subnodes[i];
}
nsubnodes[nsubnodes.length - 1] = n;
subnodes = nsubnodes;
}
public int countSubnodes() {
return subnodes.length;
}
public Node getSubnode(int index) {
if (index < 0 || index >= subnodes.length) {
return null;
} else {
return subnodes[index];
}
}
@Override
public int countErrorsRecursively() {
if (!mightHaveErrors) {
return 0;
}
int result = super.countErrorsRecursively();
for (int i = 0; i < subnodes.length; i++) {
if (subnodes[i] == null) {
result++;
} else {
result += subnodes[i].countErrorsRecursively();
}
}
return result;
}
@Override
public int deleteErrorsRecursively() {
int result = super.deleteErrorsRecursively();
for (int i = 0; i < subnodes.length; i++) {
if (subnodes[i] == null) {
result++;
} else {
result += subnodes[i].deleteErrorsRecursively();
}
}
return result;
}
}
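
A rough usage sketch of the Branch/Node API above; it relies only on the constructor, append(), getSubnode(), countSubnodes() and the annotate()/countErrorsRecursively() calls shown in this file and in Node.sauce, plus slang's Log.line() already used above:

Branch name = new Branch(NodeType.NAME);
Branch expr = new Branch(NodeType.EXPRESSION, name); // varargs constructor
expr.append(new Branch(NodeType.SEMICOLON)); // grow-by-copy append
expr.getSubnode(0).annotate(ErrorType.MISSING_PART, "expected an identifier here");
Log.line("subnodes: " + expr.countSubnodes()); // 2
Log.line("errors: " + expr.countErrorsRecursively()); // 1, found by walking into the NAME subnode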

View File

@@ -0,0 +1,257 @@
package slangc.parser;
import slang.data.List;
import slangc.streams.LogWriter;
public class CleopatraModeLanguage extends Language {
private Option[] options = new Option[0];
private final boolean enableFallbacks;
public CleopatraModeLanguage(boolean enableFallbacks) {
this.enableFallbacks = enableFallbacks;
}
public static class Option {
public final String language;
public final Key key;
public final String value;
public Option(String language, Key key, String value) {
this.language = language;
this.key = key;
this.value = value;
}
}
public void addOption(Option o) {
Option[] newarr = new Option[options.length + 1];
for (int i = 0; i < newarr.length; i++) {
newarr[i] = (i < options.length ? options[i] : o);
}
options = newarr;
}
public void addOption(String language, Key key, String value) {
addOption(new Option(language, key, value));
}
public void addOption(Medu medu, Key key, String value) {
addOption(""+medu /*.name()*/, key, value);
}
String[] cachedKeywords = null;
@Override
public String[] getKeywords() {
if (cachedKeywords == null) {
cachedKeywords = recalculateKeywords();
}
return cachedKeywords;
}
public String[] recalculateKeywords() {
String[] initial;
if (enableFallbacks) {
initial = super.getKeywords();
} else {
initial = new String[0];
}
String[] result = new String[initial.length + options.length];
for (int i = 0; i < result.length; i++) {
result[i] = (i < initial.length ? initial[i] : options[i - initial.length].value);
}
return result;
}
@Override
public boolean matches(Key key, String str) {
for (int i = 0; i < options.length; i++) {
if (options[i].key == key && options[i].value.equals(str)) {
return true;
}
}
if (enableFallbacks) {
return super.matches(key, str);
} else {
return false;
}
}
public enum Medu {
USERDEFINED,
NETER,
NUBIAN,
COPTIC,
GREEK
}
public void addMedu(Medu m) {
switch (m) {
case Medu.NUBIAN:
addNubian();
break;
case Medu.COPTIC:
addCoptic();
break;
default:
throw new Error("TODO: This medu mode " + m + " is not implemented yet");
}
}
public void printTable(LogWriter out) {
/*
List<String> columns = new List<String>();
for (int i = 0; i < options.length; i++) {
if (!columns.contains(options[i].language)) {
columns.add(options[i].language);
}
}
out.print("[%header,cols=\"1,");
for (int i = 0; i < columns.size(); i++) {
if (i != 0) {
out.print(",");
}
out.print(2);
}
out.println("\"]");
out.println("|===");
out.print("|Keyword Id ");
for (int i = 0; i < columns.size(); i++) {
//if (i != 0) {
// out.print(",");
//}
out.print("|" + columns.get(i) + " ");
}
out.println();
out.println();
for (int i = 0; i < Key.values().length; i++) {
printTableRow(out, columns, Key.values()[i]);
out.println();
}
out.println("|===");
*/
throw new Error("TODO: Printing language tables");
}
public void printTableRow(LogWriter out, List<String> columns, Key key) {
out.print("|" + key + " ");
for (int i = 0; i < columns.count(); i++) {
out.print("| ");
boolean addedOne = false;
for (int j = 0; j < options.length; j++) {
Option o = options[j];
if (o.key == key && o.language.equals(columns.get(i))) {
if (addedOne) {
out.print(", ");
} else {
addedOne = true;
}
out.print("`" + o.value + "`");
}
}
if (addedOne) {
out.print(" ");
}
}
out.println();
}
public void addNubian() {
addOption(Medu.NUBIAN, Key.CLASS, "ⲕⲗⲁⲥⲥ");
addOption(Medu.NUBIAN, Key.PUBLIC, "ⲡⲩⲃⲗⲓⲕ");
// From "An English-Nubian Comparative Dictionary", 1923
// NOTE: There is a small but strong Nubian community on Twitter that can provide more links.
addOption(Medu.NUBIAN, Key.THIS, "ⲉⲓⲛ"); // p.xxxv, p.73 (note: "that" would be ⲙⲁⲛ)
addOption(Medu.NUBIAN, Key.BREAK, "ⲡⲁⲗ"); // p.49
addOption(Medu.NUBIAN, Key.TRUE, "ⲉⲩⲟ"); // ⲡ.48 ("yes", speculative spelling, only phonetics provided)
addOption(Medu.NUBIAN, Key.VOID, "ⲡⲁⲣⲕ"); //ⲡ.50 (a valley or place water goes without releasing)
addOption(Medu.NUBIAN, Key.IF, "ⲡⲉϣϣ"); //ⲡ.51 ("judge")
addOption(Medu.NUBIAN, Key.IF, "ⲟⲛ"); // p.188 (and/if)
addOption(Medu.NUBIAN, Key.STATIC, "ⲡⲓ"); //ⲡ.52 (i.e. to stay, as in the sentence "I'm staying at home")
addOption(Medu.NUBIAN, Key.RETURN, "ⳟⲓⲡⲓⲣⲧ"); //p.60 (repeat or return, NOTE: need to check first character)
addOption(Medu.NUBIAN, Key.THROW, "ⳝⲟⳝ"); //ⲡ.62 (kill/fight/sacrifice)
addOption(Medu.NUBIAN, Key.THROW, "ⲙⲟⲩⲣⲧ"); //ⲡ.125 (warn)
addOption(Medu.NUBIAN, Key.FOR, "ⲕⲉⲗ"); // p.95 (limit or boundary, related to the word used for WHILE)
addOption(Medu.NUBIAN, Key.WHILE, "ⲕⲉⲗⲗ"); //p.95 ("as much as", "sometimes", related to the word used for FOR)
addOption(Medu.NUBIAN, Key.DUCK, "ⲕⲉⲗⲁⲓ"); // p.95 (wild duck, speculative spelling, only phonetics provided)
addOption(Medu.NUBIAN, Key.NULL, "ⲕⲓⳡ"); // p.99 (empty or exhausted, NOTE: need to check last character)
addOption(Medu.NUBIAN, Key.FINAL, "ⲕⲓⲣⲓⳟ"); // p.100 (finished or complete, NOTE: need to check last character)
addOption(Medu.NUBIAN, Key.ABSTRACT, "ⲙⲓⲛⲇⲓ"); // ⲡ.122 (partial, incomplete, NOTE: this seems to be a rarely-used word)
addOption(Medu.NUBIAN, Key.NEW, "ⳟⲁⲓⲉⲣ"); // ⲡ.134 ("bring to life")
addOption(Medu.NUBIAN, Key.INT, "ⲥⲁⲗ"); // p.144 ("word", which fits with an old way of basically saying int - "machine word")
addOption(Medu.NUBIAN, Key.EXTENDS, "ⲥⲉⲩ"); // p.147 ("inherit")
addOption(Medu.NUBIAN, Key.MODULE, "ϣⲟⲗ"); // p.157 (a book or charm)
addOption(Medu.NUBIAN, Key.PRIVATE, "ⲧⲟⲩ"); // p.169 (internals, belly/guts)
addOption(Medu.NUBIAN, Key.THIS, "ⲟⲩⲣ"); // p. 178 ("self")
// p.161 for class/etc.?
// ⲉⲧⲉⲣ for "do"? p.48
}
public void addCoptic() {
// From "Aegyptisches Glossar", 1904
// NOTE: This source was chosen as the primary one because it focuses on words
// present in both Medu Neter and Coptic (also with many connections to Greek, Hebrew,
// Arabic and so on).
addOption(Medu.COPTIC, Key.WHILE, "ⲟⲩⲱϩⲙ"); // p.32, "repeat"/"to repeat"
addOption(Medu.COPTIC, Key.FOR, "ⲟⲩⲱϣ"); // p.32, "search"
// NOTE: See also p.56 for iteration-related words
addOption(Medu.COPTIC, Key.RETURN, "ⲟⲩⲱϣⲃ"); // p.34, "answer"
addOption(Medu.COPTIC, Key.TRY, "ⲟⲩⲇⲁⲓ"); // p.35, "be safe"
// NOTE: See also p.56 for "witness", p.65 for "harness"
addOption(Medu.COPTIC, Key.THROW, "ⲕⲱ"); // p.90, literally "throw" or "leave"
addOption(Medu.COPTIC, Key.DO, "ⲃⲁⲕ"); // p.39, "work"
addOption(Medu.COPTIC, Key.CATCH, "ⲡⲏⲓ"); // p.41, "escaped" (fits with "catch" because that's why you need to catch it!)
addOption(Medu.COPTIC, Key.FINALLY, "ⲁϩⲟⲩ"); // p.43, "the end"/"the rear" (as in "reaching the end"? literally "ahoy"? The Euros seem to think not)
addOption(Medu.COPTIC, Key.FINALLY, "ⲡⲁϩⲟⲩ"); // Alternative spelling
// See also p.124 for a word for "tail"
addOption(Medu.COPTIC, Key.FINAL, "ⲙⲟⲩⲛⲕ"); // p.51, "complete" (literally "monk")
addOption(Medu.COPTIC, Key.FINAL, "ⲧⲱⲱⲃⲉ"); // p. 155, "seal"
addOption(Medu.COPTIC, Key.SUPER, "ⲡⲧⲁϩ"); // p.45 (Ptah = Phthah, we still use the same word "father" in English)
addOption(Medu.COPTIC, Key.SUPER, "ⲙⲁⲁⲩ"); // p.49, (Maa', from which mother is derived)
addOption(Medu.COPTIC, Key.ELSE, "ⲙ"); // p.46, "Negation of the Imperative" lmao perfect (ⲙ︦? needs line above?)
addOption(Medu.COPTIC, Key.TRUE, "ⲙⲉ"); // p.47, "true"
addOption(Medu.COPTIC, Key.TRUE, "ⲙⲏⲓ"); // Alternative spelling? (or I've interpreted this entry incorrectly...)
addOption(Medu.COPTIC, Key.STATIC, "ⲙⲟⲩⲛ"); // p.50, "stay"
addOption(Medu.COPTIC, Key.NEW, "ⲙⲓⲥⲉ"); // p.55, "give birth", "generate", "form"
addOption(Medu.COPTIC, Key.VOID, "ⲛⲟⲩϥⲉⲛ"); // p.62, spelling assumed based on nearby term. Intended meaning is "never", related to "nether".
addOption(Medu.COPTIC, Key.NULL, "ϣⲱⲩⲉⲓⲧ"); // p.127, "be empty of something"
addOption(Medu.COPTIC, Key.DUCK, ""); // p.70, slight pun (a goose is basically a duck right?)
addOption(Medu.COPTIC, Key.BREAK, "ⲗⲟ"); // p.71, "stop" or "flee"
addOption(Medu.COPTIC, Key.PRIVATE, "ϩⲱⲃⲥ"); // p.82, "conceal" or "dress"
addOption(Medu.COPTIC, Key.SWITCH, "ⲥⲱⲧⲡ"); // p.122, "choose"
addOption(Medu.COPTIC, Key.CASE, "ⲥⲟⲡ"); // p.103, "type of" or "case of" (note, type seems to be "Art" in German?)
addOption(Medu.COPTIC, Key.SHORT, "ϣⲓⲣⲉ"); // p.130, "be small"
addOption(Medu.COPTIC, Key.BYTE, "ϣⲏⲣⲉ"); // p.130, "smaller" (literally same as ϣⲓⲣⲉ except shorter "i" vowel)
addOption(Medu.COPTIC, Key.CLASS, "ⲕⲟⲧ"); // p.135, "circle", "shape", "kind", "essence"
// p.140 for "equip" as extends/implements? 151 for similar
// p.153 for "enter" as main?
// p.33 for "say" as logLine?
// p.45 for "run" ?
// p.58 for "depth" as length?
}
public boolean applySetting(String key, String value, boolean reset, boolean append) {
if (reset && key == "*") {
options = new Option[0];
return true;
} else if (append && value != null && !reset) {
Key k;
try {
k = (Key) Key.lookup(Key.class, key);
} catch (Error e) {
return false;
}
addOption(Medu.USERDEFINED, k, value);
return true;
} else {
return super.applySetting(key, value, reset, append);
}
}
}
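
A quick sketch of how the keyword tables above are meant to be used (hypothetical usage, based only on the addMedu()/matches() methods in this file):

CleopatraModeLanguage lang = new CleopatraModeLanguage(true); // keep the default keywords as fallbacks
lang.addMedu(CleopatraModeLanguage.Medu.COPTIC);
Log.line("" + lang.matches(Language.Key.CLASS, "ⲕⲟⲧ")); // true, added by addCoptic()
Log.line("" + lang.matches(Language.Key.CLASS, "class")); // true, falls back to Language.matches()
Log.line("" + lang.matches(Language.Key.CLASS, "ⲕⲗⲁⲥⲥ")); // false, that spelling is only in the Nubian table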

View File

@@ -0,0 +1,23 @@
package slangc.parser;
public final class CommentAnnotation extends Annotation {
private final Token comment;
public CommentAnnotation(Token comment) {
this.comment = comment;
}
public Token getComment() {
return comment;
}
@Override
public AnnotationType getAnnotationType() {
return AnnotationType.COMMENT;
}
@Override
public String toString() {
return "CommentAnnotation(" + comment.toString() + ")";
}
}

View File

@@ -0,0 +1,23 @@
package slangc.parser;
public final class ErrorAnnotation extends Annotation {
private final ErrorType errorType;
public ErrorAnnotation(ErrorType errorType) {
this.errorType = errorType;
}
public ErrorType getErrorType() {
return errorType;
}
@Override
public String toString() {
return "ErrorAnnotation(" + errorType.name() + ")";
}
@Override
public AnnotationType getAnnotationType() {
return AnnotationType.ERROR;
}
}

View File

@@ -0,0 +1,10 @@
package slangc.parser;
public enum ErrorType {
INTERNAL_ERROR, // Shouldn't really be used,
UNTERMINATED_TOKEN,
EXPECTING,
MISSING_PART,
UNEXPECTED_TOKEN,
BAD_ESCAPE
}

View File

@@ -0,0 +1,310 @@
package slangc.parser;
/**
* Based on experience with previous scanners, I decided to split the language definition from the scanner source
* as much as practical. This makes it a lot easier to reuse scanner code in other projects later.
*
* @author Zak
*
*/
public class Language {
public static final String[] DEFAULT_LINE_COMMENT_PREFIXES = new String[] {"#!", "//"};
public static final String[][] DEFAULT_LONG_COMMENT_TERMINATORS = new String[][] {new String[] {"/*", "*/"}};
public static final String[][] DEFAULT_STRING_TERMINATORS = new String[][] {new String[] {"\"", "\""}};
public static final String[][] DEFAULT_CHAR_TERMINATORS = new String[][] {new String[] {"'", "'"}};
public static final String[][] DEFAULT_ESCAPES = new String[][] {
new String[] {"\\n", "\n"}, new String[] {"\\t", "\t"},
new String[] {"\\\"", "\""}, new String[] {"\\'", "'"},
new String[] {"\\\\", "\\"}
};
public static final String[] DEFAULT_STRING_FORBIDDEN_ESCAPES = new String[] {/*"\\", "\""*/};
public static final String[] DEFAULT_CHAR_FORBIDDEN_ESCAPES = new String[] {"\\"/*, "'"*/};
public static final String[] DEFAULT_HEX_PREFIXES = new String[] { "0x", "0X" };
public static final String[] DEFAULT_BIN_PREFIXES = new String[] { "0b", "0B" };
public static final String[] DEFAULT_OPERATORS = new String[] {
"{", "}", "(", ")", "[", "]",
"==",
"+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "++", "--", "=",
"&&", "||", "&", "|", "?", "::", ":", "->",
"<<=", ">>=", ">>>=",
"<<",
/*">>>", ">>",*/ /* These are handled as special cases of ">", to allow for ">>" in generic types */
"!=", "<=", ">=", "<", ">",
"+", "-", "*", "/", "%", "^",
"!",
"~",
// The "@" & "..." operators could possibly be disabled for some legacy code,
// but there's no practical need to be that strict
"@", "...", "##","#",
".", ",", ";"
};
public enum Key {
MODULE,
IMPORT,
PUBLIC,
PRIVATE,
PROTECTED,
ABSTRACT,
NATIVE,
SYNCHRONISED,
CLASS,
INTERFACE,
ENUM,
IMPLEMENTS,
EXTENDS,
THROWS,
VOID,
BIT,
BYTE,
SHORT,
COMPAT_CHAR,
INT,
LONG,
FLOAT,
DOUBLE,
IF,
ELSE,
DO,
WHILE,
FOR,
BREAK,
CONTINUE,
SWITCH,
CASE,
DEFAULT,
RETURN,
TRY,
CATCH,
FINALLY,
THROW,
ASSERT,
NEW,
TRUE,
FALSE,
NULL,
THIS,
SUPER,
INSTANCEOF,
TRANSIENT,
VOLATILE,
COMPAT_STRICTFP,
GOTO,
CONST,
WILDCARD,
NON_SEALED,
POINTER,
DUCK,
STATIC, FINAL
}
public static final String[] DEFAULT_KEYWORDS = new String[] {
"package", "module", "import",
"public", "private", "protected", "final", "abstract", "static", "native",
"synchronized", "synchronised",
"class", "interface",
"enum",// NOTE: The enum keyword will be disabled if necessary by the LegacyLanguage subclass
"implements", "extends", "throws",
"void",
"boolean", "byte", "short", "char", "int", "long", "float", "double",
"if", "else", "do", "while", "for", "break", "continue", "switch", "case", "default", "return",
"try", "catch", "finally", "throw", "assert",
"new",
"true", "false", "null",
"this", "super",
"instanceof",
"transient", "volatile", "strictfp",
"goto", "const", // These are reserved but not implemented
// This will be disabled by LegacyLanguage as necessary:
"_", // A special keyword as of v9
"non-sealed", // Added at some point???
// Not technically keywords: "var", // Added in v10
// Also not technically keywords: "record", "permits", "sealed", "yield"
// These will also be disabled for legacy code:
//"int8","int16","int32","int64","uint8","uint16","uint32","uint64",
"pointer",
"duck"
// maybe "cpuword" & "ucpuword"?
};
public Language() {
// TODO Auto-generated constructor stub
}
public String[][] getStringEscapes() {
return DEFAULT_ESCAPES;
}
public String[] getLineCommentPrefixes() {
return DEFAULT_LINE_COMMENT_PREFIXES;
}
public String[][] getLongCommentTerminators() {
return DEFAULT_LONG_COMMENT_TERMINATORS;
}
public String[][] getStringTerminators() {
return DEFAULT_STRING_TERMINATORS;
}
public String[] getStringForbiddenEscapes() {
return DEFAULT_STRING_FORBIDDEN_ESCAPES;
}
public String[][] getCharTerminators() {
return DEFAULT_CHAR_TERMINATORS;
}
public String[] getCharForbiddenEscapes() {
return DEFAULT_CHAR_FORBIDDEN_ESCAPES;
}
public String[][] getEscapes() {
// TODO: Better escape handling...
return DEFAULT_ESCAPES;
}
public String[] getHexPrefixes() {
return DEFAULT_HEX_PREFIXES;
}
public String[] getBinPrefixes() {
return DEFAULT_BIN_PREFIXES;
}
public String[] getOperators() {
return DEFAULT_OPERATORS;
}
public String[] getKeywords() {
return DEFAULT_KEYWORDS;
}
public boolean matches(Key key, String str) {
switch (key) {
case Key.MODULE:
return str.equals("module") || str.equals("package");
case Key.IMPORT:
return str.equals("import");
case Key.PUBLIC:
return str.equals("public");
case Key.PRIVATE:
return str.equals("private");
case Key.PROTECTED:
return str.equals("protected");
case Key.ABSTRACT:
return str.equals("abstract");
case Key.NATIVE:
return str.equals("native");
case Key.SYNCHRONISED:
return str.equals("synchronised") || str.equals("synchronized");
case Key.CLASS:
return str.equals("class");
case Key.INTERFACE:
return str.equals("interface");
case Key.ENUM:
return str.equals("enum");
case Key.IMPLEMENTS:
return str.equals("implements");
case Key.EXTENDS:
return str.equals("extends");
case Key.THROWS:
return str.equals("throws");
case Key.VOID:
return str.equals("void");
case Key.BIT:
return str.equals("boolean");
case Key.BYTE:
return str.equals("byte");
case Key.SHORT:
return str.equals("short");
case Key.COMPAT_CHAR:
return str.equals("char");
case Key.INT:
return str.equals("int");
case Key.LONG:
return str.equals("long");
case Key.FLOAT:
return str.equals("float");
case Key.DOUBLE:
return str.equals("double");
case Key.IF:
return str.equals("if");
case Key.ELSE:
return str.equals("else");
case Key.DO:
return str.equals("do");
case Key.WHILE:
return str.equals("while");
case Key.FOR:
return str.equals("for");
case Key.BREAK:
return str.equals("break");
case Key.CONTINUE:
return str.equals("continue");
case Key.SWITCH:
return str.equals("switch");
case Key.CASE:
return str.equals("case");
case Key.DEFAULT:
return str.equals("default");
case Key.RETURN:
return str.equals("return");
case Key.TRY:
return str.equals("try");
case Key.CATCH:
return str.equals("catch");
case Key.FINALLY:
return str.equals("finally");
case Key.THROW:
return str.equals("throw");
case Key.ASSERT:
return str.equals("assert");
case Key.NEW:
return str.equals("new");
case Key.TRUE:
return str.equals("true");
case Key.FALSE:
return str.equals("false");
case Key.NULL:
return str.equals("null");
case Key.THIS:
return str.equals("this");
case Key.SUPER:
return str.equals("super");
case Key.INSTANCEOF:
return str.equals("instanceof");
case Key.TRANSIENT:
return str.equals("transient");
case Key.VOLATILE:
return str.equals("volatile");
case Key.COMPAT_STRICTFP:
return str.equals("strictfp");
case Key.GOTO:
return str.equals("goto");
case Key.CONST:
return str.equals("const");
case Key.WILDCARD:
return str.equals("wildcard");
case Key.NON_SEALED:
return str.equals("non-sealed");
case Key.POINTER:
return str.equals("pointer");
case Key.DUCK:
return str.equals("duck");
case Key.STATIC:
return str.equals("static");
case Key.FINAL:
return str.equals("final");
default:
throw new Error("TODO: Keyword matching for " + key);
}
}
public boolean applySetting(String key, String value, boolean reset, boolean append) {
return false; // This should only return true if the setting is applied
}
public String getSettingInfo(String key) {
return "Setting info cannot be retrieved";
}
}
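
A small sketch of the synonym handling in matches() above; nothing here goes beyond the default tables in this file:

Language lang = new Language();
Log.line("" + lang.matches(Language.Key.MODULE, "package")); // true, MODULE accepts "module" and "package"
Log.line("" + lang.matches(Language.Key.SYNCHRONISED, "synchronized")); // true, both spellings are accepted
Log.line("" + lang.matches(Language.Key.BIT, "boolean")); // true, BIT is spelt "boolean" in the default table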

View File

@@ -0,0 +1,82 @@
package slangc.parser;
/**
* An extension to the default language options configured for legacy code.
*
* <p/>
* Most of the job of this class is just to disable specific keywords to allow legacy code
* to build. There is no strict feature-detection when handling other changes at this point,
* but generally it's the addition of keywords that stop older code from building (this is
* less and less of a problem with updates to newer versions, which usually don't require
* new keywords). If more specific checks are required, this class can still be used to
* differentiate versions (e.g. it might be worth adding some boolean methods like
* "supportsGenerics()").
*
* @author Zak
*
*/
public class LegacyLanguage extends Language {
private final int version;
public LegacyLanguage(int version) {
switch (version) {
case 0: // First major version
case 1: // Added inner classes, possibly other changes
case 2: // Added "strictfp", maybe other changes? (first widely-used version)
case 3: // No language changes from 2?
case 4: // Added assert
case 5: // Added generics, enums, annotations/attributes, etc. (major update)
case 6: // No language changes from 5?
case 7: // Added string switch, binary literals, enhanced try-catch, "diamond operator" new ArrayList<>() (maybe other language features?)
case 8: // Added lambda expressions, enhanced interfaces
case 9: // Added interface private methods, probably other changes
case 10: // Added "var" for local variable type inference
case 11: // TODO...
case 12:
case 13:
case 14:
case 15:
case 16:
this.version = version;
break;
default:
throw new Error("Bad legacy language version: " + version);
}
}
public int getLegacyVersion() {
return version;
}
private String[] kws = null;
@Override
public String[] getKeywords() {
if (kws == null) {
String[] arr1 = super.getKeywords();
kws = new String[arr1.length]; // Fill the cached field; a local declaration here would leave the field null
for (int i = 0; i < kws.length; i++) {
kws[i] = arr1[i];
// Some newly-introduced keywords are always disabled in legacy mode
if (kws[i].equals("syncrhonised") || kws[i].equals("module") || kws[i].equals("pointer") || kws[i].equals("duck")) {
kws[i] = "void"; // Disable by replacing with a duplicate "void" keyword
}
// Others depend on version - are you trying to compile 90's code or something from 2013?
if (version < 2 && kws[i].equals("strictfp")) {
kws[i] = "void"; // Disable by replacing with a duplicate "void" keyword
}
if (version < 4 && kws[i].equals("assert")) {
kws[i] = "void"; // Disable by replacing with a duplicate "void" keyword
}
if (version < 5 && kws[i].equals("enum")) {
kws[i] = "void"; // Disable by replacing with a duplicate "void" keyword
}
if (version < 9 && kws[i].equals("_")) {
kws[i] = "void"; // Disable by replacing with a duplicate "void" keyword
}
}
}
return kws;
}
}
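
A sketch of the version gating above (hypothetical usage; it assumes only getKeywords() as defined in this file):

Language v4 = new LegacyLanguage(4); // old enough that "enum" is still a plain identifier
String[] kws = v4.getKeywords();
boolean hasEnum = false;
for (int i = 0; i < kws.length; i++) {
    if (kws[i].equals("enum")) { hasEnum = true; }
}
Log.line("enum is a keyword: " + hasEnum); // false, the slot holds a duplicate "void" for versions < 5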

View File

@@ -0,0 +1,56 @@
package slangc.parser;
public class Location {
private final Source source;
private final LocationType type;
private final int indexBegin, lineBegin, characterBegin, indexEnd, lineEnd, characterEnd;
public Location(Source source, LocationType type, int indexBegin, int lineBegin, int characterBegin, int indexEnd, int lineEnd, int characterEnd) {
this.source = source;
this.type = type;
this.indexBegin = indexBegin;
this.lineBegin = lineBegin;
this.characterBegin = characterBegin;
this.indexEnd = indexEnd;
this.lineEnd = lineEnd;
this.characterEnd = characterEnd;
}
public Location(Source source) {
this(source, LocationType.WITHIN, -1, -1, -1, -1, -1, -1);
}
public Source getSource() {
return source;
}
public LocationType getType() {
return type;
}
public int getIndexBegin() {
return indexBegin;
}
public int getLineBegin() {
return lineBegin;
}
public int getCharacterBegin() {
return characterBegin;
}
public int getIndexEnd() {
return indexEnd;
}
public int getLineEnd() {
return lineEnd;
}
public int getCharacterEnd() {
return characterEnd;
}
}

View File

@@ -0,0 +1,56 @@
package slangc.parser;
public final class LocationAnnotation extends Annotation {
private final LocationType locationType;
private final Token token;
public LocationAnnotation(LocationType locationType, Token token) {
this.token = token;
this.locationType = locationType;
}
public Token getToken() {
return token;
}
@Override
public AnnotationType getAnnotationType() {
return AnnotationType.LOCATION; // TODO...
}
@Override
public String toString() {
return "LocationAnnotation(" + locationType.name() + ", "+ token.toString() + ")";
}
public String niceString() {
String start = "";
boolean useEnd = false;
switch (locationType) {
case LocationType.AFTER:
//start = "After ";
useEnd = true;
break;
case LocationType.BEFORE:
//start = "Before ";
break;
default:
//start = "Around ";
break;
}
start = "Around ";
// NOTE: The 'filename:linenumber...' format is used since VS Code (at least) provides a link back to the source
start += "'" + getToken().getSnippet().getStart().getSource().getFilename();
int l = getToken().getSnippet().getStart().getLineCount();
int c = getToken().getSnippet().getStart().getCharacterCount();
if (useEnd) {
c += getToken().getSnippet().getLength();
}
start += ":" + l + "@" + c + "'";
return start;
}
}

View File

@@ -0,0 +1,9 @@
package slangc.parser;
public enum LocationType {
SOMEWHERE_NEAR,
WITHIN,
AROUND,
BEFORE,
AFTER
}

173
slangc/parser/Node.sauce Normal file
View File

@@ -0,0 +1,173 @@
package slangc.parser;
import slang.streams.SyncOutput;
import slang.streams.SyncInput;
import slang.data.SimpleEnum;
public abstract class Node {
public Object userdata;
Annotation[] annotations = null;
Node() {
// TODO Auto-generated constructor stub
}
public abstract NodeType getNodeType();
public abstract String toString();
private static byte[] inbuffer = new byte[1];
static SimpleEnum[] toknums = TokenType.values(TokenType.class);
static SimpleEnum[] nnums = NodeType.values(NodeType.class);
public static Node loadBinary(Scan sc, SyncInput<byte> inp) {
inp.readBuffer(inbuffer, 0, 1);
if (((int) inbuffer[0]) < 0) {
int toknum = (-(int)inbuffer[0]) - 1;
//Log.line("Read number " + inbuffer[0] + " as " + toknum);
if (toknum == 127) {
inp.readBuffer(inbuffer, 0, 1);
toknum = (-(int)inbuffer[0]) - 1;
TokenType tt = (TokenType)toknums[toknum]; //TokenType.lookup(TokenType.class, toknum);
return Token.loadBinaryLong(sc, tt, inp);
} else {
TokenType tt = (TokenType)toknums[toknum]; //TokenType.lookup(TokenType.class, toknum);
return Token.loadBinaryShort(sc, tt, inp);
}
} else {
int top = (int)(inbuffer[0] & 0xFF);
inp.readBuffer(inbuffer, 0, 1);
int nnum = (top << 8) | (int)(inbuffer[0] & 0xFF);
NodeType nt = (NodeType)nnums[nnum]; //NodeType.lookup(NodeType.class, nnum);
return Branch.loadBinary(sc, nt, inp);
}
}
// Default implementation; Token and Branch override this with real encoders.
public void dumpBinary(SyncOutput<byte> o) {
throw new Error("Can't dump AST nodes of type " + Type.of(this).fullName());
}
public int countAnnotations() {
if (annotations == null) {
return 0;
} else {
return annotations.length;
}
}
public Annotation[] getAnnotations(AnnotationType type) {
Annotation[] result = new Annotation[0];
for (int i = 0; i < countAnnotations(); i++) {
Annotation a = getAnnotation(i);
if (a.getAnnotationType() == type) {
Annotation[] nresult = new Annotation[result.length + 1];
for (int j = 0; j < result.length; j++) {
nresult[j] = result[j];
}
nresult[nresult.length - 1] = a;
result = nresult;
}
}
return result;
}
public int deleteAnnotations(AnnotationType type) {
int ndeleted = 0;
Annotation[] result = new Annotation[0];
for (int i = 0; i < countAnnotations(); i++) {
Annotation a = getAnnotation(i);
if (a.getAnnotationType() == type) {
ndeleted++;
} else {
Annotation[] nresult = new Annotation[result.length + 1];
for (int j = 0; j < result.length; j++) {
nresult[j] = result[j];
}
nresult[nresult.length - 1] = a;
result = nresult;
}
}
annotations = result;
return ndeleted; // Report how many annotations were removed, not how many remain
}
public int countAnnotations(AnnotationType type) {
return getAnnotations(type).length;
}
public int countErrorsHere() {
return countAnnotations(AnnotationType.ERROR);
}
public int deleteErrorsHere() {
return deleteAnnotations(AnnotationType.ERROR);
}
public int countErrorsRecursively() {
return countErrorsHere();
}
public int deleteErrorsRecursively() {
return deleteErrorsHere();
}
public Node annotate(ErrorType e) {
return annotate(new ErrorAnnotation(e));
}
public Node annotate(ErrorType e, String n) {
ErrorAnnotation a = new ErrorAnnotation(e);
a.annotate(n);
return annotate(a);
}
public Node annotate(WarningType w) {
return annotate(new WarningAnnotation(w));
}
public Node annotate(WarningType w, String n) {
WarningAnnotation a = new WarningAnnotation(w);
a.annotate(n);
return annotate(a);
}
public Node annotate(String note) {
return annotate(new NoteAnnotation(note));
}
/* As a special optimisation, we set a global flag when an error is found.
* This means that if no errors have been found in this run, looking for/counting errors in
* the tree can be avoided entirely!
*/
public static boolean mightHaveErrors = true;
public static boolean mightHaveWarnings = false;
public Node annotate(Annotation n) {
if (n instanceof ErrorAnnotation) {
mightHaveErrors = true;
}
if (n instanceof WarningAnnotation) {
mightHaveWarnings = true;
}
if (annotations == null || annotations.length < 1) {
annotations = new Annotation[] {n};
return this;
}
Annotation[] nsubnodes = new Annotation[annotations.length + 1];
for (int i = 0; i < annotations.length; i++) {
nsubnodes[i] = annotations[i];
}
nsubnodes[nsubnodes.length - 1] = n;
annotations = nsubnodes;
return this;
}
public Annotation getAnnotation(int i) {
if (i < 0 || i >= countAnnotations()) {
return null;
} else {
return annotations[i];
}
}
}
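
A short sketch of the annotation API above; it sticks to the annotate()/count/delete methods defined in this file (Branch is only used to get a concrete Node):

Node n = new Branch(NodeType.STATEMENT);
n.annotate("recovered while parsing"); // becomes a NoteAnnotation
n.annotate(ErrorType.EXPECTING, "expected ';'"); // becomes an ErrorAnnotation and sets mightHaveErrors
Log.line("notes: " + n.countAnnotations(AnnotationType.NOTE)); // 1
Log.line("errors: " + n.countErrorsHere()); // 1
n.deleteErrorsHere();
Log.line("errors: " + n.countErrorsHere()); // 0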

View File

@@ -0,0 +1,268 @@
package slangc.parser;
public enum NodeType {
EOF,
TOKEN,
ANNOTATION,
ERROR_ONLY,
UNIT,
NAME,
GENERIC_NAME,
INDEXED_NAME,
DOT,
COMMA,
WILDCARD,
SEMICOLON,
OPEN_ROUND_BRACE,
CLOSE_ROUND_BRACE,
OPEN_CURLY_BRACE,
CLOSE_CURLY_BRACE,
OPEN_SQUARE_BRACE,
CLOSE_SQUARE_BRACE,
OPEN_ANGLE_BRACE,
CLOSE_ANGLE_BRACE,
MODIFIER_LIST,
SIMPLE_MODIFIER,
SIMPLE_TAGGED_MODIFIER,
COMPLEX_TAGGED_MODIFIER,
NAME_TAGGED_MODIFIER,
PACKAGE_DECLARATION,
PACKAGE_NAME,
IMPORT_DECLARATIONS,
IMPORT_DECLARATION,
IMPORTED_TYPE,
IMPORTED_PACKAGE,
TYPE_DECLARATIONS,
TYPE_REFERENCE,
TYPE_REFERENCES,
GENERIC_ONLY_TYPE_REFERENCE,
SIMPLE_TYPE,
ARRAY_TYPE,
TYPE,
RETURN_TYPE,
NO_RETURN_TYPE,
THROWS,
NO_THROWS,
METHOD_DECLARATION,
FIELD_DECLARATION,
CONSTRUCTOR_DECLARATION,
STATIC_CONSTRUCTOR_DECLARATION,
SLOTS,
INITIALISED_SLOT,
UNINITIALISED_SLOT,
ENUM_MEMBERS,
ENUM_CLASS_MEMBERS,
NO_ENUM_CLASS_MEMBERS,
INTERFACE_MEMBERS,
CLASS_MEMBERS,
ENUM_MEMBER,
INTERFACE_MEMBER,
CLASS_MEMBER,
CLASS_DECLARATION,
INTERFACE_DECLARATION,
ENUM_DECLARATION,
INTERFACE_BASES,
NO_INTERFACE_BASES,
CLASS_BASE,
NO_CLASS_BASE,
CLASS_IMPLEMENTS,
NO_CLASS_IMPLEMENTS,
NO_GENERIC_DECLARATIONS,
GENERIC_DECLARATIONS,
GENERIC_DECLARATION_LIST,
SIMPLE_GENERIC_DECLARATION,
SIMPLE_TYPED_GENERIC_DECLARATION,
VARIABLE_GENERIC_DECLARATION,
VARIABLE_TYPED_GENERIC_DECLARATION,
ARGUMENT_DECLARATIONS,
ARGUMENT_DECLARATION_LIST,
SIMPLE_ARGUMENT_DECLARATION,
VARIABLE_ARGUMENT_DECLARATION,
METHOD_BODY,
NO_METHOD_BODY,
STATEMENTS,
STATEMENT,
BLOCK_STATEMENT,
VARIABLE_STATEMENT,
REFERENCE_STATEMENT,
IF_STATEMENT,
ELSE_CLAUSE,
NO_ELSE_CLAUSE,
WHILE_STATEMENT,
DO_WHILE_STATEMENT,
RETURN_EXPRESSION_STATEMENT,
RETURN_NOTHING_STATEMENT,
ASSERT_STATEMENT,
THROW_STATEMENT,
EXPRESSION_STATEMENT,
EXPRESSION,
EXPRESSIONS,
ARGUMENTS,
GENERIC_ARGUMENTS,
GENERIC_EXPRESSIONS,
GENERIC_VALUE,
BRACED_EXPRESSION,
LITERAL_EXPRESSION,
NULL_LITERAL_EXPRESSION,
BOOLEAN_LITERAL_EXPRESSION,
INTEGER_LITERAL_EXPRESSION,
FLOAT_LITERAL_EXPRESSION,
CHAR_LITERAL_EXPRESSION,
STRING_LITERAL_EXPRESSION,
NEW_EXPRESSION,
NEW_OBJECT_EXPRESSION,
NEW_CLEARED_ARRAY_EXPRESSION,
NEW_INITIALISED_ARRAY_EXPRESSION,
NEW_CLASS_EXPRESSION,
THIS_EXPRESSION,
SUPER,
REFERENCE_EXPRESSION,
SUBREFERENCE_EXPRESSION,
ARRAY_INDEX_EXPRESSION,
THIS_METHOD_CALL_EXPRESSION,
NORMAL_METHOD_CALL_EXPRESSION,
SUPER_METHOD_CALL_EXPRESSION,
SUPER_CONSTRUCTOR_CALL_EXPRESSION,
THIS_CONSTRUCTOR_CALL_EXPRESSION,
AUTOMATIC_METHOD_CALL_EXPRESSION,
SIMPLE_EXPRESSION,
CAST_EXPRESSION,
UNARY_EXPRESSION,
MULTIPLICATIVE_EXPRESSION,
ADDITIVE_EXPRESSION,
COMPARISON_EXPRESSION,
INSTANCEOF_EXPRESSION,
LOGICAL_EXPRESSION,
UNARY_OPERATOR,
MULTIPLICATIVE_OPERATOR,
ADDITIVE_OPERATOR,
COMPARISON_OPERATOR,
LOGICAL_OPERATOR,
ASSIGNMENT_EXPRESSION,
COUNT_EXPRESSION,
ASSIGNMENT_OPERATOR,
COUNT_OPERATOR,
TRY_STATEMENT,
CATCH_CLAUSES,
CATCH_CLAUSE,
FINALLY_CLAUSE,
NO_FINALLY_CLAUSE,
SYNCHRONIZED_STATEMENT,
OUTER_THIS_EXPRESSION,
TYPE_EXPRESSION,
CONDITIONAL_EXPRESSION,
FOR_STATEMENT,
FOR_VARIABLES,
BREAK_STATEMENT,
CONTINUE_STATEMENT,
BREAK_TO_STATEMENT,
CONTINUE_TO_STATEMENT,
LABEL_STATEMENT,
SWITCH_STATEMENT,
SWITCH_MEMBERS,
CASE_LABEL,
DEFAULT_LABEL,
COLON,
ASSERT_DEBUG_STATEMENT,
ARRAY_INITIALISER,
ARRAY_EXPRESSIONS,
ARRAY_TYPE_PART, // []
ARRAY_TYPE_TAIL, // a tailing set of [][]..., parsed as empty if not present.
IMPORT_STATIC_DECLARATION,
SHIFT_EXPRESSION,
SHIFT_OPERATOR,
OUTER_SUPER_CONSTRUCTOR_CALL_EXPRESSION, // A strange edge-case
OUTER_SUPER_METHOD_CALL_EXPRESSION,
EQUALITY_OPERATOR,
EQUALITY_EXPRESSION,
SPECIAL_NEW_EXPRESSION,
ATTRIBUTE_DECLARATION,
MODIFIER_ARGUMENTS,
QUESTIONMARK,
SIMPLE_EXTENDED_GENERIC_DECLARATION,
FOR_EACH_STATEMENT,
SIMPLE_GENERIC_WILDCARD,
EXTENDED_GENERIC_WILDCARD,
SUPER_GENERIC_WILDCARD,
SPECIALISED_ENUM_MEMBER,
LAZY_ARGUMENT_DECLARATION,
LAZY_TYPE,
DOUBLECOLON,
NAMED_LAMBDA_EXPRESSION,
LAMBDA_ARGS,
LAMBDA_EXPRESSION,
TRY_RESOURCES,
NO_TRY_RESOURCES,
TRY_RESOURCE_LIST,
GENERIC_REFERENCE_EXPRESSION,
GENERIC_REFERENCE_NAME,
MODIFIER_EXPRESSIONS,
SIMPLE_ASSIGN,
ANDABLE_TYPE,
SINGLE_AND,
SPECIAL_ARRAY_TYPE_EXPRESSION,
TYPE_NAMED_LAMBDA_EXPRESSION,
SPECIALISED_ENUM_SUBTYPE,
SPECIALISED_ENUM_SUBTYPE_WITH_ARGS,
DEFAULT_METHOD_BODY,
THIS_REFERENCE_EXPRESSION,
SUPER_REFERENCE_EXPRESSION,
BITWISE_AND_OPERATOR,
BITWISE_AND_EXPRESSION,
BITWISE_XOR_OPERATOR,
BITWISE_XOR_EXPRESSION,
BITWISE_OR_OPERATOR,
BITWISE_OR_EXPRESSION,
LOGICAL_AND_OPERATOR,
LOGICAL_AND_EXPRESSION,
LOGICAL_OR_OPERATOR,
LOGICAL_OR_EXPRESSION
;
int x() { return 1;}
}

View File

@@ -0,0 +1,23 @@
package slangc.parser;
public final class NoteAnnotation extends Annotation {
private final String text;
public NoteAnnotation(String text) {
this.text = text;
}
public String getText() {
return text;
}
@Override
public String toString() {
return "NoteAnnotation(\"" + text + "\")";
}
@Override
public AnnotationType getAnnotationType() {
return AnnotationType.NOTE;
}
}

2399
slangc/parser/Parse.sauce Normal file

File diff suppressed because it is too large

423
slangc/parser/Scan.sauce Normal file
View File

@@ -0,0 +1,423 @@
package slangc.parser;
public class Scan {
private final Language language;
private final Source source;
private final SourceEncoding encoding;
private Token[] tokens = new Token[1000];
int ntokens = 0;
public void reset() {
tokens = new Token[20];
ntokens = 0;
}
public Scan(Language language, Source source) {
this.language = language;
this.source = source;
encoding = source.getEncoding();
//Log.line("Encoding=" + source.getEncoding());
}
public Language getLanguage() {
return language;
}
public Source getSource() {
return source;
}
public int countTokens() {
return ntokens; //tokens.length;
}
public Token getToken(int index) {
if (index < 0 || index >= ntokens /*tokens.length*/) {
return null;
} else {
return tokens[index];
}
}
public int scanAll() {
nullifyComments = false; // Keep comment tokens (a reused Scan could still have this set from scanQuick())
SourceWalker w = new SourceWalker(source);
Token t;
int i = 0;
do {
t = tokenAt(w);
append(t);
i++;
} while (!t.is(TokenType.END_OF_FILE));
return i;
}
boolean nullifyComments = false;
public int scanQuick() {
nullifyComments = true;
SourceWalker w = new SourceWalker(source);
Token t;
int i = 0;
do {
t = tokenAt(w);
if (t == null || t.is(TokenType.LINE_COMMENT) || t.is(TokenType.LONG_COMMENT)) {
// Ignore these
} else {
append(t);
i++;
}
} while (t == null || !t.is(TokenType.END_OF_FILE));
return i;
}
public int scanPedantic() {
nullifyComments = false;
SourceWalker w = new SourceWalker(source);
Token t;
Token precedingNonComment = null;
Token[] precedingComments = new Token[] {};
int i = 0;
do {
t = tokenAt(w);
if (t.is(TokenType.LINE_COMMENT) || t.is(TokenType.LONG_COMMENT)) {
if (precedingNonComment != null && precedingNonComment.getSnippet().getStart().getLineCount() == t.getSnippet().getStart().getLineCount()) {
precedingNonComment.annotate(new CommentAnnotation(t));
} else {
Token[] nprecoms = new Token[precedingComments.length + 1];
for (int j = 0; j < precedingComments.length; j++) {
nprecoms[j] = precedingComments[j];
}
nprecoms[nprecoms.length - 1] = t;
precedingComments = nprecoms;
}
} else {
precedingNonComment = t;
if (precedingComments.length != 0) {
for (int j = 0; j < precedingComments.length; j++) {
precedingNonComment.annotate(new CommentAnnotation(precedingComments[j]));
}
precedingComments = new Token[] {};
}
append(t);
i++;
}
} while (!t.is(TokenType.END_OF_FILE));
return i;
}
public void append(Token t) {
if (ntokens >= tokens.length) {
Token[] narr = new Token[tokens.length * 2 + 1];
for (int i = 0; i < tokens.length; i++) {
narr[i] = tokens[i];
}
tokens = narr;
}
tokens[ntokens] = t;
t.setOwner(this);
ntokens++;
/*
Token[] ntokens = new Token[tokens.length + 1];
for (int i = 0; i < tokens.length; i++) {
ntokens[i] = tokens[i];
}
ntokens[ntokens.length - 1] = t;
t.setOwner(this);
tokens = ntokens;
*/
}
public /*static */void skipSpaces(SourceWalker walker) {
while (!walker.isAtEnd() && encoding.isSpace(walker.getSource().getCharacter(walker.getIndex()))) {
walker.advance();
}
}
public Token tokenAt(SourceWalker walker) {
Token t = innerTokenAt(walker);
if (t != null) {
t.setOwner(this);
}
return t;
}
public Token innerTokenAt(SourceWalker walker) {
//assert walker.getSource() == source;
//position = position.clone();
skipSpaces(walker);
if (walker.isAtEnd()) {
return new Token(TokenType.END_OF_FILE, new SourceSnippet(walker.getPosition(), 0));
}
if (source.matches(walker.getIndex(), language.getLineCommentPrefixes())) {
SourcePosition startPosition = walker.getPosition();
int i = 0;
while (!walker.isAtEnd() && !encoding.isNewline(walker.getSource().getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
if (nullifyComments) return null;
return new Token(TokenType.LINE_COMMENT, new SourceSnippet(startPosition, i));
} else if (source.matches(walker.getIndex(), language.getLongCommentTerminators())) {
SourcePosition startPosition = walker.getPosition();
String end = source.match(walker.getIndex(), language.getLongCommentTerminators());
int i = 0;
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end)) {
walker.advance();
i++;
}
boolean unterminated = false;
if (walker.isAtEnd()) {
unterminated = true;
} else {
walker.advance(end.intsLength());
i += end.intsLength();
}
if (nullifyComments && !unterminated) return null;
Token result = new Token(TokenType.LONG_COMMENT, new SourceSnippet(startPosition, i));
if (unterminated) {
result.annotate(ErrorType.UNTERMINATED_TOKEN);
}
return result;
} else if (source.matches(walker.getIndex(), language.getStringTerminators())) {
SourcePosition startPosition = walker.getPosition();
String end = source.match(walker.getIndex(), language.getStringTerminators());
walker.advance(); // Must read at least the quote or it will likely match as end too!
int i = 1;
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) && !source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) {
if (source.matches(walker.getIndex(), language.getStringEscapes())) {
walker.advance();
i++;
}
walker.advance();
i++;
}
boolean unterminated = false;
boolean badEscape = false;
if (walker.isAtEnd()) {
unterminated = true;
} else if (source.matches(walker.getIndex(), language.getStringForbiddenEscapes())) {
badEscape = true;
} else {
walker.advance(end.intsLength());
i += end.intsLength();
}
Token result = new Token(TokenType.STRING, new SourceSnippet(startPosition, i));
if (unterminated) {
result.annotate(ErrorType.UNTERMINATED_TOKEN);
}
if (badEscape) {
result.annotate(ErrorType.BAD_ESCAPE);
}
return result;
} else if (source.matches(walker.getIndex(), language.getCharTerminators())) {
SourcePosition startPosition = walker.getPosition();
String end = source.match(walker.getIndex(), language.getCharTerminators());
walker.advance(); // Must read at least the quote or it will likely match as end too!
int i = 1;
while (!walker.isAtEnd() && !source.matches(walker.getIndex(), end) /*&& !sourceFile.matches(walker.getIndex(), language.getStringForbiddenEscapes())*/) {
if (source.matches(walker.getIndex(), language.getStringEscapes())) {
walker.advance();
i++;
}
walker.advance();
i++;
}
boolean unterminated = false;
boolean badEscape = false;
if (walker.isAtEnd()) {
unterminated = true;
} else if (source.matches(walker.getIndex(), language.getCharForbiddenEscapes())) {
badEscape = true;
} else {
walker.advance(end.intsLength());
i += end.intsLength();
}
Token result = new Token(TokenType.CHAR, new SourceSnippet(startPosition, i));
if (unterminated) {
result.annotate(ErrorType.UNTERMINATED_TOKEN);
}
if (badEscape) {
result.annotate(ErrorType.BAD_ESCAPE);
}
return result;
} /*else if (sourceFile.matches(walker.getIndex(), language.getHexPrefixes())) {
SourcePosition startPosition = walker.getPosition();
String prefix = sourceFile.match(walker.getIndex(), language.getHexPrefixes());
int i = prefix.intsLength();
walker.advance(i);
while (encoding.isDigitOrUnderscore(sourceFile.getCharacter(walker.getIndex()), 16)) {
walker.advance();
i++;
}
while (encoding.isIntegerSuffix(sourceFile.getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
return new Token(TokenType.HEX_INTEGER, new SourceSnippet(startPosition, i));
}*/ else if (source.matches(walker.getIndex(), language.getHexPrefixes())
|| source.matches(walker.getIndex(), language.getBinPrefixes())
|| encoding.isDigit(source.getCharacter(walker.getIndex()))
// Also check obscure case where float starts with a dot
|| (encoding.isDecimalPoint(source.getCharacter(walker.getIndex())) && encoding.isDigit(source.getCharacter(walker.getIndex() + 1)))) {
SourcePosition startPosition = walker.getPosition();
String prefix = "";
int i = 0;
boolean isFloat = false;
int radix = 10;
if (source.matches(walker.getIndex(), language.getHexPrefixes())) {
prefix = source.match(walker.getIndex(), language.getHexPrefixes());
i = prefix.intsLength();
walker.advance(i);
radix = 16;
} else if (source.matches(walker.getIndex(), language.getBinPrefixes())) {
prefix = source.match(walker.getIndex(), language.getBinPrefixes());
i = prefix.intsLength();
walker.advance(i);
radix = 2;
} else {
if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) {
isFloat = true;
}
walker.advance();
i = 1;
}
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
walker.advance();
i++;
}
if (encoding.isDecimalPoint(source.getCharacter(walker.getIndex()))) {
isFloat = true;
walker.advance();
i++;
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
walker.advance();
i++;
}
}
if (encoding.isExponentSymbol(source.getCharacter(walker.getIndex()))) {
isFloat = true; // Allow floats to be made using exponent alone without decimal point
walker.advance();
i++;
if (encoding.isSign(source.getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
while (encoding.isDigitOrUnderscore(source.getCharacter(walker.getIndex()), radix)) {
walker.advance();
i++;
}
}
while (encoding.isFloatSuffix(source.getCharacter(walker.getIndex()))) {
isFloat = true; // Allow floats to be made just by adding a float suffix
walker.advance();
i++;
}
if (!isFloat) {
while (encoding.isIntegerSuffix(source.getCharacter(walker.getIndex()))) {
walker.advance();
i++;
}
}
TokenType t;
if (isFloat) {
switch(radix) {
case 2:
t = TokenType.BIN_FLOAT;
break;
case 10:
t = TokenType.DEC_FLOAT;
break;
case 16:
t = TokenType.HEX_FLOAT;
break;
default:
throw new Error("INTERNAL ERROR: Bad base number: " + radix);
}
} else {
switch(radix) {
case 2:
t = TokenType.BIN_INTEGER;
break;
case 10:
t = TokenType.DEC_INTEGER;
break;
case 16:
t = TokenType.HEX_INTEGER;
break;
default:
throw new Error("INTERNAL ERROR: Bad base number: " + radix);
}
}
return new Token(t, new SourceSnippet(startPosition, i));
} else if (source.matches(walker.getIndex(), language.getOperators())) {
SourcePosition startPosition = walker.getPosition();
String op = source.match(walker.getIndex(), language.getOperators());
walker.advance(op.intsLength());
return new Token(TokenType.OPERATOR, new SourceSnippet(startPosition, op.intsLength()));
} else if (encoding.isValidNameHead(source.getCharacter(walker.getIndex()))) {
SourcePosition startPosition = walker.getPosition();
walker.advance();
int i = 1;
int c;
while (((c = source.getCharacter(walker.getIndex())) >= 'a' && c <= 'z') || encoding.isValidNameTail(c)) {
walker.advance();
i++;
}
SourceSnippet s = new SourceSnippet(startPosition, i);
String n = s.getSource();
String[] kws = language.getKeywords();
for (int j = 0; j < kws.length; j++) {
if (n == kws[j]) {
return new Token(TokenType.KEYWORD, s);
}
}
return new Token(TokenType.NAME, s);
} else {
SourcePosition startPosition = walker.getPosition();
//throw new Error("Unexpected character #" + startPosition.getSource().getCharacter(startPosition.getIndex()));
walker.advance();
return new Token(TokenType.UNEXPECTED_CHARACTER, new SourceSnippet(startPosition, 1));
}
}
}
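
A rough end-to-end sketch of driving the scanner above. MemorySource is a hypothetical helper (this commit only ships the abstract Source); it reuses the Ankh.stringToArray()/"COMPAT32" conversion that Source.matches() already relies on:

class MemorySource extends Source {
    private final int[] chars;
    public MemorySource(String text, String filename) {
        super(filename);
        chars = new int[text.intsLength()];
        Ankh.stringToArray(text, "COMPAT32", chars); // same conversion Source.matches() uses
    }
    public int getIndexLength() { return chars.length; }
    public int getCharacter(int index) { return isIndexWithinBounds(index) ? chars[index] : -1; }
    public int getNextIndex(int currentIndex) { return currentIndex + 1; }
}

Scan scan = new Scan(new Language(), new MemorySource("int x = 10; // dropped", "demo.sauce"));
Log.line("tokens: " + scan.scanQuick()); // 6: KEYWORD, NAME, OPERATOR, DEC_INTEGER, OPERATOR, END_OF_FILE
for (int i = 0; i < scan.countTokens(); i++) {
    Log.line(scan.getToken(i).toString());
}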

View File

@@ -0,0 +1,115 @@
package slangc.parser;
public final class ScanWalker {
private Scan scan;
private int index;
public ScanWalker(Scan scan, int index) {
reset(scan, index);
}
private ScanWalker() {
}
public ScanWalker(Scan scan) {
reset(scan);
}
public Scan getScan() {
return scan;
}
public int getIndex() {
return index;
}
public void reset(ScanWalker from) {
reset(from.getScan(), from.getIndex());
}
public void reset(Scan scan, int index) {
if (scan.countTokens() < 1) {
throw new Error("This scan is empty");
}
this.scan = scan;
this.index = index;
cachedToken = null;
}
public void reset(Scan scan) {
reset(scan, 0);
}
public void reset() {
reset(scan);
}
Token cachedToken = null;
public void advance() {
cachedToken = null;
//if (!isAtEnd()) {
index++;
//}
}
public void advance(int n) {
for (int i = 0; i < n; i++) {
advance();
}
}
public static ScanWalker[] walkerCache = new ScanWalker[1000];
public static int walkerCount = 0;
public static ScanWalker alloc() {
if (walkerCount > 0) {
walkerCount--;
return walkerCache[walkerCount];
} else {
return new ScanWalker();
}
}
public static void recycle(ScanWalker w) {
if (walkerCount < 1000) {
walkerCache[walkerCount] = w;
walkerCount++;
}
}
public ScanWalker clone() {
ScanWalker w = alloc();
w.reset(this);
return w;
//return new ScanWalker(scan, index);
}
public boolean isAtEnd() {
if (cachedToken == null) {
return index >= scan.countTokens();
} else {
return false;
}
//return cachedToken == null && (index < 0 || index >= scan.countTokens());
}
public Token peek() {
/*if (isAtEnd()) {
return null;
} else {*/
if (cachedToken == null) {
return cachedToken = scan.getToken(index);
}
return cachedToken;
//}
}
public Token peekOffset(int i) {
/*if (index + i < 0 || index + i >= scan.countTokens()) {
return null;
} else {*/
return scan.getToken(index + i);
//}
}
}
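
A sketch of the pooling/backtracking pattern above, reusing the scan from the sketch above (any Scan with at least one token works; the constructor throws on an empty one):

ScanWalker w = new ScanWalker(scan);
ScanWalker mark = w.clone(); // clone() pulls a pooled walker when one is available
w.advance(3); // speculatively consume three tokens
if (w.peek() == null || !w.peek().is(TokenType.OPERATOR, ";")) {
    w.reset(mark); // backtrack to the saved position
}
ScanWalker.recycle(mark); // hand the temporary walker back to the pool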

134
slangc/parser/Source.sauce Normal file
View File

@@ -0,0 +1,134 @@
package slangc.parser;
//import slang.data.Map;
import slang.data.Mappable;
import slang.vm.Ankh;
/**
* The Source class is a simple abstraction over a source file, suitable for easily writing a
* scanner on top of. The main differences between this and just using a stream or array or string
* are that it's easier to handle different encodings and access mechanisms this way.
*
* @author Zak
*
*/
public abstract class Source {
private final SourceEncoding encoding;
private final String filename;
public Source(SourceEncoding encoding, String filename) {
this.encoding = encoding;
this.filename = filename;
}
public Source(String filename) {
this(new SourceEncoding(), filename);
}
public Source() {
this("");
}
public SourceEncoding getEncoding() {
return encoding;
}
public String getFilename() {
return filename;
}
public abstract int getIndexLength();
public final boolean isIndexWithinBounds(int index) {
return index >= 0 && index < getIndexLength();
}
public abstract int getCharacter(int index);
public abstract int getNextIndex(int currentIndex);
public String getString(int index, int maximumLength) {
String result = "";
for (int i = 0; i < maximumLength; i++) {
if (isIndexWithinBounds(index)) {
result += getEncoding().getString(getCharacter(index));
index = getNextIndex(index);
}
}
return result;
}
private String[][] matchCache = null;
private int[] matchCheck = new int[1];
private int lastidx = -1;
private int lastch = 0;
public boolean matches(int index, String value) {
if (index != lastidx) {
lastidx = index;
lastch = getCharacter(index);
}
Ankh.stringToArray(value, "COMPAT32", matchCheck);
if (matchCheck[0] != lastch) {
return false;
}
if (matchCache == null) {
matchCache = new String[getIndexLength()][];
}
if (matchCache[index] == null) {
matchCache[index] = new String[100];
}
int len = value.intsLength();
String s = matchCache[index][len];
if (s == null) {
s = getString(index, len);
matchCache[index][len] = s;
}
return s == value;
//return getString(index, value.intsLength()) == value;
}
public boolean matches(int index, String[] values) {
for (int i = 0; i < values.length; i++) {
if (matches(index, values[i])) {
return true;
}
}
return false;
}
public String match(int index, String[] values) {
for (int i = 0; i < values.length; i++) {
if (matches(index, values[i])) {
return values[i];
}
}
return null;
}
public boolean matches(int index, String[][] values) {
for (int i = 0; i < values.length; i++) {
if (matches(index, values[i][0])) {
return true;
}
}
return false;
}
public String match(int index, String[][] values) {
for (int i = 0; i < values.length; i++) {
if (matches(index, values[i][0])) {
return values[i][1];
}
}
return null;
}
@Override
public String toString() {
return Type.of(this).getTypeName() + "(\"" + getFilename() + "\")";
}
}

View File

@@ -0,0 +1,136 @@
package slangc.parser;
import slang.vm.SystemCall;
public class SourceEncoding {
public SourceEncoding() {
// TODO Auto-generated constructor stub
}
public String getEncodingName() {
return "Generic Latin/Unicode+Hieroglyphs";
}
public boolean isNewline(int ch) {
return ch == '\n';
}
public boolean isSpace(int ch) {
return ch == ' ' || ch == '\t' || ch == '\f' || ch == '\r' || isNewline(ch);
}
public boolean isDigit(int ch) {
return isDigit(ch, 10);
}
public boolean isDigitOrUnderscore(int ch) {
return isDigit(ch) || ch == '_';
}
public boolean isDigit(int ch, int base) {
if (base != 2 && base != 10 && base != 16) {
throw new Error("Bad base (expected 10 or 16): " + base);
}
if (ch == '0' || ch == '1' || ch == 0x660/*'٠'*/ || ch == 0x661/*'١'*/) {
return true;
} else if ((ch >= '2' && ch <= '9') || (ch >= 0x662/*'٢'*/ && ch <= 0x669 /*'٩'*/)) {
return base >= 10;
} else if ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
return base >= 16;
} else if (ch == 'ⲁ' || ch == 'ⲃ' || ch == 'ⲅ' || ch == 'ⲇ' || ch == 'ⲋ' || ch == 'ⲉ') {
return base >= 16;
} else {
return false;
}
}
public boolean isDigitOrUnderscore(int ch, int base) {
return isDigit(ch, base) || ch == '_';
}
public boolean isDecimalPoint(int ch) {
return ch == '.';
}
public boolean isExponentSymbol(int ch) {
return ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P'; // p/P forms used in hex floats
}
public boolean isSign(int ch) {
return ch == '+' || ch == '-';
}
public boolean isIntegerSuffix(int ch) {
return ch == 'L' || ch == 'l' || ch == 'U' || ch == 'u';
}
public boolean isFloatSuffix(int ch) {
return ch == 'F' || ch == 'f' || ch == 'D' || ch == 'd';
}
public boolean isAlphaNubian(int ch) {
switch (ch) {
case 'ⲁ':
case 'ⲃ':
case 'ⲅ':
case 'ⲇ':
case 'ⲉ':
case 'ⲋ':
case 'ⲍ':
case 'ⲏ':
case 'ⲑ':
case 'ⲓ':
case 'ⲕ':
case 'ⲗ':
case 'ⲙ':
case 'ⲛ':
case 'ⲝ':
case 'ⲟ':
case 'ⲡ':
case 'ⲣ':
case 'ⲥ':
case 'ⲧ':
case 'ⲩ':
case 'ⲫ':
case 'ⲭ':
case 'ⲯ':
case 'ⲱ':
case 'ϣ':
case 'ⳡ'://ⳡⲁⲓ??
//case 'ⲁ'://ϩⲁⲓ???
case 'ϩ':
case 'ⳝ':
//case 'ⲧ̄'://ⲧⲓ??
//case ''://??
case 'ⳟ':
//case 'ⲁ': //ⲛⲓ?
//ⲱⲁⲱ?
return true;
default:
return false;
}
}
public boolean isAlphabetical(int ch) {
return (SystemCall.characterFlags(ch) & 1) == 1;
throw new Error("TODO: Alphabetical handling"); //return Character.isAlphabetic(ch);//(ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
public boolean isValidNameHead(int ch) {
return isAlphabetical(ch) || ch == '_' || ch == '$'
/* Cleopatra said we're allowed to use hieroglyphs. Ase. */
|| (ch >= 0x13000 && ch <= 0x13430) || ch == 0x2625 // TODO: Fix hex support (breaks if ends with F/D due to confusion with float/double tags?)
/* Nubian was my call. I wanted it in the first version.*/
|| isAlphaNubian(ch);
}
public boolean isValidNameTail(int ch) {
return isValidNameHead(ch) || isDigit(ch);
}
public String getString(int ch) {
return String.construct(new int[]{ch});
}
}
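
A few spot checks of the identifier/digit rules above (only methods from this file):

SourceEncoding enc = new SourceEncoding();
Log.line("" + enc.isValidNameHead('ⲕ')); // true, Coptic/Nubian letters are accepted
Log.line("" + enc.isValidNameHead(0x13000)); // true, the hieroglyph block is accepted
Log.line("" + enc.isValidNameTail('9')); // true, digits are fine after the first character
Log.line("" + enc.isDigit('ⲃ', 16)); // true, the first few Coptic letters double as hex digits
Log.line("" + enc.isDigit('ⲃ', 10)); // false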

View File

@@ -0,0 +1,46 @@
package slangc.parser;
public final class SourcePosition {
private final Source source;
private final int index, line, character;
public SourcePosition(Source source, int index, int line, int character) {
this.source = source;
this.index = index;
this.line = line;
this.character = character;
}
public SourcePosition(Source source) {
this(source, 0, 1, 1);
}
public SourcePosition clone() {
return new SourcePosition(source, index, line, character);
}
public Source getSource() {
return source;
}
public int getIndex() {
return index;
}
public int getLineCount() {
return line;
}
public int getCharacterCount() {
return character;
}
public boolean isAtEnd() {
return !source.isIndexWithinBounds(index);
}
@Override
public String toString() {
return "SourcePosition(" + source.toString() + ", " + index + ", " + line + ", " + character + ")";
}
}

View File

@@ -0,0 +1,36 @@
package slangc.parser;
public final class SourceSnippet {
private SourcePosition start;
private int length;
private String cacheValue = null;
public SourceSnippet(SourcePosition start, int length) {
reset(start, length);
}
public void reset(SourcePosition start, int length) {
this.start = start;
this.length = length;
}
public SourcePosition getStart() {
return start;
}
public int getLength() {
return length;
}
public String getSource() {
if (cacheValue == null) {
cacheValue = start.getSource().getString(start.getIndex(), length);
}
return cacheValue;
}
@Override
public String toString() {
return "SourceSnippet(" + start.toString() + ", " + length + "): \"" + getSource() + "\"";
}
}

View File

@@ -0,0 +1,82 @@
package slangc.parser;
/**
* Represents a moving point within a source file, i.e. the point currently being scanned.
* This is held in its own class to make it easier for the scanner to be modified/reimplemented.
*
* @author Zak
*
*/
public final class SourceWalker {
private Source source;
private int index, line, character;
public SourceWalker(Source source, int index, int line, int character) {
reset(source, index, line, character);
}
public SourceWalker(Source source) {
reset(source);
}
public SourceWalker clone() {
return new SourceWalker(source, index, line, character);
}
public SourcePosition getPosition() {
return new SourcePosition(source, index, line, character);
}
public Source getSource() {
return source;
}
public int getIndex() {
return index;
}
public int getLineCount() {
return line;
}
public int getCharacterCount() {
return character;
}
public void reset(Source source, int index, int line, int character) {
this.source = source;
this.index = index;
this.line = line;
this.character = character;
}
public void reset(Source source) {
reset(source, 0, 1, 1);
}
public void reset() {
reset(source);
}
public boolean isAtEnd() {
return !source.isIndexWithinBounds(index);
}
public void advance() {
if (!isAtEnd()) {
if (source.getEncoding().isNewline(source.getCharacter(index))) {
line++;
character = 1;
} else {
character++;
}
index++;
}
}
public void advance(int numberOfCharacters) {
for (int i = 0; i < numberOfCharacters; i++) {
advance();
}
}
}
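
A tiny sketch of the line/character bookkeeping above, reusing the hypothetical MemorySource helper sketched after Scan.sauce:

SourceWalker w = new SourceWalker(new MemorySource("ab\ncd", "demo.sauce"));
w.advance(3); // consumes 'a', 'b' and the newline
Log.line(w.getPosition().toString()); // index 3, line 2, character 1: the newline bumps the line and resets the column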

126
slangc/parser/Token.sauce Normal file
View File

@@ -0,0 +1,126 @@
package slangc.parser;
import slang.streams.SyncOutput;
import slang.streams.SyncInput;
public final class Token extends Node {
private Scan owner;
private TokenType type;
private SourceSnippet snippet;
public Token(TokenType type, SourceSnippet snippet) {
reset(type, snippet);
}
public static void encInt(byte[] t, int index, int val) {
t[index] = (byte) val;
t[index+1] = (byte) (val >> 8);
t[index+2] = (byte) (val >> 16);
t[index+3] = (byte) (val >> 24);
}
public static int decInt(byte[] t, int index) {
int v = 0;
v |= (((int)t[index]) & 0xFF);
v |= ((((int)t[index+1]) & 0xFF) << 8);
v |= ((((int)t[index+2]) & 0xFF) << 16);
v |= ((((int)t[index+3]) & 0xFF) << 24);
return v;
}
static byte[] outbuffer = new byte[1 + 1 + 4 + 4 + 4 + 4];
public static Token loadBinaryShort(Scan scn, TokenType typ, SyncInput<byte> inp) {
inp.readBuffer(outbuffer, 0, 6); // First byte is already decoded as the typ argument
int idx = (((int)outbuffer[0]) & 0xFF) | ((((int)outbuffer[1]) & 0xFF) << 8);
int lin = (((int)outbuffer[2]) & 0xFF) | ((((int)outbuffer[3]) & 0xFF) << 8);
int chr = ((int)outbuffer[4]) & 0xFF;
int len = ((int)outbuffer[5]) & 0xFF;
Token t = new Token(typ, new SourceSnippet(new SourcePosition(scn.getSource(),idx,lin,chr), len));
t.setOwner(scn);
return t;
}
public static Token loadBinaryLong(Scan scn, TokenType typ, SyncInput<byte> inp) {
inp.readBuffer(outbuffer, 0, 16); // First bytes are already decoded as the typ argument
int idx = decInt(outbuffer, 0);
int lin = decInt(outbuffer, 4);
int chr = decInt(outbuffer, 8);
int len = decInt(outbuffer, 12);
Token t = new Token(typ, new SourceSnippet(new SourcePosition(scn.getSource(),idx,lin,chr), len));
t.setOwner(scn);
return t;
}
public void dumpBinary(SyncOutput<byte> o) {
if (type.value > 100) {
throw new Error("Too many token types for this simple format!");
}
if (owner != null) owner.reset();//owner = null; // Owner will be cleared in cached runs
// Go through the public accessors; the snippet/position fields are private to their own classes.
int idx = snippet.getStart().getIndex();
int lin = snippet.getStart().getLineCount();
int chr = snippet.getStart().getCharacterCount();
int len = snippet.getLength();
if (idx > 65535 || lin > 65535 || chr > 255 || len > 255) {
outbuffer[0] = (byte) (-128);
outbuffer[1] = (byte) (-(type.value+1));
encInt(outbuffer, 2, idx);
encInt(outbuffer, 6, lin);
encInt(outbuffer, 10, chr);
encInt(outbuffer, 14, len);
o.writeBuffer(outbuffer, 0, 18);
} else {
outbuffer[0] = (byte) (-(type.value+1));
outbuffer[1] = (byte) idx;
outbuffer[2] = (byte) (idx >> 8);
outbuffer[3] = (byte) lin;
outbuffer[4] = (byte) (lin >> 8);
outbuffer[5] = (byte) chr;
outbuffer[6] = (byte) len;
o.writeBuffer(outbuffer, 0, 7);
}
//throw new Error("Can't dump AST nodes of type " + Type.of(this).fullName());
}
public void setOwner(Scan scan) {
this.owner = scan;
}
public Scan getOwner() {
return owner;
}
public void reset(TokenType type, SourceSnippet snippet) {
this.type = type;
this.snippet = snippet;
}
public TokenType getType() {
return type;
}
public SourceSnippet getSnippet() {
return snippet;
}
public boolean is(TokenType t) {
return type == t;
}
public boolean is(TokenType t, String match) {
return type == t && snippet.getSource().equals(match);
}
@Override
public String toString() { // TODO: Refactor
return "Token(" + type.name() + ", " + snippet.toString() + ")";
}
@Override
public NodeType getNodeType() {
return NodeType.TOKEN;
}
public boolean is(String value) {
return snippet.getSource().equals(value);
}
}
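
A worked example of the little-endian helpers above: encInt() stores the least significant byte first and decInt() reverses it. Tokens themselves use the 7-byte short record while index/line fit in 16 bits and character/length fit in 8, and fall back to the 18-byte long record (marker -128) otherwise.

byte[] buf = new byte[4];
Token.encInt(buf, 0, 0x01020304);
// buf now holds 0x04, 0x03, 0x02, 0x01
Log.line("" + (Token.decInt(buf, 0) == 0x01020304)); // true, round trip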

View File

@@ -0,0 +1,23 @@
package slangc.parser;
public enum TokenType {
UNEXPECTED_CHARACTER,
END_OF_FILE,
LINE_COMMENT,
LONG_COMMENT,
DEC_INTEGER,
HEX_INTEGER,
BIN_INTEGER,
DEC_FLOAT,
HEX_FLOAT,
BIN_FLOAT,
STRING,
CHAR,
NAME,
OPERATOR,
KEYWORD
}

View File

@@ -0,0 +1,23 @@
package slangc.parser;
public final class WarningAnnotation extends Annotation {
private final WarningType warningType;
public WarningAnnotation(WarningType warningType) {
this.warningType = warningType;
}
public WarningType getWarningType() {
return warningType;
}
@Override
public String toString() {
return "WarningAnnotation(" + warningType + ")";
}
@Override
public AnnotationType getAnnotationType() {
return AnnotationType.WARNING;
}
}

View File

@@ -0,0 +1,5 @@
package slangc.parser;
public enum WarningType {
INTERNAL_WARNING,
}