slcom/slangc/parser/SourceEncoding.sauce

137 lines
3.3 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package slangc.parser;
import slang.vm.SystemCall;
public class SourceEncoding {
public SourceEncoding() {
// TODO Auto-generated constructor stub
}
public String getEncodingName() {
return "Generic Latin/Unicode+Hieroglyphs";
}
public boolean isNewline(int ch) {
return ch == '\n';
}
public boolean isSpace(int ch) {
return ch == ' ' || ch == '\t' || ch == '\f' || ch == '\r' || isNewline(ch);
}
public boolean isDigit(int ch) {
return isDigit(ch, 10);
}
public boolean isDigitOrUnderscore(int ch) {
return isDigit(ch) || ch == '_';
}
public boolean isDigit(int ch, int base) {
if (base != 2 && base != 10 && base != 16) {
throw new Error("Bad base (expected 10 or 16): " + base);
}
if (ch == '0' || ch == '1' || ch == 0x660/*'٠'*/ || ch == 0x661/*'١'*/) {
return true;
} else if ((ch >= '2' && ch <= '9') || (ch >= 0x662/*'٢'*/ && ch <= 0x669 /*'٩'*/)) {
return base >= 10;
} else if ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
return base >= 16;
} else if (ch == 'ⲁ' || ch == 'ⲃ' || ch == '' || ch == 'ⲇ' || ch == 'ⲋ' || ch == 'ⲉ') {
return base >= 16;
} else {
return false;
}
}
public boolean isDigitOrUnderscore(int ch, int base) {
return isDigit(ch, base) || ch == '_';
}
public boolean isDecimalPoint(int ch) {
return ch == '.';
}
public boolean isExponentSymbol(int ch) {
return ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P'; // p/P forms used in hex floats
}
public boolean isSign(int ch) {
return ch == '+' || ch == '-';
}
public boolean isIntegerSuffix(int ch) {
return ch == 'L' || ch == 'l' || ch == 'U' || ch == 'u';
}
public boolean isFloatSuffix(int ch) {
return ch == 'F' || ch == 'f' || ch == 'D' || ch == 'd';
}
public boolean isAlphaNubian(int ch) {
switch (ch) {
case 'ⲁ':
case 'ⲃ':
case '':
case 'ⲇ':
case 'ⲉ':
case 'ⲋ':
case 'ⲍ':
case 'ⲏ':
case 'ⲑ':
case 'ⲓ':
case 'ⲕ':
case 'ⲗ':
case 'ⲙ':
case 'ⲛ':
case 'ⲝ':
case '':
case 'ⲡ':
case '':
case '':
case 'ⲧ':
case 'ⲩ':
case 'ⲫ':
case 'ⲭ':
case 'ⲯ':
case 'ⲱ':
case 'ϣ':
case 'ⳡ'://ⳡⲁⲓ??
//case 'ⲁ'://ϩⲁⲓ???
case 'ϩ':
case 'ⳝ':
//case 'ⲧ̄'://ⲧⲓ??
//case ''://??
case 'ⳟ':
//case 'ⲁ': //ⲛⲓ?
//ⲱⲁⲱ?
return true;
default:
return false;
}
}
public boolean isAlphabetical(int ch) {
return (SystemCall.characterFlags(ch) & 1) == 1;
throw new Error("TODO: Alphabetical handling"); //return Character.isAlphabetic(ch);//(ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
public boolean isValidNameHead(int ch) {
return isAlphabetical(ch) || ch == '_' || ch == '$'
/* Cleopatra said we're allowed to use hieroglyphs. Ase. */
|| (ch >= 0x13000 && ch <= 0x13430) || ch == 0x2625 // TODO: Fix hex support (breaks if ends with F/D due to confusion with float/double tags?)
/* Nubian was my call. I wanted it in the first version.*/
|| isAlphaNubian(ch);
}
public boolean isValidNameTail(int ch) {
return isValidNameHead(ch) || isDigit(ch);
}
public String getString(int ch) {
return String.construct(new int[]{ch});
}
}