Token.java 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. import java.util.*;
  2. import java.util.regex.*;
  3. // Token class with match patterns (used with the built-in Scan class)
  /**
   * A lexical token together with the table of match patterns used to
   * recognize tokens.
   *
   * <p>Each {@link Match} constant pairs a regular expression with a
   * {@link TokType}. The patterns are compiled once, when the {@code Match}
   * enum is initialized; compile failures are accumulated in
   * {@link #patternFail} rather than thrown.
   */
  public class Token {

      /**
       * Error message listing the {@link Match} constants whose patterns
       * failed to compile, or {@code null} if every pattern compiled.
       */
      public static String patternFail = null;

      /** Convenience alias for {@link Match#$EOF}. */
      public static final Match $eof = Match.$EOF;

      /** Classification attached to every {@link Match} constant. */
      public enum TokType {
          /** An ordinary pattern (neither skip nor line toggle). */
          TOKEN,
          /** A pattern explicitly declared as a skip. */
          SKIP,
          /** A pattern whose specification began with {@code "^^"}. */
          LINE_TOGGLE,
          /** A constant that has no pattern at all (pattern is {@code null}). */
          SPECIAL;
      }

      /**
       * The match table: one constant per token/skip pattern, plus the
       * pattern-less special constants {@code $ERROR}, {@code $EOF} and
       * {@code $LINE}.
       */
      public enum Match {
          WHITESPACE ("\\s+", TokType.SKIP),
          NUM ("[0-9]+"),
          VAR ("[A-Za-z_][A-Za-z0-9_]*"),
          OP ("\\+|-"),
          MOP ("\\*|/"),
          IN ("==>"),
          OUT ("<=="),
          ASSIGN ("->"),
          SEMI (";"),
          $ERROR (null),
          $EOF (null),
          $LINE (null);

          /**
           * Pattern source text (with the leading {@code '^'} of a
           * {@code "^^"} line-toggle marker removed); {@code null} for
           * SPECIAL constants.
           */
          public String pattern;

          /** Classification of this constant; always set by the constructor. */
          public TokType tokType;

          /**
           * Compiled form of {@link #pattern}; {@code null} for SPECIAL
           * constants and for patterns that failed to compile.
           */
          public Pattern cPattern = null;

          /**
           * Declares a SPECIAL constant (when {@code pattern} is null) or a
           * TOKEN / LINE_TOGGLE constant (otherwise).
           *
           * @param pattern regular expression source, or {@code null}
           */
          Match(String pattern) {
              this(pattern, null);
          }

          /**
           * Legacy form taking a boolean skip flag.
           *
           * <p>The flag is honored: {@code true} declares a skip pattern,
           * {@code false} behaves like {@link #Match(String)}. (Previously
           * the flag was ignored and every use produced a skip pattern.)
           *
           * @param pattern regular expression source, or {@code null}
           * @param skip    whether the pattern is a skip pattern
           */
          Match(String pattern, boolean skip) {
              this(pattern, skip ? TokType.SKIP : null);
          }

          /**
           * Classifies the constant and compiles its pattern.
           *
           * <ul>
           *   <li>{@code pattern == null}: the constant is SPECIAL and nothing
           *       is compiled.</li>
           *   <li>{@code tokType == TokType.SKIP}: the constant is a SKIP.</li>
           *   <li>pattern starts with {@code "^^"}: the constant is a
           *       LINE_TOGGLE and the first {@code '^'} is dropped before
           *       compiling.</li>
           *   <li>otherwise: the constant is a TOKEN.</li>
           * </ul>
           *
           * <p>Only {@code TokType.SKIP} is consulted from {@code tokType};
           * any other value is treated the same as {@code null}.
           *
           * @param pattern regular expression source, or {@code null}
           * @param tokType {@code TokType.SKIP} to force a skip pattern
           */
          Match(String pattern, TokType tokType) {
              if (pattern == null) {
                  this.tokType = TokType.SPECIAL;
                  return;
              }

              if (tokType == TokType.SKIP) {
                  this.tokType = TokType.SKIP;
              } else if (pattern.startsWith("^^")) {
                  // Drop the marker's first '^'; the remaining '^' stays in
                  // the pattern that gets compiled.
                  pattern = pattern.substring(1);
                  this.tokType = TokType.LINE_TOGGLE;
              } else {
                  this.tokType = TokType.TOKEN;
              }
              this.pattern = pattern;

              try {
                  // DOTALL: '.' also matches line terminators.
                  this.cPattern = Pattern.compile(pattern, Pattern.DOTALL);
              } catch (PatternSyntaxException e) {
                  // Record the failure instead of throwing, so that every
                  // bad pattern is reported together via init().
                  if (patternFail == null) {
                      patternFail = "Lexical specification errors() for";
                  }
                  patternFail += (" " + this);
                  this.cPattern = null;
              }
          }

          /**
           * Forces the {@code Match} enum to load, which compiles all of the
           * patterns.
           *
           * @return {@link Token#patternFail}: {@code null} if every pattern
           *         compiled, otherwise the accumulated error message
           */
          public static String init() {
              return patternFail;
          }
      }

      /** The match that produced this token. */
      public Match match;

      /** This token's lexeme. */
      public String str;

      /** The line number where this token was found. */
      public int lno;

      /** The text line where this token appears. */
      public String line;

      /** Creates an empty token: null match, lexeme and line; line number 0. */
      public Token() {
          this(null, null, 0, null);
      }

      /**
       * Creates a fully specified token.
       *
       * @param match the match that produced the token
       * @param str   the lexeme
       * @param lno   the line number where the token was found
       * @param line  the text line containing the token
       */
      public Token(Match match, String str, int lno, String line) {
          this.match = match;
          this.str = str;
          this.lno = lno;
          this.line = line;
      }

      /**
       * Creates a token with no position information (line number 0, null
       * line text).
       *
       * @param match the match that produced the token
       * @param str   the lexeme
       */
      public Token(Match match, String str) {
          this(match, str, 0, null);
      }

      /**
       * Returns the lexeme.
       *
       * @return {@link #str}, which may be {@code null}
       */
      @Override
      public String toString() {
          return str;
      }

      /**
       * Returns the text used to describe this token in error messages.
       *
       * @return the lexeme for {@code $EOF} and {@code $ERROR} tokens and for
       *         tokens that have no match; otherwise the name of the match
       */
      public String errString() {
          if (match == null) {
              // A token built by the no-arg constructor has no match;
              // switching on null would throw NullPointerException.
              return str;
          }
          switch (match) {
              case $EOF:
              case $ERROR:
                  return str;
              default:
                  return match.toString(); // just the match name
          }
      }

      /**
       * Tells whether this token marks end of input.
       *
       * @return {@code true} if this token's match is {@code Match.$EOF}
       */
      public boolean isEOF() {
          // Compare against the enum constant itself rather than the $eof
          // alias: the alias can be captured as null if Token's static
          // initializer runs while Match is still being initialized.
          return this.match == Match.$EOF;
      }

      /**
       * Prints the match table, one line per {@link Match} constant, or
       * prints the pattern error message and exits with status 1 if any
       * pattern failed to compile.
       *
       * @param args ignored
       */
      public static void main(String [] args) {
          String msg = Match.init();
          if (msg != null) {
              System.out.println(msg);
              System.exit(1);
          }
          for (Match match : Match.values()) {
              if (match.tokType == TokType.SPECIAL) {
                  // Not a real token. Plain concatenation is used so the
                  // name is never interpreted as a format string.
                  System.out.println("special " + match);
                  continue;
              }
              String what = "??";
              switch (match.tokType) {
                  case SKIP:
                      what = "skip";
                      break;
                  case TOKEN:
                      what = "token";
                      break;
                  case LINE_TOGGLE:
                      what = "token (line toggle)";
                      break;
                  default:
                      break;
              }
              System.out.println(
                  String.format("%s %s '%s'", what, match.toString(), match.pattern)
              );
          }
      }
  //Token//
  }