// Scan.java (7.8 KB)
  1. import java.util.regex.*;
  2. import java.util.*;
  3. import java.io.*;
  4. public class Scan implements IScan {
  5. private BufferedReader rdr; // get input from here, line by line
  6. private String s; // current string being scanned
  7. private int start; // starting position in the string to scan
  8. private int end; // ending position
  9. public int lno; // current line number
  10. public Token tok; // this is persistent across all calls to cur()
  11. public Token lineMode; // token to toggle line mode
  12. // create a scanner object on a buffered reader
  13. public Scan(BufferedReader rdr) {
  14. this.rdr = rdr;
  15. this.lno = 0;
  16. this.lineMode = null;
  17. this.s = null;
  18. this.tok = null;
  19. // force the enum Match class to compile its patterns
  20. String msg = Token.Match.init();
  21. if (msg != null) {
  22. // one or more pattern compilation errors have occurred
  23. System.err.println(msg);
  24. System.exit(1);
  25. }
  26. }
  27. // create a scanner object on a string
  28. public Scan(String s) {
  29. this(new BufferedReader(new StringReader(s)));
  30. }
  31. public void reset() {
  32. // force the scanner to process the next line
  33. s = null;
  34. tok = null;
  35. lineMode = null;
  36. }
  37. // fill the string buffer from the reader if it's exhausted or null)
  38. public void fillString() {
  39. if (s == null || start >= end) {
  40. // get the next line from the reader
  41. try {
  42. s = rdr.readLine();
  43. if (s == null)
  44. return; // end of file
  45. lno++;
  46. s += "\n"; // make sure the string has a newline
  47. start = 0;
  48. end = s.length();
  49. } catch (IOException e) {
  50. s = null;
  51. }
  52. // System.err.print("s=" + s);
  53. }
  54. }
  55. public Token cur() {
  56. // lazy
  57. if (tok != null)
  58. return tok; // don't get a new token if we already have one
  59. String matchString = "";
  60. Token.Match matchFound = null;
  61. LOOP:
  62. while (true) {
  63. fillString(); // get another line if necessary
  64. if (s == null) {
  65. tok = new Token(Token.$eof, "!EOF", lno, null); // EOF
  66. return tok;
  67. }
  68. // s cannot be null here
  69. // are we in line mode?
  70. if (lineMode != null) {
  71. Pattern cpat = lineMode.match.cPattern;
  72. Matcher m = cpat.matcher(s);
  73. m.region(0,end);
  74. start = end; // consume the line before next match
  75. if (m.lookingAt()) {
  76. // found the lineMode token, exit line mode
  77. // and return the matched lineMode token
  78. // System.out.println("leaving line mode...");
  79. tok = new Token(lineMode.match, m.group(), lno, s);
  80. lineMode = null;
  81. return tok;
  82. } else {
  83. // return the entire line as a token
  84. tok = new Token(Token.Match.$LINE, s, lno, s);
  85. return tok;
  86. }
  87. }
  88. int matchEnd = start; // current end of match
  89. for (Token.Match match : Token.Match.values()) {
  90. Pattern cpat = match.cPattern;
  91. if (cpat == null)
  92. break; // nothing matches, so can't find a token
  93. if (match.tokType == Token.TokType.SKIP && matchFound != null)
  94. continue; // ignore skips if we have a pending token
  95. if (start != 0 && match.pattern.charAt(0) == '^')
  96. continue; // '^' must match at start of line
  97. Matcher m = cpat.matcher(s);
  98. m.region(start, end);
  99. if (m.lookingAt()) {
  100. int e = m.end();
  101. if (e == start)
  102. continue; // empty match, so try next pattern
  103. if (match.tokType == Token.TokType.SKIP) {
  104. // there's a non-empty skip match,
  105. // so we skip over the matched part
  106. // and get more stuff to read
  107. start = e;
  108. continue LOOP;
  109. }
  110. if (matchEnd < e) {
  111. // found a longer match -- keep it!
  112. matchEnd = e;
  113. matchString = m.group();
  114. matchFound = match;
  115. }
  116. }
  117. }
  118. if (matchFound == null) { // got to $ERROR, so nothing matches!!
  119. char ch = s.charAt(start++); // grab the char and advance
  120. String sch;
  121. if (ch >= ' ' && ch <= '~')
  122. sch = String.format("\"%c\"", ch);
  123. else
  124. sch = String.format("\\u%04x", (int)ch);
  125. tok = new Token(Token.Match.$ERROR, "!ERROR("+sch+")", lno, s);
  126. return tok;
  127. }
  128. start = matchEnd; // start of next token match
  129. // matchString is the matching string
  130. tok = new Token(matchFound, matchString, lno, s); // persistent
  131. // System.out.println(String.format("match=%s\n", toggle));
  132. if (matchFound.tokType == Token.TokType.LINE_TOGGLE) {
  133. // System.out.println("going to line mode...");
  134. start = end; // swallow the rest of the line
  135. lineMode = tok;
  136. }
  137. return tok;
  138. }
  139. }
  140. public void adv() {
  141. // if we have already advanced past the current token,
  142. // we'll have to do it again
  143. if (tok == null)
  144. cur();
  145. tok = null;
  146. }
  147. public void put(Token t) {
  148. throw new PLCCException("PLCC Scan error",
  149. "put not implemented");
  150. }
  151. // See if the expected token match is the same as the match
  152. // of the current token
  153. public Token match(Token.Match match, Trace trace) {
  154. Token t = cur();
  155. Token.Match mcur = t.match; // the token we got
  156. if (match == mcur) { // compare the match expected with the token we got
  157. if (trace != null)
  158. trace.print(t);
  159. adv();
  160. } else {
  161. String msg = "expected token " + match + ", got " + t.errString();
  162. throw new PLCCException ("Parse error", msg);
  163. }
  164. return t;
  165. }
  166. public boolean isEOF() {
  167. return cur().isEOF();
  168. }
  169. public void printTokens() {
  170. while (hasNext()) {
  171. Token t = next();
  172. String s;
  173. switch(t.match) {
  174. case $ERROR:
  175. s = t.toString();
  176. break;
  177. default:
  178. s = String.format("%s '%s'", t.match.toString(), t.toString());
  179. }
  180. System.out.println(String.format("%4d: %s", lno, s));
  181. }
  182. }
  183. public boolean hasNext() {
  184. return !cur().isEOF();
  185. }
  186. public Token next() {
  187. Token t = cur();
  188. adv();
  189. return t;
  190. }
  191. public static void main(String [] args) {
  192. BufferedReader rdr = null;
  193. if (args.length == 0) {
  194. rdr = new BufferedReader(new InputStreamReader(System.in));
  195. } else if (args.length == 1) {
  196. try {
  197. rdr = new BufferedReader(new FileReader(args[0]));
  198. } catch (Exception e) {
  199. System.out.println(e.getMessage());
  200. System.exit(1);
  201. }
  202. }
  203. else {
  204. System.err.println("usage: Scan [filename]");
  205. System.exit(1);
  206. }
  207. Scan scn = new Scan(rdr);
  208. scn.printTokens();
  209. }
  210. }