|
@@ -0,0 +1,228 @@
|
|
|
|
|
+import java.util.regex.*;
|
|
|
|
|
+import java.util.*;
|
|
|
|
|
+import java.io.*;
|
|
|
|
|
+
|
|
|
|
|
+public class Scan implements IScan {
|
|
|
|
|
+
|
|
|
|
|
+ private BufferedReader rdr; // get input from here, line by line
|
|
|
|
|
+ private String s; // current string being scanned
|
|
|
|
|
+ private int start; // starting position in the string to scan
|
|
|
|
|
+ private int end; // ending position
|
|
|
|
|
+
|
|
|
|
|
+ public int lno; // current line number
|
|
|
|
|
+ public Token tok; // this is persistent across all calls to cur()
|
|
|
|
|
+ public Token lineMode; // token to toggle line mode
|
|
|
|
|
+
|
|
|
|
|
+ // create a scanner object on a buffered reader
|
|
|
|
|
+ public Scan(BufferedReader rdr) {
|
|
|
|
|
+ this.rdr = rdr;
|
|
|
|
|
+ this.lno = 0;
|
|
|
|
|
+ this.lineMode = null;
|
|
|
|
|
+ this.s = null;
|
|
|
|
|
+ this.tok = null;
|
|
|
|
|
+ // force the enum Match class to compile its patterns
|
|
|
|
|
+ String msg = Token.Match.init();
|
|
|
|
|
+ if (msg != null) {
|
|
|
|
|
+ // one or more pattern compilation errors have occurred
|
|
|
|
|
+ System.err.println(msg);
|
|
|
|
|
+ System.exit(1);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // create a scanner object on a string
|
|
|
|
|
+ public Scan(String s) {
|
|
|
|
|
+ this(new BufferedReader(new StringReader(s)));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public void reset() {
|
|
|
|
|
+ // force the scanner to process the next line
|
|
|
|
|
+ s = null;
|
|
|
|
|
+ tok = null;
|
|
|
|
|
+ lineMode = null;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // fill the string buffer from the reader if it's exhausted or null)
|
|
|
|
|
+ public void fillString() {
|
|
|
|
|
+ if (s == null || start >= end) {
|
|
|
|
|
+ // get the next line from the reader
|
|
|
|
|
+ try {
|
|
|
|
|
+ s = rdr.readLine();
|
|
|
|
|
+ if (s == null)
|
|
|
|
|
+ return; // end of file
|
|
|
|
|
+ lno++;
|
|
|
|
|
+ s += "\n"; // make sure the string has a newline
|
|
|
|
|
+ start = 0;
|
|
|
|
|
+ end = s.length();
|
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
|
+ s = null;
|
|
|
|
|
+ }
|
|
|
|
|
+ // System.err.print("s=" + s);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public Token cur() {
|
|
|
|
|
+ // lazy
|
|
|
|
|
+ if (tok != null)
|
|
|
|
|
+ return tok; // don't get a new token if we already have one
|
|
|
|
|
+
|
|
|
|
|
+ String matchString = "";
|
|
|
|
|
+ Token.Match matchFound = null;
|
|
|
|
|
+
|
|
|
|
|
+ LOOP:
|
|
|
|
|
+ while (true) {
|
|
|
|
|
+ fillString(); // get another line if necessary
|
|
|
|
|
+ if (s == null) {
|
|
|
|
|
+ tok = new Token(Token.$eof, "!EOF", lno, null); // EOF
|
|
|
|
|
+ return tok;
|
|
|
|
|
+ }
|
|
|
|
|
+ // s cannot be null here
|
|
|
|
|
+ // are we in line mode?
|
|
|
|
|
+ if (lineMode != null) {
|
|
|
|
|
+ Pattern cpat = lineMode.match.cPattern;
|
|
|
|
|
+ Matcher m = cpat.matcher(s);
|
|
|
|
|
+ m.region(0,end);
|
|
|
|
|
+ start = end; // consume the line before next match
|
|
|
|
|
+ if (m.lookingAt()) {
|
|
|
|
|
+ // found the lineMode token, exit line mode
|
|
|
|
|
+ // and return the matched lineMode token
|
|
|
|
|
+ // System.out.println("leaving line mode...");
|
|
|
|
|
+ tok = new Token(lineMode.match, m.group(), lno, s);
|
|
|
|
|
+ lineMode = null;
|
|
|
|
|
+ return tok;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // return the entire line as a token
|
|
|
|
|
+ tok = new Token(Token.Match.$LINE, s, lno, s);
|
|
|
|
|
+ return tok;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ int matchEnd = start; // current end of match
|
|
|
|
|
+ for (Token.Match match : Token.Match.values()) {
|
|
|
|
|
+ Pattern cpat = match.cPattern;
|
|
|
|
|
+ if (cpat == null)
|
|
|
|
|
+ break; // nothing matches, so can't find a token
|
|
|
|
|
+ if (match.tokType == Token.TokType.SKIP && matchFound != null)
|
|
|
|
|
+ continue; // ignore skips if we have a pending token
|
|
|
|
|
+ if (start != 0 && match.pattern.charAt(0) == '^')
|
|
|
|
|
+ continue; // '^' must match at start of line
|
|
|
|
|
+ Matcher m = cpat.matcher(s);
|
|
|
|
|
+ m.region(start, end);
|
|
|
|
|
+ if (m.lookingAt()) {
|
|
|
|
|
+ int e = m.end();
|
|
|
|
|
+ if (e == start)
|
|
|
|
|
+ continue; // empty match, so try next pattern
|
|
|
|
|
+ if (match.tokType == Token.TokType.SKIP) {
|
|
|
|
|
+ // there's a non-empty skip match,
|
|
|
|
|
+ // so we skip over the matched part
|
|
|
|
|
+ // and get more stuff to read
|
|
|
|
|
+ start = e;
|
|
|
|
|
+ continue LOOP;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (matchEnd < e) {
|
|
|
|
|
+ // found a longer match -- keep it!
|
|
|
|
|
+ matchEnd = e;
|
|
|
|
|
+ matchString = m.group();
|
|
|
|
|
+ matchFound = match;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (matchFound == null) { // got to $ERROR, so nothing matches!!
|
|
|
|
|
+ char ch = s.charAt(start++); // grab the char and advance
|
|
|
|
|
+ String sch;
|
|
|
|
|
+ if (ch >= ' ' && ch <= '~')
|
|
|
|
|
+ sch = String.format("\"%c\"", ch);
|
|
|
|
|
+ else
|
|
|
|
|
+ sch = String.format("\\u%04x", (int)ch);
|
|
|
|
|
+ tok = new Token(Token.Match.$ERROR, "!ERROR("+sch+")", lno, s);
|
|
|
|
|
+ return tok;
|
|
|
|
|
+ }
|
|
|
|
|
+ start = matchEnd; // start of next token match
|
|
|
|
|
+ // matchString is the matching string
|
|
|
|
|
+ tok = new Token(matchFound, matchString, lno, s); // persistent
|
|
|
|
|
+ // System.out.println(String.format("match=%s\n", toggle));
|
|
|
|
|
+ if (matchFound.tokType == Token.TokType.LINE_TOGGLE) {
|
|
|
|
|
+ // System.out.println("going to line mode...");
|
|
|
|
|
+ start = end; // swallow the rest of the line
|
|
|
|
|
+ lineMode = tok;
|
|
|
|
|
+ }
|
|
|
|
|
+ return tok;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public void adv() {
|
|
|
|
|
+ // if we have already advanced past the current token,
|
|
|
|
|
+ // we'll have to do it again
|
|
|
|
|
+ if (tok == null)
|
|
|
|
|
+ cur();
|
|
|
|
|
+ tok = null;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public void put(Token t) {
|
|
|
|
|
+ throw new PLCCException("PLCC Scan error",
|
|
|
|
|
+ "put not implemented");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // See if the expected token match is the same as the match
|
|
|
|
|
+ // of the current token
|
|
|
|
|
+ public Token match(Token.Match match, Trace trace) {
|
|
|
|
|
+ Token t = cur();
|
|
|
|
|
+ Token.Match mcur = t.match; // the token we got
|
|
|
|
|
+ if (match == mcur) { // compare the match expected with the token we got
|
|
|
|
|
+ if (trace != null)
|
|
|
|
|
+ trace.print(t);
|
|
|
|
|
+ adv();
|
|
|
|
|
+ } else {
|
|
|
|
|
+ String msg = "expected token " + match + ", got " + t.errString();
|
|
|
|
|
+ throw new PLCCException ("Parse error", msg);
|
|
|
|
|
+ }
|
|
|
|
|
+ return t;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public boolean isEOF() {
|
|
|
|
|
+ return cur().isEOF();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public void printTokens() {
|
|
|
|
|
+ while (hasNext()) {
|
|
|
|
|
+ Token t = next();
|
|
|
|
|
+ String s;
|
|
|
|
|
+ switch(t.match) {
|
|
|
|
|
+ case $ERROR:
|
|
|
|
|
+ s = t.toString();
|
|
|
|
|
+ break;
|
|
|
|
|
+ default:
|
|
|
|
|
+ s = String.format("%s '%s'", t.match.toString(), t.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ System.out.println(String.format("%4d: %s", lno, s));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public boolean hasNext() {
|
|
|
|
|
+ return !cur().isEOF();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public Token next() {
|
|
|
|
|
+ Token t = cur();
|
|
|
|
|
+ adv();
|
|
|
|
|
+ return t;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public static void main(String [] args) {
|
|
|
|
|
+ BufferedReader rdr = null;
|
|
|
|
|
+ if (args.length == 0) {
|
|
|
|
|
+ rdr = new BufferedReader(new InputStreamReader(System.in));
|
|
|
|
|
+ } else if (args.length == 1) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ rdr = new BufferedReader(new FileReader(args[0]));
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ System.out.println(e.getMessage());
|
|
|
|
|
+ System.exit(1);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ else {
|
|
|
|
|
+ System.err.println("usage: Scan [filename]");
|
|
|
|
|
+ System.exit(1);
|
|
|
|
|
+ }
|
|
|
|
|
+ Scan scn = new Scan(rdr);
|
|
|
|
|
+ scn.printTokens();
|
|
|
|
|
+ }
|
|
|
|
|
+}
|