| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- import java.util.regex.*;
- import java.util.*;
- import java.io.*;
- public class Scan implements IScan {
- private BufferedReader rdr; // get input from here, line by line
- private String s; // current string being scanned
- private int start; // starting position in the string to scan
- private int end; // ending position
- public int lno; // current line number
- public Token tok; // this is persistent across all calls to cur()
- public Token lineMode; // token to toggle line mode
- // create a scanner object on a buffered reader
- public Scan(BufferedReader rdr) {
- this.rdr = rdr;
- this.lno = 0;
- this.lineMode = null;
- this.s = null;
- this.tok = null;
- // force the enum Match class to compile its patterns
- String msg = Token.Match.init();
- if (msg != null) {
- // one or more pattern compilation errors have occurred
- System.err.println(msg);
- System.exit(1);
- }
- }
- // create a scanner object on a string
- public Scan(String s) {
- this(new BufferedReader(new StringReader(s)));
- }
- public void reset() {
- // force the scanner to process the next line
- s = null;
- tok = null;
- lineMode = null;
- }
- // fill the string buffer from the reader if it's exhausted or null)
- public void fillString() {
- if (s == null || start >= end) {
- // get the next line from the reader
- try {
- s = rdr.readLine();
- if (s == null)
- return; // end of file
- lno++;
- s += "\n"; // make sure the string has a newline
- start = 0;
- end = s.length();
- } catch (IOException e) {
- s = null;
- }
- // System.err.print("s=" + s);
- }
- }
- public Token cur() {
- // lazy
- if (tok != null)
- return tok; // don't get a new token if we already have one
- String matchString = "";
- Token.Match matchFound = null;
- LOOP:
- while (true) {
- fillString(); // get another line if necessary
- if (s == null) {
- tok = new Token(Token.$eof, "!EOF", lno, null); // EOF
- return tok;
- }
- // s cannot be null here
- // are we in line mode?
- if (lineMode != null) {
- Pattern cpat = lineMode.match.cPattern;
- Matcher m = cpat.matcher(s);
- m.region(0,end);
- start = end; // consume the line before next match
- if (m.lookingAt()) {
- // found the lineMode token, exit line mode
- // and return the matched lineMode token
- // System.out.println("leaving line mode...");
- tok = new Token(lineMode.match, m.group(), lno, s);
- lineMode = null;
- return tok;
- } else {
- // return the entire line as a token
- tok = new Token(Token.Match.$LINE, s, lno, s);
- return tok;
- }
- }
- int matchEnd = start; // current end of match
- for (Token.Match match : Token.Match.values()) {
- Pattern cpat = match.cPattern;
- if (cpat == null)
- break; // nothing matches, so can't find a token
- if (match.tokType == Token.TokType.SKIP && matchFound != null)
- continue; // ignore skips if we have a pending token
- if (start != 0 && match.pattern.charAt(0) == '^')
- continue; // '^' must match at start of line
- Matcher m = cpat.matcher(s);
- m.region(start, end);
- if (m.lookingAt()) {
- int e = m.end();
- if (e == start)
- continue; // empty match, so try next pattern
- if (match.tokType == Token.TokType.SKIP) {
- // there's a non-empty skip match,
- // so we skip over the matched part
- // and get more stuff to read
- start = e;
- continue LOOP;
- }
- if (matchEnd < e) {
- // found a longer match -- keep it!
- matchEnd = e;
- matchString = m.group();
- matchFound = match;
- }
- }
- }
- if (matchFound == null) { // got to $ERROR, so nothing matches!!
- char ch = s.charAt(start++); // grab the char and advance
- String sch;
- if (ch >= ' ' && ch <= '~')
- sch = String.format("\"%c\"", ch);
- else
- sch = String.format("\\u%04x", (int)ch);
- tok = new Token(Token.Match.$ERROR, "!ERROR("+sch+")", lno, s);
- return tok;
- }
- start = matchEnd; // start of next token match
- // matchString is the matching string
- tok = new Token(matchFound, matchString, lno, s); // persistent
- // System.out.println(String.format("match=%s\n", toggle));
- if (matchFound.tokType == Token.TokType.LINE_TOGGLE) {
- // System.out.println("going to line mode...");
- start = end; // swallow the rest of the line
- lineMode = tok;
- }
- return tok;
- }
- }
- public void adv() {
- // if we have already advanced past the current token,
- // we'll have to do it again
- if (tok == null)
- cur();
- tok = null;
- }
- public void put(Token t) {
- throw new PLCCException("PLCC Scan error",
- "put not implemented");
- }
- // See if the expected token match is the same as the match
- // of the current token
- public Token match(Token.Match match, Trace trace) {
- Token t = cur();
- Token.Match mcur = t.match; // the token we got
- if (match == mcur) { // compare the match expected with the token we got
- if (trace != null)
- trace.print(t);
- adv();
- } else {
- String msg = "expected token " + match + ", got " + t.errString();
- throw new PLCCException ("Parse error", msg);
- }
- return t;
- }
- public boolean isEOF() {
- return cur().isEOF();
- }
- public void printTokens() {
- while (hasNext()) {
- Token t = next();
- String s;
- switch(t.match) {
- case $ERROR:
- s = t.toString();
- break;
- default:
- s = String.format("%s '%s'", t.match.toString(), t.toString());
- }
- System.out.println(String.format("%4d: %s", lno, s));
- }
- }
- public boolean hasNext() {
- return !cur().isEOF();
- }
- public Token next() {
- Token t = cur();
- adv();
- return t;
- }
- public static void main(String [] args) {
- BufferedReader rdr = null;
- if (args.length == 0) {
- rdr = new BufferedReader(new InputStreamReader(System.in));
- } else if (args.length == 1) {
- try {
- rdr = new BufferedReader(new FileReader(args[0]));
- } catch (Exception e) {
- System.out.println(e.getMessage());
- System.exit(1);
- }
- }
- else {
- System.err.println("usage: Scan [filename]");
- System.exit(1);
- }
- Scan scn = new Scan(rdr);
- scn.printTokens();
- }
- }
|