// Copyright 1996, Marimba Inc. All Rights Reserved.


// Confidential and Proprietary Information of Marimba, Inc.


// @(#)Scanner.java, 1.7, 12/15/96





package marimba.text.editor;





import java.io.*;


import java.util.Hashtable;


import marimba.util.*;


import marimba.text.*;





/**


 * A scanner for Java code.  It returns "higher level" tokens for


 * Java programs: keywords, types, storage class, comment.


 *


 * Input is a Text object.  As tokens are returned, the caller can


 * query the scanner for the start and end positions in the text


 * object.


 *


 * @author	Jonathan Payne


 * @version 1.7, 12/15/96


 */


public class Scanner {
    /*
     * Token types returned by scan().  UNKNOWN is declared for callers
     * but is never returned by scan() itself: unrecognized input is
     * skipped rather than reported.
     */

    /** Placeholder type; not produced by {@link #scan()}. */
    static final int UNKNOWN = 0;

    /** A word found in the keyword table. */
    static final int KEYWORD = 1;

    /** A word found in the primitive type table. */
    static final int TYPE = 2;

    /** A word found in the storage class / modifier table. */
    static final int STORAGE = 3;

    /** A block comment introduced by slash-star. */
    static final int COMMENT = 4;

    /** A double-quoted string literal. */
    static final int STRING = 5;

    /** A line comment introduced by two slashes. */
    static final int SLASH_COMMENT = 6;

    /** A single-quoted character constant. */
    static final int CHAR_CONSTANT = 7;

    /**
     * Maps a word (as a ByteString) to an Integer holding its token
     * type: KEYWORD, STORAGE or TYPE.
     */
    static Hashtable words = new Hashtable(50);

    static {
        // NOTE: the "else if" entry can never be matched, because
        // scanWord() stops at the first byte that is not a letter,
        // digit, '$' or '_'.  It is retained so the table contents
        // stay as they were.
        String keywords[] = {
            "private", "protected", "public", "break", "byvalue",
            "case", "catch", "class", "continue", "default", "do",
            "else if", "else", "extends", "false", "finally", "for",
            "if", "implements", "import", "instanceof", "interface",
            "new", "null", "package", "return", "super", "switch",
            "this", "throw", "throws", "rest", "var", "outer",
            "operator", "inner", "generic", "future", "cast", "true",
            "try", "synchronize", "while"
        };

        String storage[] = {
            "static", "abstract", "const", "final",
            "synchronized", "threadsafe", "transient", "native"
        };

        // "long" and "short" were missing from the original table, so
        // those two primitive types were never reported as TYPE.
        String types[] = {
            "boolean", "int", "char", "byte", "short", "long",
            "float", "double", "void"
        };

        Integer key = new Integer(KEYWORD);
        for (int i = keywords.length; --i >= 0; )
            words.put(new ByteString(keywords[i]), key);

        Integer sto = new Integer(STORAGE);
        for (int i = storage.length; --i >= 0; )
            words.put(new ByteString(storage[i]), sto);

        Integer typ = new Integer(TYPE);
        for (int i = types.length; --i >= 0; )
            words.put(new ByteString(types[i]), typ);
    }

    /** Data we're tokenizing. */
    byte data[];

    /** Current offset into the data. */
    int pos0;

    /** Last position we're supposed to look at. */
    int pos1;

    /** Last position in text buffer (the text's length). */
    int hardLimit;

    /**
     * Bytestring we use to store words with.  It shares the scanner's
     * data array; after scan() returns a token type, its off and
     * length fields delimit that token.
     */
    public ByteString token = new ByteString();

    /**
     * Creates a scanner over part of a text.
     *
     * @param text the text whose bytes are scanned
     * @param pos0 offset at which scanning starts
     * @param pos1 offset at which scan() stops looking for new tokens;
     *             a token that starts before it may extend up to the
     *             end of the text
     */
    public Scanner(Text text, int pos0, int pos1) {
        token.data = data = text.getBytes();
        this.pos0 = pos0;
        this.pos1 = pos1;
        hardLimit = text.length();
    }

    /**
     * Creates a scanner over the whole text.
     *
     * @param text the text whose bytes are scanned
     */
    public Scanner(Text text) {
        this(text, 0, text.length());
    }

    /**
     * Advances to the next recognized token.  Words that are not in
     * the word table, and any other bytes, are skipped silently.
     *
     * @return KEYWORD, TYPE, STORAGE, COMMENT, SLASH_COMMENT, STRING
     *         or CHAR_CONSTANT for the token now described by
     *         token.off and token.length, or -1 once the current
     *         position has reached pos1
     */
    public int scan() {
        int limit = pos1;
        byte data[] = this.data;

      main:
        while (true) {
          whitespace:
            while (pos0 < limit) {
                switch (data[pos0]) {
                  case ' ': case '\t': case '\n': case '\r':
                    pos0 += 1;
                    continue;

                  default:
                    break whitespace;
                }
            }
            if (pos0 >= limit)
                return -1;

            token.off = pos0;
            switch (data[pos0++]) {
              case 'a': case 'b': case 'c': case 'd': case 'e':
              case 'f': case 'g': case 'h': case 'i': case 'j':
              case 'k': case 'l': case 'm': case 'n': case 'o':
              case 'p': case 'q': case 'r': case 's': case 't':
              case 'u': case 'v': case 'w': case 'x': case 'y':
              case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E':
              case 'F': case 'G': case 'H': case 'I': case 'J':
              case 'K': case 'L': case 'M': case 'N': case 'O':
              case 'P': case 'Q': case 'R': case 'S': case 'T':
              case 'U': case 'V': case 'W': case 'X': case 'Y':
              case 'Z':
              case '$': case '_': {
                  scanWord();
                  token.length = pos0 - token.off;
                  Integer v = (Integer) words.get(token);
                  if (v != null)
                      return v.intValue();
                  // Not a word we know about: keep looking.
                  continue main;
              }

              case '/':
                // A '/' that is the very last byte of the text cannot
                // start a comment; do not peek past the end.
                if (pos0 >= hardLimit)
                    continue main;

                switch (data[pos0]) {
                  case '/':
                    pos0 += 1;
                    scanComment(true);
                    token.length = pos0 - token.off;
                    return SLASH_COMMENT;

                  case '*':
                    pos0 += 1;
                    scanComment(false);
                    token.length = pos0 - token.off;
                    return COMMENT;

                  default:
                    continue main;
                }

              case '"':
              case '\'': {
                  // The opening quote decides both the terminator and
                  // the token type.
                  int c = data[pos0 - 1];
                  scanString(c);
                  token.length = pos0 - token.off;
                  return (c == '"') ? STRING : CHAR_CONSTANT;
              }

              case '\\':
                // Skip the byte following a backslash, but never move
                // past the end of the text.
                if (pos0 < hardLimit)
                    pos0 += 1;
                break;

              default:
                break;
            }
        }
    }

    /**
     * Advances pos0 past the remaining letters, digits, '$' and '_'
     * of the current word, stopping at the end of the text.
     */
    final void scanWord() {
        int limit = hardLimit;
        byte data[] = this.data;

        while (pos0 < limit) {
            switch (data[pos0]) {
              case 'a': case 'b': case 'c': case 'd': case 'e':
              case 'f': case 'g': case 'h': case 'i': case 'j':
              case 'k': case 'l': case 'm': case 'n': case 'o':
              case 'p': case 'q': case 'r': case 's': case 't':
              case 'u': case 'v': case 'w': case 'x': case 'y':
              case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E':
              case 'F': case 'G': case 'H': case 'I': case 'J':
              case 'K': case 'L': case 'M': case 'N': case 'O':
              case 'P': case 'Q': case 'R': case 'S': case 'T':
              case 'U': case 'V': case 'W': case 'X': case 'Y':
              case 'Z':
              case '$': case '_':
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                pos0 += 1;
                continue;

              default:
                return;
            }
        }
    }

    /**
     * Advances pos0 to the end of a comment whose opening two bytes
     * have already been consumed.
     *
     * @param slashSlash true for a line comment, which ends just
     *                   before the next newline; false for a block
     *                   comment, which ends just after the closing
     *                   star-slash.  An unterminated comment of
     *                   either kind ends at the end of the text.
     */
    final void scanComment(boolean slashSlash) {
        int limit = hardLimit;
        byte data[] = this.data;

        if (slashSlash) {
            while (pos0 < limit && data[pos0] != '\n')
                pos0 += 1;
        } else {
            while (pos0 < limit) {
                // Check the limit again before peeking at the byte
                // after '*': the '*' may be the last byte of the text.
                if (data[pos0++] == '*'
                    && pos0 < limit && data[pos0] == '/') {
                    pos0 += 1;
                    return;
                }
            }
        }
    }

    /**
     * Advances pos0 past a string or character literal whose opening
     * quote has already been consumed.  A backslash causes the byte
     * after it to be skipped, so an escaped quote does not end the
     * literal.  An unterminated literal ends at the end of the text.
     *
     * @param quote the quote byte that terminates the literal
     */
    final void scanString(int quote) {
        int limit = hardLimit;
        byte data[] = this.data;

        while (pos0 < limit) {
            int c = data[pos0++];

            if (c == '\\') {
                // Skip the escaped byte, but never move past the end
                // of the text when the backslash is the last byte.
                if (pos0 < limit)
                    pos0 += 1;
            } else if (c == quote) {
                return;
            }
        }
    }
}


