/* * File: tokenscanner.h * -------------------- * This interface exports a scanner facility that divides a string into * individual tokens. */ #ifndef _tokenscanner_h #define _tokenscanner_h #include "cslib.h"/* * Type: TokenType * --------------- * This enumerated type defines the values of the getTokenType method. */ typedef enum { SEPARATOR, WORD, NUMBER, STRING, OPERATOR } TokenType;/* * Type: TokenScanner * ------------------ * This abstract type divides a string into individual tokens. The typical * use of the TokenScanner ADT is illustrated by the following pattern, * which reads the tokens in the string variable input: * * string token; * TokenScanner scanner; * * scanner = newTokenScanner(); * setInputString(scanner, input); * while (hasMoreTokens(scanner)) { * token = nextToken(scanner); * . . . process the token . . . * freeBlock(token); * } * freeTokenScanner(scanner); * * The TokenScanner ADT exports several additional methods that give * clients more control over its behavior. Those methods are described * individually in the documentation. */ typedef struct TokenScannerCDT *TokenScanner;/* * Function: newTokenScanner * Usage: scanner = newTokenScanner(); * ----------------------------------- * Creates a new TokenScanner with an empty token stream. Before using the * scanner, an input stream must be set by calling either setInputString or * setInputFile. */ TokenScanner newTokenScanner(void);/* * Function: freeTokenScanner * Usage: freeTokenScanner(scanner); * --------------------------------- * Frees the storage associated with the TokenScanner. */ void freeTokenScanner();/* * Function: setInputString * Usage: setInputString(scanner, str); * ------------------------------------ * Sets the token stream for this scanner to the specified string. */ void setInputString(TokenScanner scanner, string str);/* * Function: setInputFile * Usage: setInputFile(scanner, infile); * ------------------------------------- * Sets the token stream for this scanner to the specified file, which must * be open for input. */ void setInputFile(TokenScanner scanner, FILE *infile);/* * Function: hasMoreTokens * Usage: if (hasMoreTokens(scanner)) . . . * ---------------------------------------- * Returns true if there are additional tokens for this scanner to read. */ bool hasMoreTokens(TokenScanner scanner);/* * Function: nextToken * Usage: token = nextToken(scanner); * ---------------------------------- * Returns the next token from this If nextToken is called when no tokens * are available, it returns the empty string. */ string nextToken(TokenScanner scanner);/* * Function: saveToken * Usage: saveToken(scanner, token); * --------------------------------- * Pushes the specified token back into this scanner's input stream. On * the next call to nextToken, the scanner will return the saved token * without reading any additional characters from the token stream. */ void saveToken(TokenScanner scanner, string token);/* * Function: getPosition * Usage: pos = getPosition(scanner); * ---------------------------------- * Returns the current position of the scanner in the input stream. If * saveToken has been called, this position corresponds to the beginning of * the saved token. If saveToken is called more than once, getPosition * returns -1. */ int getPosition(TokenScanner scanner);/* * Function: ignoreWhitespace * Usage: ignoreWhitespace(scanner); * --------------------------------- * Tells the scanner to ignore whitespace characters. By default, the * nextToken method treats whitespace characters (typically spaces and * tabs) just like any other punctuation mark and returns them as * single-character tokens. Calling * * ignoreWhitespace(scanner); * * changes this behavior so that the scanner ignore whitespace characters. */ void ignoreWhitespace(TokenScanner scanner);/* * Function: ignoreComments * Usage: ignoreComments(scanner); * ------------------------------- * Tells the scanner to ignore comments. The scanner package recognizes * both the slash-star and slash-slash comment format from the C-based * family of languages. Calling * * ignoreComments(scanner); * * sets the parser to ignore comments. */ void ignoreComments(TokenScanner scanner);/* * Function: scanNumbers * Usage: scanNumbers(scanner); * ---------------------------- * Controls how the scanner treats tokens that begin with a digit. By * default, the nextToken method treats numbers and letters identically and * therefore does not provide any special processing for numbers. Calling * * scanNumbers(scanner); * * changes this behavior so that nextToken returns the longest substring * that can be interpreted as a real number. */ void scanNumbers(TokenScanner scanner);/* * Function: scanStrings * Usage: scanStrings(scanner); * ---------------------------- * Controls how the scanner treats tokens enclosed in quotation marks. By * default, quotation marks (either single or double) are treated just like * any other punctuation character. Calling * * scanStrings(scanner); * * changes this assumption so that nextToken returns a single token * consisting of all characters through the matching quotation mark. The * quotation marks are returned as part of the scanned token so that * clients can differentiate strings from other token types. */ void scanStrings(TokenScanner scanner);/* * Function: addWordCharacters * Usage: addWordCharacters(scanner, str); * --------------------------------------- * Adds the characters in str to the set of characters legal in a WORD * token. For example, calling addWordCharacters("_") adds the underscore * to the set of characters that are accepted as part of a word. */ void addWordCharacters(TokenScanner scanner, string str);/* * Function: isWordCharacter * Usage: if (isWordCharacter(scanner, ch)) . . . * ---------------------------------------------- * Returns true if the character is valid in a word. */ bool isWordCharacter(TokenScanner scanner, char ch);/* * Function: addOperator * Usage: addOperator(scanner, op); * -------------------------------- * Defines a new multicharacter operator. Whenever you call nextToken when * the input stream contains operator characters, the scanner returns the * longest possible operator string that can be read at that point. */ void addOperator(TokenScanner scanner, string op);/* * Function: verifyToken * Usage: verifyToken(scanner, expected); * -------------------------------------- * Reads the next token and makes sure it matches the string expected. If * it does not, verifyToken throws an error. */ void verifyToken(TokenScanner scanner, string expected);/* * Function: getTokenType * Usage: type = getTokenType(scanner, token); * ------------------------------------------- * Returns the type of this token. This type will match one of the * following enumerated type constants: EOF, SEPARATOR, WORD, NUMBER, * STRING, or OPERATOR. */ TokenType getTokenType(TokenScanner scanner, string token);/* * Method: getChar * Usage: int ch = getChar(scanner); * --------------------------------- * Reads the next character from the scanner input stream. */ int getChar(TokenScanner scanner);/* * Method: ungetChar * Usage: ungetChar(scanner, ch); * ------------------------------ * Pushes the character ch back into the scanner stream. The character * must match the one that was read. */ void ungetChar(TokenScanner scanner, int ch);/* * Function: getStringValue * Usage: str = getStringValue(token); * ----------------------------------- * Returns the string value of a token. This value is formed by removing * any surrounding quotation marks and replacing escape sequences by the * appropriate characters. */ string getStringValue(string token); #endif