Code archives/Miscellaneous/Blitz Lexical Analyser
This code has been declared by its author to be Public Domain code.
Download source code
| |||||
| This code splits a Blitz source file into tokens. Each token represents items like operators, keywords, comments, end of line, functions etc. Useful if you want to write a pre-processor or other tool that manipulates Blitz code files. Contains an example for just printing the tokens in a file. | |||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Lexer.bb
;
; Tokenises Blitz Basic code
;
; Steve Hill, 2003
;
; OpenState(fileName$) - creates a new TState
; CloseState() - closes the files of all open states and destroys them
; GetToken(state) - read the next token
;
; The current token is available in state\tok$
;
; Versions
; 0.1 Initial version 27 Aug 2003
; 0.2 Added >< => =< 29 Aug 2003
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; CONSTANTS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Constants for various character types
;
; ASCII codes of the white-space / line-end characters
Const SPACE = 32
Const TAB = 9
Const CR = 13
Const LF = 10
; Characters SkipSpace() discards. LF is deliberately not listed here:
; GetToken() turns it into a TOK_EOL token instead.
; (Global rather than Const because it is built with Chr$().)
Global WHITE_SPACE$ = Chr$(SPACE) + Chr$(TAB) + Chr$(CR)
; Character classes, used as Instr() lookup sets
Const ALPHA$ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
Const DIGITS$ = "0123456789"
Const HEXDIGITS$ = "0123456789abcdefABCDEF"
Const BINDIGITS$ = "01"
; Characters that start an operator token. The set is only used for
; membership tests, so the backslash is listed once (it used to appear twice).
Const DELIM$ = "^*+-~<>/\#%.$()[],=:"
Global QUOTE$ = Chr$(34)
Global ALPHANUM$ = ALPHA$ + DIGITS$
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; TYPES
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; TState
;
; Keeps track of the current file, look-ahead character and token.
; Can be used as a stack for include files.
;
Type TState
; Handle returned by ReadFile() for the source file being tokenised
Field file
; Look-ahead character: the next unconsumed character, "" at end of file
Field ch$
; Text of the most recently read token ("" once end of file is reached)
Field tok$
; Kind of the current token, one of the TOK_* constants
Field tokType
; Current line number, starting at 1; advanced by GetChar() when it moves past a line feed
Field lineNum
; Field charNum
End Type
; Token kinds stored in TState\tokType by the Get* functions
; Identifier or keyword (see GetWord)
Const TOK_WORD = 0
; Operator or delimiter, including the words And / Or / Xor and a lone "$"
Const TOK_OPERATOR = 1
; ";" comment running to the end of the line
Const TOK_COMMENT = 2
; Decimal literal, e.g. 1 or 1.2
Const TOK_DEC_NUMBER = 3
; "$" followed by hex digits
Const TOK_HEX_NUMBER = 4
; "%" followed by binary digits
Const TOK_BIN_NUMBER = 5
; Double-quoted string literal
Const TOK_STRING = 6
; Line feed
Const TOK_EOL = 7
; End of file; tok$ is "" for this kind
Const TOK_EOF = 8
; Not assigned by any function in this file
Const TOK_UNKNOWN = 9
; TDescriptor
;
; Describes a function: name, return type and parameter type list.
; Assigned a unique id for each function ... its "pointer"
;
Type TDescriptor
; NOTE(review): this type is not referenced by any function in this file;
; the field meanings below follow the header comment and the field names.
; Function name
Field name$
; Return type
Field typ$
; Parameter type list (encoding not shown in this file)
Field params$
; Unique id assigned to the function
Field id
End Type
; Error
;
; Something has gone wrong bail out.
;
Function Error(e$, state.TState)
	; Fatal error reporter: prints the message, waits for a key and
	; terminates the program (never returns to the caller).
	;   e$    - message to display
	;   state - lexer state; its line number is shown only when a file is open
	Print e$
	If state\file Then Print "Error on line " + Str$(state\lineNum)
	Print "Press a key"
	WaitKey
	End
End Function
; OpenState
;
; Open the file initialise the fields
;
Function OpenState(name$)
	; Creates a new TState for the file name$, opens the file and reads the
	; first token into it. Nothing is returned; the example at the end of
	; this file fetches the new state with "Last TState".
	; If the file cannot be opened, Error() is called, which ends the program.
	Print "Parsing " + name$
	state.TState = New TState
	state\file = ReadFile(name$)
	If state\file = 0 Then
		; Fixed: a space was missing between the file name and "not found"
		Error("File " + name$ + " not found", state)
	EndIf
	state\lineNum = 1
	; No look-ahead character has been read yet; the GetToken() call below
	; is what pulls the first character in (via SkipSpace).
	state\ch$ = ""
	state\tok$ = ""
	GetToken(state)
End Function
; CloseState
;
; Close the file of every open state and delete all states
;
Function CloseState()
	; Walks the list of TState objects, closing the file held by each,
	; then removes every TState object.
	cur.TState = First TState
	While cur <> Null
		CloseFile(cur\file)
		cur = After cur
	Wend
	Delete Each TState
End Function
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; LEXICAL FUNCTIONS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; GetChar
;
; Read a character from the current file into the state
;
Function GetChar(state.TState)
	; Advances the look-ahead character state\ch$ by one byte of the file.
	; At end of file state\ch$ becomes "".
	; The line counter is bumped when the character being left behind is a
	; line feed, i.e. when reading moves on to the next line.
	If state\ch$ = Chr$(LF) Then state\lineNum = state\lineNum + 1
	If Eof(state\file) = 0 Then
		state\ch$ = Chr$(ReadByte(state\file))
	Else
		state\ch$ = ""
	EndIf
End Function
; SkipSpace
;
; Skip white space
;
Function SkipSpace(state.TState)
	; Consumes characters while the look-ahead is one of WHITE_SPACE$
	; (space, tab, carriage return) and stops at end of file.
	; NOTE(review): OpenState() starts with state\ch$ = "" and relies on this
	; function to read the first character, so Instr() with an empty search
	; string is presumably non-zero in Blitz - confirm before changing the test.
	Repeat
		If Instr(WHITE_SPACE$, state\ch$) = 0 Then Return
		GetChar(state)
	Until state\ch$ = ""
End Function
; GetFollowing
;
; Generic token reader, reads while characters are
; those in pat. Places token in state.
;
Function GetFollowing(state.TState, pat$)
	; Generic token reader. The current look-ahead character is always taken
	; as the first character of the token; further characters are appended
	; for as long as they occur in pat$. The result is stored in state\tok$
	; and the look-ahead is left on the first character not consumed.
	;   state - lexer state to read from / write to
	;   pat$  - set of characters allowed after the first one
	collected$ = state\ch$
	GetChar(state)
	Repeat
		; "" means end of file
		If state\ch$ = "" Then Exit
		If Instr(pat$, state\ch$) = 0 Then Exit
		collected$ = collected$ + state\ch$
		GetChar(state)
	Forever
	state\tok$ = collected$
End Function
; GetDecNumber
;
; eg. 1 or 1.2
;
Function GetDecNumber$(state.TState)
	; Reads a decimal literal: the leading digit plus any following run of
	; digits and "." characters. The text goes to state\tok$.
	; No value is explicitly returned despite the $ return type.
	state\tokType = TOK_DEC_NUMBER
	numberChars$ = DIGITS$ + "."
	GetFollowing(state, numberChars$)
End Function
; GetBinNumber
;
; eg. %1100
;
Function GetBinNumber$(state.TState)
	; Reads a token starting with "%": the "%" plus any following binary
	; digits, stored in state\tok$.
	; A "%" with no digits after it (for example the integer type tag in
	; "x% = 1") is not a number, so it is classified as TOK_OPERATOR,
	; matching how GetHexNumber() treats a lone "$".
	; No value is explicitly returned despite the $ return type.
	GetFollowing(state, BINDIGITS$)
	If state\tok$ = "%" Then
		state\tokType = TOK_OPERATOR
	Else
		state\tokType = TOK_BIN_NUMBER
	EndIf
End Function
; GetHexNumber
;
; eg. $abC1
;
Function GetHexNumber$(state.TState)
	; Reads a token starting with "$": the "$" plus any following hex digits,
	; stored in state\tok$. A "$" on its own is tagged TOK_OPERATOR, anything
	; longer is tagged TOK_HEX_NUMBER.
	; No value is explicitly returned despite the $ return type.
	GetFollowing(state, HEXDIGITS$)
	state\tokType = TOK_HEX_NUMBER
	If state\tok$ = "$" Then state\tokType = TOK_OPERATOR
End Function
; GetEOL
;
; Get end of line
Function GetEOL(state.TState)
	; Emits an end-of-line token: the current look-ahead character becomes
	; the token text, and the look-ahead moves on by one character.
	state\tokType = TOK_EOL
	state\tok$ = state\ch$
	GetChar(state)
End Function
; GetWord
;
; eg. myVar_2 or WaitKey
;
Function GetWord(state.TState)
	; Reads an identifier or keyword: the leading letter plus any following
	; letters, digits and underscores, stored in state\tok$ with its original
	; spelling.
	; The words And, Or and Xor are tagged TOK_OPERATOR; everything else is
	; TOK_WORD. The comparison is done on a lower-cased copy because Blitz
	; keywords are case-insensitive ("or", "OR" and "Or" are the same word).
	GetFollowing(state, ALPHANUM$ + "_")
	Select Lower$(state\tok$)
		Case "or", "and", "xor"
			state\tokType = TOK_OPERATOR
		Default
			state\tokType = TOK_WORD
	End Select
End Function
; GetOperator
;
; eg. , . \ + - = > < <> etc.
;
Function GetOperator(state.TState)
	; Reads a one- or two-character operator into state\tok$ and tags it
	; TOK_OPERATOR. Two-character forms recognised:
	;   >=  ><  <=  <>  =>  =<
	; Any other starting character yields a single-character operator.
	lead$ = state\ch$
	GetChar(state)
	follow$ = state\ch$

	; Characters that may follow the lead character to form a pair
	partners$ = ""
	If lead$ = ">" Then partners$ = "=<"
	If lead$ = "<" Then partners$ = "=>"
	If lead$ = "=" Then partners$ = "><"

	op$ = lead$
	; follow$ is "" at end of file, which must never count as a partner
	If follow$ <> "" And Instr(partners$, follow$) <> 0 Then
		op$ = lead$ + follow$
		GetChar(state)
	EndIf

	state\tok$ = op$
	state\tokType = TOK_OPERATOR
End Function
; GetComment
;
; eg. ; a comment
;
Function GetComment(state.TState)
	; Reads a comment: the current character (the ";") and everything up to,
	; but not including, the next line feed or end of file. Carriage returns
	; are dropped from the token text. The line feed is left as the
	; look-ahead character.
	text$ = state\ch$
	GetChar(state)
	Repeat
		c$ = state\ch$
		If c$ = Chr$(LF) Or c$ = "" Then Exit
		If c$ <> Chr$(CR) Then text$ = text$ + c$
		GetChar(state)
	Forever
	state\tokType = TOK_COMMENT
	state\tok$ = text$
End Function
; GetString
;
; eg. "a string"
;
Function GetString(state.TState)
	; Reads a string literal. On entry the look-ahead is the opening quote.
	; The token text in state\tok$ includes the surrounding quotes.
	;
	; The scan stops at the closing quote, at the end of the line (CR or LF)
	; or at end of file. If the closing quote is missing, the token is the
	; opening quote plus the text read so far, and the line end is left as
	; the look-ahead so that it still produces its own token.
	; state\tok$ is always assigned, so an unterminated string can no longer
	; leave the previous token's text behind.
	body$ = ""
	GetChar(state) ; step over the opening quote
	Repeat
		c$ = state\ch$
		If c$ = "" Or c$ = QUOTE$ Or c$ = Chr$(CR) Or c$ = Chr$(LF) Then Exit
		body$ = body$ + c$
		GetChar(state)
	Forever
	If state\ch$ = QUOTE$ Then
		state\tok$ = QUOTE$ + body$ + QUOTE$
		GetChar(state) ; step over the closing quote
	Else
		state\tok$ = QUOTE$ + body$
	EndIf
	state\tokType = TOK_STRING
End Function
; GetToken
;
; Use first character to determine type of token and then
; read appropriate token using the corresponding Get function
;
Function GetToken(state.TState)
	; Reads the next token into state\tok$ / state\tokType.
	; White space is skipped first; the first remaining character then
	; decides which reader is used. At end of file the token is "" with
	; type TOK_EOF. A character that starts no known token is fatal:
	; Error() is called, which ends the program.
	SkipSpace(state)
	first$ = state\ch$

	; End of file
	If first$ = "" Then
		state\tok$ = ""
		state\tokType = TOK_EOF
		Return
	EndIf

	If Instr(DIGITS$, first$) <> 0 Then
		GetDecNumber$(state)
		Return
	EndIf
	If Instr(ALPHA$, first$) <> 0 Then
		GetWord(state)
		Return
	EndIf
	If first$ = ";" Then
		GetComment(state)
		Return
	EndIf
	If first$ = QUOTE$ Then
		GetString(state)
		Return
	EndIf
	; "%" and "$" are also in DELIM$, so they must be tested before it
	If first$ = "%" Then
		GetBinNumber(state)
		Return
	EndIf
	If first$ = "$" Then
		GetHexNumber(state)
		Return
	EndIf
	If Instr(DELIM$, first$) <> 0 Then
		GetOperator(state)
		Return
	EndIf
	If first$ = Chr$(LF) Then
		GetEOL(state)
		Return
	EndIf

	Error("Unrecognised character " + first$ + "(" + Asc(first$) + ") in file", state)
	; DebugLog Str$(state\lineNum) + ": " + state\tok$
End Function
; Example usage
;
;
;
;inFile$ = Input$("Input file: ")
;
;OpenState(inFile$)
;state.TState = Last TState
;While state\tok$ <> ""
; If state\tokType <> TOK_EOL Then
; Print state\tok$
; EndIf
; GetToken(state)
;Wend
;CloseState()
;
;Print "Press a key"
;WaitKey
;
;End |
Comments
| ||
| My practical adaptation of previous code. func_tokenizer.bb : TEST: |
Code Archives Forum