mirror of
https://github.com/ChronosX88/vala-parser.git
synced 2024-10-18 14:00:59 +00:00
287 lines
6.7 KiB
Go
287 lines
6.7 KiB
Go
|
package scanner
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
|
||
|
"github.com/ChronosX88/vala-parser/utils"
|
||
|
)
|
||
|
|
||
|
// eof is the sentinel rune handed back by the scanner's read helper when no
// further rune can be read from the input.
const eof rune = 0
|
||
|
|
||
|
// Scanner splits source text into tokens. It works on an in-memory reader so
// that runes can be pushed back and the read position can be rewound.
type Scanner struct {
	// buf is the reader every scan method pulls runes from.
	buf *bytes.Reader
}
|
||
|
|
||
|
func NewScanner(reader io.Reader) *Scanner {
|
||
|
buffer := new(bytes.Buffer)
|
||
|
buffer.ReadFrom(reader)
|
||
|
r := bytes.NewReader(buffer.Bytes())
|
||
|
return &Scanner{
|
||
|
buf: r,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (s *Scanner) Scan() Token {
|
||
|
// Read the next rune.
|
||
|
ch := s.read()
|
||
|
|
||
|
// If we see whitespace then consume all contiguous whitespace.
|
||
|
// If we see a letter then consume as an ident or reserved word.
|
||
|
// If we see a digit then consume as a number.
|
||
|
if isWhitespace(ch) {
|
||
|
s.unread()
|
||
|
return s.scanWhitespace()
|
||
|
} else if isLetter(ch) {
|
||
|
s.unread()
|
||
|
return s.scanIdent()
|
||
|
} else if isSpecialSymbol(ch) {
|
||
|
s.unread()
|
||
|
return s.scanSpecSymbol()
|
||
|
} else if isDigit(ch) {
|
||
|
s.unread()
|
||
|
return s.scanNumber()
|
||
|
}
|
||
|
|
||
|
// Otherwise read the individual character.
|
||
|
switch ch {
|
||
|
case eof:
|
||
|
return Token{EOF, ""}
|
||
|
}
|
||
|
|
||
|
return Token{Illegal, string(ch)}
|
||
|
}
|
||
|
|
||
|
// scanWhitespace consumes the current rune and all contiguous whitespace.
|
||
|
func (s *Scanner) scanWhitespace() Token {
|
||
|
// Create a buffer and read the current character into it.
|
||
|
var buf bytes.Buffer
|
||
|
buf.WriteRune(s.read())
|
||
|
|
||
|
// Read every subsequent whitespace character into the buffer.
|
||
|
// Non-whitespace characters and EOF will cause the loop to exit.
|
||
|
for {
|
||
|
if ch := s.read(); ch == eof {
|
||
|
break
|
||
|
} else if !isWhitespace(ch) {
|
||
|
s.unread()
|
||
|
break
|
||
|
} else {
|
||
|
buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return Token{Whitespace, buf.String()}
|
||
|
}
|
||
|
|
||
|
// scanIdent consumes the current rune and all contiguous ident runes.
|
||
|
func (s *Scanner) scanIdent() Token {
|
||
|
// Create a buffer and read the current character into it.
|
||
|
var buf bytes.Buffer
|
||
|
//buf.WriteRune(s.read())
|
||
|
|
||
|
// Read every subsequent ident character into the buffer.
|
||
|
// Non-ident characters and EOF will cause the loop to exit.
|
||
|
for {
|
||
|
if ch := s.read(); ch == eof {
|
||
|
break
|
||
|
} else if !isLetter(ch) && !isDigit(ch) && ch != '_' {
|
||
|
s.unread()
|
||
|
break
|
||
|
} else {
|
||
|
_, _ = buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
parsedToken := Token{
|
||
|
Kind: -1,
|
||
|
Literal: buf.String(),
|
||
|
}
|
||
|
|
||
|
// If the string matches a keyword then return that keyword.
|
||
|
switch buf.String() {
|
||
|
case Using.String(): // using
|
||
|
parsedToken.Kind = Using
|
||
|
case Namespace.String(): // namespace
|
||
|
parsedToken.Kind = Namespace
|
||
|
case PublicModifier.String(): // public
|
||
|
parsedToken.Kind = PublicModifier
|
||
|
case PrivateModifier.String(): // private
|
||
|
parsedToken.Kind = PrivateModifier
|
||
|
case Class.String(): // class
|
||
|
parsedToken.Kind = Class
|
||
|
case Var.String(): // var
|
||
|
parsedToken.Kind = Var
|
||
|
case Return.String(): // return
|
||
|
parsedToken.Kind = Return
|
||
|
case Null.String(): // null
|
||
|
parsedToken.Kind = Null
|
||
|
case If.String(): // if
|
||
|
parsedToken.Kind = If
|
||
|
case ProtectedModifier.String(): // protected
|
||
|
parsedToken.Kind = ProtectedModifier
|
||
|
case False.String(): // false
|
||
|
parsedToken.Kind = False
|
||
|
case True.String(): // true
|
||
|
parsedToken.Kind = True
|
||
|
case New.String(): // new
|
||
|
parsedToken.Kind = New
|
||
|
default:
|
||
|
parsedToken.Kind = Identifier
|
||
|
}
|
||
|
|
||
|
return parsedToken
|
||
|
}
|
||
|
|
||
|
func (s *Scanner) scanSpecSymbol() Token {
|
||
|
// Create a buffer and read the current character into it.
|
||
|
var buf bytes.Buffer
|
||
|
buf.WriteRune(s.read())
|
||
|
|
||
|
// Read every subsequent ident character into the buffer.
|
||
|
// Non-ident characters and EOF will cause the loop to exit.
|
||
|
for {
|
||
|
if ch := s.read(); ch == eof {
|
||
|
break
|
||
|
} else if !isSpecialSymbol(ch) {
|
||
|
s.unread()
|
||
|
break
|
||
|
} else {
|
||
|
_, _ = buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
parsedToken := Token{
|
||
|
Kind: -1,
|
||
|
Literal: buf.String(),
|
||
|
}
|
||
|
|
||
|
// If the string matches a keyword then return that keyword.
|
||
|
matchSpecSymbol(&parsedToken)
|
||
|
|
||
|
if parsedToken.Kind == Illegal && len(parsedToken.Literal) > 1 { // then two or more special characters in a row detected
|
||
|
for i := 0; i < len(parsedToken.Literal)-1; i++ {
|
||
|
s.buf.Seek(-1, io.SeekCurrent)
|
||
|
}
|
||
|
parsedToken.Literal = string(utils.RuneAt(parsedToken.Literal, 0))
|
||
|
matchSpecSymbol(&parsedToken)
|
||
|
}
|
||
|
|
||
|
return parsedToken
|
||
|
}
|
||
|
|
||
|
func (s *Scanner) scanNumber() Token {
|
||
|
var buf bytes.Buffer
|
||
|
buf.WriteRune(s.read())
|
||
|
|
||
|
for {
|
||
|
if ch := s.read(); ch == eof {
|
||
|
break
|
||
|
} else if !isDigit(ch) && !isXDigit(ch) && (ch != 'x') && (ch != '.') {
|
||
|
fmt.Println(string(ch))
|
||
|
s.unread()
|
||
|
break
|
||
|
} else {
|
||
|
_, _ = buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
parsedToken := Token{
|
||
|
Kind: IntegerLiteral,
|
||
|
Literal: buf.String(),
|
||
|
}
|
||
|
|
||
|
for _, v := range []rune(parsedToken.Literal) {
|
||
|
if v == '.' {
|
||
|
parsedToken.Kind = RealLiteral
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return parsedToken
|
||
|
}
|
||
|
|
||
|
func matchSpecSymbol(tok *Token) {
|
||
|
switch tok.Literal {
|
||
|
case Add.String(): // +
|
||
|
tok.Kind = Add
|
||
|
case Minus.String(): // -
|
||
|
tok.Kind = Minus
|
||
|
case Mult.String(): // *
|
||
|
tok.Kind = Mult
|
||
|
case Divide.String(): // /
|
||
|
tok.Kind = Divide
|
||
|
case Colon.String(): // :
|
||
|
tok.Kind = Colon
|
||
|
case Comma.String(): // ,
|
||
|
tok.Kind = Comma
|
||
|
case Semicolon.String(): // ;
|
||
|
tok.Kind = Semicolon
|
||
|
case OpenBrace.String(): // {
|
||
|
tok.Kind = OpenBrace
|
||
|
case CloseBrace.String(): // }
|
||
|
tok.Kind = CloseBrace
|
||
|
case StringLiteral.String(): // "
|
||
|
tok.Kind = StringLiteral
|
||
|
case Percent.String(): // %
|
||
|
tok.Kind = Percent
|
||
|
case OpenParens.String(): // (
|
||
|
tok.Kind = OpenParens
|
||
|
case CloseParens.String(): // )
|
||
|
tok.Kind = CloseParens
|
||
|
case Dot.String(): // .
|
||
|
tok.Kind = Dot
|
||
|
case OpenMultilineComments.String(): // /*
|
||
|
tok.Kind = OpenMultilineComments
|
||
|
case CloseMultilineComments.String(): // */
|
||
|
tok.Kind = CloseMultilineComments
|
||
|
case LambdaArrow.String(): // =>
|
||
|
tok.Kind = LambdaArrow
|
||
|
case Equal.String(): // ==
|
||
|
tok.Kind = Equal
|
||
|
case OpenSingleComments.String():
|
||
|
tok.Kind = OpenSingleComments
|
||
|
case Assign.String():
|
||
|
tok.Kind = Assign
|
||
|
case OpenBracket.String():
|
||
|
tok.Kind = OpenBracket
|
||
|
case CloseBracket.String():
|
||
|
tok.Kind = CloseBracket
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// read reads the next rune from the buffered reader.
|
||
|
// Returns the rune(0) if an error occurs (or io.EOF is returned).
|
||
|
func (s *Scanner) read() rune {
|
||
|
ch, _, err := s.buf.ReadRune()
|
||
|
if err != nil {
|
||
|
return eof
|
||
|
}
|
||
|
return ch
|
||
|
}
|
||
|
|
||
|
// unread places the previously read rune back on the reader.
|
||
|
func (s *Scanner) unread() {
|
||
|
err := s.buf.UnreadRune()
|
||
|
if err != nil {
|
||
|
fmt.Println("Error when unread: " + err.Error())
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// isWhitespace reports whether ch is a space, tab, line feed or carriage
// return. Carriage return is included so that CRLF line endings are scanned as
// whitespace rather than as illegal runes.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
|
||
|
|
||
|
// isLetter reports whether ch is an ASCII letter, lowercase or uppercase.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	default:
		return false
	}
}
|
||
|
|
||
|
// isDigit reports whether ch is one of the ASCII decimal digits '0' to '9'.
func isDigit(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
|
||
|
|
||
|
// isSpecialSymbol reports whether ch is an ASCII punctuation rune in one of
// the ranges '!'..'/', ':'..'?', '['..'`' or '{'..'~', with the underscore
// excluded.
//
// The underscore is rejected up front. It lies inside '['..'`', and && binds
// tighter than ||, so a trailing "&& ch != '_'" on the combined expression
// would only apply to the last range.
func isSpecialSymbol(ch rune) bool {
	if ch == '_' {
		return false
	}
	switch {
	case ch >= '!' && ch <= '/',
		ch >= ':' && ch <= '?',
		ch >= '[' && ch <= '`',
		ch >= '{' && ch <= '~':
		return true
	}
	return false
}
|
||
|
|
||
|
// isXDigit reports whether ch is an ASCII hexadecimal digit: '0'..'9',
// 'A'..'F' or 'a'..'f'. Both letter cases are accepted so that lowercase hex
// literals such as 0xff are recognised.
func isXDigit(ch rune) bool {
	switch {
	case ch >= '0' && ch <= '9':
		return true
	case ch >= 'A' && ch <= 'F':
		return true
	case ch >= 'a' && ch <= 'f':
		return true
	}
	return false
}
|