vala-parser/scanner/scanner.go
2020-03-22 18:24:45 +04:00

287 lines
6.7 KiB
Go

package scanner
import (
"bytes"
"fmt"
"io"
"github.com/ChronosX88/vala-parser/utils"
)
// eof is the sentinel rune that stands for "no more input". It is the zero
// rune, so a NUL in the source text cannot be told apart from the real end.
const eof rune = 0
// Scanner splits source text into tokens. It is created with NewScanner,
// which reads the whole input into memory before scanning starts.
type Scanner struct {
// buf is the in-memory input; the scanner reads runes from it, un-reads the
// last rune and seeks backwards relative to the current position.
buf *bytes.Reader
}
// NewScanner builds a Scanner over everything that can be read from reader.
//
// The input is drained into memory up front. A read error is not reported:
// the scanner then works on whatever bytes were read before the error.
func NewScanner(reader io.Reader) *Scanner {
	var src bytes.Buffer
	// The error is dropped; the signature has no way to report it.
	_, _ = src.ReadFrom(reader)
	return &Scanner{buf: bytes.NewReader(src.Bytes())}
}
// Scan returns the next token from the input.
//
// The first rune decides which sub-scanner runs: whitespace, identifier or
// keyword, special symbol (operators and punctuation) or number. That rune is
// pushed back first so the sub-scanner sees the token from its start.
//
// End of input yields an EOF token with an empty literal. Because eof is the
// zero rune, a NUL in the input is reported the same way. Any other rune is
// returned on its own as an Illegal token.
func (s *Scanner) Scan() Token {
	ch := s.read()

	switch {
	case isWhitespace(ch):
		s.unread()
		return s.scanWhitespace()
	case isLetter(ch) || ch == '_':
		// '_' is tested here, ahead of isSpecialSymbol, so that identifiers
		// such as _name are scanned whole; scanIdent already accepts '_'
		// anywhere in an identifier.
		s.unread()
		return s.scanIdent()
	case isSpecialSymbol(ch):
		s.unread()
		return s.scanSpecSymbol()
	case isDigit(ch):
		s.unread()
		return s.scanNumber()
	case ch == eof:
		return Token{Kind: EOF, Literal: ""}
	}

	// Nothing recognised this rune: hand it back as a one-rune Illegal token.
	return Token{Kind: Illegal, Literal: string(ch)}
}
// scanWhitespace reads a run of whitespace and returns it as one Whitespace
// token. The first rune is taken without checking it (Scan has already
// classified it); the run ends at the first non-whitespace rune, which is
// pushed back, or at end of input.
func (s *Scanner) scanWhitespace() Token {
	var run bytes.Buffer
	run.WriteRune(s.read())

	for ch := s.read(); ch != eof; ch = s.read() {
		if !isWhitespace(ch) {
			// Leave the first rune of the next token for the next Scan.
			s.unread()
			break
		}
		run.WriteRune(ch)
	}

	return Token{Kind: Whitespace, Literal: run.String()}
}
// scanIdent reads a run of letters, digits and underscores and returns it as
// a token. When the text equals the String() form of one of the keyword kinds
// listed below the token gets that kind; otherwise it is an Identifier.
//
// The run ends at the first rune that is not part of an identifier, which is
// pushed back, or at end of input.
func (s *Scanner) scanIdent() Token {
	var word bytes.Buffer
	for ch := s.read(); ch != eof; ch = s.read() {
		if !(isLetter(ch) || isDigit(ch) || ch == '_') {
			s.unread()
			break
		}
		word.WriteRune(ch)
	}

	// Assume a plain identifier, then upgrade to a keyword kind on a match.
	tok := Token{Kind: Identifier, Literal: word.String()}
	switch tok.Literal {
	case Using.String(): // using
		tok.Kind = Using
	case Namespace.String(): // namespace
		tok.Kind = Namespace
	case PublicModifier.String(): // public
		tok.Kind = PublicModifier
	case PrivateModifier.String(): // private
		tok.Kind = PrivateModifier
	case Class.String(): // class
		tok.Kind = Class
	case Var.String(): // var
		tok.Kind = Var
	case Return.String(): // return
		tok.Kind = Return
	case Null.String(): // null
		tok.Kind = Null
	case If.String(): // if
		tok.Kind = If
	case ProtectedModifier.String(): // protected
		tok.Kind = ProtectedModifier
	case False.String(): // false
		tok.Kind = False
	case True.String(): // true
		tok.Kind = True
	case New.String(): // new
		tok.Kind = New
	}
	return tok
}
// scanSpecSymbol reads a run of contiguous special symbols and returns the
// longest leading part of that run that matchSpecSymbol recognises.
//
// The whole run is tried first, then ever shorter prefixes, and finally the
// first symbol on its own. Symbols that are not part of the returned token
// are handed back to the reader so the next Scan starts right after it. When
// nothing matches, the result is an Illegal token holding the first symbol.
func (s *Scanner) scanSpecSymbol() Token {
	// Scan has already classified the first rune, so it is taken unchecked.
	var buf bytes.Buffer
	buf.WriteRune(s.read())
	for ch := s.read(); ch != eof; ch = s.read() {
		if !isSpecialSymbol(ch) {
			s.unread()
			break
		}
		buf.WriteRune(ch)
	}
	run := buf.String()

	// Start from Illegal rather than a bare -1: matchSpecSymbol leaves Kind
	// untouched when nothing matches, and the checks below compare against
	// Illegal to detect exactly that.
	tok := Token{Kind: Illegal, Literal: run}
	matchSpecSymbol(&tok)

	// Every rune isSpecialSymbol accepts is a single byte, so byte offsets
	// and rune counts coincide and slicing the run cannot split a rune.
	for n := len(run) - 1; n > 1 && tok.Kind == Illegal; n-- {
		tok.Literal = run[:n]
		matchSpecSymbol(&tok)
	}
	if tok.Kind == Illegal && len(run) > 1 {
		// Last resort: the first symbol on its own.
		tok.Literal = string(utils.RuneAt(run, 0))
		matchSpecSymbol(&tok)
	}

	// Give back, in one seek, the symbols that are not part of the token.
	if rest := len(run) - len(tok.Literal); rest > 0 {
		if _, err := s.buf.Seek(int64(-rest), io.SeekCurrent); err != nil {
			fmt.Println("Error when rewinding: " + err.Error())
		}
	}
	return tok
}
// scanNumber reads a numeric literal and returns it as an IntegerLiteral, or
// as a RealLiteral when the text contains a '.'.
//
// After the first rune (taken unchecked; Scan has already seen a digit) it
// keeps consuming digits, hexadecimal digits, 'x' and '.'. The run ends at the
// first other rune, which is pushed back, or at end of input. The text is not
// validated any further, so input such as "1.2.3" comes back as one token.
func (s *Scanner) scanNumber() Token {
	var buf bytes.Buffer
	buf.WriteRune(s.read())
	for ch := s.read(); ch != eof; ch = s.read() {
		if !isDigit(ch) && !isXDigit(ch) && ch != 'x' && ch != '.' {
			// Not part of the literal: leave it for the next Scan. Nothing
			// is printed here; a scanner must not write to standard output
			// as a side effect of tokenizing.
			s.unread()
			break
		}
		buf.WriteRune(ch)
	}

	tok := Token{Kind: IntegerLiteral, Literal: buf.String()}
	// '.' is a single byte that never occurs inside a multi-byte rune, so a
	// byte search is enough to spot a decimal point.
	if bytes.IndexByte(buf.Bytes(), '.') >= 0 {
		tok.Kind = RealLiteral
	}
	return tok
}
// matchSpecSymbol sets tok.Kind to the special-symbol kind whose String()
// form equals tok.Literal. The kinds are compared in the order listed and the
// first match wins. When nothing matches, tok is left exactly as it was.
func matchSpecSymbol(tok *Token) {
	lit := tok.Literal
	switch {
	case lit == Add.String(): // +
		tok.Kind = Add
	case lit == Minus.String(): // -
		tok.Kind = Minus
	case lit == Mult.String(): // *
		tok.Kind = Mult
	case lit == Divide.String(): // /
		tok.Kind = Divide
	case lit == Colon.String(): // :
		tok.Kind = Colon
	case lit == Comma.String(): // ,
		tok.Kind = Comma
	case lit == Semicolon.String(): // ;
		tok.Kind = Semicolon
	case lit == OpenBrace.String(): // {
		tok.Kind = OpenBrace
	case lit == CloseBrace.String(): // }
		tok.Kind = CloseBrace
	case lit == StringLiteral.String(): // "
		tok.Kind = StringLiteral
	case lit == Percent.String(): // %
		tok.Kind = Percent
	case lit == OpenParens.String(): // (
		tok.Kind = OpenParens
	case lit == CloseParens.String(): // )
		tok.Kind = CloseParens
	case lit == Dot.String(): // .
		tok.Kind = Dot
	case lit == OpenMultilineComments.String(): // /*
		tok.Kind = OpenMultilineComments
	case lit == CloseMultilineComments.String(): // */
		tok.Kind = CloseMultilineComments
	case lit == LambdaArrow.String(): // =>
		tok.Kind = LambdaArrow
	case lit == Equal.String(): // ==
		tok.Kind = Equal
	case lit == OpenSingleComments.String():
		tok.Kind = OpenSingleComments
	case lit == Assign.String():
		tok.Kind = Assign
	case lit == OpenBracket.String():
		tok.Kind = OpenBracket
	case lit == CloseBracket.String():
		tok.Kind = CloseBracket
	}
}
// read returns the next rune of the input. When the reader reports any error,
// end of input included, it returns eof instead.
func (s *Scanner) read() rune {
	if r, _, err := s.buf.ReadRune(); err == nil {
		return r
	}
	return eof
}
// unread pushes the most recently read rune back onto the reader. A failure
// is only printed to standard output; it is not returned to the caller.
func (s *Scanner) unread() {
	if err := s.buf.UnreadRune(); err != nil {
		fmt.Println("Error when unread: " + err.Error())
	}
}
// isWhitespace reports whether ch is a space, a tab or a newline.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
// isLetter reports whether ch is an ASCII letter, upper or lower case.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	default:
		return false
	}
}
// isDigit reports whether ch is one of the ASCII decimal digits 0 through 9.
func isDigit(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
// isSpecialSymbol reports whether ch is an ASCII punctuation or operator
// character: '!' through '/', ':' through '?', '[' through '`' and '{'
// through '~', with the exception of '_', which belongs to identifiers.
func isSpecialSymbol(ch rune) bool {
	// '_' sits inside the '[' .. '`' range, so it has to be ruled out before
	// the ranges are tested. Tacking "&& ch != '_'" onto the end of the range
	// expression does not work: && binds tighter than || and would only
	// qualify the last range.
	if ch == '_' {
		return false
	}
	switch {
	case ch >= '!' && ch <= '/',
		ch >= ':' && ch <= '?',
		ch >= '[' && ch <= '`',
		ch >= '{' && ch <= '~':
		return true
	}
	return false
}
// isXDigit reports whether ch is an ASCII hexadecimal digit: 0-9, A-F or a-f.
// Both letter cases are accepted so that literals such as 0xff and 0xFF are
// treated alike.
func isXDigit(ch rune) bool {
	switch {
	case ch >= '0' && ch <= '9':
		return true
	case ch >= 'A' && ch <= 'F':
		return true
	case ch >= 'a' && ch <= 'f':
		return true
	}
	return false
}