mirror of
https://github.com/ChronosX88/vala-parser.git
synced 2024-11-23 10:52:27 +00:00
first commit
This commit is contained in:
commit
ccc1981707
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/bin
|
41
example/main.go
Normal file
41
example/main.go
Normal file
@ -0,0 +1,41 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ChronosX88/vala-parser/scanner"
|
||||
)
|
||||
|
||||
// check panics with e when e is non-nil and does nothing otherwise.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
|
||||
|
||||
func main() {
|
||||
var filePath string
|
||||
flag.StringVar(&filePath, "path", "", "Path to the file which need to read")
|
||||
flag.Parse()
|
||||
if filePath == "" {
|
||||
panic(fmt.Errorf("file path isn't specified"))
|
||||
}
|
||||
f, err := os.Open(filePath)
|
||||
check(err)
|
||||
fileInfo, err := f.Stat()
|
||||
check(err)
|
||||
if fileInfo.IsDir() {
|
||||
panic(fmt.Errorf("file is a dir, not a file"))
|
||||
}
|
||||
s := scanner.NewScanner(f)
|
||||
for {
|
||||
tok := s.Scan()
|
||||
if tok.Kind == scanner.EOF {
|
||||
os.Exit(0)
|
||||
} else if tok.Kind == scanner.Whitespace {
|
||||
continue
|
||||
}
|
||||
fmt.Println(tok)
|
||||
}
|
||||
}
|
1
parser/parser.go
Normal file
1
parser/parser.go
Normal file
@ -0,0 +1 @@
|
||||
package parser
|
286
scanner/scanner.go
Normal file
286
scanner/scanner.go
Normal file
@ -0,0 +1,286 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/ChronosX88/vala-parser/utils"
|
||||
)
|
||||
|
||||
// eof is the sentinel rune the scanner uses for "no more input".
//
// NOTE(review): the sentinel is the NUL rune, so a literal NUL in the input
// compares equal to it — confirm that this is acceptable.
const eof rune = 0
|
||||
|
||||
// Scanner is a lexical scanner that works on an in-memory copy of its input.
type Scanner struct {
	buf *bytes.Reader // input that has not been tokenized yet
}

// NewScanner reads everything reader yields into memory and returns a
// Scanner positioned at the start of that data.
//
// NOTE(review): the error from reading is discarded, so after a failed read
// the scanner operates on whatever bytes were received before the failure.
func NewScanner(reader io.Reader) *Scanner {
	var src bytes.Buffer
	src.ReadFrom(reader)
	return &Scanner{buf: bytes.NewReader(src.Bytes())}
}
|
||||
|
||||
func (s *Scanner) Scan() Token {
|
||||
// Read the next rune.
|
||||
ch := s.read()
|
||||
|
||||
// If we see whitespace then consume all contiguous whitespace.
|
||||
// If we see a letter then consume as an ident or reserved word.
|
||||
// If we see a digit then consume as a number.
|
||||
if isWhitespace(ch) {
|
||||
s.unread()
|
||||
return s.scanWhitespace()
|
||||
} else if isLetter(ch) {
|
||||
s.unread()
|
||||
return s.scanIdent()
|
||||
} else if isSpecialSymbol(ch) {
|
||||
s.unread()
|
||||
return s.scanSpecSymbol()
|
||||
} else if isDigit(ch) {
|
||||
s.unread()
|
||||
return s.scanNumber()
|
||||
}
|
||||
|
||||
// Otherwise read the individual character.
|
||||
switch ch {
|
||||
case eof:
|
||||
return Token{EOF, ""}
|
||||
}
|
||||
|
||||
return Token{Illegal, string(ch)}
|
||||
}
|
||||
|
||||
// scanWhitespace consumes the current rune and all contiguous whitespace.
|
||||
func (s *Scanner) scanWhitespace() Token {
|
||||
// Create a buffer and read the current character into it.
|
||||
var buf bytes.Buffer
|
||||
buf.WriteRune(s.read())
|
||||
|
||||
// Read every subsequent whitespace character into the buffer.
|
||||
// Non-whitespace characters and EOF will cause the loop to exit.
|
||||
for {
|
||||
if ch := s.read(); ch == eof {
|
||||
break
|
||||
} else if !isWhitespace(ch) {
|
||||
s.unread()
|
||||
break
|
||||
} else {
|
||||
buf.WriteRune(ch)
|
||||
}
|
||||
}
|
||||
|
||||
return Token{Whitespace, buf.String()}
|
||||
}
|
||||
|
||||
// scanIdent consumes the current rune and all contiguous ident runes.
|
||||
func (s *Scanner) scanIdent() Token {
|
||||
// Create a buffer and read the current character into it.
|
||||
var buf bytes.Buffer
|
||||
//buf.WriteRune(s.read())
|
||||
|
||||
// Read every subsequent ident character into the buffer.
|
||||
// Non-ident characters and EOF will cause the loop to exit.
|
||||
for {
|
||||
if ch := s.read(); ch == eof {
|
||||
break
|
||||
} else if !isLetter(ch) && !isDigit(ch) && ch != '_' {
|
||||
s.unread()
|
||||
break
|
||||
} else {
|
||||
_, _ = buf.WriteRune(ch)
|
||||
}
|
||||
}
|
||||
|
||||
parsedToken := Token{
|
||||
Kind: -1,
|
||||
Literal: buf.String(),
|
||||
}
|
||||
|
||||
// If the string matches a keyword then return that keyword.
|
||||
switch buf.String() {
|
||||
case Using.String(): // using
|
||||
parsedToken.Kind = Using
|
||||
case Namespace.String(): // namespace
|
||||
parsedToken.Kind = Namespace
|
||||
case PublicModifier.String(): // public
|
||||
parsedToken.Kind = PublicModifier
|
||||
case PrivateModifier.String(): // private
|
||||
parsedToken.Kind = PrivateModifier
|
||||
case Class.String(): // class
|
||||
parsedToken.Kind = Class
|
||||
case Var.String(): // var
|
||||
parsedToken.Kind = Var
|
||||
case Return.String(): // return
|
||||
parsedToken.Kind = Return
|
||||
case Null.String(): // null
|
||||
parsedToken.Kind = Null
|
||||
case If.String(): // if
|
||||
parsedToken.Kind = If
|
||||
case ProtectedModifier.String(): // protected
|
||||
parsedToken.Kind = ProtectedModifier
|
||||
case False.String(): // false
|
||||
parsedToken.Kind = False
|
||||
case True.String(): // true
|
||||
parsedToken.Kind = True
|
||||
case New.String(): // new
|
||||
parsedToken.Kind = New
|
||||
default:
|
||||
parsedToken.Kind = Identifier
|
||||
}
|
||||
|
||||
return parsedToken
|
||||
}
|
||||
|
||||
func (s *Scanner) scanSpecSymbol() Token {
|
||||
// Create a buffer and read the current character into it.
|
||||
var buf bytes.Buffer
|
||||
buf.WriteRune(s.read())
|
||||
|
||||
// Read every subsequent ident character into the buffer.
|
||||
// Non-ident characters and EOF will cause the loop to exit.
|
||||
for {
|
||||
if ch := s.read(); ch == eof {
|
||||
break
|
||||
} else if !isSpecialSymbol(ch) {
|
||||
s.unread()
|
||||
break
|
||||
} else {
|
||||
_, _ = buf.WriteRune(ch)
|
||||
}
|
||||
}
|
||||
|
||||
parsedToken := Token{
|
||||
Kind: -1,
|
||||
Literal: buf.String(),
|
||||
}
|
||||
|
||||
// If the string matches a keyword then return that keyword.
|
||||
matchSpecSymbol(&parsedToken)
|
||||
|
||||
if parsedToken.Kind == Illegal && len(parsedToken.Literal) > 1 { // then two or more special characters in a row detected
|
||||
for i := 0; i < len(parsedToken.Literal)-1; i++ {
|
||||
s.buf.Seek(-1, io.SeekCurrent)
|
||||
}
|
||||
parsedToken.Literal = string(utils.RuneAt(parsedToken.Literal, 0))
|
||||
matchSpecSymbol(&parsedToken)
|
||||
}
|
||||
|
||||
return parsedToken
|
||||
}
|
||||
|
||||
func (s *Scanner) scanNumber() Token {
|
||||
var buf bytes.Buffer
|
||||
buf.WriteRune(s.read())
|
||||
|
||||
for {
|
||||
if ch := s.read(); ch == eof {
|
||||
break
|
||||
} else if !isDigit(ch) && !isXDigit(ch) && (ch != 'x') && (ch != '.') {
|
||||
fmt.Println(string(ch))
|
||||
s.unread()
|
||||
break
|
||||
} else {
|
||||
_, _ = buf.WriteRune(ch)
|
||||
}
|
||||
}
|
||||
|
||||
parsedToken := Token{
|
||||
Kind: IntegerLiteral,
|
||||
Literal: buf.String(),
|
||||
}
|
||||
|
||||
for _, v := range []rune(parsedToken.Literal) {
|
||||
if v == '.' {
|
||||
parsedToken.Kind = RealLiteral
|
||||
}
|
||||
}
|
||||
|
||||
return parsedToken
|
||||
}
|
||||
|
||||
func matchSpecSymbol(tok *Token) {
|
||||
switch tok.Literal {
|
||||
case Add.String(): // +
|
||||
tok.Kind = Add
|
||||
case Minus.String(): // -
|
||||
tok.Kind = Minus
|
||||
case Mult.String(): // *
|
||||
tok.Kind = Mult
|
||||
case Divide.String(): // /
|
||||
tok.Kind = Divide
|
||||
case Colon.String(): // :
|
||||
tok.Kind = Colon
|
||||
case Comma.String(): // ,
|
||||
tok.Kind = Comma
|
||||
case Semicolon.String(): // ;
|
||||
tok.Kind = Semicolon
|
||||
case OpenBrace.String(): // {
|
||||
tok.Kind = OpenBrace
|
||||
case CloseBrace.String(): // }
|
||||
tok.Kind = CloseBrace
|
||||
case StringLiteral.String(): // "
|
||||
tok.Kind = StringLiteral
|
||||
case Percent.String(): // %
|
||||
tok.Kind = Percent
|
||||
case OpenParens.String(): // (
|
||||
tok.Kind = OpenParens
|
||||
case CloseParens.String(): // )
|
||||
tok.Kind = CloseParens
|
||||
case Dot.String(): // .
|
||||
tok.Kind = Dot
|
||||
case OpenMultilineComments.String(): // /*
|
||||
tok.Kind = OpenMultilineComments
|
||||
case CloseMultilineComments.String(): // */
|
||||
tok.Kind = CloseMultilineComments
|
||||
case LambdaArrow.String(): // =>
|
||||
tok.Kind = LambdaArrow
|
||||
case Equal.String(): // ==
|
||||
tok.Kind = Equal
|
||||
case OpenSingleComments.String():
|
||||
tok.Kind = OpenSingleComments
|
||||
case Assign.String():
|
||||
tok.Kind = Assign
|
||||
case OpenBracket.String():
|
||||
tok.Kind = OpenBracket
|
||||
case CloseBracket.String():
|
||||
tok.Kind = CloseBracket
|
||||
}
|
||||
}
|
||||
|
||||
// read reads the next rune from the buffered reader.
|
||||
// Returns the rune(0) if an error occurs (or io.EOF is returned).
|
||||
func (s *Scanner) read() rune {
|
||||
ch, _, err := s.buf.ReadRune()
|
||||
if err != nil {
|
||||
return eof
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// unread places the previously read rune back on the reader.
|
||||
func (s *Scanner) unread() {
|
||||
err := s.buf.UnreadRune()
|
||||
if err != nil {
|
||||
fmt.Println("Error when unread: " + err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// isWhitespace reports whether ch is a space, tab, newline or carriage
// return. Carriage return is included so that CRLF line endings are treated
// as whitespace rather than as an unrecognized rune before every newline.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
|
||||
|
||||
// isLetter returns true if the rune is a letter.
|
||||
func isLetter(ch rune) bool {
	// Only the ASCII ranges a-z and A-Z count as letters.
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
|
||||
|
||||
// isDigit returns true if the rune is a digit.
|
||||
func isDigit(ch rune) bool {
	// Only the ASCII decimal digits 0-9 qualify.
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
|
||||
|
||||
// isSpecialSymbol reports whether ch is an ASCII punctuation or operator
// character: one of the ranges '!'..'/', ':'..'?', '['..'`' or '{'..'~',
// with the underscore excluded because it is an identifier character.
func isSpecialSymbol(ch rune) bool {
	// The exclusion must apply to every range: '_' sits inside '['..'`', and
	// && binds tighter than ||, so a trailing "&& ch != '_'" would only
	// guard the last range.
	if ch == '_' {
		return false
	}
	return (ch >= '!' && ch <= '/') ||
		(ch >= ':' && ch <= '?') ||
		(ch >= '[' && ch <= '`') ||
		(ch >= '{' && ch <= '~')
}
|
||||
|
||||
// isXDigit reports whether ch is a hexadecimal digit: 0-9, A-F or a-f.
// Lowercase digits are accepted so that literals such as 0xff are not cut
// off at the first lowercase digit.
func isXDigit(ch rune) bool {
	switch {
	case ch >= '0' && ch <= '9':
		return true
	case ch >= 'A' && ch <= 'F':
		return true
	case ch >= 'a' && ch <= 'f':
		return true
	}
	return false
}
|
220
scanner/token.go
Normal file
220
scanner/token.go
Normal file
@ -0,0 +1,220 @@
|
||||
package scanner
|
||||
|
||||
// TokenKind identifies the lexical category of a Token.
type TokenKind int

// Token kinds produced by the scanner. The trailing comment on each line
// shows the source text the kind stands for, where it has a fixed spelling.
const (
	Using                  TokenKind = iota // using
	Class                                   // class
	Identifier                              // any identifier
	Colon                                   // :
	Comma                                   // ,
	Semicolon                               // ;
	OpenBrace                               // {
	CloseBrace                              // }
	StringLiteral                           // "
	Namespace                               // namespace
	PublicModifier                          // public
	PrivateModifier                         // private
	Add                                     // +
	Minus                                   // -
	Mult                                    // *
	Divide                                  // /
	Var                                     // var
	Whitespace                              // run of whitespace
	OpenParens                              // (
	CloseParens                             // )
	Percent                                 // %
	OpenMultilineComments                   // /*
	CloseMultilineComments                  // */
	Dot                                     // .
	Return                                  // return
	Null                                    // null
	LambdaArrow                             // =>
	If                                      // if
	Equal                                   // ==
	OpenSingleComments                      // //
	Assign                                  // =
	OpenBracket                             // [
	CloseBracket                            // ]
	ProtectedModifier                       // protected
	False                                   // false
	True                                    // true
	New                                     // new
	IntegerLiteral                          // any int number
	RealLiteral                             // any real number
	EOF                                     // end of file

	// Illegal marks text the scanner could not classify. It is declared
	// with an explicit type: a bare "Illegal = -1" would be an untyped
	// integer constant rather than a TokenKind.
	Illegal TokenKind = -1
)
|
||||
|
||||
func (tok TokenKind) String() string {
|
||||
switch tok {
|
||||
case Class:
|
||||
return "class"
|
||||
case Colon:
|
||||
return ":"
|
||||
case Comma:
|
||||
return ","
|
||||
case Semicolon:
|
||||
return ";"
|
||||
case OpenBrace:
|
||||
return "{"
|
||||
case CloseBrace:
|
||||
return "}"
|
||||
case StringLiteral:
|
||||
return "\""
|
||||
case Using:
|
||||
return "using"
|
||||
case Var:
|
||||
return "var"
|
||||
case Add:
|
||||
return "+"
|
||||
case Minus:
|
||||
return "-"
|
||||
case Mult:
|
||||
return "*"
|
||||
case Divide:
|
||||
return "/"
|
||||
case Namespace:
|
||||
return "namespace"
|
||||
case PublicModifier:
|
||||
return "public"
|
||||
case PrivateModifier:
|
||||
return "private"
|
||||
case OpenParens:
|
||||
return "("
|
||||
case CloseParens:
|
||||
return ")"
|
||||
case Percent:
|
||||
return "%"
|
||||
case OpenMultilineComments:
|
||||
return "/*"
|
||||
case CloseMultilineComments:
|
||||
return "*/"
|
||||
case Dot:
|
||||
return "."
|
||||
case Return:
|
||||
return "return"
|
||||
case Null:
|
||||
return "null"
|
||||
case LambdaArrow:
|
||||
return "=>"
|
||||
case If:
|
||||
return "if"
|
||||
case Equal:
|
||||
return "=="
|
||||
case OpenSingleComments:
|
||||
return "//"
|
||||
case Assign:
|
||||
return "="
|
||||
case OpenBracket:
|
||||
return "["
|
||||
case CloseBracket:
|
||||
return "]"
|
||||
case ProtectedModifier:
|
||||
return "protected"
|
||||
case False:
|
||||
return "false"
|
||||
case True:
|
||||
return "true"
|
||||
case New:
|
||||
return "new"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (tok TokenKind) PrettyString() string {
|
||||
switch tok {
|
||||
case Class:
|
||||
return "CLASS"
|
||||
case Colon:
|
||||
return "COLON"
|
||||
case Comma:
|
||||
return "COMMA"
|
||||
case Semicolon:
|
||||
return "SEMICOLON"
|
||||
case OpenBrace:
|
||||
return "OPEN_BRACE"
|
||||
case CloseBrace:
|
||||
return "CLOSE_BRACE"
|
||||
case StringLiteral:
|
||||
return "STRING_LITERAL"
|
||||
case Using:
|
||||
return "USING"
|
||||
case Var:
|
||||
return "VAR"
|
||||
case Add:
|
||||
return "OP_SUM"
|
||||
case Minus:
|
||||
return "OP_MINUS"
|
||||
case Mult:
|
||||
return "OP_MULT"
|
||||
case Divide:
|
||||
return "OP_DIVIDE"
|
||||
case Namespace:
|
||||
return "NAMESPACE"
|
||||
case PublicModifier:
|
||||
return "PUBLIC_MODIFIER"
|
||||
case PrivateModifier:
|
||||
return "PRIVATE_MODIFIER"
|
||||
case OpenParens:
|
||||
return "OPEN_PARENS"
|
||||
case CloseParens:
|
||||
return "CLOSE_PARENS"
|
||||
case Percent:
|
||||
return "PERCENT"
|
||||
case OpenMultilineComments:
|
||||
return "OPEN_MULTILINE_COMMENTS"
|
||||
case CloseMultilineComments:
|
||||
return "CLOSE_MULTILINE_COMMENTS"
|
||||
case Dot:
|
||||
return "DOT"
|
||||
case Return:
|
||||
return "RETURN"
|
||||
case Null:
|
||||
return "NULL"
|
||||
case LambdaArrow:
|
||||
return "LAMBDA_ARROW"
|
||||
case If:
|
||||
return "IF_STMT"
|
||||
case Illegal:
|
||||
return "ILLEGAL"
|
||||
case Equal:
|
||||
return "EQUAL"
|
||||
case OpenSingleComments:
|
||||
return "OPEN_SINGLE_COMMENTS"
|
||||
case Assign:
|
||||
return "ASSIGN_OP"
|
||||
case OpenBracket:
|
||||
return "OPEN_BRACKET"
|
||||
case CloseBracket:
|
||||
return "CLOSE_BRACKET"
|
||||
case ProtectedModifier:
|
||||
return "PROTECTED_MODIFIER"
|
||||
case False:
|
||||
return "FALSE"
|
||||
case True:
|
||||
return "TRUE"
|
||||
case New:
|
||||
return "NEW"
|
||||
case IntegerLiteral:
|
||||
return "INTEGER_LITERAL"
|
||||
case RealLiteral:
|
||||
return "REAL_LITERAL"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type Token struct {
|
||||
Kind TokenKind
|
||||
Literal string
|
||||
}
|
||||
|
||||
func (t Token) String() string {
|
||||
if t.Kind != Identifier && t.Kind != Whitespace {
|
||||
return "Token{kind: " + t.Kind.PrettyString() + ", literal: " + t.Literal + "}"
|
||||
} else if t.Kind == Whitespace {
|
||||
return "Token{kind: WHITESPACE}"
|
||||
} else {
|
||||
return "Token{kind: IDENTIFIER, literal: " + t.Literal + "}"
|
||||
}
|
||||
}
|
9
utils/utils.go
Normal file
9
utils/utils.go
Normal file
@ -0,0 +1,9 @@
|
||||
package utils
|
||||
|
||||
// RuneAt returns the rune at position idx of s, counting in runes rather
// than bytes. It returns 0 when idx is negative or past the last rune.
func RuneAt(s string, idx int) rune {
	// A negative index used to reach the slice access and panic.
	if idx < 0 {
		return 0
	}
	// Walk the string rune by rune instead of copying all of it into a
	// []rune just to pick out one element.
	for _, r := range s {
		if idx == 0 {
			return r
		}
		idx--
	}
	return 0
}
|
Loading…
Reference in New Issue
Block a user