first commit

This commit is contained in:
ChronosX88 2020-03-22 18:24:45 +04:00
commit ccc1981707
7 changed files with 561 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/bin

41
example/main.go Normal file
View File

@ -0,0 +1,41 @@
package main
import (
"flag"
"fmt"
"os"
"github.com/ChronosX88/vala-parser/scanner"
)
// check panics with e when e is non-nil; a nil error is a no-op.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
// main tokenizes the file named by the -path flag and prints every
// non-whitespace token to stdout, one per line, stopping at the first EOF
// token. A missing flag, an unreadable path, or a path that names a
// directory causes a panic.
func main() {
	var filePath string
	flag.StringVar(&filePath, "path", "", "Path to the file which need to read")
	flag.Parse()
	if filePath == "" {
		panic(fmt.Errorf("file path isn't specified"))
	}

	f, err := os.Open(filePath)
	check(err)
	// Release the file handle on every exit path, including the panics below.
	defer f.Close()

	fileInfo, err := f.Stat()
	check(err)
	if fileInfo.IsDir() {
		panic(fmt.Errorf("file is a dir, not a file"))
	}

	s := scanner.NewScanner(f)
	for {
		tok := s.Scan()
		switch tok.Kind {
		case scanner.EOF:
			// Returning (instead of os.Exit) lets the deferred Close run;
			// the process still exits with status 0.
			return
		case scanner.Whitespace:
			continue
		}
		fmt.Println(tok)
	}
}

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module github.com/ChronosX88/vala-parser
go 1.14

1
parser/parser.go Normal file
View File

@ -0,0 +1 @@
package parser

286
scanner/scanner.go Normal file
View File

@ -0,0 +1,286 @@
package scanner
import (
"bytes"
"fmt"
"io"
"github.com/ChronosX88/vala-parser/utils"
)
const (
// eof is the sentinel rune that read returns when the underlying reader
// reports any error, including io.EOF. Because it is rune(0), a literal NUL
// character in the input compares equal to it and is treated as end of file.
eof = rune(0) // end of file
)
// Scanner splits source text into Tokens. Create one with NewScanner and call
// Scan repeatedly until it returns a token of kind EOF.
type Scanner struct {
// buf is the in-memory input being tokenized; read, unread and the
// scan* methods advance and rewind its position.
buf *bytes.Reader
}
// NewScanner drains reader into memory and returns a Scanner positioned at
// the start of that data.
//
// NOTE: an error from reading is discarded; the scanner then works on
// whatever bytes were read before the failure.
func NewScanner(reader io.Reader) *Scanner {
	var src bytes.Buffer
	_, _ = src.ReadFrom(reader)
	return &Scanner{buf: bytes.NewReader(src.Bytes())}
}
// Scan reads and returns the next token from the input.
//
// The first rune decides which sub-scanner runs: whitespace, identifier or
// keyword, special symbol, or number. In each of those cases the rune is
// pushed back so the sub-scanner sees the token from its first character.
// At end of input an EOF token with an empty literal is returned; any other
// rune yields an Illegal token whose literal is that single rune.
func (s *Scanner) Scan() Token {
	ch := s.read()

	switch {
	case isWhitespace(ch):
		s.unread()
		return s.scanWhitespace()
	case isLetter(ch) || ch == '_':
		// scanIdent accepts '_' anywhere in a word, so an identifier may
		// also begin with it. This case sits before the symbol case so a
		// leading underscore is never routed to scanSpecSymbol.
		s.unread()
		return s.scanIdent()
	case isSpecialSymbol(ch):
		s.unread()
		return s.scanSpecSymbol()
	case isDigit(ch):
		s.unread()
		return s.scanNumber()
	case ch == eof:
		return Token{Kind: EOF, Literal: ""}
	}

	return Token{Kind: Illegal, Literal: string(ch)}
}
// scanWhitespace consumes a run of contiguous whitespace and returns it as a
// single Whitespace token whose literal is the exact text consumed.
func (s *Scanner) scanWhitespace() Token {
	var run bytes.Buffer

	// The first rune is taken unconditionally: Scan only dispatches here
	// after seeing a whitespace rune and pushing it back.
	run.WriteRune(s.read())

	// Keep going until end of input or the first non-whitespace rune, which
	// is pushed back for the next Scan call.
	for ch := s.read(); ch != eof; ch = s.read() {
		if !isWhitespace(ch) {
			s.unread()
			break
		}
		run.WriteRune(ch)
	}

	return Token{Kind: Whitespace, Literal: run.String()}
}
// scanIdent consumes a run of letters, digits and underscores and returns it
// as a keyword token when the text is exactly a reserved word, or as an
// Identifier token otherwise. The literal is always the text consumed.
func (s *Scanner) scanIdent() Token {
	var word bytes.Buffer

	// Stop at end of input or at the first rune that cannot be part of a
	// word; that rune is pushed back for the next Scan call.
	for ch := s.read(); ch != eof; ch = s.read() {
		if !(isLetter(ch) || isDigit(ch) || ch == '_') {
			s.unread()
			break
		}
		_, _ = word.WriteRune(ch)
	}
	literal := word.String()

	// Reserved words, compared by their source spelling.
	keywords := [...]TokenKind{
		Using,
		Namespace,
		PublicModifier,
		PrivateModifier,
		Class,
		Var,
		Return,
		Null,
		If,
		ProtectedModifier,
		False,
		True,
		New,
	}
	for _, kw := range keywords {
		if kw.String() == literal {
			return Token{Kind: kw, Literal: literal}
		}
	}

	return Token{Kind: Identifier, Literal: literal}
}
// scanSpecSymbol consumes a run of contiguous special symbols and returns the
// token for the longest prefix of that run that is a known symbol.
//
// If the whole run is a known symbol (for example "=>" or "/*") it is
// returned as is. Otherwise progressively shorter prefixes are tried, so a
// run such as `=="` yields Equal ("==") rather than two Assign tokens. When
// no multi-character prefix matches, the first character alone is returned,
// with kind Illegal if it is not a known symbol either. Whatever was read
// beyond the returned literal is handed back to the reader.
func (s *Scanner) scanSpecSymbol() Token {
	// Byte offset of the run's first character, so a partial match can
	// reposition the reader exactly instead of stepping backwards.
	start := s.buf.Size() - int64(s.buf.Len())

	var buf bytes.Buffer
	buf.WriteRune(s.read())
	for {
		ch := s.read()
		if ch == eof {
			break
		}
		if !isSpecialSymbol(ch) {
			s.unread()
			break
		}
		_, _ = buf.WriteRune(ch)
	}
	run := buf.String()

	tok := Token{Kind: Illegal, Literal: run}
	matchSpecSymbol(&tok)
	if tok.Kind != Illegal || len(run) <= 1 {
		return tok
	}

	// Every rune accepted by isSpecialSymbol is a single ASCII byte, so byte
	// slicing the run yields whole characters.
	for n := len(run) - 1; n > 1; n-- {
		tok.Literal = run[:n]
		matchSpecSymbol(&tok)
		if tok.Kind != Illegal {
			break
		}
	}
	if tok.Kind == Illegal {
		// Last resort: the first character on its own, known or not.
		tok.Literal = string(utils.RuneAt(run, 0))
		matchSpecSymbol(&tok)
	}

	// Resume scanning right after the returned literal. The target offset
	// lies inside data that was already read, so Seek cannot fail here.
	_, _ = s.buf.Seek(start+int64(len(tok.Literal)), io.SeekStart)
	return tok
}
// scanNumber consumes a numeric literal and returns it as an IntegerLiteral
// token, or as a RealLiteral token when the text contains a '.'.
//
// After the first rune, which is taken unconditionally, the literal continues
// while the next rune is a decimal digit, a hexadecimal digit, 'x' or '.'.
// The rune that ends the literal is pushed back for the next Scan call. No
// validation of the resulting text is performed.
func (s *Scanner) scanNumber() Token {
	var buf bytes.Buffer
	buf.WriteRune(s.read())
	for {
		ch := s.read()
		if ch == eof {
			break
		}
		if !isDigit(ch) && !isXDigit(ch) && ch != 'x' && ch != '.' {
			s.unread()
			break
		}
		_, _ = buf.WriteRune(ch)
	}

	kind := IntegerLiteral
	if bytes.ContainsRune(buf.Bytes(), '.') {
		kind = RealLiteral
	}
	return Token{Kind: kind, Literal: buf.String()}
}
// matchSpecSymbol sets tok.Kind to the symbol kind whose source spelling
// equals tok.Literal. When the literal is not a known symbol, tok is left
// unchanged.
func matchSpecSymbol(tok *Token) {
	symbols := [...]TokenKind{
		Add,                    // +
		Minus,                  // -
		Mult,                   // *
		Divide,                 // /
		Colon,                  // :
		Comma,                  // ,
		Semicolon,              // ;
		OpenBrace,              // {
		CloseBrace,             // }
		StringLiteral,          // "
		Percent,                // %
		OpenParens,             // (
		CloseParens,            // )
		Dot,                    // .
		OpenMultilineComments,  // /*
		CloseMultilineComments, // */
		LambdaArrow,            // =>
		Equal,                  // ==
		OpenSingleComments,     // //
		Assign,                 // =
		OpenBracket,            // [
		CloseBracket,           // ]
	}

	literal := tok.Literal
	for _, kind := range symbols {
		if kind.String() == literal {
			tok.Kind = kind
			return
		}
	}
}
// read returns the next rune from the input, or eof (rune(0)) when the
// underlying reader reports any error, io.EOF included.
func (s *Scanner) read() rune {
	if ch, _, err := s.buf.ReadRune(); err == nil {
		return ch
	}
	return eof
}
// unread pushes the most recently read rune back onto the input. A failure
// is reported on stdout and otherwise ignored.
func (s *Scanner) unread() {
	if err := s.buf.UnreadRune(); err != nil {
		fmt.Println("Error when unread: " + err.Error())
	}
}
// isWhitespace reports whether ch is a space, tab, newline or carriage
// return. Carriage return is included so that files with CRLF line endings
// do not produce a stray token at the end of every line.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
// isLetter reports whether ch is an ASCII letter, 'a'-'z' or 'A'-'Z'.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	default:
		return false
	}
}
// isDigit reports whether ch is an ASCII decimal digit, '0'-'9'.
func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9'
}
// isSpecialSymbol reports whether ch is an ASCII punctuation or operator
// character in one of the ranges '!'-'/', ':'-'?', '['-'`' or '{'-'~',
// excluding the underscore, which belongs to identifiers.
func isSpecialSymbol(ch rune) bool {
	// The underscore sits inside the '['-'`' range, so it is ruled out
	// first rather than combined with the range tests, where operator
	// precedence would make the exclusion apply to one range only.
	if ch == '_' {
		return false
	}
	return (ch >= '!' && ch <= '/') ||
		(ch >= ':' && ch <= '?') ||
		(ch >= '[' && ch <= '`') ||
		(ch >= '{' && ch <= '~')
}
// isXDigit reports whether ch is an ASCII hexadecimal digit: '0'-'9',
// 'A'-'F' or 'a'-'f'.
func isXDigit(ch rune) bool {
	return (ch >= '0' && ch <= '9') ||
		(ch >= 'A' && ch <= 'F') ||
		(ch >= 'a' && ch <= 'f')
}

220
scanner/token.go Normal file
View File

@ -0,0 +1,220 @@
package scanner
// TokenKind identifies the lexical category of a Token.
type TokenKind int

// Token kinds. Values are assigned in declaration order starting at zero;
// Illegal is the exception and is fixed at -1.
const (
	Using                  TokenKind = iota // using
	Class                                   // class
	Identifier                              // any identifier
	Colon                                   // :
	Comma                                   // ,
	Semicolon                               // ;
	OpenBrace                               // {
	CloseBrace                              // }
	StringLiteral                           // "
	Namespace                               // namespace
	PublicModifier                          // public
	PrivateModifier                         // private
	Add                                     // +
	Minus                                   // -
	Mult                                    // *
	Divide                                  // /
	Var                                     // var
	Whitespace                              // run of whitespace
	OpenParens                              // (
	CloseParens                             // )
	Percent                                 // %
	OpenMultilineComments                   // /*
	CloseMultilineComments                  // */
	Dot                                     // .
	Return                                  // return
	Null                                    // null
	LambdaArrow                             // =>
	If                                      // if
	Equal                                   // ==
	OpenSingleComments                      // //
	Assign                                  // =
	OpenBracket                             // [
	CloseBracket                            // ]
	ProtectedModifier                       // protected
	False                                   // false
	True                                    // true
	New                                     // new
	IntegerLiteral                          // any int number
	RealLiteral                             // any real number
	EOF                                     // end of file

	// Illegal marks text that is not a valid token. It is declared with an
	// explicit TokenKind type so that it carries the same methods as every
	// other kind instead of being an untyped integer constant.
	Illegal TokenKind = -1
)
// String returns the source spelling of a keyword or symbol kind, for
// example "class" for Class or "=>" for LambdaArrow. Kinds without a fixed
// spelling (identifiers, whitespace, numeric literals, EOF and anything
// unknown) yield the empty string.
func (tok TokenKind) String() string {
	var spelling string

	switch tok {
	// Keywords.
	case Using:
		spelling = "using"
	case Namespace:
		spelling = "namespace"
	case Class:
		spelling = "class"
	case Var:
		spelling = "var"
	case Return:
		spelling = "return"
	case If:
		spelling = "if"
	case New:
		spelling = "new"
	case Null:
		spelling = "null"
	case True:
		spelling = "true"
	case False:
		spelling = "false"
	case PublicModifier:
		spelling = "public"
	case PrivateModifier:
		spelling = "private"
	case ProtectedModifier:
		spelling = "protected"

	// Punctuation and delimiters.
	case Colon:
		spelling = ":"
	case Comma:
		spelling = ","
	case Semicolon:
		spelling = ";"
	case Dot:
		spelling = "."
	case OpenBrace:
		spelling = "{"
	case CloseBrace:
		spelling = "}"
	case OpenParens:
		spelling = "("
	case CloseParens:
		spelling = ")"
	case OpenBracket:
		spelling = "["
	case CloseBracket:
		spelling = "]"
	case StringLiteral:
		spelling = "\""

	// Operators.
	case Add:
		spelling = "+"
	case Minus:
		spelling = "-"
	case Mult:
		spelling = "*"
	case Divide:
		spelling = "/"
	case Percent:
		spelling = "%"
	case Assign:
		spelling = "="
	case Equal:
		spelling = "=="
	case LambdaArrow:
		spelling = "=>"

	// Comment markers.
	case OpenSingleComments:
		spelling = "//"
	case OpenMultilineComments:
		spelling = "/*"
	case CloseMultilineComments:
		spelling = "*/"
	}

	return spelling
}
// PrettyString returns an upper-case, human-readable name for the kind, such
// as "OPEN_BRACE" or "INTEGER_LITERAL", intended for diagnostic output.
// Every declared kind has a name; a value that is not a declared kind yields
// the empty string.
func (tok TokenKind) PrettyString() string {
	switch tok {
	case Class:
		return "CLASS"
	case Colon:
		return "COLON"
	case Comma:
		return "COMMA"
	case Semicolon:
		return "SEMICOLON"
	case OpenBrace:
		return "OPEN_BRACE"
	case CloseBrace:
		return "CLOSE_BRACE"
	case StringLiteral:
		return "STRING_LITERAL"
	case Using:
		return "USING"
	case Var:
		return "VAR"
	case Add:
		return "OP_SUM"
	case Minus:
		return "OP_MINUS"
	case Mult:
		return "OP_MULT"
	case Divide:
		return "OP_DIVIDE"
	case Namespace:
		return "NAMESPACE"
	case PublicModifier:
		return "PUBLIC_MODIFIER"
	case PrivateModifier:
		return "PRIVATE_MODIFIER"
	case OpenParens:
		return "OPEN_PARENS"
	case CloseParens:
		return "CLOSE_PARENS"
	case Percent:
		return "PERCENT"
	case OpenMultilineComments:
		return "OPEN_MULTILINE_COMMENTS"
	case CloseMultilineComments:
		return "CLOSE_MULTILINE_COMMENTS"
	case Dot:
		return "DOT"
	case Return:
		return "RETURN"
	case Null:
		return "NULL"
	case LambdaArrow:
		return "LAMBDA_ARROW"
	case If:
		return "IF_STMT"
	case Illegal:
		return "ILLEGAL"
	case Equal:
		return "EQUAL"
	case OpenSingleComments:
		return "OPEN_SINGLE_COMMENTS"
	case Assign:
		return "ASSIGN_OP"
	case OpenBracket:
		return "OPEN_BRACKET"
	case CloseBracket:
		return "CLOSE_BRACKET"
	case ProtectedModifier:
		return "PROTECTED_MODIFIER"
	case False:
		return "FALSE"
	case True:
		return "TRUE"
	case New:
		return "NEW"
	case IntegerLiteral:
		return "INTEGER_LITERAL"
	case RealLiteral:
		return "REAL_LITERAL"
	// The three kinds below previously fell through to the empty string.
	case Identifier:
		return "IDENTIFIER"
	case Whitespace:
		return "WHITESPACE"
	case EOF:
		return "EOF"
	}
	return ""
}
// Token is a single lexical unit produced by the scanner.
type Token struct {
// Kind is the lexical category of the token.
Kind TokenKind
// Literal is the source text of the token; it is empty for EOF.
Literal string
}
// String formats the token for diagnostic output. Whitespace tokens print
// without their literal, identifiers print with a fixed IDENTIFIER label,
// and every other kind prints its PrettyString name followed by the literal.
func (t Token) String() string {
	switch t.Kind {
	case Whitespace:
		return "Token{kind: WHITESPACE}"
	case Identifier:
		return "Token{kind: IDENTIFIER, literal: " + t.Literal + "}"
	default:
		return "Token{kind: " + t.Kind.PrettyString() + ", literal: " + t.Literal + "}"
	}
}

9
utils/utils.go Normal file
View File

@ -0,0 +1,9 @@
package utils
// RuneAt returns the rune at rune index idx of s, counting Unicode code
// points rather than bytes. It returns 0 when idx is negative or past the
// last rune of s.
func RuneAt(s string, idx int) rune {
	if idx < 0 {
		return 0
	}
	// Ranging over the string decodes one rune at a time, so nothing is
	// allocated and decoding stops as soon as the index is reached.
	for _, r := range s {
		if idx == 0 {
			return r
		}
		idx--
	}
	return 0
}