这是两部分系列中的第一篇文章,该系列采用基于教程的方法来探索Go编译器。编译器很大,可能需要一本书去正确描述,本文章的想法是提供一种“深度优先"的探索思路。作者计划在将来写更多关于编译器特定领域的描述性文章。 我们将更改Go编译器添加一个新的语言特性(仅用来探索编译器的实现),并构建一个修改后的编译器来使用。
原文链接
注:一些编译器专业术语仍然沿用了英文。
whilefor
for <some-condition> {
<loop body>
}
whilewhileforuntiluntilwhile
i := 4
until i == 0 {
i--
fmt.Println("Hello, until!")
}
与下面代码的意思是相同的:
i := 4
for i != 0 {
i--
fmt.Println("Hello, until!")
}
until
until i := 4; i == 0 {
i--
fmt.Println("Hello, until!")
}
until
Go编译器的高级结构
Go默认的编译器(gc)有一个相当传统的结构,如果您之前使用过其他编译器,你可以很快熟悉它:
src/cmd/compile/internaluntilsrc/cmd/compileREADME
词法分析器
for_For..._DotDotDot._Dotsyntaxuntilsyntax/tokens.gotokens_Until
_Fallthrough // fallthrough
_For // for
_Until // until
_Func // func
tokentokensgo generate//go:generate stringer -type token -linecommenttokengo generatesyntax/token_string.gosyntaxGOROOT= go generate tokens.gorunning "stringer": exec: "stringer": executable file not found in $PATHgo get -u golang.org/x/tools/cmd/stringerGOROOTgo generatesyntax/token_string.gopanic: imperfect hashpanicsyntax/scanner.go
// hash is a perfect hash function for keywords.
// It assumes that s has at least length 2.
func hash(s []byte) uint {
return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
}
var keywordMap [1 << 6]token // size must be power of two
func init() {
// populate keywordMap
for tok := _Break; tok <= _Var; tok++ {
h := hash([]byte(tok.String()))
if keywordMap[h] != 0 {
panic("imperfect hash")
}
keywordMap[h] = tok
}
}
token[1<<7]tokensyntax/scanner.govar keywordMap [1 << 6]tokenvar keywordMap [1 << 7]token
语法分析器
tokenssyntax/nodes.gountilForStmt struct
UntilStmt struct {
Init SimpleStmt
Cond Expr
Body *BlockStmt
stmt
}
ForStmtforforuntil
until <init>; <cond> {
<body>
}
UntilStmt.stmtsyntax/parser.goparser.stmtOrNilswitch p.tokcase _Until:
switch p.tok {
case _Lbrace:
return p.blockStmt("")
// ...
case _For:
return p.forStmt()
case _Until:
return p.untilStmt()
untilStmt
func (p *parser) untilStmt() Stmt {
if trace {
defer p.trace("untilStmt")()
}
s := new(UntilStmt)
s.pos = p.pos()
s.Init, s.Cond, _ = p.header(_Until)
s.Body = p.blockStmt("until clause")
return s
}
parser.headerifforheaderforuntilheadertokenifuntiluntilsyntax.Fdump
i = 4
until i == 0 {
i--
fmt.Println("Hello, until!")
}
until
84 . . . . . 3: *syntax.UntilStmt {
85 . . . . . . Init: nil
86 . . . . . . Cond: *syntax.Operation {
87 . . . . . . . Op: ==
88 . . . . . . . X: i @ ./useuntil.go:13:8
89 . . . . . . . Y: *syntax.BasicLit {
90 . . . . . . . . Value: "0"
91 . . . . . . . . Kind: 0
92 . . . . . . . }
93 . . . . . . }
94 . . . . . . Body: *syntax.BlockStmt {
95 . . . . . . . List: []syntax.Stmt (2 entries) {
96 . . . . . . . . 0: *syntax.AssignStmt {
97 . . . . . . . . . Op: -
98 . . . . . . . . . Lhs: i @ ./useuntil.go:14:3
99 . . . . . . . . . Rhs: *(Node @ 52)
100 . . . . . . . . }
101 . . . . . . . . 1: *syntax.ExprStmt {
102 . . . . . . . . . X: *syntax.CallExpr {
103 . . . . . . . . . . Fun: *syntax.SelectorExpr {
104 . . . . . . . . . . . X: fmt @ ./useuntil.go:15:3
105 . . . . . . . . . . . Sel: Println @ ./useuntil.go:15:7
106 . . . . . . . . . . }
107 . . . . . . . . . . ArgList: []syntax.Expr (1 entries) {
108 . . . . . . . . . . . 0: *syntax.BasicLit {
109 . . . . . . . . . . . . Value: "\"Hello, until!\""
110 . . . . . . . . . . . . Kind: 4
111 . . . . . . . . . . . }
112 . . . . . . . . . . }
113 . . . . . . . . . . HasDots: false
114 . . . . . . . . . }
115 . . . . . . . . }
116 . . . . . . . }
117 . . . . . . . Rbrace: syntax.Pos {}
118 . . . . . . }
119 . . . . . }
建立抽象语法树(AST)
gcgc/syntax.gosyntax.Node
// A Node is a single node in the syntax tree.
// Actually the syntax tree is a syntax DAG, because there is only one
// node with Op=ONAME for a given instance of a variable x.
// The same is true for Op=OTYPE and Op=OLITERAL. See Node.mayBeShared.
type Node struct {
// Tree structure.
// Generic recursive walks should follow these fields.
Left *Node
Right *Node
Ninit Nodes
Nbody Nodes
List Nodes
Rlist Nodes
// ...
gc/syntax.go的constuntil
// statements
// ...
OFALL // fallthrough
OFOR // for Ninit; Left; Right { Nbody }
OUNTIL // until Ninit; Left { Nbody }
go generategc/syntax.go
// from the gc directory
GOROOT=<src checkout> go generate syntax.go
gc/op_string.goOUNTILgc/noder.goforstmtFallstmtFallswitch-casegc/noder.gostmtFallcase *syntax.UntilStmt
case *syntax.ForStmt:
return p.forStmt(stmt)
case *syntax.UntilStmt:
return p.untilStmt(stmt)
gc/noder.gonoderuntilStmt
// untilStmt converts the concrete syntax tree node UntilStmt into an AST
// node.
func (p *noder) untilStmt(stmt *syntax.UntilStmt) *Node {
p.openScope(stmt.Pos())
var n *Node
n = p.nod(stmt, OUNTIL, nil, nil)
if stmt.Init != nil {
n.Ninit.Set1(p.stmt(stmt.Init))
}
if stmt.Cond != nil {
n.Left = p.expr(stmt.Cond)
}
n.Nbody.Set(p.blockStmt(stmt.Body))
p.closeAnotherScope()
return n
}
NodeInitLeftNbodyuntildump
. . UNTIL l(13)
. . . EQ l(13)
. . . . NAME-main.i a(true) g(1) l(6) x(0) class(PAUTO)
. . . . LITERAL-0 l(13) untyped number
. . UNTIL-body
. . . ASOP-SUB l(14) implicit(true)
. . . . NAME-main.i a(true) g(1) l(6) x(0) class(PAUTO)
. . . . LITERAL-1 l(14) untyped number
. . . CALL l(15)
. . . . NONAME-fmt.Println a(true) x(0) fmt.Println
. . . CALL-list
. . . . LITERAL-"Hello, until!" l(15) untyped string
类型检查
res, err := func(args)reserrgc/typecheck.gofortypecheckswitch-casegc/typecheck.gotypecheck1switch n.Op
case OUNTIL:
ok |= ctxStmt
typecheckslice(n.Ninit.Slice(), ctxStmt)
decldepth++
n.Left = typecheck(n.Left, ctxExpr)
n.Left = defaultlit(n.Left, nil)
if n.Left != nil {
t := n.Left.Type
if t != nil && !t.IsBoolean() {
yyerror("non-bool %L used as for condition", n.Left)
}
}
typecheckslice(n.Nbody.Slice(), ctxStmt)
decldepth--
它为语句的各个部分分配类型,并检查条件在布尔上下文中是否有效。
分析和重写抽象语法树gc/ main.gogc.Mainpassespassuntilgc.NodeEscape.stmtswitch-case
case OUNTIL:
e.loopDepth++
e.discard(n.Left)
e.stmts(n.Nbody)
e.loopDepth--
gc.Maingc/pgen.gocompileordergc/order.gofoo /= 10foo = foo/10untilgc/order.goOrder.stmtswitch-case
case OUNTIL:
t := o.markTemp()
n.Left = o.exprInPlace(n.Left)
n.Nbody.Prepend(o.cleanTempNoPop(t)...)
orderBlock(&n.Nbody, o.free)
o.out = append(o.out, n)
o.cleanTemp(t)
ordercompilegc/walk.gowalkwalkforrangeforwalkwalkstmtswitch-caseuntiluntilcasefor
case OUNTIL:
if n.Left != nil {
walkstmtlist(n.Left.Ninit.Slice())
init := n.Left.Ninit
n.Left.Ninit.Set(nil)
n.Left = nod(ONOT, walkexpr(n.Left, &init), nil)
n.Left = addinit(n.Left, init.Slice())
n.Op = OFOR
}
walkstmtlist(n.Nbody.Slice())
n.LeftONOT!n.LeftOFORn.OpwalkdumpOUNTILOFOR
看下效果
until
$ cat useuntil.go
package main
import "fmt"
func main() {
i := 4
until i == 0 {
i--
fmt.Println("Hello, until!")
}
}
$ <src checkout>/bin/go run useuntil.go
Hello, until!
Hello, until!
Hello, until!
Hello, until!
i := 4 until i == 0until i:=4;i == 0
结束
foruntil
附录-编译Go工具链
请先阅读《Go贡献指南》。 以下是有关复制修改后的Go编译器的一些快速说明,如本文所示。 有两种方式可以尝试本文的修改:
src/./make.bash./all.bashmake.bashbin /go install cmd/compile
[1]Go has some special "magic" range clauses like a range over a string which splits its up into runes. This is where such transformations are implemented.