这是两部分系列中的第一篇文章,该系列采用基于教程的方法来探索Go编译器。编译器很大,可能需要一本书去正确描述,本文章的想法是提供一种“深度优先"的探索思路。作者计划在将来写更多关于编译器特定领域的描述性文章。 我们将更改Go编译器添加一个新的语言特性(仅用来探索编译器的实现),并构建一个修改后的编译器来使用。
原文链接
注:一些编译器专业术语仍然沿用了英文。

任务:添加一个新语句
whilefor
for <some-condition> {
  <loop body>
}
whilewhileforuntiluntilwhile
i := 4
until i == 0 {
  i--
  fmt.Println("Hello, until!")
}

与下面代码的意思是相同的:

i := 4
for i != 0 {
  i--
  fmt.Println("Hello, until!")
}
until
until i := 4; i == 0 {
  i--
  fmt.Println("Hello, until!")
}
until
Go编译器的高级结构

Go默认的编译器(gc)有一个相当传统的结构,如果您之前使用过其他编译器,你可以很快熟悉它:


src/cmd/compile/internaluntilsrc/cmd/compileREADME
词法分析器
for_For..._DotDotDot._Dotsyntaxuntilsyntax/tokens.gotokens_Until
_Fallthrough // fallthrough
_For         // for
_Until       // until
_Func        // func
tokentokensgo generate//go:generate stringer -type token -linecommenttokengo generatesyntax/token_string.gosyntaxGOROOT= go generate tokens.gorunning "stringer": exec: "stringer": executable file not found in $PATHgo get -u golang.org/x/tools/cmd/stringerGOROOTgo generatesyntax/token_string.gopanic: imperfect hashpanicsyntax/scanner.go
// hash is a perfect hash function for keywords.
// It assumes that s has at least length 2.
func hash(s []byte) uint {
  return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
}

var keywordMap [1 << 6]token // size must be power of two

func init() {
  // populate keywordMap
  for tok := _Break; tok <= _Var; tok++ {
    h := hash([]byte(tok.String()))
    if keywordMap[h] != 0 {
      panic("imperfect hash")
    }
    keywordMap[h] = tok
  }
}
token[1<<7]tokensyntax/scanner.govar keywordMap [1 << 6]tokenvar keywordMap [1 << 7]token
语法分析器
tokenssyntax/nodes.gountilForStmt struct
UntilStmt struct {
  Init SimpleStmt
  Cond Expr
  Body *BlockStmt
  stmt
}
ForStmtforforuntil
until <init>; <cond> {
  <body>
}
UntilStmt.stmtsyntax/parser.goparser.stmtOrNilswitch p.tokcase _Until:
switch p.tok {
case _Lbrace:
  return p.blockStmt("")

// ...

case _For:
  return p.forStmt()

case _Until:
  return p.untilStmt()
untilStmt
func (p *parser) untilStmt() Stmt {
  if trace {
    defer p.trace("untilStmt")()
  }

  s := new(UntilStmt)
  s.pos = p.pos()

  s.Init, s.Cond, _ = p.header(_Until)
  s.Body = p.blockStmt("until clause")

  return s
}
parser.headerifforheaderforuntilheadertokenifuntiluntilsyntax.Fdump
i = 4
until i == 0 {
  i--
  fmt.Println("Hello, until!")
}
until
 84  .  .  .  .  .  3: *syntax.UntilStmt {
 85  .  .  .  .  .  .  Init: nil
 86  .  .  .  .  .  .  Cond: *syntax.Operation {
 87  .  .  .  .  .  .  .  Op: ==
 88  .  .  .  .  .  .  .  X: i @ ./useuntil.go:13:8
 89  .  .  .  .  .  .  .  Y: *syntax.BasicLit {
 90  .  .  .  .  .  .  .  .  Value: "0"
 91  .  .  .  .  .  .  .  .  Kind: 0
 92  .  .  .  .  .  .  .  }
 93  .  .  .  .  .  .  }
 94  .  .  .  .  .  .  Body: *syntax.BlockStmt {
 95  .  .  .  .  .  .  .  List: []syntax.Stmt (2 entries) {
 96  .  .  .  .  .  .  .  .  0: *syntax.AssignStmt {
 97  .  .  .  .  .  .  .  .  .  Op: -
 98  .  .  .  .  .  .  .  .  .  Lhs: i @ ./useuntil.go:14:3
 99  .  .  .  .  .  .  .  .  .  Rhs: *(Node @ 52)
100  .  .  .  .  .  .  .  .  }
101  .  .  .  .  .  .  .  .  1: *syntax.ExprStmt {
102  .  .  .  .  .  .  .  .  .  X: *syntax.CallExpr {
103  .  .  .  .  .  .  .  .  .  .  Fun: *syntax.SelectorExpr {
104  .  .  .  .  .  .  .  .  .  .  .  X: fmt @ ./useuntil.go:15:3
105  .  .  .  .  .  .  .  .  .  .  .  Sel: Println @ ./useuntil.go:15:7
106  .  .  .  .  .  .  .  .  .  .  }
107  .  .  .  .  .  .  .  .  .  .  ArgList: []syntax.Expr (1 entries) {
108  .  .  .  .  .  .  .  .  .  .  .  0: *syntax.BasicLit {
109  .  .  .  .  .  .  .  .  .  .  .  .  Value: "\"Hello, until!\""
110  .  .  .  .  .  .  .  .  .  .  .  .  Kind: 4
111  .  .  .  .  .  .  .  .  .  .  .  }
112  .  .  .  .  .  .  .  .  .  .  }
113  .  .  .  .  .  .  .  .  .  .  HasDots: false
114  .  .  .  .  .  .  .  .  .  }
115  .  .  .  .  .  .  .  .  }
116  .  .  .  .  .  .  .  }
117  .  .  .  .  .  .  .  Rbrace: syntax.Pos {}
118  .  .  .  .  .  .  }
119  .  .  .  .  .  }
建立抽象语法树(AST)
gcgc/syntax.gosyntax.Node
// A Node is a single node in the syntax tree.
// Actually the syntax tree is a syntax DAG, because there is only one
// node with Op=ONAME for a given instance of a variable x.
// The same is true for Op=OTYPE and Op=OLITERAL. See Node.mayBeShared.
type Node struct {
  // Tree structure.
  // Generic recursive walks should follow these fields.
  Left  *Node
  Right *Node
  Ninit Nodes
  Nbody Nodes
  List  Nodes
  Rlist Nodes

  // ...
gc/syntax.go的constuntil
// statements
// ...
OFALL     // fallthrough
OFOR      // for Ninit; Left; Right { Nbody }
OUNTIL    // until Ninit; Left { Nbody }
go generategc/syntax.go
// from the gc directory
GOROOT=<src checkout> go generate syntax.go
gc/op_string.goOUNTILgc/noder.goforstmtFallstmtFallswitch-casegc/noder.gostmtFallcase *syntax.UntilStmt
case *syntax.ForStmt:
  return p.forStmt(stmt)
case *syntax.UntilStmt:
  return p.untilStmt(stmt)
gc/noder.gonoderuntilStmt
// untilStmt converts the concrete syntax tree node UntilStmt into an AST
// node.
func (p *noder) untilStmt(stmt *syntax.UntilStmt) *Node {
  p.openScope(stmt.Pos())
  var n *Node
  n = p.nod(stmt, OUNTIL, nil, nil)
  if stmt.Init != nil {
    n.Ninit.Set1(p.stmt(stmt.Init))
  }
  if stmt.Cond != nil {
    n.Left = p.expr(stmt.Cond)
  }
  n.Nbody.Set(p.blockStmt(stmt.Body))
  p.closeAnotherScope()
  return n
}
NodeInitLeftNbodyuntildump
.   .   UNTIL l(13)
.   .   .   EQ l(13)
.   .   .   .   NAME-main.i a(true) g(1) l(6) x(0) class(PAUTO)
.   .   .   .   LITERAL-0 l(13) untyped number
.   .   UNTIL-body
.   .   .   ASOP-SUB l(14) implicit(true)
.   .   .   .   NAME-main.i a(true) g(1) l(6) x(0) class(PAUTO)
.   .   .   .   LITERAL-1 l(14) untyped number

.   .   .   CALL l(15)
.   .   .   .   NONAME-fmt.Println a(true) x(0) fmt.Println
.   .   .   CALL-list
.   .   .   .   LITERAL-"Hello, until!" l(15) untyped string
类型检查
res, err := func(args)reserrgc/typecheck.gofortypecheckswitch-casegc/typecheck.gotypecheck1switch n.Op
case OUNTIL:
  ok |= ctxStmt
  typecheckslice(n.Ninit.Slice(), ctxStmt)
  decldepth++
  n.Left = typecheck(n.Left, ctxExpr)
  n.Left = defaultlit(n.Left, nil)
  if n.Left != nil {
    t := n.Left.Type
    if t != nil && !t.IsBoolean() {
      yyerror("non-bool %L used as for condition", n.Left)
    }
  }
  typecheckslice(n.Nbody.Slice(), ctxStmt)
  decldepth--

它为语句的各个部分分配类型,并检查条件在布尔上下文中是否有效。

分析和重写抽象语法树
gc/ main.gogc.Mainpassespassuntilgc.NodeEscape.stmtswitch-case
case OUNTIL:
  e.loopDepth++
  e.discard(n.Left)
  e.stmts(n.Nbody)
  e.loopDepth--
gc.Maingc/pgen.gocompileordergc/order.gofoo /= 10foo = foo/10untilgc/order.goOrder.stmtswitch-case
case OUNTIL:
  t := o.markTemp()
  n.Left = o.exprInPlace(n.Left)
  n.Nbody.Prepend(o.cleanTempNoPop(t)...)
  orderBlock(&n.Nbody, o.free)
  o.out = append(o.out, n)
  o.cleanTemp(t)
ordercompilegc/walk.gowalkwalkforrangeforwalkwalkstmtswitch-caseuntiluntilcasefor
case OUNTIL:
  if n.Left != nil {
    walkstmtlist(n.Left.Ninit.Slice())
    init := n.Left.Ninit
    n.Left.Ninit.Set(nil)
    n.Left = nod(ONOT, walkexpr(n.Left, &init), nil)
    n.Left = addinit(n.Left, init.Slice())
    n.Op = OFOR
  }
  walkstmtlist(n.Nbody.Slice())
n.LeftONOT!n.LeftOFORn.OpwalkdumpOUNTILOFOR
看下效果
until
$ cat useuntil.go
package main

import "fmt"

func main() {
  i := 4
  until i == 0 {
    i--
    fmt.Println("Hello, until!")
  }
}

$ <src checkout>/bin/go run useuntil.go
Hello, until!
Hello, until!
Hello, until!
Hello, until!
i := 4 until i == 0until i:=4;i == 0
结束
foruntil
附录-编译Go工具链

请先阅读《Go贡献指南》。 以下是有关复制修改后的Go编译器的一些快速说明,如本文所示。 有两种方式可以尝试本文的修改:

src/./make.bash./all.bashmake.bashbin /go install cmd/compile

[1]Go has some special "magic" range clauses like a range over a string which splits its up into runes. This is where such transformations are implemented.