您的位置: 首页> Vue

vue3.5.18-编译-生成ast树

匿名上传

发布时间:2025-09-08 12:00:02

vue中的编译过程将字符串转换成了ast树，这是一件神奇的事。

接上篇的第一步const ast = isString(source) ? baseParse(source, resolvedOptions) :source，我们将以图文结合的方式探索vue底层是如何用解析器将字符串转换成ast树的。

一、寻找解析器

1、`baseParse`

// baseParse函数
function baseParse(input, options) {
  // 重置ast生成条件
  reset();
  // 当前template模版
  currentInput = input;
  // 当前options
  currentOptions = extend({}, defaultParserOptions);
  // 如果存在参数传入的options[key]，覆盖其currentOptions[key]
  if (options) {
    let key;
    for (key in options) {
      if (options[key] != null) {
        currentOptions[key] = options[key];
      }
    }
  }
  // 创建根节点
  const root = (currentRoot = createRoot([], input));
  // ✨✨✨词法分析器编译模版✨✨✨
  tokenizer.parse(currentInput);
  // 根节点的字符串
  root.loc = getLoc(0, input.length);
  // 压缩过滤子节点中的空白部分
  root.children = condenseWhitespace(root.children);
  // 当前根节点设为null
  currentRoot = null;
  return root;
}
// 重置ast生成条件
function reset() {
  tokenizer.reset(); // 词法分析器重置
  currentOpenTag = null; // 当前开始标签
  currentProp = null; // 当前属性
  currentAttrValue = ""; // 当前属性值
  currentAttrStartIndex = -1; // 当前属性开始索引
  currentAttrEndIndex = -1; // 当前属性结束索引
  stack.length = 0; // 清空标签栈
}
// 生成根节点
function createRoot(children, source = "") {
  return {
    type: 0,
    source,
    children,
    helpers: /* @__PURE__ */ new Set(),
    components: [],
    directives: [],
    hoists: [],
    imports: [],
    cached: 0,
    temps: 0,
    codegenNode: void 0,
    loc: locStub,
  };
}

以上逻辑主要是重置解析条件reset，生成根节点createRoot，执行解析tokenizer.parse，过滤空白子元素condenseWhitespace。tokenizer到底是啥？

它是Tokenizer类的一个实例，接下来看看Tokenizer：

2、`Tokenizer`

  class Tokenizer {
    // constructor
    constructor(stack, cbs) {
      this.stack = stack; // ✨✨✨栈，ast树构建的核心✨✨✨
      this.cbs = cbs; // cbs，里面包含各种方法
      this.state = 1;
      this.buffer = "";
      this.sectionStart = 0;
      this.index = 0;
      this.newlines = [];
      this.mode = 0;
      this.delimiterOpen = defaultDelimitersOpen;
      this.delimiterClose = defaultDelimitersClose;
      this.delimiterIndex = -1;
      this.currentSequence = void 0;
      this.sequenceIndex = 0;
      // 还有其他属性...
    }
    // 方法
    reset(){},
    parse(input){}, // 编译方法
    getPos(index){},
    peek(){},
    stateText(){},
    cleanup(){},
    finish(){},
    handleTagName(c){},
    stateInTagName(c){},
    handleAttrStart(c){},
    stateInAttrName(c){},
    stateInDirName(c){},
    stateInDeclaration(c){},
    // 还有其他方法...
  }

在创建Tokenizer时，传入了stack和各方法组成的对象{}

// The tokenizer instance used by the parser. It receives the parser's `stack`
// and an object of callbacks; the callback bodies are elided in this excerpt.
// Offsets passed to the callbacks are positions in the template string.
const tokenizer = new Tokenizer(stack, {
  onerr: emitError, // errors are forwarded to emitError
  ontext(start, end) {}, // a run of plain text
  ontextentity(char, start, end) {},
  oninterpolation(start, end) {},
  onopentagname(start, end) {}, // name of an opening tag
  onopentagend(end) {}, // the opening tag has ended
  onclosetag(start, end) {},
  onselfclosingtag(end) {},
  onattribname(start, end) {}, // plain attribute name
  ondirname(start, end) {}, // directive name (or a one-character shorthand prefix)
  ondirarg(start, end) {},
  ondirmodifier(start, end) {},
  onattribdata(start, end) {},
  onattribentity(char, start, end) {},
  onattribnameend(end) {},
  onattribend(quote, end) {},
  oncomment(start, end) {},
  onend() {},
  oncdata(start, end) {},
  onprocessinginstruction(start) {},
});

以上首先定义了一个Tokenizer类，其中包含属性和方法。再通过new Tokenizer的方式创建实例tokenizer，并传入stack栈和cbs回调方法集合。

接下来继续介绍核心逻辑：tokenizer.parse(currentInput)。

二、指针扫描过程

我们继续以下面template为例：

<div class="myApp">
  <!-- 这是注释文案 -->
  <h3>编译原理</h3>
  <div v-if="flag">
    <p>{{ first + second }}</p>
  </div>
  <childComp v-else></childComp>
</div>

接下来看parse方法：

/**
   * Iterates through the buffer, calling the function corresponding to the current state.
   *
   * States that are more likely to be hit are higher up, as a performance improvement.
   *
   * Each loop iteration handles exactly one character and then advances `index` by one.
   * After the whole buffer has been scanned, `cleanup()` and `finish()` are called.
   *
   * @param input - The template string to scan; it is stored in `this.buffer`.
   */
  public parse(input: string) {
    this.buffer = input
    while (this.index < this.buffer.length) {
      const c = this.buffer.charCodeAt(this.index)
      // Record the offset of every newline character.
      if (c === CharCodes.NewLine) {
        this.newlines.push(this.index)
      }
      // Dispatch the current character to the handler for the current state.
      switch (this.state) {
        case State.Text: {
          this.stateText(c)
          break
        }
        case State.InterpolationOpen: {
          this.stateInterpolationOpen(c)
          break
        }
        case State.Interpolation: {
          this.stateInterpolation(c)
          break
        }
        case State.InterpolationClose: {
          this.stateInterpolationClose(c)
          break
        }
        case State.SpecialStartSequence: {
          this.stateSpecialStartSequence(c)
          break
        }
        case State.InRCDATA: {
          this.stateInRCDATA(c)
          break
        }
        case State.CDATASequence: {
          this.stateCDATASequence(c)
          break
        }
        case State.InAttrValueDq: {
          this.stateInAttrValueDoubleQuotes(c)
          break
        }
        case State.InAttrName: {
          this.stateInAttrName(c)
          break
        }
        case State.InDirName: {
          this.stateInDirName(c)
          break
        }
        case State.InDirArg: {
          this.stateInDirArg(c)
          break
        }
        case State.InDirDynamicArg: {
          this.stateInDynamicDirArg(c)
          break
        }
        case State.InDirModifier: {
          this.stateInDirModifier(c)
          break
        }
        case State.InCommentLike: {
          this.stateInCommentLike(c)
          break
        }
        case State.InSpecialComment: {
          this.stateInSpecialComment(c)
          break
        }
        case State.BeforeAttrName: {
          this.stateBeforeAttrName(c)
          break
        }
        case State.InTagName: {
          this.stateInTagName(c)
          break
        }
        case State.InSFCRootTagName: {
          this.stateInSFCRootTagName(c)
          break
        }
        case State.InClosingTagName: {
          this.stateInClosingTagName(c)
          break
        }
        case State.BeforeTagName: {
          this.stateBeforeTagName(c)
          break
        }
        case State.AfterAttrName: {
          this.stateAfterAttrName(c)
          break
        }
        case State.InAttrValueSq: {
          this.stateInAttrValueSingleQuotes(c)
          break
        }
        case State.BeforeAttrValue: {
          this.stateBeforeAttrValue(c)
          break
        }
        case State.BeforeClosingTagName: {
          this.stateBeforeClosingTagName(c)
          break
        }
        case State.AfterClosingTagName: {
          this.stateAfterClosingTagName(c)
          break
        }
        case State.BeforeSpecialS: {
          this.stateBeforeSpecialS(c)
          break
        }
        case State.BeforeSpecialT: {
          this.stateBeforeSpecialT(c)
          break
        }
        case State.InAttrValueNq: {
          this.stateInAttrValueNoQuotes(c)
          break
        }
        case State.InSelfClosingTag: {
          this.stateInSelfClosingTag(c)
          break
        }
        case State.InDeclaration: {
          this.stateInDeclaration(c)
          break
        }
        case State.BeforeDeclaration: {
          this.stateBeforeDeclaration(c)
          break
        }
        case State.BeforeComment: {
          this.stateBeforeComment(c)
          break
        }
        case State.InProcessingInstruction: {
          this.stateInProcessingInstruction(c)
          break
        }
        case State.InEntity: {
          // The entity handler is the only one that does not take the character.
          this.stateInEntity()
          break
        }
      }
      // Move on to the next character.
      this.index++
    }
    this.cleanup()
    this.finish()
  }

可以看出这里根据state值产生了众多分支，这些分支处理了各种可能的场景。我们现在结合实例，一步步进行扫描，将字符串变成ast树。

我们根据当前例子，选取以下的状态：

1、`State.Text`

在class Tokenizer内部第一行就定义了public state = State.Text。所以，扫描的第一个字符<(c === 60)时，执行this.stateText(c)的分支逻辑：

我们定义如下指针，指针上带的属性值默认为：

c: 60（<）
state: State.Text
this.index: 0
this.sectionStart: 0

  /**
   * Handler for the text state: reacts to the characters that can end a run of plain text.
   *
   * @param c - Char code at the current index.
   */
  private stateText(c: number): void {
    // `<` may start a tag: flush any pending text and switch to BeforeTagName.
    if (c === CharCodes.Lt) {
      const hasPendingText = this.index > this.sectionStart
      if (hasPendingText) {
        this.cbs.ontext(this.sectionStart, this.index)
      }
      this.state = State.BeforeTagName
      this.sectionStart = this.index
      return
    }

    // `&` starts an entity, but only in non-browser builds.
    if (!__BROWSER__ && c === CharCodes.Amp) {
      this.startEntity()
      return
    }

    // First character of the opening delimiter, outside of v-pre: begin matching
    // the delimiter, re-dispatching the same character to the new state's handler.
    if (!this.inVPre && c === this.delimiterOpen[0]) {
      this.state = State.InterpolationOpen
      this.delimiterIndex = 0
      this.stateInterpolationOpen(c)
    }
  }

以上逻辑中，满足c === CharCodes.Lt，因此this.state = State.BeforeTagName，指针进入下一个状态：标签名开始之前（BeforeTagName）。

2、`State.BeforeTagName`

指针变为：

c: 100(d)
state: State.BeforeTagName
this.index: 1
this.sectionStart: 0

  /**
   * Handles the character that follows a `<` and decides which construct is starting.
   *
   * @param c - Char code at the current index.
   */
  private stateBeforeTagName(c: number): void {
    // `<!` : declaration; the section starts after the `!`.
    if (c === CharCodes.ExclamationMark) {
      this.state = State.BeforeDeclaration
      this.sectionStart = this.index + 1
      return
    }

    // `<?` : processing instruction; the section starts after the `?`.
    if (c === CharCodes.Questionmark) {
      this.state = State.InProcessingInstruction
      this.sectionStart = this.index + 1
      return
    }

    // A valid first character of a tag name: the name section starts here.
    if (isTagStartChar(c)) {
      this.sectionStart = this.index
      if (this.mode === ParseMode.BASE) {
        this.state = State.InTagName
      } else if (this.inSFCRoot) {
        this.state = State.InSFCRootTagName
      } else if (this.inXML) {
        this.state = State.InTagName
      } else if (c === 116 /* t */) {
        // Tag names starting with `t` get their own pre-check state.
        this.state = State.BeforeSpecialT
      } else if (c === 115 /* s */) {
        // Likewise for tag names starting with `s`.
        this.state = State.BeforeSpecialS
      } else {
        this.state = State.InTagName
      }
      return
    }

    // `</` : a closing tag follows.
    if (c === CharCodes.Slash) {
      this.state = State.BeforeClosingTagName
      return
    }

    // Anything else: the `<` was plain text, so process this character as text.
    this.state = State.Text
    this.stateText(c)
  }

以上逻辑中，满足isTagStartChar(c)，因此this.sectionStart = this.index = 1，并且，this.state = State.InTagName，指针指向下一个阶段，标签tag中。

3、`State.InTagName`

指针变为：

c: 105(i)
state: State.InTagName
this.index: 2
this.sectionStart: 1

  /**
   * Keeps scanning while inside a tag name; hands over to `handleTagName`
   * once a character that ends the tag-name section is reached.
   *
   * @param c - Char code at the current index.
   */
  private stateInTagName(c: number): void {
    if (!isEndOfTagSection(c)) {
      // Still inside the name: nothing to do for this character.
      return
    }
    this.handleTagName(c)
  }

以上逻辑中，字符i不满足isEndOfTagSection(c)，所以指针继续扫描，直到扫描到空格位置，此时进入方法this.handleTagName(c)。

指针变为：

c: 32(空格)
state: State.InTagName
this.index: 4
this.sectionStart: 1

  // this.cbs.onopentagname
  // Creates the element node for the opening tag whose name spans start..end
  // and stores it in `currentOpenTag`.
  onopentagname(start, end) {
    const tag = getSlice(start, end)
    // The namespace is resolved from the tag name, the innermost open element and the configured ns.
    const ns = currentOptions.getNamespace(tag, stack[0], currentOptions.ns)
    // The location begins one character before the name, i.e. at the `<`.
    const loc = getLoc(start - 1, end)
    currentOpenTag = {
      type: NodeTypes.ELEMENT,
      tag,
      ns,
      tagType: ElementTypes.ELEMENT, // will be refined on tag close
      props: [],
      children: [],
      loc,
      codegenNode: undefined,
    }
  },
  /**
   * Called when the name of an opening tag has ended at the current character.
   * Reports the name range, then lets `stateBeforeAttrName` process the same character.
   *
   * @param c - Char code of the character that ended the tag name.
   */
  private handleTagName(c: number) {
    // The tag name runs from sectionStart up to (not including) the current index.
    this.cbs.onopentagname(this.sectionStart, this.index)
    this.sectionStart = -1
    this.state = State.BeforeAttrName
    // Re-dispatch `c` right away so that a terminator such as `>` is handled
    // in this iteration instead of being skipped.
    this.stateBeforeAttrName(c)
  }

在以上逻辑中，我们得到的name就是div，currentOpenTag就是一个描述当前节点信息的对象。至此，我们的第一个标签div产生。结束之后，执行this.sectionStart = -1和this.state = State.BeforeAttrName，最后立即执行this.stateBeforeAttrName(c)，是为了处理标签名后面直接跟着>的情况（例如<div>）。当前例子中这个字符是空格，因此这次调用没有实际逻辑发生，我们继续移动指针。

4、`State.BeforeAttrName`

指针变为：

c: 99(c)
state: State.BeforeAttrName
this.index: 5
this.sectionStart: -1

  /**
   * Handles a character inside an opening tag at a position where an attribute
   * name may begin. Whitespace is skipped without any action.
   *
   * @param c - Char code at the current index.
   */
  private stateBeforeAttrName(c: number): void {
    if (c === CharCodes.Gt) {
      // `>` ends the opening tag; continue in RCDATA or plain-text state.
      this.cbs.onopentagend(this.index)
      if (this.inRCDATA) {
        this.state = State.InRCDATA
      } else {
        this.state = State.Text
      }
      // The next section starts right after the `>`.
      this.sectionStart = this.index + 1
    } else if (c === CharCodes.Slash) {
      this.state = State.InSelfClosingTag
      // In dev or non-browser builds, report a `/` that is not directly followed by `>`.
      if ((__DEV__ || !__BROWSER__) && this.peek() !== CharCodes.Gt) {
        this.cbs.onerr(ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG, this.index)
      }
    } else if (c === CharCodes.Lt && this.peek() === CharCodes.Slash) {
      // `</` appears before the opening tag was closed: end the opening tag here
      // and let the BeforeTagName state process the following `/`.
      this.cbs.onopentagend(this.index)
      this.state = State.BeforeTagName
      this.sectionStart = this.index
    } else if (!isWhitespace(c)) {
      // Any other non-whitespace character starts an attribute. A leading `=` is
      // reported as an error first (dev or non-browser builds) but is still handled.
      if ((__DEV__ || !__BROWSER__) && c === CharCodes.Eq) {
        this.cbs.onerr(
          ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
          this.index,
        )
      }
      this.handleAttrStart(c)
    }
  }

以上例子中字符c不是空白字符，因此继续执行this.handleAttrStart(c)

  /**
   * Decides which kind of attribute begins at the current character and
   * switches to the matching state.
   *
   * @param c - Char code of the first character of the attribute.
   */
  private handleAttrStart(c: number) {
    // `v-` prefix: a directive name starts at the current index.
    if (c === CharCodes.LowerV && this.peek() === CharCodes.Dash) {
      this.state = State.InDirName
      this.sectionStart = this.index
      return
    }

    switch (c) {
      case CharCodes.Dot:
      case CharCodes.Colon:
      case CharCodes.At:
      case CharCodes.Number:
        // One-character shorthand prefix: the prefix itself is reported as the
        // directive name, and the argument section starts right after it.
        this.cbs.ondirname(this.index, this.index + 1)
        this.state = State.InDirArg
        this.sectionStart = this.index + 1
        break
      default:
        // Plain attribute name starting at the current index.
        this.state = State.InAttrName
        this.sectionStart = this.index
    }
  }

当前例子中，执行到了this.state = State.InAttrName和this.sectionStart = this.index。指针继续移动：

5、`State.InAttrName`

指针变为：

c: 108(l)
state: State.InAttrName
this.index: 6
this.sectionStart: 5

  // this.cbs.onattribname
  // Starts a new attribute node in `currentProp` for the name spanning start..end.
  onattribname(start, end) {
    const name = getSlice(start, end)
    const nameLoc = getLoc(start, end)
    // Only the start offset is passed for the attribute's own location at this point.
    const loc = getLoc(start)
    currentProp = {
      type: NodeTypes.ATTRIBUTE,
      name,
      nameLoc,
      // No value has been read yet.
      value: undefined,
      loc,
    }
  },
  /**
   * Scans an attribute name. When `=` or an end-of-tag-section character is
   * reached, the name is reported and the end of the name is handled.
   *
   * @param c - Char code at the current index.
   */
  private stateInAttrName(c: number): void {
    const nameEnded = c === CharCodes.Eq || isEndOfTagSection(c)
    if (nameEnded) {
      this.cbs.onattribname(this.sectionStart, this.index)
      this.handleAttrNameEnd(c)
      return
    }

    // The character check below only applies to dev or non-browser builds.
    if (!(__DEV__ || !__BROWSER__)) {
      return
    }

    // Quotes and `<` are reported as unexpected inside an attribute name.
    const isUnexpectedChar =
      c === CharCodes.DoubleQuote ||
      c === CharCodes.SingleQuote ||
      c === CharCodes.Lt
    if (isUnexpectedChar) {
      this.cbs.onerr(
        ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
        this.index,
      )
    }
  }

这里指针持续移动，直到满足c === CharCodes.Eq时，去执行this.cbs.onattribname(this.sectionStart, this.index)，获取了第一个描述属性的currentProp，这里暂时没有value值，需要我们继续移动指针。后面会扫描到例子中的myApp，这个步骤可以自行调试。

6、扫描至`>`时

指针变为：

c: 62(>)
state: State.BeforeAttrName
this.index: 18
this.sectionStart: -1

// addNode
// Appends `node` to the children of the innermost open element (stack[0]),
// or to the current root when no element is open.
function addNode(node: TemplateChildNode) {
  const parent = stack[0] || currentRoot
  parent.children.push(node)
}
// endOpenTag
// Finalizes the opening tag held in `currentOpenTag`: attaches it to the tree,
// then either closes it immediately (void tags) or pushes it onto the element stack.
// `end` is the offset supplied by the caller; here it is used for `innerLoc` and `onCloseTag`.
function endOpenTag(end: number) {
  if (tokenizer.inSFCRoot) {
    // Start with an empty inner location positioned right after `end`.
    currentOpenTag!.innerLoc = getLoc(end + 1, end + 1)
  }
  // Attach the element to its parent (innermost open element, or the root).
  addNode(currentOpenTag!)
  const { tag, ns } = currentOpenTag!
  if (ns === Namespaces.HTML && currentOptions.isPreTag(tag)) {
    // Presumably a nesting counter for pre tags; confirm where it is decremented.
    inPre++
  }
  if (currentOptions.isVoidTag(tag)) {
    // Void tags are closed right away and never pushed onto the stack.
    onCloseTag(currentOpenTag!, end)
  } else {
    // The element becomes the new innermost open element (stack[0]).
    stack.unshift(currentOpenTag!)
    if (ns === Namespaces.SVG || ns === Namespaces.MATH_ML) {
      tokenizer.inXML = true
    }
  }
  // The opening tag is complete; nothing is in progress any more.
  currentOpenTag = null
}