b612咔叽鸿蒙版
217.26M · 2025-10-31
vue中的编译过程将字符串转换成了ast树,这是一件神奇的事。
接上篇的第一步const ast = isString(source) ? baseParse(source, resolvedOptions) :source,我们将以图文结合的方式探索vue底层是如何用解析器将字符串转换成ast树的。
baseParse
// baseParse函数
/**
 * Parses a template string into a root AST node.
 *
 * @param input - Template source; it is stored as the current input and handed
 *   to the tokenizer.
 * @param options - Optional overrides. Only entries whose value is neither
 *   `null` nor `undefined` are copied over the default parser options.
 * @returns The root node, with `loc` spanning the whole input and its
 *   children passed through `condenseWhitespace`.
 */
function baseParse(input, options) {
  // Start every parse from a clean state.
  reset();
  currentInput = input;

  // Begin with a fresh object built from the defaults, then layer the
  // caller-supplied overrides on top of it.
  currentOptions = extend({}, defaultParserOptions);
  if (options) {
    for (const key in options) {
      if (options[key] == null) {
        continue;
      }
      currentOptions[key] = options[key];
    }
  }

  // Create the root node and publish it as the current root for the duration
  // of tokenization.
  const root = createRoot([], input);
  currentRoot = root;

  // Run the tokenizer over the template.
  tokenizer.parse(currentInput);

  // The root's location covers the entire input string.
  root.loc = getLoc(0, input.length);
  // Condense / filter whitespace among the root's children.
  root.children = condenseWhitespace(root.children);

  // Parsing is finished; drop the reference to the current root.
  currentRoot = null;
  return root;
}
/**
 * Resets the shared state used while building the AST so that the next parse
 * starts clean.
 */
function reset() {
  // Reset the tokenizer (lexer) itself.
  tokenizer.reset();
  // Empty the tag stack in place.
  stack.length = 0;
  // No opening tag and no attribute are in progress.
  currentOpenTag = null;
  currentProp = null;
  // Clear the pending attribute value together with its start/end indices.
  currentAttrValue = "";
  currentAttrStartIndex = -1;
  currentAttrEndIndex = -1;
}
/**
 * Creates the root node of the AST.
 *
 * @param children - Array used as the root's `children` (stored by reference).
 * @param source - Template source recorded on the node; defaults to "".
 * @returns A new root node object (`type: 0`) with empty bookkeeping
 *   collections and `loc` set to the shared `locStub`.
 */
function createRoot(children, source = "") {
  const root = {
    type: 0,
    source,
    children,
    helpers: /* @__PURE__ */ new Set(),
    components: [],
    directives: [],
    hoists: [],
    imports: [],
    cached: 0,
    temps: 0,
    codegenNode: undefined,
    loc: locStub,
  };
  return root;
}
以上逻辑主要是重置解析条件reset,生成根节点createRoot,执行解析tokenizer.parse,过滤空白子元素condenseWhitespace。tokenizer到底是啥?
它是Tokenizer类的一个实例,接下来看看Tokenizer:
// Tokenizer
/**
 * Outline of the Tokenizer class: the state set up by the constructor plus
 * the names of its methods. Method bodies are intentionally left empty here;
 * this listing only sketches the class shape.
 */
class Tokenizer {
  /**
   * @param stack - The tag stack; the core structure used to build the AST.
   * @param cbs - Object holding the callback methods.
   */
  constructor(stack, cbs) {
    this.stack = stack;
    this.cbs = cbs;
    this.state = 1;
    this.buffer = "";
    this.sectionStart = 0;
    this.index = 0;
    this.newlines = [];
    this.mode = 0;
    this.delimiterOpen = defaultDelimitersOpen;
    this.delimiterClose = defaultDelimitersClose;
    this.delimiterIndex = -1;
    this.currentSequence = void 0;
    this.sequenceIndex = 0;
    // ...other properties omitted
  }

  // Methods. Class members are not separated by commas.
  reset() {}
  // Entry point that tokenizes the input.
  parse(input) {}
  getPos(index) {}
  peek() {}
  stateText() {}
  cleanup() {}
  finish() {}
  handleTagName(c) {}
  stateInTagName(c) {}
  handleAttrStart(c) {}
  stateInAttrName(c) {}
  stateInDirName(c) {}
  stateInDeclaration(c) {}
  // ...other methods omitted
}
在创建Tokenizer时,传入了stack和各方法组成的对象{}
// Create the tokenizer instance. It receives the `stack` array and an object
// of callbacks (the constructor shown in this file stores them as `this.stack`
// and `this.cbs`). The callback bodies are elided in this excerpt; only their
// names and parameter lists are shown.
const tokenizer = new Tokenizer(stack, {
onerr: emitError,
ontext(start, end) {},
ontextentity(char, start, end) {},
oninterpolation(start, end) {},
onopentagname(start, end) {},
onopentagend(end) {},
onclosetag(start, end) {},
onselfclosingtag(end) {},
onattribname(start, end) {},
ondirname(start, end) {},
ondirarg(start, end) {},
ondirmodifier(start, end) {},
onattribdata(start, end) {},
onattribentity(char, start, end) {},
onattribnameend(end) {},
onattribend(quote, end) {},
oncomment(start, end) {},
onend() {},
oncdata(start, end) {},
onprocessinginstruction(start) {},
});
以上首先定义了一个Tokenizer类,其中包含属性和方法。再通过new Tokenizer的方式创建实例tokenizer,并传入stack栈和cbs方法集合。
接下来继续介绍核心逻辑:tokenizer.parse(currentInput)。
我们继续以下面template为例:
<div class="myApp">
<!-- 这是注释文案 -->
<h3>编译原理</h3>
<div v-if="flag">
<p>{{ first + second }}</p>
</div>
<childComp v-else></childComp>
</div>
接下来看parse方法:
/**
 * Walks the input one character code at a time and hands each code to the
 * handler that belongs to the tokenizer's current state.
 *
 * The cases keep their original order: states that are more likely to be hit
 * come first, as a performance improvement.
 *
 * After the loop, `cleanup()` and then `finish()` are called.
 *
 * @param input - The text to tokenize; it becomes `this.buffer`.
 */
public parse(input: string) {
  this.buffer = input
  while (this.index < this.buffer.length) {
    const code = this.buffer.charCodeAt(this.index)
    // Record the index of every newline character.
    if (code === CharCodes.NewLine) {
      this.newlines.push(this.index)
    }
    // Run the handler matching the state at the current scan position.
    switch (this.state) {
      case State.Text:
        this.stateText(code)
        break
      case State.InterpolationOpen:
        this.stateInterpolationOpen(code)
        break
      case State.Interpolation:
        this.stateInterpolation(code)
        break
      case State.InterpolationClose:
        this.stateInterpolationClose(code)
        break
      case State.SpecialStartSequence:
        this.stateSpecialStartSequence(code)
        break
      case State.InRCDATA:
        this.stateInRCDATA(code)
        break
      case State.CDATASequence:
        this.stateCDATASequence(code)
        break
      case State.InAttrValueDq:
        this.stateInAttrValueDoubleQuotes(code)
        break
      case State.InAttrName:
        this.stateInAttrName(code)
        break
      case State.InDirName:
        this.stateInDirName(code)
        break
      case State.InDirArg:
        this.stateInDirArg(code)
        break
      case State.InDirDynamicArg:
        this.stateInDynamicDirArg(code)
        break
      case State.InDirModifier:
        this.stateInDirModifier(code)
        break
      case State.InCommentLike:
        this.stateInCommentLike(code)
        break
      case State.InSpecialComment:
        this.stateInSpecialComment(code)
        break
      case State.BeforeAttrName:
        this.stateBeforeAttrName(code)
        break
      case State.InTagName:
        this.stateInTagName(code)
        break
      case State.InSFCRootTagName:
        this.stateInSFCRootTagName(code)
        break
      case State.InClosingTagName:
        this.stateInClosingTagName(code)
        break
      case State.BeforeTagName:
        this.stateBeforeTagName(code)
        break
      case State.AfterAttrName:
        this.stateAfterAttrName(code)
        break
      case State.InAttrValueSq:
        this.stateInAttrValueSingleQuotes(code)
        break
      case State.BeforeAttrValue:
        this.stateBeforeAttrValue(code)
        break
      case State.BeforeClosingTagName:
        this.stateBeforeClosingTagName(code)
        break
      case State.AfterClosingTagName:
        this.stateAfterClosingTagName(code)
        break
      case State.BeforeSpecialS:
        this.stateBeforeSpecialS(code)
        break
      case State.BeforeSpecialT:
        this.stateBeforeSpecialT(code)
        break
      case State.InAttrValueNq:
        this.stateInAttrValueNoQuotes(code)
        break
      case State.InSelfClosingTag:
        this.stateInSelfClosingTag(code)
        break
      case State.InDeclaration:
        this.stateInDeclaration(code)
        break
      case State.BeforeDeclaration:
        this.stateBeforeDeclaration(code)
        break
      case State.BeforeComment:
        this.stateBeforeComment(code)
        break
      case State.InProcessingInstruction:
        this.stateInProcessingInstruction(code)
        break
      case State.InEntity:
        // The entity handler is invoked without the character code.
        this.stateInEntity()
        break
    }
    this.index++
  }
  this.cleanup()
  this.finish()
}
可以看出这里根据state值产生了众多分支,这些分支处理了各种可能的场景。我们现在结合实例,一步步进行扫描,将字符串变成ast树。
我们根据当前例子,选取以下的状态:
State.Text在class Tokenizer内部第一行就定义了public state = State.Text。所以,扫描的第一个字符<(c === 60)时,执行this.stateText(c)的分支逻辑:
我们定义如下指针,指针上带的属性值默认为:
/**
 * Handler for the Text state.
 *
 * - On `<`: reports any text between `sectionStart` and the current index via
 *   `cbs.ontext`, then switches to `State.BeforeTagName` and marks the section
 *   start at the `<`.
 * - On `&` (non-browser builds only): starts entity handling.
 * - On the first character of the opening delimiter (unless `inVPre` is set):
 *   switches to `State.InterpolationOpen` and immediately processes `c` there.
 *
 * Any other character leaves the state untouched.
 *
 * @param c - Character code at the current index.
 */
private stateText(c: number): void {
  if (c === CharCodes.Lt) {
    const hasPendingText = this.index > this.sectionStart
    if (hasPendingText) {
      this.cbs.ontext(this.sectionStart, this.index)
    }
    this.state = State.BeforeTagName
    this.sectionStart = this.index
    return
  }

  if (!__BROWSER__ && c === CharCodes.Amp) {
    this.startEntity()
    return
  }

  if (!this.inVPre && c === this.delimiterOpen[0]) {
    this.state = State.InterpolationOpen
    this.delimiterIndex = 0
    this.stateInterpolationOpen(c)
  }
}
以上逻辑中,满足c === CharCodes.Lt,因此this.state = State.BeforeTagName,指针指向下一个阶段:标签名开始之前。
State.BeforeTagName指针变为:
/**
 * Handler for the character that follows `<`.
 *
 * - `!` switches to `State.BeforeDeclaration`; `?` switches to
 *   `State.InProcessingInstruction`. In both cases the section starts at the
 *   next index.
 * - A tag-start character marks the section start at the current index and
 *   picks the tag-name state from the parse mode and the `inSFCRoot` / `inXML`
 *   flags.
 * - `/` switches to `State.BeforeClosingTagName`.
 * - Anything else falls back to `State.Text` and re-processes `c` as text.
 *
 * @param c - Character code at the current index.
 */
private stateBeforeTagName(c: number): void {
  if (c === CharCodes.ExclamationMark) {
    this.state = State.BeforeDeclaration
    this.sectionStart = this.index + 1
    return
  }

  if (c === CharCodes.Questionmark) {
    this.state = State.InProcessingInstruction
    this.sectionStart = this.index + 1
    return
  }

  if (isTagStartChar(c)) {
    this.sectionStart = this.index
    // Default to a plain tag name; the special states only apply outside
    // BASE mode.
    let nextState = State.InTagName
    if (this.mode !== ParseMode.BASE) {
      if (this.inSFCRoot) {
        nextState = State.InSFCRootTagName
      } else if (!this.inXML) {
        if (c === 116 /* t */) {
          nextState = State.BeforeSpecialT
        } else if (c === 115 /* s */) {
          nextState = State.BeforeSpecialS
        }
      }
    }
    this.state = nextState
    return
  }

  if (c === CharCodes.Slash) {
    this.state = State.BeforeClosingTagName
    return
  }

  // Not the start of a tag after all: treat the character as text.
  this.state = State.Text
  this.stateText(c)
}
以上逻辑中,满足isTagStartChar(c),因此this.sectionStart = this.index = 1,并且,this.state = State.InTagName,指针指向下一个阶段,标签tag中。
State.InTagName指针变为:
/**
 * Handler for characters inside a tag name. Nothing happens until `c` ends
 * the tag section; at that point the tag name is handed to `handleTagName`.
 *
 * @param c - Character code at the current index.
 */
private stateInTagName(c: number): void {
  if (!isEndOfTagSection(c)) {
    return
  }
  this.handleTagName(c)
}
以上逻辑中,字符i不满足isEndOfTagSection(c),所以指针继续扫描,直到扫描到空格位置,此时进入方法this.handleTagName(c)。
指针变为:
// this.cbs.onopentagname
/**
 * Callback fired when an opening tag's name has been scanned. Builds the
 * element node for that tag and stores it in `currentOpenTag`.
 *
 * @param start - Index where the tag name starts.
 * @param end - Index just past the tag name.
 */
onopentagname(start, end) {
  const tagName = getSlice(start, end)
  // The namespace is resolved from the tag name, the element currently at the
  // head of the stack, and the configured `ns` option.
  const ns = currentOptions.getNamespace(tagName, stack[0], currentOptions.ns)
  // The location starts one character before the name (presumably so that it
  // also covers the `<`).
  const loc = getLoc(start - 1, end)
  currentOpenTag = {
    type: NodeTypes.ELEMENT,
    tag: tagName,
    ns,
    tagType: ElementTypes.ELEMENT, // will be refined on tag close
    props: [],
    children: [],
    loc,
    codegenNode: undefined,
  }
},
/**
 * Finishes an opening tag's name: reports the name's range through
 * `cbs.onopentagname`, clears the section start, moves to
 * `State.BeforeAttrName`, and immediately processes `c` in that state.
 *
 * @param c - The character code that ended the tag name.
 */
private handleTagName(c: number) {
  const { sectionStart: nameStart, index: nameEnd } = this
  this.cbs.onopentagname(nameStart, nameEnd)
  this.sectionStart = -1
  this.state = State.BeforeAttrName
  this.stateBeforeAttrName(c)
}
在以上逻辑中,我们得到的name就是div,currentOpenTag就是一个丰富的描述当前节点信息的对象。至此,我们的第一个标签div产生。结束之后,执行this.sectionStart = -1和this.state = State.BeforeAttrName,最后执行的this.stateBeforeAttrName(c)是为了判断开始标签是否在此处直接结束(例如不带属性的<div>),明显当前例子中不是:此时c是空格,执行过程未有实际逻辑发生,我们继续移动指针。
State.BeforeAttrName指针变为:
/**
 * Handler for the position before an attribute name inside an opening tag.
 *
 * - `>` ends the opening tag (`cbs.onopentagend`) and continues in
 *   `State.InRCDATA` or `State.Text`, depending on `inRCDATA`.
 * - `/` switches to `State.InSelfClosingTag`; in dev / non-browser builds an
 *   error is reported when the next character is not `>`.
 * - `<` followed by `/` also ends the opening tag and goes back to
 *   `State.BeforeTagName`.
 * - Whitespace is skipped.
 * - Any other character starts an attribute; in dev / non-browser builds a
 *   leading `=` is reported as an error first.
 *
 * @param c - Character code at the current index.
 */
private stateBeforeAttrName(c: number): void {
  if (c === CharCodes.Gt) {
    this.cbs.onopentagend(this.index)
    this.state = this.inRCDATA ? State.InRCDATA : State.Text
    this.sectionStart = this.index + 1
    return
  }

  if (c === CharCodes.Slash) {
    this.state = State.InSelfClosingTag
    if ((__DEV__ || !__BROWSER__) && this.peek() !== CharCodes.Gt) {
      this.cbs.onerr(ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG, this.index)
    }
    return
  }

  if (c === CharCodes.Lt && this.peek() === CharCodes.Slash) {
    this.cbs.onopentagend(this.index)
    this.state = State.BeforeTagName
    this.sectionStart = this.index
    return
  }

  if (isWhitespace(c)) {
    return
  }

  if ((__DEV__ || !__BROWSER__) && c === CharCodes.Eq) {
    this.cbs.onerr(
      ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
      this.index,
    )
  }
  this.handleAttrStart(c)
}
以上例子中字符c不是空白字符(此时为class的首字母c),因此继续执行this.handleAttrStart(c)
/**
 * Decides how an attribute begins, based on its first character.
 *
 * - `v` followed by `-` switches to `State.InDirName`, with the section
 *   starting at the current index.
 * - `CharCodes.Dot`, `Colon`, `At` or `Number`: the single character is
 *   reported as a directive name (`cbs.ondirname`) and scanning continues in
 *   `State.InDirArg` from the next index.
 * - Anything else is treated as a plain attribute name
 *   (`State.InAttrName`).
 *
 * @param c - Character code at the current index.
 */
private handleAttrStart(c: number) {
  if (c === CharCodes.LowerV && this.peek() === CharCodes.Dash) {
    this.state = State.InDirName
    this.sectionStart = this.index
    return
  }

  switch (c) {
    case CharCodes.Dot:
    case CharCodes.Colon:
    case CharCodes.At:
    case CharCodes.Number:
      this.cbs.ondirname(this.index, this.index + 1)
      this.state = State.InDirArg
      this.sectionStart = this.index + 1
      break
    default:
      this.state = State.InAttrName
      this.sectionStart = this.index
  }
}
当前例子中,执行到了this.state = State.InAttrName和this.sectionStart = this.index。指针继续移动:
State.InAttrName指针变为:
// this.cbs.onattribname
/**
 * Callback fired when an attribute name has been scanned. Creates the
 * attribute node and stores it in `currentProp`; `value` starts out
 * `undefined`.
 *
 * @param start - Index where the attribute name starts.
 * @param end - Index just past the attribute name.
 */
onattribname(start, end) {
  const name = getSlice(start, end)
  const nameLoc = getLoc(start, end)
  // Only the start is passed for the attribute's own location here.
  const loc = getLoc(start)
  currentProp = {
    type: NodeTypes.ATTRIBUTE,
    name,
    nameLoc,
    value: undefined,
    loc,
  }
},
/**
 * Handler for characters inside an attribute name.
 *
 * When `c` is `=` or ends the tag section, the name's range is reported via
 * `cbs.onattribname` and `handleAttrNameEnd` takes over. Otherwise, in
 * dev / non-browser builds, a `"`, `'` or `<` inside the name is reported as
 * an error.
 *
 * @param c - Character code at the current index.
 */
private stateInAttrName(c: number): void {
  const nameEnded = c === CharCodes.Eq || isEndOfTagSection(c)
  if (nameEnded) {
    this.cbs.onattribname(this.sectionStart, this.index)
    this.handleAttrNameEnd(c)
    return
  }

  if (__DEV__ || !__BROWSER__) {
    const isUnexpectedChar =
      c === CharCodes.DoubleQuote ||
      c === CharCodes.SingleQuote ||
      c === CharCodes.Lt
    if (isUnexpectedChar) {
      this.cbs.onerr(
        ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
        this.index,
      )
    }
  }
}
这里指针持续移动,直到满足c === CharCodes.Eq时,去执行this.cbs.onattribname(this.sectionStart, this.index),获取了第一个描述属性的currentProp,这里暂时没有value值,需要我们继续移动指针。后面会扫描到例子中的myApp,这个步骤可以自行调试。
扫描到>时,指针变为:
// addNode
/**
 * Appends a node to the children of the element at the head of the stack, or
 * to the current root's children when `stack[0]` is falsy.
 *
 * @param node - The node to append.
 */
function addNode(node: TemplateChildNode) {
  const parent = stack[0] || currentRoot
  parent.children.push(node)
}
// endOpenTag
/**
 * Completes the current opening tag.
 *
 * The element in `currentOpenTag` is attached to its parent via `addNode`.
 * For an HTML-namespace tag that `isPreTag` accepts, `inPre` is incremented.
 * Void tags are closed right away through `onCloseTag`; every other element
 * is put at the front of the stack, and SVG / MathML elements additionally
 * switch the tokenizer into XML mode. `currentOpenTag` is cleared at the end.
 *
 * @param end - Index passed on to `onCloseTag` for void tags; when the
 *   tokenizer is in the SFC root, `innerLoc` is set to the position `end + 1`.
 */
function endOpenTag(end: number) {
  const openTag = currentOpenTag!
  if (tokenizer.inSFCRoot) {
    openTag.innerLoc = getLoc(end + 1, end + 1)
  }
  addNode(openTag)

  const { tag, ns } = openTag
  if (ns === Namespaces.HTML && currentOptions.isPreTag(tag)) {
    inPre++
  }

  if (!currentOptions.isVoidTag(tag)) {
    // The front of the stack is the element that subsequent nodes attach to.
    stack.unshift(openTag)
    if (ns === Namespaces.SVG || ns === Namespaces.MATH_ML) {
      tokenizer.inXML = true
    }
  } else {
    onCloseTag(openTag, end)
  }

  currentOpenTag = null
}
首先注意addNode,这里会将当前获取到的节点div对象currentOpenTag推入到栈顶元素stack[0]的children中去,如果栈为空,则推入到根节点currentRoot的children中去,实现了父子关系的建立。
其次关注stack.unshift(currentOpenTag),这里会将当前currentOpenTag推入到栈中,通过栈的方式维护树形结构。
接下来按照指针扫描一行为单位,介绍树的构建和栈的维护。
以上介绍了解析器针对字符串转换成ast树的过程:
栈,用来构建树。root节点作为树的根节点。<div ***>起始节点会构建节点,并将其推入到栈顶,作为一个中间节点。</div>的闭合标签则会进行出栈操作,结束当前中间节点树的构建操作。新构建的节点会被添加到栈顶元素(栈为空时为根节点)的children中去。通过以上操作,就将字符串转换成了ast树。