C++实现编译器(7): 跳转指令¶
参照《Writing An Interpreter/Compiler In Go》,改用C++实现。
引言¶
本篇对应的源码位于目录: src/06/
src
|06
| |token
| | |token.hpp
| |evaluator
| | |evaluator.hpp
| | |builtins.hpp
| |CMakeLists.txt
| |test
| | |lexer_test.hpp
| | |parser_test.hpp
| | |evaluator_test.hpp
| | |vm_test.hpp
| | |objects_test.hpp
| | |ast_test.hpp
| | |code_test.hpp
| | |compiler_test.hpp
| | |main.cpp
| |lexer
| | |lexer.hpp
| |repl
| | |repl.hpp
| |code
| | |code.hpp
| |objects
| | |objects.hpp
| | |environment.hpp
| |parser
| | |parser.hpp
| | |parser_tracing.hpp
| |vm
| | |vm.hpp
| |ast
| | |ast.hpp
| |main
| | |monkey.cpp
| |compiler
| | |compiler.hpp
之前字节码和编译器已经支持整数加法运算,现在添加对表达式和条件语句的支持,会涉及到编译顺序
的调整,执行条件语句的跳转指令
等。
出栈操作码:OpPop¶
栈顶的值使用后不再继续留在调用栈中,添加出栈操作码:
std::shared_ptr<objects::Error> Compile([[maybe_unused]] std::shared_ptr<ast::Node> node)
{
//...
else if(node->GetNodeType() == ast::NodeType::ExpressionStatement)
{
std::shared_ptr<ast::ExpressionStatement> exprStmt = std::dynamic_pointer_cast<ast::ExpressionStatement>(node);
auto resultObj = Compile(exprStmt->pExpression);
if (evaluator::isError(resultObj))
{
return resultObj;
}
emit(bytecode::OpcodeType::OpPop, {});
}
//...
}
在每个表达式语句后执行自动出栈操作,防止执行结果继续停留在栈顶。
中缀表达式¶
8种中缀表达式中和算术运算相关的是: +
,-
,*
,/
,在加法基础上补充即可。
编译器部分:
else if(node->GetNodeType() == ast::NodeType::InfixExpression)
{
// ...
if (infixObj->Operator == "+")
{
emit(bytecode::OpcodeType::OpAdd, {});
}
else if (infixObj->Operator == "-")
{
emit(bytecode::OpcodeType::OpSub, {});
}
else if (infixObj->Operator == "*")
{
emit(bytecode::OpcodeType::OpMul, {});
}
else if (infixObj->Operator == "/")
{
emit(bytecode::OpcodeType::OpDiv, {});
}
else {
return evaluator::newError("unknow operator: " + infixObj->Operator);
}
}
虚拟机部分:
std::shared_ptr<objects::Object> executeBInaryIntegerOperaction(bytecode::OpcodeType op,
std::shared_ptr<objects::Object> left,
std::shared_ptr<objects::Object> right)
{
auto rightObj = std::dynamic_pointer_cast<objects::Integer>(right);
auto leftObj = std::dynamic_pointer_cast<objects::Integer>(left);
int64_t result = 0;
switch (op)
{
case bytecode::OpcodeType::OpAdd:
result = leftObj->Value + rightObj->Value;
break;
case bytecode::OpcodeType::OpSub:
result = leftObj->Value - rightObj->Value;
break;
case bytecode::OpcodeType::OpMul:
result = leftObj->Value * rightObj->Value;
break;
case bytecode::OpcodeType::OpDiv:
result = leftObj->Value / rightObj->Value;
break;
default:
return evaluator::newError("unknow integer operator: " + bytecode::OpcodeTypeStr(op));
break;
}
return Push(std::make_shared<objects::Integer>(result));
}
布尔类型¶
编译器部分:
else if(node->GetNodeType() == ast::NodeType::Boolean)
{
auto boolAst = std::dynamic_pointer_cast<ast::Boolean>(node);
if(boolAst->Value)
{
emit(bytecode::OpcodeType::OpTrue,{});
} else {
emit(bytecode::OpcodeType::OpFalse,{});
}
}
虚拟机部分:
case bytecode::OpcodeType::OpTrue:
{
auto result = Push(objects::TRUE_OBJ);
if(evaluator::isError(result))
{
return result;
}
}
break;
case bytecode::OpcodeType::OpFalse:
{
auto result = Push(objects::FALSE_OBJ);
if(evaluator::isError(result))
{
return result;
}
}
break;
比较运算符¶
==
!=
>
<
else if(node->GetNodeType() == ast::NodeType::InfixExpression)
{
std::shared_ptr<ast::InfixExpression> infixObj = std::dynamic_pointer_cast<ast::InfixExpression>(node);
if (infixObj->Operator == "<")
{
auto resultObj = Compile(infixObj->pRight);
if (evaluator::isError(resultObj))
{
return resultObj;
}
resultObj = Compile(infixObj->pLeft);
if (evaluator::isError(resultObj))
{
return resultObj;
}
emit(bytecode::OpcodeType::OpGreaterThan, {});
return nullptr;
}
// ...
else if (infixObj->Operator == ">")
{
emit(bytecode::OpcodeType::OpGreaterThan, {});
}
else if (infixObj->Operator == "==")
{
emit(bytecode::OpcodeType::OpEqual, {});
}
else if (infixObj->Operator == "!=")
{
emit(bytecode::OpcodeType::OpNotEqual, {});
}
else {
return evaluator::newError("unknow operator: " + infixObj->Operator);
}
}
4个运算符只需要定义3个操作码,通过调整编译顺序,可以将大于与小于都使用一个操作码完成。
前缀运算¶
!
-
编译器:
else if(node->GetNodeType() == ast::NodeType::PrefixExpression)
{
auto prefixObj = std::dynamic_pointer_cast<ast::PrefixExpression>(node);
auto resultObj = Compile(prefixObj->pRight);
if (evaluator::isError(resultObj))
{
return resultObj;
}
if(prefixObj->Operator == "!")
{
emit(bytecode::OpcodeType::OpBang, {});
}
else if(prefixObj->Operator == "-")
{
emit(bytecode::OpcodeType::OpMinus, {});
}
else{
return evaluator::newError("unknow operator: " + prefixObj->Operator);
}
}
虚拟机:
std::shared_ptr<objects::Object> executeBangOperator()
{
auto operand = Pop();
if(operand == objects::TRUE_OBJ)
{
return Push(objects::FALSE_OBJ);
}
else if(operand == objects::FALSE_OBJ)
{
return Push(objects::TRUE_OBJ);
}
else
{
return Push(objects::FALSE_OBJ);
}
}
std::shared_ptr<objects::Object> executeMinusOperator()
{
auto operand = Pop();
if(operand->Type() != objects::ObjectType::INTEGER)
{
return evaluator::newError("unsupported type for negation: " + operand->TypeStr());
}
auto integerObj = std::dynamic_pointer_cast<objects::Integer>(operand);
return Push(std::make_shared<objects::Integer>(-1 * integerObj->Value));
}
跳转指令与偏移量¶
当AST已经在字节码中铺平后,字节码就是一系列的指令,它们只有先后关系,没有任何节点或递归;当需要根据条件语句来执行不同指令的时候,需要通过偏移量
和跳转指令
来实现;而这里的偏移量可能是绝对偏移,也可能是相对偏移,如果编译的时候单次扫描的话,可以通过回填
的方式,在知道具体的真实偏移量后再填入预设位置。
以if
条件语句为例:
当编译为字节码时,先编码条件语句部分,然后要有字节操作码来决定是进入哪个分支,新增两个操作码:
前一个是有条件的跳转指令,如果它已经是true了则没有必要再去判断后面的else部分字节码了,使用OpJump
这个无条件跳转指令。
每次语句执行后位于栈顶的元素会出栈,那么这里条件语句执行完后在调用栈就没有它,但它必须交给OpJumpNotTruthy
有条件指令来作为条件进行判断,因此需要记住前一个出栈元素是什么:
struct EmittedInstruction{
bytecode::OpcodeType Opcode;
int Position;
EmittedInstruction(){}
EmittedInstruction(bytecode::OpcodeType op, int pos): Opcode(op), Position(pos){}
};
struct Compiler
{
EmittedInstruction lastInstruction;
EmittedInstruction prevInstruction;
//...
};
添加一些辅助函数来记录这两个信息:
int emit(bytecode::OpcodeType op, std::vector<int> operands)
{
auto ins = bytecode::Make(op, operands);
auto pos = addInstruction(ins);
setLastInstruction(op, pos);
return pos;
}
void setLastInstruction(bytecode::OpcodeType op, int pos)
{
prevInstruction = lastInstruction;
lastInstruction = EmittedInstruction(op, pos);
}
bool lastInstructionIsPop()
{
return (lastInstruction.Opcode == bytecode::OpcodeType::OpPop);
}
void removeLastPop()
{
instructions.assign(instructions.begin(), instructions.begin() + lastInstruction.Position);
lastInstruction = prevInstruction;
}
每个刚出栈的元素都可以记录和判断,则可以编译期去除一些出栈操作,起到回退的效果。
至于跳转指令的真实偏移量,在编译到可以确定这个值时回填过去:
void replaceInstruction(int pos, const bytecode::Instructions& newInstruction)
{
for (int i = 0, size = newInstruction.size(); i < size; i++)
{
instructions[pos + i] = newInstruction[i];
}
}
void changeOperand(int opPos, int operand)
{
bytecode::OpcodeType op = static_cast<bytecode::OpcodeType>(instructions[opPos]);
auto newInstruction = bytecode::Make(op, {operand});
replaceInstruction(opPos, newInstruction);
}
当知道具体的偏移量后,构建新的字节码,然后直接替换预设偏移量的位置,反正字节长度是一样的,等同直接覆盖也不需要担心引起乱码。
if-else表达式¶
else if(node->GetNodeType() == ast::NodeType::IfExpression)
{
std::shared_ptr<ast::IfExpression> ifObj = std::dynamic_pointer_cast<ast::IfExpression>(node);
auto resultObj = Compile(ifObj->pCondition);
if (evaluator::isError(resultObj))
{
return resultObj;
}
// 预设一个偏移量方便后续回填
auto jumpNotTruthyPos = emit(bytecode::OpcodeType::OpJumpNotTruthy, {9999});
resultObj = Compile(ifObj->pConsequence); // 会引入一个OpPop
if (evaluator::isError(resultObj))
{
return resultObj;
}
// 清除这个多余的OpPop
if(lastInstructionIsPop())
{
removeLastPop();
}
// 如果没有else部分,则可以回填真实偏移量,跳转到此次即条件语句结束
if(ifObj->pAlternative == nullptr)
{
auto afterConsequencePos = instructions.size();
changeOperand(jumpNotTruthyPos, afterConsequencePos);
}
else
{
// 无条件跳转指令
// 前面条件为真时需要跳过else部分所有字节码
auto jumpPos = emit(bytecode::OpcodeType::OpJump, {9999});
auto afterConsequencePos = instructions.size();
changeOperand(jumpNotTruthyPos, afterConsequencePos);
resultObj = Compile(ifObj->pAlternative);
if (evaluator::isError(resultObj))
{
return resultObj;
}
if (lastInstructionIsPop())
{
removeLastPop();
}
afterConsequencePos = instructions.size();
changeOperand(jumpPos, afterConsequencePos);
}
}
这里的OpJump
无条件跳转指令是必须的,当条件为真时执行完如果不跳转,则会继续解码else部分。
执行跳转指令¶
case bytecode::OpcodeType::OpJump:
{
uint16_t pos;
bytecode::ReadUint16(instructions, ip+1, pos);
ip = pos - 1;
}
break;
case bytecode::OpcodeType::OpJumpNotTruthy:
{
uint16_t pos;
bytecode::ReadUint16(instructions, ip+1, pos);
ip += 2;
auto condition = Pop();
if(!evaluator::isTruthy(condition))
{
ip = pos - 1;
}
}
break;
直接读取跳转指令偏移量,根据条件设置下一个读取字节码位置即可。
OpNull操作码¶
在if-else语句中,如果条件为false也没有else部分,则字节码如何安排呢?为了处理这个问题,总是认为存在一个else部分,如果不存在就用一个Null代替,以保证语法的完整性。
else if(node->GetNodeType() == ast::NodeType::IfExpression)
{
std::shared_ptr<ast::IfExpression> ifObj = std::dynamic_pointer_cast<ast::IfExpression>(node);
auto resultObj = Compile(ifObj->pCondition);
if (evaluator::isError(resultObj))
{
return resultObj;
}
// 预设一个偏移量方便后续回填
auto jumpNotTruthyPos = emit(bytecode::OpcodeType::OpJumpNotTruthy, {9999});
resultObj = Compile(ifObj->pConsequence);
if (evaluator::isError(resultObj))
{
return resultObj;
}
if(lastInstructionIsPop())
{
removeLastPop();
}
auto jumpPos = emit(bytecode::OpcodeType::OpJump, {9999});
auto afterConsequencePos = instructions.size();
changeOperand(jumpNotTruthyPos, afterConsequencePos);
if(ifObj->pAlternative == nullptr)
{
emit(bytecode::OpcodeType::OpNull, {});
}
else
{
resultObj = Compile(ifObj->pAlternative);
if (evaluator::isError(resultObj))
{
return resultObj;
}
if (lastInstructionIsPop())
{
removeLastPop();
}
}
afterConsequencePos = instructions.size();
changeOperand(jumpPos, afterConsequencePos);
}
虚拟机:
case bytecode::OpcodeType::OpNull:
auto result = Push(objects::NULL_OBJ);
if(evaluator::isError(result))
{
return result;
}
break;
测试用例¶
全部通过测试用例:
$ ./test_monkey
[==========] Running 49 tests from 49 test suites.
[----------] Global test environment set-up.
[----------] 1 test from TestNextToken
[ RUN ] TestNextToken.BasicAssertions
[ OK ] TestNextToken.BasicAssertions (0 ms)
[----------] 1 test from TestNextToken (0 ms total)
......
[----------] 1 test from testVMBooleanExpression
[ RUN ] testVMBooleanExpression.basicTest
[ OK ] testVMBooleanExpression.basicTest (0 ms)
[----------] 1 test from testVMBooleanExpression (0 ms total)
[----------] 1 test from testVMConditionals
[ RUN ] testVMConditionals.basicTest
[ OK ] testVMConditionals.basicTest (0 ms)
[----------] 1 test from testVMConditionals (0 ms total)
[----------] Global test environment tear-down
[==========] 49 tests from 49 test suites ran. (16 ms total)
[ PASSED ] 49 tests.
总结¶
通过跳转指令实现跳转,对于具体跳转的偏移量通过预设位置和回填真实值的方式进行,执行跳转指令时只是改变下一个解码位置。
- 微信搜索: 「 MinYiLife 」, 关注公众号!
- 本文链接: https://www.lesliezhu.com/blog/2022/12/12/writing_an_interpreter_in_cpp_7/
- 版权声明: 原创文章,如需转载请注明文章作者和出处。谢谢!