From b835dbb36e6ad6cc9e2ab3963f8b872c496dcd52 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Thu, 30 Apr 2026 13:34:49 -0500 Subject: [PATCH] VM basics (#5) Co-authored-by: Andre Wagner Reviewed-on: https://192.168.5.48/andre/tyche/pulls/5 --- CMakeLists.txt | 13 +++-- TODO.md | 9 ++- src/bytecode/bytecode.cc | 2 +- src/bytecode/bytecode.hh | 4 +- src/bytecode/bytecodeprototype.hh | 4 +- src/bytecode/constant.hh | 2 +- src/bytecode/tests.cc | 3 +- src/{bytecode => common}/bytearray.cc | 0 src/{bytecode => common}/bytearray.hh | 0 src/vm/code.cc | 43 ++++++++++++++- src/vm/code.hh | 18 +++++- src/vm/expr.cc | 18 ++++++ src/vm/expr.hh | 12 ++++ src/vm/instruction.cc | 46 +++++++++++----- src/vm/instruction.hh | 7 ++- src/vm/location.hh | 16 ++++++ src/vm/stack.cc | 15 ++++- src/vm/stack.hh | 6 +- src/vm/tests.cc | 26 ++++++++- src/vm/value.cc | 14 ++++- src/vm/value.hh | 19 +++++-- src/vm/vm.cc | 79 +++++++++++++++++++++++++++ src/vm/vm.hh | 35 ++++++++++++ src/vm/vm_exceptions.hh | 22 +++++++- 24 files changed, 366 insertions(+), 47 deletions(-) rename src/{bytecode => common}/bytearray.cc (100%) rename src/{bytecode => common}/bytearray.hh (100%) create mode 100644 src/vm/expr.cc create mode 100644 src/vm/expr.hh create mode 100644 src/vm/location.hh create mode 100644 src/vm/vm.cc create mode 100644 src/vm/vm.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index ca26996..901ac73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,15 +59,15 @@ FetchContent_MakeAvailable(googletest) # add_library(lib${PROJECT_NAME} SHARED - src/bytecode/bytearray.hh - src/bytecode/bytearray.cc + src/common/overloaded.hh + src/common/bytearray.hh + src/common/bytearray.cc src/bytecode/bytecode.cc src/bytecode/bytecode.hh src/bytecode/bytecodeprototype.hh - src/common/overloaded.hh + src/bytecode/constant.hh src/vm/code.cc src/vm/code.hh - src/bytecode/constant.hh src/vm/instruction.hh src/vm/instruction.cc src/vm/value.cc @@ -75,6 +75,11 @@ add_library(lib${PROJECT_NAME} SHARED src/vm/stack.cc src/vm/stack.hh src/vm/vm_exceptions.hh + src/vm/vm.cc + src/vm/vm.hh + src/vm/expr.cc + src/vm/expr.hh + src/vm/location.hh ) target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) diff --git a/TODO.md b/TODO.md index a00258d..fedadc1 100644 --- a/TODO.md +++ b/TODO.md @@ -25,9 +25,12 @@ After some additional development: - [x] Output bytecode format - [x] Value object - [x] Stack object - - [ ] External interface - - [ ] Code execution (except functions) - - [ ] Functions + - [x] External interface + - [x] Code execution (except functions) + - [x] Functions + - [x] Print stack + +- [ ] Assembler After some additional development: - [ ] Bytecode loader diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 2f75371..1dd8d02 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -1,7 +1,7 @@ #include "bytecode.hh" #include "../common/overloaded.hh" -namespace tyche { +namespace tyche::bc { Bytecode::Bytecode(ByteArray ba) : byte_array_(std::move(ba)) diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 9542f89..cc6ec04 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -1,10 +1,10 @@ #ifndef TYCHE_BYTECODE_HH #define TYCHE_BYTECODE_HH -#include "bytearray.hh" +#include "../common/bytearray.hh" #include "bytecodeprototype.hh" -namespace tyche { +namespace tyche::bc { class Bytecode { public: diff --git a/src/bytecode/bytecodeprototype.hh b/src/bytecode/bytecodeprototype.hh index 3f32ea9..df92c81 100644 --- a/src/bytecode/bytecodeprototype.hh +++ b/src/bytecode/bytecodeprototype.hh @@ -6,9 +6,9 @@ #include #include #include "constant.hh" -#include "bytearray.hh" +#include "../common/bytearray.hh" -namespace tyche { +namespace tyche::bc { struct BytecodePrototype { struct Function { diff --git a/src/bytecode/constant.hh b/src/bytecode/constant.hh index 5972d46..968c8e4 100644 --- a/src/bytecode/constant.hh +++ b/src/bytecode/constant.hh @@ -4,7 +4,7 @@ #include #include -namespace tyche { +namespace tyche::bc { using ConstantValue = std::variant; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index fba1aa5..049332f 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -3,11 +3,12 @@ #include #include -#include "bytearray.hh" +#include "../common/bytearray.hh" #include "bytecodeprototype.hh" #include "bytecode.hh" using namespace tyche; +using namespace tyche::bc; TEST(ByteArray, ByteArray) { diff --git a/src/bytecode/bytearray.cc b/src/common/bytearray.cc similarity index 100% rename from src/bytecode/bytearray.cc rename to src/common/bytearray.cc diff --git a/src/bytecode/bytearray.hh b/src/common/bytearray.hh similarity index 100% rename from src/bytecode/bytearray.hh rename to src/common/bytearray.hh diff --git a/src/vm/code.cc b/src/vm/code.cc index 83fe958..d780fbc 100644 --- a/src/vm/code.cc +++ b/src/vm/code.cc @@ -2,14 +2,51 @@ #include "../common/overloaded.hh" #include "instruction.hh" -namespace tyche { +namespace tyche::vm { -void Code::import_bytecode(ByteArray incoming) +FunctionId Code::import_bytecode(ByteArray incoming) { - Bytecode bc(std::move(incoming)); + bc::Bytecode bc(std::move(incoming)); // TODO - adjust function calls, constants bytecode_ = std::move(bc); + + return 0; // TODO +} + +Operation Code::operation(Location const& location) const +{ + Instruction inst = (Instruction) bytecode_.get_code_byte(location.function_id, location.pc); + OperandType opet = instruction_operand_type(inst); + + switch (opet) { + case OperandType::NoOperand: + return { + .instruction = inst, + .operator_ = 0, + .next_location = { .function_id = location.function_id, .pc = location.pc + 1 }, + }; + case OperandType::Int8: + return { + .instruction = inst, + .operator_ = bytecode_.get_code_int8(location.function_id, location.pc + 1), + .next_location = { .function_id = location.function_id, .pc = location.pc + 2 }, + }; + case OperandType::Int16: + return { + .instruction = inst, + .operator_ = bytecode_.get_code_int16(location.function_id, location.pc + 1), + .next_location = { .function_id = location.function_id, .pc = location.pc + 3 }, + }; + case OperandType::Int32: + return { + .instruction = inst, + .operator_ = bytecode_.get_code_int32(location.function_id, location.pc + 1), + .next_location = { .function_id = location.function_id, .pc = location.pc + 5 }, + }; + } + + throw std::logic_error("Should not get here"); } std::string Code::disassemble() const diff --git a/src/vm/code.hh b/src/vm/code.hh index 1a86df2..1c4710b 100644 --- a/src/vm/code.hh +++ b/src/vm/code.hh @@ -1,18 +1,30 @@ #ifndef TYCHE_CODE_HH #define TYCHE_CODE_HH +#include "instruction.hh" +#include "location.hh" +#include "value.hh" #include "../bytecode/bytecode.hh" -namespace tyche { +namespace tyche::vm { + +struct Operation +{ + Instruction instruction; + int32_t operator_; + Location next_location; +}; class Code { public: - void import_bytecode(ByteArray incoming); + FunctionId import_bytecode(ByteArray incoming); [[nodiscard]] std::string disassemble() const; + [[nodiscard]] Operation operation(Location const& location) const; + private: - Bytecode bytecode_; + bc::Bytecode bytecode_; }; } // tyche diff --git a/src/vm/expr.cc b/src/vm/expr.cc new file mode 100644 index 0000000..26878a7 --- /dev/null +++ b/src/vm/expr.cc @@ -0,0 +1,18 @@ +#include "expr.hh" + +#include "vm_exceptions.hh" + +namespace tyche::vm { + +Value binary_operation(Value const& a, Value const& b, BinaryOperationType op) +{ + // TODO - this is temporary code + + if (a.type() == Type::Integer && b.type() == Type::Integer && op == BinaryOperationType::Sum) { + return Value::CreateInteger(a.as_integer() + b.as_integer()); + } else { + throw VMInvalidOperation(op, a.type(), b.type()); + } +} + +} \ No newline at end of file diff --git a/src/vm/expr.hh b/src/vm/expr.hh new file mode 100644 index 0000000..539672e --- /dev/null +++ b/src/vm/expr.hh @@ -0,0 +1,12 @@ +#ifndef TYCHE_EXPR_HH +#define TYCHE_EXPR_HH +#include "value.hh" + +namespace tyche::vm { + +enum class BinaryOperationType { Sum }; + +Value binary_operation(Value const& a, Value const& b, BinaryOperationType op); + +} +#endif //TYCHE_EXPR_HH diff --git a/src/vm/instruction.cc b/src/vm/instruction.cc index 50077a5..5508962 100644 --- a/src/vm/instruction.cc +++ b/src/vm/instruction.cc @@ -1,6 +1,6 @@ #include "instruction.hh" -namespace tyche { +namespace tyche::vm { std::pair debug_instruction(Instruction inst, int oper) { @@ -98,30 +98,48 @@ std::pair debug_instruction(Instruction inst, int oper) out = "???"; } - if ((uint8_t) inst < 0xa0) + OperandType operands = instruction_operand_type(inst); + + if (operands == OperandType::NoOperand) return { out, 1 }; out += " " + std::to_string(oper); - if ((uint8_t) inst >= 0xe0) + if (operands == OperandType::Int32) return { out, 5 }; - else if ((uint8_t) inst >= 0xc0) + if (operands == OperandType::Int16) return { out, 3 }; - else - return { out, 2 }; + + return { out, 2 }; } -std::pair debug_instruction(Bytecode const& bt, uint32_t function_id, uint32_t addr) +std::pair debug_instruction(bc::Bytecode const& bt, uint32_t function_id, uint32_t addr) { auto inst = (Instruction) bt.get_code_byte(function_id, addr); - if ((uint8_t) inst >= 0xe0) - return debug_instruction(inst, bt.get_code_int32(function_id, addr + 1)); - else if ((uint8_t) inst >= 0xc0) - return debug_instruction(inst, bt.get_code_int16(function_id, addr + 1)); - else if ((uint8_t) inst >= 0xa0) - return debug_instruction(inst, bt.get_code_int8(function_id, addr + 1)); + switch (instruction_operand_type(inst)) { + case OperandType::NoOperand: + return debug_instruction(inst); + case OperandType::Int8: + return debug_instruction(inst, bt.get_code_int8(function_id, addr + 1)); + case OperandType::Int16: + return debug_instruction(inst, bt.get_code_int16(function_id, addr + 1)); + case OperandType::Int32: + return debug_instruction(inst, bt.get_code_int32(function_id, addr + 1)); + default: + } - return debug_instruction(inst); + return { "???", 1 }; +} + +OperandType instruction_operand_type(Instruction inst) +{ + if ((uint8_t) inst >= 0xe0) + return OperandType::Int32; + if ((uint8_t) inst >= 0xc0) + return OperandType::Int16; + if ((uint8_t) inst >= 0xa0) + return OperandType::Int8; + return OperandType::NoOperand; } } \ No newline at end of file diff --git a/src/vm/instruction.hh b/src/vm/instruction.hh index 0cb9959..1710cdf 100644 --- a/src/vm/instruction.hh +++ b/src/vm/instruction.hh @@ -7,7 +7,7 @@ #include "../bytecode/bytecode.hh" -namespace tyche { +namespace tyche::vm { enum class Instruction : uint8_t { @@ -96,7 +96,10 @@ enum class Instruction : uint8_t { }; std::pair debug_instruction(Instruction inst, int oper=0); -std::pair debug_instruction(Bytecode const& bt, uint32_t function_id, uint32_t addr); +std::pair debug_instruction(bc::Bytecode const& bt, uint32_t function_id, uint32_t addr); + +enum class OperandType { NoOperand, Int8, Int16, Int32 }; +OperandType instruction_operand_type(Instruction instruction); } diff --git a/src/vm/location.hh b/src/vm/location.hh new file mode 100644 index 0000000..caff343 --- /dev/null +++ b/src/vm/location.hh @@ -0,0 +1,16 @@ +#ifndef TYCHE_LOCATION_HH +#define TYCHE_LOCATION_HH + +#include + +namespace tyche::vm { + +struct Location +{ + uint32_t function_id; + uint32_t pc; +}; + +} + +#endif //TYCHE_LOCATION_HH diff --git a/src/vm/stack.cc b/src/vm/stack.cc index 1a298f0..801b9fa 100644 --- a/src/vm/stack.cc +++ b/src/vm/stack.cc @@ -2,7 +2,7 @@ #include "vm_exceptions.hh" -namespace tyche { +namespace tyche::vm { Stack::Stack() { @@ -16,7 +16,7 @@ void Stack::push(Value const& value) Value Stack::pop() { - if (stack_.size() <= fps_.size()) + if (stack_.size() <= fps_.top()) throw VMStackUnderflow(); Value v = stack_.back(); @@ -58,4 +58,15 @@ void Stack::pop_fp() fps_.pop(); } +std::string Stack::debug() const +{ + if (stack_.empty()) + return "empty"; + + std::string out; + for (size_t i = 0; i < stack_.size(); ++i) + out += "[" + stack_.at(i).to_string() + "] "; + return out; +} + } // tyche diff --git a/src/vm/stack.hh b/src/vm/stack.hh index c6b59ac..96d04e9 100644 --- a/src/vm/stack.hh +++ b/src/vm/stack.hh @@ -6,7 +6,7 @@ #include "value.hh" -namespace tyche { +namespace tyche::vm { class Stack { public: @@ -21,6 +21,10 @@ public: void push_fp(); void pop_fp(); + [[nodiscard]] size_t fp_level() const { return fps_.size(); } + + [[nodiscard]] std::string debug() const; + private: std::vector stack_; std::stack fps_; diff --git a/src/vm/tests.cc b/src/vm/tests.cc index 0f92135..0d75fdf 100644 --- a/src/vm/tests.cc +++ b/src/vm/tests.cc @@ -1,12 +1,15 @@ #include "gtest/gtest.h" #include "../bytecode/bytecodeprototype.hh" -#include "../bytecode/bytearray.hh" +#include "../common/bytearray.hh" #include "../bytecode/bytecode.hh" #include "code.hh" #include "stack.hh" +#include "vm.hh" using namespace tyche; +using namespace tyche::bc; +using namespace tyche::vm; TEST(Code, ImportSingleAndDebug) { @@ -68,6 +71,27 @@ TEST(Stack, FramePointer) ASSERT_EQ(stack.at(-2).as_integer(), 10); } +TEST(VM, BasicCode) +{ + // code (2+3) + BytecodePrototype bp; + bp.functions.emplace_back(0, 0); + bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8); + bp.functions.at(0).code.append_int8(2); + bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8); + bp.functions.at(0).code.append_int8(3); + bp.functions.at(0).code.append_byte((uint8_t) Instruction::Sum); + bp.functions.at(0).code.append_byte((uint8_t) Instruction::Return); + ByteArray ba = Bytecode::generate(bp); + + VM vm; + vm.load_bytecode(std::move(ba)); + vm.call(0); + + int32_t result = vm.to_integer(-1); + ASSERT_EQ(result, 5); +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); diff --git a/src/vm/value.cc b/src/vm/value.cc index 4d3bc49..13f015e 100644 --- a/src/vm/value.cc +++ b/src/vm/value.cc @@ -2,7 +2,7 @@ #include "../common/overloaded.hh" -namespace tyche { +namespace tyche::vm { Type Value::type() const { @@ -11,6 +11,18 @@ Type Value::type() const [](int32_t) { return Type::Integer; }, [](float) { return Type::Float; }, [](std::string const&) { return Type::String; }, + [](Function const&) { return Type::Function; }, + }, value_); +} + +std::string Value::to_string() const +{ + return std::visit(overloaded { + [](std::monostate) { return std::string("nil"); }, + [](int32_t i) { return std::to_string(i); }, + [](float f) { return std::to_string(f); }, + [](std::string const& s) { return s; }, + [](Function const& f) { return "@" + std::to_string(f.f_id); } }, value_); } diff --git a/src/vm/value.hh b/src/vm/value.hh index 8039233..2592cd6 100644 --- a/src/vm/value.hh +++ b/src/vm/value.hh @@ -4,14 +4,19 @@ #include #include -namespace tyche { +namespace tyche::vm { + +using FunctionId = uint32_t; enum class Type : uint8_t { Nil = 0, Integer, Float, String, Array, Table, Function, NativePointer, }; + class Value { + struct Function { FunctionId f_id; }; + public: Value() : value_(std::monostate()) {} @@ -19,15 +24,19 @@ public: static Value CreateInteger(int32_t v) { return Value(v); } static Value CreateFloat(float f) { return Value(f); } static Value CreateString(std::string const& str) { return Value(str); } + static Value CreateFunctionId(FunctionId f_id) { return Value(Function { f_id }); } [[nodiscard]] Type type() const; - [[nodiscard]] int32_t as_integer() const { return std::get(value_); } - [[nodiscard]] float as_float() const { return std::get(value_); } - [[nodiscard]] std::string as_string() const { return std::get(value_); } + [[nodiscard]] int32_t as_integer() const { return std::get(value_); } + [[nodiscard]] float as_float() const { return std::get(value_); } + [[nodiscard]] std::string as_string() const { return std::get(value_); } + [[nodiscard]] FunctionId as_function_id() const { return std::get(value_).f_id; } + + [[nodiscard]] std::string to_string() const; private: - using Internal = std::variant; + using Internal = std::variant; Internal value_; explicit Value(Internal const& internal) : value_(internal) {} diff --git a/src/vm/vm.cc b/src/vm/vm.cc new file mode 100644 index 0000000..502fad8 --- /dev/null +++ b/src/vm/vm.cc @@ -0,0 +1,79 @@ +#include "vm.hh" + +#include "vm_exceptions.hh" +#include "expr.hh" + +namespace tyche::vm { + +void VM::load_bytecode(ByteArray const& ba) +{ + FunctionId f_id = code_.import_bytecode(ba); + stack_.push(Value::CreateFunctionId(f_id)); +} + +void VM::call(size_t n_params) +{ + // TODO - parameters + + Value f = stack_.pop(); + if (f.type() != Type::Function) + throw VMTypeError(Type::Function, f.type()); + + loc_.emplace(f.as_function_id(), 0); + stack_.push_fp(); + run_until_return(); + // stack_.pop_fp(); + loc_.pop(); +} + +int32_t VM::to_integer(int index) const +{ + Value i = stack_.at(index); + assert_type(i, Type::Integer); + return i.as_integer(); +} + +void VM::push_integer(int32_t value) +{ + stack_.push(Value::CreateInteger(value)); +} + +void VM::run_until_return() +{ + size_t level = stack_.fp_level(); + + while (stack_.fp_level() >= level) + step(); +} + +void VM::step() +{ + Operation op = code_.operation(loc_.top()); + switch (op.instruction) { + case Instruction::PushInt8: + case Instruction::PushInt16: + case Instruction::PushInt32: + push_integer(op.operator_); + break; + case Instruction::Sum: + stack_.push(binary_operation(stack_.pop(), stack_.pop(), BinaryOperationType::Sum)); + break; + case Instruction::Return: { + Value v = stack_.pop(); + stack_.pop_fp(); + stack_.push(v); + return; + } + default: + throw VMInvalidOpcode((uint8_t) op.instruction); + } + loc_.top() = op.next_location; +} + +void VM::assert_type(Value const& val, Type type) +{ + if (val.type() != type) + throw VMTypeError(type, val.type()); +} + +} // tyche diff --git a/src/vm/vm.hh b/src/vm/vm.hh new file mode 100644 index 0000000..d989268 --- /dev/null +++ b/src/vm/vm.hh @@ -0,0 +1,35 @@ +#ifndef TYCHE_VM_HH +#define TYCHE_VM_HH + +#include "code.hh" +#include "location.hh" +#include "stack.hh" + +namespace tyche::vm { + +class VM { +public: + void load_bytecode(ByteArray const& ba); + + void call(size_t n_params); + + [[nodiscard]] int32_t to_integer(int index) const; + + void push_integer(int32_t value); + + [[nodiscard]] std::string debug_stack() const { return stack_.debug(); } + +private: + void run_until_return(); + void step(); + + static void assert_type(Value const& val, Type type); + + Stack stack_; + Code code_; + std::stack loc_; +}; + +} // tyche + +#endif //TYCHE_VM_HH diff --git a/src/vm/vm_exceptions.hh b/src/vm/vm_exceptions.hh index dc4160f..610f43b 100644 --- a/src/vm/vm_exceptions.hh +++ b/src/vm/vm_exceptions.hh @@ -4,7 +4,9 @@ #include #include -namespace tyche { +#include "expr.hh" + +namespace tyche::vm { class VMRuntimeError : public std::runtime_error { @@ -24,6 +26,24 @@ public: explicit VMStackOutOfRange() : VMRuntimeError("Item does not exist in stack") {} }; +class VMTypeError : public VMRuntimeError +{ +public: + explicit VMTypeError(Type expected, Type found) : VMRuntimeError("Type error") {} // TODO - print types +}; + +class VMInvalidOpcode : public VMRuntimeError +{ +public: + explicit VMInvalidOpcode(uint8_t opcode) : VMRuntimeError("Invalid opcode " + std::to_string(opcode)) {} +}; + +class VMInvalidOperation : public VMRuntimeError +{ +public: + explicit VMInvalidOperation(BinaryOperationType op, Type type1, Type type2) : VMRuntimeError("Invalid binary operation") {} // TODO - print types +}; + } #endif //TYCHE_VM_EXCEPTIONS_HH