From 148c98e64208804e8721dab20db890c20b9142e2 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Wed, 29 Apr 2026 14:44:33 -0500 Subject: [PATCH] code (#4) Reviewed-on: http://gitea.andrewagner.uk/andre/tyche/pulls/4 --- CMakeLists.txt | 18 ++-- TODO.md | 6 +- doc/OPCODES | 137 ++++++++++++++++-------------- src/bytecode/bytearray.cc | 7 +- src/bytecode/bytecode.cc | 2 +- src/bytecode/bytecode.hh | 1 + src/bytecode/bytecodeprototype.hh | 1 + src/bytecode/tests.cc | 3 +- src/vm/code.cc | 43 ++++++++++ src/vm/code.hh | 20 +++++ src/vm/instruction.cc | 127 +++++++++++++++++++++++++++ src/vm/instruction.hh | 103 ++++++++++++++++++++++ src/vm/tests.cc | 35 ++++++++ 13 files changed, 424 insertions(+), 79 deletions(-) create mode 100644 src/vm/code.cc create mode 100644 src/vm/code.hh create mode 100644 src/vm/instruction.cc create mode 100644 src/vm/instruction.hh create mode 100644 src/vm/tests.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index c21a4e3..81b6b6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,11 @@ add_library(lib${PROJECT_NAME} SHARED src/bytecode/bytecode.hh src/bytecode/bytecodeprototype.hh src/common/overloaded.hh + src/vm/code.cc + src/vm/code.hh + src/bytecode/constant.hh + src/vm/instruction.hh + src/vm/instruction.cc ) target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) @@ -73,21 +78,24 @@ target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) # tests # -add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc - src/bytecode/constant.hh) +add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc) target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main) add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test) +add_executable(${PROJECT_NAME}-vm-test src/vm/tests.cc) +target_link_libraries(${PROJECT_NAME}-vm-test lib${PROJECT_NAME} gtest_main) +add_test(NAME tyche_vm_test COMMAND ${PROJECT_NAME}-vm-test) + # # check for leaks # -add_custom_target(leaks) -add_custom_command(TARGET leaks +add_custom_target(leaks-vm-test) +add_custom_command(TARGET leaks-vm-test POST_BUILD COMMENT "Check for leaks using valgrind." WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - COMMAND valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp ./${PROJECT_NAME} + COMMAND valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp ./${PROJECT_NAME}-vm-test ) # diff --git a/TODO.md b/TODO.md index 2fb18a7..35fcfb6 100644 --- a/TODO.md +++ b/TODO.md @@ -20,9 +20,9 @@ After some additional development: ## VM - [ ] VM - - [ ] Code - - [ ] Simple bytecode loader - - [ ] Output bytecode format + - [x] Code + - [x] Simple bytecode loader + - [x] Output bytecode format - [ ] Value object - [ ] Stack object - [ ] Function context diff --git a/doc/OPCODES b/doc/OPCODES index d9b57f1..a22f4c2 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -3,77 +3,86 @@ Operations Operations take either 0 or 1 parameter. The ones that take a parameter, it can be either a int8, int16 or int32. -The binary of the opcode is: XXYY.YYYY, where XX defines the parameter type, and YY.YYYY is the instruction. For the XX values: +Instructions follow this logic: -00 - no parameter -01 - int8 -10 - int16 -11 - int32 +00 ~ 9F : no parameter +A0 ~ BF : int8 (1 byte) +C0 ~ DF : int16 (2 bytes) +E0 ~ FF : int32 (4 bytes) -Stack operations: (0x00~0x1f) - pushn [int] Push int - pushc [index] Push constant - pushf [function] Push function id - pushz Push zero (or false) - pusht Push true - newa Push (create) empty array - newt Push (create) empty table - pop - dup +The operations of 1, 2 and 4 bytes are always interchangeable by adding/subtracting 0x20. -Local variables: (0x20~0x2f) - setl [int] Set stack top as indexed local variable - getl [int] Get indexed local variable and place on stack - setg [int] Set global variable - getg [int] Get global variable + ,----------- no parameter + | ,-------- int8 + | | ,----- int16 + | | | ,-- int32 +NP I8 I16 I32 Opc Instruction Description -Function operations: (0x30~0x3f) - call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters) - ret Leave a function (return value in stack) - retn Leave a function (return nil) +Stack operations: + a0 c0 e0 pushi [int] Push int + a1 c1 e1 pushc [index] Push constant + a2 c2 e2 pushf [function] Push function id +00 pushz Push zero (or false) +01 pusht Push true +02 newa Push (create) empty array +03 newt Push (create) empty table +04 pop +05 dup -Control flow: (0x40~0x4f) - bz [pc] Branch if zero - bnz [pc] Branch if not zero - jmp [pc] Unconditional jump +Local variables: + a3 c3 e3 setl [int] Set stack top as indexed local variable + a4 c4 e4 getl [int] Get indexed local variable and place on stack + a5 c5 e5 setg [int] Set global variable + a6 c6 e6 getg [int] Get global variable + +Function operations: + a7 c7 e7 call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters) +10 ret Leave a function (return value in stack) +11 retn Leave a function (return nil) + +Table and array operations: +16 getkv Get table's value based on key (pull 1 value, push 1 value) +17 setkv Set table's key and value (pull 2 values from stack) +18 geta Get array's position value +19 seta Set array's position value (pull 2 values from stack) +1a appnd Add value to the end of array +1b next Push the next pair into the stack (for loops) +1c smt Set value metatable +1d mt Get value metatable + +Logical/arithmetic: +20 sum Sum top 2 values in stack +21 sub Subtract top 2 values in stack +22 mul Multiply top 2 values in stack +23 div Float division +24 idiv Integer division +25 eq Equality +26 neq Inequality +27 lt Less than +28 lte Less than or equals +29 gt Greater than +2a gte Greater than or equals +2b and Bitwise AND +2c or Bitwise OR +2d xor Bitwise XOR + +Other value operations: +30 len Get table, array or string size +31 type Get type from value at the top of the stack + b0 cast [type] Cast type to another type +32 ver Return VM version + +External code: +38 cmpl Compile code to assembly +39 asmbl Assemble code to bytecode format +3a load Load bytecode as function (will place function on stack) + +Control flow: + a8 c8 e8 bz [pc] Branch if zero + a9 c9 e9 bnz [pc] Branch if not zero + aa ca ea jmp [pc] Unconditional jump * Jumps can only happen within the same function. -Logical/arithmetic: (0x50~0x6f) - sum Sum top 2 values in stack - sub Subtract top 2 values in stack - mul Multiply top 2 values in stack - div Float division - idiv Integer division - eq Equality - neq Inequality - lt Less than - lte Less than or equals - gt Greater than - gte Greater than or equals - and Bitwise AND - or Bitwise OR - xor Bitwise XOR - -Table and array operations: (0x70~07xf) - getkv Get table's value based on key (pull 1 value, push 1 value) - setkv Set table's key and value (pull 2 values from stack) - geta Get array's position value - seta Set array's position value (pull 2 values from stack) - appnd Add value to the end of array - next Push the next pair into the stack (for loops) - smt Set value metatable - mt Get value metatable - -Other value operations: (0x80~0x8f) - len Get table, array or string size - type Get type from value at the top of the stack - cast [type] Cast type to another type - ver Return VM version - -External code: (0x90~0x9f) - cmpl Compile code to assembly - asmbl Assemble code to bytecode format - load Load bytecode as function (will place function on stack) Error handling: (0xa0~0xaf) ??? diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc index 317218a..2af83b2 100644 --- a/src/bytecode/bytearray.cc +++ b/src/bytecode/bytearray.cc @@ -7,12 +7,9 @@ namespace tyche { void ByteArray::set_byte(uint32_t addr, uint8_t byte) { - try { - data_.at(addr) = byte; - } catch (std::out_of_range&) { + if (data_.size() < (addr + 1)) data_.resize(addr + 1, 0); - data_.at(addr) = byte; - } + data_.at(addr) = byte; } void ByteArray::set_int8(uint32_t addr, int8_t value) diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 58950d1..2f75371 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -25,7 +25,7 @@ Bytecode::Bytecode(ByteArray ba) uint32_t code_start = byte_array_.get_uint32(TOC_START + (3 * TOC_RECORD_SZ)); for (uint32_t i = 0; i < cache_.n_functions; ++i) { cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ))); - cache_.function_sz.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ) + 8)); + cache_.function_sz.emplace_back(byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ) + 8)); } } diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index a6001c6..9542f89 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -8,6 +8,7 @@ namespace tyche { class Bytecode { public: + Bytecode() = default; explicit Bytecode(ByteArray ba); [[nodiscard]] uint32_t n_constants() const; diff --git a/src/bytecode/bytecodeprototype.hh b/src/bytecode/bytecodeprototype.hh index d7159fb..3f32ea9 100644 --- a/src/bytecode/bytecodeprototype.hh +++ b/src/bytecode/bytecodeprototype.hh @@ -6,6 +6,7 @@ #include #include #include "constant.hh" +#include "bytearray.hh" namespace tyche { diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 83f764c..fba1aa5 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -1,5 +1,4 @@ #include "gtest/gtest.h" -#include "gmock/gmock.h" #include #include @@ -141,6 +140,8 @@ TEST(Bytecode, Parsing) ASSERT_EQ(bc.n_constants(), 2); ASSERT_EQ(bc.n_functions(), 2); + ASSERT_EQ(bc.get_function_sz(0), 2); + ASSERT_EQ(bc.get_function_sz(1), 1); ASSERT_FLOAT_EQ(std::get(bc.get_constant(0)), 3.14f); ASSERT_EQ(std::get(bc.get_constant(1)), "HELLO"); diff --git a/src/vm/code.cc b/src/vm/code.cc new file mode 100644 index 0000000..83fe958 --- /dev/null +++ b/src/vm/code.cc @@ -0,0 +1,43 @@ +#include "code.hh" +#include "../common/overloaded.hh" +#include "instruction.hh" + +namespace tyche { + +void Code::import_bytecode(ByteArray incoming) +{ + Bytecode bc(std::move(incoming)); + // TODO - adjust function calls, constants + + bytecode_ = std::move(bc); +} + +std::string Code::disassemble() const +{ + std::string out; + + out += ".const\n"; + for (size_t i = 0; i < bytecode_.n_constants(); ++i) { + out += "\t" + std::to_string(i) + ": "; + std::visit(overloaded { + [&out](float f) { out += std::to_string(f); }, + [&out](std::string const& str) { out += "\"" + str + "\""; }, + }, bytecode_.get_constant(i)); + out += "\n"; + } + out += "\n"; + + for (size_t i = 0; i < bytecode_.n_functions(); ++i) { + out += ".func " + std::to_string(i) + "\n"; + uint32_t addr = 0; + while (addr < bytecode_.get_function_sz(i)) { + auto [op, sz] = debug_instruction(bytecode_, i, addr); + out += "\t" + op + "\n"; + addr += sz; + } + } + + return out; +} + +} // tyche diff --git a/src/vm/code.hh b/src/vm/code.hh new file mode 100644 index 0000000..1a86df2 --- /dev/null +++ b/src/vm/code.hh @@ -0,0 +1,20 @@ +#ifndef TYCHE_CODE_HH +#define TYCHE_CODE_HH + +#include "../bytecode/bytecode.hh" + +namespace tyche { + +class Code { +public: + void import_bytecode(ByteArray incoming); + + [[nodiscard]] std::string disassemble() const; + +private: + Bytecode bytecode_; +}; + +} // tyche + +#endif //TYCHE_CODE_HH diff --git a/src/vm/instruction.cc b/src/vm/instruction.cc new file mode 100644 index 0000000..50077a5 --- /dev/null +++ b/src/vm/instruction.cc @@ -0,0 +1,127 @@ +#include "instruction.hh" + +namespace tyche { + +std::pair debug_instruction(Instruction inst, int oper) +{ + std::string out; + switch (inst) { + + case Instruction::PushInt8: + case Instruction::PushInt16: + case Instruction::PushInt32: + out = "pushi"; + break; + case Instruction::PushConstant8: + case Instruction::PushConstant16: + case Instruction::PushConstant32: + out = "pushc"; + break; + case Instruction::PushZero: out = "pushz"; break; + case Instruction::PushTrue: out = "pusht"; break; + case Instruction::NewArray: out = "newa"; break; + case Instruction::NewTable: out = "newt"; break; + case Instruction::Pop: out = "pop"; break; + case Instruction::Duplicate: out = "dup"; break; + case Instruction::SetLocal8: + case Instruction::SetLocal16: + case Instruction::SetLocal32: + out = "setl"; + break; + case Instruction::GetLocal8: + case Instruction::GetLocal16: + case Instruction::GetLocal32: + out = "getl"; + break; + case Instruction::SetGlobal8: + case Instruction::SetGlobal16: + case Instruction::SetGlobal32: + out = "setg"; + break; + case Instruction::GetGlobal8: + case Instruction::GetGlobal16: + case Instruction::GetGlobal32: + out = "getg"; + break; + case Instruction::Call8: + case Instruction::Call16: + case Instruction::Call32: + out = "call"; + break; + case Instruction::Return: out = "ret"; break; + case Instruction::ReturnNil: out = "retn"; break; + case Instruction::GetKeyValue: out = "getkv"; break; + case Instruction::SetKeyValue: out = "setkv"; break; + case Instruction::GetArrayItem: out = "geta"; break; + case Instruction::SetArrayItem: out = "seta"; break; + case Instruction::Append: out = "appnd"; break; + case Instruction::Next: out = "next"; break; + case Instruction::SetMetatable: out = "smt"; break; + case Instruction::GetMetatable: out = "mt"; break; + case Instruction::Sum: out = "sum"; break; + case Instruction::Subtract: out = "sub"; break; + case Instruction::Multiply: out = "mul"; break; + case Instruction::Divide: out = "div"; break; + case Instruction::DivideInt: out = "idiv"; break; + case Instruction::Equals: out = "eq"; break; + case Instruction::NotEquals: out = "neq"; break; + case Instruction::LessThan: out = "lt"; break; + case Instruction::LessThanEq: out = "lte"; break; + case Instruction::GreaterThan: out = "gt"; break; + case Instruction::GreaterThanEq: out = "gte"; break; + case Instruction::And: out = "and"; break; + case Instruction::Or: out = "or"; break; + case Instruction::Xor: out = "xor"; break; + case Instruction::Len: out = "len"; break; + case Instruction::Type: out = "type"; break; + case Instruction::Cast: out = "cast"; break; + case Instruction::Version: out = "ver"; break; + case Instruction::BranchIfZero8: + case Instruction::BranchIfZero16: + case Instruction::BranchIfZero32: + out = "bz"; + break; + case Instruction::BranchIfNotZero8: + case Instruction::BranchIfNotZero16: + case Instruction::BranchIfNotZero32: + out = "bnz"; + break; + case Instruction::Jump8: + case Instruction::Jump16: + case Instruction::Jump32: + out = "jmp"; + break; + case Instruction::Compile: out = "cmpl"; break; + case Instruction::Assemble: out = "asmbl"; break; + case Instruction::Load: out = "load"; break; + default: + out = "???"; + } + + if ((uint8_t) inst < 0xa0) + return { out, 1 }; + + out += " " + std::to_string(oper); + if ((uint8_t) inst >= 0xe0) + return { out, 5 }; + else if ((uint8_t) inst >= 0xc0) + return { out, 3 }; + else + return { out, 2 }; +} + +std::pair debug_instruction(Bytecode const& bt, uint32_t function_id, uint32_t addr) +{ + auto inst = (Instruction) bt.get_code_byte(function_id, addr); + + if ((uint8_t) inst >= 0xe0) + return debug_instruction(inst, bt.get_code_int32(function_id, addr + 1)); + else if ((uint8_t) inst >= 0xc0) + return debug_instruction(inst, bt.get_code_int16(function_id, addr + 1)); + else if ((uint8_t) inst >= 0xa0) + return debug_instruction(inst, bt.get_code_int8(function_id, addr + 1)); + + return debug_instruction(inst); +} + +} \ No newline at end of file diff --git a/src/vm/instruction.hh b/src/vm/instruction.hh new file mode 100644 index 0000000..0cb9959 --- /dev/null +++ b/src/vm/instruction.hh @@ -0,0 +1,103 @@ +#ifndef TYCHE_INSTRUCTION_HH +#define TYCHE_INSTRUCTION_HH + +#include +#include +#include + +#include "../bytecode/bytecode.hh" + +namespace tyche { + +enum class Instruction : uint8_t { + + // stack operations + PushInt8 = 0xa0, + PushInt16 = 0xc0, + PushInt32 = 0xe0, + PushConstant8 = 0xa1, + PushConstant16 = 0xc1, + PushConstant32 = 0xe1, + PushZero = 0x00, + PushTrue = 0x01, + NewArray = 0x02, + NewTable = 0x03, + Pop = 0x04, + Duplicate = 0x05, + + // local variables + SetLocal8 = 0xa3, + SetLocal16 = 0xc3, + SetLocal32 = 0xe3, + GetLocal8 = 0xa4, + GetLocal16 = 0xc4, + GetLocal32 = 0xe4, + SetGlobal8 = 0xa5, + SetGlobal16 = 0xc5, + SetGlobal32 = 0xe5, + GetGlobal8 = 0xa6, + GetGlobal16 = 0xc6, + GetGlobal32 = 0xe6, + + // function operations + Call8 = 0xa7, + Call16 = 0xc7, + Call32 = 0xe7, + Return = 0x10, + ReturnNil = 0x11, + + // table and array operations + GetKeyValue = 0x16, + SetKeyValue = 0x17, + GetArrayItem = 0x18, + SetArrayItem = 0x19, + Append = 0x1a, + Next = 0x1b, + SetMetatable = 0x1c, + GetMetatable = 0x1d, + + // logical/arithmetic + Sum = 0x20, + Subtract = 0x21, + Multiply = 0x22, + Divide = 0x23, + DivideInt = 0x24, + Equals = 0x25, + NotEquals = 0x26, + LessThan = 0x27, + LessThanEq = 0x28, + GreaterThan = 0x29, + GreaterThanEq = 0x2a, + And = 0x2b, + Or = 0x2c, + Xor = 0x2d, + + // other value operations + Len = 0x30, + Type = 0x31, + Cast = 0x32, + Version = 0x33, + + // control flow + BranchIfZero8 = 0xa8, + BranchIfZero16 = 0xc8, + BranchIfZero32 = 0xe8, + BranchIfNotZero8 = 0xa9, + BranchIfNotZero16 = 0xc9, + BranchIfNotZero32 = 0xe9, + Jump8 = 0xaa, + Jump16 = 0xca, + Jump32 = 0xea, + + // external code + Compile = 0x38, + Assemble = 0x39, + Load = 0x3a, +}; + +std::pair debug_instruction(Instruction inst, int oper=0); +std::pair debug_instruction(Bytecode const& bt, uint32_t function_id, uint32_t addr); + +} + +#endif //TYCHE_INSTRUCTION_HH diff --git a/src/vm/tests.cc b/src/vm/tests.cc new file mode 100644 index 0000000..40b0bb0 --- /dev/null +++ b/src/vm/tests.cc @@ -0,0 +1,35 @@ +#include "gtest/gtest.h" + +#include "../bytecode/bytecodeprototype.hh" +#include "../bytecode/bytearray.hh" +#include "../bytecode/bytecode.hh" +#include "code.hh" + +using namespace tyche; + +TEST(Code, ImportSingleAndDebug) +{ + BytecodePrototype bp; + + bp.constants.emplace_back(3.14f); + bp.constants.emplace_back("HELLO"); + + bp.functions.emplace_back(0, 0); + bp.functions.at(0).code.append_byte(0xa0); // pushi + bp.functions.at(0).code.append_int8(42); + + bp.functions.emplace_back(2, 1); + bp.functions.at(1).code.append_byte(0x1a); // appnd + + ByteArray ba = Bytecode::generate(bp); + + Code code; + code.import_bytecode(std::move(ba)); + printf("%s\n", code.disassemble().c_str()); +} + +int main(int argc, char** argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}