From 7bcf3b181bae9f27de64225d15b49098de508d80 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 10:23:43 -0500 Subject: [PATCH 01/20] . --- CMakeLists.txt | 6 +++-- TODO.md | 5 ++-- src/bytecode/bytecode.cc | 6 +++++ src/bytecode/bytecode.hh | 49 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 src/bytecode/bytecode.cc create mode 100644 src/bytecode/bytecode.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fac843..935c03d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,8 +59,10 @@ FetchContent_MakeAvailable(googletest) # add_library(lib${PROJECT_NAME} STATIC - src/bytecode/bytearray.hh - src/bytecode/bytearray.cc + src/bytecode/bytearray.hh + src/bytecode/bytearray.cc + src/bytecode/bytecode.cc + src/bytecode/bytecode.hh ) target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) diff --git a/TODO.md b/TODO.md index 70c6169..88c7594 100644 --- a/TODO.md +++ b/TODO.md @@ -4,9 +4,10 @@ - Auto-expand - Add/retrive byte/int/float/string - Should not be larger than the byte array itself -- [ ] Chunk +- [ ] Bytecode - Add/retrive all types of data - Keeps no memory except for caching -- [ ] Chunk loader +- [ ] Bytecode debugging info +- [ ] Bytecode loader - Combine multiple chunks - Resolve function ids, constant ids, etc \ No newline at end of file diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc new file mode 100644 index 0000000..d9171fc --- /dev/null +++ b/src/bytecode/bytecode.cc @@ -0,0 +1,6 @@ +#include "bytecode.hh" + +namespace tyche { + + +} \ No newline at end of file diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh new file mode 100644 index 0000000..1a0fa14 --- /dev/null +++ b/src/bytecode/bytecode.hh @@ -0,0 +1,49 @@ +#ifndef TYCHE_BYTECODE_HH +#define TYCHE_BYTECODE_HH + +#include "bytearray.hh" + +namespace tyche { + +class Bytecode { +public: + // reading + + Bytecode(std::vector data); + + [[nodiscard]] uint32_t 
n_constants() const; + [[nodiscard]] uint32_t n_functions() const; + + [[nodiscard]] int32_t get_constant_int(uint32_t addr) const; + [[nodiscard]] float get_constant_float(uint32_t addr) const; + [[nodiscard]] std::string get_constant_string(uint32_t addr) const; + + struct FunctionDef { uint16_t n_params, locals; }; + [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const; + + [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const; + [[nodiscard]] int32_t get_code_int(uint32_t function_id, uint32_t idx) const; + [[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const; + + // writing + + Bytecode(); + + uint32_t add_constant(int32_t value); + uint32_t add_constant(float value); + uint32_t add_constant(std::string const& str); + + uint32_t add_function(uint16_t n_params, uint16_t locals); + + uint32_t add_code(uint8_t operation); + uint32_t add_code(uint8_t operation, int32_t operand_); + + // TODO - debugging info + +private: + ByteArray byte_array_; +}; + +} + +#endif //TYCHE_BYTECODE_HH -- 2.49.1 From 84fca2a615f87cd4ff6a06e8b7d2802dd5b56790 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 13:28:03 -0500 Subject: [PATCH 02/20] . 
--- CMakeLists.txt | 1 + doc/OPCODES | 4 ++-- src/bytecode/bytearray.hh | 5 +++++ src/bytecode/bytecode.hh | 17 ++--------------- src/bytecode/bytecodeprototype.hh | 29 +++++++++++++++++++++++++++++ 5 files changed, 39 insertions(+), 17 deletions(-) create mode 100644 src/bytecode/bytecodeprototype.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 935c03d..ac29828 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,6 +63,7 @@ add_library(lib${PROJECT_NAME} STATIC src/bytecode/bytearray.cc src/bytecode/bytecode.cc src/bytecode/bytecode.hh + src/bytecode/bytecodeprototype.hh ) target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) diff --git a/doc/OPCODES b/doc/OPCODES index 6476112..a84051c 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -80,8 +80,8 @@ Bytecode format The bytecode file is composed of the following sections: * [0x0] 16-byte header - [00]: VM format - [??]: reserved + [0:3]: Magic + [4]: VM format * [0x1] Index: pointers to each one of the sections, up to 8 Each pointer: 4 bits * [0x2] Constants: all constants (such as strings) used in the code diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index a7d5b85..4b25e2f 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -15,6 +15,11 @@ public: void add_float(uint32_t addr, float value); void add_string(uint32_t addr, std::string const& str); + void append_byte(uint32_t addr, uint8_t byte) { add_byte(data_.size(), byte); } + void append_int(uint32_t addr, int32_t value) { add_int(data_.size(), value); } + void append_float(uint32_t addr, float value) { add_float(data_.size(), value); } + void append_string(uint32_t addr, std::string const& str) { add_string(data_.size(), str); } + [[nodiscard]] uint8_t get_byte(uint32_t addr) const; [[nodiscard]] std::pair get_int(uint32_t addr) const; [[nodiscard]] std::pair get_float(uint32_t addr) const; diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 1a0fa14..4b2e69a 100644 --- 
a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -2,14 +2,14 @@ #define TYCHE_BYTECODE_HH #include "bytearray.hh" +#include "bytecodeprototype.hh" namespace tyche { class Bytecode { public: - // reading - Bytecode(std::vector data); + Bytecode(BytecodePrototype const& bp); [[nodiscard]] uint32_t n_constants() const; [[nodiscard]] uint32_t n_functions() const; @@ -25,19 +25,6 @@ public: [[nodiscard]] int32_t get_code_int(uint32_t function_id, uint32_t idx) const; [[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const; - // writing - - Bytecode(); - - uint32_t add_constant(int32_t value); - uint32_t add_constant(float value); - uint32_t add_constant(std::string const& str); - - uint32_t add_function(uint16_t n_params, uint16_t locals); - - uint32_t add_code(uint8_t operation); - uint32_t add_code(uint8_t operation, int32_t operand_); - // TODO - debugging info private: diff --git a/src/bytecode/bytecodeprototype.hh b/src/bytecode/bytecodeprototype.hh new file mode 100644 index 0000000..e292589 --- /dev/null +++ b/src/bytecode/bytecodeprototype.hh @@ -0,0 +1,29 @@ +#ifndef TYCHE_BYTECODEPROTOTYPE_HH +#define TYCHE_BYTECODEPROTOTYPE_HH + +#include +#include +#include +#include + +namespace tyche { + +struct BytecodePrototype { + using ConstantValue = std::variant; + using Value = std::variant; + + struct Function { + uint16_t n_pars = 0; + uint16_t n_locals = 0; + ByteArray code {}; + }; + + std::vector constants {}; + std::vector functions {}; + + // TODO - debugging info +}; + +} + +#endif //TYCHE_BYTECODEPROTOTYPE_HH -- 2.49.1 From 566f210f3f4fc381654c9b578c674a84ccf987a0 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 14:02:48 -0500 Subject: [PATCH 03/20] . 
--- src/bytecode/bytearray.cc | 16 ++++++++++ src/bytecode/bytearray.hh | 6 ++++ src/bytecode/bytecode.cc | 62 +++++++++++++++++++++++++++++++++++++++ src/bytecode/bytecode.hh | 6 ++-- 4 files changed, 88 insertions(+), 2 deletions(-) diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc index 9968642..e8fbbd9 100644 --- a/src/bytecode/bytearray.cc +++ b/src/bytecode/bytearray.cc @@ -24,6 +24,14 @@ void ByteArray::add_int(uint32_t addr, int32_t value) add_byte(addr, zz & 0x7F); } +void ByteArray::add_uint32(uint32_t addr, uint32_t value) +{ + add_byte(addr, (uint8_t)(value)); + add_byte(addr+1, (uint8_t)(value >> 8)); + add_byte(addr+2, (uint8_t)(value >> 16)); + add_byte(addr+3, (uint8_t)(value >> 24)); +} + void ByteArray::add_float(uint32_t addr, float value) { uint32_t bits; @@ -46,6 +54,14 @@ uint8_t ByteArray::get_byte(uint32_t addr) const return data_.at(addr); } +uint32_t ByteArray::get_uint32(uint32_t addr) const +{ + return (uint32_t) get_byte(addr) + | (uint32_t) get_byte(addr+1) << 8 + | (uint32_t) get_byte(addr+2) << 16 + | (uint32_t) get_byte(addr+3) << 24; +} + std::pair ByteArray::get_int(uint32_t addr) const { uint32_t zz = 0; diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index 4b25e2f..c189ba6 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -10,17 +10,23 @@ namespace tyche { class ByteArray { public: + ByteArray() = default; + explicit ByteArray(std::vector data) : data_(std::move(data)) {} + void add_byte(uint32_t addr, uint8_t byte); + void add_uint32(uint32_t addr, uint32_t value); void add_int(uint32_t addr, int32_t value); void add_float(uint32_t addr, float value); void add_string(uint32_t addr, std::string const& str); void append_byte(uint32_t addr, uint8_t byte) { add_byte(data_.size(), byte); } + void append_uint32(uint32_t addr, uint32_t value) { add_uint32(data_.size(), value); } void append_int(uint32_t addr, int32_t value) { add_int(data_.size(), value); } void 
append_float(uint32_t addr, float value) { add_float(data_.size(), value); } void append_string(uint32_t addr, std::string const& str) { add_string(data_.size(), str); } [[nodiscard]] uint8_t get_byte(uint32_t addr) const; + [[nodiscard]] uint32_t get_uint32(uint32_t addr) const; [[nodiscard]] std::pair get_int(uint32_t addr) const; [[nodiscard]] std::pair get_float(uint32_t addr) const; [[nodiscard]] std::pair get_string(uint32_t addr) const; diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index d9171fc..5069221 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -2,5 +2,67 @@ namespace tyche { +Bytecode::Bytecode(BytecodePrototype const& bp) +{ + // header + byte_array_.add_uint32(0, MAGIC); + byte_array_.add_byte(4, VERSION); + + // constants + std::vector constant_indexes; + std::vector constant_array; + for (auto const& constant: bp.constants) { + + } + + // constants table + + // function table +} + +uint32_t Bytecode::n_constants() const +{ + return 0; +} + +uint32_t Bytecode::n_functions() const +{ + return 0; +} + +int32_t Bytecode::get_constant_int(uint32_t addr) const +{ + return 0; +} + +float Bytecode::get_constant_float(uint32_t addr) const +{ + return 0; +} + +std::string Bytecode::get_constant_string(uint32_t addr) const +{ + return std::string(); +} + +Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const +{ + return Bytecode::FunctionDef(); +} + +uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const +{ + return 0; +} + +int32_t Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const +{ + return 0; +} + +float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const +{ + return 0; +} } \ No newline at end of file diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 4b2e69a..2095bba 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -8,8 +8,8 @@ namespace tyche { class Bytecode { public: - Bytecode(std::vector 
data); - Bytecode(BytecodePrototype const& bp); + explicit Bytecode(std::vector data) : byte_array_(std::move(data)) {} + explicit Bytecode(BytecodePrototype const& bp); [[nodiscard]] uint32_t n_constants() const; [[nodiscard]] uint32_t n_functions() const; @@ -29,6 +29,8 @@ public: private: ByteArray byte_array_; + static constexpr uint8_t VERSION = 1; + static constexpr uint32_t MAGIC = 0x74b3c138; }; } -- 2.49.1 From 149c4c3d8078a2e3da9498e390234bec9135309b Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 15:10:57 -0500 Subject: [PATCH 04/20] . --- CMakeLists.txt | 3 ++- doc/OPCODES | 4 ++-- src/bytecode/bytearray.cc | 39 ++++++++++++++++++++++----------------- src/bytecode/bytearray.hh | 22 ++++++++++++---------- src/bytecode/bytecode.cc | 39 ++++++++++++++++++++++++++++++--------- src/bytecode/bytecode.hh | 2 ++ src/bytecode/tests.cc | 4 ++-- src/common/overloaded.hh | 8 ++++++++ 8 files changed, 80 insertions(+), 41 deletions(-) create mode 100644 src/common/overloaded.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index ac29828..158f32b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,8 @@ target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) # tests # -add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc) +add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc + common/overloaded.hh) target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main) add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test) diff --git a/doc/OPCODES b/doc/OPCODES index a84051c..4edbf82 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -83,9 +83,9 @@ The bytecode file is composed of the following sections: [0:3]: Magic [4]: VM format * [0x1] Index: pointers to each one of the sections, up to 8 - Each pointer: 4 bits + Each pointer: 4 bytes * [0x2] Constants: all constants (such as strings) used in the code - * Table of 4-bit constant indexes with pointer to constant + * Table of 
4-byte constant indexes with pointer to constant * Raw constant data * [0x3] Functions: Pointer to functions within the code [0:3]: function pointer diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc index e8fbbd9..6b06f1e 100644 --- a/src/bytecode/bytearray.cc +++ b/src/bytecode/bytearray.cc @@ -4,7 +4,7 @@ namespace tyche { -void ByteArray::add_byte(uint32_t addr, uint8_t byte) +void ByteArray::set_byte(uint32_t addr, uint8_t byte) { try { data_.at(addr) = byte; @@ -14,39 +14,39 @@ void ByteArray::add_byte(uint32_t addr, uint8_t byte) } } -void ByteArray::add_int(uint32_t addr, int32_t value) +void ByteArray::set_int(uint32_t addr, int32_t value) { uint32_t zz = ((uint32_t)(value << 1)) ^ ((uint32_t)(value >> 31)); while (zz > 0x7F) { - add_byte(addr++, (zz & 0x7F) | 0x80); + set_byte(addr++, (zz & 0x7F) | 0x80); zz >>= 7; } - add_byte(addr, zz & 0x7F); + set_byte(addr, zz & 0x7F); } -void ByteArray::add_uint32(uint32_t addr, uint32_t value) +void ByteArray::set_uint32(uint32_t addr, uint32_t value) { - add_byte(addr, (uint8_t)(value)); - add_byte(addr+1, (uint8_t)(value >> 8)); - add_byte(addr+2, (uint8_t)(value >> 16)); - add_byte(addr+3, (uint8_t)(value >> 24)); + set_byte(addr, (uint8_t) (value)); + set_byte(addr+1, (uint8_t) (value >> 8)); + set_byte(addr+2, (uint8_t) (value >> 16)); + set_byte(addr+3, (uint8_t) (value >> 24)); } -void ByteArray::add_float(uint32_t addr, float value) +void ByteArray::set_float(uint32_t addr, float value) { uint32_t bits; std::memcpy(&bits, &value, 4); - add_byte(addr, (uint8_t)(bits)); - add_byte(addr+1, (uint8_t)(bits >> 8)); - add_byte(addr+2, (uint8_t)(bits >> 16)); - add_byte(addr+3, (uint8_t)(bits >> 24)); + set_byte(addr, (uint8_t) (bits)); + set_byte(addr+1, (uint8_t) (bits >> 8)); + set_byte(addr+2, (uint8_t) (bits >> 16)); + set_byte(addr+3, (uint8_t) (bits >> 24)); } -void ByteArray::add_string(uint32_t addr, std::string const& str) +void ByteArray::set_string(uint32_t addr, std::string const& str) 
{ for (uint8_t c: str) - add_byte(addr++, c); - add_byte(addr, 0); + set_byte(addr++, c); + set_byte(addr, 0); } uint8_t ByteArray::get_byte(uint32_t addr) const @@ -97,4 +97,9 @@ std::pair ByteArray::get_string(uint32_t addr) const return { data, data.size() + 1 }; } +void ByteArray::append_bytearray(ByteArray const& bytearray) +{ + data_.insert(data_.end(), bytearray.data().begin(), bytearray.data().end()); +} + } \ No newline at end of file diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index c189ba6..c0f97da 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -13,17 +13,18 @@ public: ByteArray() = default; explicit ByteArray(std::vector data) : data_(std::move(data)) {} - void add_byte(uint32_t addr, uint8_t byte); - void add_uint32(uint32_t addr, uint32_t value); - void add_int(uint32_t addr, int32_t value); - void add_float(uint32_t addr, float value); - void add_string(uint32_t addr, std::string const& str); + void set_byte(uint32_t addr, uint8_t byte); + void set_uint32(uint32_t addr, uint32_t value); + void set_int(uint32_t addr, int32_t value); + void set_float(uint32_t addr, float value); + void set_string(uint32_t addr, std::string const& str); - void append_byte(uint32_t addr, uint8_t byte) { add_byte(data_.size(), byte); } - void append_uint32(uint32_t addr, uint32_t value) { add_uint32(data_.size(), value); } - void append_int(uint32_t addr, int32_t value) { add_int(data_.size(), value); } - void append_float(uint32_t addr, float value) { add_float(data_.size(), value); } - void append_string(uint32_t addr, std::string const& str) { add_string(data_.size(), str); } + void append_byte(uint8_t byte) { set_byte(data_.size(), byte); } + void append_uint32(uint32_t value) { set_uint32(data_.size(), value); } + void append_int(int32_t value) { set_int(data_.size(), value); } + void append_float(float value) { set_float(data_.size(), value); } + void append_string(std::string const& str) { set_string(data_.size(), 
str); } + void append_bytearray(ByteArray const& bytearray); [[nodiscard]] uint8_t get_byte(uint32_t addr) const; [[nodiscard]] uint32_t get_uint32(uint32_t addr) const; @@ -32,6 +33,7 @@ public: [[nodiscard]] std::pair get_string(uint32_t addr) const; [[nodiscard]] std::vector const& data() const { return data_; } + [[nodiscard]] size_t size() const { return data_.size(); } private: std::vector data_; diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 5069221..05548c5 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -1,23 +1,44 @@ #include "bytecode.hh" +#include "../common/overloaded.hh" namespace tyche { Bytecode::Bytecode(BytecodePrototype const& bp) { - // header - byte_array_.add_uint32(0, MAGIC); - byte_array_.add_byte(4, VERSION); - // constants - std::vector constant_indexes; - std::vector constant_array; + std::vector constant_table; + ByteArray constant_array; + uint32_t idx = 0; for (auto const& constant: bp.constants) { - + constant_table.emplace_back(idx); + std::visit(overloaded { + [&](int32_t i) { constant_array.append_int(i); }, + [&](float f) { constant_array.append_float(f); }, + [&](std::string const& s) { constant_array.append_string(s); }, + }, constant); + idx = constant_array.size(); } - // constants table - // function table + + // + // build binary + // + + // header + byte_array_.set_uint32(0, MAGIC); + byte_array_.set_byte(4, VERSION); + + // index - other entries created later + byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); + + // constants + idx = HEADER_SZ + INDEX_SZ; + for (auto const& const_idx: constant_table) { + byte_array_.set_uint32(idx, const_idx); + idx += 4; + } + byte_array_.append_bytearray(constant_array); } uint32_t Bytecode::n_constants() const diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 2095bba..4946946 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -31,6 +31,8 @@ private: ByteArray byte_array_; static constexpr 
uint8_t VERSION = 1; static constexpr uint32_t MAGIC = 0x74b3c138; + static constexpr uint32_t HEADER_SZ = 16, + INDEX_SZ = 8 * 4; }; } diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 7d5ab87..60d1031 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -18,10 +18,10 @@ TEST(ByteArray, ByteArray) #define TESTX(a, ...) test([](ByteArray& ba) { a; }, std::vector({ __VA_ARGS__ })); - TESTX(ba.add_byte(1, 0xab), 0x00, 0xab) + TESTX(ba.set_byte(1, 0xab), 0x00, 0xab) ByteArray ba; - ba.add_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab); + ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab); ba.add_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1)); ba.add_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1)); diff --git a/src/common/overloaded.hh b/src/common/overloaded.hh new file mode 100644 index 0000000..f0ad1a0 --- /dev/null +++ b/src/common/overloaded.hh @@ -0,0 +1,8 @@ +#ifndef TYCHE_OVERLOADED_HH +#define TYCHE_OVERLOADED_HH + +// used by std::visitor +template +struct overloaded : Ts... { using Ts::operator()...; }; + +#endif //TYCHE_OVERLOADED_HH -- 2.49.1 From 3313b897718bc18321058d6f95c871a553fd687f Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 15:11:16 -0500 Subject: [PATCH 05/20] . 
--- src/bytecode/tests.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 60d1031..6dda626 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -23,17 +23,17 @@ TEST(ByteArray, ByteArray) ByteArray ba; ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab); - ba.add_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1)); - ba.add_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1)); - ba.add_int(1, 5000); ASSERT_EQ(ba.get_int(1), std::make_pair(5000, 2)); - ba.add_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); - ba.add_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); + ba.set_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1)); + ba.set_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1)); + ba.set_int(1, 5000); ASSERT_EQ(ba.get_int(1), std::make_pair(5000, 2)); + ba.set_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); + ba.set_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); - ba.add_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, 3.14); - ba.add_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, -3.14); - ba.add_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1).first, -5000300.1324); + ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, 3.14); + ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, -3.14); + ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1).first, -5000300.1324); - ba.add_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13)); + ba.set_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13)); #undef TESTX } -- 2.49.1 From 7b39a40a32c21dcc8ddb3a0779f0e96f7e0477f2 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 15:54:31 -0500 Subject: [PATCH 06/20] . 
--- src/bytecode/bytecode.hh | 2 ++ src/bytecode/tests.cc | 42 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 4946946..f3483f1 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -27,6 +27,8 @@ public: // TODO - debugging info + [[nodiscard]] std::vector const& data() const { return byte_array_.data(); } + private: ByteArray byte_array_; static constexpr uint8_t VERSION = 1; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 6dda626..4547771 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -1,9 +1,12 @@ #include "gtest/gtest.h" +#include "gmock/gmock.h" #include #include #include "bytearray.hh" +#include "bytecodeprototype.hh" +#include "bytecode.hh" using namespace tyche; @@ -38,6 +41,45 @@ TEST(ByteArray, ByteArray) #undef TESTX } +TEST(Bytecode, Constants) +{ + BytecodePrototype bp; + bp.constants.emplace_back(42); + bp.constants.emplace_back("HELLO"); + + Bytecode bc(bp); + auto binary = bc.data(); + + ByteArray ba; + ba.append_int(42); + + std::vector expected = { + // header + 0x38, 0xc1, 0xb3, 0x74, // magic + 0x01, 0x00, 0x00, 0x00, // version + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // index + 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // constant indexes + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, + + // constant values + 0x54, 'H', 'e', 'l', 'l', 'o', 0x00 + }; + + ASSERT_EQ(binary, expected); +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); -- 2.49.1 From 960cc7600559299c6c6586ae7e6fd13e48a07153 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 20:38:49 -0500 Subject: [PATCH 07/20] . 
--- doc/OPCODES | 12 +++++----- src/bytecode/bytearray.cc | 12 ++++++++++ src/bytecode/bytearray.hh | 3 +++ src/bytecode/bytecode.cc | 31 +++++++++++++++++++++--- src/bytecode/tests.cc | 50 ++++++++++++++++++++++++++++++++------- 5 files changed, 90 insertions(+), 18 deletions(-) diff --git a/doc/OPCODES b/doc/OPCODES index 4edbf82..b024f7e 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -79,22 +79,22 @@ Bytecode format The bytecode file is composed of the following sections: - * [0x0] 16-byte header + * 16-byte header [0:3]: Magic [4]: VM format - * [0x1] Index: pointers to each one of the sections, up to 8 + * Index: pointers to each one of the sections, up to 8 Each pointer: 4 bytes - * [0x2] Constants: all constants (such as strings) used in the code + * [0x0] Constants: all constants (such as strings) used in the code * Table of 4-byte constant indexes with pointer to constant * Raw constant data - * [0x3] Functions: Pointer to functions within the code + * [0x1] Functions: Pointer to functions within the code [0:3]: function pointer [4:5]: number of parameters [6:7]: number of local variables - * [0x4] Code: executable code + * [0x2] Code: executable code [1-byte]: operation [variable]: operand (see value encoding below) - * [0x5] Debugging info + * [0x3] Debugging info ??? The max file size is 2 Gb. 
diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc index 6b06f1e..c3c5a0c 100644 --- a/src/bytecode/bytearray.cc +++ b/src/bytecode/bytearray.cc @@ -24,6 +24,12 @@ void ByteArray::set_int(uint32_t addr, int32_t value) set_byte(addr, zz & 0x7F); } +void ByteArray::set_uint16(uint32_t addr, uint16_t value) +{ + set_byte(addr, (uint8_t) (value)); + set_byte(addr+1, (uint8_t) (value >> 8)); +} + void ByteArray::set_uint32(uint32_t addr, uint32_t value) { set_byte(addr, (uint8_t) (value)); @@ -54,6 +60,12 @@ uint8_t ByteArray::get_byte(uint32_t addr) const return data_.at(addr); } +uint16_t ByteArray::get_uint16(uint32_t addr) const +{ + return (uint32_t) get_byte(addr) + | (uint32_t) get_byte(addr+1) << 8; +} + uint32_t ByteArray::get_uint32(uint32_t addr) const { return (uint32_t) get_byte(addr) diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index c0f97da..96e8311 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -14,12 +14,14 @@ public: explicit ByteArray(std::vector data) : data_(std::move(data)) {} void set_byte(uint32_t addr, uint8_t byte); + void set_uint16(uint32_t addr, uint16_t value); void set_uint32(uint32_t addr, uint32_t value); void set_int(uint32_t addr, int32_t value); void set_float(uint32_t addr, float value); void set_string(uint32_t addr, std::string const& str); void append_byte(uint8_t byte) { set_byte(data_.size(), byte); } + void append_uint16(uint16_t value) { set_uint16(data_.size(), value); } void append_uint32(uint32_t value) { set_uint32(data_.size(), value); } void append_int(int32_t value) { set_int(data_.size(), value); } void append_float(float value) { set_float(data_.size(), value); } @@ -27,6 +29,7 @@ public: void append_bytearray(ByteArray const& bytearray); [[nodiscard]] uint8_t get_byte(uint32_t addr) const; + [[nodiscard]] uint16_t get_uint16(uint32_t addr) const; [[nodiscard]] uint32_t get_uint32(uint32_t addr) const; [[nodiscard]] std::pair get_int(uint32_t addr) 
const; [[nodiscard]] std::pair get_float(uint32_t addr) const; diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 05548c5..65fde0b 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -20,6 +20,12 @@ Bytecode::Bytecode(BytecodePrototype const& bp) } // function table + std::vector> functions; + ByteArray code; + for (auto const& f: bp.functions) { + functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size())); + code.append_bytearray(f.code); + } // // build binary @@ -29,9 +35,6 @@ Bytecode::Bytecode(BytecodePrototype const& bp) byte_array_.set_uint32(0, MAGIC); byte_array_.set_byte(4, VERSION); - // index - other entries created later - byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); - // constants idx = HEADER_SZ + INDEX_SZ; for (auto const& const_idx: constant_table) { @@ -39,6 +42,28 @@ Bytecode::Bytecode(BytecodePrototype const& bp) idx += 4; } byte_array_.append_bytearray(constant_array); + + // constant index + if (!constant_table.empty()) + byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); + + // functions + size_t functions_start = idx + (constant_table.size() * 4) + byte_array_.size(); + idx += functions_start; + uint32_t code_idx = 0; + for (auto const& f: functions) { + byte_array_.set_uint32(idx, code_idx); + byte_array_.set_uint16(idx + 4, f.first.n_params); + byte_array_.set_uint16(idx + 6, f.first.n_params); + idx += 8; + code_idx += f.second; + } + for (auto const& f: bp.functions) + byte_array_.append_bytearray(f.code); + + // function index + if (!functions.empty()) + byte_array_.set_uint32(HEADER_SZ + 4, functions_start); } uint32_t Bytecode::n_constants() const diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 4547771..c4aeb7f 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -47,17 +47,12 @@ TEST(Bytecode, Constants) bp.constants.emplace_back(42); bp.constants.emplace_back("HELLO"); - Bytecode bc(bp); - auto binary = bc.data(); - 
- ByteArray ba; - ba.append_int(42); - std::vector expected = { // header 0x38, 0xc1, 0xb3, 0x74, // magic 0x01, 0x00, 0x00, 0x00, // version - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // index 0x30, 0x00, 0x00, 0x00, @@ -74,10 +69,47 @@ TEST(Bytecode, Constants) 0x01, 0x00, 0x00, 0x00, // constant values - 0x54, 'H', 'e', 'l', 'l', 'o', 0x00 + 0x54, 'H', 'E', 'L', 'L', 'O', 0x00 }; - ASSERT_EQ(binary, expected); + Bytecode bc(bp); + ASSERT_EQ(bc.data(), expected); +} + +TEST(Bytecode, Code) +{ + BytecodePrototype bp; + auto& f = bp.functions.emplace_back(0, 0); + f.code.append_byte(0x68); + f.code.append_int(42); + + f = bp.functions.emplace_back(2, 1); + f.code.append_byte(0x42); + + std::vector expected = { + // header + 0x38, 0xc1, 0xb3, 0x74, // magic + 0x01, 0x00, 0x00, 0x00, // version + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // index + 0x00, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // function definitions + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, + + // code + 0x68, 0x54, 0x42, + }; } int main(int argc, char** argv) -- 2.49.1 From 8ff66ac1e20632d2323824bc4da19dc199c5db82 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 08:04:04 -0500 Subject: [PATCH 08/20] . 
--- src/bytecode/bytearray.hh | 2 +- src/bytecode/bytecodeprototype.hh | 8 +++++--- src/bytecode/tests.cc | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index 96e8311..d1906ad 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -39,7 +39,7 @@ public: [[nodiscard]] size_t size() const { return data_.size(); } private: - std::vector data_; + std::vector data_ {}; }; class BytecodeParsingError : public std::runtime_error { diff --git a/src/bytecode/bytecodeprototype.hh b/src/bytecode/bytecodeprototype.hh index e292589..682986e 100644 --- a/src/bytecode/bytecodeprototype.hh +++ b/src/bytecode/bytecodeprototype.hh @@ -13,9 +13,11 @@ struct BytecodePrototype { using Value = std::variant; struct Function { - uint16_t n_pars = 0; - uint16_t n_locals = 0; - ByteArray code {}; + uint16_t n_pars; + uint16_t n_locals; + ByteArray code; + + Function(uint16_t n_pars_, uint16_t n_locals_) : n_pars(n_pars_), n_locals(n_locals_), code(ByteArray {}) {} }; std::vector constants {}; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index c4aeb7f..d5cb4e9 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -83,8 +83,8 @@ TEST(Bytecode, Code) f.code.append_byte(0x68); f.code.append_int(42); - f = bp.functions.emplace_back(2, 1); - f.code.append_byte(0x42); + auto& f2 = bp.functions.emplace_back(2, 1); + f2.code.append_byte(0x42); std::vector expected = { // header -- 2.49.1 From 44a51acad1bdd43cdb487bb4af4abe7988e36405 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 09:42:27 -0500 Subject: [PATCH 09/20] . 
--- CMakeLists.txt | 4 +-- doc/CODE_ORGANIZATION | 0 doc/OPCODES | 1 + src/bytecode/bytecode.cc | 40 +++++++++++++++++----- src/bytecode/bytecode.hh | 22 ++++++++---- src/bytecode/tests.cc | 72 +++++++++++++++++++++++++++++++--------- 6 files changed, 107 insertions(+), 32 deletions(-) create mode 100644 doc/CODE_ORGANIZATION diff --git a/CMakeLists.txt b/CMakeLists.txt index 158f32b..a5f5f06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,7 @@ add_library(lib${PROJECT_NAME} STATIC src/bytecode/bytecode.cc src/bytecode/bytecode.hh src/bytecode/bytecodeprototype.hh + src/common/overloaded.hh ) target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) @@ -72,8 +73,7 @@ target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) # tests # -add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc - common/overloaded.hh) +add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc) target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main) add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test) diff --git a/doc/CODE_ORGANIZATION b/doc/CODE_ORGANIZATION new file mode 100644 index 0000000..e69de29 diff --git a/doc/OPCODES b/doc/OPCODES index b024f7e..09a3f5d 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -84,6 +84,7 @@ The bytecode file is composed of the following sections: [4]: VM format * Index: pointers to each one of the sections, up to 8 Each pointer: 4 bytes + Each count: 4 bytes * [0x0] Constants: all constants (such as strings) used in the code * Table of 4-byte constant indexes with pointer to constant * Raw constant data diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 65fde0b..1158928 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -44,8 +44,10 @@ Bytecode::Bytecode(BytecodePrototype const& bp) byte_array_.append_bytearray(constant_array); // constant index - if (!constant_table.empty()) + if (!constant_table.empty()) { 
byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); + byte_array_.set_uint16(HEADER_SZ + 4, constant_table.size()); + } // functions size_t functions_start = idx + (constant_table.size() * 4) + byte_array_.size(); @@ -62,31 +64,53 @@ Bytecode::Bytecode(BytecodePrototype const& bp) byte_array_.append_bytearray(f.code); // function index - if (!functions.empty()) - byte_array_.set_uint32(HEADER_SZ + 4, functions_start); + if (!functions.empty()) { + byte_array_.set_uint32(HEADER_SZ + 6, functions_start); + byte_array_.set_uint16(HEADER_SZ + 6 + 4, functions.size()); + } +} + +Bytecode::Bytecode(std::vector data) + : byte_array_(std::move(data)) +{ + // check file size + if (byte_array_.size() < (HEADER_SZ + INDEX_SZ)) + throw BytecodeParsingError("Invalid bytecode format (file too short)"); + + // check magic number and version + if (byte_array_.get_uint32(0) != MAGIC) + throw BytecodeParsingError("Invalid bytecode format (magic number not matching)"); + if (byte_array_.get_uint32(4) != VERSION) + throw BytecodeParsingError("Unexpected bytecode format version"); + + // load cache + cache_.constants_addr = byte_array_.get_uint32(HEADER_SZ); + cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4); + cache_.functions_addr = byte_array_.get_uint32(HEADER_SZ + 6); + cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10); } uint32_t Bytecode::n_constants() const { - return 0; + return cache_.n_constants; } uint32_t Bytecode::n_functions() const { - return 0; + return cache_.n_functions; } -int32_t Bytecode::get_constant_int(uint32_t addr) const +int32_t Bytecode::get_constant_int(uint32_t idx) const { return 0; } -float Bytecode::get_constant_float(uint32_t addr) const +float Bytecode::get_constant_float(uint32_t idx) const { return 0; } -std::string Bytecode::get_constant_string(uint32_t addr) const +std::string Bytecode::get_constant_string(uint32_t idx) const { return std::string(); } diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 
f3483f1..c184cbb 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -8,15 +8,15 @@ namespace tyche { class Bytecode { public: - explicit Bytecode(std::vector data) : byte_array_(std::move(data)) {} + explicit Bytecode(std::vector data); explicit Bytecode(BytecodePrototype const& bp); [[nodiscard]] uint32_t n_constants() const; [[nodiscard]] uint32_t n_functions() const; - [[nodiscard]] int32_t get_constant_int(uint32_t addr) const; - [[nodiscard]] float get_constant_float(uint32_t addr) const; - [[nodiscard]] std::string get_constant_string(uint32_t addr) const; + [[nodiscard]] int32_t get_constant_int(uint32_t idx) const; + [[nodiscard]] float get_constant_float(uint32_t idx) const; + [[nodiscard]] std::string get_constant_string(uint32_t idx) const; struct FunctionDef { uint16_t n_params, locals; }; [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const; @@ -30,11 +30,21 @@ public: [[nodiscard]] std::vector const& data() const { return byte_array_.data(); } private: - ByteArray byte_array_; + ByteArray byte_array_; // the actual data + static constexpr uint8_t VERSION = 1; static constexpr uint32_t MAGIC = 0x74b3c138; static constexpr uint32_t HEADER_SZ = 16, - INDEX_SZ = 8 * 4; + INDEX_SZ = 8 * 6; + + // caching for faster reading of data + struct Cache { + uint32_t constants_addr; + uint16_t n_constants; + uint32_t functions_addr; + uint32_t n_functions; + }; + Cache cache_ {}; }; } diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index d5cb4e9..ba965c6 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -10,6 +10,18 @@ using namespace tyche; +static void print(std::vector const& data) +{ + for (size_t i = 0; i < data.size(); ++i) { + if (i % 16 == 0) + printf("%04X : ", i); + printf("%02X ", data.at(i)); + if (i % 16 == 15) + printf("\n"); + } + printf("\n"); +} + TEST(ByteArray, ByteArray) { auto test = [](std::function const& f, std::vector const& expected) { @@ -55,14 +67,14 @@ TEST(Bytecode, 
Constants) 0x00, 0x00, 0x00, 0x00, // index - 0x30, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // functions + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant indexes 0x00, 0x00, 0x00, 0x00, @@ -94,14 +106,14 @@ TEST(Bytecode, Code) 0x00, 0x00, 0x00, 0x00, // index - 0x00, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants + 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // variables + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function definitions 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -112,6 +124,34 @@ TEST(Bytecode, Code) }; } +TEST(Bytecode, Parsing) +{ + // write bytecode + + BytecodePrototype bp; + + bp.constants.emplace_back(42); + bp.constants.emplace_back(3.14f); + bp.constants.emplace_back("HELLO"); + + auto& f = bp.functions.emplace_back(0, 0); + f.code.append_byte(0x68); + f.code.append_int(42); + + auto& f2 = bp.functions.emplace_back(2, 1); + f2.code.append_byte(0x42); + + Bytecode bc1(bp); + auto data = bc1.data(); + + // read bytecode + + Bytecode bc2(data); + + ASSERT_EQ(bc2.n_constants(), 3); + ASSERT_EQ(bc2.n_functions(), 2); +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); -- 2.49.1 From 
30ecab3403e13ba1f446bb3c99635ced0d7a0d7c Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 09:48:16 -0500 Subject: [PATCH 10/20] . --- src/bytecode/bytecode.cc | 142 ++++++++++++++++++++------------------- src/bytecode/bytecode.hh | 5 +- src/bytecode/tests.cc | 14 ++-- 3 files changed, 85 insertions(+), 76 deletions(-) diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 1158928..099a735 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -3,75 +3,8 @@ namespace tyche { -Bytecode::Bytecode(BytecodePrototype const& bp) -{ - // constants - std::vector constant_table; - ByteArray constant_array; - uint32_t idx = 0; - for (auto const& constant: bp.constants) { - constant_table.emplace_back(idx); - std::visit(overloaded { - [&](int32_t i) { constant_array.append_int(i); }, - [&](float f) { constant_array.append_float(f); }, - [&](std::string const& s) { constant_array.append_string(s); }, - }, constant); - idx = constant_array.size(); - } - - // function table - std::vector> functions; - ByteArray code; - for (auto const& f: bp.functions) { - functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size())); - code.append_bytearray(f.code); - } - - // - // build binary - // - - // header - byte_array_.set_uint32(0, MAGIC); - byte_array_.set_byte(4, VERSION); - - // constants - idx = HEADER_SZ + INDEX_SZ; - for (auto const& const_idx: constant_table) { - byte_array_.set_uint32(idx, const_idx); - idx += 4; - } - byte_array_.append_bytearray(constant_array); - - // constant index - if (!constant_table.empty()) { - byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); - byte_array_.set_uint16(HEADER_SZ + 4, constant_table.size()); - } - - // functions - size_t functions_start = idx + (constant_table.size() * 4) + byte_array_.size(); - idx += functions_start; - uint32_t code_idx = 0; - for (auto const& f: functions) { - byte_array_.set_uint32(idx, code_idx); - byte_array_.set_uint16(idx + 4, 
f.first.n_params); - byte_array_.set_uint16(idx + 6, f.first.n_params); - idx += 8; - code_idx += f.second; - } - for (auto const& f: bp.functions) - byte_array_.append_bytearray(f.code); - - // function index - if (!functions.empty()) { - byte_array_.set_uint32(HEADER_SZ + 6, functions_start); - byte_array_.set_uint16(HEADER_SZ + 6 + 4, functions.size()); - } -} - -Bytecode::Bytecode(std::vector data) - : byte_array_(std::move(data)) +Bytecode::Bytecode(ByteArray ba) + : byte_array_(std::move(ba)) { // check file size if (byte_array_.size() < (HEADER_SZ + INDEX_SZ)) @@ -135,4 +68,75 @@ float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const return 0; } +ByteArray Bytecode::generate(BytecodePrototype const& bp) +{ + ByteArray ba; + + // constants + std::vector constant_table; + ByteArray constant_array; + uint32_t idx = 0; + for (auto const& constant: bp.constants) { + constant_table.emplace_back(idx); + std::visit(overloaded { + [&](int32_t i) { constant_array.append_int(i); }, + [&](float f) { constant_array.append_float(f); }, + [&](std::string const& s) { constant_array.append_string(s); }, + }, constant); + idx = constant_array.size(); + } + + // function table + std::vector> functions; + ByteArray code; + for (auto const& f: bp.functions) { + functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size())); + code.append_bytearray(f.code); + } + + // + // build binary + // + + // header + ba.set_uint32(0, MAGIC); + ba.set_byte(4, VERSION); + + // constants + idx = HEADER_SZ + INDEX_SZ; + for (auto const& const_idx: constant_table) { + ba.set_uint32(idx, const_idx); + idx += 4; + } + ba.append_bytearray(constant_array); + + // constant index + if (!constant_table.empty()) { + ba.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); + ba.set_uint16(HEADER_SZ + 4, constant_table.size()); + } + + // functions + size_t functions_start = idx + (constant_table.size() * 4) + ba.size(); + idx += functions_start; + uint32_t code_idx = 
0; + for (auto const& f: functions) { + ba.set_uint32(idx, code_idx); + ba.set_uint16(idx + 4, f.first.n_params); + ba.set_uint16(idx + 6, f.first.n_params); + idx += 8; + code_idx += f.second; + } + for (auto const& f: bp.functions) + ba.append_bytearray(f.code); + + // function index + if (!functions.empty()) { + ba.set_uint32(HEADER_SZ + 6, functions_start); + ba.set_uint16(HEADER_SZ + 6 + 4, functions.size()); + } + + return ba; +} + } \ No newline at end of file diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index c184cbb..9e0dbb6 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -8,8 +8,7 @@ namespace tyche { class Bytecode { public: - explicit Bytecode(std::vector data); - explicit Bytecode(BytecodePrototype const& bp); + explicit Bytecode(ByteArray ba); [[nodiscard]] uint32_t n_constants() const; [[nodiscard]] uint32_t n_functions() const; @@ -29,6 +28,8 @@ public: [[nodiscard]] std::vector const& data() const { return byte_array_.data(); } + [[nodiscard]] static ByteArray generate(BytecodePrototype const& bp); + private: ByteArray byte_array_; // the actual data diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index ba965c6..8ad0bc5 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -84,8 +84,8 @@ TEST(Bytecode, Constants) 0x54, 'H', 'E', 'L', 'L', 'O', 0x00 }; - Bytecode bc(bp); - ASSERT_EQ(bc.data(), expected); + ByteArray ba = Bytecode::generate(bp); + ASSERT_EQ(ba.data(), expected); } TEST(Bytecode, Code) @@ -122,6 +122,11 @@ TEST(Bytecode, Code) // code 0x68, 0x54, 0x42, }; + + /* + ByteArray ba = Bytecode::generate(bp); + ASSERT_EQ(ba.data(), expected); + */ } TEST(Bytecode, Parsing) @@ -141,12 +146,11 @@ TEST(Bytecode, Parsing) auto& f2 = bp.functions.emplace_back(2, 1); f2.code.append_byte(0x42); - Bytecode bc1(bp); - auto data = bc1.data(); + ByteArray ba = Bytecode::generate(bp); // read bytecode - Bytecode bc2(data); + Bytecode bc2(std::move(ba)); ASSERT_EQ(bc2.n_constants(), 3); 
ASSERT_EQ(bc2.n_functions(), 2); -- 2.49.1 From 61a071cee6588a48aae454e73cb21c57588e30a9 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 09:48:49 -0500 Subject: [PATCH 11/20] . --- src/bytecode/bytecode.hh | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 9e0dbb6..d26341d 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -26,8 +26,6 @@ public: // TODO - debugging info - [[nodiscard]] std::vector const& data() const { return byte_array_.data(); } - [[nodiscard]] static ByteArray generate(BytecodePrototype const& bp); private: -- 2.49.1 From 84f725634eb78e251dd3c56dbea18acc7460ad59 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 09:56:08 -0500 Subject: [PATCH 12/20] . --- src/bytecode/bytecode.cc | 8 ++++---- src/bytecode/tests.cc | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 099a735..c1232fe 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -90,8 +90,8 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) std::vector> functions; ByteArray code; for (auto const& f: bp.functions) { - functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size())); code.append_bytearray(f.code); + functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size())); } // @@ -117,13 +117,13 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) } // functions - size_t functions_start = idx + (constant_table.size() * 4) + ba.size(); - idx += functions_start; + size_t functions_start = idx + (constant_table.size() * 4); + idx = functions_start; uint32_t code_idx = 0; for (auto const& f: functions) { ba.set_uint32(idx, code_idx); ba.set_uint16(idx + 4, f.first.n_params); - ba.set_uint16(idx + 6, f.first.n_params); + ba.set_uint16(idx + 6, f.first.locals); idx += 8; code_idx += f.second; } diff --git 
a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 8ad0bc5..009f525 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -117,16 +117,14 @@ TEST(Bytecode, Code) // function definitions 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, // code 0x68, 0x54, 0x42, }; - /* ByteArray ba = Bytecode::generate(bp); ASSERT_EQ(ba.data(), expected); - */ } TEST(Bytecode, Parsing) -- 2.49.1 From c17788eeabcc1ffda5f67a0fb295ef89a7393014 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 11:42:12 -0500 Subject: [PATCH 13/20] . --- doc/BYTECODE | 33 +++++++++++++++++++++++++++++++++ doc/CODE_ORGANIZATION | 0 doc/OPCODES | 33 --------------------------------- src/bytecode/bytecode.cc | 17 ++++++++++++++--- src/bytecode/bytecode.hh | 6 ++++-- src/bytecode/tests.cc | 16 +++++++++++----- 6 files changed, 62 insertions(+), 43 deletions(-) create mode 100644 doc/BYTECODE delete mode 100644 doc/CODE_ORGANIZATION diff --git a/doc/BYTECODE b/doc/BYTECODE new file mode 100644 index 0000000..c696aeb --- /dev/null +++ b/doc/BYTECODE @@ -0,0 +1,33 @@ +Bytecode format +--------------- + +The bytecode file is composed of the following sections: + + * 16-byte header + [0:3]: Magic + [4]: VM format + * Index: pointers to each one of the sections, up to 8 + Each pointer: 4 bytes + Each count: 4 bytes + * [0x0] Constants indexes: all constants (such as strings) used in the code + * Table of 4-byte constant indexes with pointer to constant + * [0x1] Functions indexes: Pointer to functions within the code + [0:3]: function pointer + [4:5]: number of parameters + [6:7]: number of local variables + * [0x2] Constants raw data + * [0x3] Code: executable code + [1-byte]: operation + [variable]: operand (see value encoding below) + * [0x4] Debugging info + ??? + +The max file size is 2 Gb. + +## Values can be encoded in the following ways: + * The type is defined by the operator. 
+ * Encoding varies according to the type: + int: use protobuf format + float: 4-bit floating point + string: int-defined length, followed by the string proper - no null terminator + * Constant indexes and function ids are encoded as ints diff --git a/doc/CODE_ORGANIZATION b/doc/CODE_ORGANIZATION deleted file mode 100644 index e69de29..0000000 diff --git a/doc/OPCODES b/doc/OPCODES index 09a3f5d..c365591 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -74,39 +74,6 @@ Error handling: (0xa0~0xaf) ??? -Bytecode format ---------------- - -The bytecode file is composed of the following sections: - - * 16-byte header - [0:3]: Magic - [4]: VM format - * Index: pointers to each one of the sections, up to 8 - Each pointer: 4 bytes - Each count: 4 bytes - * [0x0] Constants: all constants (such as strings) used in the code - * Table of 4-byte constant indexes with pointer to constant - * Raw constant data - * [0x1] Functions: Pointer to functions within the code - [0:3]: function pointer - [4:5]: number of parameters - [6:7]: number of local variables - * [0x2] Code: executable code - [1-byte]: operation - [variable]: operand (see value encoding below) - * [0x3] Debugging info - ??? - -The max file size is 2 Gb. - -## Values can be encoded in the following ways: - * The type is defined by the operator. 
- * Encoding varies according to the type: - int: use protobuf format - float: 4-bit floating point - string: int-defined length, followed by the string proper - no null terminator - * Constant indexes and function ids are encoded as ints Internal handling of values diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index c1232fe..8cbde31 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -17,9 +17,9 @@ Bytecode::Bytecode(ByteArray ba) throw BytecodeParsingError("Unexpected bytecode format version"); // load cache - cache_.constants_addr = byte_array_.get_uint32(HEADER_SZ); + cache_.constants_idx_addr = byte_array_.get_uint32(HEADER_SZ); cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4); - cache_.functions_addr = byte_array_.get_uint32(HEADER_SZ + 6); + cache_.functions_idx_addr = byte_array_.get_uint32(HEADER_SZ + 6); cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10); } @@ -35,7 +35,8 @@ uint32_t Bytecode::n_functions() const int32_t Bytecode::get_constant_int(uint32_t idx) const { - return 0; + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); + return byte_array_.get_int(constant_idx).first; } float Bytecode::get_constant_float(uint32_t idx) const @@ -108,6 +109,7 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) ba.set_uint32(idx, const_idx); idx += 4; } + uint32_t constants_raw_data_addr = ba.size(); ba.append_bytearray(constant_array); // constant index @@ -127,6 +129,7 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) idx += 8; code_idx += f.second; } + uint32_t functions_raw_data_addr = ba.size(); for (auto const& f: bp.functions) ba.append_bytearray(f.code); @@ -136,6 +139,14 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) ba.set_uint16(HEADER_SZ + 6 + 4, functions.size()); } + // constants raw data + if (!constant_table.empty()) + ba.set_uint32(HEADER_SZ + (2 * 6), constants_raw_data_addr); + + // function raw data + if 
(!functions.empty()) + ba.set_uint32(HEADER_SZ + (3 * 6), functions_raw_data_addr); + return ba; } diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index d26341d..b20caf9 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -38,10 +38,12 @@ private: // caching for faster reading of data struct Cache { - uint32_t constants_addr; + uint32_t constants_idx_addr; uint16_t n_constants; - uint32_t functions_addr; + uint32_t constants_start_addr; + uint32_t functions_idx_addr; uint32_t n_functions; + uint32_t functions_start_addr; }; Cache cache_ {}; }; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 009f525..b387f0f 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -69,7 +69,7 @@ TEST(Bytecode, Constants) // index 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // functions - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -109,7 +109,7 @@ TEST(Bytecode, Code) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // variables 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -124,6 +124,7 @@ TEST(Bytecode, Code) }; ByteArray ba = Bytecode::generate(bp); + print(ba.data()); print(expected); ASSERT_EQ(ba.data(), expected); } @@ -145,13 +146,18 @@ TEST(Bytecode, Parsing) f2.code.append_byte(0x42); ByteArray ba = Bytecode::generate(bp); + print(ba.data()); // read bytecode - Bytecode bc2(std::move(ba)); + Bytecode bc(std::move(ba)); - ASSERT_EQ(bc2.n_constants(), 3); - ASSERT_EQ(bc2.n_functions(), 2); + ASSERT_EQ(bc.n_constants(), 3); + ASSERT_EQ(bc.n_functions(), 2); + + ASSERT_EQ(bc.get_constant_int(0), 42); + 
ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f); + ASSERT_EQ(bc.get_constant_string(2), "HELLO"); } int main(int argc, char** argv) -- 2.49.1 From 8f5f470eddda7291f7eccab169328f7d01f131ba Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 16:51:19 -0500 Subject: [PATCH 14/20] . --- TODO.md | 1 + doc/BYTECODE | 21 +++--- src/bytecode/bytearray.cc | 10 ++- src/bytecode/bytearray.hh | 3 +- src/bytecode/bytecode.cc | 152 +++++++++++++++++++------------------- src/bytecode/bytecode.hh | 23 ++++-- src/bytecode/tests.cc | 55 ++++++++------ 7 files changed, 146 insertions(+), 119 deletions(-) diff --git a/TODO.md b/TODO.md index 88c7594..d2f4882 100644 --- a/TODO.md +++ b/TODO.md @@ -7,6 +7,7 @@ - [ ] Bytecode - Add/retrive all types of data - Keeps no memory except for caching + - [ ] Refactor bytecode code - [ ] Bytecode debugging info - [ ] Bytecode loader - Combine multiple chunks diff --git a/doc/BYTECODE b/doc/BYTECODE index c696aeb..3305954 100644 --- a/doc/BYTECODE +++ b/doc/BYTECODE @@ -3,22 +3,23 @@ Bytecode format The bytecode file is composed of the following sections: - * 16-byte header - [0:3]: Magic - [4]: VM format - * Index: pointers to each one of the sections, up to 8 - Each pointer: 4 bytes - Each count: 4 bytes - * [0x0] Constants indexes: all constants (such as strings) used in the code + * HEADER: 16-byte header + [0:3]: Magic + [4]: VM format + [rest]: Reserved for future use + * TABLE_OF_CONTENTS: list of 8 records pointing to each one of the sections + Each record (6 bytes): + - Pointer to section: 4 bytes + - Number of records in section: 2 bytes + * [0x0] Constants indexes: pointers to each of the constant locations * Table of 4-byte constant indexes with pointer to constant + (counter start at beginning of raw constants) * [0x1] Functions indexes: Pointer to functions within the code - [0:3]: function pointer + [0:3]: function pointer (counter start at the beginning of executable code) [4:5]: number of parameters [6:7]: number 
of local variables * [0x2] Constants raw data * [0x3] Code: executable code - [1-byte]: operation - [variable]: operand (see value encoding below) * [0x4] Debugging info ??? diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc index c3c5a0c..fcd4858 100644 --- a/src/bytecode/bytearray.cc +++ b/src/bytecode/bytearray.cc @@ -55,6 +55,12 @@ void ByteArray::set_string(uint32_t addr, std::string const& str) set_byte(addr, 0); } +void ByteArray::set_bytearray(uint32_t addr, ByteArray const& bytearray) +{ + for (uint8_t byte: bytearray.data()) + set_byte(addr++, byte); +} + uint8_t ByteArray::get_byte(uint32_t addr) const { return data_.at(addr); @@ -90,7 +96,7 @@ std::pair ByteArray::get_int(uint32_t addr) const throw BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr)); } -std::pair ByteArray::get_float(uint32_t addr) const +float ByteArray::get_float(uint32_t addr) const { uint32_t bits = (uint32_t) get_byte(addr) | (uint32_t) get_byte(addr+1) << 8 @@ -98,7 +104,7 @@ std::pair ByteArray::get_float(uint32_t addr) const | (uint32_t) get_byte(addr+3) << 24; float value; std::memcpy(&value, &bits, 4); - return { value, 4 }; + return value; } std::pair ByteArray::get_string(uint32_t addr) const diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index d1906ad..99984ce 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -19,6 +19,7 @@ public: void set_int(uint32_t addr, int32_t value); void set_float(uint32_t addr, float value); void set_string(uint32_t addr, std::string const& str); + void set_bytearray(uint32_t addr, ByteArray const& bytearray); void append_byte(uint8_t byte) { set_byte(data_.size(), byte); } void append_uint16(uint16_t value) { set_uint16(data_.size(), value); } @@ -32,7 +33,7 @@ public: [[nodiscard]] uint16_t get_uint16(uint32_t addr) const; [[nodiscard]] uint32_t get_uint32(uint32_t addr) const; [[nodiscard]] std::pair get_int(uint32_t addr) const; - [[nodiscard]] std::pair 
get_float(uint32_t addr) const; + [[nodiscard]] float get_float(uint32_t addr) const; [[nodiscard]] std::pair get_string(uint32_t addr) const; [[nodiscard]] std::vector const& data() const { return data_; } diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 8cbde31..59c2336 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -7,20 +7,24 @@ Bytecode::Bytecode(ByteArray ba) : byte_array_(std::move(ba)) { // check file size - if (byte_array_.size() < (HEADER_SZ + INDEX_SZ)) + if (byte_array_.size() < (TOC_START + TOC_SZ)) throw BytecodeParsingError("Invalid bytecode format (file too short)"); // check magic number and version - if (byte_array_.get_uint32(0) != MAGIC) + if (byte_array_.get_uint32(0) != MAGIC_NUMBER) throw BytecodeParsingError("Invalid bytecode format (magic number not matching)"); - if (byte_array_.get_uint32(4) != VERSION) + if (byte_array_.get_uint32(4) != BYTECODE_VERSION) throw BytecodeParsingError("Unexpected bytecode format version"); // load cache - cache_.constants_idx_addr = byte_array_.get_uint32(HEADER_SZ); - cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4); - cache_.functions_idx_addr = byte_array_.get_uint32(HEADER_SZ + 6); - cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10); + cache_.constants_idx_addr = byte_array_.get_uint32(TOC_START); + cache_.n_constants = byte_array_.get_uint16(TOC_START + 4); + cache_.constants_start_addr = byte_array_.get_uint32(TOC_START + (6 * 2)); + cache_.functions_idx_addr = byte_array_.get_uint32(TOC_START + 6); + cache_.n_functions = byte_array_.get_uint16(TOC_START + 10); + uint32_t code_start = byte_array_.get_uint32(TOC_START + (6 * 3)); + for (uint32_t i = 0; i < cache_.n_functions; ++i) + cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * 8))); } uint32_t Bytecode::n_constants() const @@ -36,117 +40,111 @@ uint32_t Bytecode::n_functions() const int32_t Bytecode::get_constant_int(uint32_t idx) 
const { uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); - return byte_array_.get_int(constant_idx).first; + return byte_array_.get_int(cache_.constants_start_addr + constant_idx).first; } float Bytecode::get_constant_float(uint32_t idx) const { - return 0; + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); + return byte_array_.get_float(cache_.constants_start_addr + constant_idx); } std::string Bytecode::get_constant_string(uint32_t idx) const { - return std::string(); + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); + return byte_array_.get_string(cache_.constants_start_addr + constant_idx).first; } Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const { - return Bytecode::FunctionDef(); + uint32_t idx = cache_.functions_idx_addr + (function_id * 8); + return { + .n_params = byte_array_.get_uint16(idx + 4), + .locals = byte_array_.get_uint16(idx + 6), + }; } uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const { - return 0; + return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx); } -int32_t Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const +std::pair Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const { - return 0; + return byte_array_.get_int(cache_.function_addr.at(function_id) + idx); } float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const { - return 0; + return byte_array_.get_float(cache_.function_addr.at(function_id) + idx); } ByteArray Bytecode::generate(BytecodePrototype const& bp) { - ByteArray ba; + // header section + ByteArray header; + header.set_uint32(0, MAGIC_NUMBER); + header.set_byte(4, BYTECODE_VERSION); // constants - std::vector constant_table; - ByteArray constant_array; + ByteArray constant_indexes; + ByteArray raw_constants; + uint32_t idx = 0; for (auto const& constant: bp.constants) { - 
constant_table.emplace_back(idx); + constant_indexes.append_uint32(idx); std::visit(overloaded { - [&](int32_t i) { constant_array.append_int(i); }, - [&](float f) { constant_array.append_float(f); }, - [&](std::string const& s) { constant_array.append_string(s); }, + [&](int32_t i) { raw_constants.append_int(i); }, + [&](float f) { raw_constants.append_float(f); }, + [&](std::string const& s) { raw_constants.append_string(s); }, }, constant); - idx = constant_array.size(); - } - - // function table - std::vector> functions; - ByteArray code; - for (auto const& f: bp.functions) { - code.append_bytearray(f.code); - functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size())); - } - - // - // build binary - // - - // header - ba.set_uint32(0, MAGIC); - ba.set_byte(4, VERSION); - - // constants - idx = HEADER_SZ + INDEX_SZ; - for (auto const& const_idx: constant_table) { - ba.set_uint32(idx, const_idx); - idx += 4; - } - uint32_t constants_raw_data_addr = ba.size(); - ba.append_bytearray(constant_array); - - // constant index - if (!constant_table.empty()) { - ba.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); - ba.set_uint16(HEADER_SZ + 4, constant_table.size()); + idx = raw_constants.size(); } // functions - size_t functions_start = idx + (constant_table.size() * 4); - idx = functions_start; - uint32_t code_idx = 0; - for (auto const& f: functions) { - ba.set_uint32(idx, code_idx); - ba.set_uint16(idx + 4, f.first.n_params); - ba.set_uint16(idx + 6, f.first.locals); - idx += 8; - code_idx += f.second; - } - uint32_t functions_raw_data_addr = ba.size(); - for (auto const& f: bp.functions) - ba.append_bytearray(f.code); + ByteArray functions_indexes; + ByteArray raw_code; - // function index - if (!functions.empty()) { - ba.set_uint32(HEADER_SZ + 6, functions_start); - ba.set_uint16(HEADER_SZ + 6 + 4, functions.size()); + uint32_t idx_idx = 0, code_idx = 0; + for (auto const& f: bp.functions) { + functions_indexes.set_uint32(idx_idx, 
code_idx); + functions_indexes.set_uint16(idx_idx + 4, f.n_pars); + functions_indexes.set_uint16(idx_idx + 6, f.n_locals); + raw_code.append_bytearray(f.code); + code_idx = raw_code.size(); + idx_idx += FUNCTION_RECORD_SZ; } - // constants raw data - if (!constant_table.empty()) - ba.set_uint32(HEADER_SZ + (2 * 6), constants_raw_data_addr); + // table of contents + uint32_t function_idx_start = CONST_IDX_START + constant_indexes.size(); + uint32_t raw_constant_start = function_idx_start + functions_indexes.size(); + uint32_t raw_code_start = raw_constant_start + raw_constants.size(); - // function raw data - if (!functions.empty()) - ba.set_uint32(HEADER_SZ + (3 * 6), functions_raw_data_addr); + ByteArray toc; + if (!bp.constants.empty()) { + toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ, CONST_IDX_START); + toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ + 4, constant_indexes.size() / CONST_RECORD_SZ); + toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ, raw_constant_start); + toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ + 4, raw_constants.size()); + } + if (!bp.functions.empty()) { + toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ, function_idx_start); + toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ + 4, functions_indexes.size() / FUNCTION_RECORD_SZ); + toc.set_uint32(SEC_CODE * TOC_RECORD_SZ, raw_code_start); + toc.set_uint32(SEC_CODE * TOC_RECORD_SZ + 4, raw_code.size()); + } + // + // assemble bytecode + // + + ByteArray ba; + ba.set_bytearray(0, header); + ba.set_bytearray(TOC_START, toc); + ba.set_bytearray(CONST_IDX_START, constant_indexes); + ba.set_bytearray(function_idx_start, functions_indexes); + ba.set_bytearray(raw_constant_start, raw_constants); + ba.set_bytearray(raw_code_start, raw_code); return ba; } diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index b20caf9..f9ba207 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -20,9 +20,9 @@ public: struct FunctionDef { uint16_t n_params, locals; }; [[nodiscard]] FunctionDef 
get_function_def(uint32_t function_id) const; - [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const; - [[nodiscard]] int32_t get_code_int(uint32_t function_id, uint32_t idx) const; - [[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const; + [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const; + [[nodiscard]] std::pair get_code_int(uint32_t function_id, uint32_t idx) const; + [[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const; // TODO - debugging info @@ -31,10 +31,17 @@ public: private: ByteArray byte_array_; // the actual data - static constexpr uint8_t VERSION = 1; - static constexpr uint32_t MAGIC = 0x74b3c138; - static constexpr uint32_t HEADER_SZ = 16, - INDEX_SZ = 8 * 6; + static constexpr uint8_t BYTECODE_VERSION = 1; + static constexpr uint32_t MAGIC_NUMBER = 0x74b3c138; + static constexpr uint32_t TOC_START = 16, + TOC_N_RECORDS = 8, + TOC_RECORD_SZ = 8, + TOC_SZ = TOC_N_RECORDS * TOC_RECORD_SZ; + static constexpr uint32_t CONST_IDX_START = TOC_START + TOC_SZ, + CONST_RECORD_SZ = 4; + static constexpr uint32_t FUNCTION_RECORD_SZ = 8; + + enum Sections { SEC_CONST_IDX = 0, SEC_FUNC_IDX = 1, SEC_CONST_DATA = 2, SEC_CODE = 3 }; // caching for faster reading of data struct Cache { @@ -43,7 +50,7 @@ private: uint32_t constants_start_addr; uint32_t functions_idx_addr; uint32_t n_functions; - uint32_t functions_start_addr; + std::vector function_addr; }; Cache cache_ {}; }; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index b387f0f..cdedf1c 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -44,9 +44,9 @@ TEST(ByteArray, ByteArray) ba.set_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); ba.set_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); - ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, 3.14); - ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, -3.14); - 
ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1).first, -5000300.1324); + ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14); + ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14); + ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1), -5000300.1324); ba.set_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13)); @@ -67,14 +67,14 @@ TEST(Bytecode, Constants) 0x00, 0x00, 0x00, 0x00, // index - 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // functions - 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function index + 0x58, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, // raw constants + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant indexes 0x00, 0x00, 0x00, 0x00, @@ -85,6 +85,7 @@ TEST(Bytecode, Constants) }; ByteArray ba = Bytecode::generate(bp); + print(ba.data()); print(expected); ASSERT_EQ(ba.data(), expected); } @@ -106,14 +107,14 @@ TEST(Bytecode, Code) 0x00, 0x00, 0x00, 0x00, // index - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants - 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // variables - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant index + 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
// variable index + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw constants + 0x60, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function definitions 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -142,8 +143,8 @@ TEST(Bytecode, Parsing) f.code.append_byte(0x68); f.code.append_int(42); - auto& f2 = bp.functions.emplace_back(2, 1); - f2.code.append_byte(0x42); + auto& ff = bp.functions.emplace_back(2, 1); + ff.code.append_byte(0x42); ByteArray ba = Bytecode::generate(bp); print(ba.data()); @@ -158,6 +159,18 @@ TEST(Bytecode, Parsing) ASSERT_EQ(bc.get_constant_int(0), 42); ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f); ASSERT_EQ(bc.get_constant_string(2), "HELLO"); + + Bytecode::FunctionDef f1 = bc.get_function_def(0); + ASSERT_EQ(f1.n_params, 0); + ASSERT_EQ(f1.locals, 0); + + Bytecode::FunctionDef f2 = bc.get_function_def(1); + ASSERT_EQ(f2.n_params, 2); + ASSERT_EQ(f2.locals, 1); + + ASSERT_EQ(bc.get_code_byte(0, 0), 0x68); + ASSERT_EQ(bc.get_code_int(0, 1), std::make_pair(42, 1)); + ASSERT_EQ(bc.get_code_byte(1, 0), 0x42); } int main(int argc, char** argv) -- 2.49.1 From a91e65dc840c49eca1901ae626c27f89ec1da3ec Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 17:14:35 -0500 Subject: [PATCH 15/20] . 
--- src/bytecode/bytecode.cc | 25 ++++++++++++++++--------- src/bytecode/bytecode.hh | 2 ++ src/bytecode/tests.cc | 6 +++--- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 59c2336..563b536 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -19,12 +19,14 @@ Bytecode::Bytecode(ByteArray ba) // load cache cache_.constants_idx_addr = byte_array_.get_uint32(TOC_START); cache_.n_constants = byte_array_.get_uint16(TOC_START + 4); - cache_.constants_start_addr = byte_array_.get_uint32(TOC_START + (6 * 2)); - cache_.functions_idx_addr = byte_array_.get_uint32(TOC_START + 6); - cache_.n_functions = byte_array_.get_uint16(TOC_START + 10); - uint32_t code_start = byte_array_.get_uint32(TOC_START + (6 * 3)); - for (uint32_t i = 0; i < cache_.n_functions; ++i) + cache_.functions_idx_addr = byte_array_.get_uint32(TOC_START + (1 * TOC_RECORD_SZ)); + cache_.n_functions = byte_array_.get_uint16(TOC_START + (1 * TOC_RECORD_SZ) + 4); + cache_.constants_start_addr = byte_array_.get_uint32(TOC_START + (2 * TOC_RECORD_SZ)); + uint32_t code_start = byte_array_.get_uint32(TOC_START + (3 * TOC_RECORD_SZ)); + for (uint32_t i = 0; i < cache_.n_functions; ++i) { cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * 8))); + cache_.function_sz.emplace_back(0); // TODO + } } uint32_t Bytecode::n_constants() const @@ -39,31 +41,36 @@ uint32_t Bytecode::n_functions() const int32_t Bytecode::get_constant_int(uint32_t idx) const { - uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); return byte_array_.get_int(cache_.constants_start_addr + constant_idx).first; } float Bytecode::get_constant_float(uint32_t idx) const { - uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); + uint32_t 
constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); return byte_array_.get_float(cache_.constants_start_addr + constant_idx); } std::string Bytecode::get_constant_string(uint32_t idx) const { - uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); return byte_array_.get_string(cache_.constants_start_addr + constant_idx).first; } Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const { - uint32_t idx = cache_.functions_idx_addr + (function_id * 8); + uint32_t idx = cache_.functions_idx_addr + (function_id * FUNCTION_RECORD_SZ); return { .n_params = byte_array_.get_uint16(idx + 4), .locals = byte_array_.get_uint16(idx + 6), }; } +uint32_t Bytecode::get_function_sz(uint32_t function_id) const +{ + return cache_.function_sz.at(function_id); +} + uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const { return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx); diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index f9ba207..8ba8fe2 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -19,6 +19,7 @@ public: struct FunctionDef { uint16_t n_params, locals; }; [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const; + [[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const; [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const; [[nodiscard]] std::pair get_code_int(uint32_t function_id, uint32_t idx) const; @@ -51,6 +52,7 @@ private: uint32_t functions_idx_addr; uint32_t n_functions; std::vector function_addr; + std::vector function_sz; }; Cache cache_ {}; }; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index cdedf1c..29c5ce9 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -85,7 +85,7 @@ TEST(Bytecode, Constants) }; ByteArray ba = 
Bytecode::generate(bp); - print(ba.data()); print(expected); + // print(ba.data()); print(expected); ASSERT_EQ(ba.data(), expected); } @@ -125,7 +125,7 @@ TEST(Bytecode, Code) }; ByteArray ba = Bytecode::generate(bp); - print(ba.data()); print(expected); + // print(ba.data()); print(expected); ASSERT_EQ(ba.data(), expected); } @@ -147,7 +147,7 @@ TEST(Bytecode, Parsing) ff.code.append_byte(0x42); ByteArray ba = Bytecode::generate(bp); - print(ba.data()); + // print(ba.data()); // read bytecode -- 2.49.1 From 9092a7ea6459bbb8f5f84999184d48a35b15acc9 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 17:19:05 -0500 Subject: [PATCH 16/20] . --- doc/BYTECODE | 1 + src/bytecode/bytecode.cc | 5 +++-- src/bytecode/bytecode.hh | 2 +- src/bytecode/tests.cc | 6 +++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/BYTECODE b/doc/BYTECODE index 3305954..d0d6571 100644 --- a/doc/BYTECODE +++ b/doc/BYTECODE @@ -18,6 +18,7 @@ The bytecode file is composed of the following sections: [0:3]: function pointer (counter start at the beginning of executable code) [4:5]: number of parameters [6:7]: number of local variables + [8:b]: function size * [0x2] Constants raw data * [0x3] Code: executable code * [0x4] Debugging info diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 563b536..b5a3c5c 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -24,8 +24,8 @@ Bytecode::Bytecode(ByteArray ba) cache_.constants_start_addr = byte_array_.get_uint32(TOC_START + (2 * TOC_RECORD_SZ)); uint32_t code_start = byte_array_.get_uint32(TOC_START + (3 * TOC_RECORD_SZ)); for (uint32_t i = 0; i < cache_.n_functions; ++i) { - cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * 8))); - cache_.function_sz.emplace_back(0); // TODO + cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ))); + 
cache_.function_sz.emplace_back(byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ) + 8)); // bytes [8:b] of the record hold the code length written by generate(), not an address } } @@ -117,6 +117,7 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) functions_indexes.set_uint32(idx_idx, code_idx); functions_indexes.set_uint16(idx_idx + 4, f.n_pars); functions_indexes.set_uint16(idx_idx + 6, f.n_locals); + functions_indexes.set_uint32(idx_idx + 8, f.code.size()); raw_code.append_bytearray(f.code); code_idx = raw_code.size(); idx_idx += FUNCTION_RECORD_SZ; diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index 8ba8fe2..fa3dc98 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -40,7 +40,7 @@ private: TOC_SZ = TOC_N_RECORDS * TOC_RECORD_SZ; static constexpr uint32_t CONST_IDX_START = TOC_START + TOC_SZ, CONST_RECORD_SZ = 4; - static constexpr uint32_t FUNCTION_RECORD_SZ = 8; + static constexpr uint32_t FUNCTION_RECORD_SZ = 12; enum Sections { SEC_CONST_IDX = 0, SEC_FUNC_IDX = 1, SEC_CONST_DATA = 2, SEC_CODE = 3 }; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 29c5ce9..435ce45 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -110,15 +110,15 @@ TEST(Bytecode, Code) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant index 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // variable index 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw constants - 0x60, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code + 0x68, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function definitions - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, //
code 0x68, 0x54, 0x42, -- 2.49.1 From c344b865093746c871930f5e238780d4e61c1b68 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 17:22:28 -0500 Subject: [PATCH 17/20] . --- TODO.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/TODO.md b/TODO.md index d2f4882..38d1a97 100644 --- a/TODO.md +++ b/TODO.md @@ -4,11 +4,26 @@ - Auto-expand - Add/retrive byte/int/float/string - Should not be larger than the byte array itself -- [ ] Bytecode +- [x] Bytecode - Add/retrive all types of data - Keeps no memory except for caching - - [ ] Refactor bytecode code + - [x] Refactor bytecode code + +After some additional development: - [ ] Bytecode debugging info + + +## VM + +- [ ] VM + - [ ] Code + - [ ] Simple bytecode loader + - [ ] Output bytecode format + - [ ] Value object + - [ ] Stack object + - [ ] Function context + +After some additional development: - [ ] Bytecode loader - Combine multiple chunks - Resolve function ids, constant ids, etc \ No newline at end of file -- 2.49.1 From d721adad3bec4eaa2744d5df81be94c8ee6b0534 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 17:24:26 -0500 Subject: [PATCH 18/20] . --- TODO.md | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO.md b/TODO.md index 38d1a97..7196b66 100644 --- a/TODO.md +++ b/TODO.md @@ -8,6 +8,7 @@ - Add/retrive all types of data - Keeps no memory except for caching - [x] Refactor bytecode code +- [ ] Output bytecode format After some additional development: - [ ] Bytecode debugging info -- 2.49.1 From 9d4cec0553c177dcb5e00a563378a53428a89e24 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 18:40:26 -0500 Subject: [PATCH 19/20] . 
--- doc/OPCODES | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/OPCODES b/doc/OPCODES index c365591..fd38433 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -4,7 +4,6 @@ Operations Stack operations: (0x00~0x1f) pushn [int] Push int pushr [float] Push float (real) - pushs [string] Push string pshcn [index] Push int from constant list pshcr [index] Push float from constant list pshcs [index] Push string from constant list -- 2.49.1 From be4ee63453d41635a3ca41514d39bdaa391eb0a4 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 19:47:11 -0500 Subject: [PATCH 20/20] . --- src/bytecode/bytecodeprototype.hh | 1 - src/bytecode/tests.cc | 13 ------------- 2 files changed, 14 deletions(-) diff --git a/src/bytecode/bytecodeprototype.hh b/src/bytecode/bytecodeprototype.hh index 682986e..0a4a6bf 100644 --- a/src/bytecode/bytecodeprototype.hh +++ b/src/bytecode/bytecodeprototype.hh @@ -10,7 +10,6 @@ namespace tyche { struct BytecodePrototype { using ConstantValue = std::variant; - using Value = std::variant; struct Function { uint16_t n_pars; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 435ce45..a696065 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -10,18 +10,6 @@ using namespace tyche; -static void print(std::vector const& data) -{ - for (size_t i = 0; i < data.size(); ++i) { - if (i % 16 == 0) - printf("%04X : ", i); - printf("%02X ", data.at(i)); - if (i % 16 == 15) - printf("\n"); - } - printf("\n"); -} - TEST(ByteArray, ByteArray) { auto test = [](std::function const& f, std::vector const& expected) { @@ -125,7 +113,6 @@ TEST(Bytecode, Code) }; ByteArray ba = Bytecode::generate(bp); - // print(ba.data()); print(expected); ASSERT_EQ(ba.data(), expected); } -- 2.49.1