From 960cc7600559299c6c6586ae7e6fd13e48a07153 Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Mon, 27 Apr 2026 20:38:49 -0500 Subject: [PATCH] . --- doc/OPCODES | 12 +++++----- src/bytecode/bytearray.cc | 12 ++++++++++ src/bytecode/bytearray.hh | 3 +++ src/bytecode/bytecode.cc | 31 +++++++++++++++++++++--- src/bytecode/tests.cc | 50 ++++++++++++++++++++++++++++++++------- 5 files changed, 90 insertions(+), 18 deletions(-) diff --git a/doc/OPCODES b/doc/OPCODES index 4edbf82..b024f7e 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -79,22 +79,22 @@ Bytecode format The bytecode file is composed of the following sections: - * [0x0] 16-byte header + * 16-byte header [0:3]: Magic [4]: VM format - * [0x1] Index: pointers to each one of the sections, up to 8 + * Index: pointers to each one of the sections, up to 8 Each pointer: 4 bytes - * [0x2] Constants: all constants (such as strings) used in the code + * [0x0] Constants: all constants (such as strings) used in the code * Table of 4-byte constant indexes with pointer to constant * Raw constant data - * [0x3] Functions: Pointer to functions within the code + * [0x1] Functions: Pointer to functions within the code [0:3]: function pointer [4:5]: number of parameters [6:7]: number of local variables - * [0x4] Code: executable code + * [0x2] Code: executable code [1-byte]: operation [variable]: operand (see value encoding below) - * [0x5] Debugging info + * [0x3] Debugging info ??? The max file size is 2 Gb. diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc index 6b06f1e..c3c5a0c 100644 --- a/src/bytecode/bytearray.cc +++ b/src/bytecode/bytearray.cc @@ -24,6 +24,12 @@ void ByteArray::set_int(uint32_t addr, int32_t value) set_byte(addr, zz & 0x7F); } +void ByteArray::set_uint16(uint32_t addr, uint16_t value) +{ + set_byte(addr, (uint8_t) (value)); + set_byte(addr+1, (uint8_t) (value >> 8)); +} + void ByteArray::set_uint32(uint32_t addr, uint32_t value) { set_byte(addr, (uint8_t) (value)); @@ -54,6 +60,12 @@ uint8_t ByteArray::get_byte(uint32_t addr) const return data_.at(addr); } +uint16_t ByteArray::get_uint16(uint32_t addr) const +{ + return (uint32_t) get_byte(addr) + | (uint32_t) get_byte(addr+1) << 8; +} + uint32_t ByteArray::get_uint32(uint32_t addr) const { return (uint32_t) get_byte(addr) diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index c0f97da..96e8311 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -14,12 +14,14 @@ public: explicit ByteArray(std::vector data) : data_(std::move(data)) {} void set_byte(uint32_t addr, uint8_t byte); + void set_uint16(uint32_t addr, uint16_t value); void set_uint32(uint32_t addr, uint32_t value); void set_int(uint32_t addr, int32_t value); void set_float(uint32_t addr, float value); void set_string(uint32_t addr, std::string const& str); void append_byte(uint8_t byte) { set_byte(data_.size(), byte); } + void append_uint16(uint16_t value) { set_uint16(data_.size(), value); } void append_uint32(uint32_t value) { set_uint32(data_.size(), value); } void append_int(int32_t value) { set_int(data_.size(), value); } void append_float(float value) { set_float(data_.size(), value); } @@ -27,6 +29,7 @@ public: void append_bytearray(ByteArray const& bytearray); [[nodiscard]] uint8_t get_byte(uint32_t addr) const; + [[nodiscard]] uint16_t get_uint16(uint32_t addr) const; [[nodiscard]] uint32_t get_uint32(uint32_t addr) const; [[nodiscard]] std::pair get_int(uint32_t addr) const; [[nodiscard]] std::pair get_float(uint32_t addr) const; diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 05548c5..65fde0b 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -20,6 +20,12 @@ Bytecode::Bytecode(BytecodePrototype const& bp) } // function table + std::vector> functions; + ByteArray code; + for (auto const& f: bp.functions) { + functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size())); + code.append_bytearray(f.code); + } // // build binary @@ -29,9 +35,6 @@ Bytecode::Bytecode(BytecodePrototype const& bp) byte_array_.set_uint32(0, MAGIC); byte_array_.set_byte(4, VERSION); - // index - other entries created later - byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); - // constants idx = HEADER_SZ + INDEX_SZ; for (auto const& const_idx: constant_table) { @@ -39,6 +42,28 @@ Bytecode::Bytecode(BytecodePrototype const& bp) idx += 4; } byte_array_.append_bytearray(constant_array); + + // constant index + if (!constant_table.empty()) + byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); + + // functions + size_t functions_start = idx + (constant_table.size() * 4) + byte_array_.size(); + idx += functions_start; + uint32_t code_idx = 0; + for (auto const& f: functions) { + byte_array_.set_uint32(idx, code_idx); + byte_array_.set_uint16(idx + 4, f.first.n_params); + byte_array_.set_uint16(idx + 6, f.first.n_params); + idx += 8; + code_idx += f.second; + } + for (auto const& f: bp.functions) + byte_array_.append_bytearray(f.code); + + // function index + if (!functions.empty()) + byte_array_.set_uint32(HEADER_SZ + 4, functions_start); } uint32_t Bytecode::n_constants() const diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 4547771..c4aeb7f 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -47,17 +47,12 @@ TEST(Bytecode, Constants) bp.constants.emplace_back(42); bp.constants.emplace_back("HELLO"); - Bytecode bc(bp); - auto binary = bc.data(); - - ByteArray ba; - ba.append_int(42); - std::vector expected = { // header 0x38, 0xc1, 0xb3, 0x74, // magic 0x01, 0x00, 0x00, 0x00, // version - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // index 0x30, 0x00, 0x00, 0x00, @@ -74,10 +69,47 @@ TEST(Bytecode, Constants) 0x01, 0x00, 0x00, 0x00, // constant values - 0x54, 'H', 'e', 'l', 'l', 'o', 0x00 + 0x54, 'H', 'E', 'L', 'L', 'O', 0x00 }; - ASSERT_EQ(binary, expected); + Bytecode bc(bp); + ASSERT_EQ(bc.data(), expected); +} + +TEST(Bytecode, Code) +{ + BytecodePrototype bp; + auto& f = bp.functions.emplace_back(0, 0); + f.code.append_byte(0x68); + f.code.append_int(42); + + f = bp.functions.emplace_back(2, 1); + f.code.append_byte(0x42); + + std::vector expected = { + // header + 0x38, 0xc1, 0xb3, 0x74, // magic + 0x01, 0x00, 0x00, 0x00, // version + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // index + 0x00, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // function definitions + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, + + // code + 0x68, 0x54, 0x42, + }; } int main(int argc, char** argv)