diff --git a/CMakeLists.txt b/CMakeLists.txt index 158f32b..a5f5f06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,7 @@ add_library(lib${PROJECT_NAME} STATIC src/bytecode/bytecode.cc src/bytecode/bytecode.hh src/bytecode/bytecodeprototype.hh + src/common/overloaded.hh ) target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) @@ -72,8 +73,7 @@ target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) # tests # -add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc - common/overloaded.hh) +add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc) target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main) add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test) diff --git a/doc/CODE_ORGANIZATION b/doc/CODE_ORGANIZATION new file mode 100644 index 0000000..e69de29 diff --git a/doc/OPCODES b/doc/OPCODES index b024f7e..09a3f5d 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -84,6 +84,7 @@ The bytecode file is composed of the following sections: [4]: VM format * Index: pointers to each one of the sections, up to 8 Each pointer: 4 bytes + Each count: 2 bytes * [0x0] Constants: all constants (such as strings) used in the code * Table of 4-byte constant indexes with pointer to constant * Raw constant data diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index 65fde0b..1158928 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -44,8 +44,10 @@ Bytecode::Bytecode(BytecodePrototype const& bp) byte_array_.append_bytearray(constant_array); // constant index - if (!constant_table.empty()) + if (!constant_table.empty()) { byte_array_.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ); + byte_array_.set_uint16(HEADER_SZ + 4, constant_table.size()); + } // functions size_t functions_start = idx + (constant_table.size() * 4) + byte_array_.size(); @@ -62,31 +64,53 @@ Bytecode::Bytecode(BytecodePrototype const& bp) byte_array_.append_bytearray(f.code); // function index - 
if (!functions.empty()) - byte_array_.set_uint32(HEADER_SZ + 4, functions_start); + if (!functions.empty()) { + byte_array_.set_uint32(HEADER_SZ + 6, functions_start); + byte_array_.set_uint16(HEADER_SZ + 6 + 4, functions.size()); + } +} + +Bytecode::Bytecode(std::vector data) + : byte_array_(std::move(data)) +{ + // check file size + if (byte_array_.size() < (HEADER_SZ + INDEX_SZ)) + throw BytecodeParsingError("Invalid bytecode format (file too short)"); + + // check magic number and version + if (byte_array_.get_uint32(0) != MAGIC) + throw BytecodeParsingError("Invalid bytecode format (magic number not matching)"); + if (byte_array_.get_uint32(4) != VERSION) + throw BytecodeParsingError("Unexpected bytecode format version"); + + // load cache + cache_.constants_addr = byte_array_.get_uint32(HEADER_SZ); + cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4); + cache_.functions_addr = byte_array_.get_uint32(HEADER_SZ + 6); + cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10); } uint32_t Bytecode::n_constants() const { - return 0; + return cache_.n_constants; } uint32_t Bytecode::n_functions() const { - return 0; + return cache_.n_functions; } -int32_t Bytecode::get_constant_int(uint32_t addr) const +int32_t Bytecode::get_constant_int(uint32_t idx) const { return 0; } -float Bytecode::get_constant_float(uint32_t addr) const +float Bytecode::get_constant_float(uint32_t idx) const { return 0; } -std::string Bytecode::get_constant_string(uint32_t addr) const +std::string Bytecode::get_constant_string(uint32_t idx) const { return std::string(); } diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index f3483f1..c184cbb 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -8,15 +8,15 @@ namespace tyche { class Bytecode { public: - explicit Bytecode(std::vector data) : byte_array_(std::move(data)) {} + explicit Bytecode(std::vector data); explicit Bytecode(BytecodePrototype const& bp); [[nodiscard]] uint32_t n_constants() 
const; [[nodiscard]] uint32_t n_functions() const; - [[nodiscard]] int32_t get_constant_int(uint32_t addr) const; - [[nodiscard]] float get_constant_float(uint32_t addr) const; - [[nodiscard]] std::string get_constant_string(uint32_t addr) const; + [[nodiscard]] int32_t get_constant_int(uint32_t idx) const; + [[nodiscard]] float get_constant_float(uint32_t idx) const; + [[nodiscard]] std::string get_constant_string(uint32_t idx) const; struct FunctionDef { uint16_t n_params, locals; }; [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const; @@ -30,11 +30,21 @@ public: [[nodiscard]] std::vector const& data() const { return byte_array_.data(); } private: - ByteArray byte_array_; + ByteArray byte_array_; // the actual data + static constexpr uint8_t VERSION = 1; static constexpr uint32_t MAGIC = 0x74b3c138; static constexpr uint32_t HEADER_SZ = 16, - INDEX_SZ = 8 * 4; + INDEX_SZ = 8 * 6; + + // caching for faster reading of data + struct Cache { + uint32_t constants_addr; + uint16_t n_constants; + uint32_t functions_addr; + uint32_t n_functions; + }; + Cache cache_ {}; }; } diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index d5cb4e9..ba965c6 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -10,6 +10,18 @@ using namespace tyche; +static void print(std::vector const& data) +{ + for (size_t i = 0; i < data.size(); ++i) { + if (i % 16 == 0) + printf("%04X : ", i); + printf("%02X ", data.at(i)); + if (i % 16 == 15) + printf("\n"); + } + printf("\n"); +} + TEST(ByteArray, ByteArray) { auto test = [](std::function const& f, std::vector const& expected) { @@ -55,14 +67,14 @@ TEST(Bytecode, Constants) 0x00, 0x00, 0x00, 0x00, // index - 0x30, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 
functions + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant indexes 0x00, 0x00, 0x00, 0x00, @@ -94,14 +106,14 @@ TEST(Bytecode, Code) 0x00, 0x00, 0x00, 0x00, // index - 0x00, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants + 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // functions + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function definitions 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -112,6 +124,34 @@ TEST(Bytecode, Code) }; } +TEST(Bytecode, Parsing) +{ + // write bytecode + + BytecodePrototype bp; + + bp.constants.emplace_back(42); + bp.constants.emplace_back(3.14f); + bp.constants.emplace_back("HELLO"); + + auto& f = bp.functions.emplace_back(0, 0); + f.code.append_byte(0x68); + f.code.append_int(42); + + auto& f2 = bp.functions.emplace_back(2, 1); + f2.code.append_byte(0x42); + + Bytecode bc1(bp); + auto data = bc1.data(); + + // read bytecode + + Bytecode bc2(data); + + ASSERT_EQ(bc2.n_constants(), 3); + ASSERT_EQ(bc2.n_functions(), 2); +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv);