From d8130272a04e860ab40cad489fe82abda80616cf Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Tue, 28 Apr 2026 19:50:48 -0500 Subject: [PATCH] bytecode2 (#2) Reviewed-on: http://gitea.andrewagner.uk/andre/tyche/pulls/2 --- CMakeLists.txt | 8 +- TODO.md | 22 ++++- doc/BYTECODE | 35 +++++++ doc/OPCODES | 33 ------- src/bytecode/bytearray.cc | 67 ++++++++++--- src/bytecode/bytearray.hh | 29 ++++-- src/bytecode/bytecode.cc | 159 ++++++++++++++++++++++++++++++ src/bytecode/bytecode.hh | 62 ++++++++++++ src/bytecode/bytecodeprototype.hh | 30 ++++++ src/bytecode/tests.cc | 144 ++++++++++++++++++++++++--- src/common/overloaded.hh | 8 ++ 11 files changed, 529 insertions(+), 68 deletions(-) create mode 100644 doc/BYTECODE create mode 100644 src/bytecode/bytecode.cc create mode 100644 src/bytecode/bytecode.hh create mode 100644 src/bytecode/bytecodeprototype.hh create mode 100644 src/common/overloaded.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fac843..a5f5f06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,8 +59,12 @@ FetchContent_MakeAvailable(googletest) # add_library(lib${PROJECT_NAME} STATIC - src/bytecode/bytearray.hh - src/bytecode/bytearray.cc + src/bytecode/bytearray.hh + src/bytecode/bytearray.cc + src/bytecode/bytecode.cc + src/bytecode/bytecode.hh + src/bytecode/bytecodeprototype.hh + src/common/overloaded.hh ) target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) diff --git a/TODO.md b/TODO.md index 70c6169..7196b66 100644 --- a/TODO.md +++ b/TODO.md @@ -4,9 +4,27 @@ - Auto-expand - Add/retrive byte/int/float/string - Should not be larger than the byte array itself -- [ ] Chunk +- [x] Bytecode - Add/retrive all types of data - Keeps no memory except for caching -- [ ] Chunk loader + - [x] Refactor bytecode code +- [ ] Output bytecode format + +After some additional development: +- [ ] Bytecode debugging info + + +## VM + +- [ ] VM + - [ ] Code + - [ ] Simple bytecode loader + - [ ] Output bytecode format + - [ ] Value object + - [ ] Stack object + - [ ] Function context + +After some additional development: +- [ ] Bytecode loader - Combine multiple chunks - Resolve function ids, constant ids, etc \ No newline at end of file diff --git a/doc/BYTECODE b/doc/BYTECODE new file mode 100644 index 0000000..d0d6571 --- /dev/null +++ b/doc/BYTECODE @@ -0,0 +1,35 @@ +Bytecode format +--------------- + +The bytecode file is composed of the following sections: + + * HEADER: 16-byte header + [0:3]: Magic + [4]: VM format + [rest]: Reserved for future use + * TABLE_OF_CONTENTS: list of 8 records pointing to each one of the sections + Each record (6 bytes): + - Pointer to section: 4 bytes + - Number of records in section: 2 bytes + * [0x0] Constants indexes: pointers to each of the constant locations + * Table of 4-byte constant indexes with pointer to constant + (counter start at beginning of raw constants) + * [0x1] Functions indexes: Pointer to functions within the code + [0:3]: function pointer (counter start at the beginning of executable code) + [4:5]: number of parameters + [6:7]: number of local variables + [8:b]: function size + * [0x2] Constants raw data + * [0x3] Code: executable code + * [0x4] Debugging info + ??? + +The max file size is 2 Gb. + +## Values can be encoded in the following ways: + * The type is defined by the operator. + * Encoding varies according to the type: + int: use protobuf format + float: 4-bit floating point + string: int-defined length, followed by the string proper - no null terminator + * Constant indexes and function ids are encoded as ints diff --git a/doc/OPCODES b/doc/OPCODES index 6476112..fd38433 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -4,7 +4,6 @@ Operations Stack operations: (0x00~0x1f) pushn [int] Push int pushr [float] Push float (real) - pushs [string] Push string pshcn [index] Push int from constant list pshcr [index] Push float from constant list pshcs [index] Push string from constant list @@ -74,38 +73,6 @@ Error handling: (0xa0~0xaf) ??? -Bytecode format ---------------- - -The bytecode file is composed of the following sections: - - * [0x0] 16-byte header - [00]: VM format - [??]: reserved - * [0x1] Index: pointers to each one of the sections, up to 8 - Each pointer: 4 bits - * [0x2] Constants: all constants (such as strings) used in the code - * Table of 4-bit constant indexes with pointer to constant - * Raw constant data - * [0x3] Functions: Pointer to functions within the code - [0:3]: function pointer - [4:5]: number of parameters - [6:7]: number of local variables - * [0x4] Code: executable code - [1-byte]: operation - [variable]: operand (see value encoding below) - * [0x5] Debugging info - ??? - -The max file size is 2 Gb. - -## Values can be encoded in the following ways: - * The type is defined by the operator. - * Encoding varies according to the type: - int: use protobuf format - float: 4-bit floating point - string: int-defined length, followed by the string proper - no null terminator - * Constant indexes and function ids are encoded as ints Internal handling of values diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc index 9968642..fcd4858 100644 --- a/src/bytecode/bytearray.cc +++ b/src/bytecode/bytearray.cc @@ -4,7 +4,7 @@ namespace tyche { -void ByteArray::add_byte(uint32_t addr, uint8_t byte) +void ByteArray::set_byte(uint32_t addr, uint8_t byte) { try { data_.at(addr) = byte; @@ -14,31 +14,51 @@ void ByteArray::add_byte(uint32_t addr, uint8_t byte) } } -void ByteArray::add_int(uint32_t addr, int32_t value) +void ByteArray::set_int(uint32_t addr, int32_t value) { uint32_t zz = ((uint32_t)(value << 1)) ^ ((uint32_t)(value >> 31)); while (zz > 0x7F) { - add_byte(addr++, (zz & 0x7F) | 0x80); + set_byte(addr++, (zz & 0x7F) | 0x80); zz >>= 7; } - add_byte(addr, zz & 0x7F); + set_byte(addr, zz & 0x7F); } -void ByteArray::add_float(uint32_t addr, float value) +void ByteArray::set_uint16(uint32_t addr, uint16_t value) +{ + set_byte(addr, (uint8_t) (value)); + set_byte(addr+1, (uint8_t) (value >> 8)); +} + +void ByteArray::set_uint32(uint32_t addr, uint32_t value) +{ + set_byte(addr, (uint8_t) (value)); + set_byte(addr+1, (uint8_t) (value >> 8)); + set_byte(addr+2, (uint8_t) (value >> 16)); + set_byte(addr+3, (uint8_t) (value >> 24)); +} + +void ByteArray::set_float(uint32_t addr, float value) { uint32_t bits; std::memcpy(&bits, &value, 4); - add_byte(addr, (uint8_t)(bits)); - add_byte(addr+1, (uint8_t)(bits >> 8)); - add_byte(addr+2, (uint8_t)(bits >> 16)); - add_byte(addr+3, (uint8_t)(bits >> 24)); + set_byte(addr, (uint8_t) (bits)); + set_byte(addr+1, (uint8_t) (bits >> 8)); + set_byte(addr+2, (uint8_t) (bits >> 16)); + set_byte(addr+3, (uint8_t) (bits >> 24)); } -void ByteArray::add_string(uint32_t addr, std::string const& str) +void ByteArray::set_string(uint32_t addr, std::string const& str) { for (uint8_t c: str) - add_byte(addr++, c); - add_byte(addr, 0); + set_byte(addr++, c); + set_byte(addr, 0); +} + +void ByteArray::set_bytearray(uint32_t addr, ByteArray const& bytearray) +{ + for (uint8_t byte: bytearray.data()) + set_byte(addr++, byte); } uint8_t ByteArray::get_byte(uint32_t addr) const @@ -46,6 +66,20 @@ uint8_t ByteArray::get_byte(uint32_t addr) const return data_.at(addr); } +uint16_t ByteArray::get_uint16(uint32_t addr) const +{ + return (uint32_t) get_byte(addr) + | (uint32_t) get_byte(addr+1) << 8; +} + +uint32_t ByteArray::get_uint32(uint32_t addr) const +{ + return (uint32_t) get_byte(addr) + | (uint32_t) get_byte(addr+1) << 8 + | (uint32_t) get_byte(addr+2) << 16 + | (uint32_t) get_byte(addr+3) << 24; +} + std::pair ByteArray::get_int(uint32_t addr) const { uint32_t zz = 0; @@ -62,7 +96,7 @@ std::pair ByteArray::get_int(uint32_t addr) const throw BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr)); } -std::pair ByteArray::get_float(uint32_t addr) const +float ByteArray::get_float(uint32_t addr) const { uint32_t bits = (uint32_t) get_byte(addr) | (uint32_t) get_byte(addr+1) << 8 @@ -70,7 +104,7 @@ std::pair ByteArray::get_float(uint32_t addr) const | (uint32_t) get_byte(addr+3) << 24; float value; std::memcpy(&value, &bits, 4); - return { value, 4 }; + return value; } std::pair ByteArray::get_string(uint32_t addr) const @@ -81,4 +115,9 @@ std::pair ByteArray::get_string(uint32_t addr) const return { data, data.size() + 1 }; } +void ByteArray::append_bytearray(ByteArray const& bytearray) +{ + data_.insert(data_.end(), bytearray.data().begin(), bytearray.data().end()); +} + } \ No newline at end of file diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh index a7d5b85..99984ce 100644 --- a/src/bytecode/bytearray.hh +++ b/src/bytecode/bytearray.hh @@ -10,20 +10,37 @@ namespace tyche { class ByteArray { public: - void add_byte(uint32_t addr, uint8_t byte); - void add_int(uint32_t addr, int32_t value); - void add_float(uint32_t addr, float value); - void add_string(uint32_t addr, std::string const& str); + ByteArray() = default; + explicit ByteArray(std::vector data) : data_(std::move(data)) {} + + void set_byte(uint32_t addr, uint8_t byte); + void set_uint16(uint32_t addr, uint16_t value); + void set_uint32(uint32_t addr, uint32_t value); + void set_int(uint32_t addr, int32_t value); + void set_float(uint32_t addr, float value); + void set_string(uint32_t addr, std::string const& str); + void set_bytearray(uint32_t addr, ByteArray const& bytearray); + + void append_byte(uint8_t byte) { set_byte(data_.size(), byte); } + void append_uint16(uint16_t value) { set_uint16(data_.size(), value); } + void append_uint32(uint32_t value) { set_uint32(data_.size(), value); } + void append_int(int32_t value) { set_int(data_.size(), value); } + void append_float(float value) { set_float(data_.size(), value); } + void append_string(std::string const& str) { set_string(data_.size(), str); } + void append_bytearray(ByteArray const& bytearray); [[nodiscard]] uint8_t get_byte(uint32_t addr) const; + [[nodiscard]] uint16_t get_uint16(uint32_t addr) const; + [[nodiscard]] uint32_t get_uint32(uint32_t addr) const; [[nodiscard]] std::pair get_int(uint32_t addr) const; - [[nodiscard]] std::pair get_float(uint32_t addr) const; + [[nodiscard]] float get_float(uint32_t addr) const; [[nodiscard]] std::pair get_string(uint32_t addr) const; [[nodiscard]] std::vector const& data() const { return data_; } + [[nodiscard]] size_t size() const { return data_.size(); } private: - std::vector data_; + std::vector data_ {}; }; class BytecodeParsingError : public std::runtime_error { diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc new file mode 100644 index 0000000..b5a3c5c --- /dev/null +++ b/src/bytecode/bytecode.cc @@ -0,0 +1,159 @@ +#include "bytecode.hh" +#include "../common/overloaded.hh" + +namespace tyche { + +Bytecode::Bytecode(ByteArray ba) + : byte_array_(std::move(ba)) +{ + // check file size + if (byte_array_.size() < (TOC_START + TOC_SZ)) + throw BytecodeParsingError("Invalid bytecode format (file too short)"); + + // check magic number and version + if (byte_array_.get_uint32(0) != MAGIC_NUMBER) + throw BytecodeParsingError("Invalid bytecode format (magic number not matching)"); + if (byte_array_.get_uint32(4) != BYTECODE_VERSION) + throw BytecodeParsingError("Unexpected bytecode format version"); + + // load cache + cache_.constants_idx_addr = byte_array_.get_uint32(TOC_START); + cache_.n_constants = byte_array_.get_uint16(TOC_START + 4); + cache_.functions_idx_addr = byte_array_.get_uint32(TOC_START + (1 * TOC_RECORD_SZ)); + cache_.n_functions = byte_array_.get_uint16(TOC_START + (1 * TOC_RECORD_SZ) + 4); + cache_.constants_start_addr = byte_array_.get_uint32(TOC_START + (2 * TOC_RECORD_SZ)); + uint32_t code_start = byte_array_.get_uint32(TOC_START + (3 * TOC_RECORD_SZ)); + for (uint32_t i = 0; i < cache_.n_functions; ++i) { + cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ))); + cache_.function_sz.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ) + 8)); + } +} + +uint32_t Bytecode::n_constants() const +{ + return cache_.n_constants; +} + +uint32_t Bytecode::n_functions() const +{ + return cache_.n_functions; +} + +int32_t Bytecode::get_constant_int(uint32_t idx) const +{ + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); + return byte_array_.get_int(cache_.constants_start_addr + constant_idx).first; +} + +float Bytecode::get_constant_float(uint32_t idx) const +{ + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); + return byte_array_.get_float(cache_.constants_start_addr + constant_idx); +} + +std::string Bytecode::get_constant_string(uint32_t idx) const +{ + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); + return byte_array_.get_string(cache_.constants_start_addr + constant_idx).first; +} + +Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const +{ + uint32_t idx = cache_.functions_idx_addr + (function_id * FUNCTION_RECORD_SZ); + return { + .n_params = byte_array_.get_uint16(idx + 4), + .locals = byte_array_.get_uint16(idx + 6), + }; +} + +uint32_t Bytecode::get_function_sz(uint32_t function_id) const +{ + return cache_.function_sz.at(function_id); +} + +uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const +{ + return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx); +} + +std::pair Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const +{ + return byte_array_.get_int(cache_.function_addr.at(function_id) + idx); +} + +float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const +{ + return byte_array_.get_float(cache_.function_addr.at(function_id) + idx); +} + +ByteArray Bytecode::generate(BytecodePrototype const& bp) +{ + // header section + ByteArray header; + header.set_uint32(0, MAGIC_NUMBER); + header.set_byte(4, BYTECODE_VERSION); + + // constants + ByteArray constant_indexes; + ByteArray raw_constants; + + uint32_t idx = 0; + for (auto const& constant: bp.constants) { + constant_indexes.append_uint32(idx); + std::visit(overloaded { + [&](int32_t i) { raw_constants.append_int(i); }, + [&](float f) { raw_constants.append_float(f); }, + [&](std::string const& s) { raw_constants.append_string(s); }, + }, constant); + idx = raw_constants.size(); + } + + // functions + ByteArray functions_indexes; + ByteArray raw_code; + + uint32_t idx_idx = 0, code_idx = 0; + for (auto const& f: bp.functions) { + functions_indexes.set_uint32(idx_idx, code_idx); + functions_indexes.set_uint16(idx_idx + 4, f.n_pars); + functions_indexes.set_uint16(idx_idx + 6, f.n_locals); + functions_indexes.set_uint32(idx_idx + 8, f.code.size()); + raw_code.append_bytearray(f.code); + code_idx = raw_code.size(); + idx_idx += FUNCTION_RECORD_SZ; + } + + // table of contents + uint32_t function_idx_start = CONST_IDX_START + constant_indexes.size(); + uint32_t raw_constant_start = function_idx_start + functions_indexes.size(); + uint32_t raw_code_start = raw_constant_start + raw_constants.size(); + + ByteArray toc; + if (!bp.constants.empty()) { + toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ, CONST_IDX_START); + toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ + 4, constant_indexes.size() / CONST_RECORD_SZ); + toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ, raw_constant_start); + toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ + 4, raw_constants.size()); + } + if (!bp.functions.empty()) { + toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ, function_idx_start); + toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ + 4, functions_indexes.size() / FUNCTION_RECORD_SZ); + toc.set_uint32(SEC_CODE * TOC_RECORD_SZ, raw_code_start); + toc.set_uint32(SEC_CODE * TOC_RECORD_SZ + 4, raw_code.size()); + } + + // + // assemble bytecode + // + + ByteArray ba; + ba.set_bytearray(0, header); + ba.set_bytearray(TOC_START, toc); + ba.set_bytearray(CONST_IDX_START, constant_indexes); + ba.set_bytearray(function_idx_start, functions_indexes); + ba.set_bytearray(raw_constant_start, raw_constants); + ba.set_bytearray(raw_code_start, raw_code); + return ba; +} + +} \ No newline at end of file diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh new file mode 100644 index 0000000..fa3dc98 --- /dev/null +++ b/src/bytecode/bytecode.hh @@ -0,0 +1,62 @@ +#ifndef TYCHE_BYTECODE_HH +#define TYCHE_BYTECODE_HH + +#include "bytearray.hh" +#include "bytecodeprototype.hh" + +namespace tyche { + +class Bytecode { +public: + explicit Bytecode(ByteArray ba); + + [[nodiscard]] uint32_t n_constants() const; + [[nodiscard]] uint32_t n_functions() const; + + [[nodiscard]] int32_t get_constant_int(uint32_t idx) const; + [[nodiscard]] float get_constant_float(uint32_t idx) const; + [[nodiscard]] std::string get_constant_string(uint32_t idx) const; + + struct FunctionDef { uint16_t n_params, locals; }; + [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const; + [[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const; + + [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const; + [[nodiscard]] std::pair get_code_int(uint32_t function_id, uint32_t idx) const; + [[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const; + + // TODO - debugging info + + [[nodiscard]] static ByteArray generate(BytecodePrototype const& bp); + +private: + ByteArray byte_array_; // the actual data + + static constexpr uint8_t BYTECODE_VERSION = 1; + static constexpr uint32_t MAGIC_NUMBER = 0x74b3c138; + static constexpr uint32_t TOC_START = 16, + TOC_N_RECORDS = 8, + TOC_RECORD_SZ = 8, + TOC_SZ = TOC_N_RECORDS * TOC_RECORD_SZ; + static constexpr uint32_t CONST_IDX_START = TOC_START + TOC_SZ, + CONST_RECORD_SZ = 4; + static constexpr uint32_t FUNCTION_RECORD_SZ = 12; + + enum Sections { SEC_CONST_IDX = 0, SEC_FUNC_IDX = 1, SEC_CONST_DATA = 2, SEC_CODE = 3 }; + + // caching for faster reading of data + struct Cache { + uint32_t constants_idx_addr; + uint16_t n_constants; + uint32_t constants_start_addr; + uint32_t functions_idx_addr; + uint32_t n_functions; + std::vector function_addr; + std::vector function_sz; + }; + Cache cache_ {}; +}; + +} + +#endif //TYCHE_BYTECODE_HH diff --git a/src/bytecode/bytecodeprototype.hh b/src/bytecode/bytecodeprototype.hh new file mode 100644 index 0000000..0a4a6bf --- /dev/null +++ b/src/bytecode/bytecodeprototype.hh @@ -0,0 +1,30 @@ +#ifndef TYCHE_BYTECODEPROTOTYPE_HH +#define TYCHE_BYTECODEPROTOTYPE_HH + +#include +#include +#include +#include + +namespace tyche { + +struct BytecodePrototype { + using ConstantValue = std::variant; + + struct Function { + uint16_t n_pars; + uint16_t n_locals; + ByteArray code; + + Function(uint16_t n_pars_, uint16_t n_locals_) : n_pars(n_pars_), n_locals(n_locals_), code(ByteArray {}) {} + }; + + std::vector constants {}; + std::vector functions {}; + + // TODO - debugging info +}; + +} + +#endif //TYCHE_BYTECODEPROTOTYPE_HH diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 7d5ab87..a696065 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -1,9 +1,12 @@ #include "gtest/gtest.h" +#include "gmock/gmock.h" #include #include #include "bytearray.hh" +#include "bytecodeprototype.hh" +#include "bytecode.hh" using namespace tyche; @@ -18,26 +21,145 @@ TEST(ByteArray, ByteArray) #define TESTX(a, ...) test([](ByteArray& ba) { a; }, std::vector({ __VA_ARGS__ })); - TESTX(ba.add_byte(1, 0xab), 0x00, 0xab) + TESTX(ba.set_byte(1, 0xab), 0x00, 0xab) ByteArray ba; - ba.add_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab); + ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab); - ba.add_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1)); - ba.add_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1)); - ba.add_int(1, 5000); ASSERT_EQ(ba.get_int(1), std::make_pair(5000, 2)); - ba.add_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); - ba.add_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); + ba.set_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1)); + ba.set_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1)); + ba.set_int(1, 5000); ASSERT_EQ(ba.get_int(1), std::make_pair(5000, 2)); + ba.set_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); + ba.set_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); - ba.add_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, 3.14); - ba.add_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, -3.14); - ba.add_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1).first, -5000300.1324); + ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14); + ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14); + ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1), -5000300.1324); - ba.add_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13)); + ba.set_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13)); #undef TESTX } +TEST(Bytecode, Constants) +{ + BytecodePrototype bp; + bp.constants.emplace_back(42); + bp.constants.emplace_back("HELLO"); + + std::vector expected = { + // header + 0x38, 0xc1, 0xb3, 0x74, // magic + 0x01, 0x00, 0x00, 0x00, // version + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // index + 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function undex + 0x58, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, // raw constants + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // constant indexes + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, + + // constant values + 0x54, 'H', 'E', 'L', 'L', 'O', 0x00 + }; + + ByteArray ba = Bytecode::generate(bp); + // print(ba.data()); print(expected); + ASSERT_EQ(ba.data(), expected); +} + +TEST(Bytecode, Code) +{ + BytecodePrototype bp; + auto& f = bp.functions.emplace_back(0, 0); + f.code.append_byte(0x68); + f.code.append_int(42); + + auto& f2 = bp.functions.emplace_back(2, 1); + f2.code.append_byte(0x42); + + std::vector expected = { + // header + 0x38, 0xc1, 0xb3, 0x74, // magic + 0x01, 0x00, 0x00, 0x00, // version + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + // index + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant index + 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // variable index + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw constants + 0x68, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // function definitions + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, + + // code + 0x68, 0x54, 0x42, + }; + + ByteArray ba = Bytecode::generate(bp); + ASSERT_EQ(ba.data(), expected); +} + +TEST(Bytecode, Parsing) +{ + // write bytecode + + BytecodePrototype bp; + + bp.constants.emplace_back(42); + bp.constants.emplace_back(3.14f); + bp.constants.emplace_back("HELLO"); + + auto& f = bp.functions.emplace_back(0, 0); + f.code.append_byte(0x68); + f.code.append_int(42); + + auto& ff = bp.functions.emplace_back(2, 1); + ff.code.append_byte(0x42); + + ByteArray ba = Bytecode::generate(bp); + // print(ba.data()); + + // read bytecode + + Bytecode bc(std::move(ba)); + + ASSERT_EQ(bc.n_constants(), 3); + ASSERT_EQ(bc.n_functions(), 2); + + ASSERT_EQ(bc.get_constant_int(0), 42); + ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f); + ASSERT_EQ(bc.get_constant_string(2), "HELLO"); + + Bytecode::FunctionDef f1 = bc.get_function_def(0); + ASSERT_EQ(f1.n_params, 0); + ASSERT_EQ(f1.locals, 0); + + Bytecode::FunctionDef f2 = bc.get_function_def(1); + ASSERT_EQ(f2.n_params, 2); + ASSERT_EQ(f2.locals, 1); + + ASSERT_EQ(bc.get_code_byte(0, 0), 0x68); + ASSERT_EQ(bc.get_code_int(0, 1), std::make_pair(42, 1)); + ASSERT_EQ(bc.get_code_byte(1, 0), 0x42); +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); diff --git a/src/common/overloaded.hh b/src/common/overloaded.hh new file mode 100644 index 0000000..f0ad1a0 --- /dev/null +++ b/src/common/overloaded.hh @@ -0,0 +1,8 @@ +#ifndef TYCHE_OVERLOADED_HH +#define TYCHE_OVERLOADED_HH + +// used by std::visitor +template +struct overloaded : Ts... { using Ts::operator()...; }; + +#endif //TYCHE_OVERLOADED_HH