diff --git a/doc/BYTECODE b/doc/BYTECODE new file mode 100644 index 0000000..c696aeb --- /dev/null +++ b/doc/BYTECODE @@ -0,0 +1,33 @@ +Bytecode format +--------------- + +The bytecode file is composed of the following sections: + + * 16-byte header + [0:3]: Magic + [4]: VM format + * Index: pointers to each one of the sections, up to 8 + Each pointer: 4 bytes + Each count: 2 bytes + * [0x0] Constants indexes: all constants (such as strings) used in the code + * Table of 4-byte constant indexes with pointer to constant + * [0x1] Functions indexes: Pointer to functions within the code + [0:3]: function pointer + [4:5]: number of parameters + [6:7]: number of local variables + * [0x2] Constants raw data + * [0x3] Code: executable code + [1-byte]: operation + [variable]: operand (see value encoding below) + * [0x4] Debugging info + ??? + +The max file size is 2 GB. + +## Values can be encoded in the following ways: + * The type is defined by the operator. + * Encoding varies according to the type: + int: use protobuf format + float: 4-byte floating point + string: int-defined length, followed by the string proper - no null terminator + * Constant indexes and function ids are encoded as ints diff --git a/doc/CODE_ORGANIZATION b/doc/CODE_ORGANIZATION deleted file mode 100644 index e69de29..0000000 diff --git a/doc/OPCODES b/doc/OPCODES index 09a3f5d..c365591 100644 --- a/doc/OPCODES +++ b/doc/OPCODES @@ -74,39 +74,6 @@ Error handling: (0xa0~0xaf) ??? 
-Bytecode format ---------------- - -The bytecode file is composed of the following sections: - - * 16-byte header - [0:3]: Magic - [4]: VM format - * Index: pointers to each one of the sections, up to 8 - Each pointer: 4 bytes - Each count: 4 bytes - * [0x0] Constants: all constants (such as strings) used in the code - * Table of 4-byte constant indexes with pointer to constant - * Raw constant data - * [0x1] Functions: Pointer to functions within the code - [0:3]: function pointer - [4:5]: number of parameters - [6:7]: number of local variables - * [0x2] Code: executable code - [1-byte]: operation - [variable]: operand (see value encoding below) - * [0x3] Debugging info - ??? - -The max file size is 2 Gb. - -## Values can be encoded in the following ways: - * The type is defined by the operator. - * Encoding varies according to the type: - int: use protobuf format - float: 4-bit floating point - string: int-defined length, followed by the string proper - no null terminator - * Constant indexes and function ids are encoded as ints Internal handling of values diff --git a/src/bytecode/bytecode.cc b/src/bytecode/bytecode.cc index c1232fe..8cbde31 100644 --- a/src/bytecode/bytecode.cc +++ b/src/bytecode/bytecode.cc @@ -17,9 +17,9 @@ Bytecode::Bytecode(ByteArray ba) throw BytecodeParsingError("Unexpected bytecode format version"); // load cache - cache_.constants_addr = byte_array_.get_uint32(HEADER_SZ); + cache_.constants_idx_addr = byte_array_.get_uint32(HEADER_SZ); cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4); - cache_.functions_addr = byte_array_.get_uint32(HEADER_SZ + 6); + cache_.functions_idx_addr = byte_array_.get_uint32(HEADER_SZ + 6); cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10); } @@ -35,7 +35,8 @@ uint32_t Bytecode::n_functions() const int32_t Bytecode::get_constant_int(uint32_t idx) const { - return 0; + uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); + return 
byte_array_.get_int(constant_idx).first; } float Bytecode::get_constant_float(uint32_t idx) const @@ -108,6 +109,7 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) ba.set_uint32(idx, const_idx); idx += 4; } + uint32_t constants_raw_data_addr = ba.size(); ba.append_bytearray(constant_array); // constant index @@ -127,6 +129,7 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) idx += 8; code_idx += f.second; } + uint32_t functions_raw_data_addr = ba.size(); for (auto const& f: bp.functions) ba.append_bytearray(f.code); @@ -136,6 +139,14 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp) ba.set_uint16(HEADER_SZ + 6 + 4, functions.size()); } + // constants raw data + if (!constant_table.empty()) + ba.set_uint32(HEADER_SZ + (2 * 6), constants_raw_data_addr); + + // function raw data + if (!functions.empty()) + ba.set_uint32(HEADER_SZ + (3 * 6), functions_raw_data_addr); + return ba; } diff --git a/src/bytecode/bytecode.hh b/src/bytecode/bytecode.hh index d26341d..b20caf9 100644 --- a/src/bytecode/bytecode.hh +++ b/src/bytecode/bytecode.hh @@ -38,10 +38,12 @@ private: // caching for faster reading of data struct Cache { - uint32_t constants_addr; + uint32_t constants_idx_addr; uint16_t n_constants; - uint32_t functions_addr; + uint32_t constants_start_addr; + uint32_t functions_idx_addr; uint32_t n_functions; + uint32_t functions_start_addr; }; Cache cache_ {}; }; diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc index 009f525..b387f0f 100644 --- a/src/bytecode/tests.cc +++ b/src/bytecode/tests.cc @@ -69,7 +69,7 @@ TEST(Bytecode, Constants) // index 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // functions - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -109,7 +109,7 @@ TEST(Bytecode, Code) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants 0x40, 0x00, 
0x00, 0x00, 0x02, 0x00, // variables 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -124,6 +124,7 @@ TEST(Bytecode, Code) }; ByteArray ba = Bytecode::generate(bp); + print(ba.data()); print(expected); ASSERT_EQ(ba.data(), expected); } @@ -145,13 +146,18 @@ TEST(Bytecode, Parsing) f2.code.append_byte(0x42); ByteArray ba = Bytecode::generate(bp); + print(ba.data()); // read bytecode - Bytecode bc2(std::move(ba)); + Bytecode bc(std::move(ba)); - ASSERT_EQ(bc2.n_constants(), 3); - ASSERT_EQ(bc2.n_functions(), 2); + ASSERT_EQ(bc.n_constants(), 3); + ASSERT_EQ(bc.n_functions(), 2); + + ASSERT_EQ(bc.get_constant_int(0), 42); + ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f); + ASSERT_EQ(bc.get_constant_string(2), "HELLO"); } int main(int argc, char** argv)