bytecode2 #2

Merged
andre merged 20 commits from bytecode2 into master 2026-04-28 19:50:48 -05:00
6 changed files with 62 additions and 43 deletions
Showing only changes of commit c17788eeab - Show all commits

33
doc/BYTECODE Normal file
View File

@@ -0,0 +1,33 @@
Bytecode format
---------------
The bytecode file is composed of the following sections:
* 16-byte header
[0:3]: Magic
[4]: VM format
* Index: pointers to each one of the sections, up to 8
Each pointer: 4 bytes
Each count: 2 bytes
* [0x0] Constants indexes: all constants (such as strings) used in the code
* Table of 4-byte constant indexes with pointer to constant
* [0x1] Function indexes: pointers to functions within the code
[0:3]: function pointer
[4:5]: number of parameters
[6:7]: number of local variables
* [0x2] Constants raw data
* [0x3] Code: executable code
[1-byte]: operation
[variable]: operand (see value encoding below)
* [0x4] Debugging info
???
The maximum file size is 2 GB.
## Values can be encoded in the following ways:
* The type is defined by the operator.
* Encoding varies according to the type:
int: use protobuf format
float: 4-byte (32-bit) floating point
string: int-defined length, followed by the string proper - no null terminator
* Constant indexes and function ids are encoded as ints

View File

View File

@@ -74,39 +74,6 @@ Error handling: (0xa0~0xaf)
??? ???
Bytecode format
---------------
The bytecode file is composed of the following sections:
* 16-byte header
[0:3]: Magic
[4]: VM format
* Index: pointers to each one of the sections, up to 8
Each pointer: 4 bytes
Each count: 2 bytes
* [0x0] Constants: all constants (such as strings) used in the code
* Table of 4-byte constant indexes with pointer to constant
* Raw constant data
* [0x1] Functions: Pointer to functions within the code
[0:3]: function pointer
[4:5]: number of parameters
[6:7]: number of local variables
* [0x2] Code: executable code
[1-byte]: operation
[variable]: operand (see value encoding below)
* [0x3] Debugging info
???
The maximum file size is 2 GB.
## Values can be encoded in the following ways:
* The type is defined by the operator.
* Encoding varies according to the type:
int: use protobuf format
float: 4-byte (32-bit) floating point
string: int-defined length, followed by the string proper - no null terminator
* Constant indexes and function ids are encoded as ints
Internal handling of values Internal handling of values

View File

@@ -17,9 +17,9 @@ Bytecode::Bytecode(ByteArray ba)
throw BytecodeParsingError("Unexpected bytecode format version"); throw BytecodeParsingError("Unexpected bytecode format version");
// load cache // load cache
cache_.constants_addr = byte_array_.get_uint32(HEADER_SZ); cache_.constants_idx_addr = byte_array_.get_uint32(HEADER_SZ);
cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4); cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4);
cache_.functions_addr = byte_array_.get_uint32(HEADER_SZ + 6); cache_.functions_idx_addr = byte_array_.get_uint32(HEADER_SZ + 6);
cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10); cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10);
} }
@@ -35,7 +35,8 @@ uint32_t Bytecode::n_functions() const
int32_t Bytecode::get_constant_int(uint32_t idx) const int32_t Bytecode::get_constant_int(uint32_t idx) const
{ {
return 0; uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4));
return byte_array_.get_int(constant_idx).first;
} }
float Bytecode::get_constant_float(uint32_t idx) const float Bytecode::get_constant_float(uint32_t idx) const
@@ -108,6 +109,7 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp)
ba.set_uint32(idx, const_idx); ba.set_uint32(idx, const_idx);
idx += 4; idx += 4;
} }
uint32_t constants_raw_data_addr = ba.size();
ba.append_bytearray(constant_array); ba.append_bytearray(constant_array);
// constant index // constant index
@@ -127,6 +129,7 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp)
idx += 8; idx += 8;
code_idx += f.second; code_idx += f.second;
} }
uint32_t functions_raw_data_addr = ba.size();
for (auto const& f: bp.functions) for (auto const& f: bp.functions)
ba.append_bytearray(f.code); ba.append_bytearray(f.code);
@@ -136,6 +139,14 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp)
ba.set_uint16(HEADER_SZ + 6 + 4, functions.size()); ba.set_uint16(HEADER_SZ + 6 + 4, functions.size());
} }
// constants raw data
if (!constant_table.empty())
ba.set_uint32(HEADER_SZ + (2 * 6), constants_raw_data_addr);
// function raw data
if (!functions.empty())
ba.set_uint32(HEADER_SZ + (3 * 6), functions_raw_data_addr);
return ba; return ba;
} }

View File

@@ -38,10 +38,12 @@ private:
// caching for faster reading of data // caching for faster reading of data
struct Cache { struct Cache {
uint32_t constants_addr; uint32_t constants_idx_addr;
uint16_t n_constants; uint16_t n_constants;
uint32_t functions_addr; uint32_t constants_start_addr;
uint32_t functions_idx_addr;
uint32_t n_functions; uint32_t n_functions;
uint32_t functions_start_addr;
}; };
Cache cache_ {}; Cache cache_ {};
}; };

View File

@@ -69,7 +69,7 @@ TEST(Bytecode, Constants)
// index // index
0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // functions 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // functions
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -109,7 +109,7 @@ TEST(Bytecode, Code)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants
0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // variables 0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // variables
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -124,6 +124,7 @@ TEST(Bytecode, Code)
}; };
ByteArray ba = Bytecode::generate(bp); ByteArray ba = Bytecode::generate(bp);
print(ba.data()); print(expected);
ASSERT_EQ(ba.data(), expected); ASSERT_EQ(ba.data(), expected);
} }
@@ -145,13 +146,18 @@ TEST(Bytecode, Parsing)
f2.code.append_byte(0x42); f2.code.append_byte(0x42);
ByteArray ba = Bytecode::generate(bp); ByteArray ba = Bytecode::generate(bp);
print(ba.data());
// read bytecode // read bytecode
Bytecode bc2(std::move(ba)); Bytecode bc(std::move(ba));
ASSERT_EQ(bc2.n_constants(), 3); ASSERT_EQ(bc.n_constants(), 3);
ASSERT_EQ(bc2.n_functions(), 2); ASSERT_EQ(bc.n_functions(), 2);
ASSERT_EQ(bc.get_constant_int(0), 42);
ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f);
ASSERT_EQ(bc.get_constant_string(2), "HELLO");
} }
int main(int argc, char** argv) int main(int argc, char** argv)