This commit is contained in:
2026-04-28 16:51:19 -05:00
parent c17788eeab
commit 8f5f470edd
7 changed files with 146 additions and 119 deletions

View File

@@ -7,6 +7,7 @@
- [ ] Bytecode - [ ] Bytecode
- Add/retrieve all types of data - Add/retrieve all types of data
- Keeps no memory except for caching - Keeps no memory except for caching
- [ ] Refactor bytecode code
- [ ] Bytecode debugging info - [ ] Bytecode debugging info
- [ ] Bytecode loader - [ ] Bytecode loader
- Combine multiple chunks - Combine multiple chunks

View File

@@ -3,22 +3,23 @@ Bytecode format
The bytecode file is composed of the following sections: The bytecode file is composed of the following sections:
* 16-byte header * HEADER: 16-byte header
[0:3]: Magic [0:3]: Magic
[4]: VM format [4]: VM format
* Index: pointers to each one of the sections, up to 8 [rest]: Reserved for future use
Each pointer: 4 bytes * TABLE_OF_CONTENTS: list of 8 records pointing to each one of the sections
Each count: 4 bytes Each record (8 bytes):
* [0x0] Constants indexes: all constants (such as strings) used in the code - Pointer to section: 4 bytes
- Number of records in section (size in bytes for the raw data sections): 4 bytes
* [0x0] Constants indexes: pointers to each of the constant locations
* Table of 4-byte constant indexes with pointer to constant * Table of 4-byte constant indexes with pointer to constant
(offsets are counted from the beginning of the raw constants section)
* [0x1] Functions indexes: Pointer to functions within the code * [0x1] Functions indexes: Pointer to functions within the code
[0:3]: function pointer [0:3]: function pointer (offset counted from the beginning of the executable code section)
[4:5]: number of parameters [4:5]: number of parameters
[6:7]: number of local variables [6:7]: number of local variables
* [0x2] Constants raw data * [0x2] Constants raw data
* [0x3] Code: executable code * [0x3] Code: executable code
[1-byte]: operation
[variable]: operand (see value encoding below)
* [0x4] Debugging info * [0x4] Debugging info
??? ???

View File

@@ -55,6 +55,12 @@ void ByteArray::set_string(uint32_t addr, std::string const& str)
set_byte(addr, 0); set_byte(addr, 0);
} }
void ByteArray::set_bytearray(uint32_t addr, ByteArray const& bytearray)
{
for (uint8_t byte: bytearray.data())
set_byte(addr++, byte);
}
uint8_t ByteArray::get_byte(uint32_t addr) const uint8_t ByteArray::get_byte(uint32_t addr) const
{ {
return data_.at(addr); return data_.at(addr);
@@ -90,7 +96,7 @@ std::pair<int32_t, size_t> ByteArray::get_int(uint32_t addr) const
throw BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr)); throw BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr));
} }
std::pair<float, size_t> ByteArray::get_float(uint32_t addr) const float ByteArray::get_float(uint32_t addr) const
{ {
uint32_t bits = (uint32_t) get_byte(addr) uint32_t bits = (uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8 | (uint32_t) get_byte(addr+1) << 8
@@ -98,7 +104,7 @@ std::pair<float, size_t> ByteArray::get_float(uint32_t addr) const
| (uint32_t) get_byte(addr+3) << 24; | (uint32_t) get_byte(addr+3) << 24;
float value; float value;
std::memcpy(&value, &bits, 4); std::memcpy(&value, &bits, 4);
return { value, 4 }; return value;
} }
std::pair<std::string, size_t> ByteArray::get_string(uint32_t addr) const std::pair<std::string, size_t> ByteArray::get_string(uint32_t addr) const

View File

@@ -19,6 +19,7 @@ public:
void set_int(uint32_t addr, int32_t value); void set_int(uint32_t addr, int32_t value);
void set_float(uint32_t addr, float value); void set_float(uint32_t addr, float value);
void set_string(uint32_t addr, std::string const& str); void set_string(uint32_t addr, std::string const& str);
void set_bytearray(uint32_t addr, ByteArray const& bytearray);
void append_byte(uint8_t byte) { set_byte(data_.size(), byte); } void append_byte(uint8_t byte) { set_byte(data_.size(), byte); }
void append_uint16(uint16_t value) { set_uint16(data_.size(), value); } void append_uint16(uint16_t value) { set_uint16(data_.size(), value); }
@@ -32,7 +33,7 @@ public:
[[nodiscard]] uint16_t get_uint16(uint32_t addr) const; [[nodiscard]] uint16_t get_uint16(uint32_t addr) const;
[[nodiscard]] uint32_t get_uint32(uint32_t addr) const; [[nodiscard]] uint32_t get_uint32(uint32_t addr) const;
[[nodiscard]] std::pair<int32_t, size_t> get_int(uint32_t addr) const; [[nodiscard]] std::pair<int32_t, size_t> get_int(uint32_t addr) const;
[[nodiscard]] std::pair<float, size_t> get_float(uint32_t addr) const; [[nodiscard]] float get_float(uint32_t addr) const;
[[nodiscard]] std::pair<std::string, size_t> get_string(uint32_t addr) const; [[nodiscard]] std::pair<std::string, size_t> get_string(uint32_t addr) const;
[[nodiscard]] std::vector<uint8_t> const& data() const { return data_; } [[nodiscard]] std::vector<uint8_t> const& data() const { return data_; }

View File

@@ -7,20 +7,24 @@ Bytecode::Bytecode(ByteArray ba)
: byte_array_(std::move(ba)) : byte_array_(std::move(ba))
{ {
// check file size // check file size
if (byte_array_.size() < (HEADER_SZ + INDEX_SZ)) if (byte_array_.size() < (TOC_START + TOC_SZ))
throw BytecodeParsingError("Invalid bytecode format (file too short)"); throw BytecodeParsingError("Invalid bytecode format (file too short)");
// check magic number and version // check magic number and version
if (byte_array_.get_uint32(0) != MAGIC) if (byte_array_.get_uint32(0) != MAGIC_NUMBER)
throw BytecodeParsingError("Invalid bytecode format (magic number not matching)"); throw BytecodeParsingError("Invalid bytecode format (magic number not matching)");
if (byte_array_.get_uint32(4) != VERSION) if (byte_array_.get_uint32(4) != BYTECODE_VERSION)
throw BytecodeParsingError("Unexpected bytecode format version"); throw BytecodeParsingError("Unexpected bytecode format version");
// load cache // load cache
cache_.constants_idx_addr = byte_array_.get_uint32(HEADER_SZ); cache_.constants_idx_addr = byte_array_.get_uint32(TOC_START);
cache_.n_constants = byte_array_.get_uint16(HEADER_SZ + 4); cache_.n_constants = byte_array_.get_uint16(TOC_START + 4);
cache_.functions_idx_addr = byte_array_.get_uint32(HEADER_SZ + 6); cache_.constants_start_addr = byte_array_.get_uint32(TOC_START + (6 * 2));
cache_.n_functions = byte_array_.get_uint16(HEADER_SZ + 10); cache_.functions_idx_addr = byte_array_.get_uint32(TOC_START + 6);
cache_.n_functions = byte_array_.get_uint16(TOC_START + 10);
uint32_t code_start = byte_array_.get_uint32(TOC_START + (6 * 3));
for (uint32_t i = 0; i < cache_.n_functions; ++i)
cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * 8)));
} }
uint32_t Bytecode::n_constants() const uint32_t Bytecode::n_constants() const
@@ -36,117 +40,111 @@ uint32_t Bytecode::n_functions() const
int32_t Bytecode::get_constant_int(uint32_t idx) const int32_t Bytecode::get_constant_int(uint32_t idx) const
{ {
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4)); uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4));
return byte_array_.get_int(constant_idx).first; return byte_array_.get_int(cache_.constants_start_addr + constant_idx).first;
} }
float Bytecode::get_constant_float(uint32_t idx) const float Bytecode::get_constant_float(uint32_t idx) const
{ {
return 0; uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4));
return byte_array_.get_float(cache_.constants_start_addr + constant_idx);
} }
std::string Bytecode::get_constant_string(uint32_t idx) const std::string Bytecode::get_constant_string(uint32_t idx) const
{ {
return std::string(); uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * 4));
return byte_array_.get_string(cache_.constants_start_addr + constant_idx).first;
} }
Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const
{ {
return Bytecode::FunctionDef(); uint32_t idx = cache_.functions_idx_addr + (function_id * 8);
return {
.n_params = byte_array_.get_uint16(idx + 4),
.locals = byte_array_.get_uint16(idx + 6),
};
} }
uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const
{ {
return 0; return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx);
} }
int32_t Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const std::pair<int32_t, size_t> Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const
{ {
return 0; return byte_array_.get_int(cache_.function_addr.at(function_id) + idx);
} }
float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const
{ {
return 0; return byte_array_.get_float(cache_.function_addr.at(function_id) + idx);
} }
ByteArray Bytecode::generate(BytecodePrototype const& bp) ByteArray Bytecode::generate(BytecodePrototype const& bp)
{ {
ByteArray ba; // header section
ByteArray header;
header.set_uint32(0, MAGIC_NUMBER);
header.set_byte(4, BYTECODE_VERSION);
// constants // constants
std::vector<uint32_t> constant_table; ByteArray constant_indexes;
ByteArray constant_array; ByteArray raw_constants;
uint32_t idx = 0; uint32_t idx = 0;
for (auto const& constant: bp.constants) { for (auto const& constant: bp.constants) {
constant_table.emplace_back(idx); constant_indexes.append_uint32(idx);
std::visit(overloaded { std::visit(overloaded {
[&](int32_t i) { constant_array.append_int(i); }, [&](int32_t i) { raw_constants.append_int(i); },
[&](float f) { constant_array.append_float(f); }, [&](float f) { raw_constants.append_float(f); },
[&](std::string const& s) { constant_array.append_string(s); }, [&](std::string const& s) { raw_constants.append_string(s); },
}, constant); }, constant);
idx = constant_array.size(); idx = raw_constants.size();
}
// function table
std::vector<std::pair<FunctionDef, uint32_t>> functions;
ByteArray code;
for (auto const& f: bp.functions) {
code.append_bytearray(f.code);
functions.emplace_back(std::make_pair(FunctionDef { f.n_pars, f.n_locals }, code.size()));
}
//
// build binary
//
// header
ba.set_uint32(0, MAGIC);
ba.set_byte(4, VERSION);
// constants
idx = HEADER_SZ + INDEX_SZ;
for (auto const& const_idx: constant_table) {
ba.set_uint32(idx, const_idx);
idx += 4;
}
uint32_t constants_raw_data_addr = ba.size();
ba.append_bytearray(constant_array);
// constant index
if (!constant_table.empty()) {
ba.set_uint32(HEADER_SZ, HEADER_SZ + INDEX_SZ);
ba.set_uint16(HEADER_SZ + 4, constant_table.size());
} }
// functions // functions
size_t functions_start = idx + (constant_table.size() * 4); ByteArray functions_indexes;
idx = functions_start; ByteArray raw_code;
uint32_t code_idx = 0;
for (auto const& f: functions) {
ba.set_uint32(idx, code_idx);
ba.set_uint16(idx + 4, f.first.n_params);
ba.set_uint16(idx + 6, f.first.locals);
idx += 8;
code_idx += f.second;
}
uint32_t functions_raw_data_addr = ba.size();
for (auto const& f: bp.functions)
ba.append_bytearray(f.code);
// function index uint32_t idx_idx = 0, code_idx = 0;
if (!functions.empty()) { for (auto const& f: bp.functions) {
ba.set_uint32(HEADER_SZ + 6, functions_start); functions_indexes.set_uint32(idx_idx, code_idx);
ba.set_uint16(HEADER_SZ + 6 + 4, functions.size()); functions_indexes.set_uint16(idx_idx + 4, f.n_pars);
functions_indexes.set_uint16(idx_idx + 6, f.n_locals);
raw_code.append_bytearray(f.code);
code_idx = raw_code.size();
idx_idx += FUNCTION_RECORD_SZ;
} }
// constants raw data // table of contents
if (!constant_table.empty()) uint32_t function_idx_start = CONST_IDX_START + constant_indexes.size();
ba.set_uint32(HEADER_SZ + (2 * 6), constants_raw_data_addr); uint32_t raw_constant_start = function_idx_start + functions_indexes.size();
uint32_t raw_code_start = raw_constant_start + raw_constants.size();
// function raw data ByteArray toc;
if (!functions.empty()) if (!bp.constants.empty()) {
ba.set_uint32(HEADER_SZ + (3 * 6), functions_raw_data_addr); toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ, CONST_IDX_START);
toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ + 4, constant_indexes.size() / CONST_RECORD_SZ);
toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ, raw_constant_start);
toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ + 4, raw_constants.size());
}
if (!bp.functions.empty()) {
toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ, function_idx_start);
toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ + 4, functions_indexes.size() / FUNCTION_RECORD_SZ);
toc.set_uint32(SEC_CODE * TOC_RECORD_SZ, raw_code_start);
toc.set_uint32(SEC_CODE * TOC_RECORD_SZ + 4, raw_code.size());
}
//
// assemble bytecode
//
ByteArray ba;
ba.set_bytearray(0, header);
ba.set_bytearray(TOC_START, toc);
ba.set_bytearray(CONST_IDX_START, constant_indexes);
ba.set_bytearray(function_idx_start, functions_indexes);
ba.set_bytearray(raw_constant_start, raw_constants);
ba.set_bytearray(raw_code_start, raw_code);
return ba; return ba;
} }

View File

@@ -21,7 +21,7 @@ public:
[[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const; [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const;
[[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const; [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int32_t get_code_int(uint32_t function_id, uint32_t idx) const; [[nodiscard]] std::pair<int32_t, size_t> get_code_int(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const; [[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const;
// TODO - debugging info // TODO - debugging info
@@ -31,10 +31,17 @@ public:
private: private:
ByteArray byte_array_; // the actual data ByteArray byte_array_; // the actual data
static constexpr uint8_t VERSION = 1; static constexpr uint8_t BYTECODE_VERSION = 1;
static constexpr uint32_t MAGIC = 0x74b3c138; static constexpr uint32_t MAGIC_NUMBER = 0x74b3c138;
static constexpr uint32_t HEADER_SZ = 16, static constexpr uint32_t TOC_START = 16,
INDEX_SZ = 8 * 6; TOC_N_RECORDS = 8,
TOC_RECORD_SZ = 8,
TOC_SZ = TOC_N_RECORDS * TOC_RECORD_SZ;
static constexpr uint32_t CONST_IDX_START = TOC_START + TOC_SZ,
CONST_RECORD_SZ = 4;
static constexpr uint32_t FUNCTION_RECORD_SZ = 8;
enum Sections { SEC_CONST_IDX = 0, SEC_FUNC_IDX = 1, SEC_CONST_DATA = 2, SEC_CODE = 3 };
// caching for faster reading of data // caching for faster reading of data
struct Cache { struct Cache {
@@ -43,7 +50,7 @@ private:
uint32_t constants_start_addr; uint32_t constants_start_addr;
uint32_t functions_idx_addr; uint32_t functions_idx_addr;
uint32_t n_functions; uint32_t n_functions;
uint32_t functions_start_addr; std::vector<uint32_t> function_addr;
}; };
Cache cache_ {}; Cache cache_ {};
}; };

View File

@@ -44,9 +44,9 @@ TEST(ByteArray, ByteArray)
ba.set_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); ba.set_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4));
ba.set_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); ba.set_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4));
ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, 3.14); ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14);
ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, -3.14); ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14);
ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1).first, -5000300.1324); ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1), -5000300.1324);
ba.set_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13)); ba.set_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13));
@@ -67,14 +67,14 @@ TEST(Bytecode, Constants)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// index // index
0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // constants 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // functions 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function index
0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, // raw constants
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// constant indexes // constant indexes
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -85,6 +85,7 @@ TEST(Bytecode, Constants)
}; };
ByteArray ba = Bytecode::generate(bp); ByteArray ba = Bytecode::generate(bp);
print(ba.data()); print(expected);
ASSERT_EQ(ba.data(), expected); ASSERT_EQ(ba.data(), expected);
} }
@@ -106,14 +107,14 @@ TEST(Bytecode, Code)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// index // index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constants 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant index
0x40, 0x00, 0x00, 0x00, 0x02, 0x00, // variables 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // function index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw constants
0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// function definitions // function definitions
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -142,8 +143,8 @@ TEST(Bytecode, Parsing)
f.code.append_byte(0x68); f.code.append_byte(0x68);
f.code.append_int(42); f.code.append_int(42);
auto& f2 = bp.functions.emplace_back(2, 1); auto& ff = bp.functions.emplace_back(2, 1);
f2.code.append_byte(0x42); ff.code.append_byte(0x42);
ByteArray ba = Bytecode::generate(bp); ByteArray ba = Bytecode::generate(bp);
print(ba.data()); print(ba.data());
@@ -158,6 +159,18 @@ TEST(Bytecode, Parsing)
ASSERT_EQ(bc.get_constant_int(0), 42); ASSERT_EQ(bc.get_constant_int(0), 42);
ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f); ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f);
ASSERT_EQ(bc.get_constant_string(2), "HELLO"); ASSERT_EQ(bc.get_constant_string(2), "HELLO");
Bytecode::FunctionDef f1 = bc.get_function_def(0);
ASSERT_EQ(f1.n_params, 0);
ASSERT_EQ(f1.locals, 0);
Bytecode::FunctionDef f2 = bc.get_function_def(1);
ASSERT_EQ(f2.n_params, 2);
ASSERT_EQ(f2.locals, 1);
ASSERT_EQ(bc.get_code_byte(0, 0), 0x68);
ASSERT_EQ(bc.get_code_int(0, 1), std::make_pair(42, 1));
ASSERT_EQ(bc.get_code_byte(1, 0), 0x42);
} }
int main(int argc, char** argv) int main(int argc, char** argv)