This commit is contained in:
2026-05-10 07:53:28 -05:00
parent 9561d5cacd
commit 1b53c813b4
34 changed files with 79 additions and 89 deletions

View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_BC_EXCEPTIONS_HH
#define TYCHE_BC_EXCEPTIONS_HH
#include <stdexcept>
namespace tyche::bc {
/// Raised when a byte stream cannot be interpreted as valid bytecode.
class BytecodeParsingError : public std::runtime_error {
public:
    /// @param str Human-readable description; it is what what() returns.
    // Hand the string straight to std::runtime_error: it has a std::string
    // overload, so no c_str() round trip (extra strlen + copy) is needed.
    explicit BytecodeParsingError(std::string const& str) : std::runtime_error(str) {}
};
}
#endif //TYCHE_BC_EXCEPTIONS_HH

View File

@@ -0,0 +1,166 @@
#include "bytecode.hh"
#include "bc_exceptions.hh"
#include "../common/overloaded.hh"
namespace tyche::bc {
/// Takes ownership of a serialized bytecode image, validates its header and
/// caches the section locations found in the table of contents (TOC).
///
/// @param ba Raw bytecode bytes; moved into the new object.
/// @throws BytecodeParsingError if the image is shorter than header + TOC, the
///         magic number or version does not match, the constant count does not
///         fit the cache, or an index table extends past the end of the image.
Bytecode::Bytecode(ByteArray ba)
    : byte_array_(std::move(ba))
{
    // the fixed header and the complete table of contents must be present
    const uint64_t file_sz = static_cast<uint64_t>(byte_array_.size());
    if (file_sz < (TOC_START + TOC_SZ))
        throw BytecodeParsingError("Invalid bytecode format (file too short)");

    // check magic number and version
    if (byte_array_.get_uint32(0) != MAGIC_NUMBER)
        throw BytecodeParsingError("Invalid bytecode format (magic number not matching)");
    if (byte_array_.get_uint32(4) != BYTECODE_VERSION)
        throw BytecodeParsingError("Unexpected bytecode format version");

    // Each TOC record is a 32-bit address followed by a 32-bit count/size,
    // which is how generate() writes them.
    const uint32_t const_idx_rec = TOC_START + (SEC_CONST_IDX * TOC_RECORD_SZ);
    const uint32_t func_idx_rec = TOC_START + (SEC_FUNC_IDX * TOC_RECORD_SZ);
    const uint32_t const_data_rec = TOC_START + (SEC_CONST_DATA * TOC_RECORD_SZ);
    const uint32_t code_rec = TOC_START + (SEC_CODE * TOC_RECORD_SZ);

    // load cache
    cache_.constants_idx_addr = byte_array_.get_uint32(const_idx_rec);
    const uint32_t n_constants = byte_array_.get_uint32(const_idx_rec + 4);
    cache_.n_constants = static_cast<decltype(cache_.n_constants)>(n_constants);
    // a count too large for the cache field is rejected instead of silently truncated
    if (static_cast<uint32_t>(cache_.n_constants) != n_constants)
        throw BytecodeParsingError("Invalid bytecode format (too many constants)");
    cache_.functions_idx_addr = byte_array_.get_uint32(func_idx_rec);
    cache_.n_functions = byte_array_.get_uint32(func_idx_rec + 4);
    cache_.constants_start_addr = byte_array_.get_uint32(const_data_rec);
    const uint32_t code_start = byte_array_.get_uint32(code_rec);

    // Both index tables must lie inside the image. The arithmetic is 64-bit so
    // that address + count * record size cannot wrap around.
    const auto table_fits = [file_sz](uint64_t addr, uint64_t n_records, uint64_t record_sz) {
        return addr + (n_records * record_sz) <= file_sz;
    };
    if (!table_fits(cache_.constants_idx_addr, cache_.n_constants, CONST_RECORD_SZ))
        throw BytecodeParsingError("Invalid bytecode format (constant index out of bounds)");
    if (!table_fits(cache_.functions_idx_addr, cache_.n_functions, FUNCTION_RECORD_SZ))
        throw BytecodeParsingError("Invalid bytecode format (function index out of bounds)");

    // Pre-compute the absolute code address and the code size of every function.
    // The reserve is bounded by the image size thanks to the check above.
    cache_.function_addr.reserve(cache_.n_functions);
    cache_.function_sz.reserve(cache_.n_functions);
    for (uint32_t i = 0; i < cache_.n_functions; ++i) {
        const uint32_t record = cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ);
        cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(record));
        cache_.function_sz.emplace_back(byte_array_.get_uint32(record + 8));
    }
}
/// @return The constant count held in the cache.
uint32_t Bytecode::n_constants() const
{
    const uint32_t count = cache_.n_constants;
    return count;
}
/// @return The function count held in the cache.
uint32_t Bytecode::n_functions() const
{
    const uint32_t count = cache_.n_functions;
    return count;
}
/// Decodes one entry of the constant table.
///
/// @param idx Zero-based position in the constant index.
/// @return The stored value, either a float or a string.
/// @throws std::out_of_range if `idx` is not below the cached constant count.
/// @throws BytecodeParsingError if the stored type tag is not a known ConstantType.
ConstantValue Bytecode::get_constant(uint32_t idx) const
{
    // refuse indexes past the table instead of decoding unrelated bytes
    if (idx >= cache_.n_constants)
        throw std::out_of_range("Constant index out of range");

    // the index table stores each constant's offset relative to the raw constant data
    const uint32_t offset = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ));
    const uint32_t addr = cache_.constants_start_addr + offset;

    // a one-byte type tag precedes the payload
    switch (static_cast<ConstantType>(byte_array_.get_byte(addr))) {
        case CONST_TYPE_FLOAT:
            return byte_array_.get_float(addr + 1);
        case CONST_TYPE_STRING:
            return byte_array_.get_string(addr + 1).first;
        default:
            throw BytecodeParsingError("Invalid bytecode format (invalid constant type)");
    }
}
/// Reads the parameter and local counts of a function from its index record.
///
/// @param function_id Zero-based function number.
/// @return The function's `n_params` and `locals` fields.
/// @throws std::out_of_range if `function_id` is not below the cached function
///         count (the same exception the `.at()`-based accessors raise).
Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const
{
    // refuse ids past the table instead of decoding unrelated bytes
    if (function_id >= cache_.n_functions)
        throw std::out_of_range("Function id out of range");

    // record layout: code offset (u32), n_params (u16), locals (u16), code size (u32)
    const uint32_t record = cache_.functions_idx_addr + (function_id * FUNCTION_RECORD_SZ);
    return {
        .n_params = byte_array_.get_uint16(record + 4),
        .locals = byte_array_.get_uint16(record + 6),
    };
}
/// @param function_id Zero-based function number.
/// @return The cached code size of that function.
/// @throws std::out_of_range (from vector::at) if `function_id` is not cached.
uint32_t Bytecode::get_function_sz(uint32_t function_id) const
{
    const uint32_t code_sz = cache_.function_sz.at(function_id);
    return code_sz;
}
/// Reads one unsigned byte of a function's code.
///
/// @param function_id Zero-based function number.
/// @param idx Offset added to the function's cached code address.
/// @throws std::out_of_range (from vector::at) if `function_id` is not cached.
uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const
{
    const uint32_t code_base = cache_.function_addr.at(function_id);
    return byte_array_.get_byte(code_base + idx);
}
/// Reads a signed 8-bit value from a function's code.
///
/// @param function_id Zero-based function number.
/// @param idx Offset added to the function's cached code address.
/// @throws std::out_of_range (from vector::at) if `function_id` is not cached.
int8_t Bytecode::get_code_int8(uint32_t function_id, uint32_t idx) const
{
    const uint32_t code_base = cache_.function_addr.at(function_id);
    return byte_array_.get_int8(code_base + idx);
}
/// Reads a signed 16-bit value from a function's code.
///
/// @param function_id Zero-based function number.
/// @param idx Offset added to the function's cached code address.
/// @throws std::out_of_range (from vector::at) if `function_id` is not cached.
int16_t Bytecode::get_code_int16(uint32_t function_id, uint32_t idx) const
{
    const uint32_t code_base = cache_.function_addr.at(function_id);
    return byte_array_.get_int16(code_base + idx);
}
/// Reads a signed 32-bit value from a function's code.
///
/// @param function_id Zero-based function number.
/// @param idx Offset added to the function's cached code address.
/// @throws std::out_of_range (from vector::at) if `function_id` is not cached.
int32_t Bytecode::get_code_int32(uint32_t function_id, uint32_t idx) const
{
    const uint32_t code_base = cache_.function_addr.at(function_id);
    return byte_array_.get_int32(code_base + idx);
}
/// Serializes a prototype into a bytecode image.
///
/// Layout written: header (magic, version) at 0, table of contents at
/// TOC_START, constant index at CONST_IDX_START, then function index, raw
/// constant data and raw code, in that order.
///
/// @param bp Constants and functions to serialize.
/// @return The assembled image. TOC records of absent sections stay zeroed.
ByteArray Bytecode::generate(BytecodePrototype const& bp)
{
    // header section: the version is written as a 32-bit field, which is the
    // width the parsing constructor reads back
    ByteArray header;
    header.set_uint32(0, MAGIC_NUMBER);
    header.set_uint32(4, BYTECODE_VERSION);

    // constants: each index entry is the constant's offset inside the raw data
    ByteArray constant_indexes;
    ByteArray raw_constants;
    for (auto const& constant: bp.constants) {
        constant_indexes.append_uint32(static_cast<uint32_t>(raw_constants.size()));
        std::visit(overloaded {
            [&](float f) {
                raw_constants.append_byte(CONST_TYPE_FLOAT);
                raw_constants.append_float(f);
            },
            [&](std::string const& s) {
                raw_constants.append_byte(CONST_TYPE_STRING);
                raw_constants.append_string(s);
            },
        }, constant);
    }

    // functions: one fixed-size record each
    // (code offset u32, n_pars u16, n_locals u16, code size u32)
    ByteArray functions_indexes;
    ByteArray raw_code;
    uint32_t record = 0;
    for (auto const& f: bp.functions) {
        functions_indexes.set_uint32(record, static_cast<uint32_t>(raw_code.size()));
        functions_indexes.set_uint16(record + 4, f.n_pars);
        functions_indexes.set_uint16(record + 6, f.n_locals);
        functions_indexes.set_uint32(record + 8, static_cast<uint32_t>(f.code.size()));
        raw_code.append_bytearray(f.code);
        record += FUNCTION_RECORD_SZ;
    }

    // section start addresses, in file order
    const uint32_t function_idx_start = CONST_IDX_START + static_cast<uint32_t>(constant_indexes.size());
    const uint32_t raw_constant_start = function_idx_start + static_cast<uint32_t>(functions_indexes.size());
    const uint32_t raw_code_start = raw_constant_start + static_cast<uint32_t>(raw_constants.size());

    // table of contents
    ByteArray toc;
    // Touch the last word first so the TOC always has its full fixed size. This
    // way the image always reaches CONST_IDX_START (even for an empty
    // prototype) and passes the parser's minimum-size check.
    toc.set_uint32(TOC_SZ - 4, 0);
    if (!bp.constants.empty()) {
        toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ, CONST_IDX_START);
        toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ + 4, static_cast<uint32_t>(constant_indexes.size() / CONST_RECORD_SZ));
        toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ, raw_constant_start);
        toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ + 4, static_cast<uint32_t>(raw_constants.size()));
    }
    if (!bp.functions.empty()) {
        toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ, function_idx_start);
        toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ + 4, static_cast<uint32_t>(functions_indexes.size() / FUNCTION_RECORD_SZ));
        toc.set_uint32(SEC_CODE * TOC_RECORD_SZ, raw_code_start);
        toc.set_uint32(SEC_CODE * TOC_RECORD_SZ + 4, static_cast<uint32_t>(raw_code.size()));
    }

    //
    // assemble bytecode
    //
    ByteArray ba;
    ba.set_bytearray(0, header);
    ba.set_bytearray(TOC_START, toc);
    ba.set_bytearray(CONST_IDX_START, constant_indexes);
    ba.set_bytearray(function_idx_start, functions_indexes);
    ba.set_bytearray(raw_constant_start, raw_constants);
    ba.set_bytearray(raw_code_start, raw_code);
    return ba;
}
}

View File

@@ -0,0 +1,62 @@
#ifndef TYCHE_BYTECODE_HH
#define TYCHE_BYTECODE_HH
#include "../common/bytearray.hh"
#include "bytecodeprototype.hh"
namespace tyche::bc {
/// Read-only view over a serialized bytecode image, plus a static generator
/// that produces such an image from a BytecodePrototype.
class Bytecode {
public:
    Bytecode() = default;

    /// Wraps and validates a serialized image (see the constructor definition).
    explicit Bytecode(ByteArray ba);

    [[nodiscard]] uint32_t n_constants() const;
    [[nodiscard]] uint32_t n_functions() const;

    /// Decodes constant number `idx`.
    [[nodiscard]] ConstantValue get_constant(uint32_t idx) const;

    /// Parameter and local counts stored in a function's index record.
    struct FunctionDef {
        uint16_t n_params;
        uint16_t locals;
    };
    [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const;
    [[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const;

    // Code readers: `idx` is an offset from the start of the function's code.
    [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const;
    [[nodiscard]] int8_t get_code_int8(uint32_t function_id, uint32_t idx) const;
    [[nodiscard]] int16_t get_code_int16(uint32_t function_id, uint32_t idx) const;
    [[nodiscard]] int32_t get_code_int32(uint32_t function_id, uint32_t idx) const;

    // TODO - debugging info

    /// Serializes `bp` into a bytecode image.
    [[nodiscard]] static ByteArray generate(BytecodePrototype const& bp);

private:
    ByteArray byte_array_;  // the actual data

    // header
    static constexpr uint8_t BYTECODE_VERSION = 1;         // compared against the value at offset 4
    static constexpr uint32_t MAGIC_NUMBER = 0x74b3c138;   // compared against the value at offset 0

    // table of contents: TOC_N_RECORDS records of TOC_RECORD_SZ bytes each
    static constexpr uint32_t TOC_START = 16;
    static constexpr uint32_t TOC_N_RECORDS = 8;
    static constexpr uint32_t TOC_RECORD_SZ = 8;
    static constexpr uint32_t TOC_SZ = TOC_N_RECORDS * TOC_RECORD_SZ;

    // the constant index follows the TOC directly; one record per constant
    static constexpr uint32_t CONST_IDX_START = TOC_START + TOC_SZ;
    static constexpr uint32_t CONST_RECORD_SZ = 4;

    // function index record: code offset (u32), n_params (u16), locals (u16), code size (u32)
    static constexpr uint32_t FUNCTION_RECORD_SZ = 12;

    // position of each section's record inside the TOC
    enum Sections {
        SEC_CONST_IDX = 0,
        SEC_FUNC_IDX = 1,
        SEC_CONST_DATA = 2,
        SEC_CODE = 3
    };

    // caching for faster reading of data
    struct Cache {
        uint32_t constants_idx_addr;          // address of the constant index table
        uint16_t n_constants;                 // number of constants
        uint32_t constants_start_addr;        // address of the raw constant data
        uint32_t functions_idx_addr;          // address of the function index table
        uint32_t n_functions;                 // number of functions
        std::vector<uint32_t> function_addr;  // absolute code address, per function
        std::vector<uint32_t> function_sz;    // code size, per function
    };
    Cache cache_ {};
};
}
#endif //TYCHE_BYTECODE_HH

View File

@@ -0,0 +1,30 @@
#ifndef TYCHE_BYTECODEPROTOTYPE_HH
#define TYCHE_BYTECODEPROTOTYPE_HH
#include <cstdint>
#include <string>
#include <variant>
#include <vector>
#include "constant.hh"
#include "../common/bytearray.hh"
namespace tyche::bc {
/// In-memory description of a program: the constants and functions that
/// Bytecode::generate() serializes.
struct BytecodePrototype {
    /// One function: its parameter count, local count and code bytes.
    struct Function {
        uint16_t n_pars;
        uint16_t n_locals;
        ByteArray code {};

        /// Stores the two counts; `code` keeps its default-initialized value.
        Function(uint16_t n_pars_, uint16_t n_locals_)
            : n_pars(n_pars_)
            , n_locals(n_locals_)
        {
        }
    };

    std::vector<ConstantValue> constants {};
    std::vector<Function> functions {};

    // TODO - debugging info
};
}
#endif //TYCHE_BYTECODEPROTOTYPE_HH

View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_CONSTANT_HH
#define TYCHE_CONSTANT_HH
#include <string>
#include <variant>
namespace tyche::bc {
/// Value of a bytecode constant: either a float or a string.
using ConstantValue = std::variant<float, std::string>;

/// One-byte tag naming the kind of a constant.
enum ConstantType : uint8_t {
    CONST_TYPE_FLOAT = 1,
    CONST_TYPE_STRING = 2,
};
}
#endif //TYCHE_CONSTANT_HH

167
.old/src/bytecode/tests.cc Normal file
View File

@@ -0,0 +1,167 @@
#include "gtest/gtest.h"
#include <cstring>
#include <functional>
#include "../common/bytearray.hh"
#include "bytecodeprototype.hh"
#include "bytecode.hh"
using namespace tyche;
using namespace tyche::bc;
// Exercises the ByteArray setters/getters: first the raw bytes produced by a
// write, then set/get round trips for each value type.
TEST(ByteArray, ByteArray)
{
// Runs `f` on a fresh ByteArray and compares the resulting bytes with `expected`.
auto test = [](std::function<void(ByteArray&)> const& f, std::vector<uint8_t> const& expected) {
ByteArray ba;
f(ba);
ASSERT_EQ(ba.data().size(), expected.size());
ASSERT_EQ(std::memcmp(ba.data().data(), expected.data(), ba.data().size()), 0);
};
// TESTX(statement, bytes...): wraps `statement` in a lambda and hands it to `test`.
#define TESTX(a, ...) test([](ByteArray& ba) { a; }, std::vector<uint8_t>({ __VA_ARGS__ }));
// writing at index 1 of an empty array is expected to yield a zero byte at index 0
TESTX(ba.set_byte(1, 0xab), 0x00, 0xab)
// round trips: every write starts at index 1 of the same array
ByteArray ba;
ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab);
ba.set_int8(1, 12); ASSERT_EQ(ba.get_int8(1), 12);
ba.set_int8(1, -12); ASSERT_EQ(ba.get_int8(1), -12);
ba.set_int16(1, 5000); ASSERT_EQ(ba.get_int16(1), 5000);
ba.set_int32(1, 5000300); ASSERT_EQ(ba.get_int32(1), 5000300);
ba.set_int32(1, -5000300); ASSERT_EQ(ba.get_int32(1), -5000300);
ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14);
ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14);
ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1), -5000300.1324);
// NOTE(review): the second pair member (13) is presumably the number of bytes read,
// including the terminating NUL - confirm against ByteArray::get_string
ba.set_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13));
#undef TESTX
}
// Generates an image from a prototype holding only constants (one float, one
// string) and compares it byte for byte with the expected layout.
TEST(Bytecode, Constants)
{
BytecodePrototype bp;
bp.constants.emplace_back(42.3f);
bp.constants.emplace_back("HELLO");
std::vector<uint8_t> expected = {
// header
0x38, 0xc1, 0xb3, 0x74, // magic
0x01, 0x00, 0x00, 0x00, // version
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
// table of contents: 8 records of (address, count or size); unused records stay zero
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function index
0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, // raw constants
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// constant indexes (offsets into the constant values below)
0x00, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00,
// constant values (type tag, then payload)
CONST_TYPE_FLOAT, 0x33, 0x33, 0x29, 0x42, // float: 42.3f
CONST_TYPE_STRING, 'H', 'E', 'L', 'L', 'O', 0x00
};
ByteArray ba = Bytecode::generate(bp);
ASSERT_EQ(ba.data(), expected);
}
// Generates an image from a prototype holding only functions (two of them) and
// compares it byte for byte with the expected layout.
TEST(Bytecode, Code)
{
BytecodePrototype bp;
auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68);
f.code.append_int8(42);
auto& f2 = bp.functions.emplace_back(2, 1);
f2.code.append_byte(0x42);
std::vector<uint8_t> expected = {
// header
0x38, 0xc1, 0xb3, 0x74, // magic
0x01, 0x00, 0x00, 0x00, // version
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
// table of contents: 8 records of (address, count or size); unused records stay zero
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant index
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // function index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw constants
0x68, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// function definitions: code offset (4 bytes), n_pars (2), n_locals (2), code size (4)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
// code (both functions back to back)
0x68, 42, 0x42,
};
ByteArray ba = Bytecode::generate(bp);
ASSERT_EQ(ba.data(), expected);
}
// Round trip: serialize a prototype with two constants and two functions, then
// read everything back through the Bytecode accessors.
TEST(Bytecode, Parsing)
{
    // build the prototype and serialize it
    BytecodePrototype proto;
    proto.constants.emplace_back(3.14f);
    proto.constants.emplace_back("HELLO");

    auto& first_fn = proto.functions.emplace_back(0, 0);
    first_fn.code.append_byte(0x68);
    first_fn.code.append_int8(42);

    auto& second_fn = proto.functions.emplace_back(2, 1);
    second_fn.code.append_byte(0x42);

    ByteArray image = Bytecode::generate(proto);

    // parse the image back
    Bytecode parsed(std::move(image));

    // counts and code sizes
    ASSERT_EQ(parsed.n_constants(), 2);
    ASSERT_EQ(parsed.n_functions(), 2);
    ASSERT_EQ(parsed.get_function_sz(0), 2);
    ASSERT_EQ(parsed.get_function_sz(1), 1);

    // constants
    ASSERT_FLOAT_EQ(std::get<float>(parsed.get_constant(0)), 3.14f);
    ASSERT_EQ(std::get<std::string>(parsed.get_constant(1)), "HELLO");

    // function definitions
    Bytecode::FunctionDef def0 = parsed.get_function_def(0);
    ASSERT_EQ(def0.n_params, 0);
    ASSERT_EQ(def0.locals, 0);

    Bytecode::FunctionDef def1 = parsed.get_function_def(1);
    ASSERT_EQ(def1.n_params, 2);
    ASSERT_EQ(def1.locals, 1);

    // code bytes
    ASSERT_EQ(parsed.get_code_byte(0, 0), 0x68);
    ASSERT_EQ(parsed.get_code_int8(0, 1), 42);
    ASSERT_EQ(parsed.get_code_byte(1, 0), 0x42);
}
// Test runner entry point: passes the command line to GoogleTest and returns
// the result of running every registered test.
int main(int argc, char** argv)
{
    testing::InitGoogleTest(&argc, argv);
    const int status = RUN_ALL_TESTS();
    return status;
}