bytecode-improvements (#3)

Reviewed-on: #3
This commit was merged in pull request #3.
This commit is contained in:
2026-04-29 11:40:46 -05:00
parent d8130272a0
commit 54729c1e14
10 changed files with 151 additions and 85 deletions

View File

@@ -58,7 +58,7 @@ FetchContent_MakeAvailable(googletest)
# library # library
# #
add_library(lib${PROJECT_NAME} STATIC add_library(lib${PROJECT_NAME} SHARED
src/bytecode/bytearray.hh src/bytecode/bytearray.hh
src/bytecode/bytearray.cc src/bytecode/bytearray.cc
src/bytecode/bytecode.cc src/bytecode/bytecode.cc
@@ -73,7 +73,8 @@ target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings})
# tests # tests
# #
add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc) add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc
src/bytecode/constant.hh)
target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main) target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test) add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test)

View File

@@ -8,7 +8,10 @@
- Add/retrieve all types of data - Add/retrieve all types of data
- Keeps no memory except for caching - Keeps no memory except for caching
- [x] Refactor bytecode code - [x] Refactor bytecode code
- [ ] Output bytecode format
Improvements:
- [x] Fixed int type (based on opcode)
- [x] Constant type (only floats and strings for now)
After some additional development: After some additional development:
- [ ] Bytecode debugging info - [ ] Bytecode debugging info

View File

@@ -1,17 +1,23 @@
Operations Operations
---------- ----------
Operations take either 0 or 1 parameter. When an operation takes a parameter, it is an int8, an int16 or an int32.
The binary layout of the opcode is XXYY.YYYY, where XX defines the parameter type and YY.YYYY is the instruction. The XX values are:
00 - no parameter
01 - int8
10 - int16
11 - int32
Stack operations: (0x00~0x1f) Stack operations: (0x00~0x1f)
pushn [int] Push int pushn [int] Push int
pushr [float] Push float (real) pushc [index] Push constant
pshcn [index] Push int from constant list
pshcr [index] Push float from constant list
pshcs [index] Push string from constant list
pushf [function] Push function id pushf [function] Push function id
pushz Push zero (or false) pushz Push zero (or false)
pusht Push true pusht Push true
newa [array] Push (create) empty array newa Push (create) empty array
newt [table] Push (create) empty table newt Push (create) empty table
pop pop
dup dup

View File

@@ -1,6 +1,7 @@
#include "bytearray.hh" #include "bytearray.hh"
#include <cstring> #include <cstring>
#include <cstdio>
namespace tyche { namespace tyche {
@@ -14,14 +15,23 @@ void ByteArray::set_byte(uint32_t addr, uint8_t byte)
} }
} }
void ByteArray::set_int(uint32_t addr, int32_t value) void ByteArray::set_int8(uint32_t addr, int8_t value)
{ {
uint32_t zz = ((uint32_t)(value << 1)) ^ ((uint32_t)(value >> 31)); set_byte(addr, (uint8_t) value);
while (zz > 0x7F) { }
set_byte(addr++, (zz & 0x7F) | 0x80);
zz >>= 7; void ByteArray::set_int16(uint32_t addr, int16_t value)
} {
set_byte(addr, zz & 0x7F); set_byte(addr, (uint8_t) (value));
set_byte(addr+1, (uint8_t) (value >> 8));
}
void ByteArray::set_int32(uint32_t addr, int32_t value)
{
set_byte(addr, (uint8_t) (value));
set_byte(addr+1, (uint8_t) (value >> 8));
set_byte(addr+2, (uint8_t) (value >> 16));
set_byte(addr+3, (uint8_t) (value >> 24));
} }
void ByteArray::set_uint16(uint32_t addr, uint16_t value) void ByteArray::set_uint16(uint32_t addr, uint16_t value)
@@ -80,20 +90,23 @@ uint32_t ByteArray::get_uint32(uint32_t addr) const
| (uint32_t) get_byte(addr+3) << 24; | (uint32_t) get_byte(addr+3) << 24;
} }
std::pair<int32_t, size_t> ByteArray::get_int(uint32_t addr) const int8_t ByteArray::get_int8(uint32_t addr) const
{ {
uint32_t zz = 0; return std::bit_cast<int8_t>(get_byte(addr));
int shift = 0; }
for (size_t i = 0; shift < 35; i++) {
uint8_t byte = get_byte(addr++); int16_t ByteArray::get_int16(uint32_t addr) const
zz |= (uint32_t)(byte & 0x7F) << shift; {
if (!(byte & 0x80)) { return (uint16_t) get_byte(addr)
int32_t value = (int32_t)((zz >> 1) ^ -(zz & 1)); | (uint16_t) get_byte(addr+1) << 8;
return std::make_pair(value, (int)(i + 1)); }
}
shift += 7; int32_t ByteArray::get_int32(uint32_t addr) const
} {
throw BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr)); return std::bit_cast<int32_t>((uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8
| (uint32_t) get_byte(addr+2) << 16
| (uint32_t) get_byte(addr+3) << 24);
} }
float ByteArray::get_float(uint32_t addr) const float ByteArray::get_float(uint32_t addr) const
@@ -120,4 +133,23 @@ void ByteArray::append_bytearray(ByteArray const& bytearray)
data_.insert(data_.end(), bytearray.data().begin(), bytearray.data().end()); data_.insert(data_.end(), bytearray.data().begin(), bytearray.data().end());
} }
// Renders the stored bytes as text, 16 bytes per row. Every row begins with
// the offset of its first byte (upper-case hex, zero-padded to 4 digits)
// followed by " | "; every byte is printed as two upper-case hex digits plus a
// space. Complete rows end with a newline, and one extra newline always closes
// the dump.
std::string ByteArray::hexdump() const
{
    // Upper-case hex of `number`, zero-padded to at least `width` digits.
    auto hex = [](uint32_t number, size_t width) -> std::string {
        char text[15];
        snprintf(text, sizeof text, "%0*X", (int) width, number);
        return std::string(text);
    };

    const size_t total = data_.size();
    std::string dump;
    for (size_t row = 0; row < total; row += 16) {
        dump += hex((uint32_t) row, 4) + " | ";

        const size_t row_end = (total - row < 16) ? total : row + 16;
        for (size_t pos = row; pos < row_end; ++pos)
            dump += hex(data_[pos], 2) + " ";

        // Only a full 16-byte row gets its own line break.
        if (row_end - row == 16)
            dump += "\n";
    }
    return dump + "\n";
}
} }

View File

@@ -16,29 +16,37 @@ public:
void set_byte(uint32_t addr, uint8_t byte); void set_byte(uint32_t addr, uint8_t byte);
void set_uint16(uint32_t addr, uint16_t value); void set_uint16(uint32_t addr, uint16_t value);
void set_uint32(uint32_t addr, uint32_t value); void set_uint32(uint32_t addr, uint32_t value);
void set_int(uint32_t addr, int32_t value); void set_int8(uint32_t addr, int8_t value);
void set_int16(uint32_t addr, int16_t value);
void set_int32(uint32_t addr, int32_t value);
void set_float(uint32_t addr, float value); void set_float(uint32_t addr, float value);
void set_string(uint32_t addr, std::string const& str); void set_string(uint32_t addr, std::string const& str);
void set_bytearray(uint32_t addr, ByteArray const& bytearray); void set_bytearray(uint32_t addr, ByteArray const& bytearray);
void append_byte(uint8_t byte) { set_byte(data_.size(), byte); } void append_byte(uint8_t byte) { set_byte(data_.size(), byte); }
void append_uint16(uint16_t value) { set_uint16(data_.size(), value); } void append_uint16(uint16_t value) { set_uint16(data_.size(), value); }
void append_uint32(uint32_t value) { set_uint32(data_.size(), value); } void append_uint32(uint32_t value) { set_uint32(data_.size(), value); }
void append_int(int32_t value) { set_int(data_.size(), value); } void append_int8(int8_t value) { set_int8(data_.size(), value); }
void append_float(float value) { set_float(data_.size(), value); } void append_int16(int16_t value) { set_int16(data_.size(), value); }
void append_string(std::string const& str) { set_string(data_.size(), str); } void append_int32(int32_t value) { set_int32(data_.size(), value); }
void append_float(float value) { set_float(data_.size(), value); }
void append_string(std::string const& str) { set_string(data_.size(), str); }
void append_bytearray(ByteArray const& bytearray); void append_bytearray(ByteArray const& bytearray);
[[nodiscard]] uint8_t get_byte(uint32_t addr) const; [[nodiscard]] uint8_t get_byte(uint32_t addr) const;
[[nodiscard]] uint16_t get_uint16(uint32_t addr) const; [[nodiscard]] uint16_t get_uint16(uint32_t addr) const;
[[nodiscard]] uint32_t get_uint32(uint32_t addr) const; [[nodiscard]] uint32_t get_uint32(uint32_t addr) const;
[[nodiscard]] std::pair<int32_t, size_t> get_int(uint32_t addr) const; [[nodiscard]] int8_t get_int8(uint32_t addr) const;
[[nodiscard]] int16_t get_int16(uint32_t addr) const;
[[nodiscard]] int32_t get_int32(uint32_t addr) const;
[[nodiscard]] float get_float(uint32_t addr) const; [[nodiscard]] float get_float(uint32_t addr) const;
[[nodiscard]] std::pair<std::string, size_t> get_string(uint32_t addr) const; [[nodiscard]] std::pair<std::string, size_t> get_string(uint32_t addr) const;
[[nodiscard]] std::vector<uint8_t> const& data() const { return data_; } [[nodiscard]] std::vector<uint8_t> const& data() const { return data_; }
[[nodiscard]] size_t size() const { return data_.size(); } [[nodiscard]] size_t size() const { return data_.size(); }
[[nodiscard]] std::string hexdump() const;
private: private:
std::vector<uint8_t> data_ {}; std::vector<uint8_t> data_ {};
}; };

View File

@@ -39,22 +39,17 @@ uint32_t Bytecode::n_functions() const
return cache_.n_functions; return cache_.n_functions;
} }
int32_t Bytecode::get_constant_int(uint32_t idx) const ConstantValue Bytecode::get_constant(uint32_t idx) const
{ {
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ));
return byte_array_.get_int(cache_.constants_start_addr + constant_idx).first; switch ((ConstantType) byte_array_.get_byte(cache_.constants_start_addr + constant_idx)) {
} case CONST_TYPE_FLOAT:
return byte_array_.get_float(cache_.constants_start_addr + constant_idx + 1);
float Bytecode::get_constant_float(uint32_t idx) const case CONST_TYPE_STRING:
{ return byte_array_.get_string(cache_.constants_start_addr + constant_idx + 1).first;
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ)); default:
return byte_array_.get_float(cache_.constants_start_addr + constant_idx); throw BytecodeParsingError("Invalid bytecode format (invalid constant type)");
} }
std::string Bytecode::get_constant_string(uint32_t idx) const
{
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ));
return byte_array_.get_string(cache_.constants_start_addr + constant_idx).first;
} }
Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const
@@ -76,14 +71,19 @@ uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const
return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx); return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx);
} }
std::pair<int32_t, size_t> Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const int8_t Bytecode::get_code_int8(uint32_t function_id, uint32_t idx) const
{ {
return byte_array_.get_int(cache_.function_addr.at(function_id) + idx); return byte_array_.get_int8(cache_.function_addr.at(function_id) + idx);
} }
float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const int16_t Bytecode::get_code_int16(uint32_t function_id, uint32_t idx) const
{ {
return byte_array_.get_float(cache_.function_addr.at(function_id) + idx); return byte_array_.get_int16(cache_.function_addr.at(function_id) + idx);
}
int32_t Bytecode::get_code_int32(uint32_t function_id, uint32_t idx) const
{
return byte_array_.get_int32(cache_.function_addr.at(function_id) + idx);
} }
ByteArray Bytecode::generate(BytecodePrototype const& bp) ByteArray Bytecode::generate(BytecodePrototype const& bp)
@@ -101,9 +101,14 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp)
for (auto const& constant: bp.constants) { for (auto const& constant: bp.constants) {
constant_indexes.append_uint32(idx); constant_indexes.append_uint32(idx);
std::visit(overloaded { std::visit(overloaded {
[&](int32_t i) { raw_constants.append_int(i); }, [&](float f) {
[&](float f) { raw_constants.append_float(f); }, raw_constants.append_byte(CONST_TYPE_FLOAT);
[&](std::string const& s) { raw_constants.append_string(s); }, raw_constants.append_float(f);
},
[&](std::string const& s) {
raw_constants.append_byte(CONST_TYPE_STRING);
raw_constants.append_string(s);
},
}, constant); }, constant);
idx = raw_constants.size(); idx = raw_constants.size();
} }

View File

@@ -13,17 +13,16 @@ public:
[[nodiscard]] uint32_t n_constants() const; [[nodiscard]] uint32_t n_constants() const;
[[nodiscard]] uint32_t n_functions() const; [[nodiscard]] uint32_t n_functions() const;
[[nodiscard]] int32_t get_constant_int(uint32_t idx) const; [[nodiscard]] ConstantValue get_constant(uint32_t idx) const;
[[nodiscard]] float get_constant_float(uint32_t idx) const;
[[nodiscard]] std::string get_constant_string(uint32_t idx) const;
struct FunctionDef { uint16_t n_params, locals; }; struct FunctionDef { uint16_t n_params, locals; };
[[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const; [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const;
[[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const; [[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const;
[[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const; [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] std::pair<int32_t, size_t> get_code_int(uint32_t function_id, uint32_t idx) const; [[nodiscard]] int8_t get_code_int8(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const; [[nodiscard]] int16_t get_code_int16(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int32_t get_code_int32(uint32_t function_id, uint32_t idx) const;
// TODO - debugging info // TODO - debugging info

View File

@@ -5,12 +5,11 @@
#include <string> #include <string>
#include <variant> #include <variant>
#include <vector> #include <vector>
#include "constant.hh"
namespace tyche { namespace tyche {
struct BytecodePrototype { struct BytecodePrototype {
using ConstantValue = std::variant<int32_t, float, std::string>;
struct Function { struct Function {
uint16_t n_pars; uint16_t n_pars;
uint16_t n_locals; uint16_t n_locals;

15
src/bytecode/constant.hh Normal file
View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_CONSTANT_HH
#define TYCHE_CONSTANT_HH

#include <cstdint>   // uint8_t, used as the underlying type of ConstantType
#include <string>
#include <variant>

namespace tyche {

// A value held in the bytecode constant table: either a float or a string.
using ConstantValue = std::variant<float, std::string>;

// One-byte tag identifying which kind of constant is stored. The enum is
// deliberately unscoped so the enumerators convert implicitly to a byte.
enum ConstantType : uint8_t { CONST_TYPE_FLOAT = 1, CONST_TYPE_STRING = 2 };

}

#endif //TYCHE_CONSTANT_HH

View File

@@ -26,11 +26,11 @@ TEST(ByteArray, ByteArray)
ByteArray ba; ByteArray ba;
ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab); ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab);
ba.set_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1)); ba.set_int8(1, 12); ASSERT_EQ(ba.get_int8(1), 12);
ba.set_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1)); ba.set_int8(1, -12); ASSERT_EQ(ba.get_int8(1), -12);
ba.set_int(1, 5000); ASSERT_EQ(ba.get_int(1), std::make_pair(5000, 2)); ba.set_int16(1, 5000); ASSERT_EQ(ba.get_int16(1), 5000);
ba.set_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); ba.set_int32(1, 5000300); ASSERT_EQ(ba.get_int32(1), 5000300);
ba.set_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); ba.set_int32(1, -5000300); ASSERT_EQ(ba.get_int32(1), -5000300);
ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14); ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14);
ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14); ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14);
@@ -44,7 +44,7 @@ TEST(ByteArray, ByteArray)
TEST(Bytecode, Constants) TEST(Bytecode, Constants)
{ {
BytecodePrototype bp; BytecodePrototype bp;
bp.constants.emplace_back(42); bp.constants.emplace_back(42.3f);
bp.constants.emplace_back("HELLO"); bp.constants.emplace_back("HELLO");
std::vector<uint8_t> expected = { std::vector<uint8_t> expected = {
@@ -57,7 +57,7 @@ TEST(Bytecode, Constants)
// index // index
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index 0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function undex 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function undex
0x58, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, // raw constants 0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, // raw constants
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -66,14 +66,14 @@ TEST(Bytecode, Constants)
// constant indexes // constant indexes
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
// constant values // constant values
0x54, 'H', 'E', 'L', 'L', 'O', 0x00 CONST_TYPE_FLOAT, 0x33, 0x33, 0x29, 0x42, // float: 42.3f
CONST_TYPE_STRING, 'H', 'E', 'L', 'L', 'O', 0x00
}; };
ByteArray ba = Bytecode::generate(bp); ByteArray ba = Bytecode::generate(bp);
// print(ba.data()); print(expected);
ASSERT_EQ(ba.data(), expected); ASSERT_EQ(ba.data(), expected);
} }
@@ -82,7 +82,7 @@ TEST(Bytecode, Code)
BytecodePrototype bp; BytecodePrototype bp;
auto& f = bp.functions.emplace_back(0, 0); auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68); f.code.append_byte(0x68);
f.code.append_int(42); f.code.append_int8(42);
auto& f2 = bp.functions.emplace_back(2, 1); auto& f2 = bp.functions.emplace_back(2, 1);
f2.code.append_byte(0x42); f2.code.append_byte(0x42);
@@ -109,7 +109,7 @@ TEST(Bytecode, Code)
0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
// code // code
0x68, 0x54, 0x42, 0x68, 42, 0x42,
}; };
ByteArray ba = Bytecode::generate(bp); ByteArray ba = Bytecode::generate(bp);
@@ -122,13 +122,12 @@ TEST(Bytecode, Parsing)
BytecodePrototype bp; BytecodePrototype bp;
bp.constants.emplace_back(42);
bp.constants.emplace_back(3.14f); bp.constants.emplace_back(3.14f);
bp.constants.emplace_back("HELLO"); bp.constants.emplace_back("HELLO");
auto& f = bp.functions.emplace_back(0, 0); auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68); f.code.append_byte(0x68);
f.code.append_int(42); f.code.append_int8(42);
auto& ff = bp.functions.emplace_back(2, 1); auto& ff = bp.functions.emplace_back(2, 1);
ff.code.append_byte(0x42); ff.code.append_byte(0x42);
@@ -140,12 +139,11 @@ TEST(Bytecode, Parsing)
Bytecode bc(std::move(ba)); Bytecode bc(std::move(ba));
ASSERT_EQ(bc.n_constants(), 3); ASSERT_EQ(bc.n_constants(), 2);
ASSERT_EQ(bc.n_functions(), 2); ASSERT_EQ(bc.n_functions(), 2);
ASSERT_EQ(bc.get_constant_int(0), 42); ASSERT_FLOAT_EQ(std::get<float>(bc.get_constant(0)), 3.14f);
ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f); ASSERT_EQ(std::get<std::string>(bc.get_constant(1)), "HELLO");
ASSERT_EQ(bc.get_constant_string(2), "HELLO");
Bytecode::FunctionDef f1 = bc.get_function_def(0); Bytecode::FunctionDef f1 = bc.get_function_def(0);
ASSERT_EQ(f1.n_params, 0); ASSERT_EQ(f1.n_params, 0);
@@ -156,7 +154,7 @@ TEST(Bytecode, Parsing)
ASSERT_EQ(f2.locals, 1); ASSERT_EQ(f2.locals, 1);
ASSERT_EQ(bc.get_code_byte(0, 0), 0x68); ASSERT_EQ(bc.get_code_byte(0, 0), 0x68);
ASSERT_EQ(bc.get_code_int(0, 1), std::make_pair(42, 1)); ASSERT_EQ(bc.get_code_int8(0, 1), 42);
ASSERT_EQ(bc.get_code_byte(1, 0), 0x42); ASSERT_EQ(bc.get_code_byte(1, 0), 0x42);
} }