bytecode-improvements #3

Merged
andre merged 3 commits from bytecode-improvements into master 2026-04-29 11:40:46 -05:00
10 changed files with 151 additions and 85 deletions

View File

@@ -58,7 +58,7 @@ FetchContent_MakeAvailable(googletest)
# library
#
add_library(lib${PROJECT_NAME} STATIC
add_library(lib${PROJECT_NAME} SHARED
src/bytecode/bytearray.hh
src/bytecode/bytearray.cc
src/bytecode/bytecode.cc
@@ -73,7 +73,8 @@ target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings})
# tests
#
add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc)
add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc
src/bytecode/constant.hh)
target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test)

View File

@@ -8,7 +8,10 @@
- Add/retrieve all types of data
- Keeps no memory except for caching
- [x] Refactor bytecode code
- [ ] Output bytecode format
Improvements:
- [x] Fixed int type (based on opcode)
- [x] Constant type (only floats and strings for now)
After some additional development:
- [ ] Bytecode debugging info

View File

@@ -1,17 +1,23 @@
Operations
----------
Operations take either 0 or 1 parameters. When an operation takes a parameter, it can be an int8, int16 or int32.
The binary of the opcode is: XXYY.YYYY, where XX defines the parameter type, and YY.YYYY is the instruction. For the XX values:
00 - no parameter
01 - int8
10 - int16
11 - int32
Stack operations: (0x00~0x1f)
pushn [int] Push int
pushr [float] Push float (real)
pshcn [index] Push int from constant list
pshcr [index] Push float from constant list
pshcs [index] Push string from constant list
pushc [index] Push constant
pushf [function] Push function id
pushz Push zero (or false)
pusht Push true
newa [array] Push (create) empty array
newt [table] Push (create) empty table
newa Push (create) empty array
newt Push (create) empty table
pop
dup

View File

@@ -1,6 +1,7 @@
#include "bytearray.hh"
#include <cstring>
#include <cstdio>
namespace tyche {
@@ -14,14 +15,23 @@ void ByteArray::set_byte(uint32_t addr, uint8_t byte)
}
}
void ByteArray::set_int(uint32_t addr, int32_t value)
void ByteArray::set_int8(uint32_t addr, int8_t value)
{
uint32_t zz = ((uint32_t)(value << 1)) ^ ((uint32_t)(value >> 31));
while (zz > 0x7F) {
set_byte(addr++, (zz & 0x7F) | 0x80);
zz >>= 7;
}
set_byte(addr, zz & 0x7F);
set_byte(addr, (uint8_t) value);
}
// Writes a signed 16-bit value at `addr` as two bytes, low byte first.
void ByteArray::set_int16(uint32_t addr, int16_t value)
{
    // Operate on the unsigned bit pattern so the shift never involves a sign bit.
    auto const bits = static_cast<uint16_t>(value);
    set_byte(addr, static_cast<uint8_t>(bits & 0xFF));
    set_byte(addr + 1, static_cast<uint8_t>(bits >> 8));
}
// Writes a signed 32-bit value at `addr` as four bytes, low byte first.
void ByteArray::set_int32(uint32_t addr, int32_t value)
{
    // Operate on the unsigned bit pattern so the shifts never involve a sign bit.
    auto const bits = static_cast<uint32_t>(value);
    for (uint32_t i = 0; i < 4; ++i)
        set_byte(addr + i, static_cast<uint8_t>(bits >> (8 * i)));
}
void ByteArray::set_uint16(uint32_t addr, uint16_t value)
@@ -80,20 +90,23 @@ uint32_t ByteArray::get_uint32(uint32_t addr) const
| (uint32_t) get_byte(addr+3) << 24;
}
std::pair<int32_t, size_t> ByteArray::get_int(uint32_t addr) const
int8_t ByteArray::get_int8(uint32_t addr) const
{
uint32_t zz = 0;
int shift = 0;
for (size_t i = 0; shift < 35; i++) {
uint8_t byte = get_byte(addr++);
zz |= (uint32_t)(byte & 0x7F) << shift;
if (!(byte & 0x80)) {
int32_t value = (int32_t)((zz >> 1) ^ -(zz & 1));
return std::make_pair(value, (int)(i + 1));
}
shift += 7;
}
throw BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr));
return std::bit_cast<int8_t>(get_byte(addr));
}
int16_t ByteArray::get_int16(uint32_t addr) const
{
return (uint16_t) get_byte(addr)
| (uint16_t) get_byte(addr+1) << 8;
}
// Reads four bytes starting at `addr` (low byte first) and returns them
// reinterpreted as a signed 32-bit value.
int32_t ByteArray::get_int32(uint32_t addr) const
{
    uint32_t bits = 0;
    for (uint32_t i = 0; i < 4; ++i)
        bits |= static_cast<uint32_t>(get_byte(addr + i)) << (8 * i);
    return std::bit_cast<int32_t>(bits);
}
float ByteArray::get_float(uint32_t addr) const
@@ -120,4 +133,23 @@ void ByteArray::append_bytearray(ByteArray const& bytearray)
data_.insert(data_.end(), bytearray.data().begin(), bytearray.data().end());
}
// Renders the stored bytes as text: 16 bytes per row, each row prefixed with
// its starting offset as four uppercase hex digits and " | ", each byte as two
// uppercase hex digits followed by a space. A newline is appended after every
// full row, and one more newline always terminates the result.
std::string ByteArray::hexdump() const
{
    constexpr size_t kBytesPerRow = 16;

    // Zero-padded uppercase hex. The width is passed through "%0*X" so the
    // format string stays a literal (checkable by the compiler) rather than
    // being rebuilt with string concatenation on every call.
    auto to_hex = [](uint32_t value, int width) -> std::string {
        char buf[15];
        std::snprintf(buf, sizeof buf, "%0*X", width, static_cast<unsigned>(value));
        return { buf };
    };

    std::string out;
    for (size_t i = 0; i < data_.size(); ++i) {
        if (i % kBytesPerRow == 0)
            out += to_hex(static_cast<uint32_t>(i), 4) + " | ";
        out += to_hex(data_[i], 2) + " ";
        if (i % kBytesPerRow == kBytesPerRow - 1)
            out += "\n";
    }
    return out + "\n";
}
}

View File

@@ -16,29 +16,37 @@ public:
void set_byte(uint32_t addr, uint8_t byte);
void set_uint16(uint32_t addr, uint16_t value);
void set_uint32(uint32_t addr, uint32_t value);
void set_int(uint32_t addr, int32_t value);
void set_int8(uint32_t addr, int8_t value);
void set_int16(uint32_t addr, int16_t value);
void set_int32(uint32_t addr, int32_t value);
void set_float(uint32_t addr, float value);
void set_string(uint32_t addr, std::string const& str);
void set_bytearray(uint32_t addr, ByteArray const& bytearray);
void append_byte(uint8_t byte) { set_byte(data_.size(), byte); }
void append_uint16(uint16_t value) { set_uint16(data_.size(), value); }
void append_uint32(uint32_t value) { set_uint32(data_.size(), value); }
void append_int(int32_t value) { set_int(data_.size(), value); }
void append_float(float value) { set_float(data_.size(), value); }
void append_string(std::string const& str) { set_string(data_.size(), str); }
void append_byte(uint8_t byte) { set_byte(data_.size(), byte); }
void append_uint16(uint16_t value) { set_uint16(data_.size(), value); }
void append_uint32(uint32_t value) { set_uint32(data_.size(), value); }
void append_int8(int8_t value) { set_int8(data_.size(), value); }
void append_int16(int16_t value) { set_int16(data_.size(), value); }
void append_int32(int32_t value) { set_int32(data_.size(), value); }
void append_float(float value) { set_float(data_.size(), value); }
void append_string(std::string const& str) { set_string(data_.size(), str); }
void append_bytearray(ByteArray const& bytearray);
[[nodiscard]] uint8_t get_byte(uint32_t addr) const;
[[nodiscard]] uint16_t get_uint16(uint32_t addr) const;
[[nodiscard]] uint32_t get_uint32(uint32_t addr) const;
[[nodiscard]] std::pair<int32_t, size_t> get_int(uint32_t addr) const;
[[nodiscard]] int8_t get_int8(uint32_t addr) const;
[[nodiscard]] int16_t get_int16(uint32_t addr) const;
[[nodiscard]] int32_t get_int32(uint32_t addr) const;
[[nodiscard]] float get_float(uint32_t addr) const;
[[nodiscard]] std::pair<std::string, size_t> get_string(uint32_t addr) const;
[[nodiscard]] std::vector<uint8_t> const& data() const { return data_; }
[[nodiscard]] size_t size() const { return data_.size(); }
[[nodiscard]] std::string hexdump() const;
private:
std::vector<uint8_t> data_ {};
};

View File

@@ -39,22 +39,17 @@ uint32_t Bytecode::n_functions() const
return cache_.n_functions;
}
int32_t Bytecode::get_constant_int(uint32_t idx) const
ConstantValue Bytecode::get_constant(uint32_t idx) const
{
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ));
return byte_array_.get_int(cache_.constants_start_addr + constant_idx).first;
}
float Bytecode::get_constant_float(uint32_t idx) const
{
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ));
return byte_array_.get_float(cache_.constants_start_addr + constant_idx);
}
std::string Bytecode::get_constant_string(uint32_t idx) const
{
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ));
return byte_array_.get_string(cache_.constants_start_addr + constant_idx).first;
switch ((ConstantType) byte_array_.get_byte(cache_.constants_start_addr + constant_idx)) {
case CONST_TYPE_FLOAT:
return byte_array_.get_float(cache_.constants_start_addr + constant_idx + 1);
case CONST_TYPE_STRING:
return byte_array_.get_string(cache_.constants_start_addr + constant_idx + 1).first;
default:
throw BytecodeParsingError("Invalid bytecode format (invalid constant type)");
}
}
Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const
@@ -76,14 +71,19 @@ uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const
return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx);
}
std::pair<int32_t, size_t> Bytecode::get_code_int(uint32_t function_id, uint32_t idx) const
int8_t Bytecode::get_code_int8(uint32_t function_id, uint32_t idx) const
{
return byte_array_.get_int(cache_.function_addr.at(function_id) + idx);
return byte_array_.get_int8(cache_.function_addr.at(function_id) + idx);
}
float Bytecode::get_code_float(uint32_t function_id, uint32_t idx) const
int16_t Bytecode::get_code_int16(uint32_t function_id, uint32_t idx) const
{
return byte_array_.get_float(cache_.function_addr.at(function_id) + idx);
return byte_array_.get_int16(cache_.function_addr.at(function_id) + idx);
}
// Returns the signed 32-bit value stored `idx` bytes past the address cached
// for `function_id`.
int32_t Bytecode::get_code_int32(uint32_t function_id, uint32_t idx) const
{
    auto const function_start = cache_.function_addr.at(function_id);
    return byte_array_.get_int32(function_start + idx);
}
ByteArray Bytecode::generate(BytecodePrototype const& bp)
@@ -101,9 +101,14 @@ ByteArray Bytecode::generate(BytecodePrototype const& bp)
for (auto const& constant: bp.constants) {
constant_indexes.append_uint32(idx);
std::visit(overloaded {
[&](int32_t i) { raw_constants.append_int(i); },
[&](float f) { raw_constants.append_float(f); },
[&](std::string const& s) { raw_constants.append_string(s); },
[&](float f) {
raw_constants.append_byte(CONST_TYPE_FLOAT);
raw_constants.append_float(f);
},
[&](std::string const& s) {
raw_constants.append_byte(CONST_TYPE_STRING);
raw_constants.append_string(s);
},
}, constant);
idx = raw_constants.size();
}

View File

@@ -13,17 +13,16 @@ public:
[[nodiscard]] uint32_t n_constants() const;
[[nodiscard]] uint32_t n_functions() const;
[[nodiscard]] int32_t get_constant_int(uint32_t idx) const;
[[nodiscard]] float get_constant_float(uint32_t idx) const;
[[nodiscard]] std::string get_constant_string(uint32_t idx) const;
[[nodiscard]] ConstantValue get_constant(uint32_t idx) const;
struct FunctionDef { uint16_t n_params, locals; };
[[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const;
[[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const;
[[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] std::pair<int32_t, size_t> get_code_int(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] float get_code_float(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int8_t get_code_int8(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int16_t get_code_int16(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int32_t get_code_int32(uint32_t function_id, uint32_t idx) const;
// TODO - debugging info

View File

@@ -5,12 +5,11 @@
#include <string>
#include <variant>
#include <vector>
#include "constant.hh"
namespace tyche {
struct BytecodePrototype {
using ConstantValue = std::variant<int32_t, float, std::string>;
struct Function {
uint16_t n_pars;
uint16_t n_locals;

15
src/bytecode/constant.hh Normal file
View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_CONSTANT_HH
#define TYCHE_CONSTANT_HH

// <cstdint> is required for uint8_t below; without it the header only
// compiles when another header happens to pull it in first.
#include <cstdint>
#include <string>
#include <variant>

namespace tyche {

// A constant value: either a float or a string.
using ConstantValue = std::variant<float, std::string>;

// One-byte tag identifying which kind of constant is stored.
enum ConstantType : uint8_t { CONST_TYPE_FLOAT = 1, CONST_TYPE_STRING = 2 };

}

#endif //TYCHE_CONSTANT_HH

View File

@@ -26,11 +26,11 @@ TEST(ByteArray, ByteArray)
ByteArray ba;
ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab);
ba.set_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1));
ba.set_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1));
ba.set_int(1, 5000); ASSERT_EQ(ba.get_int(1), std::make_pair(5000, 2));
ba.set_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4));
ba.set_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4));
ba.set_int8(1, 12); ASSERT_EQ(ba.get_int8(1), 12);
ba.set_int8(1, -12); ASSERT_EQ(ba.get_int8(1), -12);
ba.set_int16(1, 5000); ASSERT_EQ(ba.get_int16(1), 5000);
ba.set_int32(1, 5000300); ASSERT_EQ(ba.get_int32(1), 5000300);
ba.set_int32(1, -5000300); ASSERT_EQ(ba.get_int32(1), -5000300);
ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14);
ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14);
@@ -44,7 +44,7 @@ TEST(ByteArray, ByteArray)
TEST(Bytecode, Constants)
{
BytecodePrototype bp;
bp.constants.emplace_back(42);
bp.constants.emplace_back(42.3f);
bp.constants.emplace_back("HELLO");
std::vector<uint8_t> expected = {
@@ -57,7 +57,7 @@ TEST(Bytecode, Constants)
// index
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function undex
0x58, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, // raw constants
0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, // raw constants
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -66,14 +66,14 @@ TEST(Bytecode, Constants)
// constant indexes
0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00,
// constant values
0x54, 'H', 'E', 'L', 'L', 'O', 0x00
CONST_TYPE_FLOAT, 0x33, 0x33, 0x29, 0x42, // float: 42.3f
CONST_TYPE_STRING, 'H', 'E', 'L', 'L', 'O', 0x00
};
ByteArray ba = Bytecode::generate(bp);
// print(ba.data()); print(expected);
ASSERT_EQ(ba.data(), expected);
}
@@ -82,7 +82,7 @@ TEST(Bytecode, Code)
BytecodePrototype bp;
auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68);
f.code.append_int(42);
f.code.append_int8(42);
auto& f2 = bp.functions.emplace_back(2, 1);
f2.code.append_byte(0x42);
@@ -109,7 +109,7 @@ TEST(Bytecode, Code)
0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
// code
0x68, 0x54, 0x42,
0x68, 42, 0x42,
};
ByteArray ba = Bytecode::generate(bp);
@@ -122,13 +122,12 @@ TEST(Bytecode, Parsing)
BytecodePrototype bp;
bp.constants.emplace_back(42);
bp.constants.emplace_back(3.14f);
bp.constants.emplace_back("HELLO");
auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68);
f.code.append_int(42);
f.code.append_int8(42);
auto& ff = bp.functions.emplace_back(2, 1);
ff.code.append_byte(0x42);
@@ -140,12 +139,11 @@ TEST(Bytecode, Parsing)
Bytecode bc(std::move(ba));
ASSERT_EQ(bc.n_constants(), 3);
ASSERT_EQ(bc.n_constants(), 2);
ASSERT_EQ(bc.n_functions(), 2);
ASSERT_EQ(bc.get_constant_int(0), 42);
ASSERT_FLOAT_EQ(bc.get_constant_float(1), 3.14f);
ASSERT_EQ(bc.get_constant_string(2), "HELLO");
ASSERT_FLOAT_EQ(std::get<float>(bc.get_constant(0)), 3.14f);
ASSERT_EQ(std::get<std::string>(bc.get_constant(1)), "HELLO");
Bytecode::FunctionDef f1 = bc.get_function_def(0);
ASSERT_EQ(f1.n_params, 0);
@@ -156,7 +154,7 @@ TEST(Bytecode, Parsing)
ASSERT_EQ(f2.locals, 1);
ASSERT_EQ(bc.get_code_byte(0, 0), 0x68);
ASSERT_EQ(bc.get_code_int(0, 1), std::make_pair(42, 1));
ASSERT_EQ(bc.get_code_int8(0, 1), 42);
ASSERT_EQ(bc.get_code_byte(1, 0), 0x42);
}