code #4

Merged
andre merged 5 commits from code into master 2026-04-29 14:44:34 -05:00
13 changed files with 424 additions and 79 deletions

View File

@@ -65,6 +65,11 @@ add_library(lib${PROJECT_NAME} SHARED
src/bytecode/bytecode.hh
src/bytecode/bytecodeprototype.hh
src/common/overloaded.hh
src/vm/code.cc
src/vm/code.hh
src/bytecode/constant.hh
src/vm/instruction.hh
src/vm/instruction.cc
)
target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings})
@@ -73,21 +78,24 @@ target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings})
# tests
#
add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc
src/bytecode/constant.hh)
add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc)
target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test)
add_executable(${PROJECT_NAME}-vm-test src/vm/tests.cc)
target_link_libraries(${PROJECT_NAME}-vm-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_vm_test COMMAND ${PROJECT_NAME}-vm-test)
#
# check for leaks
#
add_custom_target(leaks)
add_custom_command(TARGET leaks
add_custom_target(leaks-vm-test)
add_custom_command(TARGET leaks-vm-test
POST_BUILD
COMMENT "Check for leaks using valgrind."
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMAND valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp ./${PROJECT_NAME}
COMMAND valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp ./${PROJECT_NAME}-vm-test
)
#

View File

@@ -20,9 +20,9 @@ After some additional development:
## VM
- [ ] VM
- [ ] Code
- [ ] Simple bytecode loader
- [ ] Output bytecode format
- [x] Code
- [x] Simple bytecode loader
- [x] Output bytecode format
- [ ] Value object
- [ ] Stack object
- [ ] Function context

View File

@@ -3,77 +3,86 @@ Operations
Operations take either 0 or 1 parameter. For those that take a parameter, it can be an int8, int16 or int32.
The binary of the opcode is: XXYY.YYYY, where XX defines the parameter type, and YY.YYYY is the instruction. For the XX values:
Instructions follow this logic:
00 - no parameter
01 - int8
10 - int16
11 - int32
00 ~ 9F : no parameter
A0 ~ BF : int8 (1 byte)
C0 ~ DF : int16 (2 bytes)
E0 ~ FF : int32 (4 bytes)
Stack operations: (0x00~0x1f)
pushn [int] Push int
pushc [index] Push constant
pushf [function] Push function id
pushz Push zero (or false)
pusht Push true
newa Push (create) empty array
newt Push (create) empty table
pop
dup
The operations of 1, 2 and 4 bytes are always interchangeable by adding/subtracting 0x20.
Local variables: (0x20~0x2f)
setl [int] Set stack top as indexed local variable
getl [int] Get indexed local variable and place on stack
setg [int] Set global variable
getg [int] Get global variable
,----------- no parameter
| ,-------- int8
| | ,----- int16
| | | ,-- int32
NP I8 I16 I32 Opc Instruction Description
Function operations: (0x30~0x3f)
call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters)
ret Leave a function (return value in stack)
retn Leave a function (return nil)
Stack operations:
a0 c0 e0 pushi [int] Push int
a1 c1 e1 pushc [index] Push constant
a2 c2 e2 pushf [function] Push function id
00 pushz Push zero (or false)
01 pusht Push true
02 newa Push (create) empty array
03 newt Push (create) empty table
04 pop
05 dup
Control flow: (0x40~0x4f)
bz [pc] Branch if zero
bnz [pc] Branch if not zero
jmp [pc] Unconditional jump
Local variables:
a3 c3 e3 setl [int] Set stack top as indexed local variable
a4 c4 e4 getl [int] Get indexed local variable and place on stack
a5 c5 e5 setg [int] Set global variable
a6 c6 e6 getg [int] Get global variable
Function operations:
a7 c7 e7 call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters)
10 ret Leave a function (return value in stack)
11 retn Leave a function (return nil)
Table and array operations:
16 getkv Get table's value based on key (pull 1 value, push 1 value)
17 setkv Set table's key and value (pull 2 values from stack)
18 geta Get array's position value
19 seta Set array's position value (pull 2 values from stack)
1a appnd Add value to the end of array
1b next Push the next pair into the stack (for loops)
1c smt Set value metatable
1d mt Get value metatable
Logical/arithmetic:
20 sum Sum top 2 values in stack
21 sub Subtract top 2 values in stack
22 mul Multiply top 2 values in stack
23 div Float division
24 idiv Integer division
25 eq Equality
26 neq Inequality
27 lt Less than
28 lte Less than or equals
29 gt Greater than
2a gte Greater than or equals
2b and Bitwise AND
2c or Bitwise OR
2d xor Bitwise XOR
Other value operations:
30 len Get table, array or string size
31 type Get type from value at the top of the stack
b0 cast [type] Cast type to another type
32 ver Return VM version
External code:
38 cmpl Compile code to assembly
39 asmbl Assemble code to bytecode format
3a load Load bytecode as function (will place function on stack)
Control flow:
a8 c8 e8 bz [pc] Branch if zero
a9 c9 e9 bnz [pc] Branch if not zero
aa ca ea jmp [pc] Unconditional jump
* Jumps can only happen within the same function.
Logical/arithmetic: (0x50~0x6f)
sum Sum top 2 values in stack
sub Subtract top 2 values in stack
mul Multiply top 2 values in stack
div Float division
idiv Integer division
eq Equality
neq Inequality
lt Less than
lte Less than or equals
gt Greater than
gte Greater than or equals
and Bitwise AND
or Bitwise OR
xor Bitwise XOR
Table and array operations: (0x70~0x7f)
getkv Get table's value based on key (pull 1 value, push 1 value)
setkv Set table's key and value (pull 2 values from stack)
geta Get array's position value
seta Set array's position value (pull 2 values from stack)
appnd Add value to the end of array
next Push the next pair into the stack (for loops)
smt Set value metatable
mt Get value metatable
Other value operations: (0x80~0x8f)
len Get table, array or string size
type Get type from value at the top of the stack
cast [type] Cast type to another type
ver Return VM version
External code: (0x90~0x9f)
cmpl Compile code to assembly
asmbl Assemble code to bytecode format
load Load bytecode as function (will place function on stack)
Error handling: (0xa0~0xaf)
???

View File

@@ -7,12 +7,9 @@ namespace tyche {
// Writes `byte` at position `addr`, growing the buffer first when `addr`
// lies at or past its current end. Bytes created by the growth are set to 0.
//
// addr: zero-based position to write to.
// byte: value stored at that position.
void ByteArray::set_byte(uint32_t addr, uint8_t byte)
{
    // Check the bounds up front instead of catching std::out_of_range, and
    // compare `addr` itself rather than `addr + 1`: the 32-bit sum wraps to 0
    // when addr is UINT32_MAX. The new size is computed in the container's
    // own size type for the same reason.
    if (addr >= data_.size()) {
        const auto required = static_cast<decltype(data_.size())>(addr) + 1;
        data_.resize(required, 0);
    }
    data_.at(addr) = byte;
}
void ByteArray::set_int8(uint32_t addr, int8_t value)

View File

@@ -25,7 +25,7 @@ Bytecode::Bytecode(ByteArray ba)
uint32_t code_start = byte_array_.get_uint32(TOC_START + (3 * TOC_RECORD_SZ));
for (uint32_t i = 0; i < cache_.n_functions; ++i) {
cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ)));
cache_.function_sz.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ) + 8));
cache_.function_sz.emplace_back(byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ) + 8));
}
}

View File

@@ -8,6 +8,7 @@ namespace tyche {
class Bytecode {
public:
Bytecode() = default;
explicit Bytecode(ByteArray ba);
[[nodiscard]] uint32_t n_constants() const;

View File

@@ -6,6 +6,7 @@
#include <variant>
#include <vector>
#include "constant.hh"
#include "bytearray.hh"
namespace tyche {

View File

@@ -1,5 +1,4 @@
#include "gtest/gtest.h"
#include "gmock/gmock.h"
#include <cstring>
#include <functional>
@@ -141,6 +140,8 @@ TEST(Bytecode, Parsing)
ASSERT_EQ(bc.n_constants(), 2);
ASSERT_EQ(bc.n_functions(), 2);
ASSERT_EQ(bc.get_function_sz(0), 2);
ASSERT_EQ(bc.get_function_sz(1), 1);
ASSERT_FLOAT_EQ(std::get<float>(bc.get_constant(0)), 3.14f);
ASSERT_EQ(std::get<std::string>(bc.get_constant(1)), "HELLO");

43
src/vm/code.cc Normal file
View File

@@ -0,0 +1,43 @@
#include "code.hh"
#include "../common/overloaded.hh"
#include "instruction.hh"
namespace tyche {
// Builds a Bytecode from `incoming` and stores it as this object's bytecode,
// replacing whatever was held before.
void Code::import_bytecode(ByteArray incoming)
{
    // TODO - adjust function calls, constants
    bytecode_ = Bytecode(std::move(incoming));
}
// Renders the stored bytecode as text: a ".const" section with one numbered
// line per constant, a blank line, then a ".func <n>" header per function
// followed by one tab-indented instruction per line.
std::string Code::disassemble() const
{
    std::string listing = ".const\n";

    const auto constant_count = bytecode_.n_constants();
    for (size_t idx = 0; idx < constant_count; ++idx) {
        listing += "\t";
        listing += std::to_string(idx);
        listing += ": ";
        std::visit(overloaded {
            [&listing](std::string const& text) { listing += "\"" + text + "\""; },
            [&listing](float number) { listing += std::to_string(number); },
        }, bytecode_.get_constant(idx));
        listing += "\n";
    }
    listing += "\n";

    const auto function_count = bytecode_.n_functions();
    for (size_t fn = 0; fn < function_count; ++fn) {
        listing += ".func " + std::to_string(fn) + "\n";
        // Each decoded instruction reports its own encoded size, which is
        // how far to advance to reach the next one.
        for (uint32_t offset = 0; offset < bytecode_.get_function_sz(fn); ) {
            const auto decoded = debug_instruction(bytecode_, fn, offset);
            listing += "\t" + decoded.first + "\n";
            offset += decoded.second;
        }
    }

    return listing;
}
} // tyche

20
src/vm/code.hh Normal file
View File

@@ -0,0 +1,20 @@
#ifndef TYCHE_CODE_HH
#define TYCHE_CODE_HH
#include "../bytecode/bytecode.hh"
namespace tyche {
// Holds one Bytecode and exposes operations on it.
class Code {
public:
// Builds a Bytecode from `incoming` and stores it as this object's bytecode.
void import_bytecode(ByteArray incoming);
// Returns a textual listing of the stored bytecode's constants and functions.
[[nodiscard]] std::string disassemble() const;
private:
// Default-constructed until import_bytecode() replaces it.
Bytecode bytecode_;
};
} // tyche
#endif //TYCHE_CODE_HH

127
src/vm/instruction.cc Normal file
View File

@@ -0,0 +1,127 @@
#include "instruction.hh"
namespace tyche {
namespace {

// First opcode of each operand-width range. Opcodes below kInt8Base carry no
// operand; the three ranges above carry a 1-, 2- or 4-byte operand.
constexpr uint8_t kInt8Base = 0xa0;
constexpr uint8_t kInt16Base = 0xc0;
constexpr uint8_t kInt32Base = 0xe0;

// Number of operand bytes that follow the opcode of `inst`.
constexpr size_t operand_size(Instruction inst)
{
    const auto opcode = static_cast<uint8_t>(inst);
    if (opcode >= kInt32Base)
        return 4;
    if (opcode >= kInt16Base)
        return 2;
    if (opcode >= kInt8Base)
        return 1;
    return 0;
}

// Assembly mnemonic for `inst`, or "???" when the opcode is not recognised.
const char* mnemonic(Instruction inst)
{
    // pushf (0xa2 / 0xc2 / 0xe2) is matched by raw opcode value so that this
    // lookup does not depend on the enum providing names for it. It is kept
    // out of the switch because a case label whose value is not an enumerator
    // can trigger -Wswitch.
    const auto opcode = static_cast<uint8_t>(inst);
    if (opcode == 0xa2 || opcode == 0xc2 || opcode == 0xe2)
        return "pushf";

    switch (inst) {
        // stack operations
        case Instruction::PushInt8:
        case Instruction::PushInt16:
        case Instruction::PushInt32:
            return "pushi";
        case Instruction::PushConstant8:
        case Instruction::PushConstant16:
        case Instruction::PushConstant32:
            return "pushc";
        case Instruction::PushZero:      return "pushz";
        case Instruction::PushTrue:      return "pusht";
        case Instruction::NewArray:      return "newa";
        case Instruction::NewTable:      return "newt";
        case Instruction::Pop:           return "pop";
        case Instruction::Duplicate:     return "dup";

        // local and global variables
        case Instruction::SetLocal8:
        case Instruction::SetLocal16:
        case Instruction::SetLocal32:
            return "setl";
        case Instruction::GetLocal8:
        case Instruction::GetLocal16:
        case Instruction::GetLocal32:
            return "getl";
        case Instruction::SetGlobal8:
        case Instruction::SetGlobal16:
        case Instruction::SetGlobal32:
            return "setg";
        case Instruction::GetGlobal8:
        case Instruction::GetGlobal16:
        case Instruction::GetGlobal32:
            return "getg";

        // function operations
        case Instruction::Call8:
        case Instruction::Call16:
        case Instruction::Call32:
            return "call";
        case Instruction::Return:        return "ret";
        case Instruction::ReturnNil:     return "retn";

        // table and array operations
        case Instruction::GetKeyValue:   return "getkv";
        case Instruction::SetKeyValue:   return "setkv";
        case Instruction::GetArrayItem:  return "geta";
        case Instruction::SetArrayItem:  return "seta";
        case Instruction::Append:        return "appnd";
        case Instruction::Next:          return "next";
        case Instruction::SetMetatable:  return "smt";
        case Instruction::GetMetatable:  return "mt";

        // logical/arithmetic
        case Instruction::Sum:           return "sum";
        case Instruction::Subtract:      return "sub";
        case Instruction::Multiply:      return "mul";
        case Instruction::Divide:        return "div";
        case Instruction::DivideInt:     return "idiv";
        case Instruction::Equals:        return "eq";
        case Instruction::NotEquals:     return "neq";
        case Instruction::LessThan:      return "lt";
        case Instruction::LessThanEq:    return "lte";
        case Instruction::GreaterThan:   return "gt";
        case Instruction::GreaterThanEq: return "gte";
        case Instruction::And:           return "and";
        case Instruction::Or:            return "or";
        case Instruction::Xor:           return "xor";

        // other value operations
        case Instruction::Len:           return "len";
        case Instruction::Type:          return "type";
        case Instruction::Cast:          return "cast";
        case Instruction::Version:       return "ver";

        // control flow
        case Instruction::BranchIfZero8:
        case Instruction::BranchIfZero16:
        case Instruction::BranchIfZero32:
            return "bz";
        case Instruction::BranchIfNotZero8:
        case Instruction::BranchIfNotZero16:
        case Instruction::BranchIfNotZero32:
            return "bnz";
        case Instruction::Jump8:
        case Instruction::Jump16:
        case Instruction::Jump32:
            return "jmp";

        // external code
        case Instruction::Compile:       return "cmpl";
        case Instruction::Assemble:      return "asmbl";
        case Instruction::Load:          return "load";

        default:
            return "???";
    }
}

}  // namespace

// Formats one instruction whose operand has already been decoded.
//
// inst: opcode to describe.
// oper: operand value; only used when the opcode is in an operand-carrying
//       range (>= 0xa0).
//
// Returns the mnemonic (followed by a space and `oper` when the opcode carries
// an operand) together with the encoded size of the instruction in bytes:
// 1 with no operand, otherwise 2, 3 or 5 for an int8, int16 or int32 operand.
std::pair<std::string, size_t> debug_instruction(Instruction inst, int oper)
{
    std::string text = mnemonic(inst);

    const size_t operand_bytes = operand_size(inst);
    if (operand_bytes > 0) {
        text += ' ';
        text += std::to_string(oper);
    }

    // One byte for the opcode itself plus whatever operand follows it.
    return { std::move(text), 1 + operand_bytes };
}
// Decodes the instruction stored at position `addr` of function `function_id`
// in `bt`: reads the opcode byte, then the operand that follows it (if the
// opcode range carries one), and formats both.
std::pair<std::string, size_t> debug_instruction(Bytecode const& bt, uint32_t function_id, uint32_t addr)
{
    const auto inst = static_cast<Instruction>(bt.get_code_byte(function_id, addr));
    const auto opcode = static_cast<uint8_t>(inst);

    // The opcode range selects the operand width; opcodes below 0xa0 have no
    // operand and are formatted with the default operand value of 0.
    int operand = 0;
    if (opcode >= 0xe0)
        operand = bt.get_code_int32(function_id, addr + 1);
    else if (opcode >= 0xc0)
        operand = bt.get_code_int16(function_id, addr + 1);
    else if (opcode >= 0xa0)
        operand = bt.get_code_int8(function_id, addr + 1);

    return debug_instruction(inst, operand);
}
}

103
src/vm/instruction.hh Normal file
View File

@@ -0,0 +1,103 @@
#ifndef TYCHE_INSTRUCTION_HH
#define TYCHE_INSTRUCTION_HH
#include <cstdint>
#include <string>
#include <utility>
#include "../bytecode/bytecode.hh"
namespace tyche {
// VM opcodes.
//
// The opcode value encodes the operand width:
//   0x00 ~ 0x9f : no operand
//   0xa0 ~ 0xbf : int8 operand  (1 byte)
//   0xc0 ~ 0xdf : int16 operand (2 bytes)
//   0xe0 ~ 0xff : int32 operand (4 bytes)
// The 8/16/32 variants of one instruction are 0x20 apart.
enum class Instruction : uint8_t {
    // stack operations
    PushInt8          = 0xa0,
    PushInt16         = 0xc0,
    PushInt32         = 0xe0,
    PushConstant8     = 0xa1,
    PushConstant16    = 0xc1,
    PushConstant32    = 0xe1,
    PushFunction8     = 0xa2,   // pushf: push function id
    PushFunction16    = 0xc2,
    PushFunction32    = 0xe2,
    PushZero          = 0x00,
    PushTrue          = 0x01,
    NewArray          = 0x02,
    NewTable          = 0x03,
    Pop               = 0x04,
    Duplicate         = 0x05,

    // local variables
    SetLocal8         = 0xa3,
    SetLocal16        = 0xc3,
    SetLocal32        = 0xe3,
    GetLocal8         = 0xa4,
    GetLocal16        = 0xc4,
    GetLocal32        = 0xe4,
    SetGlobal8        = 0xa5,
    SetGlobal16       = 0xc5,
    SetGlobal32       = 0xe5,
    GetGlobal8        = 0xa6,
    GetGlobal16       = 0xc6,
    GetGlobal32       = 0xe6,

    // function operations
    Call8             = 0xa7,
    Call16            = 0xc7,
    Call32            = 0xe7,
    Return            = 0x10,
    ReturnNil         = 0x11,

    // table and array operations
    GetKeyValue       = 0x16,
    SetKeyValue       = 0x17,
    GetArrayItem      = 0x18,
    SetArrayItem      = 0x19,
    Append            = 0x1a,
    Next              = 0x1b,
    SetMetatable      = 0x1c,
    GetMetatable      = 0x1d,

    // logical/arithmetic
    Sum               = 0x20,
    Subtract          = 0x21,
    Multiply          = 0x22,
    Divide            = 0x23,
    DivideInt         = 0x24,
    Equals            = 0x25,
    NotEquals         = 0x26,
    LessThan          = 0x27,
    LessThanEq        = 0x28,
    GreaterThan       = 0x29,
    GreaterThanEq     = 0x2a,
    And               = 0x2b,
    Or                = 0x2c,
    Xor               = 0x2d,

    // other value operations
    Len               = 0x30,
    Type              = 0x31,
    // cast takes a [type] operand, so it has to live in the int8 operand
    // range rather than among the operand-less opcodes.
    Cast              = 0xb0,
    Version           = 0x32,

    // control flow
    BranchIfZero8     = 0xa8,
    BranchIfZero16    = 0xc8,
    BranchIfZero32    = 0xe8,
    BranchIfNotZero8  = 0xa9,
    BranchIfNotZero16 = 0xc9,
    BranchIfNotZero32 = 0xe9,
    Jump8             = 0xaa,
    Jump16            = 0xca,
    Jump32            = 0xea,

    // external code
    Compile           = 0x38,
    Assemble          = 0x39,
    Load              = 0x3a,
};
// Formats a single instruction whose operand is already decoded. Returns the
// mnemonic (followed by " <oper>" for opcodes >= 0xa0) and the encoded size in
// bytes: 1 without an operand, otherwise 2, 3 or 5.
std::pair<std::string, size_t> debug_instruction(Instruction inst, int oper=0);
// Formats the instruction at position `addr` of function `function_id` in
// `bt`, reading the operand (if any) from the bytes after the opcode.
std::pair<std::string, size_t> debug_instruction(Bytecode const& bt, uint32_t function_id, uint32_t addr);
}
#endif //TYCHE_INSTRUCTION_HH

35
src/vm/tests.cc Normal file
View File

@@ -0,0 +1,35 @@
#include "gtest/gtest.h"
#include "../bytecode/bytecodeprototype.hh"
#include "../bytecode/bytearray.hh"
#include "../bytecode/bytecode.hh"
#include "code.hh"
using namespace tyche;
// Imports a program with two constants and two functions, then checks that the
// disassembly lists the string constant and both functions with the expected
// mnemonics and operand.
TEST(Code, ImportSingleAndDebug)
{
    BytecodePrototype bp;
    bp.constants.emplace_back(3.14f);
    bp.constants.emplace_back("HELLO");

    bp.functions.emplace_back(0, 0);
    bp.functions.at(0).code.append_byte(0xa0);  // pushi
    bp.functions.at(0).code.append_int8(42);

    bp.functions.emplace_back(2, 1);
    bp.functions.at(1).code.append_byte(0x1a);  // appnd

    ByteArray ba = Bytecode::generate(bp);

    Code code;
    code.import_bytecode(std::move(ba));

    const std::string listing = code.disassemble();
    printf("%s\n", listing.c_str());

    // Printing alone can never fail, so pin the parts of the listing whose
    // exact text is known. The float constant is skipped because its decimal
    // rendering is a formatting detail.
    EXPECT_EQ(listing.rfind(".const\n", 0), 0u);
    EXPECT_NE(listing.find("\t1: \"HELLO\"\n"), std::string::npos);
    EXPECT_NE(listing.find(".func 0\n\tpushi 42\n"), std::string::npos);
    EXPECT_NE(listing.find(".func 1\n\tappnd\n"), std::string::npos);
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}