Byte array #1

Merged
andre merged 7 commits from bytecode into master 2026-04-27 09:42:03 -05:00
11 changed files with 423 additions and 0 deletions

5
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/

7
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakePythonSetting">
<option name="pythonIntegrationState" value="YES" />
</component>
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/tyche.iml" filepath="$PROJECT_DIR$/.idea/tyche.iml" />
</modules>
</component>
</project>

2
.idea/tyche.iml generated Normal file
View File

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module classpath="CMake" type="CPP_MODULE" version="4" />

7
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

92
CMakeLists.txt Normal file
View File

@@ -0,0 +1,92 @@
cmake_minimum_required(VERSION 3.24)

project(tyche
    VERSION 0.0.1
    DESCRIPTION "An embeddable/standalone programming language"
    LANGUAGES C CXX ASM)

#
# project options / configuration
#

set(CMAKE_C_STANDARD 17)
set(CMAKE_CXX_STANDARD 23 CACHE STRING "C++ Standard")
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# CXX_EXTENSIONS, INTERPROCEDURAL_OPTIMIZATION and LINK_WHAT_YOU_USE are target
# properties. Setting them with set_property(GLOBAL ...) has no effect; targets
# pick up their defaults from the CMAKE_* variables below.
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_LINK_WHAT_YOU_USE TRUE)

# Let CMake drive link-time optimisation for Release builds. Passing a bare
# -flto compile flag does not cover the link step or the archiver used for the
# static library.
include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output LANGUAGES C CXX)
if(ipo_supported)
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON)
else()
    message(STATUS "IPO/LTO not available: ${ipo_output}")
endif()

# warnings / flags (only collected for Debug builds; empty otherwise)

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    set(warnings
        -Wall -Wextra -Wformat-nonliteral -Wundef -Wshadow -Wwrite-strings
        -Wfloat-equal -Wswitch-default -Wmissing-format-attribute -Wswitch-enum
        -Wmissing-noreturn -Wno-unused-parameter -Wno-unused)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
        list(APPEND warnings
            -Wsuggest-attribute=pure -Wsuggest-attribute=const
            -Wsuggest-attribute=noreturn -Wsuggest-attribute=malloc
            -Wsuggest-attribute=format -Wsuggest-attribute=cold)
    endif()
endif()

# try to use ccache, if available

find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
    set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
endif()

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    add_compile_options(-ggdb -O0)
endif()

if(CMAKE_BUILD_TYPE STREQUAL "Release")
    # NOTE(review): DEF is not consumed anywhere in this file - confirm whether
    # it was meant to be passed to target_compile_definitions().
    set(DEF B_PRODUCTION_MODE=ON)
    add_compile_options(-Ofast)
endif()

#
# libraries
#

include(FetchContent)
FetchContent_Declare(
    googletest
    # Specify the commit you depend on and update it regularly.
    URL https://github.com/google/googletest/releases/download/v1.17.0/googletest-1.17.0.tar.gz
)
FetchContent_MakeAvailable(googletest)

#
# library
#

add_library(lib${PROJECT_NAME} STATIC
    src/bytecode/bytearray.hh
    src/bytecode/bytearray.cc
)
target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings})

#
# tests
#

# Without enable_testing(), add_test() registers nothing and ctest finds no tests.
enable_testing()

add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc)
target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test)

#
# check for leaks
#

# Run valgrind against the test binary (the only executable this file builds).
# The suppressions file is optional so the target still works without it.
set(valgrind_args --leak-check=full --show-leak-kinds=all --track-origins=yes)
if(EXISTS "${PROJECT_SOURCE_DIR}/valgrind.supp")
    list(APPEND valgrind_args "--suppressions=${PROJECT_SOURCE_DIR}/valgrind.supp")
endif()

add_custom_target(leaks
    COMMAND valgrind ${valgrind_args} $<TARGET_FILE:${PROJECT_NAME}-bytecode-test>
    DEPENDS ${PROJECT_NAME}-bytecode-test
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
    COMMENT "Check for leaks using valgrind."
    VERBATIM
)

#
# installation
#

# A static library is an ARCHIVE artifact. PROJECT_NAME (not CMAKE_PROJECT_NAME)
# keeps the target name correct when this project is embedded in another build.
install(TARGETS lib${PROJECT_NAME} ARCHIVE DESTINATION lib)

12
TODO.md Normal file
View File

@@ -0,0 +1,12 @@
## Bytecode
- [x] Byte array
- Auto-expand
- Add/retrieve byte/int/float/string
- Should not be larger than the byte array itself
- [ ] Chunk
- Add/retrieve all types of data
- Keeps no memory except for caching
- [ ] Chunk loader
- Combine multiple chunks
- Resolve function ids, constant ids, etc

125
doc/OPCODES Normal file
View File

@@ -0,0 +1,125 @@
Operations
----------
Stack operations: (0x00~0x1f)
pushn [int] Push int
pushr [float] Push float (real)
pushs [string] Push string
pshcn [index] Push int from constant list
pshcr [index] Push float from constant list
pshcs [index] Push string from constant list
pushf [function] Push function id
pushz Push zero (or false)
pusht Push true
newa [array] Push (create) empty array
newt [table] Push (create) empty table
pop
dup
Local variables: (0x20~0x2f)
setl [int] Set stack top as indexed local variable
getl [int] Get indexed local variable and place on stack
setg [int] Set global variable
getg [int] Get global variable
Function operations: (0x30~0x3f)
call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters)
ret Leave a function (return value in stack)
retn Leave a function (return nil)
Control flow: (0x40~0x4f)
bz [pc] Branch if zero
bnz [pc] Branch if not zero
jmp [pc] Unconditional jump
* Jumps can only happen within the same function.
Logical/arithmetic: (0x50~0x6f)
sum Sum top 2 values in stack
sub Subtract top 2 values in stack
mul Multiply top 2 values in stack
div Float division
idiv Integer division
eq Equality
neq Inequality
lt Less than
lte Less than or equals
gt Greater than
gte Greater than or equals
and Bitwise AND
or Bitwise OR
xor Bitwise XOR
Table and array operations: (0x70~0x7f)
getkv Get table's value based on key (pull 1 value, push 1 value)
setkv Set table's key and value (pull 2 values from stack)
geta Get array's position value
seta Set array's position value (pull 2 values from stack)
appnd Add value to the end of array
next Push the next pair into the stack (for loops)
smt Set value metatable
mt Get value metatable
Other value operations: (0x80~0x8f)
len Get table, array or string size
type Get type from value at the top of the stack
cast [type] Cast type to another type
ver Return VM version
External code: (0x90~0x9f)
cmpl Compile code to assembly
asmbl Assemble code to bytecode format
load Load bytecode as function (will place function on stack)
Error handling: (0xa0~0xaf)
???
Bytecode format
---------------
The bytecode file is composed of the following sections:
* [0x0] 16-byte header
[00]: VM format
[??]: reserved
* [0x1] Index: pointers to each one of the sections, up to 8
Each pointer: 4 bytes
* [0x2] Constants: all constants (such as strings) used in the code
* Table of 4-byte constant indexes with pointer to constant
* Raw constant data
* [0x3] Functions: Pointer to functions within the code
[0:3]: function pointer
[4:5]: number of parameters
[6:7]: number of local variables
* [0x4] Code: executable code
[1-byte]: operation
[variable]: operand (see value encoding below)
* [0x5] Debugging info
???
The maximum file size is 2 GB.
## Values can be encoded in the following ways:
* The type is defined by the operator.
* Encoding varies according to the type:
int: variable-length integer (varint) with zigzag encoding, as in protobuf's sint32 format
float: 4-byte (32-bit) floating point, least-significant byte first
string: int-defined length, followed by the string proper - no null terminator
* Constant indexes and function ids are encoded as ints
Internal handling of values
---------------------------
## Supported types
Nil 0
Integer 1
Float 2
String 3
Array 4
Table 5
Function 6
NativePointer 7
## Internal format
???

84
src/bytecode/bytearray.cc Normal file
View File

@@ -0,0 +1,84 @@
#include "bytearray.hh"
#include <cstring>
namespace tyche {
// Stores `byte` at offset `addr`, growing the array when `addr` lies past its
// current end. Any gap opened up by the growth is zero-filled; an in-range
// `addr` simply overwrites the existing byte.
//
// Throws std::length_error if `addr` cannot be represented as a valid index of
// the underlying vector.
void ByteArray::add_byte(uint32_t addr, uint8_t byte)
{
    // Widen before adding 1: computing `addr + 1` in 32 bits wraps to 0 for
    // addr == UINT32_MAX, which would turn the growth into a resize(0) and
    // discard everything already stored.
    const std::vector<uint8_t>::size_type index = addr;
    if (index >= data_.max_size())
        throw std::length_error("ByteArray address out of range");

    // Writing past the end is the normal append path, so it is handled with a
    // plain bounds check rather than by catching std::out_of_range.
    if (index >= data_.size())
        data_.resize(index + 1, 0);
    data_[index] = byte;
}
// Writes `value` starting at `addr` as a zigzag-encoded base-128 varint,
// using between one and five bytes.
//
// Zigzag interleaves negative and positive numbers (0 -> 0, -1 -> 1, 1 -> 2,
// -2 -> 3, ...) so values of small magnitude stay short regardless of sign.
// Each emitted byte carries seven payload bits, least-significant group first;
// bit 7 is set on every byte except the last one.
void ByteArray::add_int(uint32_t addr, int32_t value)
{
    // Do the zigzag step entirely on unsigned values: left-shifting a negative
    // signed integer is undefined before C++20, and right-shifting one is
    // implementation-defined. The mask is all ones for negative inputs.
    const uint32_t sign_mask = value < 0 ? 0xFFFFFFFFu : 0u;
    uint32_t zz = (static_cast<uint32_t>(value) << 1) ^ sign_mask;

    while (zz > 0x7F) {
        add_byte(addr++, static_cast<uint8_t>((zz & 0x7F) | 0x80));
        zz >>= 7;
    }
    add_byte(addr, static_cast<uint8_t>(zz & 0x7F));
}
// Writes the 4-byte object representation of `value` at `addr`..`addr + 3`,
// least-significant byte first.
void ByteArray::add_float(uint32_t addr, float value)
{
    uint32_t raw;
    std::memcpy(&raw, &value, 4);

    // Emit the four bytes in ascending address order, low byte first.
    for (uint32_t i = 0; i < 4; ++i)
        add_byte(addr + i, static_cast<uint8_t>(raw >> (8 * i)));
}
// Writes the characters of `str` starting at `addr`, followed by a single
// terminating zero byte.
//
// NOTE(review): a string that itself contains a zero byte will be cut short
// when read back with get_string(), which stops at the first zero.
void ByteArray::add_string(uint32_t addr, std::string const& str)
{
    uint32_t cursor = addr;
    for (auto it = str.begin(); it != str.end(); ++it)
        add_byte(cursor++, static_cast<uint8_t>(*it));
    add_byte(cursor, 0);
}
// Returns the byte stored at `addr`.
// Throws std::out_of_range (from std::vector::at) when `addr` is past the end.
uint8_t ByteArray::get_byte(uint32_t addr) const
{
    const std::vector<uint8_t>::size_type index = addr;
    return data_.at(index);
}
std::pair<int32_t, size_t> ByteArray::get_int(uint32_t addr) const
{
uint32_t zz = 0;
int shift = 0;
for (size_t i = 0; shift < 35; i++) {
uint8_t byte = get_byte(addr++);
zz |= (uint32_t)(byte & 0x7F) << shift;
if (!(byte & 0x80)) {
int32_t value = (int32_t)((zz >> 1) ^ -(zz & 1));
return std::make_pair(value, (int)(i + 1));
}
shift += 7;
}
throw BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr));
}
// Reads four bytes at `addr`..`addr + 3` (least-significant byte first) and
// reinterprets them as a float.
//
// Returns the value and the number of bytes consumed, which is always 4.
// Throws std::out_of_range (via get_byte) if any of the four bytes is missing.
std::pair<float, size_t> ByteArray::get_float(uint32_t addr) const
{
    uint32_t raw = 0;
    for (uint32_t i = 0; i < 4; ++i)
        raw |= static_cast<uint32_t>(get_byte(addr + i)) << (8 * i);

    float result;
    std::memcpy(&result, &raw, 4);
    return std::pair<float, size_t>(result, 4);
}
// Reads characters starting at `addr` up to (not including) the first zero
// byte.
//
// Returns the string and the number of bytes consumed, i.e. the string length
// plus one for the terminator.
// Throws std::out_of_range (via get_byte) if no zero byte is found before the
// end of the array.
std::pair<std::string, size_t> ByteArray::get_string(uint32_t addr) const
{
    std::string text;
    for (;;) {
        const char ch = static_cast<char>(get_byte(addr++));
        if (ch == '\0')
            break;
        text += ch;
    }

    const size_t consumed = text.size() + 1;
    return { text, consumed };
}
}

36
src/bytecode/bytearray.hh Normal file
View File

@@ -0,0 +1,36 @@
#ifndef TYCHE_BYTEARRAY_HH
#define TYCHE_BYTEARRAY_HH
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>
namespace tyche {
// Growable byte buffer with helpers to write and read encoded values at
// explicit offsets.
//
// The add_* functions overwrite existing bytes or extend the buffer as needed
// (gaps are zero-filled). The get_* functions that return a pair yield the
// decoded value and the number of bytes it occupied in the buffer.
class ByteArray {
public:
// Stores one byte at `addr`, growing the buffer if `addr` is past its end.
void add_byte(uint32_t addr, uint8_t byte);
// Stores `value` as a zigzag-encoded variable-length integer (1 to 5 bytes).
void add_int(uint32_t addr, int32_t value);
// Stores `value` as 4 bytes, least-significant byte first.
void add_float(uint32_t addr, float value);
// Stores the characters of `str` followed by a terminating zero byte.
void add_string(uint32_t addr, std::string const& str);
// Returns the byte at `addr`; throws std::out_of_range if `addr` is past the end.
[[nodiscard]] uint8_t get_byte(uint32_t addr) const;
// Decodes a variable-length integer; throws BytecodeParsingError if it does
// not terminate within 5 bytes.
[[nodiscard]] std::pair<int32_t, size_t> get_int(uint32_t addr) const;
// Decodes a 4-byte float; the byte count is always 4.
[[nodiscard]] std::pair<float, size_t> get_float(uint32_t addr) const;
// Reads up to the first zero byte; the byte count includes that terminator.
[[nodiscard]] std::pair<std::string, size_t> get_string(uint32_t addr) const;
// Read-only access to the raw bytes.
[[nodiscard]] std::vector<uint8_t> const& data() const { return data_; }
private:
// Backing storage; its size is one past the highest address written so far.
std::vector<uint8_t> data_;
};
// Error raised when stored bytes cannot be decoded into a value (for example,
// a variable-length integer that never terminates). Derives from
// std::runtime_error, so what() returns the message passed to the constructor.
class BytecodeParsingError : public std::runtime_error {
public:
    explicit BytecodeParsingError(std::string const& str)
        : std::runtime_error{str.c_str()}
    {
    }
};
}
#endif //TYCHE_BYTEARRAY_HH

45
src/bytecode/tests.cc Normal file
View File

@@ -0,0 +1,45 @@
#include "gtest/gtest.h"
#include <cstring>
#include <functional>
#include "bytearray.hh"
using namespace tyche;
// Exercises ByteArray: raw byte layout produced by the add_* functions, and
// add_*/get_* round trips (value and number of bytes consumed).
TEST(ByteArray, ByteArray)
{
    // Spell out the exact pair types returned by ByteArray. Comparing against
    // std::make_pair(12, 1) (pair<int, int>) or a pair holding a string
    // literal needs heterogeneous std::pair comparison, which older standard
    // libraries do not provide.
    using IntResult = std::pair<int32_t, size_t>;
    using StringResult = std::pair<std::string, size_t>;

    // Runs `f` on a fresh ByteArray and checks the resulting raw bytes.
    auto test = [](std::function<void(ByteArray&)> const& f, std::vector<uint8_t> const& expected) {
        ByteArray ba;
        f(ba);
        ASSERT_EQ(ba.data(), expected);
    };

#define TESTX(a, ...) test([](ByteArray& ba) { a; }, std::vector<uint8_t>({ __VA_ARGS__ }));

    // Writing past the end zero-fills the gap.
    TESTX(ba.add_byte(1, 0xab), 0x00, 0xab)
    // Zigzag varint layout: -1 -> 1; 300 -> 600 -> 0xd8 0x04.
    TESTX(ba.add_int(0, -1), 0x01)
    TESTX(ba.add_int(0, 300), 0xd8, 0x04)
    // Strings are stored with a terminating zero byte.
    TESTX(ba.add_string(0, "hi"), 'h', 'i', 0x00)

    ByteArray ba;

    ba.add_byte(1, 0xab);
    ASSERT_EQ(ba.get_byte(1), 0xab);

    ba.add_int(1, 12);
    ASSERT_EQ(ba.get_int(1), IntResult(12, 1));
    ba.add_int(1, -12);
    ASSERT_EQ(ba.get_int(1), IntResult(-12, 1));
    ba.add_int(1, 5000);
    ASSERT_EQ(ba.get_int(1), IntResult(5000, 2));
    ba.add_int(1, 5000300);
    ASSERT_EQ(ba.get_int(1), IntResult(5000300, 4));
    ba.add_int(1, -5000300);
    ASSERT_EQ(ba.get_int(1), IntResult(-5000300, 4));

    // Float literals carry the `f` suffix so no double -> float conversion is
    // hidden in the call or in the comparison.
    ba.add_float(1, 3.14f);
    ASSERT_FLOAT_EQ(ba.get_float(1).first, 3.14f);
    ba.add_float(1, -3.14f);
    ASSERT_FLOAT_EQ(ba.get_float(1).first, -3.14f);
    ba.add_float(1, -5000300.1324f);
    ASSERT_FLOAT_EQ(ba.get_float(1).first, -5000300.1324f);

    ba.add_string(1, "Hello world!");
    ASSERT_EQ(ba.get_string(1), StringResult("Hello world!", 13));

#undef TESTX
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}