diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..b58b603 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..0b76fe5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..e77599d --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/tyche.iml b/.idea/tyche.iml new file mode 100644 index 0000000..f08604b --- /dev/null +++ b/.idea/tyche.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..8306744 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..2fac843 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,92 @@ +cmake_minimum_required (VERSION 3.24) + +project(tyche + VERSION 0.0.1 + DESCRIPTION "An embeddable/standalone programming language" + LANGUAGES C CXX ASM) + +# +# project options / configuration +# + +set(CMAKE_C_STANDARD 17) +set(CMAKE_CXX_STANDARD 23 CACHE STRING "C++ Standard") +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set_property(GLOBAL PROPERTY CXX_EXTENSIONS OFF) +set_property(GLOBAL PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +set_property(GLOBAL PROPERTY LINK_WHAT_YOU_USE TRUE) + +# warnings / flags +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(warnings -Wall -Wextra -Wformat-nonliteral -Wundef -Wshadow -Wwrite-strings -Wfloat-equal -Wswitch-default -Wmissing-format-attribute -Wswitch-enum -Wmissing-noreturn -Wno-unused-parameter -Wno-unused) + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(warnings ${warnings} -Wsuggest-attribute=pure -Wsuggest-attribute=const 
-Wsuggest-attribute=noreturn -Wsuggest-attribute=malloc -Wsuggest-attribute=format -Wsuggest-attribute=cold) + endif() +endif() + +# try to use ccache, if available +find_program(CCACHE_PROGRAM ccache) +if(CCACHE_PROGRAM) + set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}") +endif() + +# ignore warnings in imported files +set_source_files_properties(${IMGUI_SRC} PROPERTIES COMPILE_FLAGS "-w") + +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options(-ggdb -O0) +endif() + +if(CMAKE_BUILD_TYPE STREQUAL "Release") + set(DEF B_PRODUCTION_MODE=ON) + add_compile_options(-Ofast -flto) +endif() + +# +# libraries +# + +include(FetchContent) +FetchContent_Declare( + googletest + # Specify the commit you depend on and update it regularly. + URL https://github.com/google/googletest/releases/download/v1.17.0/googletest-1.17.0.tar.gz +) +FetchContent_MakeAvailable(googletest) + +# +# library +# + +add_library(lib${PROJECT_NAME} STATIC + src/bytecode/bytearray.hh + src/bytecode/bytearray.cc +) + +target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings}) + +# +# tests +# + +add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc) +target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main) +add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test) + +# +# check for leaks +# + +add_custom_target(leaks) +add_custom_command(TARGET leaks + POST_BUILD + COMMENT "Check for leaks using valgrind." 
+ WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMAND valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp ./${PROJECT_NAME} +) + +# +# installation +# + +install(TARGETS lib${CMAKE_PROJECT_NAME} RUNTIME DESTINATION lib) \ No newline at end of file diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..70c6169 --- /dev/null +++ b/TODO.md @@ -0,0 +1,12 @@ +## Bytecode + +- [x] Byte array + - Auto-expand + - Add/retrieve byte/int/float/string + - Should not be larger than the byte array itself +- [ ] Chunk + - Add/retrieve all types of data + - Keeps no memory except for caching +- [ ] Chunk loader + - Combine multiple chunks + - Resolve function ids, constant ids, etc \ No newline at end of file diff --git a/doc/OPCODES b/doc/OPCODES new file mode 100644 index 0000000..6476112 --- /dev/null +++ b/doc/OPCODES @@ -0,0 +1,125 @@ +Operations +---------- + +Stack operations: (0x00~0x1f) + pushn [int] Push int + pushr [float] Push float (real) + pushs [string] Push string + pshcn [index] Push int from constant list + pshcr [index] Push float from constant list + pshcs [index] Push string from constant list + pushf [function] Push function id + pushz Push zero (or false) + pusht Push true + newa [array] Push (create) empty array + newt [table] Push (create) empty table + pop + dup + +Local variables: (0x20~0x2f) + setl [int] Set stack top as indexed local variable + getl [int] Get indexed local variable and place on stack + setg [int] Set global variable + getg [int] Get global variable + +Function operations: (0x30~0x3f) + call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters) + ret Leave a function (return value in stack) + retn Leave a function (return nil) + +Control flow: (0x40~0x4f) + bz [pc] Branch if zero + bnz [pc] Branch if not zero + jmp [pc] Unconditional jump +* Jumps can only happen within the same function. 
+ +Logical/arithmetic: (0x50~0x6f) + sum Sum top 2 values in stack + sub Subtract top 2 values in stack + mul Multiply top 2 values in stack + div Float division + idiv Integer division + eq Equality + neq Inequality + lt Less than + lte Less than or equals + gt Greater than + gte Greater than or equals + and Bitwise AND + or Bitwise OR + xor Bitwise XOR + +Table and array operations: (0x70~0x7f) + getkv Get table's value based on key (pull 1 value, push 1 value) + setkv Set table's key and value (pull 2 values from stack) + geta Get array's position value + seta Set array's position value (pull 2 values from stack) + appnd Add value to the end of array + next Push the next pair into the stack (for loops) + smt Set value metatable + mt Get value metatable + +Other value operations: (0x80~0x8f) + len Get table, array or string size + type Get type from value at the top of the stack + cast [type] Cast type to another type + ver Return VM version + +External code: (0x90~0x9f) + cmpl Compile code to assembly + asmbl Assemble code to bytecode format + load Load bytecode as function (will place function on stack) + +Error handling: (0xa0~0xaf) + ??? + + +Bytecode format +--------------- + +The bytecode file is composed of the following sections: + + * [0x0] 16-byte header + [00]: VM format + [??]: reserved + * [0x1] Index: pointers to each one of the sections, up to 8 + Each pointer: 4 bytes + * [0x2] Constants: all constants (such as strings) used in the code + * Table of 4-byte constant indexes with pointer to constant + * Raw constant data + * [0x3] Functions: Pointer to functions within the code + [0:3]: function pointer + [4:5]: number of parameters + [6:7]: number of local variables + * [0x4] Code: executable code + [1-byte]: operation + [variable]: operand (see value encoding below) + * [0x5] Debugging info + ??? + +The max file size is 2 GB. + +## Values can be encoded in the following ways: + * The type is defined by the operator. 
+ * Encoding varies according to the type: + int: use protobuf varint format (zigzag-encoded) + float: 4-byte floating point + string: int-defined length, followed by the string proper - no null terminator + * Constant indexes and function ids are encoded as ints + + +Internal handling of values +--------------------------- + +## Supported types + Nil 0 + Integer 1 + Float 2 + String 3 + Array 4 + Table 5 + Function 6 + NativePointer 7 + +## Internal format + ??? \ No newline at end of file diff --git a/src/bytecode/bytearray.cc b/src/bytecode/bytearray.cc new file mode 100644 index 0000000..9968642 --- /dev/null +++ b/src/bytecode/bytearray.cc @@ -0,0 +1,84 @@ +#include "bytearray.hh" + +#include + +namespace tyche { + +void ByteArray::add_byte(uint32_t addr, uint8_t byte) +{ + try { + data_.at(addr) = byte; + } catch (std::out_of_range&) { + data_.resize(addr + 1, 0); + data_.at(addr) = byte; + } +} + +void ByteArray::add_int(uint32_t addr, int32_t value) +{ + uint32_t zz = ((uint32_t)(value << 1)) ^ ((uint32_t)(value >> 31)); + while (zz > 0x7F) { + add_byte(addr++, (zz & 0x7F) | 0x80); + zz >>= 7; + } + add_byte(addr, zz & 0x7F); +} + +void ByteArray::add_float(uint32_t addr, float value) +{ + uint32_t bits; + std::memcpy(&bits, &value, 4); + add_byte(addr, (uint8_t)(bits)); + add_byte(addr+1, (uint8_t)(bits >> 8)); + add_byte(addr+2, (uint8_t)(bits >> 16)); + add_byte(addr+3, (uint8_t)(bits >> 24)); +} + +void ByteArray::add_string(uint32_t addr, std::string const& str) +{ + for (uint8_t c: str) + add_byte(addr++, c); + add_byte(addr, 0); +} + +uint8_t ByteArray::get_byte(uint32_t addr) const +{ + return data_.at(addr); +} + +std::pair ByteArray::get_int(uint32_t addr) const +{ + uint32_t zz = 0; + int shift = 0; + for (size_t i = 0; shift < 35; i++) { + uint8_t byte = get_byte(addr++); + zz |= (uint32_t)(byte & 0x7F) << shift; + if (!(byte & 0x80)) { + int32_t value = (int32_t)((zz >> 1) ^ -(zz & 1)); + return std::make_pair(value, (int)(i + 1)); + } + shift += 7; + } + throw 
BytecodeParsingError("Error parsing int32 at position " + std::to_string(addr)); +} + +std::pair ByteArray::get_float(uint32_t addr) const +{ + uint32_t bits = (uint32_t) get_byte(addr) + | (uint32_t) get_byte(addr+1) << 8 + | (uint32_t) get_byte(addr+2) << 16 + | (uint32_t) get_byte(addr+3) << 24; + float value; + std::memcpy(&value, &bits, 4); + return { value, 4 }; +} + +std::pair ByteArray::get_string(uint32_t addr) const +{ + std::string data; + while (char c = (char) get_byte(addr++)) + data += c; + return { data, data.size() + 1 }; +} + +} \ No newline at end of file diff --git a/src/bytecode/bytearray.hh b/src/bytecode/bytearray.hh new file mode 100644 index 0000000..a7d5b85 --- /dev/null +++ b/src/bytecode/bytearray.hh @@ -0,0 +1,36 @@ +#ifndef TYCHE_BYTEARRAY_HH +#define TYCHE_BYTEARRAY_HH + +#include +#include +#include +#include + +namespace tyche { + +class ByteArray { +public: + void add_byte(uint32_t addr, uint8_t byte); + void add_int(uint32_t addr, int32_t value); + void add_float(uint32_t addr, float value); + void add_string(uint32_t addr, std::string const& str); + + [[nodiscard]] uint8_t get_byte(uint32_t addr) const; + [[nodiscard]] std::pair get_int(uint32_t addr) const; + [[nodiscard]] std::pair get_float(uint32_t addr) const; + [[nodiscard]] std::pair get_string(uint32_t addr) const; + + [[nodiscard]] std::vector const& data() const { return data_; } + +private: + std::vector data_; +}; + +class BytecodeParsingError : public std::runtime_error { +public: + explicit BytecodeParsingError(std::string const& str) : std::runtime_error(str.c_str()) {} +}; + +} + +#endif //TYCHE_BYTEARRAY_HH diff --git a/src/bytecode/tests.cc b/src/bytecode/tests.cc new file mode 100644 index 0000000..7d5ab87 --- /dev/null +++ b/src/bytecode/tests.cc @@ -0,0 +1,45 @@ +#include "gtest/gtest.h" + +#include +#include + +#include "bytearray.hh" + +using namespace tyche; + +TEST(ByteArray, ByteArray) +{ + auto test = [](std::function const& f, std::vector const& 
expected) { + ByteArray ba; + f(ba); + ASSERT_EQ(ba.data().size(), expected.size()); + ASSERT_EQ(std::memcmp(ba.data().data(), expected.data(), ba.data().size()), 0); + }; + +#define TESTX(a, ...) test([](ByteArray& ba) { a; }, std::vector({ __VA_ARGS__ })); + + TESTX(ba.add_byte(1, 0xab), 0x00, 0xab) + + ByteArray ba; + ba.add_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab); + + ba.add_int(1, 12); ASSERT_EQ(ba.get_int(1), std::make_pair(12, 1)); + ba.add_int(1, -12); ASSERT_EQ(ba.get_int(1), std::make_pair(-12, 1)); + ba.add_int(1, 5000); ASSERT_EQ(ba.get_int(1), std::make_pair(5000, 2)); + ba.add_int(1, 5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(5000300, 4)); + ba.add_int(1, -5000300); ASSERT_EQ(ba.get_int(1), std::make_pair(-5000300, 4)); + + ba.add_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, 3.14); + ba.add_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1).first, -3.14); + ba.add_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1).first, -5000300.1324); + + ba.add_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13)); + +#undef TESTX +} + +int main(int argc, char** argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file