Compare commits

18 Commits

Author SHA1 Message Date
Andre Wagner
62d36f68b8 . 2026-05-03 09:53:23 -05:00
Andre Wagner
eee26cc767 . 2026-05-02 21:13:31 -05:00
Andre Wagner
223570478e . 2026-05-02 21:12:12 -05:00
Andre Wagner
ff23e14122 . 2026-05-02 20:54:26 -05:00
9bc6ad1c92 Variables (#8) 2026-05-02 20:12:15 -05:00
f9733f3b20 Expressions (#7) 2026-05-02 15:07:11 -05:00
a1aed4988a Assembler (#6)
Co-authored-by: Andre Wagner <WagnerAndre@JohnDeere.com>
Reviewed-on: https://192.168.5.48/andre/tyche/pulls/6
2026-05-01 10:12:41 -05:00
b835dbb36e VM basics (#5)
Co-authored-by: Andre Wagner <WagnerAndre@JohnDeere.com>
Reviewed-on: https://192.168.5.48/andre/tyche/pulls/5
2026-04-30 13:34:49 -05:00
Andre Wagner
71390b0f84 . 2026-04-29 16:04:30 -05:00
Andre Wagner
b471726e0d . 2026-04-29 15:57:39 -05:00
Andre Wagner
feb272e545 . 2026-04-29 15:50:27 -05:00
Andre Wagner
03b61f4339 . 2026-04-29 15:36:22 -05:00
Andre Wagner
30bfb38e9a . 2026-04-29 15:18:30 -05:00
Andre Wagner
635596c31d . 2026-04-29 14:56:03 -05:00
148c98e642 code (#4)
Reviewed-on: #4
2026-04-29 14:44:33 -05:00
54729c1e14 bytecode-improvements (#3)
Reviewed-on: #3
2026-04-29 11:40:46 -05:00
d8130272a0 bytecode2 (#2)
Reviewed-on: #2
2026-04-28 19:50:48 -05:00
3f097b0ba8 Byte array (#1)
Reviewed-on: #1
2026-04-27 09:42:03 -05:00
43 changed files with 3040 additions and 126 deletions

2
.gitignore vendored
View File

@@ -32,3 +32,5 @@
*.out
*.app
cmake-build-*/
build/

11
.idea/ctestState.xml generated Normal file
View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CidrCTestProjectState">
<ctestState>
<lineNumber testId="path:CMakeLists.txt test:tyche_as_test" value="116" />
<lineNumber testId="path:CMakeLists.txt test:tyche_bytearray_test" value="102" />
<lineNumber testId="path:CMakeLists.txt test:tyche_bytecode_test" value="106" />
<lineNumber testId="path:CMakeLists.txt test:tyche_vm_test" value="110" />
</ctestState>
</component>
</project>

37
.idea/editor.xml generated Normal file
View File

@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="BackendCodeEditorSettings">
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppClassCanBeFinal/@EntryIndexedValue" value="DO_NOT_SHOW" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppDefinitionsOrder/@EntryIndexedValue" value="HINT" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppDeprecatedOverridenMethod/@EntryIndexedValue" value="WARNING" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppModulePartitionWithSeveralPartitionUnits/@EntryIndexedValue" value="WARNING" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppRedundantFwdClassOrEnumSpecifier/@EntryIndexedValue" value="SUGGESTION" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppRedundantQualifierADL/@EntryIndexedValue" value="DO_NOT_SHOW" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppCodeStyle/CVQualifiersPlacement/@EntryValue" value="AfterType" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ALIGN_TERNARY/@EntryValue" value="ALIGN_ALL" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ALLOW_COMMENT_AFTER_LBRACE/@EntryValue" value="true" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ANONYMOUS_METHOD_DECLARATION_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/CASE_BLOCK_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/EMPTY_BLOCK_STYLE/@EntryValue" value="TOGETHER_SAME_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ENABLE_SLATE_FORMAT/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/INDENT_CASE_FROM_SWITCH/@EntryValue" value="true" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/INDENT_GOTO_LABELS/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/MAX_ENUM_MEMBERS_ON_LINE/@EntryValue" value="6" type="long" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/NAMESPACE_DECLARATION_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/NAMESPACE_INDENTATION/@EntryValue" value="None" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/OTHER_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/PLACE_CATCH_ON_NEW_LINE/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/PLACE_ELSE_ON_NEW_LINE/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/PLACE_WHILE_ON_NEW_LINE/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/REQUIRES_EXPRESSION_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/SIMPLE_EMBEDDED_STATEMENT_STYLE/@EntryValue" value="LINE_BREAK" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/SPACE_AFTER_CAST_EXPRESSION_PARENTHESES/@EntryValue" value="true" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/WRAP_LINES/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Classes_0020and_0020structs/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;1&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;__interface&quot; /&gt;&lt;type Name=&quot;class&quot; /&gt;&lt;type Name=&quot;struct&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Concepts/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;2&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;concept&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Enum_0020members/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;14&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;scoped enumerator&quot; /&gt;&lt;type Name=&quot;unscoped enumerator&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Enums/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;3&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;enum&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Other_0020constants/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;15&quot;&gt;&lt;Descriptor Static=&quot;True&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;True&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;class field&quot; /&gt;&lt;type Name=&quot;local variable&quot; /&gt;&lt;type Name=&quot;struct field&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AA_BB&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Unions/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;4&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;union&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
</component>
</project>

7
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakePythonSetting">
<option name="pythonIntegrationState" value="YES" />
</component>
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
</project>

8
.idea/tyche.iml generated
View File

@@ -1,8 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="CPP_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<module classpath="CIDR" type="CPP_MODULE" version="4" />

135
CMakeLists.txt Normal file
View File

@@ -0,0 +1,135 @@
cmake_minimum_required (VERSION 3.24)
project(tyche
VERSION 0.0.1
DESCRIPTION "An embeddable/standalone programming language"
LANGUAGES C CXX ASM)
#
# project options / configuration
#
set(CMAKE_C_STANDARD 17)
set(CMAKE_CXX_STANDARD 23 CACHE STRING "C++ Standard")
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# CXX_EXTENSIONS, INTERPROCEDURAL_OPTIMIZATION and LINK_WHAT_YOU_USE are
# *target* properties; setting them as GLOBAL properties is silently ignored.
# The CMAKE_<PROP> variables below initialise the property on every target
# created after this point.
set(CMAKE_CXX_EXTENSIONS OFF)

# Only request IPO/LTO when the toolchain supports it, otherwise CMake aborts
# at generate time (policy CMP0069).
include(CheckIPOSupported)
check_ipo_supported(RESULT tyche_ipo_supported OUTPUT tyche_ipo_error)
if(tyche_ipo_supported)
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
else()
    message(STATUS "IPO/LTO not enabled: ${tyche_ipo_error}")
endif()

set(CMAKE_LINK_WHAT_YOU_USE ON)
# warnings / flags
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(warnings -Wall -Wextra -Wformat-nonliteral -Wundef -Wshadow -Wwrite-strings -Wfloat-equal -Wswitch-default -Wmissing-format-attribute -Wswitch-enum -Wmissing-noreturn -Wno-unused-parameter -Wno-unused)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(warnings ${warnings} -Wsuggest-attribute=pure -Wsuggest-attribute=const -Wsuggest-attribute=noreturn -Wsuggest-attribute=malloc -Wsuggest-attribute=format -Wsuggest-attribute=cold)
endif()
endif()
# try to use ccache, if available
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
endif()
# ignore warnings in imported files
set_source_files_properties(${IMGUI_SRC} PROPERTIES COMPILE_FLAGS "-w")
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(-ggdb -O0)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(DEF B_PRODUCTION_MODE=ON)
add_compile_options(-Ofast -flto)
endif()
#
# libraries
#
include(FetchContent)
FetchContent_Declare(
googletest
# Specify the commit you depend on and update it regularly.
URL https://github.com/google/googletest/releases/download/v1.17.0/googletest-1.17.0.tar.gz
)
FetchContent_MakeAvailable(googletest)
#
# library
#
add_library(lib${PROJECT_NAME} SHARED
src/common/overloaded.hh
src/bytearray/bytearray.hh
src/bytearray/bytearray.cc
src/bytearray/bytearraybuilder.hh
src/bytearray/bytearraybuilder.cc
#src/bytecode/bytecode.cc
#src/bytecode/bytecode.hh
#src/bytecode/bytecodeprototype.hh
#src/bytecode/constant.hh
#src/bytecode/bc_exceptions.hh
#src/assembler/lexer.cc
#src/assembler/lexer.hh
#src/assembler/assembler.cc
#src/assembler/assembler.hh
#src/assembler/as_exceptions.hh
#src/instructions/instruction.hh
#src/instructions/instruction.cc
#src/vm/code.cc
#src/vm/code.hh
#src/vm/value.cc
#src/vm/value.hh
#src/vm/stack.cc
#src/vm/stack.hh
#src/vm/vm_exceptions.hh
#src/vm/vm.cc
#src/vm/vm.hh
#src/vm/expr.cc
#src/vm/expr.hh
#src/vm/location.hh
)
target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings})
#
# tests
#
enable_testing()
add_executable(${PROJECT_NAME}-bytearray-test src/bytearray/tests.cc)
target_link_libraries(${PROJECT_NAME}-bytearray-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_bytearray_test COMMAND ${PROJECT_NAME}-bytearray-test)
add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc)
target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test)
add_executable(${PROJECT_NAME}-vm-test src/vm/tests.cc)
target_link_libraries(${PROJECT_NAME}-vm-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_vm_test COMMAND ${PROJECT_NAME}-vm-test)
add_executable(${PROJECT_NAME}-as-test src/assembler/tests.cc
src/bytearray/bytearraybuilder.cc
src/bytearray/bytearraybuilder.hh)
target_link_libraries(${PROJECT_NAME}-as-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_as_test COMMAND ${PROJECT_NAME}-as-test)
#
# check for leaks
#
# Runs the VM test binary under valgrind. The target depends on the test
# executable so it is (re)built first, and the binary is located through a
# generator expression instead of assuming it sits in the top-level build dir.
add_custom_target(leaks-vm-test
        COMMAND valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp $<TARGET_FILE:${PROJECT_NAME}-vm-test>
        DEPENDS ${PROJECT_NAME}-vm-test
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
        COMMENT "Check for leaks using valgrind."
        VERBATIM
)
#
# installation
#
# Use PROJECT_NAME (not CMAKE_PROJECT_NAME, which is the *top-level* project)
# so the target name still resolves when tyche is embedded via
# add_subdirectory()/FetchContent. Every artifact kind gets an explicit
# destination; RUNTIME keeps its original "lib" destination.
install(TARGETS lib${PROJECT_NAME}
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib
        RUNTIME DESTINATION lib)

63
TODO.md
View File

@@ -1,12 +1,65 @@
## Chunk
## Bytecode
- [ ] Byte array
- [x] Byte array
- Auto-expand
- Add/retrieve byte/int/float/string
- Should not be larger than the byte array itself
- [ ] Chunk
- [x] Bytecode
- Add/retrieve all types of data
- Keeps no memory except for caching
- [ ] Chunk loader
- [x] Refactor bytecode code
Improvements:
- [x] Fixed int type (based on opcode)
- [x] Constant type (only floats and strings for now)
After some additional development:
- [ ] Bytecode debugging info
## VM
- [x] VM
- [x] Code
- [x] Simple bytecode loader
- [x] Output bytecode format
- [x] Value object
- [x] Stack object
- [x] External interface
- [x] Code execution (except functions)
- [x] Functions
- [x] Print stack
- [x] Assembler
- [ ] VM execution
- [x] Stack operations (nil, integer, float, string, function)
- [x] Integer
- [x] Float
- [x] String
- [x] Expressions
- [x] Integer
- [x] Float
- [x] String
- [ ] Local/global variables
- [ ] Functions
- [ ] Constants
- [ ] Other operations
- [ ] Arrays
- [ ] Iteration
- [ ] Expressions
- [ ] Tables
- [ ] Iteration
- [ ] Metatables
- [ ] Expressions
- [ ] Control flow
- [ ] Compilation
- [ ] Error handling
- [ ] C++ API
- [ ] Run native code on VM
- [ ] Run tyche code from C++
- [ ] C API
After some additional development:
- [ ] Bytecode loader
- Combine multiple chunks
- Resolve function ids, constant ids, etc
- Resolve function ids, constant ids, etc
- [ ] Upvalues

35
doc/BYTECODE Normal file
View File

@@ -0,0 +1,35 @@
Bytecode format
---------------
The bytecode file is composed of the following sections:
* HEADER: 16-byte header
[0:3]: Magic
[4]: VM format
[rest]: Reserved for future use
* TABLE_OF_CONTENTS: list of 8 records pointing to each one of the sections
Each record (6 bytes):
- Pointer to section: 4 bytes
- Number of records in section: 2 bytes
* [0x0] Constants indexes: pointers to each of the constant locations
* Table of 4-byte constant indexes with pointer to constant
(counter starts at the beginning of raw constants)
* [0x1] Functions indexes: Pointer to functions within the code
[0:3]: function pointer (counter starts at the beginning of executable code)
[4:5]: number of parameters
[6:7]: number of local variables
[8:b]: function size
* [0x2] Constants raw data
* [0x3] Code: executable code
* [0x4] Debugging info
???
The max file size is 2 Gb.
## Values can be encoded in the following ways:
* The type is defined by the operator.
* Encoding varies according to the type:
int: use protobuf format
float: 4-byte (32-bit) floating point
string: int-defined length, followed by the string proper - no null terminator
* Constant indexes and function ids are encoded as ints

View File

@@ -1,125 +1,93 @@
Operations
----------
Stack operations: (0x00~0x1f)
pushn [int] Push int
pushr [float] Push float (real)
pushs [string] Push string
pshcn [index] Push int from constant list
pshcr [index] Push float from constant list
pshcs [index] Push string from constant list
pushf [function] Push function id
pushz Push zero (or false)
pusht Push true
newa [array] Push (create) empty array
newt [table] Push (create) empty table
pop
dup
Operations take either 0 or 1 parameter. When a parameter is present, it is either an int8, an int16 or an int32.
Local variables: (0x20~0x2f)
setl [int] Set stack top as indexed local variable
getl [int] Get indexed local variable and place on stack
setg [int] Set global variable
getg [int] Get global variable
Instructions follow this logic:
Function operations: (0x30~0x3f)
call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters)
ret Leave a function (return value in stack)
retn Leave a function (return nil)
00 ~ 9F : no parameter
A0 ~ BF : int8 (1 byte)
C0 ~ DF : int16 (2 bytes)
E0 ~ FF : int32 (4 bytes)
Control flow: (0x40~0x4f)
bz [pc] Branch if zero
bnz [pc] Branch if not zero
jmp [pc] Unconditional jump
The operations of 1, 2 and 4 bytes are always interchangeable by adding/subtracting 0x20.
,----------- no parameter
| ,-------- int8
| | ,----- int16
| | | ,-- int32
NP I8 I16 I32 Opc Instruction Description
Stack operations:
a0 c0 e0 pushi [int] Push int
a1 c1 e1 pushc [index] Push constant
a2 c2 e2 pushf [function] Push function id
00 pushz Push zero (or false)
01 pusht Push true
02 newa Push (create) empty array
03 newt Push (create) empty table
04 pop
05 dup
Local variables:
a3 c3 e3 pushv [int] Push n nil values into the stack (used to init local vars)
ab cb eb set [index] Set value in stack position (set local variable)
a4 c4 e4 dupv [index] Duplicate stack value (load local variable)
a5 c5 e5 setg [int] Set global variable
a6 c6 e6 getg [int] Get global variable
Function operations:
a7 c7 e7 call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters)
10 ret Leave a function (return value in stack)
11 retn Leave a function (return nil)
Table and array operations:
16 getkv Get table's value based on key (pull 1 value, push 1 value)
17 setkv Set table's key and value (pull 2 values from stack)
18 geta Get array's position value
19 seta Set array's position value (pull 2 values from stack)
1a appnd Add value to the end of array
1b next Push the next pair into the stack (for loops)
1c smt Set value metatable
1d mt Get value metatable
Logical/arithmetic:
20 sum Sum top 2 values in stack
21 sub Subtract top 2 values in stack
22 mul Multiply top 2 values in stack
23 div Float division
24 idiv Integer division
25 eq Equality
26 neq Inequality
27 lt Less than
28 lte Less than or equals
29 gt Greater than
2a gte Greater than or equals
2b and Bitwise AND
2c or Bitwise OR
2d xor Bitwise XOR
2e pow Power
2f shl Shift left
30 shr Shift right
31 mod Modulo
Other value operations:
40 len Get table, array or string size
41 type Get type from value at the top of the stack
b0 cast [type] Cast type to another type
42 ver Return VM version
External code:
48 cmpl Compile code to assembly
49 asmbl Assemble code to bytecode format
4a load Load bytecode as function (will place function on stack)
Control flow:
a8 c8 e8 bz [pc] Branch if zero
a9 c9 e9 bnz [pc] Branch if not zero
aa ca ea jmp [pc] Unconditional jump
* Jumps can only happen within the same function.
Logical/arithmetic: (0x50~0x6f)
sum Sum top 2 values in stack
sub Subtract top 2 values in stack
mul Multiply top 2 values in stack
div Float division
idiv Integer division
eq Equality
neq Inequality
lt Less than
lte Less than or equals
gt Greater than
gte Greater than or equals
and Bitwise AND
or Bitwise OR
xor Bitwise XOR
Table and array operations: (0x70~07xf)
getkv Get table's value based on key (pull 1 value, push 1 value)
setkv Set table's key and value (pull 2 values from stack)
geta Get array's position value
seta Set array's position value (pull 2 values from stack)
appnd Add value to the end of array
next Push the next pair into the stack (for loops)
smt Set value metatable
mt Get value metatable
Other value operations: (0x80~0x8f)
len Get table, array or string size
type Get type from value at the top of the stack
cast [type] Cast type to another type
ver Return VM version
External code: (0x90~0x9f)
cmpl Compile code to assembly
asmbl Assemble code to chunk format
load Load chunk as function (will place function on stack)
Error handling: (0xa0~0xaf)
???
Chunk format
------------
The bytecode file is composed of the following sections:
* [0x0] 16-byte header
[00]: VM format
[??]: reserved
* [0x1] Index: pointers to each one of the sections, up to 8
Each pointer: 4 bits
* [0x2] Constants: all constants (such as strings) used in the code
* Table of 4-bit constant indexes with pointer to constant
* Raw constant data
* [0x3] Functions: Pointer to functions within the code
[0:3]: function pointer
[4:5]: number of parameters
[6:7]: number of local variables
* [0x4] Code: executable code
[1-byte]: operation
[variable]: operand (see value encoding below)
* [0x5] Debugging info
???
The max file size is 2 Gb.
## Values can be encoded in the following ways:
* The type is defined by the operator.
* Encoding varies according to the type:
int: use protobuf format
float: 4-bit floating point
string: int-defined length, followed by the string proper - no null terminator
* Constant indexes and function ids are encoded as ints
Internal handling of values
---------------------------
## Supported types
Nil 0
Integer 1
Float 2
String 3
Array 4
Table 5
Function 6
NativePointer 7
## Internal format
???

15
doc/VM Normal file
View File

@@ -0,0 +1,15 @@
Internal handling of values
---------------------------
## Supported types
Nil 0
Integer 1
Float 2
String 3
Array 4
Table 5
Function 6
NativePointer 7
## Internal format
???

View File

@@ -0,0 +1,18 @@
#ifndef TYCHE_AS_EXCEPTIONS_HH
#define TYCHE_AS_EXCEPTIONS_HH
// NOTE: the guard used to be TYCHE_VM_EXCEPTIONS_HH, a name that belongs to
// the VM exceptions header; sharing a guard would make whichever header is
// included second expand to nothing.
#include <cstddef>
#include <stdexcept>
#include <string>
namespace tyche::as {

/**
 * Error raised by the lexer/assembler for malformed assembly source.
 *
 * The message passed to std::runtime_error is
 * "<str> at: line <line>, column: <column>".
 */
class AssemblyError : public std::runtime_error
{
public:
    /**
     * @param str    description of the problem
     * @param line   line of the offending token, as reported by the lexer
     * @param column column of the offending token, as reported by the lexer
     */
    explicit AssemblyError(std::string const& str, size_t line, size_t column)
        : std::runtime_error(str + " at: line " + std::to_string(line) + ", column: " + std::to_string(column)) {}
};

}
#endif //TYCHE_AS_EXCEPTIONS_HH

View File

@@ -0,0 +1,98 @@
#include "assembler.hh"
#include <unordered_map>
#include "as_exceptions.hh"
#include "../bytecode/bytecode.hh"
#include "../instructions/instruction.hh"
using namespace std::string_literals;
namespace tyche::as {
/**
 * Assemble the source text held by the lexer into a bytecode image.
 *
 * The source is a sequence of lines. A `.const` directive opens the constant
 * section (lines of the form `<index>: <float|string>`); a `.func <id>`
 * directive opens the body of function `<id>` (lines of the form
 * `<instruction> [integer operand]`).
 *
 * @return the result of bc::Bytecode::generate() for the assembled prototype
 * @throws AssemblyError on any malformed or misplaced token
 */
StaticByteArray Assembler::assemble()
{
    bc::BytecodePrototype bp;

    lexer_.reset();

    // `None` until the first directive is seen, so that a constant or an
    // instruction appearing before any directive is rejected instead of
    // reading an uninitialized section.
    enum class Section { None, Const, Function } section = Section::None;
    uint32_t function_id = 0;

    for (;;) {
        Token t = lexer_.ingest();

        // blank lines and comments
        if (t.type == TokenType::Enter)
            continue;

        if (t.type == TokenType::Directive) {

            std::string const& directive = std::get<std::string>(t.token);
            if (directive == ".const") {
                section = Section::Const;
                expect_token(TokenType::Enter);
            } else if (directive == ".func") {
                section = Section::Function;
                // remember where the id is, expect_token() only returns the value
                Token const id_token = lexer_.peek();
                int const id = std::get<int>(expect_token(TokenType::Integer));
                if (id < 0)
                    throw AssemblyError("Function id cannot be negative", id_token.line, id_token.column);
                function_id = static_cast<uint32_t>(id);
                if (function_id >= bp.functions.size())
                    bp.functions.resize(function_id + 1, { 0, 0 });
                expect_token(TokenType::Enter);
            } else {
                throw AssemblyError("Invalid directive " + directive, t.line, t.column);
            }

        } else if (section == Section::Const && t.type == TokenType::Integer) {

            // <index>: <float|string>
            int const parsed_index = std::get<int>(t.token);
            if (parsed_index < 0)
                throw AssemblyError("Constant index cannot be negative", t.line, t.column);
            auto const index = static_cast<size_t>(parsed_index);
            if (index >= bp.constants.size())
                bp.constants.resize(index + 1);

            expect_token(TokenType::Colon);

            Token tt = lexer_.ingest();
            if (tt.type == TokenType::Float)
                bp.constants[index] = std::get<float>(tt.token);
            else if (tt.type == TokenType::String)
                bp.constants[index] = std::get<std::string>(tt.token);
            else
                throw AssemblyError("Expected float or string as constant", tt.line, tt.column);

            expect_token(TokenType::Enter);

        } else if (section == Section::Function && t.type == TokenType::Instruction) {

            // <instruction> [integer operand]
            std::string instruction = std::get<std::string>(t.token);
            std::optional<int> oper = {};

            Token tt = lexer_.ingest();
            if (tt.type == TokenType::Integer) {
                oper = std::get<int>(tt.token);
                tt = lexer_.ingest();
            }

            auto oinst = translate_instruction(instruction, oper);
            if (!oinst)
                throw AssemblyError("Invalid or misused instruction '" + instruction + "'", tt.line, tt.column);

            // Never dereference an absent operand, whatever translate_instruction() returned.
            auto const operand = [&]() -> int {
                if (!oper)
                    throw AssemblyError("Missing operand for instruction '" + instruction + "'", t.line, t.column);
                return *oper;
            };

            auto& code = bp.functions.at(function_id).code;
            code.append_byte(static_cast<uint8_t>(*oinst));
            switch (instruction_operand_type(*oinst)) {
                case OperandType::Int8:  code.append_int8(static_cast<int8_t>(operand())); break;
                case OperandType::Int16: code.append_int16(static_cast<int16_t>(operand())); break;
                case OperandType::Int32: code.append_int32(operand()); break;
                case OperandType::NoOperand: default: break;
            }

            if (tt.type != TokenType::Enter)
                throw AssemblyError("Expected enter", tt.line, tt.column);

        } else if (t.type == TokenType::EOF_) {
            break;
        } else {
            throw AssemblyError("Unexpected token of type " + token_type_name(t.type), t.line, t.column);
        }
    }

    return bc::Bytecode::generate(bp);
}
/**
 * Consume the next token and require it to be of the given type.
 *
 * @param type the token type that must come next
 * @return the value carried by the consumed token
 * @throws AssemblyError if the next token has a different type; the message
 *         names both the expected and the actual token type
 */
TokenValue Assembler::expect_token(TokenType type)
{
    Token t = lexer_.ingest();
    if (t.type != type)
        throw AssemblyError("Expected " + token_type_name(type) + ", found " + token_type_name(t.type), t.line, t.column);
    return t.token;
}
} // tyche

View File

@@ -0,0 +1,27 @@
#ifndef TYCHE_ASSEMBLER_HH
#define TYCHE_ASSEMBLER_HH
#include <optional>
#include <string>
#include "lexer.hh"
#include "../bytearray/bytearray.hh"
#include "../bytecode/bytecodeprototype.hh"
namespace tyche::as {
/**
 * Turns assembly source text into a bytecode image.
 *
 * The source is handed to the lexer with a newline appended.
 */
class Assembler {
public:
    explicit Assembler(std::string source)
        : lexer_ { std::move(source) + "\n" }
    {
    }

    /// Assemble the whole source; the result must not be discarded.
    [[nodiscard]] StaticByteArray assemble();

private:
    /// Consume the next token, which must be of type `type`, and return its value.
    TokenValue expect_token(TokenType type);

    Lexer lexer_;
};
} // tyche
#endif //TYCHE_ASSEMBLER_HH

142
src/assembler/lexer.cc Normal file
View File

@@ -0,0 +1,142 @@
#include "lexer.hh"
#include <cctype>
#include <exception>
#include <iostream>
#include <string>
using namespace std::string_literals;
#include "as_exceptions.hh"
namespace tyche::as {
/// Human-readable name of a token type, used when building error messages.
/// Values outside the enumeration map to "???".
std::string token_type_name(TokenType type)
{
    char const* name = "???";
    switch (type) {
        case TokenType::BOF:         name = "BOF"; break;
        case TokenType::Directive:   name = "directive"; break;
        case TokenType::Instruction: name = "instruction"; break;
        case TokenType::Integer:     name = "integer"; break;
        case TokenType::Float:       name = "float"; break;
        case TokenType::String:      name = "string"; break;
        case TokenType::Enter:       name = "enter"; break;
        case TokenType::Colon:       name = "colon"; break;
        case TokenType::EOF_:        name = "EOF"; break;
        default:                     break;
    }
    return name;
}
void Lexer::reset()
{
pos_ = 0;
ingest_next_token();
}
/// Return a copy of the current token without consuming it.
Token Lexer::peek() const
{
    Token upcoming = current_token_;
    return upcoming;
}
/// Return the current token and advance the lexer to the one after it.
Token Lexer::ingest()
{
    Token consumed = current_token_;  // copy before it gets overwritten
    ingest_next_token();
    return consumed;
}
void Lexer::ingest_next_token()
{
size_t current_line_pos = 1;
size_t current_line = 1;
if (pos_ >= source_.size()) {
current_token_ = { TokenType::EOF_ };
return;
}
char c = source_.at(pos_);
TokenType type {};
std::string stoken;
TokenValue value = std::monostate();
if (c == '.') {
type = TokenType::Directive;
stoken += '.';
while (c = source_.at(++pos_), isalpha(c) || c == '_')
stoken += c;
value = stoken;
} else if (c == '"') {
type = TokenType::String;
++pos_;
while (true) {
if (source_.at(pos_) == '\\') { // TODO - improve this for special characters
++pos_;
} else if (source_.at(pos_) == '"') {
++pos_;
break;
} else if (pos_ >= source_.size()) {
throw AssemblyError("Unterminated string", current_line, pos_ - current_line_pos);
}
stoken += source_.at(pos_++);
}
value = stoken;
} else if (isdigit(c) || c == '-') {
type = TokenType::Integer;
stoken += c;
while (c = source_.at(++pos_), isdigit(c) || c == '.') {
stoken += c;
if (c == '.') {
if (type == TokenType::Integer)
type = TokenType::Float;
else
throw AssemblyError("Double point in floating point number", current_line, pos_ - current_line_pos);
}
}
if (type == TokenType::Integer)
value = std::stoi(stoken);
else
value = std::stof(stoken);
} else if (isalpha(c)) {
type = TokenType::Instruction;
stoken += c;
while (c = source_.at(++pos_), isalpha(c))
stoken += c;
value = stoken;
} else if (c == ':') {
type = TokenType::Colon;
++pos_;
} else if (c == '\n' || c == ';') {
while (pos_ < source_.size() && source_.at(pos_) != '\n')
++pos_;
type = TokenType::Enter;
value = "\n";
++pos_;
++current_line;
current_line_pos = pos_;
} else {
throw AssemblyError(std::string("Unexpected character '") + c + "' (ascii: " + std::to_string((int) c) + ")", current_line, pos_ - current_line_pos);
}
// skip ignored tokens
while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r'))
++pos_;
current_token_ = { .type = type, .token = value, .line = current_line, .column = pos_ - current_line_pos };
}
/**
 * Write a debug representation of a token, e.g. `Integer (42)` or `Enter`.
 *
 * @param os stream to write to
 * @param t  token to describe; its value must hold the alternative that
 *           matches its type (std::get throws std::bad_variant_access otherwise)
 * @return `os`
 */
std::ostream& operator<<(std::ostream& os, Token const& t)
{
    // Each case ends with `break`: without it every token printed its own
    // text followed by the text of all the cases below it.
    switch (t.type) {
        case TokenType::BOF:         os << "BOF"; break;
        case TokenType::Directive:   os << "Directive (" << std::get<std::string>(t.token) << ")"; break;
        case TokenType::Instruction: os << "Instruction (" << std::get<std::string>(t.token) << ")"; break;
        case TokenType::Integer:     os << "Integer (" << std::to_string(std::get<int>(t.token)) << ")"; break;
        case TokenType::Float:       os << "Float (" << std::to_string(std::get<float>(t.token)) << ")"; break;
        case TokenType::String:      os << "String (" << std::get<std::string>(t.token) << ")"; break;
        case TokenType::Enter:       os << "Enter"; break;
        case TokenType::Colon:       os << "Colon"; break;
        case TokenType::EOF_:        os << "EOF"; break;
        default:                     os << "???"; break;
    }
    return os;
}
} // tyche

47
src/assembler/lexer.hh Normal file
View File

@@ -0,0 +1,47 @@
#ifndef TYCHE_LEXER_HH
#define TYCHE_LEXER_HH
#include <string>
#include <utility>
#include <variant>
namespace tyche::as {
/// Kinds of token produced by the assembler's lexer.
enum class TokenType {
    BOF,          ///< initial state, before any token has been read
    Directive,    ///< word starting with '.', such as `.const`
    Instruction,  ///< alphabetic word
    Integer,      ///< integer literal
    Float,        ///< literal containing a decimal point
    String,       ///< double-quoted text
    Enter,        ///< end of line (also produced for `;` comments)
    Colon,        ///< ':'
    EOF_          ///< end of the source
};
using TokenValue = std::variant<std::monostate, int, float, std::string>;
/// A lexed token: its kind, its value (if any) and where it was found.
struct Token {
    TokenType  type;
    TokenValue token = std::monostate();
    size_t     line = 0;
    size_t     column = 0;

    /// Tokens are equal when kind and value match; the position is ignored.
    friend bool operator==(Token const& lhs, Token const& rhs)
    {
        return lhs.type == rhs.type && lhs.token == rhs.token;
    }
};
std::ostream& operator<<(std::ostream& os, Token const& t);
std::string token_type_name(TokenType type);
/**
 * Splits assembly source into tokens, one token of lookahead at a time.
 *
 * Construction immediately reads the first token, so it is available
 * through peek() right away.
 */
class Lexer {
public:
    explicit Lexer(std::string source)
        : source_ { std::move(source) }
    {
        reset();
    }

    /// Go back to the beginning of the source.
    void reset();

    /// Current token, left in place.
    [[nodiscard]] Token peek() const;

    /// Current token; the lexer then moves on to the next one.
    [[nodiscard]] Token ingest();

private:
    /// Read the token at `pos_` into `current_token_`.
    void ingest_next_token();

    const std::string source_;                        // text being tokenized
    size_t            pos_ = 0;                       // index of the next unread character
    Token             current_token_ { TokenType::BOF };
};
} // tyche
#endif //TYCHE_LEXER_HH

75
src/assembler/tests.cc Normal file
View File

@@ -0,0 +1,75 @@
#include "assembler.hh"
#include "gtest/gtest.h"
#include "../bytecode/bytecodeprototype.hh"
#include "../bytecode/bytecode.hh"
#include "../instructions/instruction.hh"
using namespace tyche;
using namespace tyche::as;
using namespace tyche::bc;
// Runs the lexer over one line that contains every literal kind and checks
// the resulting token stream, including EOF stickiness and reset().
TEST(Lexer, Lexer)
{
    Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n");

    ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
    ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" }));

    // numeric payloads are inspected field by field so floats can use a tolerant comparison
    Token tok = lexer.ingest();
    ASSERT_EQ(tok.type, TokenType::Integer);
    ASSERT_EQ(std::get<int>(tok.token), 382);

    tok = lexer.ingest();
    ASSERT_EQ(tok.type, TokenType::Integer);
    ASSERT_EQ(std::get<int>(tok.token), -12);

    tok = lexer.ingest();
    ASSERT_EQ(tok.type, TokenType::Float);
    ASSERT_FLOAT_EQ(std::get<float>(tok.token), 3.14f);

    tok = lexer.ingest();
    ASSERT_EQ(tok.type, TokenType::Float);
    ASSERT_FLOAT_EQ(std::get<float>(tok.token), -12.8f);

    ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" }));
    ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" }));
    ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter, "\n" }));

    // once the input is exhausted the lexer keeps answering EOF
    ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
    ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
    ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));

    // reset() rewinds to the first token
    lexer.reset();
    ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
}
// Assembles a small two-function program and expects exactly the bytes that
// Bytecode::generate() produces for the equivalent hand-built prototype.
// (The suite name used to be misspelled "Assember".)
TEST(Assembler, Assembler)
{
    auto const opcode = [](Instruction inst) { return static_cast<uint8_t>(inst); };

    BytecodePrototype bp;
    bp.constants.emplace_back(3.14f);
    bp.constants.emplace_back("Hello world");

    // function 0: 2 + 3
    bp.functions.emplace_back(0, 0);
    bp.functions.at(0).code.append_byte(opcode(Instruction::PushInt8));
    bp.functions.at(0).code.append_int8(2);
    bp.functions.at(0).code.append_byte(opcode(Instruction::PushInt8));
    bp.functions.at(0).code.append_int8(3);
    bp.functions.at(0).code.append_byte(opcode(Instruction::Sum));
    bp.functions.at(0).code.append_byte(opcode(Instruction::Return));

    // function 1: 5000 does not fit in 8 bits, so the 16-bit push is expected
    bp.functions.emplace_back(0, 0);
    bp.functions.at(1).code.append_byte(opcode(Instruction::PushInt16));
    bp.functions.at(1).code.append_int16(5000);
    bp.functions.at(1).code.append_byte(opcode(Instruction::Return));

    StaticByteArray expected = Bytecode::generate(bp);

    std::string src = R"(
.const
0: 3.14
1: "Hello world"
.func 0
pushi 2 ; this is a comment
pushi 3
sum
ret
.func 1
pushi 5000
ret
)";
    StaticByteArray actual = Assembler(src).assemble();
    ASSERT_EQ(expected, actual);
}
// Test-runner entry point: hands the command line to GoogleTest and uses the
// outcome of the test run as the process exit code.
int main(int argc, char** argv)
{
    testing::InitGoogleTest(&argc, argv);
    int const exit_code = RUN_ALL_TESTS();
    return exit_code;
}

View File

@@ -0,0 +1,81 @@
#include "bytearray.hh"

#include <bit>
#include <cstdio>
#include <cstring>
namespace tyche {
/// Returns the byte stored at `addr`.
/// Access goes through std::vector::at, so an address past the end throws std::out_of_range.
uint8_t StaticByteArray::get_byte(uint32_t addr) const
{
    uint8_t const value = data_.at(addr);
    return value;
}
/// Reads an unsigned 16-bit value stored little-endian at `addr` (low byte first).
uint16_t StaticByteArray::get_uint16(uint32_t addr) const
{
    uint32_t const low = get_byte(addr);
    uint32_t const high = get_byte(addr + 1);
    return static_cast<uint16_t>(low | (high << 8));
}
/// Reads an unsigned 32-bit value stored little-endian at `addr` (least significant byte first).
uint32_t StaticByteArray::get_uint32(uint32_t addr) const
{
    uint32_t value = 0;
    for (uint32_t i = 0; i < 4; ++i)
        value |= static_cast<uint32_t>(get_byte(addr + i)) << (8 * i);
    return value;
}
/// Returns the byte at `addr` reinterpreted, bit for bit, as a signed 8-bit value.
int8_t StaticByteArray::get_int8(uint32_t addr) const
{
    uint8_t const raw = get_byte(addr);
    return std::bit_cast<int8_t>(raw);
}
int16_t StaticByteArray::get_int16(uint32_t addr) const
{
return (uint16_t) get_byte(addr)
| (uint16_t) get_byte(addr+1) << 8;
}
int32_t StaticByteArray::get_int32(uint32_t addr) const
{
return std::bit_cast<int32_t>((uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8
| (uint32_t) get_byte(addr+2) << 16
| (uint32_t) get_byte(addr+3) << 24);
}
float StaticByteArray::get_float(uint32_t addr) const
{
uint32_t bits = (uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8
| (uint32_t) get_byte(addr+2) << 16
| (uint32_t) get_byte(addr+3) << 24;
float value;
std::memcpy(&value, &bits, 4);
return value;
}
/// Returns a pointer to the NUL-terminated string starting at `addr` together
/// with its size in bytes, terminator included.
///
/// @throws std::out_of_range if `addr` is past the end of the array, or if no
///         terminating NUL byte exists between `addr` and the end of the array.
std::pair<const char*, size_t> StaticByteArray::get_string_ptr(uint32_t addr) const
{
    uint8_t const* const start = &data_.at(addr);   // at() validates addr
    size_t const available = data_.size() - addr;

    // Search only inside the buffer: an unbounded strlen() would read past
    // the end of the vector when the data is not NUL-terminated.
    void const* const terminator = std::memchr(start, 0, available);
    if (terminator == nullptr)
        throw std::out_of_range("StaticByteArray::get_string_ptr: string is not NUL-terminated");

    auto const length = static_cast<size_t>(static_cast<uint8_t const*>(terminator) - start);
    return { reinterpret_cast<const char*>(start), length + 1 };
}
/// Renders the array as text: rows of 16 bytes, each row prefixed with its
/// 4-digit hexadecimal offset and " | ", every byte printed as two uppercase
/// hex digits followed by a space. A final newline is always appended.
std::string StaticByteArray::hexdump() const
{
    constexpr size_t bytes_per_row = 16;

    auto const hex = [](uint32_t value, int width) {
        char text[15];
        snprintf(text, sizeof text, "%0*X", width, value);
        return std::string(text);
    };

    std::string dump;
    size_t offset = 0;
    for (uint8_t const byte : data_) {
        size_t const column = offset % bytes_per_row;
        if (column == 0) {
            dump += hex(offset, 4);
            dump += " | ";
        }
        dump += hex(byte, 2);
        dump += " ";
        if (column == bytes_per_row - 1)
            dump += "\n";
        ++offset;
    }
    dump += "\n";
    return dump;
}
}

View File

@@ -0,0 +1,44 @@
#ifndef TYCHE_BYTEARRAY_HH
#define TYCHE_BYTEARRAY_HH
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>
namespace tyche {
/// Read-only byte array. The bytes are copied in at construction and cannot
/// be changed afterwards; multi-byte getters are declared here and defined in
/// the matching source file.
class StaticByteArray {
public:
    explicit StaticByteArray(std::vector<uint8_t> const& data)
        : data_(data)
    {
    }

    explicit StaticByteArray(StaticByteArray const& ba)
        : data_(ba.data_)
    {
    }

    // not assignable or moveable
    StaticByteArray(StaticByteArray&&) = delete;
    StaticByteArray& operator=(StaticByteArray const&) = delete;
    StaticByteArray& operator=(StaticByteArray&&) = delete;

    // typed reads at a byte address
    [[nodiscard]] uint8_t  get_byte(uint32_t addr) const;
    [[nodiscard]] uint16_t get_uint16(uint32_t addr) const;
    [[nodiscard]] uint32_t get_uint32(uint32_t addr) const;
    [[nodiscard]] int8_t   get_int8(uint32_t addr) const;
    [[nodiscard]] int16_t  get_int16(uint32_t addr) const;
    [[nodiscard]] int32_t  get_int32(uint32_t addr) const;
    [[nodiscard]] float    get_float(uint32_t addr) const;

    /// Pointer to the string starting at `addr` and its size (terminator included).
    [[nodiscard]] std::pair<const char*, size_t> get_string_ptr(uint32_t addr) const;

    [[nodiscard]] std::vector<uint8_t> const& data() const { return data_; }
    [[nodiscard]] size_t size() const { return data_.size(); }

    [[nodiscard]] std::string hexdump() const;

    /// Arrays are equal when they hold the same bytes.
    friend bool operator==(StaticByteArray const& lhs, StaticByteArray const& rhs)
    {
        return lhs.data_ == rhs.data_;
    }

private:
    const std::vector<uint8_t> data_ {};
};
}
#endif //TYCHE_BYTEARRAY_HH

View File

@@ -0,0 +1,88 @@
#include "bytearraybuilder.hh"
namespace tyche {
/// Stores `byte` at `addr`, growing the buffer with zero bytes first when
/// `addr` lies beyond its current end. Returns *this for chaining.
ByteArrayBuilder& ByteArrayBuilder::set_byte(uint32_t addr, uint8_t byte)
{
    uint32_t const required = addr + 1;
    if (required > data_.size())
        data_.resize(required, 0);
    data_.at(addr) = byte;
    return *this;
}
/// Stores a signed 8-bit value at `addr` as its single-byte representation.
ByteArrayBuilder& ByteArrayBuilder::set_int8(uint32_t addr, int8_t value)
{
    return set_byte(addr, static_cast<uint8_t>(value));
}
/// Stores a signed 16-bit value little-endian at `addr`.
/// The byte pattern is the same as for the value converted to uint16_t,
/// so the unsigned writer does the work.
ByteArrayBuilder& ByteArrayBuilder::set_int16(uint32_t addr, int16_t value)
{
    return set_uint16(addr, static_cast<uint16_t>(value));
}
/// Stores a signed 32-bit value little-endian at `addr`.
/// The byte pattern is the same as for the value converted to uint32_t,
/// so the unsigned writer does the work.
ByteArrayBuilder& ByteArrayBuilder::set_int32(uint32_t addr, int32_t value)
{
    return set_uint32(addr, static_cast<uint32_t>(value));
}
/// Stores an unsigned 16-bit value little-endian at `addr` (low byte first).
ByteArrayBuilder& ByteArrayBuilder::set_uint16(uint32_t addr, uint16_t value)
{
    auto const low = static_cast<uint8_t>(value);
    auto const high = static_cast<uint8_t>(value >> 8);
    return set_byte(addr, low).set_byte(addr + 1, high);
}
/// Stores an unsigned 32-bit value little-endian at `addr` (least significant byte first).
ByteArrayBuilder& ByteArrayBuilder::set_uint32(uint32_t addr, uint32_t value)
{
    for (uint32_t i = 0; i < 4; ++i)
        set_byte(addr + i, static_cast<uint8_t>(value >> (8 * i)));
    return *this;
}
/// Stores the 32-bit representation of `value` little-endian at `addr`.
ByteArrayBuilder& ByteArrayBuilder::set_float(uint32_t addr, float value)
{
    uint32_t raw;
    std::memcpy(&raw, &value, 4);   // take the float's bytes as an integer
    return set_uint32(addr, raw);
}
/// Writes the characters of `str` starting at `addr`, followed by a NUL byte.
ByteArrayBuilder& ByteArrayBuilder::set_string(uint32_t addr, std::string const& str)
{
    uint32_t cursor = addr;
    for (char const ch : str) {
        set_byte(cursor, static_cast<uint8_t>(ch));
        ++cursor;
    }
    return set_byte(cursor, 0);   // terminator
}
/// Copies every byte of `bytearray` into this builder, starting at `addr`
/// (existing bytes in that range are overwritten, the buffer grows as needed).
ByteArrayBuilder& ByteArrayBuilder::set_bytearray(uint32_t addr, ByteArrayBuilder const& bytearray)
{
    uint32_t cursor = addr;
    for (uint8_t const byte : bytearray.data_) {
        set_byte(cursor, byte);
        ++cursor;
    }
    return *this;
}
/// Appends all bytes of `bytearray` to the end of this builder.
ByteArrayBuilder& ByteArrayBuilder::append_bytearray(ByteArrayBuilder const& bytearray)
{
    auto const& extra = bytearray.data_;
    data_.insert(data_.end(), extra.begin(), extra.end());
    return *this;
}
/// Produces an immutable StaticByteArray holding a copy of the bytes written so far.
StaticByteArray ByteArrayBuilder::build() const
{
    // Returned as a temporary on purpose: StaticByteArray cannot be moved and
    // its copy constructor is explicit, so a named local could not be returned.
    return StaticByteArray(data_);
}
}

View File

@@ -0,0 +1,45 @@
#ifndef TYCHE_BYTEARRAYBUILDER_HH
#define TYCHE_BYTEARRAYBUILDER_HH
#include <cstdint>
#include <string>
#include <vector>
#include "bytearray.hh"
namespace tyche {
/// Growable byte buffer with typed writers.
///
/// `set_*` writes at an explicit address, `append_*` writes at the current end
/// of the buffer. Every writer returns *this so calls can be chained.
class ByteArrayBuilder {
public:
    // --- write at an explicit address ---
    ByteArrayBuilder& set_byte(uint32_t addr, uint8_t byte);
    ByteArrayBuilder& set_uint16(uint32_t addr, uint16_t value);
    ByteArrayBuilder& set_uint32(uint32_t addr, uint32_t value);
    ByteArrayBuilder& set_int8(uint32_t addr, int8_t value);
    ByteArrayBuilder& set_int16(uint32_t addr, int16_t value);
    ByteArrayBuilder& set_int32(uint32_t addr, int32_t value);
    ByteArrayBuilder& set_float(uint32_t addr, float value);
    ByteArrayBuilder& set_string(uint32_t addr, std::string const& str);
    ByteArrayBuilder& set_bytearray(uint32_t addr, ByteArrayBuilder const& bytearray);

    // --- write at the end of the buffer ---
    ByteArrayBuilder& append_byte(uint8_t byte)             { return set_byte(data_.size(), byte); }
    ByteArrayBuilder& append_uint16(uint16_t value)         { return set_uint16(data_.size(), value); }
    ByteArrayBuilder& append_uint32(uint32_t value)         { return set_uint32(data_.size(), value); }
    ByteArrayBuilder& append_int8(int8_t value)             { return set_int8(data_.size(), value); }
    ByteArrayBuilder& append_int16(int16_t value)           { return set_int16(data_.size(), value); }
    ByteArrayBuilder& append_int32(int32_t value)           { return set_int32(data_.size(), value); }
    ByteArrayBuilder& append_float(float value)             { return set_float(data_.size(), value); }
    ByteArrayBuilder& append_string(std::string const& str) { return set_string(data_.size(), str); }
    ByteArrayBuilder& append_bytearray(ByteArrayBuilder const& bytearray);

    [[nodiscard]] std::vector<uint8_t> const& data() const { return data_; }
    [[nodiscard]] size_t size() const { return data_.size(); }

    /// Snapshot of the current contents as an immutable array.
    [[nodiscard]] StaticByteArray build() const;

private:
    std::vector<uint8_t> data_ {};
};
}
#endif //TYCHE_BYTEARRAYBUILDER_HH

51
src/bytearray/tests.cc Normal file
View File

@@ -0,0 +1,51 @@
#include "gtest/gtest.h"
#include <cstring>
#include <functional>
#include "bytearray.hh"
#include "bytearraybuilder.hh"
using namespace tyche;
// Checks the raw bytes the builder produces and that every typed value
// written by the builder is read back unchanged from the built array.
TEST(StaticByteArray, StaticByteArray)
{
    // Runs `fill` on a fresh builder and compares the bytes it produced with `expected`.
    auto check_bytes = [](std::function<void(ByteArrayBuilder&)> const& fill, std::vector<uint8_t> const& expected) {
        ByteArrayBuilder builder;
        fill(builder);
        ASSERT_EQ(builder.data().size(), expected.size());
        ASSERT_EQ(std::memcmp(builder.data().data(), expected.data(), builder.data().size()), 0);
    };

#define TESTX(a, ...) check_bytes([](ByteArrayBuilder& ba) { a; }, std::vector<uint8_t>({ __VA_ARGS__ }));
    TESTX(ba.set_byte(1, 0xab), 0x00, 0xab)
#undef TESTX

    // round trips: each value is written at address 1 and read back from a fresh snapshot
    ByteArrayBuilder ba;
    {
        auto frozen = ba.set_byte(1, 0xab).build();
        ASSERT_EQ(frozen.get_byte(1), 0xab);
    }
    {
        auto frozen = ba.set_int8(1, 12).build();
        ASSERT_EQ(frozen.get_int8(1), 12);
    }
    {
        auto frozen = ba.set_int8(1, -12).build();
        ASSERT_EQ(frozen.get_int8(1), -12);
    }
    {
        auto frozen = ba.set_int16(1, 5000).build();
        ASSERT_EQ(frozen.get_int16(1), 5000);
    }
    {
        auto frozen = ba.set_int32(1, 5000300).build();
        ASSERT_EQ(frozen.get_int32(1), 5000300);
    }
    {
        auto frozen = ba.set_int32(1, -5000300).build();
        ASSERT_EQ(frozen.get_int32(1), -5000300);
    }
    {
        auto frozen = ba.set_float(1, 3.14).build();
        ASSERT_FLOAT_EQ(frozen.get_float(1), 3.14);
    }
    {
        auto frozen = ba.set_float(1, -3.14).build();
        ASSERT_FLOAT_EQ(frozen.get_float(1), -3.14);
    }
    {
        auto frozen = ba.set_float(1, -5000300.1324).build();
        ASSERT_FLOAT_EQ(frozen.get_float(1), -5000300.1324);
    }
    {
        auto frozen = ba.set_string(1, "Hello world!").build();
        auto text = frozen.get_string_ptr(1);
        EXPECT_STREQ(text.first, "Hello world!");
        ASSERT_EQ(text.second, 13);   // 12 characters + terminator
    }
}
// Test-runner entry point: hands the command line to GoogleTest and uses the
// outcome of the test run as the process exit code.
int main(int argc, char** argv)
{
    testing::InitGoogleTest(&argc, argv);
    int const exit_code = RUN_ALL_TESTS();
    return exit_code;
}

View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_BC_EXCEPTIONS_HH
#define TYCHE_BC_EXCEPTIONS_HH
#include <stdexcept>
namespace tyche::bc {
/// Exception type used to report a byte array that is not valid bytecode.
/// Behaves like std::runtime_error carrying the given message.
class BytecodeParsingError : public std::runtime_error {
public:
    explicit BytecodeParsingError(std::string const& str)
        : std::runtime_error(str)
    {
    }
};
}
#endif //TYCHE_BC_EXCEPTIONS_HH

166
src/bytecode/bytecode.cc Normal file
View File

@@ -0,0 +1,166 @@
#include "bytecode.hh"
#include "bc_exceptions.hh"
#include "../common/overloaded.hh"
namespace tyche::bc {
/// Validates the header of `ba` and caches the table-of-contents entries that
/// the accessors need.
///
/// `ba` is stored as a plain pointer, not copied: it must stay alive for as
/// long as this object is used.
///
/// @throws BytecodeParsingError when the array is shorter than header + TOC,
///         or when the magic number or the format version does not match.
Bytecode::Bytecode(StaticByteArray const* ba)
    : byte_array_(ba)
{
    // check file size
    if (byte_array_->size() < (TOC_START + TOC_SZ))
        throw BytecodeParsingError("Invalid bytecode format (file too short)");

    // check magic number and version
    if (byte_array_->get_uint32(0) != MAGIC_NUMBER)
        throw BytecodeParsingError("Invalid bytecode format (magic number not matching)");
    if (byte_array_->get_uint32(4) != BYTECODE_VERSION)
        throw BytecodeParsingError("Unexpected bytecode format version");

    // load cache -- each TOC record is { uint32 address, uint32 count/size }
    uint32_t const const_idx_record = TOC_START + (SEC_CONST_IDX * TOC_RECORD_SZ);
    uint32_t const func_idx_record = TOC_START + (SEC_FUNC_IDX * TOC_RECORD_SZ);
    uint32_t const const_data_record = TOC_START + (SEC_CONST_DATA * TOC_RECORD_SZ);
    uint32_t const code_record = TOC_START + (SEC_CODE * TOC_RECORD_SZ);

    cache_.constants_idx_addr = byte_array_->get_uint32(const_idx_record);
    // NOTE: the cached constant count is a 16-bit field, so only 16 bits are read here.
    cache_.n_constants = byte_array_->get_uint16(const_idx_record + 4);
    cache_.functions_idx_addr = byte_array_->get_uint32(func_idx_record);
    // generate() writes the function count as a 32-bit value and the cache
    // field is 32 bits wide; reading only 16 bits would truncate large counts.
    cache_.n_functions = byte_array_->get_uint32(func_idx_record + 4);
    cache_.constants_start_addr = byte_array_->get_uint32(const_data_record);

    // function records hold { uint32 code offset, uint16 params, uint16 locals, uint32 code size }
    uint32_t const code_start = byte_array_->get_uint32(code_record);
    for (uint32_t i = 0; i < cache_.n_functions; ++i) {
        uint32_t const record = cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ);
        cache_.function_addr.emplace_back(code_start + byte_array_->get_uint32(record));
        cache_.function_sz.emplace_back(byte_array_->get_uint32(record + 8));
    }
}
/// Number of constants, as cached from the table of contents at construction.
uint32_t Bytecode::n_constants() const
{
    uint32_t const count = cache_.n_constants;
    return count;
}
/// Number of functions, as cached from the table of contents at construction.
uint32_t Bytecode::n_functions() const
{
    uint32_t const count = cache_.n_functions;
    return count;
}
/// Returns constant number `idx`.
///
/// A float constant is returned by value; a string constant is returned as a
/// pointer into the underlying byte array (no copy is made).
///
/// @throws std::out_of_range     if `idx` is not below n_constants().
/// @throws BytecodeParsingError  if the stored type tag is unknown.
ConstantValue Bytecode::get_constant(uint32_t idx) const
{
    // Without this check an out-of-range index would silently decode whatever
    // bytes follow the constant index table.
    if (idx >= cache_.n_constants)
        throw std::out_of_range("Bytecode::get_constant: constant index out of range");

    // index table entry -> offset of the value inside the constant data section
    uint32_t const record_addr = cache_.constants_idx_addr + (idx * CONST_RECORD_SZ);
    uint32_t const value_addr = cache_.constants_start_addr + byte_array_->get_uint32(record_addr);

    // a value is one type-tag byte followed by its payload
    switch (static_cast<ConstantType>(byte_array_->get_byte(value_addr))) {
        case CONST_TYPE_FLOAT:
            return byte_array_->get_float(value_addr + 1);
        case CONST_TYPE_STRING:
            return byte_array_->get_string_ptr(value_addr + 1).first;
        default:
            throw BytecodeParsingError("Invalid bytecode format (invalid constant type)");
    }
}
/// Returns the parameter count and local count stored for `function_id`.
///
/// @throws std::out_of_range if `function_id` is not below n_functions()
///         (the same exception get_function_sz() raises for a bad id).
Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const
{
    if (function_id >= cache_.n_functions)
        throw std::out_of_range("Bytecode::get_function_def: function id out of range");

    // record layout: { uint32 code offset, uint16 params, uint16 locals, uint32 code size }
    uint32_t const record = cache_.functions_idx_addr + (function_id * FUNCTION_RECORD_SZ);
    return {
        .n_params = byte_array_->get_uint16(record + 4),
        .locals = byte_array_->get_uint16(record + 6),
    };
}
/// Code size in bytes of `function_id`, from the cache built at construction.
/// An unknown id throws std::out_of_range (std::vector::at).
uint32_t Bytecode::get_function_sz(uint32_t function_id) const
{
    uint32_t const size = cache_.function_sz.at(function_id);
    return size;
}
/// Reads the byte at offset `idx` inside the code of `function_id`.
uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const
{
    uint32_t const code_base = cache_.function_addr.at(function_id);
    return byte_array_->get_byte(code_base + idx);
}
/// Reads a signed 8-bit value at offset `idx` inside the code of `function_id`.
int8_t Bytecode::get_code_int8(uint32_t function_id, uint32_t idx) const
{
    uint32_t const code_base = cache_.function_addr.at(function_id);
    return byte_array_->get_int8(code_base + idx);
}
/// Reads a signed 16-bit value at offset `idx` inside the code of `function_id`.
int16_t Bytecode::get_code_int16(uint32_t function_id, uint32_t idx) const
{
    uint32_t const code_base = cache_.function_addr.at(function_id);
    return byte_array_->get_int16(code_base + idx);
}
/// Reads a signed 32-bit value at offset `idx` inside the code of `function_id`.
int32_t Bytecode::get_code_int32(uint32_t function_id, uint32_t idx) const
{
    uint32_t const code_base = cache_.function_addr.at(function_id);
    return byte_array_->get_int32(code_base + idx);
}
/// Serializes a prototype into the binary bytecode image.
///
/// Layout, in order: header (magic number, version), table of contents
/// (TOC_N_RECORDS records of { uint32 address, uint32 count-or-size }),
/// constant index table, function index table, raw constant data, raw code.
///
/// The table of contents is always emitted at its full size, so even a
/// prototype with no constants and no functions produces an image that the
/// Bytecode constructor accepts (previously that image was only 5 bytes long
/// and was rejected as "file too short").
StaticByteArray Bytecode::generate(BytecodePrototype const& bp)
{
    // header section
    ByteArrayBuilder header;
    header.set_uint32(0, MAGIC_NUMBER);
    header.set_byte(4, BYTECODE_VERSION);

    // constants: the index table stores, per constant, its offset inside the raw data
    ByteArrayBuilder constant_indexes;
    ByteArrayBuilder raw_constants;
    uint32_t idx = 0;
    for (auto const& constant: bp.constants) {
        constant_indexes.append_uint32(idx);
        std::visit(overloaded {
            [&](float f) {
                raw_constants.append_byte(CONST_TYPE_FLOAT);
                raw_constants.append_float(f);
            },
            [&](std::string const& s) {
                raw_constants.append_byte(CONST_TYPE_STRING);
                raw_constants.append_string(s);
            },
        }, constant);
        idx = raw_constants.size();
    }

    // functions: one record per function { code offset, n_pars, n_locals, code size }
    ByteArrayBuilder functions_indexes;
    ByteArrayBuilder raw_code;
    uint32_t idx_idx = 0, code_idx = 0;
    for (auto const& f: bp.functions) {
        functions_indexes.set_uint32(idx_idx, code_idx);
        functions_indexes.set_uint16(idx_idx + 4, f.n_pars);
        functions_indexes.set_uint16(idx_idx + 6, f.n_locals);
        functions_indexes.set_uint32(idx_idx + 8, f.code.size());
        raw_code.append_bytearray(f.code);
        code_idx = raw_code.size();
        idx_idx += FUNCTION_RECORD_SZ;
    }

    // table of contents
    uint32_t function_idx_start = CONST_IDX_START + constant_indexes.size();
    uint32_t raw_constant_start = function_idx_start + functions_indexes.size();
    uint32_t raw_code_start = raw_constant_start + raw_constants.size();

    ByteArrayBuilder toc;
    toc.set_byte(TOC_SZ - 1, 0);   // reserve the whole table (zero-filled) even when no section is present
    if (!bp.constants.empty()) {
        toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ, CONST_IDX_START);
        toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ + 4, constant_indexes.size() / CONST_RECORD_SZ);
        toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ, raw_constant_start);
        toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ + 4, raw_constants.size());
    }
    if (!bp.functions.empty()) {
        toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ, function_idx_start);
        toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ + 4, functions_indexes.size() / FUNCTION_RECORD_SZ);
        toc.set_uint32(SEC_CODE * TOC_RECORD_SZ, raw_code_start);
        toc.set_uint32(SEC_CODE * TOC_RECORD_SZ + 4, raw_code.size());
    }

    //
    // assemble bytecode
    //
    ByteArrayBuilder ba;
    ba.set_bytearray(0, header);
    ba.set_bytearray(TOC_START, toc);
    ba.set_bytearray(CONST_IDX_START, constant_indexes);
    ba.set_bytearray(function_idx_start, functions_indexes);
    ba.set_bytearray(raw_constant_start, raw_constants);
    ba.set_bytearray(raw_code_start, raw_code);
    return ba.build();
}
}

62
src/bytecode/bytecode.hh Normal file
View File

@@ -0,0 +1,62 @@
#ifndef TYCHE_BYTECODE_HH
#define TYCHE_BYTECODE_HH
#include "../bytearray/bytearray.hh"
#include "bytecodeprototype.hh"
#include "constant.hh"
namespace tyche::bc {
/// Read-only accessor over a serialized bytecode image, plus the static
/// generate() that produces such an image from a prototype.
///
/// The byte array is referenced through a plain pointer and is not owned:
/// it has to outlive the Bytecode object.
class Bytecode {
public:
    explicit Bytecode(StaticByteArray const* ba);

    [[nodiscard]] uint32_t n_constants() const;
    [[nodiscard]] uint32_t n_functions() const;

    [[nodiscard]] ConstantValue get_constant(uint32_t idx) const;

    struct FunctionDef {
        uint16_t n_params;
        uint16_t locals;
    };
    [[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const;
    [[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const;

    // code access: `idx` is a byte offset inside the code of `function_id`
    [[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const;
    [[nodiscard]] int8_t get_code_int8(uint32_t function_id, uint32_t idx) const;
    [[nodiscard]] int16_t get_code_int16(uint32_t function_id, uint32_t idx) const;
    [[nodiscard]] int32_t get_code_int32(uint32_t function_id, uint32_t idx) const;

    // TODO - debugging info

    [[nodiscard]] static StaticByteArray generate(BytecodePrototype const& bp);

private:
    StaticByteArray const* byte_array_;   // the actual data

    // --- file format ---
    static constexpr uint8_t BYTECODE_VERSION = 1;
    static constexpr uint32_t MAGIC_NUMBER = 0x74b3c138;

    static constexpr uint32_t TOC_START = 16;
    static constexpr uint32_t TOC_N_RECORDS = 8;
    static constexpr uint32_t TOC_RECORD_SZ = 8;
    static constexpr uint32_t TOC_SZ = TOC_N_RECORDS * TOC_RECORD_SZ;

    static constexpr uint32_t CONST_IDX_START = TOC_START + TOC_SZ;
    static constexpr uint32_t CONST_RECORD_SZ = 4;

    static constexpr uint32_t FUNCTION_RECORD_SZ = 12;

    // position of each section's record inside the table of contents
    enum Sections { SEC_CONST_IDX = 0, SEC_FUNC_IDX = 1, SEC_CONST_DATA = 2, SEC_CODE = 3 };

    // caching for faster reading of data
    struct Cache {
        uint32_t constants_idx_addr;
        uint16_t n_constants;
        uint32_t constants_start_addr;
        uint32_t functions_idx_addr;
        uint32_t n_functions;
        std::vector<uint32_t> function_addr;
        std::vector<uint32_t> function_sz;
    };
    Cache cache_ {};
};
}
#endif //TYCHE_BYTECODE_HH

View File

@@ -0,0 +1,30 @@
#ifndef TYCHE_BYTECODEPROTOTYPE_HH
#define TYCHE_BYTECODEPROTOTYPE_HH
#include <cstdint>
#include <string>
#include <variant>
#include <vector>
#include "../bytearray/bytearraybuilder.hh"
namespace tyche::bc {
/// In-memory description of a program from which Bytecode::generate()
/// produces the binary image: a list of constants and a list of functions.
struct BytecodePrototype {
    /// One function: parameter count, local count and its code bytes.
    struct Function {
        uint16_t n_pars;
        uint16_t n_locals;
        ByteArrayBuilder code {};   // starts out empty

        Function(uint16_t n_pars_, uint16_t n_locals_)
            : n_pars(n_pars_)
            , n_locals(n_locals_)
        {
        }
    };

    using ConstantValue = std::variant<float, std::string>;

    std::vector<ConstantValue> constants {};
    std::vector<Function> functions {};

    // TODO - debugging info
};
}
#endif //TYCHE_BYTECODEPROTOTYPE_HH

15
src/bytecode/constant.hh Normal file
View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_CONSTANT_HH
#define TYCHE_CONSTANT_HH
#include <string>
#include <variant>
namespace tyche::bc {
/// A constant as handed out by the bytecode reader: a float, or a pointer to
/// NUL-terminated text.
using ConstantValue = std::variant<float, const char*>;

/// Type tag stored in front of every constant in the bytecode image.
enum ConstantType : uint8_t {
    CONST_TYPE_FLOAT = 1,
    CONST_TYPE_STRING = 2,
};
}
#endif //TYCHE_CONSTANT_HH

173
src/bytecode/tests.cc Normal file
View File

@@ -0,0 +1,173 @@
#include "gtest/gtest.h"
#include <cstring>
#include <functional>
#include "../bytearray/bytearray.hh"
#include "../bytearray/bytearraybuilder.hh"
#include "bytecodeprototype.hh"
#include "bytecode.hh"
using namespace tyche;
using namespace tyche::bc;
// Checks the raw bytes the builder produces and that every typed value
// written by the builder is read back unchanged from the built array.
TEST(StaticByteArray, StaticByteArray)
{
    // Runs `fill` on a fresh builder and compares the bytes it produced with `expected`.
    auto check_bytes = [](std::function<void(ByteArrayBuilder&)> const& fill, std::vector<uint8_t> const& expected) {
        ByteArrayBuilder builder;
        fill(builder);
        ASSERT_EQ(builder.data().size(), expected.size());
        ASSERT_EQ(std::memcmp(builder.data().data(), expected.data(), builder.data().size()), 0);
    };

#define TESTX(a, ...) check_bytes([](ByteArrayBuilder& ba) { a; }, std::vector<uint8_t>({ __VA_ARGS__ }));
    TESTX(ba.set_byte(1, 0xab), 0x00, 0xab)
#undef TESTX

    // round trips: each value is written at address 1 and read back from a fresh snapshot
    ByteArrayBuilder ba;
    {
        auto frozen = ba.set_byte(1, 0xab).build();
        ASSERT_EQ(frozen.get_byte(1), 0xab);
    }
    {
        auto frozen = ba.set_int8(1, 12).build();
        ASSERT_EQ(frozen.get_int8(1), 12);
    }
    {
        auto frozen = ba.set_int8(1, -12).build();
        ASSERT_EQ(frozen.get_int8(1), -12);
    }
    {
        auto frozen = ba.set_int16(1, 5000).build();
        ASSERT_EQ(frozen.get_int16(1), 5000);
    }
    {
        auto frozen = ba.set_int32(1, 5000300).build();
        ASSERT_EQ(frozen.get_int32(1), 5000300);
    }
    {
        auto frozen = ba.set_int32(1, -5000300).build();
        ASSERT_EQ(frozen.get_int32(1), -5000300);
    }
    {
        auto frozen = ba.set_float(1, 3.14).build();
        ASSERT_FLOAT_EQ(frozen.get_float(1), 3.14);
    }
    {
        auto frozen = ba.set_float(1, -3.14).build();
        ASSERT_FLOAT_EQ(frozen.get_float(1), -3.14);
    }
    {
        auto frozen = ba.set_float(1, -5000300.1324).build();
        ASSERT_FLOAT_EQ(frozen.get_float(1), -5000300.1324);
    }
    {
        auto frozen = ba.set_string(1, "Hello world!").build();
        auto text = frozen.get_string_ptr(1);
        EXPECT_STREQ(text.first, "Hello world!");
        ASSERT_EQ(text.second, 13);   // 12 characters + terminator
    }
}
// Serializes a prototype that holds only constants (one float, one string)
// and compares the generated image byte for byte with the expected layout.
TEST(Bytecode, Constants)
{
BytecodePrototype bp;
bp.constants.emplace_back(42.3f);
bp.constants.emplace_back("HELLO");
std::vector<uint8_t> expected = {
// header
0x38, 0xc1, 0xb3, 0x74, // magic
0x01, 0x00, 0x00, 0x00, // version
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
// table of contents: 8 records of { uint32 address, uint32 count or size }
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index: at 0x50, 2 entries
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function index (no functions)
0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, // raw constants: at 0x58, 12 bytes
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code (no functions)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// constant indexes (offset of each constant inside the raw constants)
0x00, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00,
// constant values (type tag followed by the payload)
CONST_TYPE_FLOAT, 0x33, 0x33, 0x29, 0x42, // float: 42.3f
CONST_TYPE_STRING, 'H', 'E', 'L', 'L', 'O', 0x00
};
StaticByteArray ba = Bytecode::generate(bp);
ASSERT_EQ(ba.data(), expected);
}
// Serializes a prototype that holds only functions (two of them) and compares
// the generated image byte for byte with the expected layout.
TEST(Bytecode, Code)
{
BytecodePrototype bp;
auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68);
f.code.append_int8(42);
auto& f2 = bp.functions.emplace_back(2, 1);
f2.code.append_byte(0x42);
std::vector<uint8_t> expected = {
// header
0x38, 0xc1, 0xb3, 0x74, // magic
0x01, 0x00, 0x00, 0x00, // version
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
// table of contents: 8 records of { uint32 address, uint32 count or size }
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant index (no constants)
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // function index: at 0x50, 2 entries
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw constants (no constants)
0x68, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code: at 0x68, 3 bytes
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// function definitions: { uint32 code offset, uint16 params, uint16 locals, uint32 code size }
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
// code
0x68, 42, 0x42,
};
StaticByteArray ba = Bytecode::generate(bp);
ASSERT_EQ(ba.data(), expected);
}
// Round trip: generates an image from a prototype, parses it back and checks
// that counts, constants, function definitions and code bytes all survive.
TEST(Bytecode, Parsing)
{
    // write bytecode
    BytecodePrototype proto;
    proto.constants.emplace_back(3.14f);
    proto.constants.emplace_back("HELLO");

    auto& first = proto.functions.emplace_back(0, 0);
    first.code.append_byte(0x68);
    first.code.append_int8(42);

    auto& second = proto.functions.emplace_back(2, 1);
    second.code.append_byte(0x42);

    StaticByteArray image = Bytecode::generate(proto);
    // print(image.data());

    // read bytecode
    Bytecode bc(&image);

    ASSERT_EQ(bc.n_constants(), 2);
    ASSERT_EQ(bc.n_functions(), 2);
    ASSERT_EQ(bc.get_function_sz(0), 2);
    ASSERT_EQ(bc.get_function_sz(1), 1);

    ASSERT_FLOAT_EQ(std::get<float>(bc.get_constant(0)), 3.14f);
    EXPECT_STREQ(std::get<const char*>(bc.get_constant(1)), "HELLO");

    Bytecode::FunctionDef def0 = bc.get_function_def(0);
    ASSERT_EQ(def0.n_params, 0);
    ASSERT_EQ(def0.locals, 0);

    Bytecode::FunctionDef def1 = bc.get_function_def(1);
    ASSERT_EQ(def1.n_params, 2);
    ASSERT_EQ(def1.locals, 1);

    ASSERT_EQ(bc.get_code_byte(0, 0), 0x68);
    ASSERT_EQ(bc.get_code_int8(0, 1), 42);
    ASSERT_EQ(bc.get_code_byte(1, 0), 0x42);
}
// Test-runner entry point: hands the command line to GoogleTest and uses the
// outcome of the test run as the process exit code.
int main(int argc, char** argv)
{
    testing::InitGoogleTest(&argc, argv);
    int const exit_code = RUN_ALL_TESTS();
    return exit_code;
}

8
src/common/overloaded.hh Normal file
View File

@@ -0,0 +1,8 @@
#ifndef TYCHE_OVERLOADED_HH
#define TYCHE_OVERLOADED_HH
// Helper for std::visit: merges several callables (typically lambdas) into a
// single visitor type whose operator() overload set is the union of theirs.
template<class... Ts>
struct overloaded : Ts... { using Ts::operator()...; };

// Explicit deduction guide so that `overloaded { f, g }` also compiles on
// toolchains that do not deduce class template arguments for aggregates.
template<class... Ts>
overloaded(Ts...) -> overloaded<Ts...>;
#endif //TYCHE_OVERLOADED_HH

View File

@@ -0,0 +1,244 @@
#include "instruction.hh"
#include <limits>
#include <unordered_map>
namespace tyche {
// Mnemonic -> opcode table used when translating assembly text.
// Instructions that take an operand are listed with their 8-bit opcode;
// translate_instruction() moves to the 16/32-bit variant when the operand
// needs it.
//
// "getg" and "call" are the spellings debug_instruction() prints; they were
// missing here, so disassembled text could not be assembled again. The older
// spellings "getl" and "call8" stay accepted so existing sources keep working.
const std::unordered_map<std::string, Instruction> instruction_names = {
    // stack operations
    { "pushi", Instruction::PushInt8 },
    { "pushc", Instruction::PushConstant8 },
    { "pushz", Instruction::PushZero },
    { "pusht", Instruction::PushTrue },
    { "pushf", Instruction::PushFunction8 },
    { "newa",  Instruction::NewArray },
    { "newt",  Instruction::NewTable },
    { "pop",   Instruction::Pop },
    { "dup",   Instruction::Duplicate },
    // variables
    { "pushv", Instruction::PushValues8 },
    { "set",   Instruction::SetValue8 },
    { "dupv",  Instruction::DuplicateValue8 },
    { "setg",  Instruction::SetGlobal8 },
    { "getg",  Instruction::GetGlobal8 },
    { "getl",  Instruction::GetGlobal8 },   // legacy spelling of "getg"
    // functions
    { "call",  Instruction::Call8 },
    { "call8", Instruction::Call8 },        // legacy spelling of "call"
    { "ret",   Instruction::Return },
    { "retn",  Instruction::ReturnNil },
    // tables and arrays
    { "getkv", Instruction::GetKeyValue },
    { "setkv", Instruction::SetKeyValue },
    { "geta",  Instruction::GetArrayItem },
    { "seta",  Instruction::SetArrayItem },
    { "appnd", Instruction::Append },
    { "next",  Instruction::Next },
    { "smt",   Instruction::SetMetatable },
    { "mt",    Instruction::GetMetatable },
    // logical/arithmetic
    { "sum",   Instruction::Sum },
    { "sub",   Instruction::Subtract },
    { "mul",   Instruction::Multiply },
    { "div",   Instruction::Divide },
    { "idiv",  Instruction::DivideInt },
    { "eq",    Instruction::Equals },
    { "neq",   Instruction::NotEquals },
    { "lt",    Instruction::LessThan },
    { "lte",   Instruction::LessThanEq },
    { "gt",    Instruction::GreaterThan },
    { "gte",   Instruction::GreaterThanEq },
    { "and",   Instruction::And },
    { "or",    Instruction::Or },
    { "xor",   Instruction::Xor },
    { "pow",   Instruction::Power },
    { "shl",   Instruction::ShiftLeft },
    { "shr",   Instruction::ShiftRight },
    { "mod",   Instruction::Modulo },
    { "len",   Instruction::Len },
    { "type",  Instruction::Type },
    { "cast",  Instruction::Cast },
    { "ver",   Instruction::Version },
    // control flow
    { "bz",    Instruction::BranchIfZero8 },
    { "bnz",   Instruction::BranchIfNotZero8 },
    { "jmp",   Instruction::Jump8 },
    // code loading
    { "cmpl",  Instruction::Compile },
    { "asmbl", Instruction::Assemble },
    { "load",  Instruction::Load },
};
/// Formats one instruction for display.
///
/// @param inst  opcode to describe; the 8/16/32-bit variants of an
///              instruction share one mnemonic, unknown opcodes give "???"
/// @param oper  operand value, appended after the mnemonic when the opcode takes one
/// @return      the text and the encoded size of the instruction in bytes
///              (1 without operand, 2/3/5 with an 8/16/32-bit operand)
std::pair<std::string, size_t> debug_instruction(Instruction inst, int oper)
{
    auto const mnemonic = [inst]() -> const char* {
        switch (inst) {
            case Instruction::PushInt8:
            case Instruction::PushInt16:
            case Instruction::PushInt32:
                return "pushi";
            case Instruction::PushConstant8:
            case Instruction::PushConstant16:
            case Instruction::PushConstant32:
                return "pushc";
            case Instruction::PushFunction8:
            case Instruction::PushFunction16:
            case Instruction::PushFunction32:
                return "pushf";
            case Instruction::PushZero:      return "pushz";
            case Instruction::PushTrue:      return "pusht";
            case Instruction::NewArray:      return "newa";
            case Instruction::NewTable:      return "newt";
            case Instruction::Pop:           return "pop";
            case Instruction::Duplicate:     return "dup";
            case Instruction::PushValues8:
            case Instruction::PushValues16:
            case Instruction::PushValues32:
                return "pushv";
            case Instruction::SetValue8:
            case Instruction::SetValue16:
            case Instruction::SetValue32:
                return "set";
            case Instruction::DuplicateValue8:
            case Instruction::DuplicateValue16:
            case Instruction::DuplicateValue32:
                return "dupv";
            case Instruction::SetGlobal8:
            case Instruction::SetGlobal16:
            case Instruction::SetGlobal32:
                return "setg";
            case Instruction::GetGlobal8:
            case Instruction::GetGlobal16:
            case Instruction::GetGlobal32:
                return "getg";
            case Instruction::Call8:
            case Instruction::Call16:
            case Instruction::Call32:
                return "call";
            case Instruction::Return:        return "ret";
            case Instruction::ReturnNil:     return "retn";
            case Instruction::GetKeyValue:   return "getkv";
            case Instruction::SetKeyValue:   return "setkv";
            case Instruction::GetArrayItem:  return "geta";
            case Instruction::SetArrayItem:  return "seta";
            case Instruction::Append:        return "appnd";
            case Instruction::Next:          return "next";
            case Instruction::SetMetatable:  return "smt";
            case Instruction::GetMetatable:  return "mt";
            case Instruction::Sum:           return "sum";
            case Instruction::Subtract:      return "sub";
            case Instruction::Multiply:      return "mul";
            case Instruction::Divide:        return "div";
            case Instruction::DivideInt:     return "idiv";
            case Instruction::Equals:        return "eq";
            case Instruction::NotEquals:     return "neq";
            case Instruction::LessThan:      return "lt";
            case Instruction::LessThanEq:    return "lte";
            case Instruction::GreaterThan:   return "gt";
            case Instruction::GreaterThanEq: return "gte";
            case Instruction::And:           return "and";
            case Instruction::Or:            return "or";
            case Instruction::Xor:           return "xor";
            case Instruction::Power:         return "pow";
            case Instruction::ShiftLeft:     return "shl";
            case Instruction::ShiftRight:    return "shr";
            case Instruction::Modulo:        return "mod";
            case Instruction::Len:           return "len";
            case Instruction::Type:          return "type";
            case Instruction::Cast:          return "cast";
            case Instruction::Version:       return "ver";
            case Instruction::BranchIfZero8:
            case Instruction::BranchIfZero16:
            case Instruction::BranchIfZero32:
                return "bz";
            case Instruction::BranchIfNotZero8:
            case Instruction::BranchIfNotZero16:
            case Instruction::BranchIfNotZero32:
                return "bnz";
            case Instruction::Jump8:
            case Instruction::Jump16:
            case Instruction::Jump32:
                return "jmp";
            case Instruction::Compile:       return "cmpl";
            case Instruction::Assemble:      return "asmbl";
            case Instruction::Load:          return "load";
            default:
                return "???";
        }
    };

    std::string text = mnemonic();

    OperandType const kind = instruction_operand_type(inst);
    if (kind == OperandType::NoOperand)
        return { text, 1 };

    text += " " + std::to_string(oper);

    // encoded size = opcode byte + operand bytes
    size_t encoded_size = 2;
    if (kind == OperandType::Int32)
        encoded_size = 5;
    else if (kind == OperandType::Int16)
        encoded_size = 3;
    return { text, encoded_size };
}
/// Formats the instruction found at byte offset `addr` in the code of
/// `function_id`: reads the opcode, then an operand of the width the opcode
/// calls for from the bytes that follow it.
std::pair<std::string, size_t> debug_instruction(bc::Bytecode const& bt, uint32_t function_id, uint32_t addr)
{
    auto const inst = static_cast<Instruction>(bt.get_code_byte(function_id, addr));
    uint32_t const operand_addr = addr + 1;

    switch (instruction_operand_type(inst)) {
        case OperandType::NoOperand:
            return debug_instruction(inst);
        case OperandType::Int8:
            return debug_instruction(inst, bt.get_code_int8(function_id, operand_addr));
        case OperandType::Int16:
            return debug_instruction(inst, bt.get_code_int16(function_id, operand_addr));
        case OperandType::Int32:
            return debug_instruction(inst, bt.get_code_int32(function_id, operand_addr));
        default:
            return { "???", 1 };
    }
}
/// Derives the operand width from the opcode value alone:
/// below 0xa0 no operand, 0xa0-0xbf 8-bit, 0xc0-0xdf 16-bit, 0xe0 and up 32-bit.
OperandType instruction_operand_type(Instruction inst)
{
    auto const opcode = static_cast<uint8_t>(inst);
    if (opcode < 0xa0)
        return OperandType::NoOperand;
    if (opcode < 0xc0)
        return OperandType::Int8;
    if (opcode < 0xe0)
        return OperandType::Int16;
    return OperandType::Int32;
}
std::optional<Instruction> translate_instruction(std::string const& txt, std::optional<int> op)
{
auto it = instruction_names.find(txt);
if (it == instruction_names.end())
return {};
Instruction inst = it->second;
OperandType optype = instruction_operand_type(inst);
if (optype == OperandType::NoOperand && op)
return {};
if (optype != OperandType::NoOperand && !op)
return {};
if (optype == OperandType::NoOperand)
return inst;
if (op >= std::numeric_limits<int8_t>::min() && op <= std::numeric_limits<int8_t>::max())
return inst;
if (op >= std::numeric_limits<int16_t>::min() && op <= std::numeric_limits<int16_t>::max())
return (Instruction) ((uint8_t) inst + OPCODE_NEXT_SIZE);
return (Instruction) ((uint8_t) inst + (OPCODE_NEXT_SIZE * 2));
}
}

View File

@@ -0,0 +1,121 @@
#ifndef TYCHE_INSTRUCTION_HH
#define TYCHE_INSTRUCTION_HH
#include <cstdint>
#include <optional>
#include <string>
#include <utility>
#include "../bytecode/bytecode.hh"
namespace tyche {
// Distance between the 8-, 16- and 32-bit operand variants of the same
// instruction (e.g. PushInt8 = 0xa0, PushInt16 = 0xc0, PushInt32 = 0xe0).
constexpr uint8_t OPCODE_NEXT_SIZE = 0x20;
// VM opcodes. The numeric range of an opcode encodes the operand width
// (see instruction_operand_type): below 0xa0 no operand, 0xa0-0xbf an 8-bit
// operand, 0xc0-0xdf a 16-bit operand, 0xe0 and above a 32-bit operand.
enum class Instruction : uint8_t {
// stack operations
PushInt8 = 0xa0,
PushInt16 = 0xc0,
PushInt32 = 0xe0,
PushConstant8 = 0xa1,
PushConstant16 = 0xc1,
PushConstant32 = 0xe1,
PushFunction8 = 0xa2,
PushFunction16 = 0xc2,
PushFunction32 = 0xe2,
PushZero = 0x00,
PushTrue = 0x01,
NewArray = 0x02,
NewTable = 0x03,
Pop = 0x04,
Duplicate = 0x05,
// local variables
PushValues8 = 0xa3,
PushValues16 = 0xc3,
PushValues32 = 0xe3,
// note: SetValue uses slot 0x0b of each operand range, after Jump (0x0a)
SetValue8 = 0xab,
SetValue16 = 0xcb,
SetValue32 = 0xeb,
DuplicateValue8 = 0xa4,
DuplicateValue16 = 0xc4,
DuplicateValue32 = 0xe4,
SetGlobal8 = 0xa5,
SetGlobal16 = 0xc5,
SetGlobal32 = 0xe5,
GetGlobal8 = 0xa6,
GetGlobal16 = 0xc6,
GetGlobal32 = 0xe6,
// function operations
Call8 = 0xa7,
Call16 = 0xc7,
Call32 = 0xe7,
Return = 0x10,
ReturnNil = 0x11,
// table and array operations
GetKeyValue = 0x16,
SetKeyValue = 0x17,
GetArrayItem = 0x18,
SetArrayItem = 0x19,
Append = 0x1a,
Next = 0x1b,
SetMetatable = 0x1c,
GetMetatable = 0x1d,
// logical/arithmetic
Sum = 0x20,
Subtract = 0x21,
Multiply = 0x22,
Divide = 0x23,
DivideInt = 0x24,
Equals = 0x25,
NotEquals = 0x26,
LessThan = 0x27,
LessThanEq = 0x28,
GreaterThan = 0x29,
GreaterThanEq = 0x2a,
And = 0x2b,
Or = 0x2c,
Xor = 0x2d,
Power = 0x2e,
ShiftLeft = 0x2f,
ShiftRight = 0x30,
Modulo = 0x31,
// other value operations
Len = 0x40,
Type = 0x41,
Cast = 0x42,
Version = 0x43,
// control flow
BranchIfZero8 = 0xa8,
BranchIfZero16 = 0xc8,
BranchIfZero32 = 0xe8,
BranchIfNotZero8 = 0xa9,
BranchIfNotZero16 = 0xc9,
BranchIfNotZero32 = 0xe9,
Jump8 = 0xaa,
Jump16 = 0xca,
Jump32 = 0xea,
// external code
Compile = 0x48,
Assemble = 0x49,
Load = 0x4a,
};
// Returns the mnemonic of `inst` (followed by `oper` when the instruction
// takes an operand) and the encoded size of the instruction in bytes.
std::pair<std::string, size_t> debug_instruction(Instruction inst, int oper=0);
// Same as above, but decodes the instruction found at `addr` of function
// `function_id` in the given bytecode.
std::pair<std::string, size_t> debug_instruction(bc::Bytecode const& bt, uint32_t function_id, uint32_t addr);
// Width of the operand that follows an opcode in the code stream.
enum class OperandType { NoOperand, Int8, Int16, Int32 };
// Derives the operand width from the opcode's numeric range.
OperandType instruction_operand_type(Instruction instruction);
// Converts a mnemonic and optional operand into an opcode; returns an empty
// optional if the mnemonic is unknown or the operand presence is wrong.
std::optional<Instruction> translate_instruction(std::string const& txt, std::optional<int> op);
}
#endif //TYCHE_INSTRUCTION_HH

82
src/vm/code.cc Normal file
View File

@@ -0,0 +1,82 @@
#include "code.hh"
#include "../common/overloaded.hh"
#include "../instructions/instruction.hh"
namespace tyche::vm {
// Wraps the incoming byte array in a bc::Bytecode and makes it the code held
// by this object, replacing whatever was loaded before. Currently always
// reports function id 0 as the entry point.
FunctionId Code::import_bytecode(StaticByteArray const* incoming)
{
    // TODO - adjust function calls, constants
    bytecode_ = bc::Bytecode(incoming);
    return 0; // TODO
}
// Decodes the instruction at `location`: returns the opcode, its operand
// (0 when the instruction has none) and the location of the instruction that
// follows it in the same function.
Operation Code::operation(Location const& location) const
{
    uint32_t const fn = location.function_id;
    uint32_t const pc = location.pc;
    auto const inst = (Instruction) bytecode_.get_code_byte(fn, pc);

    int32_t operand = 0;
    uint32_t encoded_size = 0;   // opcode byte + operand bytes
    switch (instruction_operand_type(inst)) {
        case OperandType::NoOperand:
            encoded_size = 1;
            break;
        case OperandType::Int8:
            operand = bytecode_.get_code_int8(fn, pc + 1);
            encoded_size = 2;
            break;
        case OperandType::Int16:
            operand = bytecode_.get_code_int16(fn, pc + 1);
            encoded_size = 3;
            break;
        case OperandType::Int32:
            operand = bytecode_.get_code_int32(fn, pc + 1);
            encoded_size = 5;
            break;
        default:
            throw std::logic_error("Should not get here");
    }

    return {
        .instruction = inst,
        .operator_ = operand,
        .next_location = { .function_id = fn, .pc = pc + encoded_size },
    };
}
// Produces a textual listing of the loaded bytecode: a ".const" section with
// one numbered line per constant, followed by one ".func N" section per
// function containing one decoded instruction per line.
std::string Code::disassemble() const
{
    std::string listing = ".const\n";

    for (size_t idx = 0; idx < bytecode_.n_constants(); ++idx) {
        listing += "\t";
        listing += std::to_string(idx);
        listing += ": ";
        std::visit(overloaded {
            [&listing](float number) { listing += std::to_string(number); },
            [&listing](std::string const& text) { listing += "\"" + text + "\""; },
        }, bytecode_.get_constant(idx));
        listing += "\n";
    }
    listing += "\n";

    for (size_t fn = 0; fn < bytecode_.n_functions(); ++fn) {
        listing += ".func ";
        listing += std::to_string(fn);
        listing += "\n";

        // walk the function body one instruction at a time
        for (uint32_t addr = 0; addr < bytecode_.get_function_sz(fn); ) {
            auto [text, size] = debug_instruction(bytecode_, fn, addr);
            listing += "\t";
            listing += text;
            listing += "\n";
            addr += size;
        }
    }

    return listing;
}
} // tyche

34
src/vm/code.hh Normal file
View File

@@ -0,0 +1,34 @@
#ifndef TYCHE_CODE_HH
#define TYCHE_CODE_HH
#include "../instructions/instruction.hh"
#include "location.hh"
#include "value.hh"
#include "../bytecode/bytecode.hh"
namespace tyche::vm {
// One decoded instruction, as produced by Code::operation().
struct Operation
{
Instruction instruction;   // opcode
int32_t operator_;         // operand value; 0 for instructions without operand
Location next_location;    // location of the instruction following this one
};
// Owns the bytecode executed by the VM and provides decoding helpers.
class Code {
public:
// Loads `incoming` as the current bytecode and returns the id of the
// function to call (currently always 0).
FunctionId import_bytecode(StaticByteArray const* incoming);
// Human-readable listing of constants and functions.
[[nodiscard]] std::string disassemble() const;
// Decodes the instruction at `location`.
[[nodiscard]] Operation operation(Location const& location) const;
// Read-only access to the underlying bytecode.
[[nodiscard]] bc::Bytecode const& bytecode() const { return bytecode_; }
private:
bc::Bytecode bytecode_;
};
} // tyche
#endif //TYCHE_CODE_HH

114
src/vm/expr.cc Normal file
View File

@@ -0,0 +1,114 @@
#include "expr.hh"
#include <cmath>
#include <functional>
#include "vm_exceptions.hh"
namespace tyche::vm {
// Dispatch table for binary operations, indexed as
// [operation][type of one operand][type of the other operand].
// It is filled in by the `init_` initializer below.
std::function<Value(Value const&, Value const&)> binary_ops[(size_t) BinaryOperationType::COUNT][(size_t) Type::COUNT][(size_t) Type::COUNT];
// Populates `binary_ops` once, during static initialization of this
// translation unit.
//   1. every (operation, type, type) slot gets a handler that throws
//      VMInvalidOperation;
//   2. Equality/Inequality default to "not equal" for every type pair;
//   3. the BIN_OP lines install the supported combinations.
// Note on BIN_OP: the generated lambda names its parameters (b, a), i.e. in
// the reverse order of how binary_operation() passes them, so inside a
// handler `a` is the operand whose type is the first type index (t1) and `b`
// the one whose type is the second (t2).
static int init_ = []() {
    // every combination, except when explicit, return type error
    for (size_t i = 0; i < (size_t) BinaryOperationType::COUNT; ++i) {
        for (size_t j = 0; j < (size_t) Type::COUNT; ++j) {
            for (size_t k = 0; k < (size_t) Type::COUNT; ++k) {
                // `i` must be captured BY VALUE: the handler is stored in a
                // global table and invoked long after this loop (and this
                // lambda) has finished, so a reference would dangle.
                binary_ops[i][j][k] = [i](Value const& a, Value const& b) -> Value {
                    throw VMInvalidOperation((BinaryOperationType) i, a.type(), b.type());
                };
            }
        }
    }
    // every equality/inequality, by default, return inequal
    for (size_t j = 0; j < (size_t) Type::COUNT; ++j) {
        for (size_t k = 0; k < (size_t) Type::COUNT; ++k) {
            binary_ops[(size_t) BinaryOperationType::Equality][j][k] = [](Value const&, Value const&) { return Value::createFalse(); };
            binary_ops[(size_t) BinaryOperationType::Inequality][j][k] = [](Value const&, Value const&) { return Value::createTrue(); };
        }
    }
#define BIN_OP(op, t1, t2) binary_ops[(size_t) BinaryOperationType::op][(size_t) Type::t1][(size_t) Type::t2] = [](Value const& b, Value const& a)
    BIN_OP(Sum, Integer, Integer) { return Value::createInteger(a.as_integer() + b.as_integer()); };
    BIN_OP(Sum, Integer, Float) { return Value::createFloat((float) a.as_integer() + b.as_float()); };
    BIN_OP(Sum, Float, Integer) { return Value::createFloat(a.as_float() + (float) b.as_integer()); };
    BIN_OP(Sum, Float, Float) { return Value::createFloat(a.as_float() + b.as_float()); };
    BIN_OP(Sum, String, String) { return Value::createString(std::string(a.as_string_ptr()) + std::string(b.as_string_ptr())); };
    BIN_OP(Subtraction, Integer, Integer) { return Value::createInteger(a.as_integer() - b.as_integer()); };
    BIN_OP(Subtraction, Integer, Float) { return Value::createFloat((float) a.as_integer() - b.as_float()); };
    BIN_OP(Subtraction, Float, Integer) { return Value::createFloat(a.as_float() - (float) b.as_integer()); };
    BIN_OP(Subtraction, Float, Float) { return Value::createFloat(a.as_float() - b.as_float()); };
    BIN_OP(Multiplication, Integer, Integer) { return Value::createInteger(a.as_integer() * b.as_integer()); };
    BIN_OP(Multiplication, Integer, Float) { return Value::createFloat((float) a.as_integer() * b.as_float()); };
    BIN_OP(Multiplication, Float, Integer) { return Value::createFloat(a.as_float() * (float) b.as_integer()); };
    BIN_OP(Multiplication, Float, Float) { return Value::createFloat(a.as_float() * b.as_float()); };
    BIN_OP(Division, Integer, Integer) { return Value::createFloat((float) a.as_integer() / (float) b.as_integer()); };
    BIN_OP(Division, Integer, Float) { return Value::createFloat((float) a.as_integer() / b.as_float()); };
    BIN_OP(Division, Float, Integer) { return Value::createFloat(a.as_float() / (float) b.as_integer()); };
    BIN_OP(Division, Float, Float) { return Value::createFloat(a.as_float() / b.as_float()); };
    BIN_OP(IntegerDivision, Integer, Integer) { return Value::createInteger(a.as_integer() / b.as_integer()); };
    BIN_OP(IntegerDivision, Integer, Float) { return Value::createInteger(a.as_integer() / (int32_t) b.as_float()); };
    BIN_OP(IntegerDivision, Float, Integer) { return Value::createInteger((int32_t) a.as_float() / b.as_integer()); };
    BIN_OP(IntegerDivision, Float, Float) { return Value::createInteger((int32_t) a.as_float() / (int32_t) b.as_float()); };
    BIN_OP(Equality, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() == b.as_integer()); };
    BIN_OP(Equality, Integer, Float) { return Value::createIntegerFromBool(std::abs((float) a.as_integer() - b.as_float()) < FLOAT_EPSILON); };
    BIN_OP(Equality, Float, Integer) { return Value::createIntegerFromBool(std::abs(a.as_float() - (float) b.as_integer()) < FLOAT_EPSILON); };
    BIN_OP(Equality, Float, Float) { return Value::createIntegerFromBool(std::abs(a.as_float() - b.as_float()) < FLOAT_EPSILON); };
    BIN_OP(Equality, String, String) { return Value::createIntegerFromBool(strcmp(a.as_string_ptr(), b.as_string_ptr()) == 0); };
    BIN_OP(Inequality, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() != b.as_integer()); };
    BIN_OP(Inequality, Integer, Float) { return Value::createIntegerFromBool(std::abs((float) a.as_integer() - b.as_float()) >= FLOAT_EPSILON); };
    BIN_OP(Inequality, Float, Integer) { return Value::createIntegerFromBool(std::abs(a.as_float() - (float) b.as_integer()) >= FLOAT_EPSILON); };
    BIN_OP(Inequality, Float, Float) { return Value::createIntegerFromBool(std::abs(a.as_float() - b.as_float()) >= FLOAT_EPSILON); };
    BIN_OP(Inequality, String, String) { return Value::createIntegerFromBool(strcmp(a.as_string_ptr(), b.as_string_ptr()) != 0); };
    BIN_OP(LessThan, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() < b.as_integer()); };
    BIN_OP(LessThan, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() < b.as_float()); };
    BIN_OP(LessThan, Float, Integer) { return Value::createIntegerFromBool(a.as_float() < (float) b.as_integer()); };
    BIN_OP(LessThan, Float, Float) { return Value::createIntegerFromBool(a.as_float() < b.as_float()); };
    BIN_OP(LessThanOrEquals, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() <= b.as_integer()); };
    BIN_OP(LessThanOrEquals, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() <= b.as_float()); };
    BIN_OP(LessThanOrEquals, Float, Integer) { return Value::createIntegerFromBool(a.as_float() <= (float) b.as_integer()); };
    BIN_OP(LessThanOrEquals, Float, Float) { return Value::createIntegerFromBool(a.as_float() <= b.as_float()); };
    BIN_OP(GreaterThan, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() > b.as_integer()); };
    BIN_OP(GreaterThan, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() > b.as_float()); };
    BIN_OP(GreaterThan, Float, Integer) { return Value::createIntegerFromBool(a.as_float() > (float) b.as_integer()); };
    BIN_OP(GreaterThan, Float, Float) { return Value::createIntegerFromBool(a.as_float() > b.as_float()); };
    BIN_OP(GreaterThanOrEquals, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() >= b.as_integer()); };
    BIN_OP(GreaterThanOrEquals, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() >= b.as_float()); };
    BIN_OP(GreaterThanOrEquals, Float, Integer) { return Value::createIntegerFromBool(a.as_float() >= (float) b.as_integer()); };
    BIN_OP(GreaterThanOrEquals, Float, Float) { return Value::createIntegerFromBool(a.as_float() >= b.as_float()); };
    BIN_OP(Power, Integer, Integer) { return Value::createInteger((int32_t) powl(a.as_integer(), b.as_integer())); };
    BIN_OP(Power, Integer, Float) { return Value::createFloat(powf((float) a.as_integer(), b.as_float())); };
    BIN_OP(Power, Float, Integer) { return Value::createFloat(powf(a.as_float(), (float) b.as_integer())); };
    BIN_OP(Power, Float, Float) { return Value::createFloat(powf(a.as_float(), b.as_float())); };
    BIN_OP(Modulo, Integer, Integer) { return Value::createInteger(a.as_integer() % b.as_integer()); };
    BIN_OP(ShiftLeft, Integer, Integer) { return Value::createInteger(a.as_integer() << b.as_integer()); };
    BIN_OP(ShiftRight, Integer, Integer) { return Value::createInteger(a.as_integer() >> b.as_integer()); };
    BIN_OP(BitwiseAnd, Integer, Integer) { return Value::createInteger(a.as_integer() & b.as_integer()); };
    BIN_OP(BitwiseOr, Integer, Integer) { return Value::createInteger(a.as_integer() | b.as_integer()); };
    BIN_OP(BitwiseXor, Integer, Integer) { return Value::createInteger(a.as_integer() ^ b.as_integer()); };
#undef BIN_OP
    return 0;
}();
// Applies binary operation `op` to the two values by looking up the handler
// registered for (op, type of b, type of a) and invoking it with (a, b).
Value binary_operation(Value const& a, Value const& b, BinaryOperationType op)
{
    auto const op_idx = (size_t) op;
    auto const first_type = (size_t) b.type();
    auto const second_type = (size_t) a.type();
    auto const& handler = binary_ops[op_idx][first_type][second_type];
    return handler(a, b);
}
}

21
src/vm/expr.hh Normal file
View File

@@ -0,0 +1,21 @@
#ifndef TYCHE_EXPR_HH
#define TYCHE_EXPR_HH
#include "value.hh"
namespace tyche::vm {
// Binary operations supported by binary_operation(). COUNT is not an
// operation; it is the number of enumerators and sizes the dispatch table.
enum class BinaryOperationType
{
Sum, Subtraction, Multiplication, Division, IntegerDivision,
Equality, Inequality, LessThan, LessThanOrEquals,
GreaterThan, GreaterThanOrEquals, Power, Modulo,
BitwiseAnd, BitwiseOr, BitwiseXor, ShiftLeft, ShiftRight,
COUNT
};
// Two floats whose absolute difference is below this value compare as equal.
constexpr float FLOAT_EPSILON = 0.000001f;
// Evaluates `op` on the two values; throws VMInvalidOperation when the
// operation is not defined for the operand types.
Value binary_operation(Value const& a, Value const& b, BinaryOperationType op);
}
#endif //TYCHE_EXPR_HH

16
src/vm/location.hh Normal file
View File

@@ -0,0 +1,16 @@
#ifndef TYCHE_LOCATION_HH
#define TYCHE_LOCATION_HH
#include <cstdint>
namespace tyche::vm {
// Position inside the loaded bytecode.
struct Location
{
uint32_t function_id;   // which function
uint32_t pc;            // byte offset of the instruction within that function
};
}
#endif //TYCHE_LOCATION_HH

95
src/vm/stack.cc Normal file
View File

@@ -0,0 +1,95 @@
#include "stack.hh"
#include "vm_exceptions.hh"
namespace tyche::vm {
// Starts with a single base frame whose frame pointer is index 0.
Stack::Stack()
{
    fps_.emplace(0);
}
// Appends a copy of `value` on top of the stack.
void Stack::push(Value const& value)
{
    stack_.emplace_back(value);
}
// Removes and returns the topmost value of the current frame.
// Throws VMStackUnderflow when the current frame holds no values.
Value Stack::pop()
{
    bool const frame_is_empty = fps_.top() >= stack_.size();
    if (frame_is_empty)
        throw VMStackUnderflow();

    Value top = stack_.back();
    stack_.pop_back();
    return top;
}
// Returns a copy of the topmost value of the current frame without removing
// it. Throws VMStackUnderflow when the current frame holds no values.
Value Stack::peek() const
{
    bool const frame_is_empty = fps_.top() >= stack_.size();
    if (frame_is_empty)
        throw VMStackUnderflow();
    return stack_.back();
}
// Returns a copy of the value at `pos` within the current frame.
//   pos >= 0 : counted from the frame base (0 = first value of the frame)
//   pos <  0 : counted from the top (-1 = topmost value)
// Throws VMStackOutOfRange when `pos` does not address a value of the
// current frame. Negative positions are bounded by the frame size so they
// cannot reach into values that belong to a caller's frame, matching the
// frame isolation enforced by pop()/peek()/size().
Value Stack::at(int pos) const
{
    try {
        if (pos >= 0)
            return stack_.at(fps_.top() + pos);

        // distance from the top; widened first so that INT_MIN negates safely
        auto const depth = static_cast<size_t>(-static_cast<long long>(pos));
        if (depth > stack_.size() - fps_.top())
            throw VMStackOutOfRange();
        return stack_.at(stack_.size() - depth);
    } catch (std::out_of_range const&) {
        throw VMStackOutOfRange();
    }
}
// Overwrites the value at `pos` within the current frame with `val`.
//   pos >= 0 : counted from the frame base (0 = first value of the frame)
//   pos <  0 : counted from the top (-1 = topmost value)
// Throws VMStackOutOfRange when `pos` does not address a value of the
// current frame. Negative positions are bounded by the frame size so a
// callee cannot overwrite values that belong to a caller's frame.
void Stack::set(int pos, Value const& val)
{
    try {
        if (pos >= 0) {
            stack_.at(fps_.top() + pos) = val;
            return;
        }

        // distance from the top; widened first so that INT_MIN negates safely
        auto const depth = static_cast<size_t>(-static_cast<long long>(pos));
        if (depth > stack_.size() - fps_.top())
            throw VMStackOutOfRange();
        stack_.at(stack_.size() - depth) = val;
    } catch (std::out_of_range const&) {
        throw VMStackOutOfRange();
    }
}
// Number of values in the current frame (values above the frame pointer).
size_t Stack::size() const
{
    auto const total = stack_.size();
    auto const frame_base = fps_.top();
    return total - frame_base;
}
// Opens a new frame starting at the current top of the stack.
void Stack::push_fp()
{
    fps_.emplace(stack_.size());
}
// Closes the current frame: discards every value pushed since the matching
// push_fp() and restores the previous frame pointer.
// Throws VMStackUnderflow when only the base frame is left.
void Stack::pop_fp()
{
    if (fps_.size() == 1)
        throw VMStackUnderflow();

    size_t const frame_base = fps_.top();
    fps_.pop();
    stack_.resize(frame_base);
}
std::string Stack::debug() const
{
if (stack_.empty())
return "empty";
std::string out;
for (size_t i = 0; i < stack_.size(); ++i)
out += "[" + stack_.at(i).to_string() + "] ";
return out;
}
} // tyche

38
src/vm/stack.hh Normal file
View File

@@ -0,0 +1,38 @@
#ifndef TYCHE_STACK_HH
#define TYCHE_STACK_HH
#include <stack>
#include <vector>
#include "value.hh"
namespace tyche::vm {
// Value stack of the VM, divided into frames. Indexing, size() and
// pop()/peek() operate relative to the current (innermost) frame.
class Stack {
public:
Stack();
// Pushes a copy of `value` on top.
void push(Value const& value);
// Removes and returns the top value; throws VMStackUnderflow if the
// current frame is empty.
Value pop();
// Returns the top value without removing it.
[[nodiscard]] Value peek() const;
// Value at `pos`: >= 0 counts from the frame base, < 0 from the top.
[[nodiscard]] Value at(int pos) const;
// Number of values in the current frame.
[[nodiscard]] size_t size() const;
// Overwrites the value at `pos` (same indexing as at()).
void set(int pos, Value const& val);
// Opens a new frame at the current top of the stack.
void push_fp();
// Closes the current frame, discarding its values.
void pop_fp();
// Number of open frames, including the base frame.
[[nodiscard]] size_t fp_level() const { return fps_.size(); }
// Textual dump of all values, for debugging.
[[nodiscard]] std::string debug() const;
private:
std::vector<Value> stack_;   // all values, across every frame
std::stack<size_t> fps_;     // index in stack_ where each open frame starts
};
} // tyche
#endif //TYCHE_STACK_HH

292
src/vm/tests.cc Normal file
View File

@@ -0,0 +1,292 @@
#include "gtest/gtest.h"
#include "../bytecode/bytecodeprototype.hh"
#include "../bytecode/bytecode.hh"
#include "../assembler/assembler.hh"
#include "code.hh"
#include "stack.hh"
#include "vm.hh"
using namespace tyche;
using namespace tyche::bc;
using namespace tyche::vm;
// Test helper: assembles a program with two constants (0: 3.14,
// 1: "Hello world") and a function 0 whose body is `oper` followed by `ret`,
// loads it into a fresh VM, calls it and returns the VM for inspection.
static VM run(std::string oper) {
return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: 3.14
1: "Hello world"
.func 0
{}
ret
)", oper)).assemble()).call(0);
}
// Builds a two-function bytecode by hand, imports it into Code and prints the
// disassembly. There are no assertions: the test only checks that
// import + disassemble run without throwing.
TEST(Code, ImportSingleAndDebug)
{
BytecodePrototype bp;
bp.constants.emplace_back(3.14f);
bp.constants.emplace_back("HELLO");
bp.functions.emplace_back(0, 0);
bp.functions.at(0).code.append_byte(0xa0); // pushi
bp.functions.at(0).code.append_int8(42);
bp.functions.emplace_back(2, 1);
bp.functions.at(1).code.append_byte(0x1a); // appnd
ByteArray ba = Bytecode::generate(bp);
Code code;
// NOTE(review): Code::import_bytecode is declared taking
// `StaticByteArray const*` - confirm this call matches that signature.
code.import_bytecode(std::move(ba));
printf("%s\n", code.disassemble().c_str());
}
// Positive indices count from the bottom of the frame, negative indices from
// the top.
TEST(Stack, PushPullGet)
{
Stack stack;
stack.push(Value::createInteger(10));
stack.push(Value::createInteger(20));
stack.push(Value::createInteger(30));
ASSERT_EQ(stack.size(), 3);
ASSERT_EQ(stack.at(0).as_integer(), 10);
ASSERT_EQ(stack.at(1).as_integer(), 20);
ASSERT_EQ(stack.at(-1).as_integer(), 30);
ASSERT_EQ(stack.at(-2).as_integer(), 20);
}
// After push_fp() size and indexing are relative to the new frame; pop_fp()
// discards the frame's values and restores the previous view.
TEST(Stack, FramePointer)
{
Stack stack;
stack.push(Value::createInteger(10));
stack.push(Value::createInteger(20));
stack.push_fp();
stack.push(Value::createInteger(30));
stack.push(Value::createInteger(40));
stack.push(Value::createInteger(50));
// inside the new frame only 30/40/50 are visible
ASSERT_EQ(stack.size(), 3);
ASSERT_EQ(stack.at(0).as_integer(), 30);
ASSERT_EQ(stack.at(1).as_integer(), 40);
ASSERT_EQ(stack.at(-1).as_integer(), 50);
ASSERT_EQ(stack.at(-2).as_integer(), 40);
stack.pop_fp();
// back in the base frame
ASSERT_EQ(stack.size(), 2);
ASSERT_EQ(stack.at(0).as_integer(), 10);
ASSERT_EQ(stack.at(1).as_integer(), 20);
ASSERT_EQ(stack.at(-1).as_integer(), 20);
ASSERT_EQ(stack.at(-2).as_integer(), 10);
}
// Hand-assembled bytecode for `2 + 3`, executed end to end through the VM.
TEST(VM, BasicCode)
{
// code (2+3)
BytecodePrototype bp;
bp.functions.emplace_back(0, 0);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8);
bp.functions.at(0).code.append_int8(2);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8);
bp.functions.at(0).code.append_int8(3);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::Sum);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::Return);
ByteArray ba = Bytecode::generate(bp);
VM vm;
vm.load_bytecode(std::move(ba));
vm.call(0);
// the returned value is left on top of the stack
int32_t result = vm.to_integer(-1);
ASSERT_EQ(result, 5);
}
// Push/pop instructions and the to_integer/to_float conversions performed
// when reading results back from the VM.
TEST(VM, StackOperations)
{
ASSERT_EQ(run("pushi 5000").to_integer(-1), 5000);
ASSERT_EQ(run("pushi -5000").to_integer(-1), -5000);
ASSERT_FLOAT_EQ(run("pushi 5000").to_float(-1), 5000.f);
ASSERT_FLOAT_EQ(run("pushc 0").to_float(-1), 3.14f);
// to_integer truncates a float value
ASSERT_EQ(run("pushc 0").to_integer(-1), 3);
EXPECT_STREQ(run("pushc 1").to_string_ptr(-1), "Hello world");
ASSERT_TRUE(run("pushf 0").is_function(-1));
ASSERT_EQ(run("pushi 2\n pushi 3\n pop").to_integer(-1), 2);
}
// Every binary instruction applied to two integer operands. The first pushed
// value is the left-hand operand (e.g. `2 3 sub` yields -1).
TEST(VM, IntegerIntegerOperations)
{
// assembles `pushi op1; pushi op2; <oper>; ret` and returns the integer result
auto test_op = [](int32_t op1, int32_t op2, std::string oper) {
return VM().load_bytecode(as::Assembler(std::format(R"(
.func 0
pushi {}
pushi {}
{}
ret
)", op1, op2, oper)).assemble()).call(0).to_integer(-1);
};
ASSERT_EQ(test_op(2, 3, "sum"), 5);
ASSERT_EQ(test_op(2, 3, "sub"), -1);
ASSERT_EQ(test_op(2, 3, "mul"), 6);
ASSERT_EQ(test_op(20, 3, "idiv"), 6);
ASSERT_EQ(test_op(2, 3, "eq"), 0);
ASSERT_EQ(test_op(2, 3, "neq"), 1);
ASSERT_EQ(test_op(2, 3, "lt"), 1);
ASSERT_EQ(test_op(2, 3, "lte"), 1);
ASSERT_EQ(test_op(3, 3, "lte"), 1);
ASSERT_EQ(test_op(4, 3, "lte"), 0);
ASSERT_EQ(test_op(2, 3, "gt"), 0);
ASSERT_EQ(test_op(2, 3, "gte"), 0);
ASSERT_EQ(test_op(3, 3, "gte"), 1);
ASSERT_EQ(test_op(4, 3, "gte"), 1);
ASSERT_EQ(test_op(2, 3, "and"), 2);
ASSERT_EQ(test_op(2, 3, "or"), 3);
ASSERT_EQ(test_op(2, 3, "xor"), 1);
ASSERT_EQ(test_op(2, 3, "pow"), 8);
ASSERT_EQ(test_op(2, 3, "shl"), 16);
ASSERT_EQ(test_op(30, 2, "shr"), 7);
ASSERT_EQ(test_op(8, 3, "mod"), 2);
// `div` on two integers produces a float
ASSERT_FLOAT_EQ(run("pushi 3\n pushi 2\n div").to_float(-1), 1.5f);
}
// Binary instructions with an integer left operand and a float right operand.
// Integer results (idiv, eq) are checked with ASSERT_EQ; ASSERT_FLOAT_EQ is
// reserved for values that are actually floating point.
TEST(VM, IntegerFloatOperations)
{
    // assembles `pushi op1; pushc <op2>; <oper>; ret` and returns the VM
    auto test_op = [](int op1, std::string const& op2, std::string oper) -> VM {
        return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: {}
.func 0
pushi {}
pushc 0
{}
ret
)", op2, op1, oper)).assemble()).call(0);
    };
    ASSERT_FLOAT_EQ(test_op(2, "3.5", "sum").to_float(-1), 5.5f);
    ASSERT_FLOAT_EQ(test_op(2, "3.5", "sub").to_float(-1), -1.5f);
    ASSERT_FLOAT_EQ(test_op(2, "3.5", "mul").to_float(-1), 7.f);
    ASSERT_EQ(test_op(20, "3.5", "idiv").to_integer(-1), 6);
    ASSERT_FLOAT_EQ(test_op(20, "3.5", "div").to_float(-1), 5.7142859);
    ASSERT_EQ(test_op(3, "3.5", "eq").to_integer(-1), 0);
    ASSERT_EQ(test_op(3, "3.0", "eq").to_integer(-1), 1);
}
// Binary instructions with a float left operand and an integer right operand.
// Integer results (idiv, eq) are checked with ASSERT_EQ; ASSERT_FLOAT_EQ is
// reserved for values that are actually floating point.
TEST(VM, FloatIntegerOperations)
{
    // assembles `pushc <op1>; pushi op2; <oper>; ret` and returns the VM
    auto test_op = [](std::string const& op1, int op2, std::string oper) -> VM {
        return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: {}
.func 0
pushc 0
pushi {}
{}
ret
)", op1, op2, oper)).assemble()).call(0);
    };
    ASSERT_FLOAT_EQ(test_op("3.5", 2, "sum").to_float(-1), 5.5f);
    ASSERT_FLOAT_EQ(test_op("3.5", 2, "sub").to_float(-1), 1.5f);
    ASSERT_FLOAT_EQ(test_op("3.5", 2, "mul").to_float(-1), 7.f);
    ASSERT_EQ(test_op("3.5", 2, "idiv").to_integer(-1), 1);
    ASSERT_FLOAT_EQ(test_op("3.5", 2, "div").to_float(-1), 1.75f);
    ASSERT_EQ(test_op("3.5", 3, "eq").to_integer(-1), 0);
    ASSERT_EQ(test_op("3.0", 3, "eq").to_integer(-1), 1);
}
// Binary instructions with two float operands.
// Integer results (idiv, eq) are checked with ASSERT_EQ; ASSERT_FLOAT_EQ is
// reserved for values that are actually floating point.
TEST(VM, FloatFloatOperations)
{
    // assembles `pushc <op1>; pushc <op2>; <oper>; ret` and returns the VM
    auto test_op = [](std::string const& op1, std::string const& op2, std::string oper) -> VM {
        return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: {}
1: {}
.func 0
pushc 0
pushc 1
{}
ret
)", op1, op2, oper)).assemble()).call(0);
    };
    ASSERT_FLOAT_EQ(test_op("3.5", "2.2", "sum").to_float(-1), 5.7f);
    ASSERT_FLOAT_EQ(test_op("3.5", "2.2", "sub").to_float(-1), 1.3f);
    ASSERT_FLOAT_EQ(test_op("3.5", "2.2", "mul").to_float(-1), 7.7f);
    ASSERT_EQ(test_op("3.5", "2.2", "idiv").to_integer(-1), 1);
    ASSERT_FLOAT_EQ(test_op("4.5", "2.5", "div").to_float(-1), 1.8f);
    ASSERT_EQ(test_op("3.2005", "3.2", "eq").to_integer(-1), 0);
    ASSERT_EQ(test_op("3.2", "3.2", "eq").to_integer(-1), 1);
}
// String concatenation and equality, plus string-vs-integer equality.
// NOTE(review): each program below already carries its own `.const` and
// `.func 0` sections, yet run() embeds its argument inside another `.func 0`
// body that follows run()'s own `.const` section - confirm the assembler
// accepts the resulting nested/duplicated sections.
TEST(VM, StringString)
{
EXPECT_STREQ(run(R"(
.const
0: "Hello"
1: "World"
.func 0
pushc 0
pushc 1
sum
ret
)").to_string_ptr(-1), "HelloWorld");
ASSERT_EQ(run(R"(
.const
0: "Hello"
1: "World"
.func 0
pushc 0
pushc 1
eq
ret
)").to_integer(-1), 0);
ASSERT_EQ(run(R"(
.const
0: "Hello"
1: "Hello"
.func 0
pushc 0
pushc 1
eq
ret
)").to_integer(-1), 1);
// values of different types compare as not equal
ASSERT_EQ(run(R"(
.const
0: "Hello"
.func 0
pushc 0
pushi 1
eq
ret
)").to_integer(-1), 0);
}
// Reserves two local slots, assigns them and returns the first one; after the
// call only the returned value must remain on the stack.
// NOTE(review): the program carries its own `.func 0` header although run()
// already wraps its argument in one - confirm the assembler accepts this.
TEST(VM, LocalVariables)
{
VM vm = run(R"(
.func 0
pushv 2 ; local a, b
pushi 3 ; a = 3
set 0
pushi 4 ; b = 4
set 1
dupv 0 ; return a
ret
)");
ASSERT_EQ(vm.stack_sz(), 1);
ASSERT_EQ(vm.to_integer(-1), 3);
}
// Test runner entry point: lets GoogleTest parse its command-line flags and
// then runs every registered test.
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

53
src/vm/value.cc Normal file
View File

@@ -0,0 +1,53 @@
#include "value.hh"
#include "../common/overloaded.hh"
namespace tyche::vm {
// Human-readable name of a value type, used in error messages.
// Returns "???" for COUNT or any value outside the enumeration.
std::string type_name(Type type)
{
    const char* label = "???";
    switch (type) {
        case Type::Nil:           label = "nil"; break;
        case Type::Integer:       label = "integer"; break;
        case Type::Float:         label = "float"; break;
        case Type::String:        label = "string"; break;
        case Type::Array:         label = "array"; break;
        case Type::Table:         label = "table"; break;
        case Type::Function:      label = "function"; break;
        case Type::NativePointer: label = "native pointer"; break;
        case Type::COUNT:
        default:
            break;
    }
    return label;
}
// Reports the VM-level type of the stored value. Both string
// representations (owned std::string and borrowed const char* constant) are
// reported as Type::String.
Type Value::type() const
{
    return std::visit(overloaded {
        [](std::monostate) { return Type::Nil; },
        [](int32_t) { return Type::Integer; },
        [](float) { return Type::Float; },
        [](std::string const&) { return Type::String; },
        // Handle the borrowed-constant alternative explicitly. Without this
        // overload the pointer is implicitly converted to a temporary
        // std::string (copying the text) on every type() call.
        [](const char*) { return Type::String; },
        [](Function const&) { return Type::Function; },
    }, value_);
}
// Textual representation of the value for debugging output:
// "nil", the decimal number, the string contents, or "@<function id>".
std::string Value::to_string() const
{
    return std::visit(overloaded {
        [](std::monostate) { return std::string("nil"); },
        [](int32_t i) { return std::to_string(i); },
        [](float f) { return std::to_string(f); },
        [](std::string const& s) { return s; },
        // Borrowed constant string: build the result directly instead of
        // going through an implicit temporary std::string plus a copy.
        [](const char* s) { return std::string(s); },
        [](Function const& f) { return "@" + std::to_string(f.f_id); }
    }, value_);
}
// Returns a C-string view of a string value. For an owned std::string the
// pointer refers to this Value's internal buffer; for a constant it is the
// stored pointer itself. Throws std::logic_error for non-string values.
const char* Value::as_string_ptr() const
{
    if (std::holds_alternative<std::string>(value_))
        return std::get<std::string>(value_).c_str();
    if (std::holds_alternative<const char*>(value_))
        return std::get<const char*>(value_);
    throw std::logic_error("Shouldn't get here");
}
}

54
src/vm/value.hh Normal file
View File

@@ -0,0 +1,54 @@
#ifndef TYCHE_VALUE_HH
#define TYCHE_VALUE_HH
#include <cstdint>
#include <string>
#include <variant>
namespace tyche::vm {
// Identifier of a function inside the loaded bytecode.
using FunctionId = uint32_t;
// Types a Value can report. COUNT is not a type; it is the number of
// enumerators and is used to size per-type tables.
enum class Type : uint8_t
{
Nil = 0, Integer, Float, String, Array, Table, Function, NativePointer, COUNT
};
// Human-readable name of `type` ("???" for unknown values).
std::string type_name(Type type);
// A single VM value. Instances are created through the static create*
// factories; a default-constructed Value is nil.
class Value {
// Wrapper that distinguishes a function id from a plain integer inside the variant.
struct Function { FunctionId f_id; };
public:
Value() : value_(std::monostate()) {}
static Value createNil() { return Value(std::monostate()); }
static Value createInteger(int32_t v) { return Value(v); }
static Value createFloat(float f) { return Value(f); }
// Stores its own copy of `str`.
static Value createString(std::string const& str) { return Value(str); }
// Stores only the pointer (no copy): the pointed-to text must outlive the Value.
static Value createStringFromConstant(const char* str) { return Value(str); }
static Value createFunctionId(FunctionId f_id) { return Value(Function { f_id }); }
// Booleans are represented as the integers 0 and 1.
static Value createFalse() { return createInteger(0); }
static Value createTrue() { return createInteger(1); }
static Value createIntegerFromBool(bool b) { return createInteger(b ? 1 : 0); }
[[nodiscard]] Type type() const;
// The as_* accessors do not convert: std::get throws std::bad_variant_access
// when the value holds a different alternative.
[[nodiscard]] int32_t as_integer() const { return std::get<int32_t>(value_); }
[[nodiscard]] float as_float() const { return std::get<float>(value_); }
// Valid for both string representations; see the definition for lifetime.
[[nodiscard]] const char* as_string_ptr() const;
[[nodiscard]] FunctionId as_function_id() const { return std::get<Function>(value_).f_id; }
[[nodiscard]] std::string to_string() const;
private:
// std::string = owned string, const char* = borrowed constant string.
using Internal = std::variant<std::monostate, int32_t, float, std::string, const char*, Function>;
Internal value_;
explicit Value(Internal const& internal) : value_(internal) {}
};
}
#endif //TYCHE_VALUE_HH

208
src/vm/vm.cc Normal file
View File

@@ -0,0 +1,208 @@
#include "vm.hh"
#include "vm_exceptions.hh"
#include "expr.hh"
namespace tyche::vm {
// Imports `ba` into the VM's code and pushes the returned function id onto
// the stack, ready to be invoked with call().
VM& VM::load_bytecode(ByteArray const& ba)
{
    FunctionId const entry = code_.import_bytecode(ba);
    Value const entry_fn = Value::createFunctionId(entry);
    stack_.push(entry_fn);
    return *this;
}
// Pops a function value from the stack and runs it until it returns.
// Throws VMTypeError when the popped value is not a function.
// `n_params` is not used yet. The frame opened here is closed by the Return
// instruction itself, so no pop_fp() is issued after the run.
VM& VM::call(size_t n_params)
{
    // TODO - parameters
    Value const callee = stack_.pop();
    assert_type(callee, Type::Function);

    loc_.push(Location { callee.as_function_id(), 0 });
    stack_.push_fp();
    run_until_return();
    loc_.pop();
    return *this;
}
int32_t VM::to_integer(int index) const
{
Value i = stack_.at(index);
if (i.type() == Type::Integer)
return i.as_integer();
if (i.type() == Type::Float)
return (int32_t) i.as_float();
throw VMTypeError(Type::Integer, i.type());
}
float VM::to_float(int index) const
{
Value f = stack_.at(index);
if (f.type() == Type::Float)
return f.as_float();
if (f.type() == Type::Integer)
return (float) f.as_integer();
throw VMTypeError(Type::Float, f.type());
}
// Returns the string stored at stack position `index` as a C string.
// Throws VMTypeError when the value is not a string.
// NOTE(review): `s` is a local copy (Stack::at returns by value). When the
// value owns a std::string, as_string_ptr() returns c_str() of that copy,
// which is destroyed when this function returns - the caller then holds a
// dangling pointer. Only values created from constants (const char*) are
// safe. Fixing this needs reference access to the stack slot.
const char* VM::to_string_ptr(int index) const
{
Value s = stack_.at(index);
assert_type(s, Type::String);
return s.as_string_ptr();
}
// Pushes a nil value; returns *this for chaining.
VM& VM::push_nil()
{
    Value const nil = Value::createNil();
    stack_.push(nil);
    return *this;
}
// Pushes an integer value; returns *this for chaining.
VM& VM::push_integer(int32_t value)
{
    Value const boxed = Value::createInteger(value);
    stack_.push(boxed);
    return *this;
}
// Pushes a float value; returns *this for chaining.
VM& VM::push_float(float value)
{
    Value const boxed = Value::createFloat(value);
    stack_.push(boxed);
    return *this;
}
// Pushes a string value holding its own copy of `str`; returns *this.
VM& VM::push_string(std::string const& str)
{
    Value const boxed = Value::createString(str);
    stack_.push(boxed);
    return *this;
}
// Executes instructions until the frame that was current on entry has been
// closed, i.e. until the frame depth drops below its starting value.
void VM::run_until_return()
{
    size_t const entry_depth = stack_.fp_level();
    for (;;) {
        if (stack_.fp_level() < entry_depth)
            break;
        step();
    }
}
// Executes the single instruction at the current location (top of loc_).
// Every instruction except Return then advances the location to the next
// instruction. Opcodes that have no case here raise VMInvalidOpcode.
void VM::step()
{
Operation op = code_.operation(loc_.top());
switch (op.instruction) {
//
// stack management
//
case Instruction::PushInt8:
case Instruction::PushInt16:
case Instruction::PushInt32:
push_integer(op.operator_);
break;
case Instruction::PushConstant8:
case Instruction::PushConstant16:
case Instruction::PushConstant32: {
// constants are either floats or strings
auto cnst = code_.bytecode().get_constant(op.operator_);
if (auto f = std::get_if<float>(&cnst))
push_float(*f);
else if (auto s = std::get_if<const char*>(&cnst))
// the string is not copied: the Value keeps the constant's pointer
stack_.push(Value::createStringFromConstant(*s));
else
throw std::logic_error("Shouldn't get here");
break;
}
case Instruction::PushFunction8:
case Instruction::PushFunction16:
case Instruction::PushFunction32:
stack_.push(Value::createFunctionId(op.operator_));
break;
case Instruction::Pop:
stack_.pop();
break;
case Instruction::Duplicate:
stack_.push(stack_.peek());
break;
//
// variables
//
case Instruction::PushValues8:
case Instruction::PushValues16:
case Instruction::PushValues32:
// reserve `operand` slots, each initialized to nil
for (int i = 0; i < op.operator_; ++i)
push_nil();
break;
case Instruction::SetValue8:
case Instruction::SetValue16:
case Instruction::SetValue32: {
// pop the top value and store it in slot `operand` of the current frame
Value a = stack_.pop();
stack_.set(op.operator_, a);
break;
}
case Instruction::DuplicateValue8:
case Instruction::DuplicateValue16:
case Instruction::DuplicateValue32: {
// push a copy of slot `operand` of the current frame
Value a = stack_.at(op.operator_);
stack_.push(a);
break;
}
//
// logical/arithmetic
//
// BIN_OP pops the right-hand operand first (a), then the left-hand one (b),
// and pushes the result of the operation.
#define BIN_OP(op) { Value a = stack_.pop(); Value b = stack_.pop(); stack_.push(binary_operation(a, b, BinaryOperationType::op)); }
case Instruction::Sum: BIN_OP(Sum) break;
case Instruction::Subtract: BIN_OP(Subtraction) break;
case Instruction::Multiply: BIN_OP(Multiplication) break;
case Instruction::Divide: BIN_OP(Division) break;
case Instruction::DivideInt: BIN_OP(IntegerDivision) break;
case Instruction::Equals: BIN_OP(Equality) break;
case Instruction::NotEquals: BIN_OP(Inequality) break;
case Instruction::LessThan: BIN_OP(LessThan) break;
case Instruction::LessThanEq: BIN_OP(LessThanOrEquals) break;
case Instruction::GreaterThan: BIN_OP(GreaterThan) break;
case Instruction::GreaterThanEq: BIN_OP(GreaterThanOrEquals) break;
case Instruction::And: BIN_OP(BitwiseAnd) break;
case Instruction::Or: BIN_OP(BitwiseOr) break;
case Instruction::Xor: BIN_OP(BitwiseXor) break;
case Instruction::Power: BIN_OP(Power) break;
case Instruction::ShiftLeft: BIN_OP(ShiftLeft) break;
case Instruction::ShiftRight: BIN_OP(ShiftRight) break;
case Instruction::Modulo: BIN_OP(Modulo) break;
#undef BIN_OP
//
// function operations
//
case Instruction::Return: {
// keep the return value, drop the callee's frame, then hand the value
// back on the caller's frame; the location is intentionally not advanced
Value v = stack_.pop();
stack_.pop_fp();
stack_.push(v);
return;
}
default:
throw VMInvalidOpcode((uint8_t) op.instruction);
}
loc_.top() = op.next_location;
}
// Throws VMTypeError(expected, actual) unless `val` has type `type`.
void VM::assert_type(Value const& val, Type type)
{
    if (val.type() == type)
        return;
    throw VMTypeError(type, val.type());
}
} // tyche

51
src/vm/vm.hh Normal file
View File

@@ -0,0 +1,51 @@
#ifndef TYCHE_VM_HH
#define TYCHE_VM_HH
#include "code.hh"
#include "location.hh"
#include "stack.hh"
namespace tyche::vm {
// The virtual machine: holds the loaded code, the value stack and the stack
// of code locations being executed. Stack indices accepted by the accessors
// follow Stack::at(): >= 0 from the frame base, < 0 from the top.
class VM {
public:
// Loads bytecode and pushes its entry function onto the stack.
VM& load_bytecode(ByteArray const& ba);
// Pops a function from the stack and executes it until it returns.
VM& call(size_t n_params);
// Type queries on the value at `index`.
[[nodiscard]] bool is_nil(int index) const { return stack_.at(index).type() == Type::Nil; }
[[nodiscard]] bool is_integer(int index) const { return stack_.at(index).type() == Type::Integer; }
[[nodiscard]] bool is_float(int index) const { return stack_.at(index).type() == Type::Float; }
[[nodiscard]] bool is_string(int index) const { return stack_.at(index).type() == Type::String; }
[[nodiscard]] bool is_array(int index) const { return stack_.at(index).type() == Type::Array; }
[[nodiscard]] bool is_table(int index) const { return stack_.at(index).type() == Type::Table; }
[[nodiscard]] bool is_function(int index) const { return stack_.at(index).type() == Type::Function; }
[[nodiscard]] bool is_native_pointer(int index) const { return stack_.at(index).type() == Type::NativePointer; }
// Number of values in the current stack frame.
[[nodiscard]] size_t stack_sz() const { return stack_.size(); }
// Push helpers; each returns *this so calls can be chained.
VM& push_nil();
VM& push_integer(int32_t value);
VM& push_float(float value);
VM& push_string(std::string const& string);
// Read the value at `index`, converting between integer and float where
// possible; throw VMTypeError otherwise.
[[nodiscard]] int32_t to_integer(int index) const;
[[nodiscard]] float to_float(int index) const;
[[nodiscard]] const char* to_string_ptr(int index) const;
[[nodiscard]] std::string debug_stack() const { return stack_.debug(); }
private:
// Runs step() until the frame that was current on entry has been closed.
void run_until_return();
// Executes one instruction.
void step();
static void assert_type(Value const& val, Type type);
Stack stack_;
Code code_;
std::stack<Location> loc_;   // one entry per function call in progress
};
} // tyche
#endif //TYCHE_VM_HH

49
src/vm/vm_exceptions.hh Normal file
View File

@@ -0,0 +1,49 @@
#ifndef TYCHE_VM_EXCEPTIONS_HH
#define TYCHE_VM_EXCEPTIONS_HH
#include <stdexcept>
#include <string>
#include "expr.hh"
namespace tyche::vm {
// Base class of every error raised by the VM while executing code.
class VMRuntimeError : public std::runtime_error
{
public:
    explicit VMRuntimeError(std::string const& str)
        : std::runtime_error(str)
    {
    }
};
// Raised when popping/peeking an empty frame, or closing the base frame.
class VMStackUnderflow : public VMRuntimeError
{
public:
explicit VMStackUnderflow() : VMRuntimeError("Stack underflow") {}
};
// Raised when a stack index does not address an existing value.
class VMStackOutOfRange : public VMRuntimeError
{
public:
explicit VMStackOutOfRange() : VMRuntimeError("Item does not exist in stack") {}
};
// Raised when a value does not have the type an operation requires.
class VMTypeError : public VMRuntimeError
{
public:
explicit VMTypeError(Type expected, Type found) : VMRuntimeError("Type error (expected " + type_name(expected) + ", found " + type_name(found) + ")") {}
};
// Raised when the VM meets an opcode it does not implement.
class VMInvalidOpcode : public VMRuntimeError
{
public:
explicit VMInvalidOpcode(uint8_t opcode) : VMRuntimeError("Invalid opcode " + std::to_string(opcode)) {}
};
// Raised when a binary operation is not defined for the operand types.
// Note: `op` is accepted but not included in the message.
class VMInvalidOperation : public VMRuntimeError
{
public:
explicit VMInvalidOperation(BinaryOperationType op, Type type1, Type type2) : VMRuntimeError("Invalid binary operation for types " + type_name(type1) + " and " + type_name(type2)) {}
};
}
#endif //TYCHE_VM_EXCEPTIONS_HH