47 Commits

Author SHA1 Message Date
9561d5cacd . 2026-05-10 07:43:04 -05:00
0116a214f6 . 2026-05-09 15:07:36 -05:00
a70abe76ad . 2026-05-09 15:05:55 -05:00
e2f930641e . 2026-05-09 15:04:44 -05:00
85443ded9d . 2026-05-09 14:23:01 -05:00
6762c49ed3 . 2026-05-09 13:50:16 -05:00
610491c1d7 . 2026-05-09 13:26:13 -05:00
554a7b55c5 . 2026-05-09 11:34:51 -05:00
8a26ba5351 . 2026-05-09 10:37:02 -05:00
83b80f6e7d . 2026-05-09 10:24:09 -05:00
a6adb9b723 . 2026-05-09 09:24:46 -05:00
19b51fcaa0 . 2026-05-09 08:53:11 -05:00
27164aaac3 . 2026-05-08 20:36:21 -05:00
04b9821662 Merge remote-tracking branch 'origin/lua-temp' into lua-temp 2026-05-08 16:32:10 -05:00
b2a829d6e5 . 2026-05-08 16:32:06 -05:00
Andre Wagner
2634ddd1ca . 2026-05-08 14:02:06 -05:00
Andre Wagner
9ff38cd4c0 . 2026-05-07 20:26:03 -05:00
Andre Wagner
4a23c52781 . 2026-05-07 09:20:22 -05:00
Andre Wagner
8f851a330e . 2026-05-06 12:00:53 -05:00
Andre Wagner
7ecffcfdda . 2026-05-06 11:58:15 -05:00
Andre Wagner
88f9ce0ea6 . 2026-05-06 11:12:41 -05:00
Andre Wagner
0fae9a0b37 . 2026-05-06 10:49:45 -05:00
Andre Wagner
2725dc8d33 . 2026-05-05 21:16:21 -05:00
Andre Wagner
516ee9f406 . 2026-05-05 21:11:41 -05:00
Andre Wagner
6428c6cf7f . 2026-05-05 16:26:34 -05:00
43dea6ee8f . 2026-05-05 14:06:44 -05:00
Andre Wagner
566990318b . 2026-05-05 14:04:51 -05:00
Andre Wagner
8c36fb07c0 . 2026-05-05 14:04:30 -05:00
Andre Wagner
60c55304b2 . 2026-05-05 09:54:42 -05:00
Andre Wagner
8614f978ea . 2026-05-05 09:40:22 -05:00
Andre Wagner
0e9c8f6e63 . 2026-05-04 15:56:46 -05:00
Andre Wagner
299984cd4b . 2026-05-04 14:05:42 -05:00
Andre Wagner
8a685ebbc8 . 2026-05-04 11:24:35 -05:00
9bc6ad1c92 Variables (#8) 2026-05-02 20:12:15 -05:00
f9733f3b20 Expressions (#7) 2026-05-02 15:07:11 -05:00
a1aed4988a Assembler (#6)
Co-authored-by: Andre Wagner <WagnerAndre@JohnDeere.com>
Reviewed-on: https://192.168.5.48/andre/tyche/pulls/6
2026-05-01 10:12:41 -05:00
b835dbb36e VM basics (#5)
Co-authored-by: Andre Wagner <WagnerAndre@JohnDeere.com>
Reviewed-on: https://192.168.5.48/andre/tyche/pulls/5
2026-04-30 13:34:49 -05:00
Andre Wagner
71390b0f84 . 2026-04-29 16:04:30 -05:00
Andre Wagner
b471726e0d . 2026-04-29 15:57:39 -05:00
Andre Wagner
feb272e545 . 2026-04-29 15:50:27 -05:00
Andre Wagner
03b61f4339 . 2026-04-29 15:36:22 -05:00
Andre Wagner
30bfb38e9a . 2026-04-29 15:18:30 -05:00
Andre Wagner
635596c31d . 2026-04-29 14:56:03 -05:00
148c98e642 code (#4)
Reviewed-on: #4
2026-04-29 14:44:33 -05:00
54729c1e14 bytecode-improvements (#3)
Reviewed-on: #3
2026-04-29 11:40:46 -05:00
d8130272a0 bytecode2 (#2)
Reviewed-on: #2
2026-04-28 19:50:48 -05:00
3f097b0ba8 Byte array (#1)
Reviewed-on: #1
2026-04-27 09:42:03 -05:00
50 changed files with 4861 additions and 0 deletions

2
.gitignore vendored
View File

@@ -32,3 +32,5 @@
*.out *.out
*.app *.app
cmake-build-*/
build/

5
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/

37
.idea/editor.xml generated Normal file
View File

@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="BackendCodeEditorSettings">
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppClassCanBeFinal/@EntryIndexedValue" value="DO_NOT_SHOW" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppDefinitionsOrder/@EntryIndexedValue" value="HINT" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppDeprecatedOverridenMethod/@EntryIndexedValue" value="WARNING" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppModulePartitionWithSeveralPartitionUnits/@EntryIndexedValue" value="WARNING" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppRedundantFwdClassOrEnumSpecifier/@EntryIndexedValue" value="SUGGESTION" type="string" />
<option name="/Default/CodeInspection/Highlighting/InspectionSeverities/=CppRedundantQualifierADL/@EntryIndexedValue" value="DO_NOT_SHOW" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppCodeStyle/CVQualifiersPlacement/@EntryValue" value="AfterType" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ALIGN_TERNARY/@EntryValue" value="ALIGN_ALL" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ALLOW_COMMENT_AFTER_LBRACE/@EntryValue" value="true" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ANONYMOUS_METHOD_DECLARATION_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/CASE_BLOCK_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/EMPTY_BLOCK_STYLE/@EntryValue" value="TOGETHER_SAME_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/ENABLE_SLATE_FORMAT/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/INDENT_CASE_FROM_SWITCH/@EntryValue" value="true" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/INDENT_GOTO_LABELS/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/MAX_ENUM_MEMBERS_ON_LINE/@EntryValue" value="6" type="long" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/NAMESPACE_DECLARATION_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/NAMESPACE_INDENTATION/@EntryValue" value="None" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/OTHER_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/PLACE_CATCH_ON_NEW_LINE/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/PLACE_ELSE_ON_NEW_LINE/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/PLACE_WHILE_ON_NEW_LINE/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/REQUIRES_EXPRESSION_BRACES/@EntryValue" value="END_OF_LINE" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/SIMPLE_EMBEDDED_STATEMENT_STYLE/@EntryValue" value="LINE_BREAK" type="string" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/SPACE_AFTER_CAST_EXPRESSION_PARENTHESES/@EntryValue" value="true" type="bool" />
<option name="/Default/CodeStyle/CodeFormatting/CppFormatting/WRAP_LINES/@EntryValue" value="false" type="bool" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Classes_0020and_0020structs/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;1&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;__interface&quot; /&gt;&lt;type Name=&quot;class&quot; /&gt;&lt;type Name=&quot;struct&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Concepts/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;2&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;concept&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Enum_0020members/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;14&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;scoped enumerator&quot; /&gt;&lt;type Name=&quot;unscoped enumerator&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Enums/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;3&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;enum&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Other_0020constants/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;15&quot;&gt;&lt;Descriptor Static=&quot;True&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;True&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;class field&quot; /&gt;&lt;type Name=&quot;local variable&quot; /&gt;&lt;type Name=&quot;struct field&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AA_BB&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
<option name="/Default/CodeStyle/Naming/CppNaming/Rules/=Unions/@EntryIndexedValue" value="&lt;NamingElement Priority=&quot;4&quot;&gt;&lt;Descriptor Static=&quot;Indeterminate&quot; Constexpr=&quot;Indeterminate&quot; Const=&quot;Indeterminate&quot; Volatile=&quot;Indeterminate&quot; Accessibility=&quot;NOT_APPLICABLE&quot;&gt;&lt;type Name=&quot;union&quot; /&gt;&lt;/Descriptor&gt;&lt;Policy Inspect=&quot;True&quot; Prefix=&quot;&quot; Suffix=&quot;&quot; Style=&quot;AaBb&quot; /&gt;&lt;/NamingElement&gt;" type="string" />
</component>
</project>

9
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakePythonSetting">
<option name="pythonIntegrationState" value="YES" />
</component>
<component name="CMakeWorkspace">
<contentRoot DIR="$PROJECT_DIR$" />
</component>
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/tyche.iml" filepath="$PROJECT_DIR$/.idea/tyche.iml" />
</modules>
</component>
</project>

2
.idea/tyche.iml generated Normal file
View File

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module classpath="CIDR" type="CPP_MODULE" version="4" />

7
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

127
CMakeLists.txt Normal file
View File

@@ -0,0 +1,127 @@
cmake_minimum_required (VERSION 3.24)
project(tyche
VERSION 0.0.1
DESCRIPTION "An embeddable/standalone programming language"
LANGUAGES C CXX ASM)
#
# project options / configuration
#
set(CMAKE_C_STANDARD 17)
set(CMAKE_CXX_STANDARD 23 CACHE STRING "C++ Standard")
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set_property(GLOBAL PROPERTY CXX_EXTENSIONS OFF)
set_property(GLOBAL PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
set_property(GLOBAL PROPERTY LINK_WHAT_YOU_USE TRUE)
# warnings / flags
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(warnings -Wall -Wextra -Wformat-nonliteral -Wundef -Wshadow -Wwrite-strings -Wfloat-equal -Wswitch-default -Wmissing-format-attribute -Wswitch-enum -Wmissing-noreturn -Wno-unused-parameter -Wno-unused)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(warnings ${warnings} -Wsuggest-attribute=pure -Wsuggest-attribute=const -Wsuggest-attribute=noreturn -Wsuggest-attribute=malloc -Wsuggest-attribute=format -Wsuggest-attribute=cold)
endif()
endif()
# try to use ccache, if available
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
endif()
# ignore warnings in imported files
set_source_files_properties(${IMGUI_SRC} PROPERTIES COMPILE_FLAGS "-w")
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(-ggdb -O0)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(DEF B_PRODUCTION_MODE=ON)
add_compile_options(-Ofast -flto)
endif()
#
# libraries
#
include(FetchContent)
FetchContent_Declare(
googletest
# Specify the commit you depend on and update it regularly.
URL https://github.com/google/googletest/releases/download/v1.17.0/googletest-1.17.0.tar.gz
)
FetchContent_MakeAvailable(googletest)
#
# library
#
add_library(lib${PROJECT_NAME} SHARED
src/common/overloaded.hh
src/common/bytearray.hh
src/common/bytearray.cc
src/bytecode/bytecode.cc
src/bytecode/bytecode.hh
src/bytecode/bytecodeprototype.hh
src/bytecode/constant.hh
src/vm/code.cc
src/vm/code.hh
src/vm/instruction.hh
src/vm/instruction.cc
src/vm/value.cc
src/vm/value.hh
src/vm/stack.cc
src/vm/stack.hh
src/vm/vm_exceptions.hh
src/vm/vm.cc
src/vm/vm.hh
src/vm/expr.cc
src/vm/expr.hh
src/vm/location.hh
src/assembler/lexer.cc
src/assembler/lexer.hh
src/assembler/assembler.cc
src/assembler/assembler.hh
src/assembler/as_exceptions.hh
src/bytecode/bc_exceptions.hh
)
target_compile_options(lib${PROJECT_NAME} PRIVATE ${warnings})
#
# tests
#
enable_testing()
add_executable(${PROJECT_NAME}-bytecode-test src/bytecode/tests.cc)
target_link_libraries(${PROJECT_NAME}-bytecode-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_bytecode_test COMMAND ${PROJECT_NAME}-bytecode-test)
add_executable(${PROJECT_NAME}-vm-test src/vm/tests.cc)
target_link_libraries(${PROJECT_NAME}-vm-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_vm_test COMMAND ${PROJECT_NAME}-vm-test)
add_executable(${PROJECT_NAME}-as-test src/assembler/tests.cc)
target_link_libraries(${PROJECT_NAME}-as-test lib${PROJECT_NAME} gtest_main)
add_test(NAME tyche_as_test COMMAND ${PROJECT_NAME}-as-test)
#
# check for leaks
#
add_custom_target(leaks-vm-test)
add_custom_command(TARGET leaks-vm-test
POST_BUILD
COMMENT "Check for leaks using valgrind."
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMAND valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --suppressions=${CMAKE_SOURCE_DIR}/valgrind.supp ./${PROJECT_NAME}-vm-test
)
#
# installation
#
install(TARGETS lib${CMAKE_PROJECT_NAME} RUNTIME DESTINATION lib)

65
TODO.md Normal file
View File

@@ -0,0 +1,65 @@
## Bytecode
- [x] Byte array
- Auto-expand
- Add/retrive byte/int/float/string
- Should not be larger than the byte array itself
- [x] Bytecode
- Add/retrive all types of data
- Keeps no memory except for caching
- [x] Refactor bytecode code
Improvements:
- [x] Fixed int type (based on opcode)
- [x] Constant type (only floats and strings for now)
After some additional development:
- [ ] Bytecode debugging info
## VM
- [x] VM
- [x] Code
- [x] Simple bytecode loader
- [x] Output bytecode format
- [x] Value object
- [x] Stack object
- [x] External interface
- [x] Code execution (except functions)
- [x] Functions
- [x] Print stack
- [x] Assembler
- [ ] VM execution
- [x] Stack operations (nil, integer, float, string, function)
- [x] Integer
- [x] Float
- [x] String
- [x] Expressions
- [x] Integer
- [x] Float
- [x] String
- [ ] Local/global variables
- [ ] Functions
- [ ] Constants
- [ ] Other operations
- [ ] Arrays
- [ ] Iteration
- [ ] Expressions
- [ ] Tables
- [ ] Iteration
- [ ] Metatables
- [ ] Expressions
- [ ] Control flow
- [ ] Compilation
- [ ] Error handling
- [ ] C++ API
- [ ] Run native code on VM
- [ ] Run tyche code from C++
- [ ] C API
After some additional development:
- [ ] Bytecode loader
- Combine multiple chunks
- Resolve function ids, constant ids, etc
- [ ] Upvalues

35
doc/BYTECODE Normal file
View File

@@ -0,0 +1,35 @@
Bytecode format
---------------
The bytecode file is composed of the following sections:
* HEADER: 16-byte header
[0:3]: Magic
[4]: VM format
[rest]: Reserved for future use
* TABLE_OF_CONTENTS: list of 8 records pointing to each one of the sections
Each record (6 bytes):
- Pointer to section: 4 bytes
- Number of records in section: 2 bytes
* [0x0] Constants indexes: pointers to each of the constant locations
* Table of 4-byte constant indexes with pointer to constant
(counter start at beginning of raw constants)
* [0x1] Functions indexes: Pointer to functions within the code
[0:3]: function pointer (counter start at the beginning of executable code)
[4:5]: number of parameters
[6:7]: number of local variables
[8:b]: function size
* [0x2] Constants raw data
* [0x3] Code: executable code
* [0x4] Debugging info
???
The max file size is 2 Gb.
## Values can be encoded in the following ways:
* The type is defined by the operator.
* Encoding varies according to the type:
int: use protobuf format
float: 4-bit floating point
string: int-defined length, followed by the string proper - no null terminator
* Constant indexes and function ids are encoded as ints

93
doc/OPCODES Normal file
View File

@@ -0,0 +1,93 @@
Operations
----------
Operations take either 0 or 1 parameter. The ones that take a parameter, it can be either a int8, int16 or int32.
Instructions follow this logic:
00 ~ 9F : no parameter
A0 ~ BF : int8 (1 byte)
C0 ~ DF : int16 (2 bytes)
E0 ~ FF : int32 (4 bytes)
The operations of 1, 2 and 4 bytes are always interchangeable by adding/subtracting 0x20.
,----------- no parameter
| ,-------- int8
| | ,----- int16
| | | ,-- int32
NP I8 I16 I32 Opc Instruction Description
Stack operations:
a0 c0 e0 pushi [int] Push int
a1 c1 e1 pushc [index] Push constant
a2 c2 e2 pushf [function] Push function id
00 pushz Push zero (or false)
01 pusht Push true
02 newa Push (create) empty array
03 newt Push (create) empty table
04 pop
05 dup
Local variables:
a3 c3 e3 pushv [int] Push n nil values into the stack (used to init local vars)
ab cb eb set [index] Set value in stack position (set local variable)
a4 c4 e4 dupv [index] Duplicate stack value (load local variable)
a5 c5 e5 setg [int] Set global variable
a6 c6 e6 getg [int] Get global variable
Function operations:
a7 c7 e7 call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters)
10 ret Leave a function (return value in stack)
11 retn Leave a function (return nil)
Table and array operations:
16 getkv Get table's value based on key (pull 1 value, push 1 value)
17 setkv Set table's key and value (pull 2 values from stack)
18 geta Get array's position value
19 seta Set array's position value (pull 2 values from stack)
1a appnd Add value to the end of array
1b next Push the next pair into the stack (for loops)
1c smt Set value metatable
1d mt Get value metatable
Logical/arithmetic:
20 sum Sum top 2 values in stack
21 sub Subtract top 2 values in stack
22 mul Multiply top 2 values in stack
23 div Float division
24 idiv Integer division
25 eq Equality
26 neq Inequality
27 lt Less than
28 lte Less than or equals
29 gt Greater than
2a gte Greater than or equals
2b and Bitwise AND
2c or Bitwise OR
2d xor Bitwise XOR
2e pow Power
2f shl Shift left
30 shr Shift right
31 mod Modulo
Other value operations:
40 len Get table, array or string size
41 type Get type from value at the top of the stack
b0 cast [type] Cast type to another type
42 ver Return VM version
External code:
48 cmpl Compile code to assembly
49 asmbl Assemble code to bytecode format
4a load Load bytecode as function (will place function on stack)
Control flow:
a8 c8 e8 bz [pc] Branch if zero
a9 c9 e9 bnz [pc] Branch if not zero
aa ca ea jmp [pc] Unconditional jump
* Jumps can only happen within the same function.
Error handling: (0xa0~0xaf)
???

15
doc/VM Normal file
View File

@@ -0,0 +1,15 @@
Internal handling of values
---------------------------
## Supported types
Nil 0
Integer 1
Float 2
String 3
Array 4
Table 5
Function 6
NativePointer 7
## Internal format
???

37
lua-temp/TODO.md Normal file
View File

@@ -0,0 +1,37 @@
Progress of the Lua port:
- [x] Assembler
- [x] Basic VM execution
- [x] Logic/arithmetic expressions
- [x] Variables
- [x] Local variables
- [x] Functions
- [x] Calling functions
- [x] Calling functions with parameters
- [x] Control flow
- [x] Labels in Assembly
- [x] Recursion
- [x] Strings
- [x] From constants
- [x] Garbage collection
- [x] Arrays
- [x] Garbage collection
- [ ] Tables
- [ ] Garbage collection
- [ ] Metatables
- [ ] Real
- [ ] Globals
- [ ] Error handling
- [ ] Stack traces in case of errors
- [ ] Closures/upvalues
- [ ] Assembler generate bytecode
- [ ] VM interpret it
## C interface
- [ ] Error management (decision)
- [ ] Format for value and heap value
- [ ] Transparency and log levels

35
lua-temp/doc/BYTECODE Normal file
View File

@@ -0,0 +1,35 @@
Bytecode format
---------------
The bytecode file is composed of the following sections:
* HEADER: 16-byte header
[0:3]: Magic
[4]: VM format
[rest]: Reserved for future use
* TABLE_OF_CONTENTS: list of 8 records pointing to each one of the sections
Each record (6 bytes):
- Pointer to section: 4 bytes
- Number of records in section: 2 bytes
* [0x0] Constants indexes: pointers to each of the constant locations
* Table of 4-byte constant indexes with pointer to constant
(counter start at beginning of raw constants)
* [0x1] Functions indexes: Pointer to functions within the code
[0:3]: function pointer (counter start at the beginning of executable code)
[4:5]: number of parameters
[6:7]: number of local variables
[8:b]: function size
* [0x2] Constants raw data
* [0x3] Code: executable code
* [0x4] Debugging info
???
The max file size is 2 Gb.
## Values can be encoded in the following ways:
* The type is defined by the operator.
* Encoding varies according to the type:
int: use protobuf format
float: 4-bit floating point
string: int-defined length, followed by the string proper - no null terminator
* Constant indexes and function ids are encoded as ints

96
lua-temp/doc/OPCODES Normal file
View File

@@ -0,0 +1,96 @@
Operations
----------
Operations take either 0 or 1 parameter. The ones that take a parameter, it can be either a int8, int16 or int32.
Instructions follow this logic:
00 ~ 9F : no parameter
A0 ~ BF : int8 (1 byte)
C0 ~ DF : int16 (2 bytes)
E0 ~ FF : int32 (4 bytes)
The operations of 1, 2 and 4 bytes are always interchangeable by adding/subtracting 0x20.
,----------- no parameter
| ,-------- int8
| | ,----- int16
| | | ,-- int32
NP I8 I16 I32 Opc Instruction Description
Stack operations:
a0 c0 e0 pushi [int] Push int
a1 c1 e1 pushc [index] Push constant
a2 c2 e2 pushf [function] Push function id
00 pushn Push nil
01 pushz Push zero (or false)
02 pusht Push true
03 newa Push (create) empty array
04 newt Push (create) empty table
05 pop
06 dup
Local variables:
a3 c3 e3 pushv [int] Push n nil values into the stack (used to init local vars)
ab cb eb set [index] Set value in stack position (set local variable)
a4 c4 e4 dupv [index] Duplicate stack value (load local variable)
a5 c5 e5 setg [int] Set global variable
a6 c6 e6 getg [int] Get global variable
Function operations:
a7 c7 e7 call [n_pars] Enter function on stack toplevel (passing n next stack values as parameters)
10 ret Leave a function (return value in stack)
11 retn Leave a function (return nil)
Table and array operations:
16 getkv Get table's value based on key (pull 1 value, push 1 value)
17 setkv Set table's key and value (pull 2 values from stack)
a8 c8 e8 geti Get array's position value
a9 c9 e9 seti Set array's position value
18 appnd Add value to the end of array
19 next Push the next pair into the stack (for loops)
1a smt Set value metatable
1b mt Get value metatable
Logical/arithmetic:
20 sum Sum top 2 values in stack
21 sub Subtract top 2 values in stack
22 mul Multiply top 2 values in stack
23 div Float division
24 idiv Integer division
25 mod Modulo
26 eq Equality
27 neq Inequality
28 lt Less than
29 lte Less than or equals
2a gt Greater than
2b gte Greater than or equals
2c and Bitwise AND
2d or Bitwise OR
2e xor Bitwise XOR
2f pow Power
30 shl Shift left
31 shr Shift right
Other value operations:
40 len Get table, array or string size
41 type Get type from value at the top of the stack
aa cast [type] Cast type to another type
42 ver Return VM version
External code:
48 cmpl Compile code to assembly
49 asmbl Assemble code to bytecode format
4a load Load bytecode as function (will place function on stack)
Control flow (the destination is always a 16-bit field):
ca bz [pc] Branch if zero
cb bnz [pc] Branch if not zero
cc jmp [pc] Unconditional jump
* Jumps can only happen within the same function.
Memory management:
4b gc Call garbage collector
Error handling: (0xa0~0xaf)
???

15
lua-temp/doc/VM Normal file
View File

@@ -0,0 +1,15 @@
Internal handling of values
---------------------------
## Supported types
Nil 0
Integer 1
Float 2
String 3
Array 4
Table 5
Function 6
NativePointer 7
## Internal format
???

584
lua-temp/pprint.lua Normal file
View File

@@ -0,0 +1,584 @@
local pprint = { VERSION = '0.1' }
local depth = 1
pprint.defaults = {
-- If set to number N, then limit table recursion to N deep.
depth_limit = false,
-- type display trigger, hide not useful datatypes by default
-- custom types are treated as table
show_nil = true,
show_boolean = true,
show_number = true,
show_string = true,
show_table = true,
show_function = false,
show_thread = false,
show_userdata = false,
-- additional display trigger
show_metatable = false, -- show metatable
show_all = false, -- override other show settings and show everything
use_tostring = false, -- use __tostring to print table if available
filter_function = nil, -- called like callback(value[,key, parent]), return truty value to hide
object_cache = 'local', -- cache blob and table to give it a id, 'local' cache per print, 'global' cache
-- per process, falsy value to disable (might cause infinite loop)
-- format settings
indent_size = 2, -- indent for each nested table level
level_width = 80, -- max width per indent level
wrap_string = true, -- wrap string when it's longer than level_width
wrap_array = false, -- wrap every array elements
string_is_utf8 = true, -- treat string as utf8, and count utf8 char when wrapping, if possible
sort_keys = true, -- sort table keys
}
local TYPES = {
['nil'] = 1, ['boolean'] = 2, ['number'] = 3, ['string'] = 4,
['table'] = 5, ['function'] = 6, ['thread'] = 7, ['userdata'] = 8
}
-- seems this is the only way to escape these, as lua don't know how to map char '\a' to 'a'
local ESCAPE_MAP = {
['\a'] = '\\a', ['\b'] = '\\b', ['\f'] = '\\f', ['\n'] = '\\n', ['\r'] = '\\r',
['\t'] = '\\t', ['\v'] = '\\v', ['\\'] = '\\\\',
}
-- generic utilities
local tokenize_string = function(s)
local t = {}
for i = 1, #s do
local c = s:sub(i, i)
local b = c:byte()
local e = ESCAPE_MAP[c]
if (b >= 0x20 and b < 0x80) or e then
local s = e or c
t[i] = { char = s, len = #s }
else
t[i] = { char = string.format('\\x%02x', b), len = 4 }
end
if c == '"' then
t.has_double_quote = true
elseif c == "'" then
t.has_single_quote = true
end
end
return t
end
local tokenize_utf8_string = tokenize_string
local has_lpeg, lpeg = pcall(require, 'lpeg')
if has_lpeg then
local function utf8_valid_char(c)
return { char = c, len = 1 }
end
local function utf8_invalid_char(c)
local b = c:byte()
local e = ESCAPE_MAP[c]
if (b >= 0x20 and b < 0x80) or e then
local s = e or c
return { char = s, len = #s }
else
return { char = string.format('\\x%02x', b), len = 4 }
end
end
local cont = lpeg.R('\x80\xbf')
local utf8_char =
lpeg.R('\x20\x7f') +
lpeg.R('\xc0\xdf') * cont +
lpeg.R('\xe0\xef') * cont * cont +
lpeg.R('\xf0\xf7') * cont * cont * cont
local utf8_capture = (((utf8_char / utf8_valid_char) + (lpeg.P(1) / utf8_invalid_char)) ^ 0) * -1
tokenize_utf8_string = function(s)
local dq = s:find('"')
local sq = s:find("'")
local t = table.pack(utf8_capture:match(s))
t.has_double_quote = not not dq
t.has_single_quote = not not sq
return t
end
end
local function is_plain_key(key)
return type(key) == 'string' and key:match('^[%a_][%a%d_]*$')
end
local CACHE_TYPES = {
['table'] = true, ['function'] = true, ['thread'] = true, ['userdata'] = true
}
-- cache would be populated to be like:
-- {
-- function = { `fun1` = 1, _cnt = 1 }, -- object id
-- table = { `table1` = 1, `table2` = 2, _cnt = 2 },
-- visited_tables = { `table1` = 7, `table2` = 8 }, -- visit count
-- }
-- use weakrefs to avoid accidentall adding refcount
local function cache_apperance(obj, cache, option)
if not cache.visited_tables then
cache.visited_tables = setmetatable({}, {__mode = 'k'})
end
local t = type(obj)
-- TODO can't test filter_function here as we don't have the ix and key,
-- might cause different results?
-- respect show_xxx and filter_function to be consistent with print results
if (not TYPES[t] and not option.show_table)
or (TYPES[t] and not option['show_'..t]) then
return
end
if CACHE_TYPES[t] or TYPES[t] == nil then
if not cache[t] then
cache[t] = setmetatable({}, {__mode = 'k'})
cache[t]._cnt = 0
end
if not cache[t][obj] then
cache[t]._cnt = cache[t]._cnt + 1
cache[t][obj] = cache[t]._cnt
end
end
if t == 'table' or TYPES[t] == nil then
if cache.visited_tables[obj] == false then
-- already printed, no need to mark this and its children anymore
return
elseif cache.visited_tables[obj] == nil then
cache.visited_tables[obj] = 1
else
-- visited already, increment and continue
cache.visited_tables[obj] = cache.visited_tables[obj] + 1
return
end
for k, v in pairs(obj) do
cache_apperance(k, cache, option)
cache_apperance(v, cache, option)
end
local mt = getmetatable(obj)
if mt and option.show_metatable then
cache_apperance(mt, cache, option)
end
end
end
-- makes 'foo2' < 'foo100000'. string.sub makes substring anyway, no need to use index based method
local function str_natural_cmp(lhs, rhs)
while #lhs > 0 and #rhs > 0 do
local lmid, lend = lhs:find('%d+')
local rmid, rend = rhs:find('%d+')
if not (lmid and rmid) then return lhs < rhs end
local lsub = lhs:sub(1, lmid-1)
local rsub = rhs:sub(1, rmid-1)
if lsub ~= rsub then
return lsub < rsub
end
local lnum = tonumber(lhs:sub(lmid, lend))
local rnum = tonumber(rhs:sub(rmid, rend))
if lnum ~= rnum then
return lnum < rnum
end
lhs = lhs:sub(lend+1)
rhs = rhs:sub(rend+1)
end
return lhs < rhs
end
local function cmp(lhs, rhs)
local tleft = type(lhs)
local tright = type(rhs)
if tleft == 'number' and tright == 'number' then return lhs < rhs end
if tleft == 'string' and tright == 'string' then return str_natural_cmp(lhs, rhs) end
if tleft == tright then return str_natural_cmp(tostring(lhs), tostring(rhs)) end
-- allow custom types
local oleft = TYPES[tleft] or 9
local oright = TYPES[tright] or 9
return oleft < oright
end
-- setup option with default
local function make_option(option)
if option == nil then
option = {}
end
for k, v in pairs(pprint.defaults) do
if option[k] == nil then
option[k] = v
end
if option.show_all then
for t, _ in pairs(TYPES) do
option['show_'..t] = true
end
option.show_metatable = true
end
end
return option
end
-- override defaults and take effects for all following calls
function pprint.setup(option)
pprint.defaults = make_option(option)
end
-- format lua object into a string
function pprint.pformat(obj, option, printer)
option = make_option(option)
local buf = {}
local function default_printer(s)
table.insert(buf, s)
end
printer = printer or default_printer
local cache
if option.object_cache == 'global' then
-- steal the cache into a local var so it's not visible from _G or anywhere
-- still can't avoid user explicitly referentce pprint._cache but it shouldn't happen anyway
cache = pprint._cache or {}
pprint._cache = nil
elseif option.object_cache == 'local' then
cache = {}
end
local last = '' -- used for look back and remove trailing comma
local status = {
indent = '', -- current indent
len = 0, -- current line length
printed_something = false, -- used to remove leading new lines
}
local wrapped_printer = function(s)
status.printed_something = true
printer(last)
last = s
end
local function _indent(d)
status.indent = string.rep(' ', d + #(status.indent))
end
local function _n(d)
if not status.printed_something then return end
wrapped_printer('\n')
wrapped_printer(status.indent)
if d then
_indent(d)
end
status.len = 0
return true -- used to close bracket correctly
end
local function _p(s, nowrap)
status.len = status.len + #s
if not nowrap and status.len > option.level_width then
_n()
wrapped_printer(s)
status.len = #s
else
wrapped_printer(s)
end
end
local formatter = {}
local function format(v)
local f = formatter[type(v)]
f = f or formatter.table -- allow patched type()
if option.filter_function and option.filter_function(v, nil, nil) then
return ''
else
return f(v)
end
end
local function tostring_formatter(v)
return tostring(v)
end
local function number_formatter(n)
return n == math.huge and '[[math.huge]]' or tostring(n)
end
local function nop_formatter(v)
return ''
end
local function make_fixed_formatter(t, has_cache)
if has_cache then
return function (v)
return string.format('[[%s %d]]', t, cache[t][v])
end
else
return function (v)
return '[['..t..']]'
end
end
end
local function string_formatter(s, force_long_quote)
local tokens = option.string_is_utf8 and tokenize_utf8_string(s) or tokenize_string(s)
local string_len = 0
local escape_quotes = tokens.has_double_quote and tokens.has_single_quote
for _, token in ipairs(tokens) do
if escape_quotes and token.char == '"' then
string_len = string_len + 2
else
string_len = string_len + token.len
end
end
local quote_len = 2
local long_quote_dashes = 0
local function compute_long_quote_dashes()
local keep_looking = true
while keep_looking do
if s:find('%]' .. string.rep('=', long_quote_dashes) .. '%]') then
long_quote_dashes = long_quote_dashes + 1
else
keep_looking = false
end
end
end
if force_long_quote then
compute_long_quote_dashes()
quote_len = 2 + long_quote_dashes
end
if quote_len + string_len + status.len > option.level_width then
_n()
-- only wrap string when is longer than level_width
if option.wrap_string and string_len + quote_len > option.level_width then
if not force_long_quote then
compute_long_quote_dashes()
quote_len = 2 + long_quote_dashes
end
-- keep the quotes together
local dashes = string.rep('=', long_quote_dashes)
_p('[' .. dashes .. '[', true)
local status_len = status.len
local line_len = 0
local line = ''
for _, token in ipairs(tokens) do
if line_len + token.len + status_len > option.level_width then
_n()
_p(line, true)
line_len = token.len
line = token.char
else
line_len = line_len + token.len
line = line .. token.char
end
end
return line .. ']' .. dashes .. ']'
end
end
if tokens.has_double_quote and tokens.has_single_quote and not force_long_quote then
for i, token in ipairs(tokens) do
if token.char == '"' then
tokens[i].char = '\\"'
end
end
end
local flat_table = {}
for _, token in ipairs(tokens) do
table.insert(flat_table, token.char)
end
local concat = table.concat(flat_table)
if force_long_quote then
local dashes = string.rep('=', long_quote_dashes)
return '[' .. dashes .. '[' .. concat .. ']' .. dashes .. ']'
elseif tokens.has_single_quote then
-- use double quote
return '"' .. concat .. '"'
else
-- use single quote
return "'" .. concat .. "'"
end
end
local function table_formatter(t)
if option.use_tostring then
local mt = getmetatable(t)
if mt and mt.__tostring then
return string_formatter(tostring(t), true)
end
end
local print_header_ix = nil
local ttype = type(t)
if option.object_cache then
local cache_state = cache.visited_tables[t]
local tix = cache[ttype][t]
-- FIXME should really handle `cache_state == nil`
-- as user might add things through filter_function
if cache_state == false then
-- already printed, just print the the number
return string_formatter(string.format('%s %d', ttype, tix), true)
elseif cache_state > 1 then
-- appeared more than once, print table header with number
print_header_ix = tix
cache.visited_tables[t] = false
else
-- appeared exactly once, print like a normal table
end
end
local limit = tonumber(option.depth_limit)
if limit and depth > limit then
if print_header_ix then
return string.format('[[%s %d]]...', ttype, print_header_ix)
end
return string_formatter(tostring(t), true)
end
local tlen = #t
local wrapped = false
_p('{')
_indent(option.indent_size)
_p(string.rep(' ', option.indent_size - 1))
if print_header_ix then
_p(string.format('--[[%s %d]] ', ttype, print_header_ix))
end
for ix = 1,tlen do
local v = t[ix]
if formatter[type(v)] == nop_formatter or
(option.filter_function and option.filter_function(v, ix, t)) then
-- pass
else
if option.wrap_array then
wrapped = _n()
end
depth = depth+1
_p(format(v)..', ')
depth = depth-1
end
end
-- hashmap part of the table, in contrast to array part
local function is_hash_key(k)
if type(k) ~= 'number' then
return true
end
local numkey = math.floor(tonumber(k))
if numkey ~= k or numkey > tlen or numkey <= 0 then
return true
end
end
local function print_kv(k, v, t)
-- can't use option.show_x as obj may contain custom type
if formatter[type(v)] == nop_formatter or
formatter[type(k)] == nop_formatter or
(option.filter_function and option.filter_function(v, k, t)) then
return
end
wrapped = _n()
if is_plain_key(k) then
_p(k, true)
else
_p('[')
-- [[]] type string in key is illegal, needs to add spaces inbetween
local k = format(k)
if string.match(k, '%[%[') then
_p(' '..k..' ', true)
else
_p(k, true)
end
_p(']')
end
_p(' = ', true)
depth = depth+1
_p(format(v), true)
depth = depth-1
_p(',', true)
end
if option.sort_keys then
local keys = {}
for k, _ in pairs(t) do
if is_hash_key(k) then
table.insert(keys, k)
end
end
table.sort(keys, cmp)
for _, k in ipairs(keys) do
print_kv(k, t[k], t)
end
else
for k, v in pairs(t) do
if is_hash_key(k) then
print_kv(k, v, t)
end
end
end
if option.show_metatable then
local mt = getmetatable(t)
if mt then
print_kv('__metatable', mt, t)
end
end
_indent(-option.indent_size)
-- make { } into {}
last = string.gsub(last, '^ +$', '')
-- peek last to remove trailing comma
last = string.gsub(last, ',%s*$', ' ')
if wrapped then
_n()
end
_p('}')
return ''
end
-- set formatters
formatter['nil'] = option.show_nil and tostring_formatter or nop_formatter
formatter['boolean'] = option.show_boolean and tostring_formatter or nop_formatter
formatter['number'] = option.show_number and number_formatter or nop_formatter -- need to handle math.huge
formatter['function'] = option.show_function and make_fixed_formatter('function', option.object_cache) or nop_formatter
formatter['thread'] = option.show_thread and make_fixed_formatter('thread', option.object_cache) or nop_formatter
formatter['userdata'] = option.show_userdata and make_fixed_formatter('userdata', option.object_cache) or nop_formatter
formatter['string'] = option.show_string and string_formatter or nop_formatter
formatter['table'] = option.show_table and table_formatter or nop_formatter
if option.object_cache then
-- needs to visit the table before start printing
cache_apperance(obj, cache, option)
end
_p(format(obj))
printer(last) -- close the buffered one
-- put cache back if global
if option.object_cache == 'global' then
pprint._cache = cache
end
return table.concat(buf)
end
-- pprint all the arguments
function pprint.pprint( ... )
local args = {...}
-- select will get an accurate count of array len, counting trailing nils
local len = select('#', ...)
for ix = 1,len do
pprint.pformat(args[ix], nil, io.write)
io.write('\n')
end
end
setmetatable(pprint, {
__call = function (_, ...)
pprint.pprint(...)
end
})
return pprint

461
lua-temp/tests.lua Executable file
View File

@@ -0,0 +1,461 @@
#!/usr/bin/env lua
local pprint = require('pprint')
local assemble = require('tyche-as')
local VM = require('tyche-vm')
----------------------
-- --
-- SUPPORT --
-- --
----------------------
function TEST(name)
print("### " .. name)
end
function assert_eq(found, expected, key)
assert(type(found) == type(expected), 'Types not matching , expected "' .. pprint.pformat(expected) .. '", found "' .. pprint.pformat(found) .. '".' .. ((key ~= nil) and ('(key: ' .. key .. ')') or ''))
if type(found) == 'table' then
assert(#found == #expected, "Tables are of different sizes " .. ((key ~= nil) and ('(key: ' .. key .. ')') or ''))
for k,v in pairs(found) do
assert_eq(v, expected[k], k)
end
for k,v in pairs(expected) do
assert_eq(v, found[k], k)
end
else
assert(found == expected, 'Assertion failed, expected "' .. pprint.pformat(expected) .. '", found "' .. pprint.pformat(found) .. '".')
end
end
----------------------
-- --
-- PARSER --
-- --
----------------------
do TEST "Parser"
local source = [[
.const
0: 3.14
1: "Hello world"
.func 0
pushi 2 ; this is a comment
pushi 3
sum
ret
.func 1
pushi 5000
ret ]]
local expected = {
constants = { [0] = 3.14, [1] = "Hello world" },
functions = {
[0] = {
{ "pushi", 2 },
{ "pushi", 3 },
{ "sum" },
{ "ret" },
},
[1] = {
{ "pushi", 5000 },
{ "ret" },
}
}
}
local found = assemble(source)
-- pprint(expected)
-- pprint(found)
assert_eq(found, expected)
end
do TEST "Parser: labels"
local source = [[
.func 0
jmp @my_label
pushi 3
@my_label:
ret ]]
local expected = {
constants = {},
functions = {
[0] = {
{ "jmp", "@my_label" },
{ "pushi", 3 },
{ "ret", labels = { "@my_label" } },
}
}
}
local found = assemble(source)
assert_eq(found, expected)
end
----------------------
-- --
-- STACK --
-- --
----------------------
do TEST "Stack"
local stack = VM.new().stack
stack:push({ type='integer', value=10 })
stack:push({ type='integer', value=20 })
stack:push({ type='integer', value=30 })
assert_eq(#stack, 3)
assert_eq(stack[0].value, 10)
assert_eq(stack[1].value, 20)
assert_eq(stack[-1].value, 30)
assert_eq(stack[-2].value, 20)
stack:pop()
stack:pop()
assert_eq(stack[-1].value, 10)
stack:pop()
assert_eq(#stack, 0)
end
do TEST "Stack with frame pointer"
local stack = VM.new().stack
stack:push({ type='integer', value=10 })
stack:push({ type='integer', value=20 })
stack:push_fp()
stack:push({ type='integer', value=30 })
stack:push({ type='integer', value=40 })
stack:push({ type='integer', value=50 })
assert_eq(#stack, 3)
assert_eq(stack[0].value, 30)
assert_eq(stack[1].value, 40)
assert_eq(stack[-1].value, 50)
assert_eq(stack[-2].value, 40)
stack:pop_fp()
assert_eq(#stack, 2)
assert_eq(stack[0].value, 10)
assert_eq(stack[1].value, 20)
assert_eq(stack[-1].value, 20)
assert_eq(stack[-2].value, 10)
end
----------------------
-- --
-- VM --
-- --
----------------------
local function arith(a, b, op)
return VM.new():load(assemble(string.format([[
.func 0
pushi %d
pushi %d
%s
ret
]], a, b, op))):call(0)
end
do TEST "VM: basic"
local vm = VM.new()
-- vm.debug = true
local bytecode = assemble [[
.func 0
pushi 2
pushi 3
sum
ret
]]
vm:load(bytecode)
assert_eq(vm:stack_sz(), 1)
assert_eq(vm:is(-1, 'function'), true)
vm:call(0)
assert_eq(vm:stack_sz(), 1)
assert_eq(vm:is(-1, 'integer'), true)
assert_eq(vm:to_integer(-1), 5)
end
do TEST "VM: logic/arithmetic"
assert_eq(arith(2, 5, 'sum'):to_integer(-1), 7)
assert_eq(arith(2, 5, 'sub'):to_integer(-1), -3)
assert_eq(arith(2, 5, 'mul'):to_integer(-1), 10)
assert_eq(arith(20, 3, 'idiv'):to_integer(-1), 6)
assert_eq(arith(5, 5, 'eq'):to_integer(-1), 1)
assert_eq(arith(5, 5, 'neq'):to_integer(-1), 0)
assert_eq(arith(4, 5, 'lt'):to_integer(-1), 1)
assert_eq(arith(5, 5, 'lt'):to_integer(-1), 0)
assert_eq(arith(4, 5, 'lte'):to_integer(-1), 1)
assert_eq(arith(5, 5, 'lte'):to_integer(-1), 1)
assert_eq(arith(5, 5, 'gt'):to_integer(-1), 0)
assert_eq(arith(5, 5, 'gte'):to_integer(-1), 1)
assert_eq(arith(20, 5, 'and'):to_integer(-1), 4)
assert_eq(arith(20, 5, 'or'):to_integer(-1), 21)
assert_eq(arith(20, 5, 'xor'):to_integer(-1), 17)
assert_eq(arith(2, 5, 'pow'):to_integer(-1), 32)
assert_eq(arith(2, 5, 'shl'):to_integer(-1), 64)
assert_eq(arith(20, 3, 'shr'):to_integer(-1), 2)
assert_eq(arith(20, 3, 'mod'):to_integer(-1), 2)
end
do TEST "VM: local variables"
local vm = VM.new():load(assemble([[
.func 0
pushv 2 ; local a, b
pushi 3 ; a = 3
set 0
pushi 4 ; b = 4
set 1
dupv 0 ; return a
ret
]])):call(0)
assert_eq(vm:stack_sz(), 1)
assert_eq(vm:to_integer(-1), 3)
end
do TEST "VM: functions"
local vm = VM.new():load(assemble([[
.func 0
pushf 1
pushi 2
pushi 3
call 2
ret
.func 1
dupv 0
dupv 1
sub
ret
]])):call(0)
assert_eq(vm:stack_sz(), 1)
assert_eq(vm:to_integer(-1), -1)
end
do TEST "VM: jumps (jmp + bnz)"
local vm = VM.new():load(assemble [[
.func 0
jmp @x1
pushi 5
@x1:
pushi 1
bnz @x2
pushi 1
bz @x3
@x2:
pushi 6
ret
@x3:
pushi 7
ret
]]):call(0)
assert_eq(vm:to_integer(-1), 6)
end
do TEST "VM: jumps (bz)"
local vm = VM.new():load(assemble [[
.func 0
jmp @x1
pushi 5
@x1:
pushi 0
bnz @x2
pushi 0
bz @x3
@x2:
pushi 6
ret
@x3:
pushi 7
ret
]]):call(0)
assert_eq(vm:to_integer(-1), 7)
end
do TEST "VM: string from const"
local vm = VM.new():load(assemble [[
.const
0: "Hello"
.func 0
pushc 0
ret
]]):call(0)
assert_eq(vm:to_string(-1), "Hello")
end
do TEST "VM: managed strings"
local vm = VM.new():push_string("Hello")
-- print(vm:debug_heap())
assert_eq(vm:to_string(-1), "Hello")
end
do TEST "VM: concatenate strings (GC won't delete)"
local vm = VM.new():load(assemble [[
.const
0: "Hello "
1: "world"
.func 0
pushc 0
pushc 1
sum
gc
ret
]]):call(0)
-- print(vm:debug_heap())
assert_eq(vm:to_string(-1), "Hello world")
assert_eq(vm.heap:size(), 1)
end
do TEST "VM: GC strings"
local vm = VM.new():load(assemble [[
.const
0: "Hello "
1: "world"
.func 0
pushn
pushc 0
pushc 1
sum
pop
gc
ret
]]):call(0)
-- print(vm:debug_heap())
assert_eq(vm:is(-1, 'nil'), true)
assert_eq(vm.heap:size(), 0)
end
do TEST "VM: arrays"
local vm = VM.new():load(assemble [[
.func 0
newa
pushi 10
seti 0
pushi 20
seti 1
pushi 30
seti 2
geti 1
ret
]]):call(0)
-- print(vm:debug_heap())
assert_eq(vm:to_integer(-1), 20)
assert_eq(vm.heap:size(), 1)
end
do TEST "VM: arrays GC"
local vm = VM.new():load(assemble [[
.func 0
pushn
newa
pushi 10
seti 0
pop
gc
ret
]]):call(0)
assert_eq(vm.heap:size(), 0)
end
do TEST "VM: GC items (1st level) - no items removed"
local vm = VM.new():load(assemble [[
.const
0: "Hello "
1: "world"
.func 0
pushn
newa
pushc 0
pushc 1
sum
seti 0
gc
pop
ret
]]):call(0)
assert_eq(vm.heap:size(), 2)
end
do TEST "VM: GC items (1st level) - all items removed"
local vm = VM.new():load(assemble [[
.const
0: "Hello "
1: "world"
.func 0
pushn
newa
pushc 0
pushc 1
sum
seti 0
pop
gc
ret
]]):call(0)
assert_eq(vm.heap:size(), 0)
end
do TEST "VM: GC items (2nd level) - no items removed"
local vm = VM.new():load(assemble [[
.const
0: "Hello "
1: "world"
.func 0
pushn
newa
newa
pushc 0
pushc 1
sum
seti 0
seti 0
gc
pop
ret
]]):call(0)
assert_eq(vm.heap:size(), 3)
end
do TEST "VM: GC items (1st level) - all items removed"
local vm = VM.new():load(assemble [[
.const
0: "Hello "
1: "world"
.func 0
pushn
newa
newa
pushc 0
pushc 1
sum
seti 0
seti 0
pop
gc
ret
]]):call(0)
assert_eq(vm.heap:size(), 0)
end
print('End.')

87
lua-temp/tyche-as.lua Normal file
View File

@@ -0,0 +1,87 @@
----------------------
-- --
-- PARSER --
-- --
----------------------
local function assemble(source)
local proto = {
constants = {},
functions = {},
}
local section = ''
local current_f_id = 0
local next_label = nil
for line in source:gmatch("([^\n]+)") do
local line = line:gsub("%s*;.*$", "") -- remove comments
line = line:match("^%s*(.-)%s*$") -- trim
if #line == 0 then goto continue end
if line == ".const" then
section = 'const'
elseif line:match("%.func%s+%d+") then
section = 'function'
local f_id = tonumber(line:match("%.func%s+(%d+)"))
proto.functions[f_id] = {}
current_f_id = f_id
elseif section == 'const' then
local k, v = line:match("^%s*(%d+)%s*:%s*(.+)$")
if not k then error("Invalid row for constant: " .. line) end
if v:sub(1, 1) == '"' then
proto.constants[tonumber(k)] = line:match('"(.*)"')
else
proto.constants[tonumber(k)] = tonumber(v)
end
elseif section == 'function' then
local regexes = {
"^%s*(%a+)%s+(%d+)%s*$", -- instruction + parameter
"^%s*(%a+)%s+(@[%a_][%a%d_]*)%s*$", -- instruction + label
"^%s*(%a+)%s*$", -- instruction only
"^(@[%a_][%a%d_]*):%s*$", -- label
}
local match = false
for i,regex in ipairs(regexes) do
local inst, par = line:match(regex)
if inst then
match = true
if i == 1 then -- instruction + parameter
table.insert(proto.functions[current_f_id], { inst, tonumber(par), labels = next_label })
elseif i == 2 then -- instruction + label
table.insert(proto.functions[current_f_id], { inst, par, labels = next_label })
elseif i == 3 then -- instruction only
table.insert(proto.functions[current_f_id], { inst, labels = next_label })
elseif i == 4 then -- label
if not next_label then
next_label = { inst }
else
table.insert(next_label, inst)
end
end
if i ~= 4 then
next_label = nil
end
end
end
if not match then error("Invalid instruction: " .. line) end
end
::continue::
end
return proto
end
----------------------
-- --
-- MAIN --
-- --
----------------------
if ... then
return assemble
else
error("Running assembler directly not supported yet")
end

620
lua-temp/tyche-vm.lua Normal file
View File

@@ -0,0 +1,620 @@
local pprint = require('pprint')
local TYPES = { 'nil', 'integer', 'float', 'string', 'array', 'table', 'function', 'native_pointer' }
local TYPE_MAP = {}; for _,v in ipairs(TYPES) do TYPE_MAP[v] = true end
local ARITH_LOGIC_OPS = {
sum=true, sub=true, mul=true, div=true, idiv=true, eq=true, neq=true, lt=true, lte=true, gt=true, gte=true,
['and']=true, ['or']=true, xor=true, pow=true, shl=true, shr=true, mod=true
}
math.randomseed(os.time())
----------------------
-- --
-- UTIL --
-- --
----------------------
local function validate_value(v)
assert(v, "value cannot be nil")
assert(type(v) == 'table',
"invalid value format (expected { type='...', value=... }), received: " .. pprint.pformat(v))
assert(TYPE_MAP[v.type], "missing field 'type' in value")
if v.type == 'nil' then
assert(v.value == nil)
elseif v.type == 'number' then
assert(type(v.value) == 'number')
elseif v.type == 'function' then
assert(type(v.value) == 'number' and v.value >= 0, "function must be a positive number")
elseif v.type == 'string' then
assert(type(v.ref) == 'number' or type(v.const_ref) == 'number')
elseif v.type == 'array' then
assert(type(v.ref) == 'number')
end
end
function is_zero(v)
if v.type == 'nil' then return true end
if v.type == 'integer' and v.value == 0 then return true end
return false
end
----------------------
-- --
-- STACK --
-- --
----------------------
local Stack = {}
Stack.__index = Stack
function Stack.new()
local self = setmetatable({
stack = {},
fps = {},
}, Stack)
self:push_fp()
return self
end
function Stack:top_fps()
return self.fps[#self.fps]
end
function Stack:push(value)
validate_value(value)
table.insert(self.stack, value)
end
function Stack:pop()
if #self.stack < self:top_fps() then error("Stack underflow") end
local v = self.stack[#self.stack]
self.stack[#self.stack] = nil
return v
end
function Stack:peek()
if #self.stack < self:top_fps() then error("Stack underflow") end
return self.stack[#self.stack]
end
Stack.__len = function(self)
return #self.stack - self:top_fps() + 1
end
Stack.__index = function(self, key)
local idx = tonumber(key)
if idx then
if idx >= 0 then
return self.stack[self:top_fps() + idx]
else
if self:top_fps() + #self.stack + idx < 0 then error("Stack access out of range") end
return self.stack[#self.stack + idx + 1]
end
else
return Stack[key] -- other methods
end
end
Stack.__newindex = function(self, key, value)
validate_value(value)
local idx = tonumber(key)
if idx then
if idx >= 0 then
self.stack[self:top_fps() + idx] = value
else
if self:top_fps() + #self.stack + idx < 0 then error("Stack access out of range") end
self.stack[#self.stack + idx + 1] = value
end
end
end
function Stack:push_fp()
table.insert(self.fps, #self.stack + 1)
end
function Stack:pop_fp()
if #self.fps == 1 then error("FPS queue underflow") end
for i=self:top_fps(),#self.stack,1 do
self.stack[i] = nil
end
table.remove(self.fps)
end
function Stack:fp_level()
return #self.fps
end
----------------------
-- --
-- CODE --
-- --
----------------------
local Code = {}
Code.__index = Code
function Code.new()
return setmetatable({
bytecode = nil
}, Code)
end
function Code:load(bytecode)
-- TODO - what if there's code already loaded?
self.bytecode = bytecode
return 0 -- main function
end
function Code:next_instruction(function_id, pc)
return {
operator = self.bytecode.functions[function_id][pc][1],
operand = self.bytecode.functions[function_id][pc][2],
instruction_size = 1,
}
end
function Code:find_label(function_id, label)
for pc, op in ipairs(self.bytecode.functions[function_id]) do
if op.labels then
for _,lbl in ipairs(op.labels) do
if lbl == label then
return pc
end
end
end
end
end
----------------------
-- --
-- EXPR --
-- --
----------------------
local EXPR = {}
-- initialize default
for op,_ in pairs(ARITH_LOGIC_OPS) do
EXPR[op] = {}
for _,type1 in ipairs(TYPES) do
EXPR[op][type1] = {}
for _,type2 in ipairs(TYPES) do
EXPR[op][type1][type2] = function(_, _, _) error(string.format("Type mismatch for operation '%s': types '%s' and '%s'", op, type1, type2)) end
end
end
end
EXPR.sum.integer.integer = function(vm, b, a) vm:push_integer(a.value + b.value) end
EXPR.sum.string.string = function(vm, b, a) vm:push_string(vm:_extract_string(a) ..vm:_extract_string(b)) end
EXPR.sub.integer.integer = function(vm, b, a) vm:push_integer(a.value - b.value) end
EXPR.mul.integer.integer = function(vm, b, a) vm:push_integer(a.value * b.value) end
-- TODO - div
EXPR.idiv.integer.integer = function(vm, b, a) vm:push_integer(math.floor(a.value / b.value)) end
EXPR.mod.integer.integer = function(vm, b, a) vm:push_integer(a.value % b.value) end
EXPR.eq.integer.integer = function(vm, b, a) vm:push_integer((a.value == b.value) and 1 or 0) end
EXPR.neq.integer.integer = function(vm, b, a) vm:push_integer((a.value ~= b.value) and 1 or 0) end
EXPR.lt.integer.integer = function(vm, b, a) vm:push_integer((a.value < b.value) and 1 or 0) end
EXPR.lte.integer.integer = function(vm, b, a) vm:push_integer((a.value <= b.value) and 1 or 0) end
EXPR.gt.integer.integer = function(vm, b, a) vm:push_integer((a.value > b.value) and 1 or 0) end
EXPR.gte.integer.integer = function(vm, b, a) vm:push_integer((a.value >= b.value) and 1 or 0) end
EXPR['and'].integer.integer = function(vm, b, a) vm:push_integer(a.value & b.value) end
EXPR['or'].integer.integer = function(vm, b, a) vm:push_integer(a.value | b.value) end
EXPR.xor.integer.integer = function(vm, b, a) vm:push_integer(a.value ~ b.value) end
EXPR.pow.integer.integer = function(vm, b, a) vm:push_integer(a.value ^ b.value) end
EXPR.shl.integer.integer = function(vm, b, a) vm:push_integer(a.value << b.value) end
EXPR.shr.integer.integer = function(vm, b, a) vm:push_integer(a.value >> b.value) end
----------------------
-- --
-- HEAP --
-- --
----------------------
local Heap = {}
Heap.__index = Heap
function Heap.new()
return setmetatable({
items = {}
}, Heap)
end
function Heap:add_value(value)
assert(value.type and (value.type == 'string' or value.type == 'array' or value.type == 'table'))
assert(value.value)
local key = math.random(1, math.maxinteger)
while self.items[key] do key = math.random(1, math.maxinteger) end
self.items[key] = value
return key
end
function Heap:get_value(key)
assert(type(key) == 'number')
return self.items[key]
end
function Heap:size()
local n = 0
for _ in pairs(self.items) do n = n + 1 end
return n
end
function Heap:call_gc(roots)
-- mark
local marked = {}
local function mark(v)
if v.type == 'string' then
if v.ref then marked[v.ref] = true end
elseif v.type == 'array' then
marked[v.ref] = true
for _,vv in ipairs(self.items[v.ref].value) do mark(vv) end
end
end
for _,v in ipairs(roots) do -- TODO - recursive, add support to array
mark(v)
end
-- sweep
for key,_ in pairs(self.items) do
if not marked[key] then
self.items[key] = nil
end
end
end
----------------------
-- --
-- VM --
-- --
----------------------
local VM = {}
VM.__index = VM
function VM.new()
return setmetatable({
stack = Stack.new(),
heap = Heap.new(),
code = Code.new(),
loc = {},
debug = false,
}, VM)
end
function VM:set_debug(b)
self.debug = b
return self
end
--
-- code management
--
function VM:load(bytecode)
local f_id = self.code:load(bytecode)
self.stack:push({ type = 'function', value = f_id })
return self
end
--
-- stack management
--
function VM:push_integer(n)
self.stack:push({ type = 'integer', value = n })
return self
end
function VM:push_string(str)
self.stack:push({ type = 'string', ref = self.heap:add_value({ type='string', value=str }) })
return self
end
function VM:push_nil()
self.stack:push({ type = 'nil' })
return self
end
function VM:new_array()
self.stack:push({ type = 'array', ref = self.heap:add_value({ type='array', value={} }) })
return self
end
--
-- information
--
function VM:stack_sz()
return #self.stack
end
function VM:is(idx, type_)
assert(type(idx) == "number")
assert(TYPE_MAP[type_])
return self.stack[idx].type == type_
end
function VM:to_integer(idx)
local value = self.stack[idx]
if value.type ~= 'integer' then error("Type error: not an integer") end
return value.value
end
function VM:_extract_string(value)
assert(value)
assert(value.type == 'string')
if value.const_ref then
return self.code.bytecode.constants[value.const_ref]
elseif value.ref then
return self.heap:get_value(value.ref).value
else
error("Incorrect string value (nor 'const_ref' or 'ref')")
end
end
function VM:_extract_array(value)
assert(value)
assert(value.type == 'array')
local array = self.heap:get_value(value.ref)
if type(array) ~= 'table' then error('Expected array') end
return self.heap:get_value(value.ref).value
end
function VM:to_string(idx)
local value = self.stack[idx]
if value.type ~= 'string' then error("Type error: not a string") end
return self:_extract_string(value)
end
function VM:format_value(v)
if v.type == 'integer' or v.type == 'real' then
return tostring(v.value)
elseif v.type == 'string' then
return '"' .. self:_extract_string(v) .. '"'
elseif v.type == 'array' then
local array = self:_extract_array(v)
local tbl = {}
for _,vv in ipairs(array) do table.insert(tbl, self:format_value(vv)) end
return "[" .. table.concat(tbl, ', ') .. "]"
elseif v.type == 'function' then
return '@' .. tostring(v.value)
elseif v.type == 'nil' then
return 'nil'
else
print('warning: cannot convert from type ' .. tostring(v.type))
return pprint.pformat(v)
end
end
function VM:debug_stack()
if #self.stack.stack == 0 then return "empty" end
local ss = {}
for i,v in ipairs(self.stack.stack) do
for _,fp in pairs(self.stack.fps) do
if i == fp then table.insert(ss, '^ ') end
end
table.insert(ss, self:format_value(v) .. ' ')
end
return table.concat(ss)
end
function VM:debug_heap()
local ss = { "Heap:\n" }
for k,v in pairs(self.heap.items) do
if v.type == 'string' then
table.insert(ss, string.format(' [%X] = "%s"', k, v.value))
elseif v.type == 'array' then
table.insert(ss, string.format(' [%X] = [', k))
local t = {}; for _,vv in ipairs(v.value) do t[#t+1] = self:format_value(vv) end
table.insert(ss, table.concat(t, ", ") .. ']')
else
error('Unsupported type in heap')
end
table.insert(ss, "\n")
end
return table.concat(ss)
end
--
-- code execution
--
function VM:_enter_function(n_pars)
-- get parameters
local vars = {}
for i=1,n_pars do
vars[i] = self.stack:pop()
end
-- get function
local f = self.stack:pop()
if f.type ~= 'function' then error("Type error: expected function") end
-- enter function
table.insert(self.loc, {
f_id = f.value,
pc = 1
})
self.stack:push_fp()
-- pass parameters
for i=1,n_pars do
self.stack:push(vars[#vars-i+1])
end
end
function VM:call(n_pars)
self:_enter_function(n_pars)
self:_run_until_return()
return self
end
function VM:_run_until_return()
local level = self.stack:fp_level()
while self.stack:fp_level() >= level do
self:_step()
end
end
function VM:_print_stack()
if self.debug then
print(self:debug_stack())
end
end
function VM:_step()
local loc = self.loc[#self.loc]
local op = self.code:next_instruction(loc.f_id, loc.pc)
if self.debug then print('## ' .. loc.f_id .. ':' .. loc.pc .. ' ' .. op.operator .. ' ' .. (op.operand and op.operand or '')) end
--
-- stack operations
--
if op.operator == 'pushn' then
self:push_nil()
elseif op.operator == 'pushi' then
self:push_integer(op.operand)
elseif op.operator == 'pushf' then
assert(op.operand >= 0)
self.stack:push({ type = 'function', value = op.operand })
elseif op.operator == 'pushc' then
local c = self.code.bytecode.constants[op.operand]
if type(c) == 'string' then
self.stack:push({ type = 'string', const_ref = op.operand })
elseif type(c) == 'number' then
error('REAL consts not supported for now.')
end
elseif op.operator == 'newa' then
self:new_array()
elseif op.operator == 'pop' then
self.stack:pop()
elseif op.operator == 'dup' then
self.stack:push(self.stack:peek())
--
-- local variables
--
elseif op.operator == 'pushv' then
assert(op.operand >= 0)
for _=1,op.operand do
self:push_nil()
end
elseif op.operator == 'set' then
assert(op.operand >= 0)
local a = self.stack:pop()
self.stack[op.operand] = a
elseif op.operator == 'dupv' then
assert(op.operand >= 0)
local a = self.stack[op.operand]
self.stack:push(a)
--
-- table and array operations
--
elseif op.operator == 'seti' then
local array_ref = self.stack[-2]
local array = self:_extract_array(array_ref)
array[op.operand+1] = self.stack:pop()
elseif op.operator == 'geti' then
local array_ref = self.stack[-1]
local array = self:_extract_array(array_ref)
self.stack:push(array[op.operand+1])
--
-- logic/arithmetic operations
--
elseif ARITH_LOGIC_OPS[op.operator] then
local a = self.stack:pop()
local b = self.stack:pop()
EXPR[op.operator][a.type][b.type](self, a, b)
--
-- function management
---
elseif op.operator == 'call' then
assert(op.operand >= 0)
self:_enter_function(op.operand)
elseif op.operator == 'ret' then
local v = self.stack:pop()
self.stack:pop_fp()
self.stack:push(v)
table.remove(self.loc)
self:_print_stack()
return
--
-- jumps/branching
--
elseif op.operator == 'jmp' then
loc.pc = self.code:find_label(loc.f_id, op.operand)
self:_print_stack()
return
elseif op.operator == 'bz' then
local v = self.stack:pop()
if is_zero(v) then
loc.pc = self.code:find_label(loc.f_id, op.operand)
self:_print_stack()
return
end
elseif op.operator == 'bnz' then
local v = self.stack:pop()
if not is_zero(v) then
loc.pc = self.code:find_label(loc.f_id, op.operand)
self:_print_stack()
return
end
--
-- memory management
--
elseif op.operator == 'gc' then
-- if self.debug then
-- print('About to run GC, current heap:')
-- print(self:debug_heap())
-- end
self.heap:call_gc(self.stack.stack)
-- if self.debug then
-- print('GC executed, this is the heap:')
-- print(self:debug_heap())
-- end
--
-- instruction not found
--
else
error("Unknown operator '" .. tostring(op.operator) .. "'")
end
self:_print_stack()
loc.pc = loc.pc + op.instruction_size
end
return VM

View File

@@ -0,0 +1,18 @@
#ifndef TYCHE_VM_EXCEPTIONS_HH
#define TYCHE_VM_EXCEPTIONS_HH
#include <stdexcept>
#include <string>
namespace tyche::as {
class AssemblyError : public std::runtime_error
{
public:
explicit AssemblyError(std::string const& str, size_t line, size_t column)
: std::runtime_error((str + " at: line " + std::to_string(line) + ", column: " + std::to_string(column)).c_str()) {}
};
}
#endif //TYCHE_VM_EXCEPTIONS_HH

View File

@@ -0,0 +1,98 @@
#include "assembler.hh"
#include <unordered_map>
#include "as_exceptions.hh"
#include "../bytecode/bytecode.hh"
#include "../vm/instruction.hh"
using namespace std::string_literals;
namespace tyche::as {
ByteArray Assembler::assemble()
{
bc::BytecodePrototype bp;
lexer_.reset();
enum class Section { Const, Function } section;
uint32_t function_id = 0;
for (;;) {
Token t = lexer_.ingest();
if (t.type == TokenType::Enter)
continue;
if (t.type == TokenType::Directive) {
if (std::get<std::string>(t.token) == ".const") {
section = Section::Const;
expect_token(TokenType::Enter);
} else if (std::get<std::string>(t.token) == ".func") {
section = Section::Function;
function_id = std::get<int>(expect_token(TokenType::Integer));
if (function_id >= bp.functions.size())
bp.functions.resize(function_id + 1, { 0, 0 });
expect_token(TokenType::Enter);
} else {
throw AssemblyError("Invalid directive " + std::get<std::string>(t.token), t.line, t.column);
}
} else if (section == Section::Const && t.type == TokenType::Integer) {
int index = std::get<int>(t.token);
if ((size_t) index >= bp.constants.size())
bp.constants.resize(index + 1);
expect_token(TokenType::Colon);
Token tt = lexer_.ingest();
if (tt.type == TokenType::Float)
bp.constants[index] = std::get<float>(tt.token);
else if (tt.type == TokenType::String)
bp.constants[index] = std::get<std::string>(tt.token);
else
throw AssemblyError("Expected float or string as constant", tt.line, tt.column);
expect_token(TokenType::Enter);
} else if (section == Section::Function && t.type == TokenType::Instruction) {
std::string instruction = std::get<std::string>(t.token);
std::optional<int> oper = {};
Token tt = lexer_.ingest();
if (tt.type == TokenType::Integer) {
oper = std::get<int>(tt.token);
tt = lexer_.ingest();
}
auto oinst = vm::translate_instruction(instruction, oper);
if (!oinst)
throw AssemblyError("Invalid or misused instruction '" + instruction + "'", tt.line, tt.column);
bp.functions.at(function_id).code.append_byte((uint8_t) *oinst);
switch (vm::instruction_operand_type(*oinst)) {
case vm::OperandType::Int8: bp.functions.at(function_id).code.append_int8((int8_t) *oper); break;
case vm::OperandType::Int16: bp.functions.at(function_id).code.append_int16((int16_t) *oper); break;
case vm::OperandType::Int32: bp.functions.at(function_id).code.append_int32(*oper); break;
case vm::OperandType::NoOperand: default: break;
}
if (tt.type != TokenType::Enter)
throw AssemblyError("Expected enter", tt.line, tt.column);
} else if (t.type == TokenType::EOF_) {
break;
} else if (t.type != TokenType::Enter) {
throw AssemblyError("Unexpected token of type " + token_type_name(t.type) + ")", t.line, t.column);
}
}
return bc::Bytecode::generate(bp);
}
TokenValue Assembler::expect_token(TokenType type)
{
Token t = lexer_.ingest();
if (t.type != type)
throw AssemblyError("Expected " + token_type_name(t.type), t.line, t.column);
return t.token;
}
} // tyche

View File

@@ -0,0 +1,27 @@
#ifndef TYCHE_ASSEMBLER_HH
#define TYCHE_ASSEMBLER_HH
#include <optional>
#include <string>
#include "lexer.hh"
#include "../common/bytearray.hh"
#include "../bytecode/bytecodeprototype.hh"
namespace tyche::as {
class Assembler {
public:
explicit Assembler(std::string source) : lexer_(std::move(source) + "\n") {}
[[nodiscard]] ByteArray assemble();
private:
Lexer lexer_;
TokenValue expect_token(TokenType type);
};
} // tyche
#endif //TYCHE_ASSEMBLER_HH

122
src/assembler/lexer.cc Normal file
View File

@@ -0,0 +1,122 @@
#include "lexer.hh"
#include "as_exceptions.hh"
namespace tyche::as {
std::string token_type_name(TokenType type)
{
switch (type) {
case TokenType::BOF: return "BOF";
case TokenType::Directive: return "directive";
case TokenType::Instruction: return "instruction";
case TokenType::Integer: return "integer";
case TokenType::Float: return "float";
case TokenType::String: return "string";
case TokenType::Enter: return "enter";
case TokenType::Colon: return "colon";
case TokenType::EOF_: return "EOF";
default: return "???";
}
}
void Lexer::reset()
{
pos_ = 0;
ingest_next_token();
}
Token Lexer::peek() const
{
return current_token_;
}
Token Lexer::ingest()
{
Token t = current_token_;
ingest_next_token();
return t;
}
void Lexer::ingest_next_token()
{
size_t current_line_pos = 1;
size_t current_line = 1;
if (pos_ >= source_.size()) {
current_token_ = { TokenType::EOF_ };
return;
}
char c = source_.at(pos_);
TokenType type {};
std::string stoken;
TokenValue value = std::monostate();
if (c == '.') {
type = TokenType::Directive;
stoken += '.';
while (c = source_.at(++pos_), isalpha(c) || c == '_')
stoken += c;
value = stoken;
} else if (c == '"') {
type = TokenType::String;
++pos_;
while (true) {
if (source_.at(pos_) == '\\') { // TODO - improve this for special characters
++pos_;
} else if (source_.at(pos_) == '"') {
++pos_;
break;
} else if (pos_ >= source_.size()) {
throw AssemblyError("Unterminated string", current_line, pos_ - current_line_pos);
}
stoken += source_.at(pos_++);
}
value = stoken;
} else if (isdigit(c) || c == '-') {
type = TokenType::Integer;
stoken += c;
while (c = source_.at(++pos_), isdigit(c) || c == '.') {
stoken += c;
if (c == '.') {
if (type == TokenType::Integer)
type = TokenType::Float;
else
throw AssemblyError("Double point in floating point number", current_line, pos_ - current_line_pos);
}
}
if (type == TokenType::Integer)
value = std::stoi(stoken);
else
value = std::stof(stoken);
} else if (isalpha(c)) {
type = TokenType::Instruction;
stoken += c;
while (c = source_.at(++pos_), isalpha(c))
stoken += c;
value = stoken;
} else if (c == ':') {
type = TokenType::Colon;
++pos_;
} else if (c == '\n' || c == ';') {
while (pos_ < source_.size() && source_.at(pos_) != '\n')
++pos_;
type = TokenType::Enter;
value = "\n";
++pos_;
++current_line;
current_line_pos = pos_;
} else {
throw AssemblyError(std::string("Unexpected character '") + c + "' (ascii: " + std::to_string((int) c) + ")", current_line, pos_ - current_line_pos);
}
// skip ignored tokens
while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r'))
++pos_;
current_token_ = { .type = type, .token = value, .line = current_line, .column = pos_ - current_line_pos };
}
} // tyche

45
src/assembler/lexer.hh Normal file
View File

@@ -0,0 +1,45 @@
#ifndef TYCHE_LEXER_HH
#define TYCHE_LEXER_HH
#include <string>
#include <utility>
#include <variant>
namespace tyche::as {
enum class TokenType {
BOF, Directive, Instruction, Integer, Float, String, Enter, Colon, EOF_
};
using TokenValue = std::variant<std::monostate, int, float, std::string>;
struct Token {
TokenType type;
TokenValue token = std::monostate();
size_t line = 0;
size_t column = 0;
friend bool operator==(Token const& lhs, Token const& rhs) { return std::tie(lhs.type, lhs.token) == std::tie(rhs.type, rhs.token); }
};
std::string token_type_name(TokenType type);
class Lexer {
public:
explicit Lexer(std::string source) : source_(std::move(source)) { reset(); }
void reset();
[[nodiscard]] Token peek() const;
[[nodiscard]] Token ingest();
private:
const std::string source_;
size_t pos_ = 0;
Token current_token_ { TokenType::BOF };
void ingest_next_token();
};
} // tyche
#endif //TYCHE_LEXER_HH

76
src/assembler/tests.cc Normal file
View File

@@ -0,0 +1,76 @@
#include "assembler.hh"
#include "gtest/gtest.h"
#include "../bytecode/bytecodeprototype.hh"
#include "../bytecode/bytecode.hh"
#include "../vm/instruction.hh"
using namespace tyche;
using namespace tyche::as;
using namespace tyche::bc;
using namespace tyche::vm;
TEST(Lexer, Lexer)
{
Token t;
Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n");
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" }));
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get<int>(t.token), 382);
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get<int>(t.token), -12);
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get<float>(t.token), 3.14f);
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get<float>(t.token), -12.8f);
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter, "\n" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
lexer.reset();
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
}
TEST(Assember, Assembler)
{
BytecodePrototype bp;
bp.constants.emplace_back(3.14f);
bp.constants.emplace_back("Hello world");
bp.functions.emplace_back(0, 0);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8);
bp.functions.at(0).code.append_int8(2);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8);
bp.functions.at(0).code.append_int8(3);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::Sum);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::Return);
bp.functions.emplace_back(0, 0);
bp.functions.at(1).code.append_byte((uint8_t) Instruction::PushInt16);
bp.functions.at(1).code.append_int16(5000);
bp.functions.at(1).code.append_byte((uint8_t) Instruction::Return);
ByteArray expected = Bytecode::generate(bp);
std::string src = R"(
.const
0: 3.14
1: "Hello world"
.func 0
pushi 2 ; this is a comment
pushi 3
sum
ret
.func 1
pushi 5000
ret
)";
ByteArray actual = Assembler(src).assemble();
ASSERT_EQ(expected, actual);
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_BC_EXCEPTIONS_HH
#define TYCHE_BC_EXCEPTIONS_HH
#include <stdexcept>
namespace tyche::bc {
class BytecodeParsingError : public std::runtime_error {
public:
explicit BytecodeParsingError(std::string const& str) : std::runtime_error(str.c_str()) {}
};
}
#endif //TYCHE_BC_EXCEPTIONS_HH

166
src/bytecode/bytecode.cc Normal file
View File

@@ -0,0 +1,166 @@
#include "bytecode.hh"
#include "bc_exceptions.hh"
#include "../common/overloaded.hh"
namespace tyche::bc {
Bytecode::Bytecode(ByteArray ba)
: byte_array_(std::move(ba))
{
// check file size
if (byte_array_.size() < (TOC_START + TOC_SZ))
throw BytecodeParsingError("Invalid bytecode format (file too short)");
// check magic number and version
if (byte_array_.get_uint32(0) != MAGIC_NUMBER)
throw BytecodeParsingError("Invalid bytecode format (magic number not matching)");
if (byte_array_.get_uint32(4) != BYTECODE_VERSION)
throw BytecodeParsingError("Unexpected bytecode format version");
// load cache
cache_.constants_idx_addr = byte_array_.get_uint32(TOC_START);
cache_.n_constants = byte_array_.get_uint16(TOC_START + 4);
cache_.functions_idx_addr = byte_array_.get_uint32(TOC_START + (1 * TOC_RECORD_SZ));
cache_.n_functions = byte_array_.get_uint16(TOC_START + (1 * TOC_RECORD_SZ) + 4);
cache_.constants_start_addr = byte_array_.get_uint32(TOC_START + (2 * TOC_RECORD_SZ));
uint32_t code_start = byte_array_.get_uint32(TOC_START + (3 * TOC_RECORD_SZ));
for (uint32_t i = 0; i < cache_.n_functions; ++i) {
cache_.function_addr.emplace_back(code_start + byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ)));
cache_.function_sz.emplace_back(byte_array_.get_uint32(cache_.functions_idx_addr + (i * FUNCTION_RECORD_SZ) + 8));
}
}
uint32_t Bytecode::n_constants() const
{
return cache_.n_constants;
}
uint32_t Bytecode::n_functions() const
{
return cache_.n_functions;
}
ConstantValue Bytecode::get_constant(uint32_t idx) const
{
uint32_t constant_idx = byte_array_.get_uint32(cache_.constants_idx_addr + (idx * CONST_RECORD_SZ));
switch ((ConstantType) byte_array_.get_byte(cache_.constants_start_addr + constant_idx)) {
case CONST_TYPE_FLOAT:
return byte_array_.get_float(cache_.constants_start_addr + constant_idx + 1);
case CONST_TYPE_STRING:
return byte_array_.get_string(cache_.constants_start_addr + constant_idx + 1).first;
default:
throw BytecodeParsingError("Invalid bytecode format (invalid constant type)");
}
}
Bytecode::FunctionDef Bytecode::get_function_def(uint32_t function_id) const
{
uint32_t idx = cache_.functions_idx_addr + (function_id * FUNCTION_RECORD_SZ);
return {
.n_params = byte_array_.get_uint16(idx + 4),
.locals = byte_array_.get_uint16(idx + 6),
};
}
uint32_t Bytecode::get_function_sz(uint32_t function_id) const
{
return cache_.function_sz.at(function_id);
}
uint8_t Bytecode::get_code_byte(uint32_t function_id, uint32_t idx) const
{
return byte_array_.get_byte(cache_.function_addr.at(function_id) + idx);
}
int8_t Bytecode::get_code_int8(uint32_t function_id, uint32_t idx) const
{
return byte_array_.get_int8(cache_.function_addr.at(function_id) + idx);
}
int16_t Bytecode::get_code_int16(uint32_t function_id, uint32_t idx) const
{
return byte_array_.get_int16(cache_.function_addr.at(function_id) + idx);
}
int32_t Bytecode::get_code_int32(uint32_t function_id, uint32_t idx) const
{
return byte_array_.get_int32(cache_.function_addr.at(function_id) + idx);
}
ByteArray Bytecode::generate(BytecodePrototype const& bp)
{
// header section
ByteArray header;
header.set_uint32(0, MAGIC_NUMBER);
header.set_byte(4, BYTECODE_VERSION);
// constants
ByteArray constant_indexes;
ByteArray raw_constants;
uint32_t idx = 0;
for (auto const& constant: bp.constants) {
constant_indexes.append_uint32(idx);
std::visit(overloaded {
[&](float f) {
raw_constants.append_byte(CONST_TYPE_FLOAT);
raw_constants.append_float(f);
},
[&](std::string const& s) {
raw_constants.append_byte(CONST_TYPE_STRING);
raw_constants.append_string(s);
},
}, constant);
idx = raw_constants.size();
}
// functions
ByteArray functions_indexes;
ByteArray raw_code;
uint32_t idx_idx = 0, code_idx = 0;
for (auto const& f: bp.functions) {
functions_indexes.set_uint32(idx_idx, code_idx);
functions_indexes.set_uint16(idx_idx + 4, f.n_pars);
functions_indexes.set_uint16(idx_idx + 6, f.n_locals);
functions_indexes.set_uint32(idx_idx + 8, f.code.size());
raw_code.append_bytearray(f.code);
code_idx = raw_code.size();
idx_idx += FUNCTION_RECORD_SZ;
}
// table of contents
uint32_t function_idx_start = CONST_IDX_START + constant_indexes.size();
uint32_t raw_constant_start = function_idx_start + functions_indexes.size();
uint32_t raw_code_start = raw_constant_start + raw_constants.size();
ByteArray toc;
if (!bp.constants.empty()) {
toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ, CONST_IDX_START);
toc.set_uint32(SEC_CONST_IDX * TOC_RECORD_SZ + 4, constant_indexes.size() / CONST_RECORD_SZ);
toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ, raw_constant_start);
toc.set_uint32(SEC_CONST_DATA * TOC_RECORD_SZ + 4, raw_constants.size());
}
if (!bp.functions.empty()) {
toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ, function_idx_start);
toc.set_uint32(SEC_FUNC_IDX * TOC_RECORD_SZ + 4, functions_indexes.size() / FUNCTION_RECORD_SZ);
toc.set_uint32(SEC_CODE * TOC_RECORD_SZ, raw_code_start);
toc.set_uint32(SEC_CODE * TOC_RECORD_SZ + 4, raw_code.size());
}
//
// assemble bytecode
//
ByteArray ba;
ba.set_bytearray(0, header);
ba.set_bytearray(TOC_START, toc);
ba.set_bytearray(CONST_IDX_START, constant_indexes);
ba.set_bytearray(function_idx_start, functions_indexes);
ba.set_bytearray(raw_constant_start, raw_constants);
ba.set_bytearray(raw_code_start, raw_code);
return ba;
}
}

62
src/bytecode/bytecode.hh Normal file
View File

@@ -0,0 +1,62 @@
#ifndef TYCHE_BYTECODE_HH
#define TYCHE_BYTECODE_HH
#include "../common/bytearray.hh"
#include "bytecodeprototype.hh"
namespace tyche::bc {
class Bytecode {
public:
Bytecode() = default;
explicit Bytecode(ByteArray ba);
[[nodiscard]] uint32_t n_constants() const;
[[nodiscard]] uint32_t n_functions() const;
[[nodiscard]] ConstantValue get_constant(uint32_t idx) const;
struct FunctionDef { uint16_t n_params, locals; };
[[nodiscard]] FunctionDef get_function_def(uint32_t function_id) const;
[[nodiscard]] uint32_t get_function_sz(uint32_t function_id) const;
[[nodiscard]] uint8_t get_code_byte(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int8_t get_code_int8(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int16_t get_code_int16(uint32_t function_id, uint32_t idx) const;
[[nodiscard]] int32_t get_code_int32(uint32_t function_id, uint32_t idx) const;
// TODO - debugging info
[[nodiscard]] static ByteArray generate(BytecodePrototype const& bp);
private:
ByteArray byte_array_; // the actual data
static constexpr uint8_t BYTECODE_VERSION = 1;
static constexpr uint32_t MAGIC_NUMBER = 0x74b3c138;
static constexpr uint32_t TOC_START = 16,
TOC_N_RECORDS = 8,
TOC_RECORD_SZ = 8,
TOC_SZ = TOC_N_RECORDS * TOC_RECORD_SZ;
static constexpr uint32_t CONST_IDX_START = TOC_START + TOC_SZ,
CONST_RECORD_SZ = 4;
static constexpr uint32_t FUNCTION_RECORD_SZ = 12;
enum Sections { SEC_CONST_IDX = 0, SEC_FUNC_IDX = 1, SEC_CONST_DATA = 2, SEC_CODE = 3 };
// caching for faster reading of data
struct Cache {
uint32_t constants_idx_addr;
uint16_t n_constants;
uint32_t constants_start_addr;
uint32_t functions_idx_addr;
uint32_t n_functions;
std::vector<uint32_t> function_addr;
std::vector<uint32_t> function_sz;
};
Cache cache_ {};
};
}
#endif //TYCHE_BYTECODE_HH

View File

@@ -0,0 +1,30 @@
#ifndef TYCHE_BYTECODEPROTOTYPE_HH
#define TYCHE_BYTECODEPROTOTYPE_HH
#include <cstdint>
#include <string>
#include <variant>
#include <vector>
#include "constant.hh"
#include "../common/bytearray.hh"
namespace tyche::bc {
struct BytecodePrototype {
struct Function {
uint16_t n_pars;
uint16_t n_locals;
ByteArray code {};
Function(uint16_t n_pars_, uint16_t n_locals_) : n_pars(n_pars_), n_locals(n_locals_), code(ByteArray {}) {}
};
std::vector<ConstantValue> constants {};
std::vector<Function> functions {};
// TODO - debugging info
};
}
#endif //TYCHE_BYTECODEPROTOTYPE_HH

15
src/bytecode/constant.hh Normal file
View File

@@ -0,0 +1,15 @@
#ifndef TYCHE_CONSTANT_HH
#define TYCHE_CONSTANT_HH
#include <string>
#include <variant>
namespace tyche::bc {
using ConstantValue = std::variant<float, std::string>;
enum ConstantType : uint8_t { CONST_TYPE_FLOAT = 1, CONST_TYPE_STRING = 2 };
}
#endif //TYCHE_CONSTANT_HH

167
src/bytecode/tests.cc Normal file
View File

@@ -0,0 +1,167 @@
#include "gtest/gtest.h"
#include <cstring>
#include <functional>
#include "../common/bytearray.hh"
#include "bytecodeprototype.hh"
#include "bytecode.hh"
using namespace tyche;
using namespace tyche::bc;
TEST(ByteArray, ByteArray)
{
auto test = [](std::function<void(ByteArray&)> const& f, std::vector<uint8_t> const& expected) {
ByteArray ba;
f(ba);
ASSERT_EQ(ba.data().size(), expected.size());
ASSERT_EQ(std::memcmp(ba.data().data(), expected.data(), ba.data().size()), 0);
};
#define TESTX(a, ...) test([](ByteArray& ba) { a; }, std::vector<uint8_t>({ __VA_ARGS__ }));
TESTX(ba.set_byte(1, 0xab), 0x00, 0xab)
ByteArray ba;
ba.set_byte(1, 0xab); ASSERT_EQ(ba.get_byte(1), 0xab);
ba.set_int8(1, 12); ASSERT_EQ(ba.get_int8(1), 12);
ba.set_int8(1, -12); ASSERT_EQ(ba.get_int8(1), -12);
ba.set_int16(1, 5000); ASSERT_EQ(ba.get_int16(1), 5000);
ba.set_int32(1, 5000300); ASSERT_EQ(ba.get_int32(1), 5000300);
ba.set_int32(1, -5000300); ASSERT_EQ(ba.get_int32(1), -5000300);
ba.set_float(1, 3.14); ASSERT_FLOAT_EQ(ba.get_float(1), 3.14);
ba.set_float(1, -3.14); ASSERT_FLOAT_EQ(ba.get_float(1), -3.14);
ba.set_float(1, -5000300.1324); ASSERT_FLOAT_EQ(ba.get_float(1), -5000300.1324);
ba.set_string(1, "Hello world!"); ASSERT_EQ(ba.get_string(1), std::make_pair("Hello world!", 13));
#undef TESTX
}
TEST(Bytecode, Constants)
{
BytecodePrototype bp;
bp.constants.emplace_back(42.3f);
bp.constants.emplace_back("HELLO");
std::vector<uint8_t> expected = {
// header
0x38, 0xc1, 0xb3, 0x74, // magic
0x01, 0x00, 0x00, 0x00, // version
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
// index
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // constant index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // function undex
0x58, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, // raw constants
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// constant indexes
0x00, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00,
// constant values
CONST_TYPE_FLOAT, 0x33, 0x33, 0x29, 0x42, // float: 42.3f
CONST_TYPE_STRING, 'H', 'E', 'L', 'L', 'O', 0x00
};
ByteArray ba = Bytecode::generate(bp);
ASSERT_EQ(ba.data(), expected);
}
TEST(Bytecode, Code)
{
BytecodePrototype bp;
auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68);
f.code.append_int8(42);
auto& f2 = bp.functions.emplace_back(2, 1);
f2.code.append_byte(0x42);
std::vector<uint8_t> expected = {
// header
0x38, 0xc1, 0xb3, 0x74, // magic
0x01, 0x00, 0x00, 0x00, // version
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
// index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // constant index
0x50, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // variable index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // raw constants
0x68, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, // raw code
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// function definitions
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00,
// code
0x68, 42, 0x42,
};
ByteArray ba = Bytecode::generate(bp);
ASSERT_EQ(ba.data(), expected);
}
TEST(Bytecode, Parsing)
{
// write bytecode
BytecodePrototype bp;
bp.constants.emplace_back(3.14f);
bp.constants.emplace_back("HELLO");
auto& f = bp.functions.emplace_back(0, 0);
f.code.append_byte(0x68);
f.code.append_int8(42);
auto& ff = bp.functions.emplace_back(2, 1);
ff.code.append_byte(0x42);
ByteArray ba = Bytecode::generate(bp);
// print(ba.data());
// read bytecode
Bytecode bc(std::move(ba));
ASSERT_EQ(bc.n_constants(), 2);
ASSERT_EQ(bc.n_functions(), 2);
ASSERT_EQ(bc.get_function_sz(0), 2);
ASSERT_EQ(bc.get_function_sz(1), 1);
ASSERT_FLOAT_EQ(std::get<float>(bc.get_constant(0)), 3.14f);
ASSERT_EQ(std::get<std::string>(bc.get_constant(1)), "HELLO");
Bytecode::FunctionDef f1 = bc.get_function_def(0);
ASSERT_EQ(f1.n_params, 0);
ASSERT_EQ(f1.locals, 0);
Bytecode::FunctionDef f2 = bc.get_function_def(1);
ASSERT_EQ(f2.n_params, 2);
ASSERT_EQ(f2.locals, 1);
ASSERT_EQ(bc.get_code_byte(0, 0), 0x68);
ASSERT_EQ(bc.get_code_int8(0, 1), 42);
ASSERT_EQ(bc.get_code_byte(1, 0), 0x42);
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

152
src/common/bytearray.cc Normal file
View File

@@ -0,0 +1,152 @@
#include "bytearray.hh"
#include <cstring>
#include <cstdio>
namespace tyche {
void ByteArray::set_byte(uint32_t addr, uint8_t byte)
{
if (data_.size() < (addr + 1))
data_.resize(addr + 1, 0);
data_.at(addr) = byte;
}
void ByteArray::set_int8(uint32_t addr, int8_t value)
{
set_byte(addr, (uint8_t) value);
}
void ByteArray::set_int16(uint32_t addr, int16_t value)
{
set_byte(addr, (uint8_t) (value));
set_byte(addr+1, (uint8_t) (value >> 8));
}
void ByteArray::set_int32(uint32_t addr, int32_t value)
{
set_byte(addr, (uint8_t) (value));
set_byte(addr+1, (uint8_t) (value >> 8));
set_byte(addr+2, (uint8_t) (value >> 16));
set_byte(addr+3, (uint8_t) (value >> 24));
}
void ByteArray::set_uint16(uint32_t addr, uint16_t value)
{
set_byte(addr, (uint8_t) (value));
set_byte(addr+1, (uint8_t) (value >> 8));
}
void ByteArray::set_uint32(uint32_t addr, uint32_t value)
{
set_byte(addr, (uint8_t) (value));
set_byte(addr+1, (uint8_t) (value >> 8));
set_byte(addr+2, (uint8_t) (value >> 16));
set_byte(addr+3, (uint8_t) (value >> 24));
}
void ByteArray::set_float(uint32_t addr, float value)
{
uint32_t bits;
std::memcpy(&bits, &value, 4);
set_byte(addr, (uint8_t) (bits));
set_byte(addr+1, (uint8_t) (bits >> 8));
set_byte(addr+2, (uint8_t) (bits >> 16));
set_byte(addr+3, (uint8_t) (bits >> 24));
}
void ByteArray::set_string(uint32_t addr, std::string const& str)
{
for (uint8_t c: str)
set_byte(addr++, c);
set_byte(addr, 0);
}
void ByteArray::set_bytearray(uint32_t addr, ByteArray const& bytearray)
{
for (uint8_t byte: bytearray.data())
set_byte(addr++, byte);
}
uint8_t ByteArray::get_byte(uint32_t addr) const
{
return data_.at(addr);
}
uint16_t ByteArray::get_uint16(uint32_t addr) const
{
return (uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8;
}
uint32_t ByteArray::get_uint32(uint32_t addr) const
{
return (uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8
| (uint32_t) get_byte(addr+2) << 16
| (uint32_t) get_byte(addr+3) << 24;
}
int8_t ByteArray::get_int8(uint32_t addr) const
{
return std::bit_cast<int8_t>(get_byte(addr));
}
int16_t ByteArray::get_int16(uint32_t addr) const
{
return (uint16_t) get_byte(addr)
| (uint16_t) get_byte(addr+1) << 8;
}
int32_t ByteArray::get_int32(uint32_t addr) const
{
return std::bit_cast<int32_t>((uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8
| (uint32_t) get_byte(addr+2) << 16
| (uint32_t) get_byte(addr+3) << 24);
}
float ByteArray::get_float(uint32_t addr) const
{
uint32_t bits = (uint32_t) get_byte(addr)
| (uint32_t) get_byte(addr+1) << 8
| (uint32_t) get_byte(addr+2) << 16
| (uint32_t) get_byte(addr+3) << 24;
float value;
std::memcpy(&value, &bits, 4);
return value;
}
std::pair<std::string, size_t> ByteArray::get_string(uint32_t addr) const
{
std::string data;
while (char c = (char) get_byte(addr++))
data += c;
return { data, data.size() + 1 };
}
void ByteArray::append_bytearray(ByteArray const& bytearray)
{
data_.insert(data_.end(), bytearray.data().begin(), bytearray.data().end());
}
std::string ByteArray::hexdump() const
{
auto to_hex = [](uint32_t value, size_t n_chars) -> std::string {
char buf[15];
snprintf(buf, sizeof buf, "%0*X", (int) n_chars, value);
return { buf };
};
std::string out;
for (size_t i = 0; i < data_.size(); ++i) {
if (i % 16 == 0)
out += to_hex(i, 4) + " | ";
out += to_hex(data_.at(i), 2) + " ";
if (i % 16 == 15)
out += "\n";
}
return out + "\n";
}
}

58
src/common/bytearray.hh Normal file
View File

@@ -0,0 +1,58 @@
#ifndef TYCHE_BYTEARRAY_HH
#define TYCHE_BYTEARRAY_HH
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>
namespace tyche {
class ByteArray {
public:
ByteArray() = default;
explicit ByteArray(std::vector<uint8_t> data) : data_(std::move(data)) {}
void set_byte(uint32_t addr, uint8_t byte);
void set_uint16(uint32_t addr, uint16_t value);
void set_uint32(uint32_t addr, uint32_t value);
void set_int8(uint32_t addr, int8_t value);
void set_int16(uint32_t addr, int16_t value);
void set_int32(uint32_t addr, int32_t value);
void set_float(uint32_t addr, float value);
void set_string(uint32_t addr, std::string const& str);
void set_bytearray(uint32_t addr, ByteArray const& bytearray);
void append_byte(uint8_t byte) { set_byte(data_.size(), byte); }
void append_uint16(uint16_t value) { set_uint16(data_.size(), value); }
void append_uint32(uint32_t value) { set_uint32(data_.size(), value); }
void append_int8(int8_t value) { set_int8(data_.size(), value); }
void append_int16(int16_t value) { set_int16(data_.size(), value); }
void append_int32(int32_t value) { set_int32(data_.size(), value); }
void append_float(float value) { set_float(data_.size(), value); }
void append_string(std::string const& str) { set_string(data_.size(), str); }
void append_bytearray(ByteArray const& bytearray);
[[nodiscard]] uint8_t get_byte(uint32_t addr) const;
[[nodiscard]] uint16_t get_uint16(uint32_t addr) const;
[[nodiscard]] uint32_t get_uint32(uint32_t addr) const;
[[nodiscard]] int8_t get_int8(uint32_t addr) const;
[[nodiscard]] int16_t get_int16(uint32_t addr) const;
[[nodiscard]] int32_t get_int32(uint32_t addr) const;
[[nodiscard]] float get_float(uint32_t addr) const;
[[nodiscard]] std::pair<std::string, size_t> get_string(uint32_t addr) const;
[[nodiscard]] std::vector<uint8_t> const& data() const { return data_; }
[[nodiscard]] size_t size() const { return data_.size(); }
[[nodiscard]] std::string hexdump() const;
friend bool operator==(ByteArray const& lhs, ByteArray const& rhs) { return lhs.data_ == rhs.data_; }
private:
std::vector<uint8_t> data_ {};
};
}
#endif //TYCHE_BYTEARRAY_HH

8
src/common/overloaded.hh Normal file
View File

@@ -0,0 +1,8 @@
#ifndef TYCHE_OVERLOADED_HH
#define TYCHE_OVERLOADED_HH
// used by std::visitor
template<class... Ts>
struct overloaded : Ts... { using Ts::operator()...; };
#endif //TYCHE_OVERLOADED_HH

82
src/vm/code.cc Normal file
View File

@@ -0,0 +1,82 @@
#include "code.hh"
#include "../common/overloaded.hh"
#include "instruction.hh"
namespace tyche::vm {
FunctionId Code::import_bytecode(ByteArray incoming)
{
bc::Bytecode bc(std::move(incoming));
// TODO - adjust function calls, constants
bytecode_ = std::move(bc);
return 0; // TODO
}
Operation Code::operation(Location const& location) const
{
Instruction inst = (Instruction) bytecode_.get_code_byte(location.function_id, location.pc);
OperandType opet = instruction_operand_type(inst);
switch (opet) {
case OperandType::NoOperand:
return {
.instruction = inst,
.operator_ = 0,
.next_location = { .function_id = location.function_id, .pc = location.pc + 1 },
};
case OperandType::Int8:
return {
.instruction = inst,
.operator_ = bytecode_.get_code_int8(location.function_id, location.pc + 1),
.next_location = { .function_id = location.function_id, .pc = location.pc + 2 },
};
case OperandType::Int16:
return {
.instruction = inst,
.operator_ = bytecode_.get_code_int16(location.function_id, location.pc + 1),
.next_location = { .function_id = location.function_id, .pc = location.pc + 3 },
};
case OperandType::Int32:
return {
.instruction = inst,
.operator_ = bytecode_.get_code_int32(location.function_id, location.pc + 1),
.next_location = { .function_id = location.function_id, .pc = location.pc + 5 },
};
default:
break;
}
throw std::logic_error("Should not get here");
}
std::string Code::disassemble() const
{
std::string out;
out += ".const\n";
for (size_t i = 0; i < bytecode_.n_constants(); ++i) {
out += "\t" + std::to_string(i) + ": ";
std::visit(overloaded {
[&out](float f) { out += std::to_string(f); },
[&out](std::string const& str) { out += "\"" + str + "\""; },
}, bytecode_.get_constant(i));
out += "\n";
}
out += "\n";
for (size_t i = 0; i < bytecode_.n_functions(); ++i) {
out += ".func " + std::to_string(i) + "\n";
uint32_t addr = 0;
while (addr < bytecode_.get_function_sz(i)) {
auto [op, sz] = debug_instruction(bytecode_, i, addr);
out += "\t" + op + "\n";
addr += sz;
}
}
return out;
}
} // tyche

34
src/vm/code.hh Normal file
View File

@@ -0,0 +1,34 @@
#ifndef TYCHE_CODE_HH
#define TYCHE_CODE_HH
#include "instruction.hh"
#include "location.hh"
#include "value.hh"
#include "../bytecode/bytecode.hh"
namespace tyche::vm {
struct Operation
{
Instruction instruction;
int32_t operator_;
Location next_location;
};
class Code {
public:
FunctionId import_bytecode(ByteArray incoming);
[[nodiscard]] std::string disassemble() const;
[[nodiscard]] Operation operation(Location const& location) const;
[[nodiscard]] bc::Bytecode const& bytecode() const { return bytecode_; }
private:
bc::Bytecode bytecode_;
};
} // tyche
#endif //TYCHE_CODE_HH

114
src/vm/expr.cc Normal file
View File

@@ -0,0 +1,114 @@
#include "expr.hh"
#include <cmath>
#include <functional>
#include "vm_exceptions.hh"
namespace tyche::vm {
std::function<Value(Value const&, Value const&)> binary_ops[(size_t) BinaryOperationType::COUNT][(size_t) Type::COUNT][(size_t) Type::COUNT];
static int init_ = []() {
// every combination, except when explicit, return type error
for (size_t i = 0; i < (size_t) BinaryOperationType::COUNT; ++i) {
for (size_t j = 0; j < (size_t) Type::COUNT; ++j) {
for (size_t k = 0; k < (size_t) Type::COUNT; ++k) {
binary_ops[i][j][k] = [&i](Value const& a, Value const& b) -> Value {
throw VMInvalidOperation((BinaryOperationType) i, a.type(), b.type());
};
}
}
}
// every equality/inequality, by default, return inequal
for (size_t j = 0; j < (size_t) Type::COUNT; ++j) {
for (size_t k = 0; k < (size_t) Type::COUNT; ++k) {
binary_ops[(size_t) BinaryOperationType::Equality][j][k] = [](Value const&, Value const&) { return Value::createFalse(); };
binary_ops[(size_t) BinaryOperationType::Inequality][j][k] = [](Value const&, Value const&) { return Value::createTrue(); };
}
}
#define BIN_OP(op, t1, t2) binary_ops[(size_t) BinaryOperationType::op][(size_t) Type::t1][(size_t) Type::t2] = [](Value const& b, Value const& a)
BIN_OP(Sum, Integer, Integer) { return Value::createInteger(a.as_integer() + b.as_integer()); };
BIN_OP(Sum, Integer, Float) { return Value::createFloat((float) a.as_integer() + b.as_float()); };
BIN_OP(Sum, Float, Integer) { return Value::createFloat(a.as_float() + (float) b.as_integer()); };
BIN_OP(Sum, Float, Float) { return Value::createFloat(a.as_float() + b.as_float()); };
BIN_OP(Sum, String, String) { return Value::createString(a.as_string() + b.as_string()); };
BIN_OP(Subtraction, Integer, Integer) { return Value::createInteger(a.as_integer() - b.as_integer()); };
BIN_OP(Subtraction, Integer, Float) { return Value::createFloat((float) a.as_integer() - b.as_float()); };
BIN_OP(Subtraction, Float, Integer) { return Value::createFloat(a.as_float() - (float) b.as_integer()); };
BIN_OP(Subtraction, Float, Float) { return Value::createFloat(a.as_float() - b.as_float()); };
BIN_OP(Multiplication, Integer, Integer) { return Value::createInteger(a.as_integer() * b.as_integer()); };
BIN_OP(Multiplication, Integer, Float) { return Value::createFloat((float) a.as_integer() * b.as_float()); };
BIN_OP(Multiplication, Float, Integer) { return Value::createFloat(a.as_float() * (float) b.as_integer()); };
BIN_OP(Multiplication, Float, Float) { return Value::createFloat(a.as_float() * b.as_float()); };
BIN_OP(Division, Integer, Integer) { return Value::createFloat((float) a.as_integer() / (float) b.as_integer()); };
BIN_OP(Division, Integer, Float) { return Value::createFloat((float) a.as_integer() / b.as_float()); };
BIN_OP(Division, Float, Integer) { return Value::createFloat(a.as_float() / (float) b.as_integer()); };
BIN_OP(Division, Float, Float) { return Value::createFloat(a.as_float() / b.as_float()); };
BIN_OP(IntegerDivision, Integer, Integer) { return Value::createInteger(a.as_integer() / b.as_integer()); };
BIN_OP(IntegerDivision, Integer, Float) { return Value::createInteger(a.as_integer() / (int32_t) b.as_float()); };
BIN_OP(IntegerDivision, Float, Integer) { return Value::createInteger((int32_t) a.as_float() / b.as_integer()); };
BIN_OP(IntegerDivision, Float, Float) { return Value::createInteger((int32_t) a.as_float() / (int32_t) b.as_float()); };
BIN_OP(Equality, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() == b.as_integer()); };
BIN_OP(Equality, Integer, Float) { return Value::createIntegerFromBool(std::abs((float) a.as_integer() - b.as_float()) < FLOAT_EPSILON); };
BIN_OP(Equality, Float, Integer) { return Value::createIntegerFromBool(std::abs(a.as_float() - (float) b.as_integer()) < FLOAT_EPSILON); };
BIN_OP(Equality, Float, Float) { return Value::createIntegerFromBool(std::abs(a.as_float() - b.as_float()) < FLOAT_EPSILON); };
BIN_OP(Equality, String, String) { return Value::createIntegerFromBool(a.as_string() == b.as_string()); };
BIN_OP(Inequality, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() != b.as_integer()); };
BIN_OP(Inequality, Integer, Float) { return Value::createIntegerFromBool(std::abs((float) a.as_integer() - b.as_float()) >= FLOAT_EPSILON); };
BIN_OP(Inequality, Float, Integer) { return Value::createIntegerFromBool(std::abs(a.as_float() - (float) b.as_integer()) >= FLOAT_EPSILON); };
BIN_OP(Inequality, Float, Float) { return Value::createIntegerFromBool(std::abs(a.as_float() - b.as_float()) >= FLOAT_EPSILON); };
BIN_OP(Inequality, String, String) { return Value::createIntegerFromBool(a.as_string() != b.as_string()); };
BIN_OP(LessThan, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() < b.as_integer()); };
BIN_OP(LessThan, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() < b.as_float()); };
BIN_OP(LessThan, Float, Integer) { return Value::createIntegerFromBool(a.as_float() < (float) b.as_integer()); };
BIN_OP(LessThan, Float, Float) { return Value::createIntegerFromBool(a.as_float() < b.as_float()); };
BIN_OP(LessThanOrEquals, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() <= b.as_integer()); };
BIN_OP(LessThanOrEquals, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() <= b.as_float()); };
BIN_OP(LessThanOrEquals, Float, Integer) { return Value::createIntegerFromBool(a.as_float() <= (float) b.as_integer()); };
BIN_OP(LessThanOrEquals, Float, Float) { return Value::createIntegerFromBool(a.as_float() <= b.as_float()); };
BIN_OP(GreaterThan, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() > b.as_integer()); };
BIN_OP(GreaterThan, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() > b.as_float()); };
BIN_OP(GreaterThan, Float, Integer) { return Value::createIntegerFromBool(a.as_float() > (float) b.as_integer()); };
BIN_OP(GreaterThan, Float, Float) { return Value::createIntegerFromBool(a.as_float() > b.as_float()); };
BIN_OP(GreaterThanOrEquals, Integer, Integer) { return Value::createIntegerFromBool(a.as_integer() >= b.as_integer()); };
BIN_OP(GreaterThanOrEquals, Integer, Float) { return Value::createIntegerFromBool((float) a.as_integer() >= b.as_float()); };
BIN_OP(GreaterThanOrEquals, Float, Integer) { return Value::createIntegerFromBool(a.as_float() >= (float) b.as_integer()); };
BIN_OP(GreaterThanOrEquals, Float, Float) { return Value::createIntegerFromBool(a.as_float() >= b.as_float()); };
BIN_OP(Power, Integer, Integer) { return Value::createInteger((int32_t) powl(a.as_integer(), b.as_integer())); };
BIN_OP(Power, Integer, Float) { return Value::createFloat(powf((float) a.as_integer(), b.as_float())); };
BIN_OP(Power, Float, Integer) { return Value::createFloat(powf(a.as_float(), (float) b.as_integer())); };
BIN_OP(Power, Float, Float) { return Value::createFloat(powf(a.as_float(), b.as_float())); };
BIN_OP(Modulo, Integer, Integer) { return Value::createInteger(a.as_integer() % b.as_integer()); };
BIN_OP(ShiftLeft, Integer, Integer) { return Value::createInteger(a.as_integer() << b.as_integer()); };
BIN_OP(ShiftRight, Integer, Integer) { return Value::createInteger(a.as_integer() >> b.as_integer()); };
BIN_OP(BitwiseAnd, Integer, Integer) { return Value::createInteger(a.as_integer() & b.as_integer()); };
BIN_OP(BitwiseOr, Integer, Integer) { return Value::createInteger(a.as_integer() | b.as_integer()); };
BIN_OP(BitwiseXor, Integer, Integer) { return Value::createInteger(a.as_integer() ^ b.as_integer()); };
#undef BIN_OP
return 0;
}();
Value binary_operation(Value const& a, Value const& b, BinaryOperationType op)
{
return binary_ops[(size_t) op][(size_t) b.type()][(size_t) a.type()](a, b);
}
}

21
src/vm/expr.hh Normal file
View File

@@ -0,0 +1,21 @@
#ifndef TYCHE_EXPR_HH
#define TYCHE_EXPR_HH
#include "value.hh"
namespace tyche::vm {
enum class BinaryOperationType
{
Sum, Subtraction, Multiplication, Division, IntegerDivision,
Equality, Inequality, LessThan, LessThanOrEquals,
GreaterThan, GreaterThanOrEquals, Power, Modulo,
BitwiseAnd, BitwiseOr, BitwiseXor, ShiftLeft, ShiftRight,
COUNT
};
constexpr float FLOAT_EPSILON = 0.000001f;
Value binary_operation(Value const& a, Value const& b, BinaryOperationType op);
}
#endif //TYCHE_EXPR_HH

244
src/vm/instruction.cc Normal file
View File

@@ -0,0 +1,244 @@
#include "instruction.hh"
#include <limits>
#include <unordered_map>
namespace tyche::vm {
const std::unordered_map<std::string, Instruction> instruction_names = {
{ "pushi", Instruction::PushInt8 },
{ "pushc", Instruction::PushConstant8 },
{ "pushz", Instruction::PushZero },
{ "pusht", Instruction::PushTrue },
{ "pushf", Instruction::PushFunction8 },
{ "newa", Instruction::NewArray },
{ "newt", Instruction::NewTable },
{ "pop", Instruction::Pop },
{ "dup", Instruction::Duplicate },
{ "pushv", Instruction::PushValues8 },
{ "set", Instruction::SetValue8 },
{ "dupv", Instruction::DuplicateValue8 },
{ "setg", Instruction::SetGlobal8 },
{ "getl", Instruction::GetGlobal8 },
{ "call8", Instruction::Call8 },
{ "ret", Instruction::Return },
{ "retn", Instruction::ReturnNil },
{ "getkv", Instruction::GetKeyValue },
{ "setkv", Instruction::SetKeyValue },
{ "geta", Instruction::GetArrayItem },
{ "seta", Instruction::SetArrayItem },
{ "appnd", Instruction::Append },
{ "next", Instruction::Next },
{ "smt", Instruction::SetMetatable },
{ "mt", Instruction::GetMetatable },
{ "sum", Instruction::Sum },
{ "sub", Instruction::Subtract },
{ "mul", Instruction::Multiply },
{ "div", Instruction::Divide },
{ "idiv", Instruction::DivideInt },
{ "eq", Instruction::Equals },
{ "neq", Instruction::NotEquals },
{ "lt", Instruction::LessThan },
{ "lte", Instruction::LessThanEq },
{ "gt", Instruction::GreaterThan },
{ "gte", Instruction::GreaterThanEq },
{ "and", Instruction::And },
{ "or", Instruction::Or },
{ "xor", Instruction::Xor },
{ "pow", Instruction::Power },
{ "shl", Instruction::ShiftLeft },
{ "shr", Instruction::ShiftRight },
{ "mod", Instruction::Modulo },
{ "len", Instruction::Len },
{ "type", Instruction::Type },
{ "cast", Instruction::Cast },
{ "ver", Instruction::Version },
{ "bz", Instruction::BranchIfZero8 },
{ "bnz", Instruction::BranchIfNotZero8 },
{ "jmp", Instruction::Jump8 },
{ "cmpl", Instruction::Compile },
{ "asmbl", Instruction::Assemble },
{ "load", Instruction::Load },
};
std::pair<std::string, size_t> debug_instruction(Instruction inst, int oper)
{
std::string out;
switch (inst) {
case Instruction::PushInt8:
case Instruction::PushInt16:
case Instruction::PushInt32:
out = "pushi";
break;
case Instruction::PushConstant8:
case Instruction::PushConstant16:
case Instruction::PushConstant32:
out = "pushc";
break;
case Instruction::PushFunction8:
case Instruction::PushFunction16:
case Instruction::PushFunction32:
out = "pushf";
break;
case Instruction::PushZero: out = "pushz"; break;
case Instruction::PushTrue: out = "pusht"; break;
case Instruction::NewArray: out = "newa"; break;
case Instruction::NewTable: out = "newt"; break;
case Instruction::Pop: out = "pop"; break;
case Instruction::Duplicate: out = "dup"; break;
case Instruction::PushValues8:
case Instruction::PushValues16:
case Instruction::PushValues32:
out = "pushv";
break;
case Instruction::SetValue8:
case Instruction::SetValue16:
case Instruction::SetValue32:
out = "set";
break;
case Instruction::DuplicateValue8:
case Instruction::DuplicateValue16:
case Instruction::DuplicateValue32:
out = "dupv";
break;
case Instruction::SetGlobal8:
case Instruction::SetGlobal16:
case Instruction::SetGlobal32:
out = "setg";
break;
case Instruction::GetGlobal8:
case Instruction::GetGlobal16:
case Instruction::GetGlobal32:
out = "getg";
break;
case Instruction::Call8:
case Instruction::Call16:
case Instruction::Call32:
out = "call";
break;
case Instruction::Return: out = "ret"; break;
case Instruction::ReturnNil: out = "retn"; break;
case Instruction::GetKeyValue: out = "getkv"; break;
case Instruction::SetKeyValue: out = "setkv"; break;
case Instruction::GetArrayItem: out = "geta"; break;
case Instruction::SetArrayItem: out = "seta"; break;
case Instruction::Append: out = "appnd"; break;
case Instruction::Next: out = "next"; break;
case Instruction::SetMetatable: out = "smt"; break;
case Instruction::GetMetatable: out = "mt"; break;
case Instruction::Sum: out = "sum"; break;
case Instruction::Subtract: out = "sub"; break;
case Instruction::Multiply: out = "mul"; break;
case Instruction::Divide: out = "div"; break;
case Instruction::DivideInt: out = "idiv"; break;
case Instruction::Equals: out = "eq"; break;
case Instruction::NotEquals: out = "neq"; break;
case Instruction::LessThan: out = "lt"; break;
case Instruction::LessThanEq: out = "lte"; break;
case Instruction::GreaterThan: out = "gt"; break;
case Instruction::GreaterThanEq: out = "gte"; break;
case Instruction::And: out = "and"; break;
case Instruction::Or: out = "or"; break;
case Instruction::Xor: out = "xor"; break;
case Instruction::Power: out = "pow"; break;
case Instruction::ShiftLeft: out = "shl"; break;
case Instruction::ShiftRight: out = "shr"; break;
case Instruction::Modulo: out = "mod"; break;
case Instruction::Len: out = "len"; break;
case Instruction::Type: out = "type"; break;
case Instruction::Cast: out = "cast"; break;
case Instruction::Version: out = "ver"; break;
case Instruction::BranchIfZero8:
case Instruction::BranchIfZero16:
case Instruction::BranchIfZero32:
out = "bz";
break;
case Instruction::BranchIfNotZero8:
case Instruction::BranchIfNotZero16:
case Instruction::BranchIfNotZero32:
out = "bnz";
break;
case Instruction::Jump8:
case Instruction::Jump16:
case Instruction::Jump32:
out = "jmp";
break;
case Instruction::Compile: out = "cmpl"; break;
case Instruction::Assemble: out = "asmbl"; break;
case Instruction::Load: out = "load"; break;
default:
out = "???";
}
OperandType operands = instruction_operand_type(inst);
if (operands == OperandType::NoOperand)
return { out, 1 };
out += " " + std::to_string(oper);
if (operands == OperandType::Int32)
return { out, 5 };
if (operands == OperandType::Int16)
return { out, 3 };
return { out, 2 };
}
std::pair<std::string, size_t> debug_instruction(bc::Bytecode const& bt, uint32_t function_id, uint32_t addr)
{
auto inst = (Instruction) bt.get_code_byte(function_id, addr);
switch (instruction_operand_type(inst)) {
case OperandType::NoOperand:
return debug_instruction(inst);
case OperandType::Int8:
return debug_instruction(inst, bt.get_code_int8(function_id, addr + 1));
case OperandType::Int16:
return debug_instruction(inst, bt.get_code_int16(function_id, addr + 1));
case OperandType::Int32:
return debug_instruction(inst, bt.get_code_int32(function_id, addr + 1));
default:
break;
}
return { "???", 1 };
}
OperandType instruction_operand_type(Instruction inst)
{
if ((uint8_t) inst >= 0xe0)
return OperandType::Int32;
if ((uint8_t) inst >= 0xc0)
return OperandType::Int16;
if ((uint8_t) inst >= 0xa0)
return OperandType::Int8;
return OperandType::NoOperand;
}
std::optional<Instruction> translate_instruction(std::string const& txt, std::optional<int> op)
{
auto it = instruction_names.find(txt);
if (it == instruction_names.end())
return {};
Instruction inst = it->second;
OperandType optype = instruction_operand_type(inst);
if (optype == OperandType::NoOperand && op)
return {};
if (optype != OperandType::NoOperand && !op)
return {};
if (optype == OperandType::NoOperand)
return inst;
if (op >= std::numeric_limits<int8_t>::min() && op <= std::numeric_limits<int8_t>::max())
return inst;
if (op >= std::numeric_limits<int16_t>::min() && op <= std::numeric_limits<int16_t>::max())
return (Instruction) ((uint8_t) inst + OPCODE_NEXT_SIZE);
return (Instruction) ((uint8_t) inst + (OPCODE_NEXT_SIZE * 2));
}
}

121
src/vm/instruction.hh Normal file
View File

@@ -0,0 +1,121 @@
#ifndef TYCHE_INSTRUCTION_HH
#define TYCHE_INSTRUCTION_HH
#include <cstdint>
#include <optional>
#include <string>
#include <utility>
#include "../bytecode/bytecode.hh"
namespace tyche::vm {
constexpr uint8_t OPCODE_NEXT_SIZE = 0x20;
enum class Instruction : uint8_t {
// stack operations
PushInt8 = 0xa0,
PushInt16 = 0xc0,
PushInt32 = 0xe0,
PushConstant8 = 0xa1,
PushConstant16 = 0xc1,
PushConstant32 = 0xe1,
PushFunction8 = 0xa2,
PushFunction16 = 0xc2,
PushFunction32 = 0xe2,
PushZero = 0x00,
PushTrue = 0x01,
NewArray = 0x02,
NewTable = 0x03,
Pop = 0x04,
Duplicate = 0x05,
// local variables
PushValues8 = 0xa3,
PushValues16 = 0xc3,
PushValues32 = 0xe3,
SetValue8 = 0xab,
SetValue16 = 0xcb,
SetValue32 = 0xeb,
DuplicateValue8 = 0xa4,
DuplicateValue16 = 0xc4,
DuplicateValue32 = 0xe4,
SetGlobal8 = 0xa5,
SetGlobal16 = 0xc5,
SetGlobal32 = 0xe5,
GetGlobal8 = 0xa6,
GetGlobal16 = 0xc6,
GetGlobal32 = 0xe6,
// function operations
Call8 = 0xa7,
Call16 = 0xc7,
Call32 = 0xe7,
Return = 0x10,
ReturnNil = 0x11,
// table and array operations
GetKeyValue = 0x16,
SetKeyValue = 0x17,
GetArrayItem = 0x18,
SetArrayItem = 0x19,
Append = 0x1a,
Next = 0x1b,
SetMetatable = 0x1c,
GetMetatable = 0x1d,
// logical/arithmetic
Sum = 0x20,
Subtract = 0x21,
Multiply = 0x22,
Divide = 0x23,
DivideInt = 0x24,
Equals = 0x25,
NotEquals = 0x26,
LessThan = 0x27,
LessThanEq = 0x28,
GreaterThan = 0x29,
GreaterThanEq = 0x2a,
And = 0x2b,
Or = 0x2c,
Xor = 0x2d,
Power = 0x2e,
ShiftLeft = 0x2f,
ShiftRight = 0x30,
Modulo = 0x31,
// other value operations
Len = 0x40,
Type = 0x41,
Cast = 0x42,
Version = 0x43,
// control flow
BranchIfZero8 = 0xa8,
BranchIfZero16 = 0xc8,
BranchIfZero32 = 0xe8,
BranchIfNotZero8 = 0xa9,
BranchIfNotZero16 = 0xc9,
BranchIfNotZero32 = 0xe9,
Jump8 = 0xaa,
Jump16 = 0xca,
Jump32 = 0xea,
// external code
Compile = 0x48,
Assemble = 0x49,
Load = 0x4a,
};
std::pair<std::string, size_t> debug_instruction(Instruction inst, int oper=0);
std::pair<std::string, size_t> debug_instruction(bc::Bytecode const& bt, uint32_t function_id, uint32_t addr);
enum class OperandType { NoOperand, Int8, Int16, Int32 };
OperandType instruction_operand_type(Instruction instruction);
std::optional<Instruction> translate_instruction(std::string const& txt, std::optional<int> op);
}
#endif //TYCHE_INSTRUCTION_HH

16
src/vm/location.hh Normal file
View File

@@ -0,0 +1,16 @@
#ifndef TYCHE_LOCATION_HH
#define TYCHE_LOCATION_HH
#include <cstdint>
namespace tyche::vm {
struct Location
{
uint32_t function_id;
uint32_t pc;
};
}
#endif //TYCHE_LOCATION_HH

95
src/vm/stack.cc Normal file
View File

@@ -0,0 +1,95 @@
#include "stack.hh"
#include "vm_exceptions.hh"
namespace tyche::vm {
Stack::Stack()
{
fps_.push(0);
}
void Stack::push(Value const& value)
{
stack_.push_back(value);
}
Value Stack::pop()
{
if (stack_.size() <= fps_.top())
throw VMStackUnderflow();
Value v = stack_.back();
stack_.pop_back();
return v;
}
Value Stack::peek() const
{
if (stack_.size() <= fps_.top())
throw VMStackUnderflow();
return stack_.back();
}
Value Stack::at(int pos) const
{
try {
if (pos >= 0) {
return stack_.at(fps_.top() + pos);
} else {
if ((int) fps_.top() + (int) stack_.size() + pos < 0)
throw VMStackOutOfRange();
return stack_.at(stack_.size() + pos);
}
} catch (std::out_of_range&) {
throw VMStackOutOfRange();
}
}
void Stack::set(int pos, Value const& val)
{
try {
if (pos >= 0) {
stack_.at(fps_.top() + pos) = val;
} else {
if ((int) fps_.top() + (int) stack_.size() + pos < 0)
throw VMStackOutOfRange();
stack_.at(stack_.size() + pos) = val;
}
} catch (std::out_of_range&) {
throw VMStackOutOfRange();
}
}
size_t Stack::size() const
{
return stack_.size() - fps_.top();
}
void Stack::push_fp()
{
fps_.push(stack_.size());
}
void Stack::pop_fp()
{
if (fps_.size() == 1)
throw VMStackUnderflow();
stack_.resize(fps_.top());
fps_.pop();
}
std::string Stack::debug() const
{
if (stack_.empty())
return "empty";
std::string out;
for (size_t i = 0; i < stack_.size(); ++i)
out += "[" + stack_.at(i).to_string() + "] ";
return out;
}
} // tyche

38
src/vm/stack.hh Normal file
View File

@@ -0,0 +1,38 @@
#ifndef TYCHE_STACK_HH
#define TYCHE_STACK_HH
#include <stack>
#include <vector>
#include "value.hh"
namespace tyche::vm {
class Stack {
public:
Stack();
void push(Value const& value);
Value pop();
[[nodiscard]] Value peek() const;
[[nodiscard]] Value at(int pos) const;
[[nodiscard]] size_t size() const;
void set(int pos, Value const& val);
void push_fp();
void pop_fp();
[[nodiscard]] size_t fp_level() const { return fps_.size(); }
[[nodiscard]] std::string debug() const;
private:
std::vector<Value> stack_;
std::stack<size_t> fps_;
};
} // tyche
#endif //TYCHE_STACK_HH

292
src/vm/tests.cc Normal file
View File

@@ -0,0 +1,292 @@
#include "gtest/gtest.h"
#include "../bytecode/bytecodeprototype.hh"
#include "../bytecode/bytecode.hh"
#include "../assembler/assembler.hh"
#include "code.hh"
#include "stack.hh"
#include "vm.hh"
using namespace tyche;
using namespace tyche::bc;
using namespace tyche::vm;
static VM run(std::string oper) {
return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: 3.14
1: "Hello world"
.func 0
{}
ret
)", oper)).assemble()).call(0);
}
TEST(Code, ImportSingleAndDebug)
{
BytecodePrototype bp;
bp.constants.emplace_back(3.14f);
bp.constants.emplace_back("HELLO");
bp.functions.emplace_back(0, 0);
bp.functions.at(0).code.append_byte(0xa0); // pushi
bp.functions.at(0).code.append_int8(42);
bp.functions.emplace_back(2, 1);
bp.functions.at(1).code.append_byte(0x1a); // appnd
ByteArray ba = Bytecode::generate(bp);
Code code;
code.import_bytecode(std::move(ba));
printf("%s\n", code.disassemble().c_str());
}
TEST(Stack, PushPullGet)
{
Stack stack;
stack.push(Value::createInteger(10));
stack.push(Value::createInteger(20));
stack.push(Value::createInteger(30));
ASSERT_EQ(stack.size(), 3);
ASSERT_EQ(stack.at(0).as_integer(), 10);
ASSERT_EQ(stack.at(1).as_integer(), 20);
ASSERT_EQ(stack.at(-1).as_integer(), 30);
ASSERT_EQ(stack.at(-2).as_integer(), 20);
}
TEST(Stack, FramePointer)
{
Stack stack;
stack.push(Value::createInteger(10));
stack.push(Value::createInteger(20));
stack.push_fp();
stack.push(Value::createInteger(30));
stack.push(Value::createInteger(40));
stack.push(Value::createInteger(50));
ASSERT_EQ(stack.size(), 3);
ASSERT_EQ(stack.at(0).as_integer(), 30);
ASSERT_EQ(stack.at(1).as_integer(), 40);
ASSERT_EQ(stack.at(-1).as_integer(), 50);
ASSERT_EQ(stack.at(-2).as_integer(), 40);
stack.pop_fp();
ASSERT_EQ(stack.size(), 2);
ASSERT_EQ(stack.at(0).as_integer(), 10);
ASSERT_EQ(stack.at(1).as_integer(), 20);
ASSERT_EQ(stack.at(-1).as_integer(), 20);
ASSERT_EQ(stack.at(-2).as_integer(), 10);
}
TEST(VM, BasicCode)
{
// code (2+3)
BytecodePrototype bp;
bp.functions.emplace_back(0, 0);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8);
bp.functions.at(0).code.append_int8(2);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::PushInt8);
bp.functions.at(0).code.append_int8(3);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::Sum);
bp.functions.at(0).code.append_byte((uint8_t) Instruction::Return);
ByteArray ba = Bytecode::generate(bp);
VM vm;
vm.load_bytecode(std::move(ba));
vm.call(0);
int32_t result = vm.to_integer(-1);
ASSERT_EQ(result, 5);
}
TEST(VM, StackOperations)
{
ASSERT_EQ(run("pushi 5000").to_integer(-1), 5000);
ASSERT_EQ(run("pushi -5000").to_integer(-1), -5000);
ASSERT_FLOAT_EQ(run("pushi 5000").to_float(-1), 5000.f);
ASSERT_FLOAT_EQ(run("pushc 0").to_float(-1), 3.14f);
ASSERT_EQ(run("pushc 0").to_integer(-1), 3);
ASSERT_EQ(run("pushc 1").to_string(-1), "Hello world");
ASSERT_TRUE(run("pushf 0").is_function(-1));
ASSERT_EQ(run("pushi 2\n pushi 3\n pop").to_integer(-1), 2);
}
TEST(VM, IntegerIntegerOperations)
{
auto test_op = [](int32_t op1, int32_t op2, std::string oper) {
return VM().load_bytecode(as::Assembler(std::format(R"(
.func 0
pushi {}
pushi {}
{}
ret
)", op1, op2, oper)).assemble()).call(0).to_integer(-1);
};
ASSERT_EQ(test_op(2, 3, "sum"), 5);
ASSERT_EQ(test_op(2, 3, "sub"), -1);
ASSERT_EQ(test_op(2, 3, "mul"), 6);
ASSERT_EQ(test_op(20, 3, "idiv"), 6);
ASSERT_EQ(test_op(2, 3, "eq"), 0);
ASSERT_EQ(test_op(2, 3, "neq"), 1);
ASSERT_EQ(test_op(2, 3, "lt"), 1);
ASSERT_EQ(test_op(2, 3, "lte"), 1);
ASSERT_EQ(test_op(3, 3, "lte"), 1);
ASSERT_EQ(test_op(4, 3, "lte"), 0);
ASSERT_EQ(test_op(2, 3, "gt"), 0);
ASSERT_EQ(test_op(2, 3, "gte"), 0);
ASSERT_EQ(test_op(3, 3, "gte"), 1);
ASSERT_EQ(test_op(4, 3, "gte"), 1);
ASSERT_EQ(test_op(2, 3, "and"), 2);
ASSERT_EQ(test_op(2, 3, "or"), 3);
ASSERT_EQ(test_op(2, 3, "xor"), 1);
ASSERT_EQ(test_op(2, 3, "pow"), 8);
ASSERT_EQ(test_op(2, 3, "shl"), 16);
ASSERT_EQ(test_op(30, 2, "shr"), 7);
ASSERT_EQ(test_op(8, 3, "mod"), 2);
ASSERT_FLOAT_EQ(run("pushi 3\n pushi 2\n div").to_float(-1), 1.5f);
}
TEST(VM, IntegerFloatOperations)
{
auto test_op = [](int op1, std::string const& op2, std::string oper) -> VM {
return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: {}
.func 0
pushi {}
pushc 0
{}
ret
)", op2, op1, oper)).assemble()).call(0);
};
ASSERT_FLOAT_EQ(test_op(2, "3.5", "sum").to_float(-1), 5.5f);
ASSERT_FLOAT_EQ(test_op(2, "3.5", "sub").to_float(-1), -1.5f);
ASSERT_FLOAT_EQ(test_op(2, "3.5", "mul").to_float(-1), 7.f);
ASSERT_FLOAT_EQ(test_op(20, "3.5", "idiv").to_integer(-1), 6);
ASSERT_FLOAT_EQ(test_op(20, "3.5", "div").to_float(-1), 5.7142859);
ASSERT_FLOAT_EQ(test_op(3, "3.5", "eq").to_integer(-1), 0);
ASSERT_FLOAT_EQ(test_op(3, "3.0", "eq").to_integer(-1), 1);
}
TEST(VM, FloatIntegerOperations)
{
auto test_op = [](std::string const& op1, int op2, std::string oper) -> VM {
return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: {}
.func 0
pushc 0
pushi {}
{}
ret
)", op1, op2, oper)).assemble()).call(0);
};
ASSERT_FLOAT_EQ(test_op("3.5", 2, "sum").to_float(-1), 5.5f);
ASSERT_FLOAT_EQ(test_op("3.5", 2, "sub").to_float(-1), 1.5f);
ASSERT_FLOAT_EQ(test_op("3.5", 2, "mul").to_float(-1), 7.f);
ASSERT_FLOAT_EQ(test_op("3.5", 2, "idiv").to_integer(-1), 1);
ASSERT_FLOAT_EQ(test_op("3.5", 2, "div").to_float(-1), 1.75f);
ASSERT_FLOAT_EQ(test_op("3.5", 3, "eq").to_integer(-1), 0);
ASSERT_FLOAT_EQ(test_op("3.0", 3, "eq").to_integer(-1), 1);
}
TEST(VM, FloatFloatOperations)
{
auto test_op = [](std::string const& op1, std::string const& op2, std::string oper) -> VM {
return VM().load_bytecode(as::Assembler(std::format(R"(
.const
0: {}
1: {}
.func 0
pushc 0
pushc 1
{}
ret
)", op1, op2, oper)).assemble()).call(0);
};
ASSERT_FLOAT_EQ(test_op("3.5", "2.2", "sum").to_float(-1), 5.7f);
ASSERT_FLOAT_EQ(test_op("3.5", "2.2", "sub").to_float(-1), 1.3f);
ASSERT_FLOAT_EQ(test_op("3.5", "2.2", "mul").to_float(-1), 7.7f);
ASSERT_FLOAT_EQ(test_op("3.5", "2.2", "idiv").to_integer(-1), 1);
ASSERT_FLOAT_EQ(test_op("4.5", "2.5", "div").to_float(-1), 1.8f);
ASSERT_FLOAT_EQ(test_op("3.2005", "3.2", "eq").to_integer(-1), 0);
ASSERT_FLOAT_EQ(test_op("3.2", "3.2", "eq").to_integer(-1), 1);
}
TEST(VM, StringString)
{
ASSERT_EQ(run(R"(
.const
0: "Hello"
1: "World"
.func 0
pushc 0
pushc 1
sum
ret
)").to_string(-1), "HelloWorld");
ASSERT_EQ(run(R"(
.const
0: "Hello"
1: "World"
.func 0
pushc 0
pushc 1
eq
ret
)").to_integer(-1), 0);
ASSERT_EQ(run(R"(
.const
0: "Hello"
1: "Hello"
.func 0
pushc 0
pushc 1
eq
ret
)").to_integer(-1), 1);
ASSERT_EQ(run(R"(
.const
0: "Hello"
.func 0
pushc 0
pushi 1
eq
ret
)").to_integer(-1), 0);
}
TEST(VM, LocalVariables)
{
VM vm = run(R"(
.func 0
pushv 2 ; local a, b
pushi 3 ; a = 3
set 0
pushi 4 ; b = 4
set 1
dupv 0 ; return a
ret
)");
ASSERT_EQ(vm.stack_sz(), 1);
ASSERT_EQ(vm.to_integer(-1), 3);
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

44
src/vm/value.cc Normal file
View File

@@ -0,0 +1,44 @@
#include "value.hh"
#include "../common/overloaded.hh"
namespace tyche::vm {
std::string type_name(Type type)
{
switch (type) {
case Type::Nil: return "nil";
case Type::Integer: return "integer";
case Type::Float: return "float";
case Type::String: return "string";
case Type::Array: return "array";
case Type::Table: return "table";
case Type::Function: return "function";
case Type::NativePointer: return "native pointer";
case Type::COUNT: default: return "???";
}
}
Type Value::type() const
{
return std::visit(overloaded {
[](std::monostate) { return Type::Nil; },
[](int32_t) { return Type::Integer; },
[](float) { return Type::Float; },
[](std::string const&) { return Type::String; },
[](Function const&) { return Type::Function; },
}, value_);
}
std::string Value::to_string() const
{
return std::visit(overloaded {
[](std::monostate) { return std::string("nil"); },
[](int32_t i) { return std::to_string(i); },
[](float f) { return std::to_string(f); },
[](std::string const& s) { return s; },
[](Function const& f) { return "@" + std::to_string(f.f_id); }
}, value_);
}
}

53
src/vm/value.hh Normal file
View File

@@ -0,0 +1,53 @@
#ifndef TYCHE_VALUE_HH
#define TYCHE_VALUE_HH
#include <cstdint>
#include <string>
#include <variant>
namespace tyche::vm {
using FunctionId = uint32_t;
enum class Type : uint8_t
{
Nil = 0, Integer, Float, String, Array, Table, Function, NativePointer, COUNT
};
std::string type_name(Type type);
class Value {
struct Function { FunctionId f_id; };
public:
Value() : value_(std::monostate()) {}
static Value createNil() { return Value(std::monostate()); }
static Value createInteger(int32_t v) { return Value(v); }
static Value createFloat(float f) { return Value(f); }
static Value createString(std::string const& str) { return Value(str); }
static Value createFunctionId(FunctionId f_id) { return Value(Function { f_id }); }
static Value createFalse() { return createInteger(0); }
static Value createTrue() { return createInteger(1); }
static Value createIntegerFromBool(bool b) { return createInteger(b ? 1 : 0); }
[[nodiscard]] Type type() const;
[[nodiscard]] int32_t as_integer() const { return std::get<int32_t>(value_); }
[[nodiscard]] float as_float() const { return std::get<float>(value_); }
[[nodiscard]] std::string as_string() const { return std::get<std::string>(value_); }
[[nodiscard]] FunctionId as_function_id() const { return std::get<Function>(value_).f_id; }
[[nodiscard]] std::string to_string() const;
private:
using Internal = std::variant<std::monostate, int32_t, float, std::string, Function>;
Internal value_;
explicit Value(Internal const& internal) : value_(internal) {}
};
}
#endif //TYCHE_VALUE_HH

208
src/vm/vm.cc Normal file
View File

@@ -0,0 +1,208 @@
#include "vm.hh"
#include "vm_exceptions.hh"
#include "expr.hh"
namespace tyche::vm {
VM& VM::load_bytecode(ByteArray const& ba)
{
FunctionId f_id = code_.import_bytecode(ba);
stack_.push(Value::createFunctionId(f_id));
return *this;
}
VM& VM::call(size_t n_params)
{
// TODO - parameters
Value f = stack_.pop();
if (f.type() != Type::Function)
throw VMTypeError(Type::Function, f.type());
loc_.emplace(f.as_function_id(), 0);
stack_.push_fp();
run_until_return();
// stack_.pop_fp();
loc_.pop();
return *this;
}
int32_t VM::to_integer(int index) const
{
Value i = stack_.at(index);
if (i.type() == Type::Integer)
return i.as_integer();
if (i.type() == Type::Float)
return (int32_t) i.as_float();
throw VMTypeError(Type::Integer, i.type());
}
float VM::to_float(int index) const
{
Value f = stack_.at(index);
if (f.type() == Type::Float)
return f.as_float();
if (f.type() == Type::Integer)
return (float) f.as_integer();
throw VMTypeError(Type::Float, f.type());
}
std::string VM::to_string(int index) const
{
Value i = stack_.at(index);
assert_type(i, Type::String);
return i.as_string();
}
VM& VM::push_nil()
{
stack_.push(Value::createNil());
return *this;
}
VM& VM::push_integer(int32_t value)
{
stack_.push(Value::createInteger(value));
return *this;
}
VM& VM::push_float(float value)
{
stack_.push(Value::createFloat(value));
return *this;
}
VM& VM::push_string(std::string const& str)
{
stack_.push(Value::createString(str));
return *this;
}
void VM::run_until_return()
{
size_t level = stack_.fp_level();
while (stack_.fp_level() >= level)
step();
}
void VM::step()
{
Operation op = code_.operation(loc_.top());
switch (op.instruction) {
//
// stack management
//
case Instruction::PushInt8:
case Instruction::PushInt16:
case Instruction::PushInt32:
push_integer(op.operator_);
break;
case Instruction::PushConstant8:
case Instruction::PushConstant16:
case Instruction::PushConstant32: {
auto cnst = code_.bytecode().get_constant(op.operator_);
if (auto f = std::get_if<float>(&cnst))
push_float(*f);
else if (auto s = std::get_if<std::string>(&cnst))
push_string(*s);
else
throw std::logic_error("Shouldn't get here");
break;
}
case Instruction::PushFunction8:
case Instruction::PushFunction16:
case Instruction::PushFunction32:
stack_.push(Value::createFunctionId(op.operator_));
break;
case Instruction::Pop:
stack_.pop();
break;
case Instruction::Duplicate:
stack_.push(stack_.peek());
break;
//
// variables
//
case Instruction::PushValues8:
case Instruction::PushValues16:
case Instruction::PushValues32:
for (int i = 0; i < op.operator_; ++i)
push_nil();
break;
case Instruction::SetValue8:
case Instruction::SetValue16:
case Instruction::SetValue32: {
Value a = stack_.pop();
stack_.set(op.operator_, a);
break;
}
case Instruction::DuplicateValue8:
case Instruction::DuplicateValue16:
case Instruction::DuplicateValue32: {
Value a = stack_.at(op.operator_);
stack_.push(a);
break;
}
//
// logical/arithmetic
//
#define BIN_OP(op) { Value a = stack_.pop(); Value b = stack_.pop(); stack_.push(binary_operation(a, b, BinaryOperationType::op)); }
case Instruction::Sum: BIN_OP(Sum) break;
case Instruction::Subtract: BIN_OP(Subtraction) break;
case Instruction::Multiply: BIN_OP(Multiplication) break;
case Instruction::Divide: BIN_OP(Division) break;
case Instruction::DivideInt: BIN_OP(IntegerDivision) break;
case Instruction::Equals: BIN_OP(Equality) break;
case Instruction::NotEquals: BIN_OP(Inequality) break;
case Instruction::LessThan: BIN_OP(LessThan) break;
case Instruction::LessThanEq: BIN_OP(LessThanOrEquals) break;
case Instruction::GreaterThan: BIN_OP(GreaterThan) break;
case Instruction::GreaterThanEq: BIN_OP(GreaterThanOrEquals) break;
case Instruction::And: BIN_OP(BitwiseAnd) break;
case Instruction::Or: BIN_OP(BitwiseOr) break;
case Instruction::Xor: BIN_OP(BitwiseXor) break;
case Instruction::Power: BIN_OP(Power) break;
case Instruction::ShiftLeft: BIN_OP(ShiftLeft) break;
case Instruction::ShiftRight: BIN_OP(ShiftRight) break;
case Instruction::Modulo: BIN_OP(Modulo) break;
#undef BIN_OP
//
// function operations
//
case Instruction::Return: {
Value v = stack_.pop();
stack_.pop_fp();
stack_.push(v);
return;
}
default:
throw VMInvalidOpcode((uint8_t) op.instruction);
}
loc_.top() = op.next_location;
}
void VM::assert_type(Value const& val, Type type)
{
if (val.type() != type)
throw VMTypeError(type, val.type());
}
} // tyche

51
src/vm/vm.hh Normal file
View File

@@ -0,0 +1,51 @@
#ifndef TYCHE_VM_HH
#define TYCHE_VM_HH
#include "code.hh"
#include "location.hh"
#include "stack.hh"
namespace tyche::vm {
class VM {
public:
VM& load_bytecode(ByteArray const& ba);
VM& call(size_t n_params);
[[nodiscard]] bool is_nil(int index) const { return stack_.at(index).type() == Type::Nil; }
[[nodiscard]] bool is_integer(int index) const { return stack_.at(index).type() == Type::Integer; }
[[nodiscard]] bool is_float(int index) const { return stack_.at(index).type() == Type::Float; }
[[nodiscard]] bool is_string(int index) const { return stack_.at(index).type() == Type::String; }
[[nodiscard]] bool is_array(int index) const { return stack_.at(index).type() == Type::Array; }
[[nodiscard]] bool is_table(int index) const { return stack_.at(index).type() == Type::Table; }
[[nodiscard]] bool is_function(int index) const { return stack_.at(index).type() == Type::Function; }
[[nodiscard]] bool is_native_pointer(int index) const { return stack_.at(index).type() == Type::NativePointer; }
[[nodiscard]] size_t stack_sz() const { return stack_.size(); }
VM& push_nil();
VM& push_integer(int32_t value);
VM& push_float(float value);
VM& push_string(std::string const& string);
[[nodiscard]] int32_t to_integer(int index) const;
[[nodiscard]] float to_float(int index) const;
[[nodiscard]] std::string to_string(int index) const;
[[nodiscard]] std::string debug_stack() const { return stack_.debug(); }
private:
void run_until_return();
void step();
static void assert_type(Value const& val, Type type);
Stack stack_;
Code code_;
std::stack<Location> loc_;
};
} // tyche
#endif //TYCHE_VM_HH

49
src/vm/vm_exceptions.hh Normal file
View File

@@ -0,0 +1,49 @@
#ifndef TYCHE_VM_EXCEPTIONS_HH
#define TYCHE_VM_EXCEPTIONS_HH
#include <stdexcept>
#include <string>
#include "expr.hh"
namespace tyche::vm {
class VMRuntimeError : public std::runtime_error
{
public:
explicit VMRuntimeError(std::string const& str) : std::runtime_error(str.c_str()) {}
};
class VMStackUnderflow : public VMRuntimeError
{
public:
explicit VMStackUnderflow() : VMRuntimeError("Stack underflow") {}
};
class VMStackOutOfRange : public VMRuntimeError
{
public:
explicit VMStackOutOfRange() : VMRuntimeError("Item does not exist in stack") {}
};
class VMTypeError : public VMRuntimeError
{
public:
explicit VMTypeError(Type expected, Type found) : VMRuntimeError("Type error (expected " + type_name(expected) + ", found " + type_name(found) + ")") {}
};
class VMInvalidOpcode : public VMRuntimeError
{
public:
explicit VMInvalidOpcode(uint8_t opcode) : VMRuntimeError("Invalid opcode " + std::to_string(opcode)) {}
};
class VMInvalidOperation : public VMRuntimeError
{
public:
explicit VMInvalidOperation(BinaryOperationType op, Type type1, Type type2) : VMRuntimeError("Invalid binary operation for types " + type_name(type1) + " and " + type_name(type2)) {}
};
}
#endif //TYCHE_VM_EXCEPTIONS_HH