Files
tyche/lib/code.c
2026-05-17 09:36:15 -05:00

273 lines
9.6 KiB
C

#include "priv.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# error Sorry, big endian architectures are not supported at this time.
#endif
// Value the first four bytes of a bytecode image must hold (checked in code_load_bytecode).
#define MAGIC 0xa7d6e9b1
// Byte offsets of header fields inside the image.
// NOTE(review): VERSION_ADDR and CODE_START_ADDR are not referenced by the code
// in this file; presumably the format version and the start of the code section — confirm.
#define VERSION_ADDR 0x04
#define CODE_START_ADDR 0x08
// Offset of the 32-bit constant count (read by code_n_consts).
#define N_CONST_ADDR 0x0c
// Offset at which the first constant starts.
#define CONST_START 0x10
// Opcode ranges used by code_next_instruction to pick the operand width:
// [0xa0, 0xc0) has an 8-bit operand, [0xc0, 0xe0) a 16-bit operand, and
// 0xe0 and above a 32-bit operand. Opcodes below 0xa0 carry no operand.
#define OP_8BIT_OPERAND 0xa0
#define OP_16BIT_OPERAND 0xc0
#define OP_32BIT_OPERAND 0xe0
/* Parsed view over a bytecode image; the offset tables are filled in by code_load_bytecode(). */
struct Code {
    const uint8_t *bytecode;     /* image passed to code_load_bytecode (pointer is stored, bytes are not copied) */
    size_t         bytecode_sz;  /* size of the image in bytes */
    uint32_t      *const_addr;   /* per-constant offset of its type byte inside the image */
    uint32_t       fn_count;     /* number of functions in the image */
    uint32_t      *fn_addr;      /* per-function offset of its 4-byte header */
    uint32_t      *fn_sz;        /* per-function body size in bytes (header excluded) */
};
Code* code_new(void)
{
Code* code = xcalloc(1, sizeof(Code));
return code;
}
/*
 * Release a Code object together with the offset tables it owns.
 * The bytecode image itself is not freed.
 * Passing NULL is allowed and does nothing, mirroring free(NULL).
 */
void code_destroy(Code* code)
{
    if (code == NULL)
        return;

    free(code->const_addr);
    free(code->fn_addr);
    free(code->fn_sz);
    free(code);
}
/*
 * Parse a bytecode image and build the constant and function offset tables.
 *
 * Layout as read here (32-bit integers, little-endian — big-endian builds are
 * rejected at compile time):
 *   0x00  magic number, must equal MAGIC
 *   0x0c  number of constants
 *   0x10  constants: a 1-byte type tag followed by a NUL-terminated string
 *         (TC_STRING) or a 4-byte float (TC_REAL)
 *   next  4 bytes that are skipped (debug start address), the function count,
 *         then the functions; each function begins with the absolute offset of
 *         whatever follows it, so its body size is that offset minus its own
 *         offset minus the 4-byte header.
 *
 * The image is not copied: `bytecode` must remain valid while `code` is in use.
 * `code` is only modified on success; tables from a previous load are freed.
 *
 * Returns T_OK on success, T_ERR_BYTECODE_INVALID_MAGIC when the magic number
 * does not match, and T_ERR_BYTECODE_TOO_SMALL when the image is shorter than
 * the minimum or any structure in it would run past the end of the image.
 */
TYC_RESULT code_load_bytecode(Code* code, uint8_t const* bytecode, size_t bytecode_sz)
{
    // TODO - linking

    // Smallest possible image: 16-byte header + debug start address + function count.
    if (bytecode_sz < 24)
        return T_ERR_BYTECODE_TOO_SMALL;

    uint32_t magic;
    memcpy(&magic, bytecode, sizeof(magic));
    if (magic != MAGIC)
        return T_ERR_BYTECODE_INVALID_MAGIC;

    // Offsets are stored as uint32_t, so bytes past UINT32_MAX are not addressable.
    size_t const end = bytecode_sz < UINT32_MAX ? bytecode_sz : UINT32_MAX;

    // Everything is parsed into locals first so that a rejected image leaves `code` untouched.
    uint32_t* const_addr = NULL;
    uint32_t* fn_addr = NULL;
    uint32_t* fn_sz = NULL;
    uint32_t n_consts = 0;
    uint32_t fn_count = 0;
    size_t pos = CONST_START;

    memcpy(&n_consts, &bytecode[N_CONST_ADDR], sizeof(n_consts));
    // The shortest constant (an empty string) takes 2 bytes; reject counts the
    // image cannot possibly hold before using them as an allocation size.
    if (n_consts > (end - CONST_START) / 2)
        goto malformed;

    const_addr = xcalloc(n_consts, sizeof(uint32_t));
    for (uint32_t i = 0; i < n_consts; ++i) {
        if (pos >= end)
            goto malformed;
        const_addr[i] = (uint32_t) pos;
        switch (bytecode[pos]) {
            case TC_STRING: {
                // Bounded search: never scan past the end of the image for the terminator.
                uint8_t const* str = &bytecode[pos + 1];
                uint8_t const* nul = memchr(str, '\0', end - (pos + 1));
                if (nul == NULL)
                    goto malformed;
                pos += (size_t) (nul - str) + 2;   // 2 = constant type + NULL terminator
                break;
            }
            case TC_REAL:
                if (end - pos < 5)
                    goto malformed;
                pos += 5;   // 5 = constant type + float
                break;
            default:
                // Unknown type tag: reject instead of invoking undefined behaviour.
                goto malformed;
        }
    }

    // Debug start address (skipped) followed by the number of functions.
    if (end - pos < 8)
        goto malformed;
    pos += 4;
    memcpy(&fn_count, &bytecode[pos], sizeof(fn_count));
    pos += 4;

    // Each function needs at least its 4-byte header.
    if (fn_count > (end - pos) / 4)
        goto malformed;

    fn_addr = xcalloc(fn_count, sizeof(uint32_t));
    fn_sz = xcalloc(fn_count, sizeof(uint32_t));
    // A function count of zero simply leaves both tables empty.
    for (uint32_t i = 0; i < fn_count; ++i) {
        if (end - pos < 4)
            goto malformed;
        uint32_t next;
        memcpy(&next, &bytecode[pos], sizeof(next));
        // The following offset must lie after this header and inside the image.
        if (next < pos + 4 || next > end)
            goto malformed;
        fn_addr[i] = (uint32_t) pos;
        fn_sz[i] = next - (uint32_t) pos - 4;
        pos = next;
    }

    // Commit: drop tables from any earlier load, then publish the new ones.
    free(code->const_addr);
    free(code->fn_addr);
    free(code->fn_sz);
    code->bytecode = bytecode;
    code->bytecode_sz = bytecode_sz;
    code->const_addr = const_addr;
    code->fn_count = fn_count;
    code->fn_addr = fn_addr;
    code->fn_sz = fn_sz;
    return T_OK;

malformed:
    free(const_addr);
    free(fn_addr);
    free(fn_sz);
    // NOTE(review): no dedicated "corrupt image" result code is visible from this
    // file, so truncated and otherwise malformed images share this one — consider
    // adding a more specific code.
    return T_ERR_BYTECODE_TOO_SMALL;
}
/* Read the number of constants from the image header (32-bit field at N_CONST_ADDR). */
uint32_t code_n_consts(Code const* code)
{
    uint8_t const* field = &code->bytecode[N_CONST_ADDR];
    uint32_t count;

    memcpy(&count, field, sizeof(count));   // memcpy: the field may be unaligned
    return count;
}
/*
 * Return the type tag of constant `n`.
 * Any tag byte at or above TC_INVALID_TYPE is reported as TC_INVALID_TYPE.
 */
TYC_CONST_TYPE code_const_type(Code const* code, size_t n)
{
    uint32_t const offset = code->const_addr[n];
    uint8_t const tag = code->bytecode[offset];

    if (tag < TC_INVALID_TYPE)
        return tag;
    return TC_INVALID_TYPE;
}
/*
 * Return the value of constant `n`, read as a float stored right after the
 * constant's type byte. The type tag is not checked here.
 */
T_REAL code_const_real(Code const* code, size_t n)
{
    uint32_t const payload = code->const_addr[n] + 1;   // skip the type byte
    float value;

    memcpy(&value, &code->bytecode[payload], sizeof(float));
    return value;
}
/*
 * Return a pointer to the text of constant `n`, which starts right after the
 * constant's type byte. The pointer refers into the bytecode image itself;
 * nothing is copied and the type tag is not checked here.
 */
const char* code_const_string(Code const* code, size_t n)
{
    uint32_t const payload = code->const_addr[n] + 1;   // skip the type byte
    uint8_t const* text = &code->bytecode[payload];

    return (const char*) text;
}
/* Number of functions recorded for the loaded image. */
uint32_t code_n_functions(Code const* code)
{
    uint32_t const count = code->fn_count;
    return count;
}
/* Body size in bytes of function `f_id`. The index is not range-checked. */
uint32_t code_function_sz(Code const* code, uint32_t f_id)
{
    uint32_t const* sizes = code->fn_sz;
    return sizes[f_id];
}
/*
 * Decode the instruction located `pc` bytes into the body of function
 * `function_id`.
 *
 * The opcode byte selects the operand width:
 *   below 0xa0     no operand                        (sz = 1)
 *   [0xa0, 0xc0)   signed 8-bit operand              (sz = 2)
 *   [0xc0, 0xe0)   signed 16-bit little-endian       (sz = 3), opcode reduced by 0x20
 *   0xe0 and up    signed 32-bit little-endian       (sz = 5), opcode reduced by 0x40
 * so the wide encodings are returned with the same operator value as the
 * 8-bit encoding.
 *
 * Returns the decoded operator, operand (0 when there is none) and the encoded
 * size in bytes. Neither `function_id` nor `pc` is range-checked.
 */
Instruction code_next_instruction(Code const* code, uint32_t function_id, uint32_t pc)
{
    // + 4 skips the function's header word.
    uint32_t addr = code->fn_addr[function_id] + 4 + pc;
    uint8_t const* in = &code->bytecode[addr];

    uint8_t opcode = in[0];
    int32_t operand = 0;
    uint8_t sz = 1;

    if (opcode >= OP_8BIT_OPERAND && opcode < OP_16BIT_OPERAND) {
        operand = (int8_t) in[1];
        sz = 2;
    } else if (opcode >= OP_16BIT_OPERAND && opcode < OP_32BIT_OPERAND) {
        opcode -= 0x20;
        operand = (int16_t) (uint16_t) (in[1] | (in[2] << 8));
        sz = 3;
    } else if (opcode >= OP_32BIT_OPERAND) {
        opcode -= 0x40;
        // Widen each byte to uint32_t BEFORE shifting: shifting the promoted
        // int left by 24 is undefined when the byte is 0x80 or larger.
        operand = (int32_t) ((uint32_t) in[1] |
                             ((uint32_t) in[2] << 8) |
                             ((uint32_t) in[3] << 16) |
                             ((uint32_t) in[4] << 24));
        sz = 5;
    }

    return (Instruction) {
        .operator = (TYC_INST) opcode,
        .operand = operand,
        .sz = sz,
    };
}
#ifdef DEBUG_ASSEMBLY
/*
 * Print a textual listing of the loaded image to stdout: a ".const" section
 * with every string/real constant, followed by one ".func <id>" section per
 * function listing each decoded instruction and its byte offset.
 * Constants of any other type are skipped silently.
 */
void code_decompile(Code const* code)
{
    uint32_t const n_consts = code_n_consts(code);
    if (n_consts > 0)
        printf(".const\n");
    for (size_t const_id = 0; const_id < n_consts; ++const_id) {
        TYC_CONST_TYPE type = code_const_type(code, const_id);
        if (type == TC_STRING)
            printf(" %03zu: \"%s\"\n", const_id, code_const_string(code, const_id));
        else if (type == TC_REAL)
            printf(" %03zu: %f\n", const_id, (double) code_const_real(code, const_id));
    }

    uint32_t const n_functions = code_n_functions(code);
    for (uint32_t f_id = 0; f_id < n_functions; ++f_id) {
        // %u with an explicit cast: the ids and offsets are unsigned, %d expects int.
        printf(".func %u\n", (unsigned) f_id);
        uint32_t const fn_size = code_function_sz(code, f_id);
        uint32_t pc = 0;
        while (pc < fn_size) {
            Instruction inst = code_next_instruction(code, f_id, pc);
            char buf[50];
            code_parse_instruction(inst, buf, sizeof buf);
            printf(" %s ; %u\n", buf, (unsigned) pc);
            pc += inst.sz;   // advance by the encoded size of this instruction
        }
    }
}
/*
 * Format `inst` as text into `outbuf`: the mnemonic followed by the operand
 * ("%2d") when the operator is in the operand-carrying range, or by a single
 * space otherwise. Unknown operators are rendered as "???".
 *
 * At most `sz` bytes are written, terminator included; the text is truncated
 * if it does not fit.
 */
void code_parse_instruction(Instruction inst, char* outbuf, size_t sz)
{
    const char* mnemonic;
    switch (inst.operator) {
        case TO_PUSHI: mnemonic = "pushi "; break;
        case TO_PUSHC: mnemonic = "pushc "; break;
        case TO_PUSHF: mnemonic = "pushf "; break;
        case TO_PUSHN: mnemonic = "pushn "; break;
        case TO_PUSHZ: mnemonic = "pushz "; break;
        case TO_PUSHT: mnemonic = "pusht "; break;
        case TO_NEWA:  mnemonic = "newa "; break;
        case TO_NEWT:  mnemonic = "newt "; break;
        case TO_POP:   mnemonic = "pop "; break;
        case TO_DUP:   mnemonic = "dup "; break;
        case TO_PUSHV: mnemonic = "pushv "; break;
        case TO_SET:   mnemonic = "set "; break;
        case TO_DUPV:  mnemonic = "dupv "; break;
        case TO_SETG:  mnemonic = "setg "; break;
        case TO_GETG:  mnemonic = "getg "; break;
        case TO_CALL:  mnemonic = "call "; break;
        case TO_RET:   mnemonic = "ret "; break;
        case TO_RETI:  mnemonic = "reti "; break;
        case TO_GETKV: mnemonic = "getkv "; break;
        case TO_SETKV: mnemonic = "setkv "; break;
        case TO_GETI:  mnemonic = "geti "; break;
        case TO_SETI:  mnemonic = "seti "; break;
        case TO_APPND: mnemonic = "appnd "; break;
        case TO_NEXT:  mnemonic = "next "; break;
        case TO_SMT:   mnemonic = "smt "; break;
        case TO_MT:    mnemonic = "mt "; break;
        case TO_SUM:   mnemonic = "sum "; break;
        case TO_SUB:   mnemonic = "sub "; break;
        case TO_MUL:   mnemonic = "mul "; break;
        case TO_DIV:   mnemonic = "div "; break;
        case TO_IDIV:  mnemonic = "idiv "; break;
        case TO_MOD:   mnemonic = "mod "; break;
        case TO_EQ:    mnemonic = "eq "; break;
        case TO_NEQ:   mnemonic = "neq "; break;
        case TO_LT:    mnemonic = "lt "; break;
        case TO_LTE:   mnemonic = "lte "; break;
        case TO_GT:    mnemonic = "gt "; break;
        case TO_GTE:   mnemonic = "gte "; break;
        case TO_AND:   mnemonic = "and "; break;
        case TO_OR:    mnemonic = "or "; break;
        case TO_XOR:   mnemonic = "xor "; break;
        case TO_POW:   mnemonic = "pow "; break;
        case TO_SHL:   mnemonic = "shl "; break;
        case TO_SHR:   mnemonic = "shr "; break;
        case TO_LEN:   mnemonic = "len "; break;
        case TO_TYPE:  mnemonic = "type "; break;
        case TO_CAST:  mnemonic = "cast "; break;
        case TO_VER:   mnemonic = "ver "; break;
        case TO_CMPL:  mnemonic = "cmpl "; break;
        case TO_ASMBL: mnemonic = "asmbl "; break;
        case TO_LOAD:  mnemonic = "load "; break;
        case TO_BZ:    mnemonic = "bz "; break;
        case TO_BNZ:   mnemonic = "bnz "; break;
        case TO_JMP:   mnemonic = "jmp "; break;
        case TO_GC:    mnemonic = "gc "; break;
        default:       mnemonic = "??? "; break;
    }

    // One bounded call for the whole line: the previous two-step version passed
    // `sz + n` as the space left after the mnemonic, which let the second write
    // run past the end of `outbuf`.
    if (inst.operator >= OP_8BIT_OPERAND)
        snprintf(outbuf, sz, "%s%2d", mnemonic, inst.operand);
    else
        snprintf(outbuf, sz, "%s ", mnemonic);
}
#endif