285 lines
9.8 KiB
C
285 lines
9.8 KiB
C
#include "priv.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
# error Sorry, big endian architectures are not supported at this time.
|
|
#endif
|
|
|
|
#define MAGIC 0xa7d6e9b1
|
|
|
|
#define VERSION_ADDR 0x04
|
|
#define CODE_START_ADDR 0x08
|
|
#define N_CONST_ADDR 0x0c
|
|
#define CONST_START 0x10
|
|
|
|
#define OP_8BIT_OPERAND 0xa0
|
|
#define OP_16BIT_OPERAND 0xc0
|
|
#define OP_32BIT_OPERAND 0xe0
|
|
|
|
struct Code {
|
|
uint8_t const* bytecode;
|
|
size_t bytecode_sz;
|
|
uint32_t* const_addr;
|
|
uint32_t fn_count;
|
|
uint32_t* fn_addr;
|
|
uint32_t* fn_sz;
|
|
};
|
|
|
|
Code* code_new(void)
|
|
{
|
|
Code* code = xcalloc(1, sizeof(Code));
|
|
return code;
|
|
}
|
|
|
|
void code_destroy(Code* code)
|
|
{
|
|
free(code->const_addr);
|
|
free(code->fn_addr);
|
|
free(code->fn_sz);
|
|
free(code);
|
|
}
|
|
|
|
TYC_RESULT code_load_bytecode(Code* code, uint8_t const* bytecode, size_t bytecode_sz)
|
|
{
|
|
// TODO - linking
|
|
|
|
if (bytecode_sz < 24)
|
|
return T_ERR_BYTECODE_TOO_SMALL;
|
|
|
|
uint32_t magic;
|
|
memcpy(&magic, bytecode, sizeof(magic));
|
|
if (magic != MAGIC)
|
|
return T_ERR_BYTECODE_INVALID_MAGIC;
|
|
|
|
code->bytecode = bytecode;
|
|
code->bytecode_sz = bytecode_sz;
|
|
|
|
/*
|
|
for (size_t i = 0; i < bytecode_sz; ++i) {
|
|
if (i % 16 == 0)
|
|
printf("%04X: ", i);
|
|
printf("%02x ", bytecode[i]);
|
|
if (i % 16 == 15)
|
|
printf("\n");
|
|
}
|
|
printf("\n");
|
|
*/
|
|
|
|
uint32_t n_consts = code_n_consts(code);
|
|
code->const_addr = xcalloc(n_consts, sizeof(uint32_t));
|
|
uint32_t addr = CONST_START;
|
|
for (size_t i = 0; i < n_consts; ++i) {
|
|
code->const_addr[i] = addr;
|
|
switch (code_const_type(code, i)) {
|
|
case TC_STRING: {
|
|
uint32_t sz = (uint32_t) strlen((const char*) &bytecode[code->const_addr[i] + 1]);
|
|
addr += sz + 2; // 2 = constant type + NULL terminator
|
|
break;
|
|
}
|
|
case TC_REAL:
|
|
addr += 5; // 5 = constant type + float
|
|
break;
|
|
case TC_INVALID_TYPE:
|
|
default:
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
|
|
addr += 4; // skip debug start address
|
|
memcpy(&code->fn_count, &bytecode[addr], sizeof(uint32_t)); // number of functions
|
|
addr += 4;
|
|
|
|
code->fn_addr = xcalloc(code->fn_count, sizeof(uint32_t));
|
|
code->fn_sz = xcalloc(code->fn_count, sizeof(uint32_t));
|
|
code->fn_addr[0] = addr;
|
|
uint32_t addr_next;
|
|
for (size_t i = 1; i < code->fn_count; ++i) {
|
|
memcpy(&addr_next, &bytecode[addr], sizeof(uint32_t));
|
|
code->fn_sz[i-1] = addr_next - addr - 4;
|
|
addr = code->fn_addr[i] = addr_next;
|
|
}
|
|
memcpy(&addr_next, &bytecode[addr], sizeof(uint32_t));
|
|
code->fn_sz[code->fn_count-1] = addr_next - addr - 4;
|
|
|
|
return T_OK;
|
|
}
|
|
|
|
uint32_t code_n_consts(Code const* code)
|
|
{
|
|
uint32_t n_consts; memcpy(&n_consts, &code->bytecode[N_CONST_ADDR], sizeof(uint32_t));
|
|
return n_consts;
|
|
}
|
|
|
|
TYC_CONST_TYPE code_const_type(Code const* code, size_t n)
|
|
{
|
|
uint8_t t = code->bytecode[code->const_addr[n]];
|
|
if (t >= TC_INVALID_TYPE)
|
|
return TC_INVALID_TYPE;
|
|
return t;
|
|
}
|
|
|
|
T_REAL code_const_real(Code const* code, size_t n)
|
|
{
|
|
float f;
|
|
memcpy(&f, &code->bytecode[code->const_addr[n] + 1], sizeof(float));
|
|
return f;
|
|
}
|
|
|
|
const char* code_const_string(Code const* code, size_t n)
|
|
{
|
|
return (const char*) &code->bytecode[code->const_addr[n] + 1];
|
|
}
|
|
|
|
uint32_t code_n_functions(Code const* code)
|
|
{
|
|
return code->fn_count;
|
|
}
|
|
|
|
uint32_t code_function_sz(Code const* code, uint32_t f_id)
|
|
{
|
|
return code->fn_sz[f_id];
|
|
}
|
|
|
|
Instruction code_next_instruction(Code const* code, uint32_t function_id, uint32_t pc)
|
|
{
|
|
uint32_t addr = code->fn_addr[function_id] + 4 + pc;
|
|
uint8_t opcode = code->bytecode[addr];
|
|
int32_t operand = 0;
|
|
uint8_t sz = 1;
|
|
|
|
if (opcode >= OP_8BIT_OPERAND && opcode < OP_16BIT_OPERAND) {
|
|
operand = (int8_t) code->bytecode[addr + 1];
|
|
sz = 2;
|
|
} else if (opcode >= OP_16BIT_OPERAND && opcode < OP_32BIT_OPERAND) {
|
|
opcode -= 0x20;
|
|
operand = (int16_t) ((uint16_t) code->bytecode[addr + 1] |
|
|
(uint16_t) (code->bytecode[addr + 2] << 8));
|
|
sz = 3;
|
|
} else if (opcode >= OP_32BIT_OPERAND) {
|
|
opcode -= 0x40;
|
|
operand = (int32_t) ((uint32_t) code->bytecode[addr + 1] |
|
|
(uint32_t) (code->bytecode[addr + 2] << 8) |
|
|
(uint32_t) (code->bytecode[addr + 3] << 16) |
|
|
(uint32_t) (code->bytecode[addr + 4] << 24));
|
|
sz = 5;
|
|
}
|
|
|
|
return (Instruction) {
|
|
.operator = (TYC_INST) opcode,
|
|
.operand = operand,
|
|
.sz = sz,
|
|
};
|
|
}
|
|
|
|
#ifdef DEBUG_ASSEMBLY
|
|
|
|
void code_debug_bytecode(Code const* code)
|
|
{
|
|
for (int i = 0; i < code->bytecode_sz; ++i) {
|
|
if (i % 16 == 0)
|
|
printf("%04X : ", i);
|
|
printf("%02X ", code->bytecode[i]);
|
|
if (i % 16 == 15)
|
|
printf("\n");
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
void code_decompile(Code const* code)
|
|
{
|
|
if (code_n_consts(code) > 0)
|
|
printf(".const\n");
|
|
|
|
for (size_t const_id = 0; const_id < code_n_consts(code); ++const_id) {
|
|
TYC_CONST_TYPE type = code_const_type(code, const_id);
|
|
if (type == TC_STRING)
|
|
printf(" %03zu: \"%s\"\n", const_id, code_const_string(code, const_id));
|
|
else if (type == TC_REAL)
|
|
printf(" %03zu: %f\n", const_id, (double) code_const_real(code, const_id));
|
|
}
|
|
|
|
for (uint32_t f_id = 0; f_id < code_n_functions(code); ++f_id) {
|
|
printf(".func %d\n", f_id);
|
|
uint32_t pc = 0;
|
|
while (pc < code_function_sz(code, f_id)) {
|
|
Instruction inst = code_next_instruction(code, f_id, pc);
|
|
char buf[50];
|
|
code_parse_instruction(inst, buf, sizeof buf);
|
|
printf(" %s ; %d\n", buf, pc);
|
|
pc += inst.sz;
|
|
}
|
|
}
|
|
}
|
|
|
|
void code_parse_instruction(Instruction inst, char* outbuf, size_t sz)
|
|
{
|
|
int n;
|
|
switch (inst.operator) {
|
|
case TO_PUSHI: n = snprintf(outbuf, sz, "pushi "); break;
|
|
case TO_PUSHC: n = snprintf(outbuf, sz, "pushc "); break;
|
|
case TO_PUSHF: n = snprintf(outbuf, sz, "pushf "); break;
|
|
case TO_PUSHN: n = snprintf(outbuf, sz, "pushn "); break;
|
|
case TO_PUSHZ: n = snprintf(outbuf, sz, "pushz "); break;
|
|
case TO_PUSHT: n = snprintf(outbuf, sz, "pusht "); break;
|
|
case TO_NEWA: n = snprintf(outbuf, sz, "newa "); break;
|
|
case TO_NEWT: n = snprintf(outbuf, sz, "newt "); break;
|
|
case TO_POP: n = snprintf(outbuf, sz, "pop "); break;
|
|
case TO_DUP: n = snprintf(outbuf, sz, "dup "); break;
|
|
case TO_PUSHV: n = snprintf(outbuf, sz, "pushv "); break;
|
|
case TO_SET: n = snprintf(outbuf, sz, "set "); break;
|
|
case TO_DUPV: n = snprintf(outbuf, sz, "dupv "); break;
|
|
case TO_SETG: n = snprintf(outbuf, sz, "setg "); break;
|
|
case TO_GETG: n = snprintf(outbuf, sz, "getg "); break;
|
|
case TO_CALL: n = snprintf(outbuf, sz, "call "); break;
|
|
case TO_RET: n = snprintf(outbuf, sz, "ret "); break;
|
|
case TO_RETI: n = snprintf(outbuf, sz, "reti "); break;
|
|
case TO_GETKV: n = snprintf(outbuf, sz, "getkv "); break;
|
|
case TO_SETKV: n = snprintf(outbuf, sz, "setkv "); break;
|
|
case TO_GETI: n = snprintf(outbuf, sz, "geti "); break;
|
|
case TO_SETI: n = snprintf(outbuf, sz, "seti "); break;
|
|
case TO_APPND: n = snprintf(outbuf, sz, "appnd "); break;
|
|
case TO_NEXT: n = snprintf(outbuf, sz, "next "); break;
|
|
case TO_SMT: n = snprintf(outbuf, sz, "smt "); break;
|
|
case TO_MT: n = snprintf(outbuf, sz, "mt "); break;
|
|
case TO_SUM: n = snprintf(outbuf, sz, "sum "); break;
|
|
case TO_SUB: n = snprintf(outbuf, sz, "sub "); break;
|
|
case TO_MUL: n = snprintf(outbuf, sz, "mul "); break;
|
|
case TO_DIV: n = snprintf(outbuf, sz, "div "); break;
|
|
case TO_IDIV: n = snprintf(outbuf, sz, "idiv "); break;
|
|
case TO_MOD: n = snprintf(outbuf, sz, "mod "); break;
|
|
case TO_EQ: n = snprintf(outbuf, sz, "eq "); break;
|
|
case TO_NEQ: n = snprintf(outbuf, sz, "neq "); break;
|
|
case TO_LT: n = snprintf(outbuf, sz, "lt "); break;
|
|
case TO_LTE: n = snprintf(outbuf, sz, "lte "); break;
|
|
case TO_GT: n = snprintf(outbuf, sz, "gt "); break;
|
|
case TO_GTE: n = snprintf(outbuf, sz, "gte "); break;
|
|
case TO_AND: n = snprintf(outbuf, sz, "and "); break;
|
|
case TO_OR: n = snprintf(outbuf, sz, "or "); break;
|
|
case TO_XOR: n = snprintf(outbuf, sz, "xor "); break;
|
|
case TO_POW: n = snprintf(outbuf, sz, "pow "); break;
|
|
case TO_SHL: n = snprintf(outbuf, sz, "shl "); break;
|
|
case TO_SHR: n = snprintf(outbuf, sz, "shr "); break;
|
|
case TO_LEN: n = snprintf(outbuf, sz, "len "); break;
|
|
case TO_TYPE: n = snprintf(outbuf, sz, "type "); break;
|
|
case TO_CAST: n = snprintf(outbuf, sz, "cast "); break;
|
|
case TO_VER: n = snprintf(outbuf, sz, "ver "); break;
|
|
case TO_CMPL: n = snprintf(outbuf, sz, "cmpl "); break;
|
|
case TO_ASMBL: n = snprintf(outbuf, sz, "asmbl "); break;
|
|
case TO_LOAD: n = snprintf(outbuf, sz, "load "); break;
|
|
case TO_BZ: n = snprintf(outbuf, sz, "bz "); break;
|
|
case TO_BNZ: n = snprintf(outbuf, sz, "bnz "); break;
|
|
case TO_JMP: n = snprintf(outbuf, sz, "jmp "); break;
|
|
case TO_GC: n = snprintf(outbuf, sz, "gc "); break;
|
|
default: n = snprintf(outbuf, sz, "??? "); break;
|
|
}
|
|
|
|
if (inst.operator >= OP_8BIT_OPERAND)
|
|
snprintf(&outbuf[n], sz + (size_t) n, "%2d", inst.operand);
|
|
else
|
|
snprintf(&outbuf[n], sz + (size_t) n, " ");
|
|
}
|
|
|
|
#endif |