Assembler #6

Merged
andre merged 12 commits from assembler into master 2026-05-01 10:12:41 -05:00
5 changed files with 90 additions and 25 deletions
Showing only changes of commit 1269a158f5 - Show all commits

View File

@@ -3,12 +3,15 @@
#include "as_exceptions.hh" #include "as_exceptions.hh"
#include "../bytecode/bytecode.hh" #include "../bytecode/bytecode.hh"
using namespace std::string_literals;
namespace tyche::as { namespace tyche::as {
ByteArray Assembler::assemble() ByteArray Assembler::assemble()
{ {
bc::BytecodePrototype bp;
lexer_.reset(); lexer_.reset();
bp_ = {};
enum class Section { Const, Function } section; enum class Section { Const, Function } section;
uint32_t function_id = 0; uint32_t function_id = 0;
@@ -17,16 +20,46 @@ ByteArray Assembler::assemble()
Token t = lexer_.ingest(); Token t = lexer_.ingest();
if (t.type == TokenType::Directive) { if (t.type == TokenType::Directive) {
if (std::get<std::string>(t.token) == ".const") {
section = Section::Const;
expect_token(TokenType::Enter);
} else if (std::get<std::string>(t.token) == ".func") {
section = Section::Function;
function_id = std::get<int>(expect_token(TokenType::Integer));
expect_token(TokenType::Enter);
} else {
throw AssemblyError("Invalid directive " + std::get<std::string>(t.token), t.line, t.column);
}
} else if (section == Section::Const && t.type == TokenType::Integer) { } else if (section == Section::Const && t.type == TokenType::Integer) {
int index = std::get<int>(expect_token(TokenType::Integer));
if ((size_t) index >= bp.constants.size())
bp.constants.resize(index + 1);
expect_token(TokenType::Colon);
Token tt = lexer_.ingest();
if (tt.type == TokenType::Float) {
bp.constants[index] = std::get<float>(tt.token);
} else if (tt.type == TokenType::String) {
bp.constants[index] = std::get<std::string>(tt.token);
} else {
throw AssemblyError("Expected float or string as constant", tt.line, tt.column);
}
} else if (section == Section::Function && t.type == TokenType::Instruction) { } else if (section == Section::Function && t.type == TokenType::Instruction) {
} else if (t.type == TokenType::EOF_) { } else if (t.type == TokenType::EOF_) {
break; break;
} else if (t.type != TokenType::Enter) { } else if (t.type != TokenType::Enter) {
throw AssemblyError("Unexpected token " + t.token, t.line, t.column); throw AssemblyError("Unexpected token of type " + token_type_name(t.type) + ")", t.line, t.column);
} }
} }
return bc::Bytecode::generate(bp_); return bc::Bytecode::generate(bp);
}
/// Pulls the next token from the lexer and requires it to be of kind `type`.
///
/// @param type  The token kind the caller requires at this point in the input.
/// @return      The value carried by the token that was read (its `token` field).
/// @throws AssemblyError  when the token read is of a different kind; the error
///                        carries that token's line and column.
TokenValue Assembler::expect_token(TokenType type)
{
    Token t = lexer_.ingest();
    if (t.type != type) {
        // "Expected" must be followed by the kind that was required; the kind that
        // was actually read is appended so the diagnostic shows both sides.
        throw AssemblyError("Expected " + token_type_name(type) + ", found " + token_type_name(t.type), t.line, t.column);
    }
    return t.token;
}
} // tyche } // tyche

View File

@@ -17,7 +17,8 @@ public:
private: private:
Lexer lexer_; Lexer lexer_;
bc::BytecodePrototype bp_;
TokenValue expect_token(TokenType type);
}; };
} // tyche } // tyche

View File

@@ -4,6 +4,22 @@
namespace tyche::as { namespace tyche::as {
/// Maps a token kind to the short label used in assembler error messages.
///
/// @param type  The token kind to describe.
/// @return      The label for `type`, or "???" when the value matches no known enumerator.
std::string token_type_name(TokenType type)
{
    // Start from the fallback label and overwrite it for every recognised kind.
    const char* label = "???";
    switch (type) {
        case TokenType::BOF:
            label = "BOF";
            break;
        case TokenType::Directive:
            label = "directive";
            break;
        case TokenType::Instruction:
            label = "instruction";
            break;
        case TokenType::Integer:
            label = "integer";
            break;
        case TokenType::Float:
            label = "float";
            break;
        case TokenType::String:
            label = "string";
            break;
        case TokenType::Enter:
            label = "enter";
            break;
        case TokenType::Colon:
            label = "colon";
            break;
        case TokenType::EOF_:
            label = "EOF";
            break;
        default:
            break;
    }
    return label;
}
void Lexer::reset() void Lexer::reset()
{ {
pos_ = 0; pos_ = 0;
@@ -28,20 +44,22 @@ void Lexer::ingest_next_token()
size_t current_line = 1; size_t current_line = 1;
if (pos_ >= source_.size()) { if (pos_ >= source_.size()) {
current_token_ = { TokenType::EOF_, "" }; current_token_ = { TokenType::EOF_ };
return; return;
} }
char c = source_.at(pos_); char c = source_.at(pos_);
TokenType type {}; TokenType type {};
std::string token; std::string stoken;
TokenValue value = std::monostate();
if (c == '.') { if (c == '.') {
type = TokenType::Directive; type = TokenType::Directive;
token += '.'; stoken += '.';
while (c = source_.at(++pos_), isalpha(c) || c == '_') while (c = source_.at(++pos_), isalpha(c) || c == '_')
token += c; stoken += c;
value = stoken;
} else if (c == '"') { } else if (c == '"') {
type = TokenType::String; type = TokenType::String;
++pos_; ++pos_;
@@ -54,13 +72,14 @@ void Lexer::ingest_next_token()
} else if (pos_ >= source_.size()) { } else if (pos_ >= source_.size()) {
throw AssemblyError("Unterminated string", current_line, pos_ - current_line_pos); throw AssemblyError("Unterminated string", current_line, pos_ - current_line_pos);
} }
token += source_.at(pos_++); stoken += source_.at(pos_++);
} }
value = stoken;
} else if (isdigit(c) || c == '-') { } else if (isdigit(c) || c == '-') {
type = TokenType::Integer; type = TokenType::Integer;
token += c; stoken += c;
while (c = source_.at(++pos_), isdigit(c) || c == '.') { while (c = source_.at(++pos_), isdigit(c) || c == '.') {
token += c; stoken += c;
if (c == '.') { if (c == '.') {
if (type == TokenType::Integer) if (type == TokenType::Integer)
type = TokenType::Float; type = TokenType::Float;
@@ -68,16 +87,22 @@ void Lexer::ingest_next_token()
throw AssemblyError("Double point in floating point number", current_line, pos_ - current_line_pos); throw AssemblyError("Double point in floating point number", current_line, pos_ - current_line_pos);
} }
} }
if (type == TokenType::Integer)
value = std::stoi(stoken);
else
value = std::stof(stoken);
} else if (isalpha(c)) { } else if (isalpha(c)) {
type = TokenType::Instruction; type = TokenType::Instruction;
token += c; stoken += c;
while (c = source_.at(++pos_), isalpha(c)) while (c = source_.at(++pos_), isalpha(c))
token += c; stoken += c;
value = stoken;
} else if (c == ':') { } else if (c == ':') {
type = TokenType::Colon; type = TokenType::Colon;
++pos_; ++pos_;
} else if (c == '\n') { } else if (c == '\n') {
type = TokenType::Enter; type = TokenType::Enter;
value = "\n";
++pos_; ++pos_;
++current_line; ++current_line;
current_line_pos = pos_; current_line_pos = pos_;
@@ -89,7 +114,7 @@ void Lexer::ingest_next_token()
while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r')) while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r'))
++pos_; ++pos_;
current_token_ = { .type = type, .token = token, .line = current_line, .column = pos_ - current_line_pos }; current_token_ = { .type = type, .token = value, .line = current_line, .column = pos_ - current_line_pos };
} }
} // tyche } // tyche

View File

@@ -3,6 +3,7 @@
#include <string> #include <string>
#include <utility> #include <utility>
#include <variant>
namespace tyche::as { namespace tyche::as {
@@ -10,15 +11,19 @@ enum class TokenType {
BOF, Directive, Instruction, Integer, Float, String, Enter, Colon, EOF_ BOF, Directive, Instruction, Integer, Float, String, Enter, Colon, EOF_
}; };
using TokenValue = std::variant<std::monostate, int, float, std::string>;
struct Token { struct Token {
TokenType type; TokenType type;
std::string token; TokenValue token = std::monostate();
size_t line; size_t line = 0;
size_t column; size_t column = 0;
friend bool operator==(Token const& lhs, Token const& rhs) { return std::tie(lhs.type, lhs.token) == std::tie(rhs.type, rhs.token); } friend bool operator==(Token const& lhs, Token const& rhs) { return std::tie(lhs.type, lhs.token) == std::tie(rhs.type, rhs.token); }
}; };
std::string token_type_name(TokenType type);
class Lexer { class Lexer {
public: public:
explicit Lexer(std::string source) : source_(std::move(source)) { reset(); } explicit Lexer(std::string source) : source_(std::move(source)) { reset(); }
@@ -30,7 +35,7 @@ public:
private: private:
const std::string source_; const std::string source_;
size_t pos_ = 0; size_t pos_ = 0;
Token current_token_ { TokenType::BOF, "" }; Token current_token_ { TokenType::BOF };
void ingest_next_token(); void ingest_next_token();
}; };

View File

@@ -12,17 +12,18 @@ using namespace tyche::vm;
TEST(Lexer, Lexer) TEST(Lexer, Lexer)
{ {
Token t;
Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n"); Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n");
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "382" })); t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get<int>(t.token), 382);
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "-12" })); t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get<int>(t.token), -12);
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "3.14" })); t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get<float>(t.token), 3.14f);
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "-12.8" })); t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get<float>(t.token), -12.8f);
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter, "\n" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
@@ -54,12 +55,12 @@ TEST(Assember, Assembler)
0: 3.14 0: 3.14
1: "Hello world" 1: "Hello world"
.function 0 .func 0
pushi 2 pushi 2
pushi 3 pushi 3
sum sum
ret ret
.function 1 .func 1
pushi 5000 pushi 5000
ret ret
)"; )";