From 1269a158f530e5026e4e2581161ce3180585bccb Mon Sep 17 00:00:00 2001 From: Andre Wagner Date: Fri, 1 May 2026 09:16:10 -0500 Subject: [PATCH] . --- src/assembler/assembler.cc | 39 ++++++++++++++++++++++++++++++--- src/assembler/assembler.hh | 3 ++- src/assembler/lexer.cc | 45 +++++++++++++++++++++++++++++--------- src/assembler/lexer.hh | 13 +++++++---- src/assembler/tests.cc | 15 +++++++------ 5 files changed, 90 insertions(+), 25 deletions(-) diff --git a/src/assembler/assembler.cc b/src/assembler/assembler.cc index 8e2b87e..06f611f 100644 --- a/src/assembler/assembler.cc +++ b/src/assembler/assembler.cc @@ -3,12 +3,15 @@ #include "as_exceptions.hh" #include "../bytecode/bytecode.hh" +using namespace std::string_literals; + namespace tyche::as { ByteArray Assembler::assemble() { + bc::BytecodePrototype bp; + lexer_.reset(); - bp_ = {}; enum class Section { Const, Function } section; uint32_t function_id = 0; @@ -17,16 +20,46 @@ ByteArray Assembler::assemble() Token t = lexer_.ingest(); if (t.type == TokenType::Directive) { + if (std::get(t.token) == ".const") { + section = Section::Const; + expect_token(TokenType::Enter); + } else if (std::get(t.token) == ".func") { + section = Section::Function; + function_id = std::get(expect_token(TokenType::Integer)); + expect_token(TokenType::Enter); + } else { + throw AssemblyError("Invalid directive " + std::get(t.token), t.line, t.column); + } } else if (section == Section::Const && t.type == TokenType::Integer) { + int index = std::get(expect_token(TokenType::Integer)); + if ((size_t) index >= bp.constants.size()) + bp.constants.resize(index + 1); + expect_token(TokenType::Colon); + Token tt = lexer_.ingest(); + if (tt.type == TokenType::Float) { + bp.constants[index] = std::get(tt.token); + } else if (tt.type == TokenType::String) { + bp.constants[index] = std::get(tt.token); + } else { + throw AssemblyError("Expected float or string as constant", tt.line, tt.column); + } } else if (section == Section::Function && t.type == TokenType::Instruction) { } else if (t.type == TokenType::EOF_) { break; } else if (t.type != TokenType::Enter) { - throw AssemblyError("Unexpected token " + t.token, t.line, t.column); + throw AssemblyError("Unexpected token of type " + token_type_name(t.type) + ")", t.line, t.column); } } - return bc::Bytecode::generate(bp_); + return bc::Bytecode::generate(bp); +} + +TokenValue Assembler::expect_token(TokenType type) +{ + Token t = lexer_.ingest(); + if (t.type != type) + throw AssemblyError("Expected " + token_type_name(t.type), t.line, t.column); + return t.token; } } // tyche diff --git a/src/assembler/assembler.hh b/src/assembler/assembler.hh index ab8d4d6..55393b1 100644 --- a/src/assembler/assembler.hh +++ b/src/assembler/assembler.hh @@ -17,7 +17,8 @@ public: private: Lexer lexer_; - bc::BytecodePrototype bp_; + + TokenValue expect_token(TokenType type); }; } // tyche diff --git a/src/assembler/lexer.cc b/src/assembler/lexer.cc index 94e6230..03b3836 100644 --- a/src/assembler/lexer.cc +++ b/src/assembler/lexer.cc @@ -4,6 +4,22 @@ namespace tyche::as { +std::string token_type_name(TokenType type) +{ + switch (type) { + case TokenType::BOF: return "BOF"; + case TokenType::Directive: return "directive"; + case TokenType::Instruction: return "instruction"; + case TokenType::Integer: return "integer"; + case TokenType::Float: return "float"; + case TokenType::String: return "string"; + case TokenType::Enter: return "enter"; + case TokenType::Colon: return "colon"; + case TokenType::EOF_: return "EOF"; + default: return "???"; + } +} + void Lexer::reset() { pos_ = 0; @@ -28,20 +44,22 @@ void Lexer::ingest_next_token() size_t current_line = 1; if (pos_ >= source_.size()) { - current_token_ = { TokenType::EOF_, "" }; + current_token_ = { TokenType::EOF_ }; return; } char c = source_.at(pos_); TokenType type {}; - std::string token; + std::string stoken; + TokenValue value = std::monostate(); if (c == '.') { type = TokenType::Directive; - token += '.'; + stoken += '.'; while (c = source_.at(++pos_), isalpha(c) || c == '_') - token += c; + stoken += c; + value = stoken; } else if (c == '"') { type = TokenType::String; ++pos_; @@ -54,13 +72,14 @@ void Lexer::ingest_next_token() } else if (pos_ >= source_.size()) { throw AssemblyError("Unterminated string", current_line, pos_ - current_line_pos); } - token += source_.at(pos_++); + stoken += source_.at(pos_++); } + value = stoken; } else if (isdigit(c) || c == '-') { type = TokenType::Integer; - token += c; + stoken += c; while (c = source_.at(++pos_), isdigit(c) || c == '.') { - token += c; + stoken += c; if (c == '.') { if (type == TokenType::Integer) type = TokenType::Float; @@ -68,16 +87,22 @@ void Lexer::ingest_next_token() throw AssemblyError("Double point in floating point number", current_line, pos_ - current_line_pos); } } + if (type == TokenType::Integer) + value = std::stoi(stoken); + else + value = std::stof(stoken); } else if (isalpha(c)) { type = TokenType::Instruction; - token += c; + stoken += c; while (c = source_.at(++pos_), isalpha(c)) - token += c; + stoken += c; + value = stoken; } else if (c == ':') { type = TokenType::Colon; ++pos_; } else if (c == '\n') { type = TokenType::Enter; + value = "\n"; ++pos_; ++current_line; current_line_pos = pos_; @@ -89,7 +114,7 @@ void Lexer::ingest_next_token() while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r')) ++pos_; - current_token_ = { .type = type, .token = token, .line = current_line, .column = pos_ - current_line_pos }; + current_token_ = { .type = type, .token = value, .line = current_line, .column = pos_ - current_line_pos }; } } // tyche diff --git a/src/assembler/lexer.hh b/src/assembler/lexer.hh index 1c634bb..eab14aa 100644 --- a/src/assembler/lexer.hh +++ b/src/assembler/lexer.hh @@ -3,6 +3,7 @@ #include #include +#include namespace tyche::as { @@ -10,15 +11,19 @@ enum class TokenType { BOF, Directive, Instruction, Integer, Float, String, Enter, Colon, EOF_ }; +using TokenValue = std::variant; + struct Token { TokenType type; - std::string token; - size_t line; - size_t column; + TokenValue token = std::monostate(); + size_t line = 0; + size_t column = 0; friend bool operator==(Token const& lhs, Token const& rhs) { return std::tie(lhs.type, lhs.token) == std::tie(rhs.type, rhs.token); } }; +std::string token_type_name(TokenType type); + class Lexer { public: explicit Lexer(std::string source) : source_(std::move(source)) { reset(); } @@ -30,7 +35,7 @@ public: private: const std::string source_; size_t pos_ = 0; - Token current_token_ { TokenType::BOF, "" }; + Token current_token_ { TokenType::BOF }; void ingest_next_token(); }; diff --git a/src/assembler/tests.cc b/src/assembler/tests.cc index c0e53f2..06b446a 100644 --- a/src/assembler/tests.cc +++ b/src/assembler/tests.cc @@ -12,17 +12,18 @@ using namespace tyche::vm; TEST(Lexer, Lexer) { + Token t; Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n"); ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" })); - ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "382" })); - ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "-12" })); - ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "3.14" })); - ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "-12.8" })); + t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get(t.token), 382); + t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get(t.token), -12); + t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get(t.token), 3.14f); + t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get(t.token), -12.8f); ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" })); - ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter })); + ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter, "\n" })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ })); ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ })); @@ -54,12 +55,12 @@ TEST(Assember, Assembler) 0: 3.14 1: "Hello world" -.function 0 +.func 0 pushi 2 pushi 3 sum ret -.function 1 +.func 1 pushi 5000 ret )";