Assembler #6
@@ -3,12 +3,15 @@
|
|||||||
#include "as_exceptions.hh"
|
#include "as_exceptions.hh"
|
||||||
#include "../bytecode/bytecode.hh"
|
#include "../bytecode/bytecode.hh"
|
||||||
|
|
||||||
|
using namespace std::string_literals;
|
||||||
|
|
||||||
namespace tyche::as {
|
namespace tyche::as {
|
||||||
|
|
||||||
ByteArray Assembler::assemble()
|
ByteArray Assembler::assemble()
|
||||||
{
|
{
|
||||||
|
bc::BytecodePrototype bp;
|
||||||
|
|
||||||
lexer_.reset();
|
lexer_.reset();
|
||||||
bp_ = {};
|
|
||||||
|
|
||||||
enum class Section { Const, Function } section;
|
enum class Section { Const, Function } section;
|
||||||
uint32_t function_id = 0;
|
uint32_t function_id = 0;
|
||||||
@@ -17,16 +20,46 @@ ByteArray Assembler::assemble()
|
|||||||
Token t = lexer_.ingest();
|
Token t = lexer_.ingest();
|
||||||
|
|
||||||
if (t.type == TokenType::Directive) {
|
if (t.type == TokenType::Directive) {
|
||||||
|
if (std::get<std::string>(t.token) == ".const") {
|
||||||
|
section = Section::Const;
|
||||||
|
expect_token(TokenType::Enter);
|
||||||
|
} else if (std::get<std::string>(t.token) == ".func") {
|
||||||
|
section = Section::Function;
|
||||||
|
function_id = std::get<int>(expect_token(TokenType::Integer));
|
||||||
|
expect_token(TokenType::Enter);
|
||||||
|
} else {
|
||||||
|
throw AssemblyError("Invalid directive " + std::get<std::string>(t.token), t.line, t.column);
|
||||||
|
}
|
||||||
} else if (section == Section::Const && t.type == TokenType::Integer) {
|
} else if (section == Section::Const && t.type == TokenType::Integer) {
|
||||||
|
int index = std::get<int>(expect_token(TokenType::Integer));
|
||||||
|
if ((size_t) index >= bp.constants.size())
|
||||||
|
bp.constants.resize(index + 1);
|
||||||
|
expect_token(TokenType::Colon);
|
||||||
|
Token tt = lexer_.ingest();
|
||||||
|
if (tt.type == TokenType::Float) {
|
||||||
|
bp.constants[index] = std::get<float>(tt.token);
|
||||||
|
} else if (tt.type == TokenType::String) {
|
||||||
|
bp.constants[index] = std::get<std::string>(tt.token);
|
||||||
|
} else {
|
||||||
|
throw AssemblyError("Expected float or string as constant", tt.line, tt.column);
|
||||||
|
}
|
||||||
} else if (section == Section::Function && t.type == TokenType::Instruction) {
|
} else if (section == Section::Function && t.type == TokenType::Instruction) {
|
||||||
} else if (t.type == TokenType::EOF_) {
|
} else if (t.type == TokenType::EOF_) {
|
||||||
break;
|
break;
|
||||||
} else if (t.type != TokenType::Enter) {
|
} else if (t.type != TokenType::Enter) {
|
||||||
throw AssemblyError("Unexpected token " + t.token, t.line, t.column);
|
throw AssemblyError("Unexpected token of type " + token_type_name(t.type) + ")", t.line, t.column);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return bc::Bytecode::generate(bp_);
|
return bc::Bytecode::generate(bp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consume the next token from the lexer and require it to be of the given type.
///
/// @param type  the token type the grammar requires at this position.
/// @return      the value carried by the consumed token.
/// @throws AssemblyError (with the token's line/column) if the consumed token
///         is of a different type.
TokenValue Assembler::expect_token(TokenType type)
{
    Token t = lexer_.ingest();
    if (t.type != type) {
        // Report what the grammar required (`type`) and what was actually read
        // (`t.type`); naming only the found type after "Expected" is misleading.
        throw AssemblyError("Expected " + token_type_name(type) + ", found " + token_type_name(t.type), t.line, t.column);
    }
    return t.token;
}
|
||||||
|
|
||||||
} // tyche
|
} // tyche
|
||||||
|
|||||||
@@ -17,7 +17,8 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
Lexer lexer_;
|
Lexer lexer_;
|
||||||
bc::BytecodePrototype bp_;
|
|
||||||
|
TokenValue expect_token(TokenType type);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // tyche
|
} // tyche
|
||||||
|
|||||||
@@ -4,6 +4,22 @@
|
|||||||
|
|
||||||
namespace tyche::as {
|
namespace tyche::as {
|
||||||
|
|
||||||
|
std::string token_type_name(TokenType type)
|
||||||
|
{
|
||||||
|
switch (type) {
|
||||||
|
case TokenType::BOF: return "BOF";
|
||||||
|
case TokenType::Directive: return "directive";
|
||||||
|
case TokenType::Instruction: return "instruction";
|
||||||
|
case TokenType::Integer: return "integer";
|
||||||
|
case TokenType::Float: return "float";
|
||||||
|
case TokenType::String: return "string";
|
||||||
|
case TokenType::Enter: return "enter";
|
||||||
|
case TokenType::Colon: return "colon";
|
||||||
|
case TokenType::EOF_: return "EOF";
|
||||||
|
default: return "???";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Lexer::reset()
|
void Lexer::reset()
|
||||||
{
|
{
|
||||||
pos_ = 0;
|
pos_ = 0;
|
||||||
@@ -28,20 +44,22 @@ void Lexer::ingest_next_token()
|
|||||||
size_t current_line = 1;
|
size_t current_line = 1;
|
||||||
|
|
||||||
if (pos_ >= source_.size()) {
|
if (pos_ >= source_.size()) {
|
||||||
current_token_ = { TokenType::EOF_, "" };
|
current_token_ = { TokenType::EOF_ };
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
char c = source_.at(pos_);
|
char c = source_.at(pos_);
|
||||||
|
|
||||||
TokenType type {};
|
TokenType type {};
|
||||||
std::string token;
|
std::string stoken;
|
||||||
|
TokenValue value = std::monostate();
|
||||||
|
|
||||||
if (c == '.') {
|
if (c == '.') {
|
||||||
type = TokenType::Directive;
|
type = TokenType::Directive;
|
||||||
token += '.';
|
stoken += '.';
|
||||||
while (c = source_.at(++pos_), isalpha(c) || c == '_')
|
while (c = source_.at(++pos_), isalpha(c) || c == '_')
|
||||||
token += c;
|
stoken += c;
|
||||||
|
value = stoken;
|
||||||
} else if (c == '"') {
|
} else if (c == '"') {
|
||||||
type = TokenType::String;
|
type = TokenType::String;
|
||||||
++pos_;
|
++pos_;
|
||||||
@@ -54,13 +72,14 @@ void Lexer::ingest_next_token()
|
|||||||
} else if (pos_ >= source_.size()) {
|
} else if (pos_ >= source_.size()) {
|
||||||
throw AssemblyError("Unterminated string", current_line, pos_ - current_line_pos);
|
throw AssemblyError("Unterminated string", current_line, pos_ - current_line_pos);
|
||||||
}
|
}
|
||||||
token += source_.at(pos_++);
|
stoken += source_.at(pos_++);
|
||||||
}
|
}
|
||||||
|
value = stoken;
|
||||||
} else if (isdigit(c) || c == '-') {
|
} else if (isdigit(c) || c == '-') {
|
||||||
type = TokenType::Integer;
|
type = TokenType::Integer;
|
||||||
token += c;
|
stoken += c;
|
||||||
while (c = source_.at(++pos_), isdigit(c) || c == '.') {
|
while (c = source_.at(++pos_), isdigit(c) || c == '.') {
|
||||||
token += c;
|
stoken += c;
|
||||||
if (c == '.') {
|
if (c == '.') {
|
||||||
if (type == TokenType::Integer)
|
if (type == TokenType::Integer)
|
||||||
type = TokenType::Float;
|
type = TokenType::Float;
|
||||||
@@ -68,16 +87,22 @@ void Lexer::ingest_next_token()
|
|||||||
throw AssemblyError("Double point in floating point number", current_line, pos_ - current_line_pos);
|
throw AssemblyError("Double point in floating point number", current_line, pos_ - current_line_pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (type == TokenType::Integer)
|
||||||
|
value = std::stoi(stoken);
|
||||||
|
else
|
||||||
|
value = std::stof(stoken);
|
||||||
} else if (isalpha(c)) {
|
} else if (isalpha(c)) {
|
||||||
type = TokenType::Instruction;
|
type = TokenType::Instruction;
|
||||||
token += c;
|
stoken += c;
|
||||||
while (c = source_.at(++pos_), isalpha(c))
|
while (c = source_.at(++pos_), isalpha(c))
|
||||||
token += c;
|
stoken += c;
|
||||||
|
value = stoken;
|
||||||
} else if (c == ':') {
|
} else if (c == ':') {
|
||||||
type = TokenType::Colon;
|
type = TokenType::Colon;
|
||||||
++pos_;
|
++pos_;
|
||||||
} else if (c == '\n') {
|
} else if (c == '\n') {
|
||||||
type = TokenType::Enter;
|
type = TokenType::Enter;
|
||||||
|
value = "\n";
|
||||||
++pos_;
|
++pos_;
|
||||||
++current_line;
|
++current_line;
|
||||||
current_line_pos = pos_;
|
current_line_pos = pos_;
|
||||||
@@ -89,7 +114,7 @@ void Lexer::ingest_next_token()
|
|||||||
while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r'))
|
while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r'))
|
||||||
++pos_;
|
++pos_;
|
||||||
|
|
||||||
current_token_ = { .type = type, .token = token, .line = current_line, .column = pos_ - current_line_pos };
|
current_token_ = { .type = type, .token = value, .line = current_line, .column = pos_ - current_line_pos };
|
||||||
}
|
}
|
||||||
|
|
||||||
} // tyche
|
} // tyche
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
namespace tyche::as {
|
namespace tyche::as {
|
||||||
|
|
||||||
@@ -10,15 +11,19 @@ enum class TokenType {
|
|||||||
BOF, Directive, Instruction, Integer, Float, String, Enter, Colon, EOF_
|
BOF, Directive, Instruction, Integer, Float, String, Enter, Colon, EOF_
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using TokenValue = std::variant<std::monostate, int, float, std::string>;
|
||||||
|
|
||||||
struct Token {
|
struct Token {
|
||||||
TokenType type;
|
TokenType type;
|
||||||
std::string token;
|
TokenValue token = std::monostate();
|
||||||
size_t line;
|
size_t line = 0;
|
||||||
size_t column;
|
size_t column = 0;
|
||||||
|
|
||||||
friend bool operator==(Token const& lhs, Token const& rhs) { return std::tie(lhs.type, lhs.token) == std::tie(rhs.type, rhs.token); }
|
friend bool operator==(Token const& lhs, Token const& rhs) { return std::tie(lhs.type, lhs.token) == std::tie(rhs.type, rhs.token); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::string token_type_name(TokenType type);
|
||||||
|
|
||||||
class Lexer {
|
class Lexer {
|
||||||
public:
|
public:
|
||||||
explicit Lexer(std::string source) : source_(std::move(source)) { reset(); }
|
explicit Lexer(std::string source) : source_(std::move(source)) { reset(); }
|
||||||
@@ -30,7 +35,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
const std::string source_;
|
const std::string source_;
|
||||||
size_t pos_ = 0;
|
size_t pos_ = 0;
|
||||||
Token current_token_ { TokenType::BOF, "" };
|
Token current_token_ { TokenType::BOF };
|
||||||
|
|
||||||
void ingest_next_token();
|
void ingest_next_token();
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -12,17 +12,18 @@ using namespace tyche::vm;
|
|||||||
|
|
||||||
TEST(Lexer, Lexer)
|
TEST(Lexer, Lexer)
|
||||||
{
|
{
|
||||||
|
Token t;
|
||||||
Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n");
|
Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n");
|
||||||
|
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" }));
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "382" }));
|
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get<int>(t.token), 382);
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "-12" }));
|
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Integer); ASSERT_EQ(std::get<int>(t.token), -12);
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "3.14" }));
|
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get<float>(t.token), 3.14f);
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "-12.8" }));
|
t = lexer.ingest(); ASSERT_EQ(t.type, TokenType::Float); ASSERT_FLOAT_EQ(std::get<float>(t.token), -12.8f);
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" }));
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" }));
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter, "\n" }));
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
|
||||||
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
|
ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
|
||||||
@@ -54,12 +55,12 @@ TEST(Assember, Assembler)
|
|||||||
0: 3.14
|
0: 3.14
|
||||||
1: "Hello world"
|
1: "Hello world"
|
||||||
|
|
||||||
.function 0
|
.func 0
|
||||||
pushi 2
|
pushi 2
|
||||||
pushi 3
|
pushi 3
|
||||||
sum
|
sum
|
||||||
ret
|
ret
|
||||||
.function 1
|
.func 1
|
||||||
pushi 5000
|
pushi 5000
|
||||||
ret
|
ret
|
||||||
)";
|
)";
|
||||||
|
|||||||
Reference in New Issue
Block a user