diff --git a/.idea/editor.xml b/.idea/editor.xml
index aa2a125..43702aa 100644
--- a/.idea/editor.xml
+++ b/.idea/editor.xml
@@ -27,6 +27,7 @@
+
diff --git a/src/assembler/lexer.cc b/src/assembler/lexer.cc
index 4ecf7c8..c4e734f 100644
--- a/src/assembler/lexer.cc
+++ b/src/assembler/lexer.cc
@@ -12,12 +12,12 @@ void Lexer::reset()
Token Lexer::peek() const
{
- return look_ahead_;
+ return current_token_;
}
Token Lexer::ingest()
{
- Token t = look_ahead_;
+ Token t = current_token_;
ingest_next_token();
return t;
}
@@ -26,21 +26,71 @@ void Lexer::ingest_next_token()
{
- char c = source_.at(pos_);
+    // Skip blanks between tokens; a newline is significant and is lexed as its own token below.
+    while (pos_ < source_.size() && (source_[pos_] == ' ' || source_[pos_] == '\t' || source_[pos_] == '\r'))
+        ++pos_;
+
+    // Current character; '\0' is only a placeholder at end of input, where the EOF branch is taken first.
+    char c = pos_ < source_.size() ? source_[pos_] : '\0';
+    TokenType type {};
+    std::string token;
+
+    // Appends c to the token, steps past it and reloads c ('\0' once the input is exhausted).
+    const auto consume = [&] {
+        token += c;
+        ++pos_;
+        c = pos_ < source_.size() ? source_[pos_] : '\0';
+    };
+    // <cctype> classifiers are undefined for negative char values, so go through unsigned char.
+    const auto is_digit = [](char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; };
+    const auto is_alpha = [](char ch) { return isalpha(static_cast<unsigned char>(ch)) != 0; };
+
if (pos_ >= source_.size()) {
- look_ahead_ = { TokenType::EOF_, "" };
+        type = TokenType::EOF_;
} else if (c == '.') {
-
+        type = TokenType::Directive;
+        consume();
+        while (is_alpha(c) || c == '_')
+            consume();
} else if (c == '"') {
-
- } else if (isdigit(c)) {
-
+        type = TokenType::String;
+        ++pos_; // opening quote is not part of the token text
+        while (true) {
+            if (pos_ >= source_.size())
+                throw AssemblyError("Unterminated string");
+            char ch = source_[pos_++];
+            if (ch == '"') // closing quote
+                break;
+            if (ch == '\\') { // TODO - improve this for special characters
+                if (pos_ >= source_.size())
+                    throw AssemblyError("Unterminated string");
+                ch = source_[pos_++]; // the escaped character is taken literally
+            }
+            token += ch;
+        }
+    } else if (is_digit(c) || c == '-') {
+        type = TokenType::Integer;
+        consume();
+        while (is_digit(c) || c == '.') {
+            if (c == '.') {
+                // The first point promotes the token to Float; a second one is an error.
+                if (type == TokenType::Float)
+                    throw AssemblyError("Double point in floating point number");
+                type = TokenType::Float;
+            }
+            consume();
+        }
- } else if (isalpha(c)) {
-
+    } else if (is_alpha(c)) {
+        type = TokenType::Instruction;
+        consume();
+        while (is_alpha(c))
+            consume();
} else if (c == '\n') {
-
+        type = TokenType::Enter;
+        ++pos_; // step past the newline so the next call makes progress
} else if (c != ' ' && c != '\t' && c != '\r') {
throw AssemblyError(std::string("Unexpected character '") + c + "' (ascii: " + std::to_string((int) c) + ")");
}
+
+    current_token_ = { .type = type, .token = token };
}
} // tyche
diff --git a/src/assembler/lexer.hh b/src/assembler/lexer.hh
index f5584d7..6dfd191 100644
--- a/src/assembler/lexer.hh
+++ b/src/assembler/lexer.hh
@@ -6,11 +6,13 @@
namespace tyche::as {
-enum class TokenType { BOF, Directive, Instruction, Number, Float, String, Enter, EOF_ };
+enum class TokenType { BOF, Directive, Instruction, Integer, Float, String, Enter, EOF_ };
struct Token {
TokenType type;
std::string token;
+
+ friend bool operator==(Token const& lhs, Token const& rhs) { return std::tie(lhs.type, lhs.token) == std::tie(rhs.type, rhs.token); }
};
class Lexer {
@@ -24,7 +26,7 @@ public:
private:
const std::string source_;
size_t pos_ = 0;
- Token look_ahead_;
+ Token current_token_ { TokenType::BOF, "" };
void ingest_next_token();
};
diff --git a/src/assembler/tests.cc b/src/assembler/tests.cc
index 0d0bec4..bfdf45a 100644
--- a/src/assembler/tests.cc
+++ b/src/assembler/tests.cc
@@ -10,6 +10,15 @@ using namespace tyche::as;
using namespace tyche::bc;
using namespace tyche::vm;
+TEST(Lexer, Lexer)
+{
+    // The last string holds a backslash-escaped quote, so the lexer input reads: "Hel\"lo"
+    Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n");
+
+    ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
+    ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" }));
+}
+
TEST(Assember, Assembler)
{
BytecodePrototype bp;