diff --git a/.idea/editor.xml b/.idea/editor.xml
index 43702aa..aa2a125 100644
--- a/.idea/editor.xml
+++ b/.idea/editor.xml
@@ -27,7 +27,6 @@
-
diff --git a/src/assembler/lexer.cc b/src/assembler/lexer.cc
index c4e734f..b581858 100644
--- a/src/assembler/lexer.cc
+++ b/src/assembler/lexer.cc
@@ -24,14 +24,17 @@ Token Lexer::ingest()
void Lexer::ingest_next_token()
{
+ if (pos_ >= source_.size()) {
+ current_token_ = { TokenType::EOF_, "" };
+ return;
+ }
+
char c = source_.at(pos_);
TokenType type {};
std::string token;
- if (pos_ >= source_.size()) {
- type = TokenType::EOF_;
- } else if (c == '.') {
+ if (c == '.') {
type = TokenType::Directive;
token += '.';
while (c = source_.at(++pos_), isalpha(c) || c == '_')
@@ -40,7 +43,7 @@ void Lexer::ingest_next_token()
type = TokenType::String;
++pos_;
while (true) {
- if (source_.at(pos_) == '\'') { // TODO - improve this for special characters
+ if (source_.at(pos_) == '\\') { // TODO - improve this for special characters
++pos_;
} else if (source_.at(pos_) == '"') {
++pos_;
@@ -69,10 +72,15 @@ void Lexer::ingest_next_token()
token += c;
} else if (c == '\n') {
type = TokenType::Enter;
- } else if (c != ' ' && c != '\t' && c != '\r') {
+ ++pos_;
+ } else {
throw AssemblyError(std::string("Unexpected character '") + c + "' (ascii: " + std::to_string((int) c) + ")");
}
+ // skip ignored tokens
+ while (pos_ < source_.size() && (source_.at(pos_) == ' ' || source_.at(pos_) == '\t' || source_.at(pos_) == '\r'))
+ ++pos_;
+
current_token_ = { .type = type, .token = token };
}
diff --git a/src/assembler/lexer.hh b/src/assembler/lexer.hh
index 6dfd191..0c70e04 100644
--- a/src/assembler/lexer.hh
+++ b/src/assembler/lexer.hh
@@ -17,7 +17,7 @@ struct Token {
class Lexer {
public:
- explicit Lexer(std::string source) : source_(std::move(source) + "\n") { reset(); }
+ explicit Lexer(std::string source) : source_(std::move(source)) { reset(); }
void reset();
[[nodiscard]] Token peek() const;
diff --git a/src/assembler/tests.cc b/src/assembler/tests.cc
index bfdf45a..c0e53f2 100644
--- a/src/assembler/tests.cc
+++ b/src/assembler/tests.cc
@@ -12,10 +12,23 @@ using namespace tyche::vm;
TEST(Lexer, Lexer)
{
- Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\"lo\"\n");
+ Lexer lexer(".dir push 382 -12 3.14 -12.8 \"Hello\" \"Hel\\\"lo\"\n");
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
ASSERT_EQ(lexer.ingest(), (Token { TokenType::Instruction, "push" }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "382" }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::Integer, "-12" }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "3.14" }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::Float, "-12.8" }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hello" }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::String, "Hel\"lo" }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::Enter }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::EOF_ }));
+
+ lexer.reset();
+ ASSERT_EQ(lexer.ingest(), (Token { TokenType::Directive, ".dir" }));
}
TEST(Assember, Assembler)