From 27e584ea8439965adf5d607e1324b0ea36554b1c Mon Sep 17 00:00:00 2001 From: Riyyi Date: Fri, 24 Mar 2023 21:33:24 +0100 Subject: [PATCH] Everywhere: Add Keyword parsing --- src/ast.cpp | 15 ++++++++++ src/ast.h | 53 +++++++++++++++++++++++++---------- src/lexer.cpp | 73 +++++++++++++++++++++++++++++++++++++++++-------- src/lexer.h | 47 +++---------------------------- src/printer.cpp | 4 +++ src/reader.cpp | 12 ++++++-- src/reader.h | 3 +- 7 files changed, 135 insertions(+), 72 deletions(-) diff --git a/src/ast.cpp b/src/ast.cpp index 61622d5..437106f 100644 --- a/src/ast.cpp +++ b/src/ast.cpp @@ -5,6 +5,7 @@ */ #include // int64_t +#include #include "ast.h" @@ -31,6 +32,13 @@ String::String(const std::string& data) // ----------------------------------------- +Keyword::Keyword(const std::string& data) + : m_data(data) +{ +} + +// ----------------------------------------- + Number::Number(int64_t number) : m_number(number) { @@ -43,4 +51,11 @@ Symbol::Symbol(const std::string& symbol) { } +// ----------------------------------------- + +Value::Value(const std::string& value) + : m_value(value) +{ +} + } // namespace blaze diff --git a/src/ast.h b/src/ast.h index 66840cd..f626cb7 100644 --- a/src/ast.h +++ b/src/ast.h @@ -28,9 +28,11 @@ public: virtual bool isHashMap() const { return false; } virtual bool isList() const { return false; } virtual bool isString() const { return false; } + virtual bool isKeyword() const { return false; } virtual bool isNumber() const { return false; } - virtual bool isSpecialSymbol() const { return false; } + virtual bool isValue() const { return false; } virtual bool isSymbol() const { return false; } + virtual bool isFunction() const { return false; } protected: ASTNode() {} @@ -109,6 +111,21 @@ private: // ----------------------------------------- +// :keyword +class Keyword final : public ASTNode { +public: + Keyword(const std::string& data); + virtual ~Keyword() = default; + + virtual bool isKeyword() const override { return true; } + + const std::string& keyword() const { return m_data; } + +private: + std::string m_data; +}; + +// ----------------------------------------- // 123 class Number final : public ASTNode { public: @@ -125,13 +142,15 @@ private: // ----------------------------------------- -// true, false, nil -class SpecialSymbol final : public ASTNode { +// Symbols +class Symbol final : public ASTNode { public: - SpecialSymbol(); - virtual ~SpecialSymbol(); + Symbol(const std::string& symbol); + virtual ~Symbol() = default; - virtual bool isSpecialSymbol() const override { return true; } + virtual bool isSymbol() const override { return true; } + + const std::string& symbol() const { return m_symbol; } private: std::string m_symbol; @@ -139,18 +158,19 @@ private: // ----------------------------------------- -// Other symbols -class Symbol final : public ASTNode { +// true, false, nil +class Value final : public ASTNode { public: - Symbol(const std::string& symbol); - virtual ~Symbol() = default; + Value(const std::string& value); + virtual ~Value() = default; - virtual bool isSymbol() const override { return true; } + virtual bool isValue() const override { return true; } - std::string symbol() const { return m_symbol; } + const std::string& value() const { return m_value; } private: - std::string m_symbol; + std::string m_value; +}; }; // ----------------------------------------- @@ -172,13 +192,16 @@ template<> inline bool ASTNode::fastIs() const { return isString(); } template<> -inline bool ASTNode::fastIs() const { return isNumber(); } +inline bool ASTNode::fastIs() const { return isKeyword(); } template<> -inline bool ASTNode::fastIs() const { return isSpecialSymbol(); } +inline bool ASTNode::fastIs() const { return isNumber(); } template<> inline bool ASTNode::fastIs() const { return isSymbol(); } + +template<> +inline bool ASTNode::fastIs() const { return isValue(); } // clang-format on } // namespace blaze diff --git a/src/lexer.cpp b/src/lexer.cpp index 5d15103..7bdec87 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -5,7 +5,7 @@ */ #include -#include +#include // std::to_string #include #include "ruc/format/print.h" @@ -73,6 +73,11 @@ void Lexer::tokenize() return; } break; + case ':': + if (!consumeKeyword()) { + return; + } + break; case ';': consumeComment(); break; @@ -162,14 +167,28 @@ bool Lexer::consumeString() return true; } -bool Lexer::consumeComment() +bool Lexer::consumeKeyword() { size_t column = m_column; - std::string comment = ""; + std::string keyword; + keyword += 0x7f; // 127 - ignore(); // ; + ignore(); // : static std::unordered_set exit = { + '[', + ']', + '{', + '}', + '(', + ')', + '\'', + '`', + ',', + '"', + ';', + ' ', + '\t', '\r', '\n', '\0', @@ -183,17 +202,13 @@ bool Lexer::consumeComment() break; } - comment += character; + keyword += character; ignore(); } - // Trim comment - comment.erase(comment.begin(), - std::find_if(comment.begin(), comment.end(), [](char c) { return !std::isspace(c); })); - comment.erase(std::find_if(comment.rbegin(), comment.rend(), [](char c) { return !std::isspace(c); }).base(), - comment.end()); + m_tokens.push_back({ Token::Type::Keyword, m_line, column, keyword }); - m_tokens.push_back({ Token::Type::Comment, m_line, column, comment }); + retreat(); return true; } @@ -241,6 +256,42 @@ bool Lexer::consumeValue() return true; } +bool Lexer::consumeComment() +{ + size_t column = m_column; + std::string comment = ""; + + ignore(); // ; + + static std::unordered_set exit = { + '\r', + '\n', + '\0', + }; + + char character = 0; + for (;;) { + character = peek(); + + if (exit.find(character) != exit.end()) { + break; + } + + comment += character; + ignore(); + } + + // Trim comment + comment.erase(comment.begin(), + std::find_if(comment.begin(), comment.end(), [](char c) { return !std::isspace(c); })); + comment.erase(std::find_if(comment.rbegin(), comment.rend(), [](char c) { return !std::isspace(c); }).base(), + comment.end()); + + m_tokens.push_back({ Token::Type::Comment, m_line, column, comment }); + + return true; +} + void Lexer::dump() const { print("tokens: {}\n", m_tokens.size()); diff --git a/src/lexer.h b/src/lexer.h index 2906ddd..68558e4 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -32,8 +32,9 @@ struct Token { Caret, // ^ At, // @ String, // "foobar" + Keyword, // :keyword + Value, // numbers, "true", "false", and "nil", symbols Comment, // ; - Value, // symbols, numbers, "true", "false", and "nil" Error, }; @@ -58,8 +59,9 @@ public: private: bool consumeSpliceUnquoteOrUnquote(); // ~@ or ~ bool consumeString(); - bool consumeComment(); + bool consumeKeyword(); bool consumeValue(); + bool consumeComment(); size_t m_column { 0 }; size_t m_line { 0 }; @@ -68,44 +70,3 @@ private: }; } // namespace blaze - -// ~^@ -// (+ 2 (* 3 4)) - -// Lexing -> creates tokens -// Parsing -> creates AST - -// class Thing1 { -// public: -// std::vector& numbers() { return m_numbers; } - -// private: -// std::vector m_numbers; -// }; - -// class Thing2 { -// public: -// std::vector&& numbers() { return std::move(m_numbers); } - -// private: -// std::vector m_numbers; -// }; - -// class OtherThing { -// public: -// OtherThing(std::vector&& numbers) noexcept -// : m_numbers(std::move(numbers)) -// { -// } - -// private: -// std::vector m_numbers; -// }; - -// int main() -// { -// Thing1 thing1; -// Thing2 thing2; -// OtherThing other_thing(std::move(thing1.numbers())); -// OtherThing other_thing2(thing2.numbers()); -// } diff --git a/src/printer.cpp b/src/printer.cpp index 7f13104..819e4ca 100644 --- a/src/printer.cpp +++ b/src/printer.cpp @@ -88,6 +88,10 @@ void Printer::dumpImpl(ASTNode* node) printSpacing(); print("{}", static_cast(node)->data()); } + else if (is(node)) { + printSpacing(); + print(":{}", static_cast(node)->keyword().substr(1)); + } else if (is(node)) { printSpacing(); print("{}", static_cast(node)->number()); diff --git a/src/reader.cpp b/src/reader.cpp index f9c7bd8..a6cccec 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -117,14 +117,17 @@ ASTNode* Reader::readImpl() case Token::Type::At: // @ return readDeref(); break; - case Token::Type::String: + case Token::Type::String: // "foobar" return readString(); break; + case Token::Type::Keyword: // :keyword + return readKeyword(); + break; case Token::Type::Comment: // ; ignore(); return nullptr; break; - case Token::Type::Value: + case Token::Type::Value: // true, false, nil return readValue(); break; default: @@ -301,6 +304,11 @@ ASTNode* Reader::readString() return new String(symbol); } +ASTNode* Reader::readKeyword() +{ + return new Keyword(consume().symbol); +} + ASTNode* Reader::readValue() { Token token = consume(); diff --git a/src/reader.h b/src/reader.h index d30b4a0..833e1ec 100644 --- a/src/reader.h +++ b/src/reader.h @@ -45,7 +45,8 @@ private: ASTNode* readWithMeta(); // ^ ASTNode* readDeref(); // @ ASTNode* readString(); // "foobar" - ASTNode* readValue(); + ASTNode* readKeyword(); // :keyword + ASTNode* readValue(); // true, false, nil void dumpImpl(ASTNode* node);