diff --git a/src/ast.cpp b/src/ast.cpp new file mode 100644 index 0000000..5b14c23 --- /dev/null +++ b/src/ast.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#include // int64_t + +#include "ast.h" + +namespace blaze { + +List::~List() +{ + for (auto node : m_nodes) { + delete node; + } +} + +// ----------------------------------------- + +void List::addNode(ASTNode* node) +{ + m_nodes.push_back(node); +} + +// ----------------------------------------- + +String::String(const std::string& data) + : m_data(data) +{ +} + +// ----------------------------------------- + +Number::Number(int64_t number) + : m_number(number) +{ +} + +// ----------------------------------------- + +Symbol::Symbol(const std::string& symbol) + : m_symbol(symbol) +{ +} + +} // namespace blaze diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..5c411c7 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include // int64_t +#include +#include +#include // typeid +#include + +namespace blaze { + +class ASTNode { +public: + virtual ~ASTNode() = default; + + std::string className() const { return typeid(*this).name(); } + + template + bool fastIs() const = delete; + + virtual bool isVector() const { return false; } + virtual bool isHashMap() const { return false; } + virtual bool isList() const { return false; } + virtual bool isString() const { return false; } + virtual bool isNumber() const { return false; } + virtual bool isSpecialSymbol() const { return false; } + virtual bool isSymbol() const { return false; } +}; + +// ----------------------------------------- + +// [] +class Vector final : public ASTNode { +public: + Vector(); + virtual ~Vector(); + + virtual bool isVector() const override { return true; } + +private: + std::vector m_nodes; +}; + +// ----------------------------------------- + +// {} +class HashMap final : public ASTNode { +public: + HashMap(); + virtual ~HashMap(); + + virtual bool isHashMap() const override { return true; } + +private: + std::vector m_nodes; +}; + +// ----------------------------------------- + +// () +class List final : public ASTNode { +public: + List() = default; + virtual ~List() override; + + virtual bool isList() const override { return true; } + + void addNode(ASTNode* node); + + const std::vector& nodes() const { return m_nodes; } + +private: + std::vector m_nodes; +}; + +// ----------------------------------------- + +// "string" +class String final : public ASTNode { +public: + String(const std::string& data); + virtual ~String() = default; + + virtual bool isString() const override { return true; } + + const std::string& data() const { return m_data; } + +private: + std::string m_data; +}; + +// ----------------------------------------- + +// 123 +class Number final : public ASTNode { +public: + Number(int64_t number); + virtual ~Number() = default; + + virtual bool isNumber() const override { return true; } + + int64_t number() const { return m_number; } + +private: + int64_t m_number { 0 }; +}; + +// ----------------------------------------- + +// true, false, nil +class SpecialSymbol final : public ASTNode { +public: + SpecialSymbol(); + virtual ~SpecialSymbol(); + + virtual bool isSpecialSymbol() const override { return true; } + +private: + std::string m_symbol; +}; + +// ----------------------------------------- + +// Other symbols +class Symbol final : public ASTNode { +public: + Symbol(const std::string& symbol); + virtual ~Symbol() = default; + + virtual bool isSymbol() const override { return true; } + + std::string symbol() const { return m_symbol; } + +private: + std::string m_symbol; +}; + +// ----------------------------------------- + +// clang-format off +template<> +inline bool ASTNode::fastIs() const { return isVector(); } + +template<> +inline bool ASTNode::fastIs() const { return isHashMap(); } + +template<> +inline bool ASTNode::fastIs() const { return isList(); } + +template<> +inline bool ASTNode::fastIs() const { return isString(); } + +template<> +inline bool ASTNode::fastIs() const { return isNumber(); } + +template<> +inline bool ASTNode::fastIs() const { return isSpecialSymbol(); } + +template<> +inline bool ASTNode::fastIs() const { return isSymbol(); } +// clang-format on + +} // namespace blaze diff --git a/src/lexer.cpp b/src/lexer.cpp new file mode 100644 index 0000000..8a8e1ff --- /dev/null +++ b/src/lexer.cpp @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include + +#include "ruc/format/print.h" +#include "ruc/genericlexer.h" + +#include "lexer.h" + +namespace blaze { + +Lexer::Lexer(std::string_view input) + : ruc::GenericLexer(input) +{ +} + +Lexer::~Lexer() +{ +} + +// ----------------------------------------- + +void Lexer::tokenize() +{ + if (m_tokens.size() != 0) { + return; + } + + while (m_index < m_input.length()) { + switch (peek()) { + case '~': // ~@ or ~ + consumeSpliceUnquoteOrUnquote(); + break; + case '[': + m_tokens.push_back({ Token::Type::ParenOpen, m_line, m_column, "[" }); + break; + case ']': + m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "]" }); + break; + case '{': + m_tokens.push_back({ Token::Type::BraceOpen, m_line, m_column, "{" }); + break; + case '}': + m_tokens.push_back({ Token::Type::BraceClose, m_line, m_column, "}" }); + break; + case '(': + m_tokens.push_back({ Token::Type::ParenOpen, m_line, m_column, "(" }); + break; + case ')': + m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, ")" }); + break; + case '\'': + m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "'" }); + break; + case '`': + m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "`" }); + break; + case '^': + m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "^" }); + break; + case '@': + m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "@" }); + break; + case '"': + if (!consumeString()) { + return; + } + break; + case ';': + consumeComment(); + break; + case ' ': + case '\t': + case ',': + break; + case '\r': + if (peek(1) == '\n') { // CRLF \r\n + break; + } + m_column = -1; + m_line++; + break; + case '\n': + m_column = -1; + m_line++; + break; + default: + consumeValue(); + break; + } + + ignore(); + m_column++; + } +} + +bool Lexer::consumeSpliceUnquoteOrUnquote() +{ + size_t column = m_column; + + ignore(); // ~ + if (peek() == '@') { + m_tokens.push_back({ Token::Type::Special, m_line, column, "~@" }); + } + else { + m_tokens.push_back({ Token::Type::Tilde, m_line, column, "~" }); + } + + return true; +} + +bool Lexer::consumeString() +{ + size_t column = m_column; + std::string text = ""; + + static std::unordered_set exit = { + '"', + '\r', + '\n', + '\0', + }; + + bool escape = false; + char character = consume(); + for (;;) { + character = peek(); + + if (!escape && character == '\\') { + text += '\\'; + ignore(); + escape = true; + continue; + } + + if (!escape && exit.find(character) != exit.end()) { + break; + } + + text += character; + ignore(); + + if (escape) { + escape = false; + } + } + + m_tokens.push_back({ Token::Type::String, m_line, column, text }); + + return true; +} + +bool Lexer::consumeComment() +{ + size_t column = m_column; + std::string comment = ""; + + ignore(); // ; + + static std::unordered_set exit = { + '\r', + '\n', + '\0', + }; + + char character = 0; + for (;;) { + character = peek(); + + if (exit.find(character) != exit.end()) { + break; + } + + comment += character; + ignore(); + } + + // Trim comment + comment.erase(comment.begin(), + std::find_if(comment.begin(), comment.end(), [](char c) { return !std::isspace(c); })); + comment.erase(std::find_if(comment.rbegin(), comment.rend(), [](char c) { return !std::isspace(c); }).base(), + comment.end()); + + m_tokens.push_back({ Token::Type::Comment, m_line, column, comment }); + + return true; +} + +bool Lexer::consumeValue() +{ + size_t column = m_column; + std::string value = ""; + + static std::unordered_set exit = { + '[', + ']', + '{', + '}', + '(', + ')', + '\'', + '`', + ',', + '"', + ';', + ' ', + '\t', + '\r', + '\n', + '\0', + }; + + char character = 0; + for (;;) { + character = peek(); + + if (exit.find(character) != exit.end()) { + break; + } + + value += character; + ignore(); + } + + m_tokens.push_back({ Token::Type::Value, m_line, column, value }); + + retreat(); + + return true; +} + +void Lexer::dump() const +{ + print("tokens: {}\n", m_tokens.size()); + print("\""); + for (auto& token : m_tokens) { + print("{}", token.symbol); + } + print("\"\n"); +} + +} // namespace blaze diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..96f05af --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include // size_t +#include // uint8_t +#include +#include + +#include "ruc/format/print.h" +#include "ruc/genericlexer.h" + +namespace blaze { + +struct Token { + enum class Type : uint8_t { + None, + Special, // ~@ + BracketOpen, // [ + BracketClose, // ] + BraceOpen, // { + BraceClose, // } + ParenOpen, // ( + ParenClose, // ) + Quote, // ' + Backtick, // ` + Tilde, // ~ + Caret, // ^ + At, // @ + String, // "foobar" + Comment, // ; + Value, // symbols, numbers, "true", "false", and "nil" + }; + + Type type { Type::None }; + size_t column { 0 }; + size_t line { 0 }; + std::string symbol; +}; + +// Lexical analyzer -> tokenizes +class Lexer final : public ruc::GenericLexer { +public: + Lexer(std::string_view input); + virtual ~Lexer(); + + void tokenize(); + + void dump() const; + + std::vector& tokens() { return m_tokens; } + +private: + bool consumeSpliceUnquoteOrUnquote(); // ~@ or ~ + bool consumeString(); + bool consumeComment(); + bool consumeValue(); + + size_t m_column { 0 }; + size_t m_line { 0 }; + + std::vector m_tokens; +}; + +} // namespace blaze + +// ~^@ +// (+ 2 (* 3 4)) + +// Lexing -> creates tokens +// Parsing -> creates AST + +// class Thing1 { +// public: +// std::vector& numbers() { return m_numbers; } + +// private: +// std::vector m_numbers; +// }; + +// class Thing2 { +// public: +// std::vector&& numbers() { return std::move(m_numbers); } + +// private: +// std::vector m_numbers; +// }; + +// class OtherThing { +// public: +// OtherThing(std::vector&& numbers) noexcept +// : m_numbers(std::move(numbers)) +// { +// } + +// private: +// std::vector m_numbers; +// }; + +// int main() +// { +// Thing1 thing1; +// Thing2 thing2; +// OtherThing other_thing(std::move(thing1.numbers())); +// OtherThing other_thing2(thing2.numbers()); +// } diff --git a/src/printer.cpp b/src/printer.cpp new file mode 100644 index 0000000..c5e57b6 --- /dev/null +++ b/src/printer.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#include "ruc/format/print.h" + +#include "printer.h" +#include "types.h" + +namespace blaze { + +Printer::Printer(ASTNode* node) + : m_node(node) +{ +} + +Printer::~Printer() +{ + delete m_node; +} + +// ----------------------------------------- + +void Printer::dump() +{ + if (m_node == nullptr) { + return; + } + + dumpImpl(m_node); + print("\n"); +} + +void Printer::dumpImpl(ASTNode* node) +{ + auto printSpacing = [this]() { + if (!m_firstNode && !m_previousNodeIsList) { + print(" "); + } + }; + + if (is(node)) { + printSpacing(); + print("("); + m_firstNode = false; + m_previousNodeIsList = true; + List* list = static_cast(node); + for (size_t i = 0; i < list->nodes().size(); ++i) { + dumpImpl(list->nodes()[i]); + m_previousNodeIsList = false; + } + print(")"); + } + else if (is(node)) { + printSpacing(); + print("\"{}\"", static_cast(node)->data()); + } + else if (is(node)) { + printSpacing(); + print("{}", static_cast(node)->number()); + } + else if (is(node)) { + printSpacing(); + print("{}", static_cast(node)->symbol()); + } +} + +} // namespace blaze diff --git a/src/printer.h b/src/printer.h new file mode 100644 index 0000000..7039907 --- /dev/null +++ b/src/printer.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "ast.h" + +namespace blaze { + +// Serializer -> return to string +class Printer { +public: + Printer(ASTNode* node); + virtual ~Printer(); + + void dump(); + +private: + void dumpImpl(ASTNode* node); + + bool m_firstNode { true }; + bool m_previousNodeIsList { false }; + ASTNode* m_node { nullptr }; +}; + +} // namespace blaze diff --git a/src/reader.cpp b/src/reader.cpp new file mode 100644 index 0000000..0830041 --- /dev/null +++ b/src/reader.cpp @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#include // size_t +#include // uint64_t +#include // std::strtoll +#include // std::move + +#include "ruc/format/color.h" +#include "ruc/meta/assert.h" + +#include "ast.h" +#include "reader.h" +#include "types.h" + +namespace blaze { + +Reader::Reader(std::vector&& tokens) noexcept + : m_tokens(std::move(tokens)) +{ +} + +Reader::~Reader() +{ +} + +// ----------------------------------------- + +void Reader::read() +{ + if (m_node != nullptr) { + return; + } + + m_node = readImpl(); + VERIFY(m_index > m_tokens.size() - 1, "more than one sexp in input"); +} + +ASTNode* Reader::readImpl() +{ + switch (peek().type) { + case Token::Type::ParenOpen: + return readList(); + break; + case Token::Type::String: + return readString(); + break; + case Token::Type::Value: + return readValue(); + default: + // Unimplemented token + VERIFY_NOT_REACHED(); + return nullptr; + } +} + +ASTNode* Reader::readList() +{ + ignore(); // ( + + List* list = new List(); + while (m_index < m_tokens.size() && peek().type != Token::Type::ParenClose) { + list->addNode(readImpl()); + } + + VERIFY(m_index != m_tokens.size(), "missing closing ')'"); + + ignore(); // ) + + return list; +} + +ASTNode* Reader::readString() +{ + Token token = consume(); + return new String(token.symbol); +} + +ASTNode* Reader::readValue() +{ + Token token = consume(); + char* endPtr = nullptr; + int64_t result = std::strtoll(token.symbol.c_str(), &endPtr, 10); + if (endPtr == token.symbol.c_str() + token.symbol.size()) { + return new Number(result); + } + + return new Symbol(token.symbol); +} + +// ----------------------------------------- + +bool Reader::isEOF() const +{ + return m_index >= m_tokens.size(); +} + +Token Reader::peek() const +{ + VERIFY(!isEOF()); + return m_tokens[m_index]; +} + +Token Reader::consume() +{ + VERIFY(!isEOF()); + return m_tokens[m_index++]; +} + +void Reader::ignore() +{ + m_index++; +} + +// ----------------------------------------- + +void Reader::dump() +{ + dumpImpl(m_node); +} + +void Reader::dumpImpl(ASTNode* node) +{ + std::string indentation = std::string(m_indentation * 2, ' '); + + if (is(node)) { + List* list = static_cast(node); + print("{}", indentation); + print(fg(ruc::format::TerminalColor::Blue), "ListContainer"); + print(" <"); + print(fg(ruc::format::TerminalColor::Blue), "()"); + print(">\n"); + m_indentation++; + for (size_t i = 0; i < list->nodes().size(); ++i) { + dumpImpl(list->nodes()[i]); + } + m_indentation--; + return; + } + else if (is(node)) { + print("{}", indentation); + print(fg(ruc::format::TerminalColor::Yellow), "StringNode"); + print(" <{}>", static_cast(node)->data()); + } + else if (is(node)) { + print("{}", indentation); + print(fg(ruc::format::TerminalColor::Yellow), "NumberNode"); + print(" <{}>", static_cast(node)->number()); + } + else if (is(node)) { + print("{}", indentation); + print(fg(ruc::format::TerminalColor::Yellow), "SymbolNode"); + print(" <{}>", static_cast(node)->symbol()); + } + print("\n"); +} + +} // namespace blaze diff --git a/src/reader.h b/src/reader.h new file mode 100644 index 0000000..13a96a9 --- /dev/null +++ b/src/reader.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include // size_t +#include + +#include "ast.h" +#include "lexer.h" + +namespace blaze { + +// Parsing -> creates AST +class Reader { +public: + Reader(std::vector&& tokens) noexcept; + virtual ~Reader(); + + void read(); + + void dump(); + + ASTNode* node() { return m_node; } + +private: + bool isEOF() const; + Token peek() const; + Token consume(); + void ignore(); + + ASTNode* readImpl(); + ASTNode* readList(); + ASTNode* readString(); + ASTNode* readValue(); + + void dumpImpl(ASTNode* node); + + size_t m_index { 0 }; + size_t m_indentation { 0 }; + std::vector m_tokens; + + ASTNode* m_node { nullptr }; +}; + +} // namespace blaze diff --git a/src/step0_repl.cpp b/src/step0_repl.cpp new file mode 100644 index 0000000..3861738 --- /dev/null +++ b/src/step0_repl.cpp @@ -0,0 +1,44 @@ +#include +#include // std::cin +#include // std::getline +#include + +#if 0 +auto read(std::string_view data) -> std::string_view +{ + return data; +} + +auto eval(std::string_view data) -> std::string_view +{ + return data; +} + +auto print(std::string_view data) -> void +{ + printf("%s\n", data.data()); +} + +auto rep(std::string_view data) -> void +{ + print(eval(read(data))); +} + +auto main() -> int +{ + while (true) { + printf("user> "); + std::string line; + std::getline(std::cin, line); + + // Exit with Ctrl-D + if (std::cin.eof() || std::cin.fail()) { + break; + } + + rep(line); + } + + return 0; +} +#endif diff --git a/src/step1_read_print.cpp b/src/step1_read_print.cpp new file mode 100644 index 0000000..d9a9a98 --- /dev/null +++ b/src/step1_read_print.cpp @@ -0,0 +1,57 @@ +#include +#include // std::cin +#include // std::getline +#include + +#include "ast.h" +#include "lexer.h" +#include "printer.h" +#include "reader.h" + +#if 1 +auto read(std::string_view data) -> blaze::ASTNode* +{ + blaze::Lexer lexer(data); + lexer.tokenize(); + // lexer.dump(); + blaze::Reader reader(std::move(lexer.tokens())); + reader.read(); + // reader.dump(); + + return reader.node(); +} + +auto eval(blaze::ASTNode* node) -> blaze::ASTNode* +{ + return node; +} + +auto print(blaze::ASTNode* node) -> void +{ + blaze::Printer printer(node); + printer.dump(); +} + +auto rep(std::string_view data) -> void +{ + print(eval(read(data))); +} + +auto main() -> int +{ + while (true) { + printf("user> "); + std::string line; + std::getline(std::cin, line); + + // Exit with Ctrl-D + if (std::cin.eof() || std::cin.fail()) { + break; + } + + rep(line); + } + + return 0; +} +#endif diff --git a/src/types.h b/src/types.h new file mode 100644 index 0000000..62bd974 --- /dev/null +++ b/src/types.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2023 Riyyi + * + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include + +template +inline bool is(U& input) +{ + if constexpr (requires { input.template fastIs(); }) { + return input.template fastIs(); + } + + return typeid(input) == typeid(T); +} + +template +inline bool is(U* input) +{ + return input && is(*input); +} + +// serenity/AK/TypeCasts.h +// serenity/Userland/Libraries/LibJS/AST.h