Browse Source

Lisp: Add source

master
Riyyi 1 year ago
parent
commit
46e037e39e
  1. 48
      src/ast.cpp
  2. 168
      src/ast.h
  3. 248
      src/lexer.cpp
  4. 110
      src/lexer.h
  5. 70
      src/printer.cpp
  6. 29
      src/printer.h
  7. 161
      src/reader.cpp
  8. 49
      src/reader.h
  9. 44
      src/step0_repl.cpp
  10. 57
      src/step1_read_print.cpp
  11. 28
      src/types.h

48
src/ast.cpp

@ -0,0 +1,48 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#include <cstdint> // int64_t
#include "ast.h"
namespace blaze {
// Destroys the list together with every child node stored in it.
List::~List()
{
    for (auto it = m_nodes.begin(); it != m_nodes.end(); ++it) {
        delete *it;
    }
}
// -----------------------------------------
// Appends `node` at the end of the list.
void List::addNode(ASTNode* node)
{
    m_nodes.emplace_back(node);
}
// -----------------------------------------
String::String(const std::string& data)
: m_data(data)
{
}
// -----------------------------------------
Number::Number(int64_t number)
: m_number(number)
{
}
// -----------------------------------------
Symbol::Symbol(const std::string& symbol)
: m_symbol(symbol)
{
}
} // namespace blaze

168
src/ast.h

@ -0,0 +1,168 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <cstdint> // int64_t
#include <string>
#include <string_view>
#include <typeinfo> // typeid
#include <vector>
namespace blaze {
// Base class of every AST node; the concrete node kinds below derive from it.
class ASTNode {
public:
virtual ~ASTNode() = default;
// Returns typeid(*this).name(), the implementation-defined name of the node's dynamic type.
std::string className() const { return typeid(*this).name(); }
// Type check that dispatches to the is*() virtuals below.
// The primary template is deleted; only the explicit specializations
// defined after the node classes in this header can be called.
template<typename T>
bool fastIs() const = delete;
// Per-kind predicates: false by default, each node class overrides its own to return true.
virtual bool isVector() const { return false; }
virtual bool isHashMap() const { return false; }
virtual bool isList() const { return false; }
virtual bool isString() const { return false; }
virtual bool isNumber() const { return false; }
virtual bool isSpecialSymbol() const { return false; }
virtual bool isSymbol() const { return false; }
};
// -----------------------------------------
// []
// AST node for a vector form.
// NOTE(review): the constructor and destructor are only declared here and no
// definition is visible in ast.cpp; instantiating this class would presumably
// fail to link -- confirm before use.
class Vector final : public ASTNode {
public:
Vector();
virtual ~Vector();
virtual bool isVector() const override { return true; }
private:
// Child nodes; nothing in this class adds to or reads from it yet.
std::vector<ASTNode*> m_nodes;
};
// -----------------------------------------
// {}
// AST node for a hash-map form.
// NOTE(review): the constructor and destructor are only declared here and no
// definition is visible in ast.cpp; instantiating this class would presumably
// fail to link -- confirm before use.
class HashMap final : public ASTNode {
public:
HashMap();
virtual ~HashMap();
virtual bool isHashMap() const override { return true; }
private:
// Child nodes; nothing in this class adds to or reads from it yet.
std::vector<ASTNode*> m_nodes;
};
// -----------------------------------------
// ()
// AST node for a list form: an ordered sequence of child nodes.
// The list owns its children, the destructor deletes every stored node.
class List final : public ASTNode {
public:
    List() = default;
    virtual ~List() override;

    // The children are owning raw pointers, a copy would delete them twice
    List(const List&) = delete;
    List& operator=(const List&) = delete;

    virtual bool isList() const override { return true; }

    // Appends a child node at the end of the list
    void addNode(ASTNode* node);

    // Read-only access to the children, in insertion order
    const std::vector<ASTNode*>& nodes() const { return m_nodes; }

private:
    std::vector<ASTNode*> m_nodes;
};
// -----------------------------------------
// "string"
class String final : public ASTNode {
public:
String(const std::string& data);
virtual ~String() = default;
virtual bool isString() const override { return true; }
const std::string& data() const { return m_data; }
private:
std::string m_data;
};
// -----------------------------------------
// 123
class Number final : public ASTNode {
public:
Number(int64_t number);
virtual ~Number() = default;
virtual bool isNumber() const override { return true; }
int64_t number() const { return m_number; }
private:
int64_t m_number { 0 };
};
// -----------------------------------------
// true, false, nil
// AST node for the special symbols.
// NOTE(review): the constructor and destructor are only declared here and no
// definition is visible in ast.cpp; instantiating this class would presumably
// fail to link -- confirm before use.
class SpecialSymbol final : public ASTNode {
public:
SpecialSymbol();
virtual ~SpecialSymbol();
virtual bool isSpecialSymbol() const override { return true; }
private:
// Symbol text; nothing in this class sets or exposes it yet.
std::string m_symbol;
};
// -----------------------------------------
// Other symbols
// AST node holding the name of a symbol.
class Symbol final : public ASTNode {
public:
    Symbol(const std::string& symbol);
    virtual ~Symbol() = default;

    virtual bool isSymbol() const override { return true; }

    // Returned by reference, like String::data(), so an access does not copy the name
    const std::string& symbol() const { return m_symbol; }

private:
    std::string m_symbol;
};
// -----------------------------------------
// clang-format off
// Explicit specializations of ASTNode::fastIs<T>(), one per node class.
// Each forwards to the matching virtual is*() predicate; any other T still
// selects the deleted primary template.
template<>
inline bool ASTNode::fastIs<Vector>() const { return isVector(); }
template<>
inline bool ASTNode::fastIs<HashMap>() const { return isHashMap(); }
template<>
inline bool ASTNode::fastIs<List>() const { return isList(); }
template<>
inline bool ASTNode::fastIs<String>() const { return isString(); }
template<>
inline bool ASTNode::fastIs<Number>() const { return isNumber(); }
template<>
inline bool ASTNode::fastIs<SpecialSymbol>() const { return isSpecialSymbol(); }
template<>
inline bool ASTNode::fastIs<Symbol>() const { return isSymbol(); }
// clang-format on
} // namespace blaze

248
src/lexer.cpp

@ -0,0 +1,248 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#include <algorithm>
#include <string>
#include <unordered_set>
#include "ruc/format/print.h"
#include "ruc/genericlexer.h"
#include "lexer.h"
namespace blaze {
// Creates a lexer over `input` by forwarding it to the ruc::GenericLexer base.
Lexer::Lexer(std::string_view input)
: ruc::GenericLexer(input)
{
}
// Nothing to release beyond what the members clean up themselves.
Lexer::~Lexer() = default;
// -----------------------------------------
void Lexer::tokenize()
{
if (m_tokens.size() != 0) {
return;
}
while (m_index < m_input.length()) {
switch (peek()) {
case '~': // ~@ or ~
consumeSpliceUnquoteOrUnquote();
break;
case '[':
m_tokens.push_back({ Token::Type::ParenOpen, m_line, m_column, "[" });
break;
case ']':
m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "]" });
break;
case '{':
m_tokens.push_back({ Token::Type::BraceOpen, m_line, m_column, "{" });
break;
case '}':
m_tokens.push_back({ Token::Type::BraceClose, m_line, m_column, "}" });
break;
case '(':
m_tokens.push_back({ Token::Type::ParenOpen, m_line, m_column, "(" });
break;
case ')':
m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, ")" });
break;
case '\'':
m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "'" });
break;
case '`':
m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "`" });
break;
case '^':
m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "^" });
break;
case '@':
m_tokens.push_back({ Token::Type::ParenClose, m_line, m_column, "@" });
break;
case '"':
if (!consumeString()) {
return;
}
break;
case ';':
consumeComment();
break;
case ' ':
case '\t':
case ',':
break;
case '\r':
if (peek(1) == '\n') { // CRLF \r\n
break;
}
m_column = -1;
m_line++;
break;
case '\n':
m_column = -1;
m_line++;
break;
default:
consumeValue();
break;
}
ignore();
m_column++;
}
}
// Handles '~': emits a "~@" token when the tilde is followed by '@',
// otherwise a "~" token. Always returns true.
// On return the cursor rests on the last character of the token, because
// tokenize() skips one character after every case.
bool Lexer::consumeSpliceUnquoteOrUnquote()
{
    size_t column = m_column;

    if (peek(1) == '@') {
        ignore(); // ~
        m_tokens.push_back({ Token::Type::Special, m_line, column, "~@" });
    }
    else {
        // The tilde is left for tokenize() to skip; consuming it here would
        // make tokenize() drop the character that follows it
        m_tokens.push_back({ Token::Type::Tilde, m_line, column, "~" });
    }

    return true;
}
bool Lexer::consumeString()
{
size_t column = m_column;
std::string text = "";
static std::unordered_set<char> exit = {
'"',
'\r',
'\n',
'\0',
};
bool escape = false;
char character = consume();
for (;;) {
character = peek();
if (!escape && character == '\\') {
text += '\\';
ignore();
escape = true;
continue;
}
if (!escape && exit.find(character) != exit.end()) {
break;
}
text += character;
ignore();
if (escape) {
escape = false;
}
}
m_tokens.push_back({ Token::Type::String, m_line, column, text });
return true;
}
bool Lexer::consumeComment()
{
size_t column = m_column;
std::string comment = "";
ignore(); // ;
static std::unordered_set<char> exit = {
'\r',
'\n',
'\0',
};
char character = 0;
for (;;) {
character = peek();
if (exit.find(character) != exit.end()) {
break;
}
comment += character;
ignore();
}
// Trim comment
comment.erase(comment.begin(),
std::find_if(comment.begin(), comment.end(), [](char c) { return !std::isspace(c); }));
comment.erase(std::find_if(comment.rbegin(), comment.rend(), [](char c) { return !std::isspace(c); }).base(),
comment.end());
m_tokens.push_back({ Token::Type::Comment, m_line, column, comment });
return true;
}
bool Lexer::consumeValue()
{
size_t column = m_column;
std::string value = "";
static std::unordered_set<char> exit = {
'[',
']',
'{',
'}',
'(',
')',
'\'',
'`',
',',
'"',
';',
' ',
'\t',
'\r',
'\n',
'\0',
};
char character = 0;
for (;;) {
character = peek();
if (exit.find(character) != exit.end()) {
break;
}
value += character;
ignore();
}
m_tokens.push_back({ Token::Type::Value, m_line, column, value });
retreat();
return true;
}
// Prints the number of tokens, followed by all token symbols concatenated
// between double quotes.
void Lexer::dump() const
{
    print("tokens: {}\n", m_tokens.size());

    print("\"");
    for (size_t i = 0; i < m_tokens.size(); ++i) {
        print("{}", m_tokens[i].symbol);
    }
    print("\"\n");
}
} // namespace blaze

110
src/lexer.h

@ -0,0 +1,110 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <cstddef> // size_t
#include <cstdint> // uint8_t
#include <string>
#include <vector>
#include "ruc/format/print.h"
#include "ruc/genericlexer.h"
namespace blaze {
// A single lexical token: its kind, where it starts and its text.
struct Token {
    enum class Type : uint8_t {
        None,
        Special,      // ~@
        BracketOpen,  // [
        BracketClose, // ]
        BraceOpen,    // {
        BraceClose,   // }
        ParenOpen,    // (
        ParenClose,   // )
        Quote,        // '
        Backtick,     // `
        Tilde,        // ~
        Caret,        // ^
        At,           // @
        String,       // "foobar"
        Comment,      // ;
        Value,        // symbols, numbers, "true", "false", and "nil"
    };

    Type type { Type::None };
    // The lexer builds tokens positionally as { type, line, column, symbol },
    // so line has to be declared before column
    size_t line { 0 };
    size_t column { 0 };
    std::string symbol;
};
// Lexical analyzer -> tokenizes
// Built on ruc::GenericLexer; the produced tokens are kept in m_tokens.
class Lexer final : public ruc::GenericLexer {
public:
Lexer(std::string_view input);
virtual ~Lexer();
// Fills m_tokens from the input; does nothing when tokens are already present.
void tokenize();
// Prints the token count followed by all token symbols concatenated.
void dump() const;
// Mutable access to the token list (step1_read_print.cpp moves the tokens out through it).
std::vector<Token>& tokens() { return m_tokens; }
private:
// Helpers called by tokenize(); each one appends a token to m_tokens.
bool consumeSpliceUnquoteOrUnquote(); // ~@ or ~
bool consumeString();
bool consumeComment();
bool consumeValue();
// Position bookkeeping, updated by tokenize().
size_t m_column { 0 };
size_t m_line { 0 };
std::vector<Token> m_tokens;
};
} // namespace blaze
// ~^@
// (+ 2 (* 3 4))
// Lexing -> creates tokens
// Parsing -> creates AST
// class Thing1 {
// public:
// std::vector<int>& numbers() { return m_numbers; }
// private:
// std::vector<int> m_numbers;
// };
// class Thing2 {
// public:
// std::vector<int>&& numbers() { return std::move(m_numbers); }
// private:
// std::vector<int> m_numbers;
// };
// class OtherThing {
// public:
// OtherThing(std::vector<int>&& numbers) noexcept
// : m_numbers(std::move(numbers))
// {
// }
// private:
// std::vector<int> m_numbers;
// };
// int main()
// {
// Thing1 thing1;
// Thing2 thing2;
// OtherThing other_thing(std::move(thing1.numbers()));
// OtherThing other_thing2(thing2.numbers());
// }

70
src/printer.cpp

@ -0,0 +1,70 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#include "ruc/format/print.h"
#include "printer.h"
#include "types.h"
namespace blaze {
Printer::Printer(ASTNode* node)
: m_node(node)
{
}
// Deletes the node passed to the constructor (deleting a null pointer is a no-op).
Printer::~Printer()
{
delete m_node;
}
// -----------------------------------------
// Prints the node tree followed by a newline; prints nothing without a node.
void Printer::dump()
{
    if (m_node != nullptr) {
        dumpImpl(m_node);
        print("\n");
    }
}
// Prints `node` and, for lists, all of its children recursively.
// Node kinds other than List, String, Number and Symbol print nothing.
void Printer::dumpImpl(ASTNode* node)
{
    // No separator while m_firstNode is still set or directly after a '('
    auto separate = [this]() {
        if (m_firstNode || m_previousNodeIsList) {
            return;
        }
        print(" ");
    };

    if (is<List>(node)) {
        separate();
        print("(");
        m_firstNode = false;
        m_previousNodeIsList = true;

        const auto& children = static_cast<List*>(node)->nodes();
        for (ASTNode* child : children) {
            dumpImpl(child);
            m_previousNodeIsList = false;
        }

        print(")");
        return;
    }

    if (is<String>(node)) {
        separate();
        print("\"{}\"", static_cast<String*>(node)->data());
        return;
    }

    if (is<Number>(node)) {
        separate();
        print("{}", static_cast<Number*>(node)->number());
        return;
    }

    if (is<Symbol>(node)) {
        separate();
        print("{}", static_cast<Symbol*>(node)->symbol());
    }
}
} // namespace blaze

29
src/printer.h

@ -0,0 +1,29 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include "ast.h"
namespace blaze {
// Serializer -> return to string
// Prints an AST to standard output.
// The printer owns the node it is given: the destructor deletes it.
class Printer {
public:
    Printer(ASTNode* node);
    virtual ~Printer();

    // m_node is deleted in the destructor, a copy would delete it twice
    Printer(const Printer&) = delete;
    Printer& operator=(const Printer&) = delete;

    // Prints the tree followed by a newline; does nothing without a node
    void dump();

private:
    void dumpImpl(ASTNode* node);

    // Spacing state used while printing
    bool m_firstNode { true };
    bool m_previousNodeIsList { false };

    ASTNode* m_node { nullptr };
};
} // namespace blaze

161
src/reader.cpp

@ -0,0 +1,161 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#include <cstddef> // size_t
#include <cstdint> // uint64_t
#include <cstdlib> // std::strtoll
#include <utility> // std::move
#include "ruc/format/color.h"
#include "ruc/meta/assert.h"
#include "ast.h"
#include "reader.h"
#include "types.h"
namespace blaze {
// Takes over the given token list by moving it into m_tokens.
Reader::Reader(std::vector<Token>&& tokens) noexcept
: m_tokens(std::move(tokens))
{
}
// The parsed node is not deleted here.
Reader::~Reader() = default;
// -----------------------------------------
void Reader::read()
{
if (m_node != nullptr) {
return;
}
m_node = readImpl();
VERIFY(m_index > m_tokens.size() - 1, "more than one sexp in input");
}
// Parses one form starting at the current token, dispatching on its type.
// Token types other than ParenOpen, String and Value are not implemented.
ASTNode* Reader::readImpl()
{
    const Token::Type type = peek().type;

    if (type == Token::Type::ParenOpen) {
        return readList();
    }
    if (type == Token::Type::String) {
        return readString();
    }
    if (type == Token::Type::Value) {
        return readValue();
    }

    // Unimplemented token
    VERIFY_NOT_REACHED();
    return nullptr;
}
// Parses a list: skips the opening token, reads forms until a ParenClose
// token and skips that one too. Asserts when the tokens run out first.
ASTNode* Reader::readList()
{
    ignore(); // (

    List* list = new List();
    for (;;) {
        if (isEOF() || peek().type == Token::Type::ParenClose) {
            break;
        }
        list->addNode(readImpl());
    }

    VERIFY(m_index != m_tokens.size(), "missing closing ')'");
    ignore(); // )

    return list;
}
// Consumes the current token and wraps its text in a new String node.
ASTNode* Reader::readString()
{
    return new String(consume().symbol);
}
// Consumes the current token and turns it into a Number when std::strtoll
// (base 10) accepts the whole text, otherwise into a Symbol.
ASTNode* Reader::readValue()
{
    const Token token = consume();
    const std::string& text = token.symbol;

    const char* const begin = text.c_str();
    char* parsedUpTo = nullptr;
    const int64_t number = std::strtoll(begin, &parsedUpTo, 10);

    const bool wholeTokenIsNumber = (parsedUpTo == begin + text.size());
    if (!wholeTokenIsNumber) {
        return new Symbol(text);
    }

    return new Number(number);
}
// -----------------------------------------
// True once the read position has moved past the last token.
bool Reader::isEOF() const
{
    return !(m_index < m_tokens.size());
}
// Returns a copy of the current token without advancing; asserts at the end
// of the token list.
Token Reader::peek() const
{
    VERIFY(!isEOF());

    const Token& current = m_tokens[m_index];
    return current;
}
// Returns a copy of the current token and advances to the next one; asserts
// at the end of the token list.
Token Reader::consume()
{
    VERIFY(!isEOF());

    const size_t current = m_index;
    ++m_index;
    return m_tokens[current];
}
// Advances past the current token without looking at it (no bounds check).
void Reader::ignore()
{
    ++m_index;
}
// -----------------------------------------
// Prints the parsed tree for debugging, starting at the root node.
void Reader::dump()
{
dumpImpl(m_node);
}
// Prints one line describing `node`, indented by two spaces per nesting
// level; lists are followed by their children one level deeper.
// Any other kind of node only produces an empty line.
void Reader::dumpImpl(ASTNode* node)
{
    const std::string indentation(m_indentation * 2, ' ');

    if (is<List>(node)) {
        print("{}", indentation);
        print(fg(ruc::format::TerminalColor::Blue), "ListContainer");
        print(" <");
        print(fg(ruc::format::TerminalColor::Blue), "()");
        print(">\n");

        ++m_indentation;
        for (ASTNode* child : static_cast<List*>(node)->nodes()) {
            dumpImpl(child);
        }
        --m_indentation;
        return;
    }

    // Leaf nodes: description first, the shared line break afterwards
    if (is<String>(node)) {
        print("{}", indentation);
        print(fg(ruc::format::TerminalColor::Yellow), "StringNode");
        print(" <{}>", static_cast<String*>(node)->data());
    }
    else if (is<Number>(node)) {
        print("{}", indentation);
        print(fg(ruc::format::TerminalColor::Yellow), "NumberNode");
        print(" <{}>", static_cast<Number*>(node)->number());
    }
    else if (is<Symbol>(node)) {
        print("{}", indentation);
        print(fg(ruc::format::TerminalColor::Yellow), "SymbolNode");
        print(" <{}>", static_cast<Symbol*>(node)->symbol());
    }

    print("\n");
}
} // namespace blaze

49
src/reader.h

@ -0,0 +1,49 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <cstddef> // size_t
#include <vector>
#include "ast.h"
#include "lexer.h"
namespace blaze {
// Parsing -> creates AST
// Turns a token list into a tree of ASTNode objects.
class Reader {
public:
// Takes over the token list.
Reader(std::vector<Token>&& tokens) noexcept;
virtual ~Reader();
// Parses the tokens into m_node; does nothing when a node already exists.
void read();
// Prints the parsed tree for debugging.
void dump();
// The parsed root node, nullptr before read().
// NOTE(review): the destructor in reader.cpp does not delete it, so the
// caller presumably takes ownership -- confirm.
ASTNode* node() { return m_node; }
private:
// Token cursor helpers.
bool isEOF() const;
Token peek() const;
Token consume();
void ignore();
// Recursive-descent parsing helpers, one per kind of form.
ASTNode* readImpl();
ASTNode* readList();
ASTNode* readString();
ASTNode* readValue();
void dumpImpl(ASTNode* node);
// Index of the current token in m_tokens.
size_t m_index { 0 };
// Current nesting depth used by dumpImpl().
size_t m_indentation { 0 };
std::vector<Token> m_tokens;
ASTNode* m_node { nullptr };
};
} // namespace blaze

44
src/step0_repl.cpp

@ -0,0 +1,44 @@
#include <cstdio>
#include <iostream> // std::cin
#include <string> // std::getline
#include <string_view>
#if 0
// Step 0 reader: no parsing yet, the input is handed back untouched.
auto read(std::string_view data) -> std::string_view
{
    const std::string_view form = data;
    return form;
}
// Step 0 evaluator: no evaluation yet, the input is the result.
auto eval(std::string_view data) -> std::string_view
{
    const std::string_view result = data;
    return result;
}
// Writes `data` followed by a newline to standard output.
auto print(std::string_view data) -> void
{
    // A string_view is not guaranteed to be NUL-terminated, so pass its
    // length explicitly instead of relying on "%s" finding a terminator
    printf("%.*s\n", static_cast<int>(data.size()), data.data());
}
// Runs one read-eval-print cycle on a line of input.
auto rep(std::string_view data) -> void
{
    const auto form = read(data);
    const auto result = eval(form);
    print(result);
}
// Interactive loop: prompts, reads one line and runs it through rep() until
// standard input reports end-of-file or a failure.
auto main() -> int
{
    for (;;) {
        printf("user> ");

        std::string input;
        std::getline(std::cin, input);

        // Exit with Ctrl-D
        const bool finished = std::cin.eof() || std::cin.fail();
        if (finished) {
            break;
        }

        rep(input);
    }

    return 0;
}
#endif

57
src/step1_read_print.cpp

@ -0,0 +1,57 @@
#include <cstdio>
#include <iostream> // std::cin
#include <string> // std::getline
#include <string_view>
#include "ast.h"
#include "lexer.h"
#include "printer.h"
#include "reader.h"
#if 1
// Tokenizes `data`, parses the tokens and returns the root node of the
// resulting tree.
auto read(std::string_view data) -> blaze::ASTNode*
{
    blaze::Lexer tokenizer(data);
    tokenizer.tokenize();
    // tokenizer.dump();

    blaze::Reader parser(std::move(tokenizer.tokens()));
    parser.read();
    // parser.dump();

    return parser.node();
}
// Step 1 evaluator: no evaluation yet, the tree is returned as-is.
auto eval(blaze::ASTNode* node) -> blaze::ASTNode*
{
    blaze::ASTNode* result = node;
    return result;
}
// Prints the tree through a temporary blaze::Printer, whose destructor runs
// at the end of the statement.
auto print(blaze::ASTNode* node) -> void
{
    blaze::Printer(node).dump();
}
auto rep(std::string_view data) -> void
{
print(eval(read(data)));
}
// Interactive loop: prompts, reads one line and runs it through rep() until
// standard input reports end-of-file or a failure.
auto main() -> int
{
    for (;;) {
        printf("user> ");

        std::string input;
        std::getline(std::cin, input);

        // Exit with Ctrl-D
        const bool finished = std::cin.eof() || std::cin.fail();
        if (finished) {
            break;
        }

        rep(input);
    }

    return 0;
}
#endif

28
src/types.h

@ -0,0 +1,28 @@
/*
* Copyright (C) 2023 Riyyi
*
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <typeinfo>
// Tells whether `input` is a T.
// Uses input.fastIs<T>() when that call is well-formed, otherwise compares
// the typeid of `input` with the typeid of T.
template<typename T, typename U>
inline bool is(U& input)
{
    if constexpr (requires { input.template fastIs<T>(); }) {
        return input.template fastIs<T>();
    }
    else {
        return typeid(input) == typeid(T);
    }
}
// Pointer overload: a null pointer is never a T, anything else is checked
// through the reference overload.
template<typename T, typename U>
inline bool is(U* input)
{
    if (input == nullptr) {
        return false;
    }
    return is<T>(*input);
}
// serenity/AK/TypeCasts.h
// serenity/Userland/Libraries/LibJS/AST.h
Loading…
Cancel
Save