Browse Source

Everywhere: Add Keyword parsing

master
Riyyi 2 years ago
parent
commit
27e584ea84
  1. 15
      src/ast.cpp
  2. 53
      src/ast.h
  3. 73
      src/lexer.cpp
  4. 47
      src/lexer.h
  5. 4
      src/printer.cpp
  6. 12
      src/reader.cpp
  7. 3
      src/reader.h

15
src/ast.cpp

@ -5,6 +5,7 @@
*/
#include <cstdint> // int64_t
#include <string>
#include "ast.h"
@ -31,6 +32,13 @@ String::String(const std::string& data)
// -----------------------------------------
Keyword::Keyword(const std::string& data)
: m_data(data)
{
}
// -----------------------------------------
Number::Number(int64_t number)
: m_number(number)
{
@ -43,4 +51,11 @@ Symbol::Symbol(const std::string& symbol)
{
}
// -----------------------------------------
Value::Value(const std::string& value)
: m_value(value)
{
}
} // namespace blaze

53
src/ast.h

@ -28,9 +28,11 @@ public:
virtual bool isHashMap() const { return false; }
virtual bool isList() const { return false; }
virtual bool isString() const { return false; }
virtual bool isKeyword() const { return false; }
virtual bool isNumber() const { return false; }
virtual bool isSpecialSymbol() const { return false; }
virtual bool isValue() const { return false; }
virtual bool isSymbol() const { return false; }
virtual bool isFunction() const { return false; }
protected:
ASTNode() {}
@ -109,6 +111,21 @@ private:
// -----------------------------------------
// :keyword
class Keyword final : public ASTNode {
public:
Keyword(const std::string& data);
virtual ~Keyword() = default;
virtual bool isKeyword() const override { return true; }
const std::string& keyword() const { return m_data; }
private:
std::string m_data;
};
// -----------------------------------------
// 123
class Number final : public ASTNode {
public:
@ -125,13 +142,15 @@ private:
// -----------------------------------------
// true, false, nil
class SpecialSymbol final : public ASTNode {
// Symbols
class Symbol final : public ASTNode {
public:
SpecialSymbol();
virtual ~SpecialSymbol();
Symbol(const std::string& symbol);
virtual ~Symbol() = default;
virtual bool isSpecialSymbol() const override { return true; }
virtual bool isSymbol() const override { return true; }
const std::string& symbol() const { return m_symbol; }
private:
std::string m_symbol;
@ -139,18 +158,19 @@ private:
// -----------------------------------------
// Other symbols
class Symbol final : public ASTNode {
// true, false, nil
class Value final : public ASTNode {
public:
Symbol(const std::string& symbol);
virtual ~Symbol() = default;
Value(const std::string& value);
virtual ~Value() = default;
virtual bool isSymbol() const override { return true; }
virtual bool isValue() const override { return true; }
std::string symbol() const { return m_symbol; }
const std::string& value() const { return m_value; }
private:
std::string m_symbol;
std::string m_value;
};
};
// -----------------------------------------
@ -172,13 +192,16 @@ template<>
inline bool ASTNode::fastIs<String>() const { return isString(); }
template<>
inline bool ASTNode::fastIs<Number>() const { return isNumber(); }
inline bool ASTNode::fastIs<Keyword>() const { return isKeyword(); }
template<>
inline bool ASTNode::fastIs<SpecialSymbol>() const { return isSpecialSymbol(); }
inline bool ASTNode::fastIs<Number>() const { return isNumber(); }
template<>
inline bool ASTNode::fastIs<Symbol>() const { return isSymbol(); }
template<>
inline bool ASTNode::fastIs<Value>() const { return isValue(); }
// clang-format on
} // namespace blaze

73
src/lexer.cpp

@ -5,7 +5,7 @@
*/
#include <algorithm>
#include <string>
#include <string> // std::to_string
#include <unordered_set>
#include "ruc/format/print.h"
@ -73,6 +73,11 @@ void Lexer::tokenize()
return;
}
break;
case ':':
if (!consumeKeyword()) {
return;
}
break;
case ';':
consumeComment();
break;
@ -162,14 +167,28 @@ bool Lexer::consumeString()
return true;
}
bool Lexer::consumeComment()
bool Lexer::consumeKeyword()
{
size_t column = m_column;
std::string comment = "";
std::string keyword;
keyword += 0x7f; // 127
ignore(); // ;
ignore(); // :
static std::unordered_set<char> exit = {
'[',
']',
'{',
'}',
'(',
')',
'\'',
'`',
',',
'"',
';',
' ',
'\t',
'\r',
'\n',
'\0',
@ -183,17 +202,13 @@ bool Lexer::consumeComment()
break;
}
comment += character;
keyword += character;
ignore();
}
// Trim comment
comment.erase(comment.begin(),
std::find_if(comment.begin(), comment.end(), [](char c) { return !std::isspace(c); }));
comment.erase(std::find_if(comment.rbegin(), comment.rend(), [](char c) { return !std::isspace(c); }).base(),
comment.end());
m_tokens.push_back({ Token::Type::Keyword, m_line, column, keyword });
m_tokens.push_back({ Token::Type::Comment, m_line, column, comment });
retreat();
return true;
}
@ -241,6 +256,42 @@ bool Lexer::consumeValue()
return true;
}
// Lexes a ';' comment.
//
// Remembers m_column before stepping past the leading ';', then collects
// characters until peek() yields '\r', '\n' or '\0' (the terminator itself is
// not consumed). Leading and trailing whitespace is trimmed from the collected
// text, which is then appended to m_tokens as a Token::Type::Comment.
//
// Always returns true.
bool Lexer::consumeComment()
{
	size_t column = m_column;
	std::string comment = "";

	ignore(); // ;

	static std::unordered_set<char> exit = {
		'\r',
		'\n',
		'\0',
	};

	char character = 0;
	for (;;) {
		character = peek();
		if (exit.find(character) != exit.end()) {
			break;
		}

		comment += character;
		ignore();
	}

	// std::isspace() has undefined behaviour when handed a negative value,
	// which is what a plain (signed) char holds for any non-ASCII byte.
	// Taking the character as unsigned char keeps the argument in range.
	auto isNotSpace = [](unsigned char c) { return !std::isspace(c); };

	// Trim comment
	comment.erase(comment.begin(),
	              std::find_if(comment.begin(), comment.end(), isNotSpace));
	comment.erase(std::find_if(comment.rbegin(), comment.rend(), isNotSpace).base(),
	              comment.end());

	m_tokens.push_back({ Token::Type::Comment, m_line, column, comment });

	return true;
}
void Lexer::dump() const
{
print("tokens: {}\n", m_tokens.size());

47
src/lexer.h

@ -32,8 +32,9 @@ struct Token {
Caret, // ^
At, // @
String, // "foobar"
Keyword, // :keyword
Value, // numbers, "true", "false", and "nil", symbols
Comment, // ;
Value, // symbols, numbers, "true", "false", and "nil"
Error,
};
@ -58,8 +59,9 @@ public:
private:
bool consumeSpliceUnquoteOrUnquote(); // ~@ or ~
bool consumeString();
bool consumeComment();
bool consumeKeyword();
bool consumeValue();
bool consumeComment();
size_t m_column { 0 };
size_t m_line { 0 };
@ -68,44 +70,3 @@ private:
};
} // namespace blaze
// ~^@
// (+ 2 (* 3 4))
// Lexing -> creates tokens
// Parsing -> creates AST
// class Thing1 {
// public:
// std::vector<int>& numbers() { return m_numbers; }
// private:
// std::vector<int> m_numbers;
// };
// class Thing2 {
// public:
// std::vector<int>&& numbers() { return std::move(m_numbers); }
// private:
// std::vector<int> m_numbers;
// };
// class OtherThing {
// public:
// OtherThing(std::vector<int>&& numbers) noexcept
// : m_numbers(std::move(numbers))
// {
// }
// private:
// std::vector<int> m_numbers;
// };
// int main()
// {
// Thing1 thing1;
// Thing2 thing2;
// OtherThing other_thing(std::move(thing1.numbers()));
// OtherThing other_thing2(thing2.numbers());
// }

4
src/printer.cpp

@ -88,6 +88,10 @@ void Printer::dumpImpl(ASTNode* node)
printSpacing();
print("{}", static_cast<String*>(node)->data());
}
else if (is<Keyword>(node)) {
printSpacing();
print(":{}", static_cast<Keyword*>(node)->keyword().substr(1));
}
else if (is<Number>(node)) {
printSpacing();
print("{}", static_cast<Number*>(node)->number());

12
src/reader.cpp

@ -117,14 +117,17 @@ ASTNode* Reader::readImpl()
case Token::Type::At: // @
return readDeref();
break;
case Token::Type::String:
case Token::Type::String: // "foobar"
return readString();
break;
case Token::Type::Keyword: // :keyword
return readKeyword();
break;
case Token::Type::Comment: // ;
ignore();
return nullptr;
break;
case Token::Type::Value:
case Token::Type::Value: // true, false, nil
return readValue();
break;
default:
@ -301,6 +304,11 @@ ASTNode* Reader::readString()
return new String(symbol);
}
// Takes the token returned by consume() and wraps its text in a
// heap-allocated Keyword node, which is handed back to the caller.
ASTNode* Reader::readKeyword()
{
	const Token keywordToken = consume();
	return new Keyword(keywordToken.symbol);
}
ASTNode* Reader::readValue()
{
Token token = consume();

3
src/reader.h

@ -45,7 +45,8 @@ private:
ASTNode* readWithMeta(); // ^
ASTNode* readDeref(); // @
ASTNode* readString(); // "foobar"
ASTNode* readValue();
ASTNode* readKeyword(); // :keyword
ASTNode* readValue(); // true, false, nil
void dumpImpl(ASTNode* node);

Loading…
Cancel
Save