Browse Source

Everywhere: Add Keyword parsing

master
Riyyi 2 years ago
parent
commit
27e584ea84
  1. 15
      src/ast.cpp
  2. 53
      src/ast.h
  3. 73
      src/lexer.cpp
  4. 47
      src/lexer.h
  5. 4
      src/printer.cpp
  6. 12
      src/reader.cpp
  7. 3
      src/reader.h

15
src/ast.cpp

@ -5,6 +5,7 @@
*/ */
#include <cstdint> // int64_t #include <cstdint> // int64_t
#include <string>
#include "ast.h" #include "ast.h"
@ -31,6 +32,13 @@ String::String(const std::string& data)
// ----------------------------------------- // -----------------------------------------
Keyword::Keyword(const std::string& data)
: m_data(data)
{
}
// -----------------------------------------
Number::Number(int64_t number) Number::Number(int64_t number)
: m_number(number) : m_number(number)
{ {
@ -43,4 +51,11 @@ Symbol::Symbol(const std::string& symbol)
{ {
} }
// -----------------------------------------
Value::Value(const std::string& value)
: m_value(value)
{
}
} // namespace blaze } // namespace blaze

53
src/ast.h

@ -28,9 +28,11 @@ public:
virtual bool isHashMap() const { return false; } virtual bool isHashMap() const { return false; }
virtual bool isList() const { return false; } virtual bool isList() const { return false; }
virtual bool isString() const { return false; } virtual bool isString() const { return false; }
virtual bool isKeyword() const { return false; }
virtual bool isNumber() const { return false; } virtual bool isNumber() const { return false; }
virtual bool isSpecialSymbol() const { return false; } virtual bool isValue() const { return false; }
virtual bool isSymbol() const { return false; } virtual bool isSymbol() const { return false; }
virtual bool isFunction() const { return false; }
protected: protected:
ASTNode() {} ASTNode() {}
@ -109,6 +111,21 @@ private:
// ----------------------------------------- // -----------------------------------------
// :keyword
class Keyword final : public ASTNode {
public:
Keyword(const std::string& data);
virtual ~Keyword() = default;
virtual bool isKeyword() const override { return true; }
const std::string& keyword() const { return m_data; }
private:
std::string m_data;
};
// -----------------------------------------
// 123 // 123
class Number final : public ASTNode { class Number final : public ASTNode {
public: public:
@ -125,13 +142,15 @@ private:
// ----------------------------------------- // -----------------------------------------
// true, false, nil // Symbols
class SpecialSymbol final : public ASTNode { class Symbol final : public ASTNode {
public: public:
SpecialSymbol(); Symbol(const std::string& symbol);
virtual ~SpecialSymbol(); virtual ~Symbol() = default;
virtual bool isSpecialSymbol() const override { return true; } virtual bool isSymbol() const override { return true; }
const std::string& symbol() const { return m_symbol; }
private: private:
std::string m_symbol; std::string m_symbol;
@ -139,18 +158,19 @@ private:
// ----------------------------------------- // -----------------------------------------
// Other symbols // true, false, nil
class Symbol final : public ASTNode { class Value final : public ASTNode {
public: public:
Symbol(const std::string& symbol); Value(const std::string& value);
virtual ~Symbol() = default; virtual ~Value() = default;
virtual bool isSymbol() const override { return true; } virtual bool isValue() const override { return true; }
std::string symbol() const { return m_symbol; } const std::string& value() const { return m_value; }
private: private:
std::string m_symbol; std::string m_value;
};
}; };
// ----------------------------------------- // -----------------------------------------
@ -172,13 +192,16 @@ template<>
inline bool ASTNode::fastIs<String>() const { return isString(); } inline bool ASTNode::fastIs<String>() const { return isString(); }
template<> template<>
inline bool ASTNode::fastIs<Number>() const { return isNumber(); } inline bool ASTNode::fastIs<Keyword>() const { return isKeyword(); }
template<> template<>
inline bool ASTNode::fastIs<SpecialSymbol>() const { return isSpecialSymbol(); } inline bool ASTNode::fastIs<Number>() const { return isNumber(); }
template<> template<>
inline bool ASTNode::fastIs<Symbol>() const { return isSymbol(); } inline bool ASTNode::fastIs<Symbol>() const { return isSymbol(); }
template<>
inline bool ASTNode::fastIs<Value>() const { return isValue(); }
// clang-format on // clang-format on
} // namespace blaze } // namespace blaze

73
src/lexer.cpp

@ -5,7 +5,7 @@
*/ */
#include <algorithm> #include <algorithm>
#include <string> #include <string> // std::to_string
#include <unordered_set> #include <unordered_set>
#include "ruc/format/print.h" #include "ruc/format/print.h"
@ -73,6 +73,11 @@ void Lexer::tokenize()
return; return;
} }
break; break;
case ':':
if (!consumeKeyword()) {
return;
}
break;
case ';': case ';':
consumeComment(); consumeComment();
break; break;
@ -162,14 +167,28 @@ bool Lexer::consumeString()
return true; return true;
} }
bool Lexer::consumeComment() bool Lexer::consumeKeyword()
{ {
size_t column = m_column; size_t column = m_column;
std::string comment = ""; std::string keyword;
keyword += 0x7f; // 127
ignore(); // ; ignore(); // :
static std::unordered_set<char> exit = { static std::unordered_set<char> exit = {
'[',
']',
'{',
'}',
'(',
')',
'\'',
'`',
',',
'"',
';',
' ',
'\t',
'\r', '\r',
'\n', '\n',
'\0', '\0',
@ -183,17 +202,13 @@ bool Lexer::consumeComment()
break; break;
} }
comment += character; keyword += character;
ignore(); ignore();
} }
// Trim comment m_tokens.push_back({ Token::Type::Keyword, m_line, column, keyword });
comment.erase(comment.begin(),
std::find_if(comment.begin(), comment.end(), [](char c) { return !std::isspace(c); }));
comment.erase(std::find_if(comment.rbegin(), comment.rend(), [](char c) { return !std::isspace(c); }).base(),
comment.end());
m_tokens.push_back({ Token::Type::Comment, m_line, column, comment }); retreat();
return true; return true;
} }
@ -241,6 +256,42 @@ bool Lexer::consumeValue()
return true; return true;
} }
// Consume a ';' line comment and append it to the token list as a Comment.
//
// Characters are collected until the next '\r', '\n' or '\0' (which is left
// unconsumed), then leading and trailing whitespace is trimmed from the text.
// The token records the column that was current before the ';' was skipped.
// Always returns true.
bool Lexer::consumeComment()
{
    size_t column = m_column;
    std::string comment = "";

    ignore(); // ;

    static const std::unordered_set<char> exit = {
        '\r',
        '\n',
        '\0',
    };

    char character = 0;
    for (;;) {
        character = peek();

        if (exit.find(character) != exit.end()) {
            break;
        }

        comment += character;
        ignore();
    }

    // std::isspace is undefined for negative arguments other than EOF, and a
    // plain char is negative for non-ASCII bytes (e.g. UTF-8 text inside a
    // comment) on signed-char platforms, so take the byte as unsigned char.
    auto isNotSpace = [](unsigned char c) { return !std::isspace(c); };

    // Trim comment
    comment.erase(comment.begin(),
                  std::find_if(comment.begin(), comment.end(), isNotSpace));
    comment.erase(std::find_if(comment.rbegin(), comment.rend(), isNotSpace).base(),
                  comment.end());

    m_tokens.push_back({ Token::Type::Comment, m_line, column, comment });

    return true;
}
void Lexer::dump() const void Lexer::dump() const
{ {
print("tokens: {}\n", m_tokens.size()); print("tokens: {}\n", m_tokens.size());

47
src/lexer.h

@ -32,8 +32,9 @@ struct Token {
Caret, // ^ Caret, // ^
At, // @ At, // @
String, // "foobar" String, // "foobar"
Keyword, // :keyword
Value, // numbers, "true", "false", and "nil", symbols
Comment, // ; Comment, // ;
Value, // symbols, numbers, "true", "false", and "nil"
Error, Error,
}; };
@ -58,8 +59,9 @@ public:
private: private:
bool consumeSpliceUnquoteOrUnquote(); // ~@ or ~ bool consumeSpliceUnquoteOrUnquote(); // ~@ or ~
bool consumeString(); bool consumeString();
bool consumeComment(); bool consumeKeyword();
bool consumeValue(); bool consumeValue();
bool consumeComment();
size_t m_column { 0 }; size_t m_column { 0 };
size_t m_line { 0 }; size_t m_line { 0 };
@ -68,44 +70,3 @@ private:
}; };
} // namespace blaze } // namespace blaze
// ~^@
// (+ 2 (* 3 4))
// Lexing -> creates tokens
// Parsing -> creates AST
// class Thing1 {
// public:
// std::vector<int>& numbers() { return m_numbers; }
// private:
// std::vector<int> m_numbers;
// };
// class Thing2 {
// public:
// std::vector<int>&& numbers() { return std::move(m_numbers); }
// private:
// std::vector<int> m_numbers;
// };
// class OtherThing {
// public:
// OtherThing(std::vector<int>&& numbers) noexcept
// : m_numbers(std::move(numbers))
// {
// }
// private:
// std::vector<int> m_numbers;
// };
// int main()
// {
// Thing1 thing1;
// Thing2 thing2;
// OtherThing other_thing(std::move(thing1.numbers()));
// OtherThing other_thing2(thing2.numbers());
// }

4
src/printer.cpp

@ -88,6 +88,10 @@ void Printer::dumpImpl(ASTNode* node)
printSpacing(); printSpacing();
print("{}", static_cast<String*>(node)->data()); print("{}", static_cast<String*>(node)->data());
} }
else if (is<Keyword>(node)) {
printSpacing();
print(":{}", static_cast<Keyword*>(node)->keyword().substr(1));
}
else if (is<Number>(node)) { else if (is<Number>(node)) {
printSpacing(); printSpacing();
print("{}", static_cast<Number*>(node)->number()); print("{}", static_cast<Number*>(node)->number());

12
src/reader.cpp

@ -117,14 +117,17 @@ ASTNode* Reader::readImpl()
case Token::Type::At: // @ case Token::Type::At: // @
return readDeref(); return readDeref();
break; break;
case Token::Type::String: case Token::Type::String: // "foobar"
return readString(); return readString();
break; break;
case Token::Type::Keyword: // :keyword
return readKeyword();
break;
case Token::Type::Comment: // ; case Token::Type::Comment: // ;
ignore(); ignore();
return nullptr; return nullptr;
break; break;
case Token::Type::Value: case Token::Type::Value: // true, false, nil
return readValue(); return readValue();
break; break;
default: default:
@ -301,6 +304,11 @@ ASTNode* Reader::readString()
return new String(symbol); return new String(symbol);
} }
// Consume the current token and wrap its symbol text in a new Keyword node.
// The text is passed through unchanged, including the marker byte the lexer
// puts in front of keyword tokens.
ASTNode* Reader::readKeyword()
{
    Token token = consume();

    return new Keyword(token.symbol);
}
ASTNode* Reader::readValue() ASTNode* Reader::readValue()
{ {
Token token = consume(); Token token = consume();

3
src/reader.h

@ -45,7 +45,8 @@ private:
ASTNode* readWithMeta(); // ^ ASTNode* readWithMeta(); // ^
ASTNode* readDeref(); // @ ASTNode* readDeref(); // @
ASTNode* readString(); // "foobar" ASTNode* readString(); // "foobar"
ASTNode* readValue(); ASTNode* readKeyword(); // :keyword
ASTNode* readValue(); // true, false, nil
void dumpImpl(ASTNode* node); void dumpImpl(ASTNode* node);

Loading…
Cancel
Save