Browse Source

Lexer+Reader+Printer: Store strings with quotes, improve error handling

master
Riyyi 2 years ago
parent
commit
21914c6b6a
  1. 12
      src/lexer.cpp
  2. 10
      src/printer.cpp
  3. 107
      src/reader.cpp
  4. 5
      src/reader.h

12
src/lexer.cpp

@ -118,7 +118,7 @@ bool Lexer::consumeSpliceUnquoteOrUnquote()
bool Lexer::consumeString() bool Lexer::consumeString()
{ {
size_t column = m_column; size_t column = m_column;
std::string text = ""; std::string text = "\"";
static std::unordered_set<char> exit = { static std::unordered_set<char> exit = {
'"', '"',
@ -146,11 +146,15 @@ bool Lexer::consumeString()
text += character; text += character;
ignore(); ignore();
if (escape) { escape = false;
escape = false;
}
} }
if (character == '"') {
text += character;
}
print("lex text '{}'\n", text);
m_tokens.push_back({ Token::Type::String, m_line, column, text }); m_tokens.push_back({ Token::Type::String, m_line, column, text });
return true; return true;

10
src/printer.cpp

@ -35,13 +35,17 @@ void Printer::dump()
void Printer::dumpImpl(ASTNode* node) void Printer::dumpImpl(ASTNode* node)
{ {
auto printSpacing = [this]() { auto printSpacing = [this]() -> void {
if (!m_firstNode && !m_previousNodeIsList) { if (!m_firstNode && !m_previousNodeIsList) {
print(" "); print(" ");
} }
}; };
if (is<List>(node)) {
if (is<Error>(node)) {
print("*** blaze error *** {}", static_cast<Error*>(node)->error());
}
else if (is<List>(node)) {
printSpacing(); printSpacing();
print("("); print("(");
m_firstNode = false; m_firstNode = false;
@ -55,7 +59,7 @@ void Printer::dumpImpl(ASTNode* node)
} }
else if (is<String>(node)) { else if (is<String>(node)) {
printSpacing(); printSpacing();
print("\"{}\"", static_cast<String*>(node)->data()); print("{}", static_cast<String*>(node)->data());
} }
else if (is<Number>(node)) { else if (is<Number>(node)) {
printSpacing(); printSpacing();

107
src/reader.cpp

@ -31,30 +31,66 @@ Reader::~Reader()
void Reader::read() void Reader::read()
{ {
if (m_node != nullptr) { if (m_node) {
return; return;
} }
m_node = readImpl(); m_node = readImpl();
VERIFY(m_index > m_tokens.size() - 1, "more than one sexp in input");
// Error checking
if (m_invalid_syntax) {
m_node = new Error("Invalid read syntax: '" + std::string(1, m_error_character) + "'");
return;
}
if (m_is_unbalanced) {
m_node = new Error("Expected '" + std::string(1, m_error_character) + "', got EOF");
return;
}
if (!isEOF()) {
Token::Type type = peek().type;
switch (type) {
case Token::Type::ParenOpen: // (
case Token::Type::ParenClose: // )
case Token::Type::String:
case Token::Type::Value:
m_node = new Error("More than one sexp in input");
break;
default:
m_node = new Error("Unknown error");
break;
};
}
} }
ASTNode* Reader::readImpl() ASTNode* Reader::readImpl()
{ {
if (m_tokens.size() == 0) {
return nullptr;
}
switch (peek().type) { switch (peek().type) {
case Token::Type::ParenOpen: case Token::Type::ParenOpen: // (
return readList(); return readList();
break; break;
case Token::Type::ParenClose: // )
m_invalid_syntax = true;
m_error_character = ')';
return nullptr;
break;
case Token::Type::String: case Token::Type::String:
return readString(); return readString();
break; break;
case Token::Type::Value: case Token::Type::Value:
return readValue(); return readValue();
break;
default: default:
// Unimplemented token // Unimplemented token
VERIFY_NOT_REACHED(); VERIFY_NOT_REACHED();
return nullptr; return nullptr;
} };
} }
ASTNode* Reader::readList() ASTNode* Reader::readList()
@ -62,29 +98,64 @@ ASTNode* Reader::readList()
ignore(); // ( ignore(); // (
List* list = new List(); List* list = new List();
while (m_index < m_tokens.size() && peek().type != Token::Type::ParenClose) { while (!isEOF() && peek().type != Token::Type::ParenClose) {
list->addNode(readImpl()); list->addNode(readImpl());
} }
VERIFY(m_index != m_tokens.size(), "missing closing ')'"); if (!consumeSpecific(Token { .type = Token::Type::ParenClose })) { // )
m_error_character = ')';
ignore(); // ) m_is_unbalanced = true;
}
return list; return list;
} }
// Decide whether a lexed string token is properly terminated.
//
// The token has to start and end with '"', and the closing quote must not be
// escaped. A backslash escapes the character after it, so only the run of
// backslashes directly in front of the closing quote matters: an even run is
// made of complete escaped-backslash pairs and leaves the quote untouched,
// an odd run means the last backslash escapes the quote and the string was
// never closed.
static bool isValidString(const std::string& str)
{
    const bool is_quoted = str.size() >= 2 && str.front() == '"' && str.back() == '"';
    if (!is_quoted) {
        return false;
    }

    // Walk backwards from the character before the closing quote, stopping
    // before index 0 so the opening quote is never inspected
    size_t trailing_backslashes = 0;
    for (size_t i = str.size() - 2; i > 0 && str[i] == '\\'; --i) {
        ++trailing_backslashes;
    }

    return trailing_backslashes % 2 == 0;
}
ASTNode* Reader::readString() ASTNode* Reader::readString()
{ {
Token token = consume(); std::string symbol = consume().symbol;
return new String(token.symbol);
// Unbalanced string
if (!isValidString(symbol)) {
m_error_character = '"';
m_is_unbalanced = true;
}
return new String(symbol);
} }
ASTNode* Reader::readValue() ASTNode* Reader::readValue()
{ {
Token token = consume(); Token token = consume();
char* endPtr = nullptr; char* end_ptr = nullptr;
int64_t result = std::strtoll(token.symbol.c_str(), &endPtr, 10); int64_t result = std::strtoll(token.symbol.c_str(), &end_ptr, 10);
if (endPtr == token.symbol.c_str() + token.symbol.size()) { if (end_ptr == token.symbol.c_str() + token.symbol.size()) {
return new Number(result); return new Number(result);
} }
@ -110,6 +181,16 @@ Token Reader::consume()
return m_tokens[m_index++]; return m_tokens[m_index++];
} }
// Advance past the next token only when its type equals the type of `token`.
// Only the type field is compared. Returns true when a token was skipped;
// returns false without advancing at end of input or on a type mismatch.
bool Reader::consumeSpecific(Token token)
{
    // Short-circuit keeps peek() from being called at end of input
    const bool matches = !isEOF() && peek().type == token.type;
    if (matches) {
        ignore();
    }

    return matches;
}
void Reader::ignore() void Reader::ignore()
{ {
m_index++; m_index++;

5
src/reader.h

@ -30,6 +30,7 @@ private:
bool isEOF() const; bool isEOF() const;
Token peek() const; Token peek() const;
Token consume(); Token consume();
bool consumeSpecific(Token token);
void ignore(); void ignore();
ASTNode* readImpl(); ASTNode* readImpl();
@ -43,6 +44,10 @@ private:
size_t m_indentation { 0 }; size_t m_indentation { 0 };
std::vector<Token> m_tokens; std::vector<Token> m_tokens;
char m_error_character { 0 };
bool m_invalid_syntax { false };
bool m_is_unbalanced { false };
ASTNode* m_node { nullptr }; ASTNode* m_node { nullptr };
}; };

Loading…
Cancel
Save