From 21914c6b6a4048022a1c757e4ac3e18275027f52 Mon Sep 17 00:00:00 2001
From: Riyyi
Date: Sun, 19 Mar 2023 12:06:04 +0100
Subject: [PATCH] Lexer+Reader+Printer: Store strings with quotes, improve error handling

---
Review notes (everything between the "---" line and the first "diff --git"
line is commentary and is not part of the commit):

* This patch had been flattened onto three physical lines. The one-record-
  per-line layout below was rebuilt from the hunk headers and the diffstat;
  every hunk's old/new line count and the 114/20 totals were re-checked.
* Indentation inside the hunks had been collapsed to single spaces. It is
  restored here as tabs, which is an assumption -- confirm against the tree
  before applying.
* Template argument lists were missing (is(node), static_cast(node),
  std::unordered_set, std::vector). They are restored wherever neighbouring
  code fixes the type. The final "else if (is(node))" context line of the
  second src/printer.cpp hunk is left exactly as found, because nothing in
  this patch identifies the node type it tests; fill it in from the tree.
* The author address on the "From:" header is missing as well.
* src/lexer.cpp adds print("lex text '{}'\n", text) right before the String
  token is pushed. It looks like leftover debug output -- TODO confirm and
  drop in a follow-up.
* Reader::read() stores the result of readImpl() in m_node and then, on each
  error path, replaces it with a new Error without releasing the node that
  was just parsed. Whether this leaks depends on ownership rules that are
  not visible in this patch -- TODO confirm.

 src/lexer.cpp   |  12 ++++--
 src/printer.cpp |  10 +++--
 src/reader.cpp  | 107 ++++++++++++++++++++++++++++++++++++++++++------
 src/reader.h    |   5 +++
 4 files changed, 114 insertions(+), 20 deletions(-)

diff --git a/src/lexer.cpp b/src/lexer.cpp
index c5d15fe..e123864 100644
--- a/src/lexer.cpp
+++ b/src/lexer.cpp
@@ -118,7 +118,7 @@ bool Lexer::consumeSpliceUnquoteOrUnquote()
 bool Lexer::consumeString()
 {
 	size_t column = m_column;
-	std::string text = "";
+	std::string text = "\"";
 
 	static std::unordered_set<char> exit = {
 		'"',
@@ -146,11 +146,15 @@ bool Lexer::consumeString()
 		text += character;
 		ignore();
 
-		if (escape) {
-			escape = false;
-		}
+		escape = false;
 	}
 
+	if (character == '"') {
+		text += character;
+	}
+
+	print("lex text '{}'\n", text);
+
 	m_tokens.push_back({ Token::Type::String, m_line, column, text });
 
 	return true;
diff --git a/src/printer.cpp b/src/printer.cpp
index c5e57b6..bda16ff 100644
--- a/src/printer.cpp
+++ b/src/printer.cpp
@@ -35,13 +35,17 @@ void Printer::dump()
 
 void Printer::dumpImpl(ASTNode* node)
 {
-	auto printSpacing = [this]() {
+	auto printSpacing = [this]() -> void {
 		if (!m_firstNode && !m_previousNodeIsList) {
 			print(" ");
 		}
 	};
 
-	if (is<List>(node)) {
+
+	if (is<Error>(node)) {
+		print("*** blaze error *** {}", static_cast<Error*>(node)->error());
+	}
+	else if (is<List>(node)) {
 		printSpacing();
 		print("(");
 		m_firstNode = false;
@@ -55,7 +59,7 @@ void Printer::dumpImpl(ASTNode* node)
 	}
 	else if (is<String>(node)) {
 		printSpacing();
-		print("\"{}\"", static_cast<String*>(node)->data());
+		print("{}", static_cast<String*>(node)->data());
 	}
 	else if (is(node)) {
 		printSpacing();
diff --git a/src/reader.cpp b/src/reader.cpp
index 0830041..9083822 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -31,30 +31,66 @@ Reader::~Reader()
 
 void Reader::read()
 {
-	if (m_node != nullptr) {
+	if (m_node) {
 		return;
 	}
 
 	m_node = readImpl();
-	VERIFY(m_index > m_tokens.size() - 1, "more than one sexp in input");
+
+	// Error checking
+
+	if (m_invalid_syntax) {
+		m_node = new Error("Invalid read syntax: '" + std::string(1, m_error_character) + "'");
+		return;
+	}
+
+	if (m_is_unbalanced) {
+		m_node = new Error("Expected '" + std::string(1, m_error_character) + "', got EOF");
+		return;
+	}
+
+	if (!isEOF()) {
+		Token::Type type = peek().type;
+		switch (type) {
+		case Token::Type::ParenOpen: // (
+		case Token::Type::ParenClose: // )
+		case Token::Type::String:
+		case Token::Type::Value:
+			m_node = new Error("More than one sexp in input");
+			break;
+		default:
+			m_node = new Error("Unknown error");
+			break;
+		};
+	}
 }
 
 ASTNode* Reader::readImpl()
 {
+	if (m_tokens.size() == 0) {
+		return nullptr;
+	}
+
 	switch (peek().type) {
-	case Token::Type::ParenOpen:
+	case Token::Type::ParenOpen: // (
 		return readList();
 		break;
+	case Token::Type::ParenClose: // )
+		m_invalid_syntax = true;
+		m_error_character = ')';
+		return nullptr;
+		break;
 	case Token::Type::String:
 		return readString();
 		break;
 	case Token::Type::Value:
 		return readValue();
+		break;
 	default:
 		// Unimplemented token
 		VERIFY_NOT_REACHED();
 		return nullptr;
-	}
+	};
 }
 
 ASTNode* Reader::readList()
@@ -62,29 +98,64 @@ ASTNode* Reader::readList()
 	ignore(); // (
 
 	List* list = new List();
-	while (m_index < m_tokens.size() && peek().type != Token::Type::ParenClose) {
+	while (!isEOF() && peek().type != Token::Type::ParenClose) {
 		list->addNode(readImpl());
 	}
 
-	VERIFY(m_index != m_tokens.size(), "missing closing ')'");
-
-	ignore(); // )
+	if (!consumeSpecific(Token { .type = Token::Type::ParenClose })) { // )
+		m_error_character = ')';
+		m_is_unbalanced = true;
+	}
 
 	return list;
 }
 
+static bool isValidString(const std::string& str)
+{
+	if (str.size() < 2 || str.front() != '"' || str.back() != '"') {
+		return false;
+	}
+	if (str.size() == 2) {
+		return true;
+	}
+
+	bool escaped = false;
+	for (auto it = str.begin() + 1; it != str.end() - 1; ++it) {
+		if (*it == '\\' && !escaped) {
+			escaped = true;
+			continue;
+		}
+
+		// The last character needs to be an escaped '\' or not a '\'
+		if (it == str.end() - 2 && (escaped || *it != '\\')) {
+			return true;
+		}
+
+		escaped = false;
+	}
+
+	return false;
+}
+
 ASTNode* Reader::readString()
 {
-	Token token = consume();
-	return new String(token.symbol);
+	std::string symbol = consume().symbol;
+
+	// Unbalanced string
+	if (!isValidString(symbol)) {
+		m_error_character = '"';
+		m_is_unbalanced = true;
+	}
+
+	return new String(symbol);
 }
 
 ASTNode* Reader::readValue()
 {
 	Token token = consume();
-	char* endPtr = nullptr;
-	int64_t result = std::strtoll(token.symbol.c_str(), &endPtr, 10);
-	if (endPtr == token.symbol.c_str() + token.symbol.size()) {
+	char* end_ptr = nullptr;
+	int64_t result = std::strtoll(token.symbol.c_str(), &end_ptr, 10);
+	if (end_ptr == token.symbol.c_str() + token.symbol.size()) {
 		return new Number(result);
 	}
 
@@ -110,6 +181,16 @@ Token Reader::consume()
 	return m_tokens[m_index++];
 }
 
+bool Reader::consumeSpecific(Token token)
+{
+	if (isEOF() || peek().type != token.type) {
+		return false;
+	}
+
+	ignore();
+	return true;
+}
+
 void Reader::ignore()
 {
 	m_index++;
diff --git a/src/reader.h b/src/reader.h
index 13a96a9..fbd9720 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -30,6 +30,7 @@ private:
 	bool isEOF() const;
 	Token peek() const;
 	Token consume();
+	bool consumeSpecific(Token token);
 	void ignore();
 
 	ASTNode* readImpl();
@@ -43,6 +44,10 @@ private:
 	size_t m_indentation { 0 };
 	std::vector<Token> m_tokens;
 
+	char m_error_character { 0 };
+	bool m_invalid_syntax { false };
+	bool m_is_unbalanced { false };
+
 	ASTNode* m_node { nullptr };
 };
 