diff --git a/src/util/json/lexer.cpp b/src/util/json/lexer.cpp index 888d0d4..0c0e967 100644 --- a/src/util/json/lexer.cpp +++ b/src/util/json/lexer.cpp @@ -74,8 +74,6 @@ void Lexer::analyze() case '8': case '9': if (!getNumber()) { - // Error! - printf("Invalid JSON!\n"); return; } break; @@ -190,34 +188,18 @@ bool Lexer::getString() bool Lexer::getNumber() { - size_t index = m_index; size_t column = m_column; std::string symbol = ""; + std::string breakOnGrammar = std::string() + "{}[]:,\"" + '\n'; - bool seenDot = false; - char character; - for (;;) { + for (char character;;) { character = peek(); - // FIXME: Break on separator }], rather than valid number symbols to - // get the entire thing, resulting in better error handling - // FIXME: e/E and exponent are also valid characters (?) - if (character != 45 // - - && character != 46 // . - && (character < 48 || character > 57)) { // 0-9 + // Break on all valid JSON grammar thats not a number + if (breakOnGrammar.find(character) != std::string::npos) { break; } - // Fail if '.' is used more than once - if (seenDot == true && character == 46) { // . - m_index = index; - m_column = column; - return false; - } - if (character == 46) { // . - seenDot = true; - } - m_index++; m_column++; symbol += character; diff --git a/src/util/json/parser.cpp b/src/util/json/parser.cpp index 0c1fe44..ddf1895 100644 --- a/src/util/json/parser.cpp +++ b/src/util/json/parser.cpp @@ -4,12 +4,13 @@ * SPDX-License-Identifier: MIT */ -#include // size_t #include // uint32_t -#include // printf, sprintf +#include // count +#include // size_t +#include // printf, sprintf #include #include // shared_ptr -#include // stod +#include // stod #include "util/json/array.h" #include "util/json/job.h" @@ -56,7 +57,7 @@ Value Parser::parse() m_index++; break; case Token::Type::Number: - result = Value { std::stod(token.symbol) }; + result = getNumber(); m_index++; break; case Token::Type::String: @@ -123,6 +124,102 @@ bool Parser::consumeSpecific(Token::Type type) return true; } +Value Parser::getNumber() +{ + Token token = consume(); + + auto reportError = [this](Token token, const std::string& message) -> void { + m_job->printErrorLine(token, message.c_str()); + }; + + // Validation + // number = [ minus ] int [ frac ] [ exp ] + + size_t minusPrefix = token.symbol[0] == '-' ? 1 : 0; + + // Leading 0s + if (token.symbol[minusPrefix] == '0' + && token.symbol[minusPrefix + 1] > '0' && token.symbol[minusPrefix + 1] < '9') { + reportError(token, "invalid leading zero"); + return nullptr; + } + + enum class State { + Int, + Fraction, + Exponent + }; + + State state = State::Int; + std::string validCharacters = "0123456789-"; + + size_t fractionPosition = 0; + size_t exponentPosition = 0; + size_t length = token.symbol.length(); + for (size_t i = 0; i < length; ++i) { + char character = token.symbol[i]; + + // Int -> Fraction + if (character == '.' && state == State::Int) { + state = State::Fraction; + validCharacters = "0123456789"; + fractionPosition = i; + continue; + } + // Int/Fraction -> Exponent + else if ((character == 'e' || character == 'E') && state != State::Exponent) { + state = State::Exponent; + validCharacters = "0123456789+-"; + exponentPosition = i; + continue; + } + + if (state == State::Int) { + if (character == '-') { + if (i == length - 1) { + reportError(token, "expected number after minus"); + return nullptr; + } + if (i != 0) { + reportError(token, "invalid minus"); + return nullptr; + } + } + } + else if (state == State::Fraction) { + } + else if (state == State::Exponent) { + if (character == '-' || character == '+') { + if (i == length - 1) { + reportError(token, "expected number after plus/minus"); + return nullptr; + } + if (i > exponentPosition + 1) { + reportError(token, "invalid plus/minus"); + return nullptr; + } + } + } + + if (validCharacters.find(character) == std::string::npos) { + reportError(token, std::string() + "invalid number, unexpected '" + character + '\''); + return nullptr; + } + } + + if (fractionPosition == exponentPosition - 1) { + reportError(token, "invalid exponent sign, expected number"); + return nullptr; + } + + if (fractionPosition == length - 1 || exponentPosition == length - 1) { + reportError(token, "invalid number"); + return nullptr; + } + + return std::stod(token.symbol); +} + Value Parser::getArray() { m_index++; @@ -138,7 +235,7 @@ Value Parser::getArray() Value array = Value::Type::Array; Token token; for (;;) { - token = consume(); + token = peek(); if (token.type == Token::Type::Literal) { printf("Adding literal to array.. v:{%s}, t:{%d}\n", token.symbol.c_str(), (int)token.type); @@ -151,27 +248,27 @@ Value Parser::getArray() else if (token.symbol == "false") { array.emplace_back(false); } + m_index++; } else if (token.type == Token::Type::Number) { printf("Adding number to array.. v:{%s}, t:{%d} -> %f\n", token.symbol.c_str(), (int)token.type, std::stod(token.symbol)); - array.emplace_back(std::stod(token.symbol)); + array.emplace_back(getNumber()); } else if (token.type == Token::Type::String) { printf("Adding string to array.. v:{%s}, t:{%d}\n", token.symbol.c_str(), (int)token.type); array.emplace_back(token.symbol); + m_index++; } else if (token.type == Token::Type::BracketOpen) { - m_index--; array.emplace_back(getArray()); } else if (token.type == Token::Type::BraceOpen) { - m_index--; array.emplace_back(getObject()); } else if (token.type == Token::Type::BracketClose) { // Trailing comma if (array.asArray().size() > 0) { - reportError((*m_tokens)[m_index - 2], "invalid comma, expecting ']'"); + reportError((*m_tokens)[m_index - 1], "invalid comma, expecting ']'"); } break; } @@ -260,8 +357,9 @@ Value Parser::getObject() } } else if (token.type == Token::Type::Number) { - printf("Adding number to object.. k:{%s}, v:{%s}, t:{%d} -> %f\n", key.c_str(), token.symbol.c_str(), (int)token.type, std::stod(token.symbol)); - object[key] = std::stod(token.symbol); + printf("Adding number to object.. k:{%s}, v:{%s}, t:{%d} -> %f\n", name.c_str(), token.symbol.c_str(), (int)token.type, std::stod(token.symbol)); + m_index--; + object[key] = getNumber(); } else if (token.type == Token::Type::String) { printf("Adding string to object.. k:{%s}, v:{%s}, t:{%d}\n", key.c_str(), token.symbol.c_str(), (int)token.type); diff --git a/src/util/json/parser.h b/src/util/json/parser.h index 0dd7cb5..bc92e36 100644 --- a/src/util/json/parser.h +++ b/src/util/json/parser.h @@ -36,6 +36,7 @@ private: Token consume(); bool consumeSpecific(Token::Type type); + Value getNumber(); Value getArray(); Value getObject();