Browse Source

Util: Move number validation logic to Parser

master
Riyyi 2 years ago
parent
commit
ddf0a2858b
  1. 26
      src/util/json/lexer.cpp
  2. 120
      src/util/json/parser.cpp
  3. 1
      src/util/json/parser.h

26
src/util/json/lexer.cpp

@ -74,8 +74,6 @@ void Lexer::analyze()
case '8': case '8':
case '9': case '9':
if (!getNumber()) { if (!getNumber()) {
// Error!
printf("Invalid JSON!\n");
return; return;
} }
break; break;
@ -190,34 +188,18 @@ bool Lexer::getString()
bool Lexer::getNumber() bool Lexer::getNumber()
{ {
size_t index = m_index;
size_t column = m_column; size_t column = m_column;
std::string symbol = ""; std::string symbol = "";
std::string breakOnGrammar = std::string() + "{}[]:,\"" + '\n';
bool seenDot = false; for (char character;;) {
char character;
for (;;) {
character = peek(); character = peek();
// FIXME: Break on separator }], rather than valid number symbols to // Break on all valid JSON grammar thats not a number
// get the entire thing, resulting in better error handling if (breakOnGrammar.find(character) != std::string::npos) {
// FIXME: e/E and exponent are also valid characters (?)
if (character != 45 // -
&& character != 46 // .
&& (character < 48 || character > 57)) { // 0-9
break; break;
} }
// Fail if '.' is used more than once
if (seenDot == true && character == 46) { // .
m_index = index;
m_column = column;
return false;
}
if (character == 46) { // .
seenDot = true;
}
m_index++; m_index++;
m_column++; m_column++;
symbol += character; symbol += character;

120
src/util/json/parser.cpp

@ -4,12 +4,13 @@
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
*/ */
#include <cstddef> // size_t
#include <cstdint> // uint32_t #include <cstdint> // uint32_t
#include <cstdio> // printf, sprintf #include <algorithm> // count
#include <cstddef> // size_t
#include <cstdio> // printf, sprintf
#include <map> #include <map>
#include <memory> // shared_ptr #include <memory> // shared_ptr
#include <string> // stod #include <string> // stod
#include "util/json/array.h" #include "util/json/array.h"
#include "util/json/job.h" #include "util/json/job.h"
@ -56,7 +57,7 @@ Value Parser::parse()
m_index++; m_index++;
break; break;
case Token::Type::Number: case Token::Type::Number:
result = Value { std::stod(token.symbol) }; result = getNumber();
m_index++; m_index++;
break; break;
case Token::Type::String: case Token::Type::String:
@ -123,6 +124,102 @@ bool Parser::consumeSpecific(Token::Type type)
return true; return true;
} }
Value Parser::getNumber()
{
Token token = consume();
auto reportError = [this](Token token, const std::string& message) -> void {
m_job->printErrorLine(token, message.c_str());
};
// Validation
// number = [ minus ] int [ frac ] [ exp ]
size_t minusPrefix = token.symbol[0] == '-' ? 1 : 0;
// Leading 0s
if (token.symbol[minusPrefix] == '0'
&& token.symbol[minusPrefix + 1] > '0' && token.symbol[minusPrefix + 1] < '9') {
reportError(token, "invalid leading zero");
return nullptr;
}
enum class State {
Int,
Fraction,
Exponent
};
State state = State::Int;
std::string validCharacters = "0123456789-";
size_t fractionPosition = 0;
size_t exponentPosition = 0;
size_t length = token.symbol.length();
for (size_t i = 0; i < length; ++i) {
char character = token.symbol[i];
// Int -> Fraction
if (character == '.' && state == State::Int) {
state = State::Fraction;
validCharacters = "0123456789";
fractionPosition = i;
continue;
}
// Int/Fraction -> Exponent
else if ((character == 'e' || character == 'E') && state != State::Exponent) {
state = State::Exponent;
validCharacters = "0123456789+-";
exponentPosition = i;
continue;
}
if (state == State::Int) {
if (character == '-') {
if (i == length - 1) {
reportError(token, "expected number after minus");
return nullptr;
}
if (i != 0) {
reportError(token, "invalid minus");
return nullptr;
}
}
}
else if (state == State::Fraction) {
}
else if (state == State::Exponent) {
if (character == '-' || character == '+') {
if (i == length - 1) {
reportError(token, "expected number after plus/minus");
return nullptr;
}
if (i > exponentPosition + 1) {
reportError(token, "invalid plus/minus");
return nullptr;
}
}
}
if (validCharacters.find(character) == std::string::npos) {
reportError(token, std::string() + "invalid number, unexpected '" + character + '\'');
return nullptr;
}
}
if (fractionPosition == exponentPosition - 1) {
reportError(token, "invalid exponent sign, expected number");
return nullptr;
}
if (fractionPosition == length - 1 || exponentPosition == length - 1) {
reportError(token, "invalid number");
return nullptr;
}
return std::stod(token.symbol);
}
Value Parser::getArray() Value Parser::getArray()
{ {
m_index++; m_index++;
@ -138,7 +235,7 @@ Value Parser::getArray()
Value array = Value::Type::Array; Value array = Value::Type::Array;
Token token; Token token;
for (;;) { for (;;) {
token = consume(); token = peek();
if (token.type == Token::Type::Literal) { if (token.type == Token::Type::Literal) {
printf("Adding literal to array.. v:{%s}, t:{%d}\n", token.symbol.c_str(), (int)token.type); printf("Adding literal to array.. v:{%s}, t:{%d}\n", token.symbol.c_str(), (int)token.type);
@ -151,27 +248,27 @@ Value Parser::getArray()
else if (token.symbol == "false") { else if (token.symbol == "false") {
array.emplace_back(false); array.emplace_back(false);
} }
m_index++;
} }
else if (token.type == Token::Type::Number) { else if (token.type == Token::Type::Number) {
printf("Adding number to array.. v:{%s}, t:{%d} -> %f\n", token.symbol.c_str(), (int)token.type, std::stod(token.symbol)); printf("Adding number to array.. v:{%s}, t:{%d} -> %f\n", token.symbol.c_str(), (int)token.type, std::stod(token.symbol));
array.emplace_back(std::stod(token.symbol)); array.emplace_back(getNumber());
} }
else if (token.type == Token::Type::String) { else if (token.type == Token::Type::String) {
printf("Adding string to array.. v:{%s}, t:{%d}\n", token.symbol.c_str(), (int)token.type); printf("Adding string to array.. v:{%s}, t:{%d}\n", token.symbol.c_str(), (int)token.type);
array.emplace_back(token.symbol); array.emplace_back(token.symbol);
m_index++;
} }
else if (token.type == Token::Type::BracketOpen) { else if (token.type == Token::Type::BracketOpen) {
m_index--;
array.emplace_back(getArray()); array.emplace_back(getArray());
} }
else if (token.type == Token::Type::BraceOpen) { else if (token.type == Token::Type::BraceOpen) {
m_index--;
array.emplace_back(getObject()); array.emplace_back(getObject());
} }
else if (token.type == Token::Type::BracketClose) { else if (token.type == Token::Type::BracketClose) {
// Trailing comma // Trailing comma
if (array.asArray().size() > 0) { if (array.asArray().size() > 0) {
reportError((*m_tokens)[m_index - 2], "invalid comma, expecting ']'"); reportError((*m_tokens)[m_index - 1], "invalid comma, expecting ']'");
} }
break; break;
} }
@ -260,8 +357,9 @@ Value Parser::getObject()
} }
} }
else if (token.type == Token::Type::Number) { else if (token.type == Token::Type::Number) {
printf("Adding number to object.. k:{%s}, v:{%s}, t:{%d} -> %f\n", key.c_str(), token.symbol.c_str(), (int)token.type, std::stod(token.symbol)); printf("Adding number to object.. k:{%s}, v:{%s}, t:{%d} -> %f\n", name.c_str(), token.symbol.c_str(), (int)token.type, std::stod(token.symbol));
object[key] = std::stod(token.symbol); m_index--;
object[key] = getNumber();
} }
else if (token.type == Token::Type::String) { else if (token.type == Token::Type::String) {
printf("Adding string to object.. k:{%s}, v:{%s}, t:{%d}\n", key.c_str(), token.symbol.c_str(), (int)token.type); printf("Adding string to object.. k:{%s}, v:{%s}, t:{%d}\n", key.c_str(), token.symbol.c_str(), (int)token.type);

1
src/util/json/parser.h

@ -36,6 +36,7 @@ private:
Token consume(); Token consume();
bool consumeSpecific(Token::Type type); bool consumeSpecific(Token::Type type);
Value getNumber();
Value getArray(); Value getArray();
Value getObject(); Value getObject();

Loading…
Cancel
Save