You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
497 lines
10 KiB
497 lines
10 KiB
/* |
|
* Copyright (C) 2022 Riyyi |
|
* |
|
* SPDX-License-Identifier: MIT |
|
*/ |
|
|
|
#include <algorithm> // count |
|
#include <cstddef> // size_t |
|
#include <cstdint> // uint8_t |
|
#include <cstdio> // printf |
|
#include <map> |
|
#include <string> // stod |
|
|
|
#include "util/json/array.h" |
|
#include "util/json/job.h" |
|
#include "util/json/lexer.h" |
|
#include "util/json/object.h" |
|
#include "util/json/parser.h" |
|
#include "util/json/value.h" |
|
#include "util/meta/assert.h" |
|
|
|
namespace Util::JSON { |
|
|
|
Parser::Parser(Job* job) |
|
: m_job(job) |
|
, m_tokens(m_job->tokens()) |
|
{ |
|
} |
|
|
|
Parser::~Parser() |
|
{ |
|
} |
|
|
|
// ----------------------------------------- |
|
|
|
Value Parser::parse() |
|
{ |
|
Value result; |
|
|
|
if (m_tokens->size() == 0) { |
|
m_job->printErrorLine({}, "expecting token, not 'EOF'"); |
|
return result; |
|
} |
|
|
|
Token token = peek(); |
|
switch (token.type) { |
|
case Token::Type::Literal: |
|
result = consumeLiteral(); |
|
break; |
|
case Token::Type::Number: |
|
result = consumeNumber(); |
|
break; |
|
case Token::Type::String: |
|
result = consumeString(); |
|
break; |
|
case Token::Type::BracketOpen: |
|
result = consumeArray(); |
|
break; |
|
case Token::Type::BraceOpen: |
|
result = consumeObject(); |
|
break; |
|
case Token::Type::BracketClose: |
|
m_job->printErrorLine(token, "expecting value, not ']'"); |
|
m_index++; |
|
break; |
|
case Token::Type::BraceClose: |
|
m_job->printErrorLine(token, "expecting string, not '}'"); |
|
m_index++; |
|
break; |
|
default: |
|
m_job->printErrorLine(token, "multiple root elements"); |
|
m_index++; |
|
break; |
|
} |
|
|
|
if (!isEOF()) { |
|
m_job->printErrorLine(peek(), "multiple root elements"); |
|
} |
|
|
|
return result; |
|
} |
|
|
|
// ----------------------------------------- |
|
|
|
bool Parser::isEOF() |
|
{ |
|
return m_index >= m_tokens->size(); |
|
} |
|
|
|
Token Parser::peek() |
|
{ |
|
VERIFY(!isEOF()); |
|
return (*m_tokens)[m_index]; |
|
} |
|
|
|
Token Parser::consume() |
|
{ |
|
VERIFY(!isEOF()); |
|
return (*m_tokens)[m_index++]; |
|
} |
|
|
|
void Parser::ignoreUntil(Token::Type type) |
|
{ |
|
while (!isEOF() && peek().type != type) { |
|
++m_index; |
|
} |
|
} |
|
|
|
Value Parser::consumeLiteral() |
|
{ |
|
Token token = consume(); |
|
|
|
if (token.symbol == "null") { |
|
return nullptr; |
|
} |
|
else if (token.symbol == "true") { |
|
return true; |
|
} |
|
else if (token.symbol == "false") { |
|
return false; |
|
} |
|
|
|
m_job->printErrorLine(token, "invalid literal"); |
|
return nullptr; |
|
} |
|
|
|
Value Parser::consumeNumber() |
|
{ |
|
Token token = consume(); |
|
|
|
auto reportError = [this](Token token, const std::string& message) -> void { |
|
m_job->printErrorLine(token, message.c_str()); |
|
}; |
|
|
|
// Validation |
|
// number = [ minus ] int [ frac ] [ exp ] |
|
|
|
size_t minusPrefix = token.symbol[0] == '-' ? 1 : 0; |
|
|
|
// Leading 0s |
|
if (token.symbol[minusPrefix] == '0' |
|
&& token.symbol[minusPrefix + 1] > '0' && token.symbol[minusPrefix + 1] < '9') { |
|
reportError(token, "invalid leading zero"); |
|
return nullptr; |
|
} |
|
|
|
enum class State : uint8_t { |
|
Int, |
|
Fraction, |
|
Exponent |
|
}; |
|
|
|
State state = State::Int; |
|
|
|
#define CHECK_IF_VALID_NUMBER \ |
|
if (character < 48 || character > 57) { \ |
|
reportError(token, std::string() + "invalid number, unexpected '" + character + '\''); \ |
|
return nullptr; \ |
|
} |
|
|
|
size_t fractionPosition = 0; |
|
size_t exponentPosition = 0; |
|
size_t length = token.symbol.length(); |
|
for (size_t i = 0; i < length; ++i) { |
|
char character = token.symbol[i]; |
|
|
|
// Int -> Fraction |
|
if (character == '.' && state == State::Int) { |
|
state = State::Fraction; |
|
fractionPosition = i; |
|
continue; |
|
} |
|
// Int/Fraction -> Exponent |
|
else if ((character == 'e' || character == 'E') && state != State::Exponent) { |
|
state = State::Exponent; |
|
exponentPosition = i; |
|
continue; |
|
} |
|
|
|
if (state == State::Int) { |
|
if (character == '-') { |
|
if (i == length - 1) { |
|
reportError(token, "expected number after minus"); |
|
return nullptr; |
|
} |
|
if (i != 0) { |
|
reportError(token, "invalid minus"); |
|
return nullptr; |
|
} |
|
} |
|
else { |
|
CHECK_IF_VALID_NUMBER; |
|
} |
|
} |
|
else if (state == State::Fraction) { |
|
CHECK_IF_VALID_NUMBER; |
|
} |
|
else if (state == State::Exponent) { |
|
if (character == '-' || character == '+') { |
|
if (i == length - 1) { |
|
reportError(token, "expected number after plus/minus"); |
|
return nullptr; |
|
} |
|
if (i > exponentPosition + 1) { |
|
reportError(token, "invalid plus/minus"); |
|
return nullptr; |
|
} |
|
} |
|
else { |
|
CHECK_IF_VALID_NUMBER; |
|
} |
|
} |
|
} |
|
|
|
if (fractionPosition != 0 || exponentPosition != 0) { |
|
if (fractionPosition == exponentPosition - 1) { |
|
reportError(token, "invalid exponent sign, expected number"); |
|
return nullptr; |
|
} |
|
|
|
if (fractionPosition == length - 1 || exponentPosition == length - 1) { |
|
reportError(token, "invalid number"); |
|
return nullptr; |
|
} |
|
} |
|
|
|
return std::stod(token.symbol); |
|
} |
|
|
|
Value Parser::consumeString() |
|
{ |
|
Token token = consume(); |
|
|
|
auto reportError = [this](Token token, const std::string& message) -> void { |
|
m_job->printErrorLine(token, message.c_str()); |
|
}; |
|
|
|
// FIXME: support \u Unicode character escape sequence |
|
auto getPrintableString = [](char character) -> std::string { |
|
if (character == '"' || character == '\\' || character == '/' |
|
|| (character >= 0 && character <= 31)) { |
|
switch (character) { |
|
case '"': |
|
return "\\\""; |
|
break; |
|
case '\\': |
|
return "\\\\"; |
|
break; |
|
case '/': |
|
return "/"; |
|
break; |
|
case '\b': |
|
return "\\b"; |
|
break; |
|
case '\f': |
|
return "\\f"; |
|
break; |
|
case '\n': |
|
return "\\n"; |
|
break; |
|
case '\r': |
|
return "\\r"; |
|
break; |
|
case '\t': |
|
return "\\t"; |
|
break; |
|
default: |
|
char buffer[7]; |
|
sprintf(buffer, "\\u%0.4X", character); |
|
return std::string(buffer); |
|
break; |
|
} |
|
} |
|
|
|
return std::string() + character; |
|
}; |
|
|
|
std::string string; |
|
|
|
bool escape = false; |
|
for (char character : token.symbol) { |
|
if (!escape) { |
|
if (character == '\\') { |
|
escape = true; |
|
continue; |
|
} |
|
|
|
if (character == '"' || (character >= 0 && character <= 31)) { |
|
reportError(token, "invalid string, unescaped character found"); |
|
return nullptr; |
|
} |
|
} |
|
|
|
string += getPrintableString(character); |
|
|
|
if (escape) { |
|
escape = false; |
|
} |
|
} |
|
|
|
return string; |
|
} |
|
|
|
Value Parser::consumeArray() |
|
{ |
|
m_index++; |
|
|
|
auto reportError = [this](Token token, const std::string& message) -> void { |
|
m_job->printErrorLine(token, message.c_str()); |
|
|
|
// After an error, try to find the closing bracket |
|
ignoreUntil(Token::Type::BracketClose); |
|
m_index++; |
|
}; |
|
|
|
Value array = Value::Type::Array; |
|
Token token; |
|
for (;;) { |
|
// EOF |
|
if (isEOF()) { |
|
reportError(m_tokens->at(m_index - 1), "expecting closing ']' at end"); |
|
break; |
|
} |
|
|
|
token = peek(); |
|
if (token.type == Token::Type::Literal) { |
|
array.emplace_back(consumeLiteral()); |
|
} |
|
else if (token.type == Token::Type::Number) { |
|
array.emplace_back(consumeNumber()); |
|
} |
|
else if (token.type == Token::Type::String) { |
|
array.emplace_back(consumeString()); |
|
} |
|
else if (token.type == Token::Type::BracketOpen) { |
|
array.emplace_back(consumeArray()); |
|
} |
|
else if (token.type == Token::Type::BraceOpen) { |
|
array.emplace_back(consumeObject()); |
|
} |
|
else if (token.type == Token::Type::BracketClose) { |
|
// Trailing comma |
|
if (array.m_value.array->size() > 0) { |
|
reportError(m_tokens->at(m_index - 1), "invalid comma, expecting ']'"); |
|
break; |
|
} |
|
} |
|
else { |
|
reportError(token, "expecting value or ']', not '" + token.symbol + "'"); |
|
break; |
|
} |
|
|
|
// EOF |
|
if (isEOF()) { |
|
reportError(token, "expecting closing ']' at end"); |
|
break; |
|
} |
|
|
|
// Find , or ] |
|
token = consume(); |
|
if (token.type == Token::Type::Comma) { |
|
continue; |
|
} |
|
else if (token.type == Token::Type::BracketClose) { |
|
break; |
|
} |
|
else { |
|
reportError(m_tokens->at(m_index - 1), "expecting comma or ']', not '" + token.symbol + "'"); |
|
break; |
|
} |
|
} |
|
|
|
return array; |
|
} |
|
|
|
Value Parser::consumeObject() |
|
{ |
|
m_index++; |
|
|
|
auto reportError = [this](Token token, const std::string& message) -> void { |
|
m_job->printErrorLine(token, message.c_str()); |
|
|
|
// After an error, try to find the closing brace |
|
ignoreUntil(Token::Type::BraceClose); |
|
m_index++; |
|
}; |
|
|
|
Value object = Value::Type::Object; |
|
Token token; |
|
std::string name; |
|
std::map<std::string, uint8_t> unique; |
|
for (;;) { |
|
// EOF |
|
if (isEOF()) { |
|
reportError(m_tokens->at(m_index - 1), "expecting closing '}' at end"); |
|
break; |
|
} |
|
|
|
token = consume(); |
|
if (token.type == Token::Type::BraceClose) { |
|
// Trailing comma |
|
if (object.m_value.object->size() > 0) { |
|
reportError(m_tokens->at(m_index - 1), "invalid comma, expecting '}'"); |
|
} |
|
// Empty object |
|
break; |
|
} |
|
if (token.type != Token::Type::String) { |
|
reportError(token, "expecting string or '}', not '" + token.symbol + "'"); |
|
break; |
|
} |
|
|
|
// Find member name |
|
m_index--; |
|
Value tmpName = consumeString(); |
|
if (tmpName.m_type != Value::Type::String) { |
|
ignoreUntil(Token::Type::BraceClose); |
|
m_index++; |
|
break; |
|
} |
|
|
|
// Check if name exists in hashmap |
|
name = *tmpName.m_value.string; |
|
if (unique.find(name) != unique.end()) { |
|
reportError(token, "duplicate name '" + token.symbol + "', names should be unique"); |
|
break; |
|
} |
|
// Add name to hashmap |
|
unique.insert({ name, 0 }); |
|
|
|
// EOF |
|
if (isEOF()) { |
|
reportError(token, "expecting colon, not 'EOF'"); |
|
reportError(token, "expecting closing '}' at end"); |
|
break; |
|
} |
|
|
|
// Find : |
|
token = consume(); |
|
if (token.type != Token::Type::Colon) { |
|
reportError(token, "expecting colon, not '" + token.symbol + "'"); |
|
break; |
|
} |
|
|
|
// EOF |
|
if (isEOF()) { |
|
reportError(token, "expecting value, not 'EOF'"); |
|
reportError(token, "expecting closing '}' at end"); |
|
break; |
|
} |
|
|
|
// Add member (name:value pair) to object |
|
token = peek(); |
|
if (token.type == Token::Type::Literal) { |
|
object.emplace(name, consumeLiteral()); |
|
} |
|
else if (token.type == Token::Type::Number) { |
|
object.emplace(name, consumeNumber()); |
|
} |
|
else if (token.type == Token::Type::String) { |
|
object.emplace(name, consumeString()); |
|
} |
|
else if (token.type == Token::Type::BracketOpen) { |
|
object.emplace(name, consumeArray()); |
|
} |
|
else if (token.type == Token::Type::BraceOpen) { |
|
object.emplace(name, consumeObject()); |
|
} |
|
else { |
|
reportError(token, "expecting value, not '" + token.symbol + "'"); |
|
break; |
|
} |
|
|
|
// EOF |
|
if (isEOF()) { |
|
reportError(token, "expecting closing '}' at end"); |
|
break; |
|
} |
|
|
|
// Find , or } |
|
token = consume(); |
|
if (token.type == Token::Type::Comma) { |
|
continue; |
|
} |
|
else if (token.type == Token::Type::BraceClose) { |
|
break; |
|
} |
|
else { |
|
reportError(token, "expecting comma or '}', not '" + token.symbol + "'"); |
|
break; |
|
} |
|
} |
|
|
|
return object; |
|
} |
|
|
|
} // namespace Util::JSON
|
|
|