You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
270 lines
5.5 KiB
270 lines
5.5 KiB
/* |
|
* Copyright (C) 2022 Riyyi |
|
* |
|
* SPDX-License-Identifier: MIT |
|
*/ |
|
|
|
#include <cstddef> |
|
#include <string> |
|
|
|
#include "util/json/job.h" |
|
#include "util/json/lexer.h" |
|
|
|
namespace Json { |
|
|
|
// Bind the lexer to the job it works for. Tokens produced while lexing are
// appended to the token container obtained from that job.
Lexer::Lexer(Job* job)
    : m_job(job), m_tokens(job->tokens())
{
}
|
|
|
// Nothing to release here; the compiler-generated destructor is sufficient.
Lexer::~Lexer() = default;
|
|
|
// ----------------------------------------- |
|
|
|
void Lexer::analyze() |
|
{ |
|
printf("---------\n"); |
|
printf("Input JSON:\n%s\n", m_job->input().c_str()); |
|
printf("---------\n"); |
|
printf("Lexing:\n"); |
|
|
|
while (m_index < m_job->input().length()) { |
|
switch (peek()) { |
|
case '{': |
|
printf("Pushing -> BraceOpen: \"{\"\t%zu[%zu]\n", m_line, m_column); |
|
m_tokens->push_back({ Token::Type::BraceOpen, m_line, m_column, "{" }); |
|
break; |
|
case '}': |
|
printf("Pushing -> BraceClose: \"}\"\t%zu[%zu]\n", m_line, m_column); |
|
m_tokens->push_back({ Token::Type::BraceClose, m_line, m_column, "}" }); |
|
break; |
|
case '[': |
|
printf("Pushing -> BracketOpen: \"[\"\t%zu[%zu]\n", m_line, m_column); |
|
m_tokens->push_back({ Token::Type::BracketOpen, m_line, m_column, "[" }); |
|
break; |
|
case ']': |
|
printf("Pushing -> BracketClose: \"]\"\t%zu[%zu]\n", m_line, m_column); |
|
m_tokens->push_back({ Token::Type::BracketClose, m_line, m_column, "]" }); |
|
break; |
|
case ':': |
|
printf("Pushing -> Colon: \":\"\t%zu[%zu]\n", m_line, m_column); |
|
m_tokens->push_back({ Token::Type::Colon, m_line, m_column, ":" }); |
|
break; |
|
case ',': |
|
printf("Pushing -> Comma: \",\"\t%zu[%zu]\n", m_line, m_column); |
|
m_tokens->push_back({ Token::Type::Comma, m_line, m_column, "," }); |
|
break; |
|
case '"': |
|
if (!getString()) { |
|
return; |
|
} |
|
break; |
|
case '-': |
|
case '0': |
|
case '1': |
|
case '2': |
|
case '3': |
|
case '4': |
|
case '5': |
|
case '6': |
|
case '7': |
|
case '8': |
|
case '9': |
|
if (!getNumber()) { |
|
// Error! |
|
printf("Invalid JSON!\n"); |
|
return; |
|
} |
|
break; |
|
case 'f': |
|
case 'n': |
|
case 't': |
|
if (!getLiteral()) { |
|
// Error! |
|
printf("Invalid JSON!\n"); |
|
return; |
|
} |
|
break; |
|
case ' ': |
|
case '\t': |
|
break; |
|
case '\r': |
|
if (peekNext() == '\n') { // CRLF \r\n |
|
break; |
|
} |
|
m_column = 0; |
|
m_line++; |
|
break; |
|
case '\n': |
|
m_column = 0; |
|
m_line++; |
|
break; |
|
default: |
|
// Error! |
|
m_tokens->push_back({ Token::Type::None, m_line, m_column, std::string(1, peek()) }); |
|
m_job->printErrorLine(m_tokens->back(), |
|
("unexpected character '" + std::string(1, peek()) + "'").c_str()); |
|
return; |
|
break; |
|
} |
|
|
|
m_index++; |
|
m_column++; |
|
} |
|
} |
|
|
|
// ----------------------------------------- |
|
|
|
char Lexer::peek() |
|
{ |
|
return m_job->input()[m_index]; |
|
} |
|
|
|
char Lexer::peekNext() |
|
{ |
|
return m_job->input()[m_index + 1]; |
|
} |
|
|
|
char Lexer::consume() |
|
{ |
|
char character = peek(); |
|
m_index++; |
|
m_column++; |
|
return character; |
|
} |
|
|
|
bool Lexer::consumeSpecific(char character) |
|
{ |
|
if (peek() != character) { |
|
return false; |
|
} |
|
|
|
m_index++; |
|
m_column++; |
|
return true; |
|
} |
|
|
|
bool Lexer::getString() |
|
{ |
|
size_t column = m_column; |
|
|
|
auto isValidStringCharacter = [](char check) -> bool { |
|
std::string invalidCharacters = "{}[]:,"; |
|
if (invalidCharacters.find(check) != std::string::npos) { |
|
return false; |
|
} |
|
|
|
return true; |
|
}; |
|
|
|
std::string symbol = ""; |
|
char character = consume(); |
|
for (;;) { |
|
character = peek(); |
|
|
|
// TODO: Escape logic goes here |
|
// ", \, /, b(ackspace), f(orm feed), l(ine feed), c(arriage return), t(ab), u(nicode) \u0021 |
|
|
|
if (!isValidStringCharacter(character)) { |
|
m_tokens->push_back({ Token::Type::None, m_line, column, "" }); |
|
m_job->printErrorLine(m_job->tokens()->back(), "strings should be wrapped in double quotes"); |
|
return false; |
|
} |
|
if (character == '"') { |
|
break; |
|
} |
|
|
|
m_index++; |
|
m_column++; |
|
symbol += character; |
|
} |
|
|
|
printf("Pushing -> String: \"%s\"\t%zu[%zu]\n", symbol.c_str(), m_line, column); |
|
m_tokens->push_back({ Token::Type::String, m_line, column, symbol }); |
|
|
|
return true; |
|
} |
|
|
|
bool Lexer::getNumber() |
|
{ |
|
size_t index = m_index; |
|
size_t column = m_column; |
|
std::string symbol = ""; |
|
|
|
bool seenDot = false; |
|
char character; |
|
for (;;) { |
|
character = peek(); |
|
|
|
// FIXME: Break on separator }], rather than valid number symbols to |
|
// get the entire thing, resulting in better error handling |
|
// FIXME: e/E and exponent are also valid characters (?) |
|
if (character != 45 // - |
|
&& character != 46 // . |
|
&& (character < 48 || character > 57)) { // 0-9 |
|
break; |
|
} |
|
|
|
// Fail if '.' is used more than once |
|
if (seenDot == true && character == 46) { // . |
|
m_index = index; |
|
m_column = column; |
|
return false; |
|
} |
|
if (character == 46) { // . |
|
seenDot = true; |
|
} |
|
|
|
m_index++; |
|
m_column++; |
|
symbol += character; |
|
} |
|
m_index--; |
|
m_column--; |
|
|
|
printf("Pushing -> Number: \"%s\"\t%zu[%zu]\n", symbol.c_str(), m_line, column); |
|
m_tokens->push_back({ Token::Type::Number, m_line, column, symbol }); |
|
|
|
return true; |
|
} |
|
|
|
bool Lexer::getLiteral() |
|
{ |
|
size_t index = m_index; |
|
size_t column = m_column; |
|
|
|
std::string symbol = ""; |
|
|
|
char character; |
|
for (;;) { |
|
character = peek(); |
|
|
|
// Literals can only contain lower-case letters |
|
if (character < 97 || character > 122) { // a-z |
|
break; |
|
} |
|
|
|
m_index++; |
|
m_column++; |
|
symbol += character; |
|
} |
|
m_index--; |
|
m_column--; |
|
|
|
// Literal name validation |
|
if (symbol != "false" && symbol != "null" && symbol != "true") { |
|
m_index = index; |
|
m_column = column; |
|
return false; |
|
} |
|
|
|
printf("Pushing -> Literal: \"%s\"\t%zu[%zu]\n", symbol.c_str(), m_line, column); |
|
m_tokens->push_back({ Token::Type::Literal, m_line, column, symbol }); |
|
|
|
return true; |
|
} |
|
|
|
} // namespace Json
|
|
|