Browse Source

Util: Improve Lexer string tokenization

master
Riyyi 2 years ago
parent
commit
c1c9429a9d
  1. 38
      src/util/json/lexer.cpp

38
src/util/json/lexer.cpp

@ -178,40 +178,46 @@ bool Lexer::consumeSpecific(char character)
bool Lexer::getString() bool Lexer::getString()
{ {
size_t column = m_column; size_t column = m_column;
std::string symbol = "";
auto isValidStringCharacter = [](char check) -> bool { // Break on "\/ and any control character
std::string invalidCharacters = "{}[]:,"; std::string breakOnGrammar = "\"\\/";
if (invalidCharacters.find(check) != std::string::npos) { for (size_t i = 0; i < 32; ++i) {
return false; breakOnGrammar += i;
} }
return true; bool escape = false;
};
std::string symbol = "";
char character = consume(); char character = consume();
for (;;) { for (;;) {
character = peek(); character = peek();
// TODO: Escape logic goes here if (!escape && character == '\\') {
// ", \, /, b(ackspace), f(orm feed), l(ine feed), c(arriage return), t(ab), u(nicode) \u0021 symbol += '\\';
increment();
if (!isValidStringCharacter(character)) { escape = true;
m_tokens->push_back({ Token::Type::None, m_line, column, "" }); continue;
m_job->printErrorLine(m_job->tokens()->back(), "strings should be wrapped in double quotes");
return false;
} }
if (character == '"') {
if (!escape && breakOnGrammar.find(character) != std::string::npos) {
break; break;
} }
symbol += character; symbol += character;
increment(); increment();
if (escape) {
escape = false;
}
} }
printf("Pushing -> String: \"%s\"\t%zu[%zu]\n", symbol.c_str(), m_line, column); printf("Pushing -> String: \"%s\"\t%zu[%zu]\n", symbol.c_str(), m_line, column);
m_tokens->push_back({ Token::Type::String, m_line, column, symbol }); m_tokens->push_back({ Token::Type::String, m_line, column, symbol });
if (character != '"') {
m_job->printErrorLine(m_job->tokens()->back(), "strings should be wrapped in double quotes");
return false;
}
return true; return true;
} }

Loading…
Cancel
Save