Util: Improve Lexer string tokenization

4 years ago · c1c9429a9d
1 changed file with 23 additions and 17 deletions
--- a/src/util/json/lexer.cpp
+++ b/src/util/json/lexer.cpp
@@ -178,40 +178,46 @@ bool Lexer::consumeSpecific(char character)
 bool Lexer::getString()
 {
 	size_t column = m_column;
 	std::string symbol = "";
-	auto isValidStringCharacter = [](char check) -> bool {
+	// Break on "\/ and any control character
-		std::string invalidCharacters = "{}[]:,";
+	std::string breakOnGrammar = "\"\\/";
-		if (invalidCharacters.find(check) != std::string::npos) {
+	for (size_t i = 0; i < 32; ++i) {
-			return false;
+		breakOnGrammar += i;
 	}
-		return true;
+	bool escape = false;
 	};
 	std::string symbol = "";
 	char character = consume();
 	for (;;) {
 		character = peek();
-		// TODO: Escape logic goes here
+		if (!escape && character == '\\') {
-		// ", \, /, b(ackspace), f(orm feed), l(ine feed), c(arriage return), t(ab), u(nicode) \u0021
+			symbol += '\\';
-
+			increment();
-		if (!isValidStringCharacter(character)) {
+			escape = true;
-			m_tokens->push_back({ Token::Type::None, m_line, column, "" });
+			continue;
 			m_job->printErrorLine(m_job->tokens()->back(), "strings should be wrapped in double quotes");
 			return false;
 		}
-		if (character == '"') {
+
 		if (!escape && breakOnGrammar.find(character) != std::string::npos) {
 			break;
 		}
 		symbol += character;
 		increment();
 		if (escape) {
 			escape = false;
 		}
 	}
 	printf("Pushing ->       String:  \"%s\"\t%zu[%zu]\n", symbol.c_str(), m_line, column);
 	m_tokens->push_back({ Token::Type::String, m_line, column, symbol });
 	if (character != '"') {
 		m_job->printErrorLine(m_job->tokens()->back(), "strings should be wrapped in double quotes");
 		return false;
 	}
 	return true;
 }