Tokenizer now supports line numbers

This commit is contained in:
Johannes Janssen 2025-03-02 23:58:29 +01:00
parent 60b2da7372
commit 3fb5c44ffa
2 changed files with 84 additions and 0 deletions

View File

@ -0,0 +1,39 @@
#include "faustLexer.h"
namespace faust {
/// Constructs the lexer and registers one regular expression per token kind.
///
/// TK_EOF is handed to the underlying Lexer's constructor (presumably as the
/// end-of-input token -- confirm in lexer.h).
///
/// Rules are registered strictly in the order they appear in the table below.
/// The original author flagged that ordering as significant (TK_DECIMAL comes
/// before TK_FLOATING_DECIMAL, keywords come before TK_IDENTIFIER), so keep it
/// intact when adding or moving rules.
/// NOTE(review): how Lexer resolves overlapping patterns is not visible from
/// this file -- confirm in lexer.h before reordering.
FaustLexer::FaustLexer() : lexer(TK_EOF) {
    // One entry per token rule: the regex source and the token it produces.
    struct TokenRule {
        const char16_t *pattern;
        FaustTokens token;
    };

    const TokenRule rules[] = {
        // Numeric literals.
        {u"\\d+", TK_DECIMAL},
        {u"\\d+\\.\\d+", TK_FLOATING_DECIMAL},
        {u"0x[0-9A-Fa-f]+", TK_HEXADECIMAL},
        {u"0b[01]+", TK_BINARY},

        // Single- or double-quoted string; a backslash followed by the
        // opening quote character does not terminate the string.
        {u"([\"'])(?:\\\\\\1|.)*?\\1", TK_STRING},

        // Brackets.
        {u"\\(", TK_LEFT_ROUND_BRACKET},
        {u"\\)", TK_RIGHT_ROUND_BRACKET},
        {u"\\[", TK_LEFT_SQUARE_BRACKET},
        {u"\\]", TK_RIGHT_SQUARE_BRACKET},
        {u"\\{", TK_LEFT_CURLY_BRACKET},
        {u"\\}", TK_RIGHT_CURLY_BRACKET},

        // Operators and punctuation.
        {u"\\=", TK_ASSIGN},
        {u"->", TK_ARROW},
        {u":", TK_COLON},
        {u";", TK_SEMICOLON},

        // Comment introduced by "//".
        {u"\\/\\/.*$", TK_COMMENT},

        // Keywords, matched as whole words via \b.
        {u"\\bfn\\b", TK_FN},
        {u"\\bconst\\b", TK_CONST},
        {u"\\bmut\\b", TK_MUT},
        {u"\\bimport\\b", TK_IMPORT},

        // Identifier: an ASCII letter followed by letters, digits or '_'.
        {u"[a-zA-Z][a-zA-Z0-9_]*", TK_IDENTIFIER},
    };

    for (const TokenRule &rule : rules) {
        lexer.addToken(rule.pattern, rule.token);
    }
}
/// Tokenizes @p text and returns every lexeme the underlying lexer yields.
///
/// The text is loaded into the lexer with setText(), then next() is called
/// for as long as hasNext() reports more input; each lexeme is appended to
/// the result in the order it was produced.
///
/// @param text Source text to tokenize.
/// @return The lexemes in the order they were produced.
Vector<Lexer<FaustTokens>::Lexeme> FaustLexer::lex(const icu::UnicodeString &text) {
    using Lexeme = Lexer<FaustTokens>::Lexeme;

    Vector<Lexeme> collected;
    lexer.setText(text);
    for (bool more = lexer.hasNext(); more; more = lexer.hasNext()) {
        Lexeme current = lexer.next();
        collected.push_back(current);
    }
    return collected;
}
} // namespace faust

View File

@ -0,0 +1,45 @@
#pragma once
#include "faust-lib/common/vector.h"
#include "faust-lib/parser/lexer.h"
namespace faust {
// Token kinds produced by FaustLexer. The pattern behind each kind is
// registered in FaustLexer's constructor (faustLexer.cpp); the notes below
// summarize those patterns.
enum FaustTokens {
// Passed to the Lexer constructor by FaustLexer; presumably the end-of-input
// token -- confirm in lexer.h.
TK_EOF = 0,
// Numeric literals.
TK_DECIMAL, // one or more digits
TK_FLOATING_DECIMAL, // digits '.' digits
TK_HEXADECIMAL, // "0x" followed by hex digits
TK_BINARY, // "0b" followed by 0/1 digits
// Single- or double-quoted string literal.
TK_STRING,
// Brackets.
TK_LEFT_ROUND_BRACKET, // (
TK_RIGHT_ROUND_BRACKET, // )
TK_LEFT_SQUARE_BRACKET, // [
TK_RIGHT_SQUARE_BRACKET, // ]
TK_LEFT_CURLY_BRACKET, // {
TK_RIGHT_CURLY_BRACKET, // }
// Operators and punctuation.
TK_ASSIGN, // =
TK_ARROW, // ->
TK_COLON, // :
TK_SEMICOLON, // ;
// Comment introduced by "//".
TK_COMMENT,
// Keywords.
TK_FN, // fn
TK_CONST, // const
TK_MUT, // mut
TK_IMPORT, // import
// ASCII letter followed by letters, digits or underscores.
TK_IDENTIFIER,
};
/// Tokenizer front-end: owns a Lexer<FaustTokens> that the constructor
/// configures with the token rules, and exposes a single lex() call that
/// turns a piece of text into a list of lexemes.
class FaustLexer {
public:
    /// Sets up the underlying lexer and registers all token patterns.
    FaustLexer();

    /// Loads @p text into the underlying lexer and returns the lexemes it
    /// yields, in the order they were produced.
    Vector<Lexer<FaustTokens>::Lexeme> lex(const icu::UnicodeString &text);

private:
    /// Underlying lexer holding the registered rules; lex() replaces its
    /// text on every call.
    Lexer<FaustTokens> lexer;
};
} // namespace faust