Source code for noisemaker.dsl.tokenizer

# Support ternary operator and equals sign for named arguments
PUNCT = set("(){}[],.:+-*/?=<>")


class TokenizerError(Exception):
    """Custom exception for tokenizer errors with line and column tracking."""

    def __init__(self, message: str, line: int, column: int):
        super().__init__(f"{message} at line {line} column {column}")
        self.line = line
        self.column = column


def is_digit(ch):
    return ch is not None and "0" <= ch <= "9"


def is_hex(ch):
    return ch is not None and (is_digit(ch) or ("a" <= ch <= "f") or ("A" <= ch <= "F"))


def is_ident_start(ch):
    return ("A" <= ch <= "Z") or ("a" <= ch <= "z") or ch == "_"


def is_ident(ch):
    return is_ident_start(ch) or is_digit(ch)


def make_error(message, line, column):
    return TokenizerError(message, line, column)


[docs] def tokenize(source): tokens = [] i = 0 line = 1 column = 1 length = len(source) def peek(offset=0): idx = i + offset return source[idx] if idx < length else None def advance(): nonlocal i, line, column ch = source[i] i += 1 if ch == "\n": line += 1 column = 1 else: column += 1 return ch def skip_whitespace(): nonlocal i while i < length: ch = peek() if ch in (" ", "\t", "\r", "\n"): advance() continue if ch == "/" and peek(1) == "/": advance() advance() while i < length and peek() != "\n": advance() continue if ch == "/" and peek(1) == "*": comment_line = line comment_column = column advance() advance() closed = False while i < length: if peek() == "*" and peek(1) == "/": advance() advance() closed = True break advance() if not closed: raise make_error("Unterminated multi-line comment", comment_line, comment_column) continue break while i < length: skip_whitespace() if i >= length: break ch = peek() token_line = line token_column = column if is_digit(ch) or (ch == "." and is_digit(peek(1))): num_str = "" if ch == ".": num_str = "0" advance() while is_digit(peek()): num_str += advance() if peek() == ".": num_str += advance() while is_digit(peek()): num_str += advance() tokens.append({"type": "number", "value": float(num_str), "line": token_line, "column": token_column}) continue if ch in ('"', "'"): quote = advance() s = "" while i < length and peek() != quote: c = advance() if c == "\\": if i >= length: raise make_error("Unterminated string literal", token_line, token_column) esc = advance() mapping = {"n": "\n", "r": "\r", "t": "\t", "\\": "\\", '"': '"', "'": "'"} s += mapping.get(esc, esc) elif c == "\n": raise make_error("Unterminated string literal", token_line, token_column) else: s += c if peek() != quote: raise make_error("Unterminated string literal", token_line, token_column) advance() tokens.append({"type": "string", "value": s, "line": token_line, "column": token_column}) continue if ch == "#": advance() hex_str = "" while i < length and is_hex(peek()): hex_str += advance() if len(hex_str) not in (3, 6): raise make_error("Invalid color", token_line, token_column) tokens.append({"type": "color", "value": f"#{hex_str}", "line": token_line, "column": token_column}) continue if ch in PUNCT: advance() tokens.append({"type": ch, "value": ch, "line": token_line, "column": token_column}) continue if is_ident_start(ch): ident = "" while i < length and is_ident(peek()): ident += advance() if ident in ("true", "false"): tokens.append({"type": "boolean", "value": ident == "true", "line": token_line, "column": token_column}) elif ident == "null": tokens.append({"type": "null", "value": None, "line": token_line, "column": token_column}) else: tokens.append({"type": "identifier", "value": ident, "line": token_line, "column": token_column}) continue raise make_error(f"Unexpected character '{ch}'", token_line, token_column) return tokens