import re from dataclasses import dataclass from typing import Sequence def normalize_text(value: str) -> str: return re.sub(r"\s+", "", value.casefold()) @dataclass(frozen=True) class Rule: rule_id: str primary_keyword: str aliases: tuple[str, ...] reply: str def search_terms(self) -> tuple[str, ...]: terms: list[str] = [] for candidate in (self.primary_keyword, *self.aliases): cleaned = candidate.strip() if cleaned and cleaned not in terms: terms.append(cleaned) return tuple(terms) @dataclass(frozen=True) class MatchResult: transcript: str matched: bool rule_id: str | None = None matched_keyword: str | None = None reply: str | None = None match_position: int | None = None class RuleEngine: def __init__(self, rules: Sequence[Rule]) -> None: self._rules = list(rules) @property def rules(self) -> list[Rule]: return list(self._rules) def update_rules(self, rules: Sequence[Rule]) -> None: self._rules = list(rules) def match(self, transcript: str) -> MatchResult: normalized_transcript = normalize_text(transcript) best_score: tuple[int, int] | None = None best_rule: Rule | None = None best_term: str | None = None for rule in self._rules: for term in rule.search_terms(): normalized_term = normalize_text(term) if not normalized_term: continue position = normalized_transcript.find(normalized_term) if position < 0: continue score = (position, -len(normalized_term)) if best_score is None or score < best_score: best_score = score best_rule = rule best_term = term if best_rule is None or best_term is None or best_score is None: return MatchResult(transcript=transcript, matched=False) return MatchResult( transcript=transcript, matched=True, rule_id=best_rule.rule_id, matched_keyword=best_term, reply=best_rule.reply, match_position=best_score[0], )