""" FilGoalBot — Intent Router ========================== Pure regex-based intent detector. Lives in its own module so it can be unit-tested without importing Groq, FAISS, or sentence-transformers. Intents: 1. lineup → تشكيلة 2. match_result → نتيجة مباراة / أهداف 3. transfer_news → ميركاتو / انتقالات / عقود 4. team_news → أخبار فريق / مران / مؤتمر صحفي 5. player_info → معلومات لاعب / إحصائيات 6. general_football → ambiguous-but-still-football fallback (e.g. "ترتيب الدوري", "موعد المباريات") 7. out_of_scope → clearly NOT football (weather, cooking, other sports, politics …). The pipeline short-circuits these with a tailored refusal BEFORE any retrieval or LLM call. """ import re INTENT_PATTERNS: dict[str, list[str]] = { "lineup": [ r'تشكيل', r'تشكيله', r'التشكيلة', r'أساسي', r'اساسي', r'الحارس', r'الحراسه', r'حارس المرمى', r'خط الدفاع', r'خط الوسط', r'خط الهجوم', r'مين اللي هيلعب', r'مين هيلعب', r'الاحتياطي', r'البدلاء', r'الكابتن', r'مين هيبدأ', r'مين بيبدأ', r'الـ?11\b', r'الإحدى عشر', ], "match_result": [ # MSA # (? bool: return any(re.search(p, query) for p in OUT_OF_SCOPE_PATTERNS) def detect_intent(query: str) -> str: q = query.lower() # "ماذا قال X" → team_news. Override needed because match_result runs first, # and quoted statements often mention "فوز", "هدف" etc that fire there. if re.search(r'ماذا\s+قال|ما\s+الذي\s+قاله|ماذا\s+أعلن', q): return "team_news" # High-priority override: "لاعب اسمه ..." routes to player_info to defeat # match_result's "سجل" trigger (e.g. "كم سجل لاعب اسمه X"). Skip the override # when a transfer verb is present — "هل انتقل لاعب اسمه X" is transfer_news. if re.search(r'لاعب\s+اسمه', q) and not re.search(r'انتقل|صفق|مفاوضات|تعاقد|إعار|اعار', q): return "player_info" # Player-centric return-to-training. Allow 1-3 tokens between "عاد" and # "ل/إلى تدريب" so multi-word names like "دي بروين" / "عبد المنعم" match. # Without this override, team_news's "تدريب" pattern swallows the case. if re.search(r'(?:عاد|عودة)\s+\S+(?:\s+\S+){0,2}\s+ل[إا]?\s*تدريب', q): return "player_info" # Out-of-scope check BEFORE INTENT_ORDER. Football vocabulary is so # broad (نتيجة، مباراة، حالة، ...) that a query like # "نتيجة مباراة كرة السلة" would otherwise match match_result on # "نتيج" and lose its OOS signal. The patterns are conservative — # they target topics with no overlap with football (weather, other # sports, politics-with-disambiguator, etc.) — so promoting them # over INTENT_ORDER does not over-refuse genuine football queries. if _is_out_of_scope(q): return "out_of_scope" for intent in INTENT_ORDER: for pattern in INTENT_PATTERNS[intent]: if re.search(pattern, q): return intent return "general_football"