# Spaces: Peterase / rag-api-node-1 (Running)
# File size: 5,628 Bytes · a63c61f

import logging
import re
import threading

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

# ── Instant keyword shortcuts ─────────────────────────────────────────────────
# Messages matched by either collection below are treated as small talk and
# answered as OTHER without consulting the model.

# Whole-message matches (membership test against the normalized query).
_SMALL_TALK_EXACT = {
    # greetings / sign-offs
    "hi",
    "hello",
    "hey",
    "thanks",
    "thank you",
    "bye",
    "goodbye",
    "good morning",
    "good afternoon",
    "good evening",
    "sup",
    "yo",
    "hello there",
    "hey there",
    "hi there",
    "greetings",
    # frustration / profanity
    "wtf",
    "lol",
    "lmao",
    "omg",
    "damn",
    "shit",
    "fuck",
    "for fuck sake",
    "for fucks sake",
    "oh my god",
    "are you kidding",
    "seriously",
    "come on",
    "ugh",
    "argh",
}

# Start-of-message matches (prefix test against the normalized query).
# The trailing space on the bare greetings keeps them from matching words
# that merely begin with the same letters (e.g. "hi " does not match "history").
_SMALL_TALK_PREFIX = (
    # questions about the assistant itself
    "how are you",
    "what are you",
    "who are you",
    "what can you do",
    # chit-chat requests
    "tell me a joke",
    "make me laugh",
    "what's up",
    "whats up",
    # frustration / profanity
    "for fuck",
    "for fucks",
    "what the fuck",
    "what the hell",
    "are you serious",
    "you must be",
    # greeting followed by more text
    "hello ",
    "hi ",
    "hey ",
)

# Temporal patterns → always NEWS (user is asking about time-scoped news)
#
# Each entry is one or more "|"-separated alternatives; they are joined into a
# single group wrapped in word boundaries and matched case-insensitively.
_TEMPORAL_ALTERNATIVES = (
    # relative days
    r"today|yesterday|tomorrow|tonight",
    # "this/last/next <period>"
    r"this (week|month|year|morning|evening|afternoon)",
    r"last (week|month|year|night|monday|tuesday|wednesday|thursday|friday|saturday|sunday)",
    r"next (week|month|year)",
    # "past 3 days", "past week", ...
    r"past (\d+ )?(day|days|week|weeks|month|months|year|years)",
    # recency words
    r"recent(ly)?|latest|breaking|just (now|happened|announced)",
    # bare weekday and month names
    r"(monday|tuesday|wednesday|thursday|friday|saturday|sunday)",
    r"january|february|march|april|may|june|july|august|september|october|november|december",
    # year like 2024, 2025
    r"\d{4}",
    # ordinal like 1st, 2nd
    r"\d+(st|nd|rd|th)",
)

_TEMPORAL_PATTERNS = re.compile(
    r"\b(" + "|".join(_TEMPORAL_ALTERNATIVES) + r")\b",
    re.IGNORECASE,
)


class IntentClassifier:
    """
    Local zero-shot intent classifier backed by the Hugging Face model named
    in ``MODEL_NAME``.

    - Runs on CPU through a ``transformers`` zero-shot-classification pipeline.
    - Lazy-loaded on first use so startup is not blocked.
    - The load is guarded by a lock and attempted at most once; if it fails,
      the classifier permanently falls back to the keyword/default path.

    Classification priority:
    1. Small-talk exact/prefix match → OTHER (instant)
    2. Temporal pattern match → NEWS (instant, handles "this week", "yesterday", etc.)
    3. Zero-shot NLI model → NEWS or OTHER
    4. Keyword fallback if the model failed to load or inference raised
    5. Default → NEWS (always prefer RAG over hallucination)
    """
    MODEL_NAME = "MoritzLaurer/deberta-v3-base-zeroshot-v2.0"

    # Candidate labels handed to the zero-shot pipeline. They are compared by
    # equality when reading the result: BOTH strings contain the word "news",
    # so a substring test cannot tell them apart.
    _NEWS_LABEL = "news, current events, politics, economy, sports, technology, world affairs"
    _OTHER_LABEL = "small talk, greeting, joke, or general question unrelated to news"
    _HYPOTHESIS_TEMPLATE = "This message is about {}."

    # Substrings that mark a query as news-like when the model is unavailable.
    _NEWS_SIGNALS = (
        "latest", "recent", "news", "update", "development", "what happened",
        "who is", "what is", "when did", "why did", "how did", "report",
        "conflict", "election", "economy", "war", "crisis", "deal",
        "agreement", "president", "minister", "market", "price",
        "attack", "protest", "africa", "ethiopia",
    )

    # Trailing characters ignored when matching small talk ("hi!" → "hi").
    _TRAILING_PUNCTUATION = "!?.,;:…"

    def __init__(self):
        self._pipe = None                # loaded pipeline, or None until/unless loading succeeds
        self._lock = threading.Lock()    # serializes the one-time load
        self._load_failed = False        # set once loading has raised; prevents retries

    @classmethod
    def _normalize(cls, query: str) -> str:
        """Lower-case *query* and drop surrounding whitespace and trailing punctuation."""
        trimmed = query.strip().lower()
        return trimmed.rstrip(cls._TRAILING_PUNCTUATION).rstrip()

    @classmethod
    def _intent_from_label(cls, top_label: str) -> str:
        """Map the pipeline's top-ranked candidate label to 'NEWS' or 'OTHER'.

        Only an exact match on the small-talk label yields 'OTHER'; anything
        else (including an unexpected label) yields 'NEWS', in line with the
        "prefer RAG" default.
        """
        return "OTHER" if top_label == cls._OTHER_LABEL else "NEWS"

    def _load(self):
        """Build the zero-shot pipeline once; record failure instead of raising."""
        if self._pipe is not None or self._load_failed:
            return
        with self._lock:
            # Re-check under the lock: another thread may have finished (or
            # failed) the load while this one was waiting.
            if self._pipe is not None or self._load_failed:
                return
            try:
                # Imported here so that importing this module does not require
                # (or pay the import cost of) transformers.
                from transformers import pipeline
                logger.info("Loading intent classifier: %s ...", self.MODEL_NAME)
                self._pipe = pipeline(
                    "zero-shot-classification",
                    model=self.MODEL_NAME,
                    device=-1,          # CPU
                    multi_label=False,
                )
                logger.info("✅ Intent classifier loaded.")
            except Exception as e:
                # Deliberate best-effort: any load problem (missing package,
                # download failure, ...) degrades to keyword matching.
                logger.error(
                    "❌ Failed to load intent classifier: %s. Falling back to keyword matching.",
                    e,
                )
                self._load_failed = True

    def classify(self, query: str) -> str:
        """Returns 'NEWS' or 'OTHER'.

        Args:
            query: The raw user message.

        Returns:
            'OTHER' for small talk (by shortcut or by the model), otherwise
            'NEWS'. Model load or inference failures never propagate; they
            fall through to the keyword/default path, which returns 'NEWS'.
        """
        q = self._normalize(query)

        # 1. Instant small-talk shortcuts
        # NOTE(review): the bare-greeting prefixes ("hi ", "hello ", "hey ")
        # also capture a greeting followed by a real question
        # ("hi what happened today") — confirm this is intended.
        if q in _SMALL_TALK_EXACT:
            logger.debug("Intent: OTHER (small-talk exact) — '%s'", q)
            return "OTHER"
        if q.startswith(_SMALL_TALK_PREFIX):
            logger.debug("Intent: OTHER (small-talk prefix) — '%s'", q)
            return "OTHER"

        # 2. Temporal pattern → always NEWS
        if _TEMPORAL_PATTERNS.search(query):
            logger.debug("Intent: NEWS (temporal pattern) — '%s'", query[:60])
            return "NEWS"

        # 3. Zero-shot NLI model
        self._load()
        if self._pipe is not None:
            try:
                result = self._pipe(
                    query,
                    candidate_labels=[self._NEWS_LABEL, self._OTHER_LABEL],
                    hypothesis_template=self._HYPOTHESIS_TEMPLATE,
                )
                # Labels come back ranked by score; index 0 is the winner.
                intent = self._intent_from_label(result["labels"][0])
                logger.debug(
                    "Intent: %s (DeBERTa score=%.2f) — '%s'",
                    intent, result["scores"][0], query[:60],
                )
                return intent
            except Exception as e:
                logger.warning("Intent classifier inference failed: %s. Defaulting to NEWS.", e)

        # 4. Keyword fallback — same outcome as the default below; kept
        #    separate so the debug log records why NEWS was chosen.
        if any(signal in q for signal in self._NEWS_SIGNALS):
            logger.debug("Intent: NEWS (keyword fallback) — '%s'", query[:60])
            return "NEWS"

        # 5. Default — always prefer RAG over hallucination
        logger.debug("Intent: NEWS (default) — '%s'", query[:60])
        return "NEWS"


# Module-level singleton — intended to be shared across all requests.
# Created at import time; construction only initializes empty state, and the
# model itself is loaded lazily the first time classify() reaches the model step.
intent_classifier = IntentClassifier()