"""Minimal intent-classification API: a fine-tuned BERT classifier exposed via Gradio.

No generative model involved — the endpoint returns only the predicted
intent label and its softmax confidence.
"""

import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ----------------------------
# Intent mapping (inlined)
# ----------------------------
ID_TO_INTENT = {
    0: "price_check",
    1: "product_information",
    2: "product_search",
    3: "promo_discount",
    4: "return_refund",
    5: "stock_check",
}
INTENT_TO_ID = {intent: idx for idx, intent in ID_TO_INTENT.items()}


def get_intent_from_id(label_id: int) -> str:
    """Map a model label id to its intent name; unknown ids yield a sentinel string."""
    return ID_TO_INTENT.get(label_id, f"unknown_intent_{label_id}")


# ----------------------------
# Model load
# ----------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, "models", "intent_bert_model")  # adjust if your folder name differs

device = "cuda" if torch.cuda.is_available() else "cpu"
tok = AutoTokenizer.from_pretrained(MODEL_DIR)
mdl = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(device)
mdl.eval()  # inference only: disable dropout / training-mode layers


# ----------------------------
# API function
# ----------------------------
def intent_only(message: str) -> dict:
    """Classify *message* into one of the known intents.

    Args:
        message: Raw user text; leading/trailing whitespace is ignored.

    Returns:
        A dict with a uniform schema across all paths:
        ``{"intent": str | None, "confidence": float, "label_id": int | None}``.
        Blank/empty input yields ``intent=None`` with confidence 0.0.
    """
    message = (message or "").strip()
    if not message:
        # FIX: include "label_id" here too so the response schema never varies.
        return {"intent": None, "confidence": 0.0, "label_id": None}

    inputs = tok(message, return_tensors="pt", truncation=True, max_length=256).to(device)
    with torch.no_grad():  # no autograd graph needed for inference
        logits = mdl(**inputs).logits[0]

    probs = torch.softmax(logits, dim=-1)
    # Single pass: torch.max over the class dim returns both the best
    # probability and its index (previously argmax + max did two passes).
    confidence_t, label_t = torch.max(probs, dim=-1)
    label_id = int(label_t.item())

    return {
        "intent": get_intent_from_id(label_id),
        "confidence": float(confidence_t.item()),
        "label_id": label_id,  # remove later if you want
    }


# ----------------------------
# Gradio app (minimal UI, API-first)
# ----------------------------
demo = gr.Interface(
    fn=intent_only,
    inputs=gr.Textbox(label="message"),
    outputs=gr.JSON(label="intent"),
    title="Pure Intent Classifier (No GenAI)",
    # FIX: assigning `demo.api_name` after construction has no effect in Gradio;
    # the endpoint name must be passed to the Interface constructor.
    api_name="intent",
)

if __name__ == "__main__":
    # Guard so importing this module (e.g. from tests) does not start the server.
    demo.launch()