Spaces:

Aadhavan12344
/

bubble-classifier

Sleeping

App Files Files Community

Aadhavan12344 committed on 4 days ago

Commit

e4ddfa4

verified ·

1 Parent(s): 959c0bb

Update app.py

Browse files

Files changed (1) hide show

app.py +268 -41

app.py CHANGED Viewed

@@ -1,70 +1,297 @@
 import gradio as gr
 from sentence_transformers import SentenceTransformer, util
-import torch
-import json
-model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-# In-memory label store (can be replaced by DB)
-LABEL_STORE = {
-    "chat": ["say hello", "casual talk", "how are you"],
-    "image_generation": ["generate an image", "draw a picture", "create artwork"],
-    "action": ["set a timer", "create a reminder"]
-}
-def classify(text, label_json):
-    if not text.strip():
-        return {"error": "Empty input"}
-    try:
-        labels = json.loads(label_json)
-    except Exception as e:
-        return {"error": "Invalid label JSON"}
-    label_embeddings = {}
     for label, examples in labels.items():
-        if not examples:
             continue
-        emb = model.encode(examples, convert_to_tensor=True)
-        label_embeddings[label] = emb.mean(dim=0)
-    if not label_embeddings:
-        return {"error": "No valid labels"}
-    text_emb = model.encode(text, convert_to_tensor=True)
-    scores = {}
-    for label, emb in label_embeddings.items():
-        scores[label] = float(util.cos_sim(text_emb, emb))
-    top_intent = max(scores, key=scores.get)
     return {
         "text": text,
         "top_intent": top_intent,
-        "scores": scores
     }
-with gr.Blocks() as demo:
-    gr.Markdown("## ⚡ Semantic Intent Router Builder")
-    user_input = gr.Textbox(label="User Input")
-    label_editor = gr.JSON(
-        value=LABEL_STORE,
-        label="Labels & Examples (edit freely, add/remove)"
     )
-    output = gr.JSON(label="Routing Result")
-    classify_btn = gr.Button("Classify")
     classify_btn.click(
-        classify,
-        inputs=[user_input, label_editor],
-        outputs=output
     )
 demo.launch(
-    share=True
-)

+# ============================================================
+# ⚡ Ultra-Fast Semantic Intent Router
+# MiniLM-based, zero-training, sub-second classification
+# Works on Hugging Face Free CPU
+# ============================================================
+import json
+import math
+import time
+from typing import Dict, List, Tuple, Any
+import torch
 import gradio as gr
 from sentence_transformers import SentenceTransformer, util
+# ============================================================
+# 🔧 CONFIG
+# ============================================================
+# Model identifier handed to SentenceTransformer(...) when the model is loaded.
+MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+# parse_labels() keeps at most this many cleaned examples per label.
+MAX_EXAMPLES_PER_LABEL = 20
+# When true, classify_intent() passes the per-label scores through softmax().
+NORMALIZE_SCORES = True
+# classify_intent() drops any label whose best cosine similarity is below this.
+MIN_CONFIDENCE = 0.05
+# ============================================================
+# 🧠 MODEL LOAD
+# ============================================================
+# Load the embedding model once at import time; classify_intent() reuses this
+# module-level instance on every call. device="cpu" is passed explicitly.
+print("🔄 Loading MiniLM model...")
+model = SentenceTransformer(MODEL_NAME, device="cpu")
+print("✅ Model loaded")
+# ============================================================
+# 🧩 UTILITIES
+# ============================================================
+def cosine_similarity(a: torch.Tensor, b: torch.Tensor) -> float:
+    """Return the cosine similarity of ``a`` and ``b`` as a Python float.
+
+    Delegates to ``util.cos_sim`` and unwraps the result with ``.item()``.
+    NOTE(review): ``.item()`` only works on a one-element tensor, so this
+    presumably expects a single embedding on each side -- confirm. No caller
+    is visible in this file; ``classify_intent`` calls ``util.cos_sim``
+    directly.
+    """
+    return util.cos_sim(a, b).item()
+def softmax(scores: Dict[str, float]) -> Dict[str, float]:
+    """Map raw per-label scores to weights that sum to 1.
+
+    Keys are preserved. An empty input yields an empty dict.
+    """
+    if not scores:
+        return {}
+    # Subtract the maximum before exponentiating so the largest exponent is 0
+    # and math.exp cannot overflow.
+    max_val = max(scores.values())
+    exp_scores = {k: math.exp(v - max_val) for k, v in scores.items()}
+    total = sum(exp_scores.values())
+    # NOTE(review): the maximum entry always contributes exp(0) == 1, so for
+    # finite scores total >= 1 and this guard looks unreachable.
+    if total == 0:
+        return {k: 0.0 for k in scores}
+    return {k: v / total for k, v in exp_scores.items()}
+# ============================================================
+# 🧱 LABEL PARSING (CRITICAL FIX)
+# ============================================================
+def parse_labels(raw: Any) -> Dict[str, List[str]]:
+    """
+    Normalise user-supplied labels into ``{label: [example, ...]}``.
+
+    Accepts either a dict (what the Gradio JSON component delivers) or a
+    string containing a JSON object. Entries whose key is not a string or
+    whose value is not a list are skipped. Within a list only str/int/float
+    items are kept (stringified and stripped); blanks are dropped and at most
+    MAX_EXAMPLES_PER_LABEL examples survive per label.
+
+    Errors are reported in-band: the return value is then
+    ``{"__error__": "<message>"}``, which api_route() checks for. Because
+    that key is reserved, a user label literally named "__error__" is
+    skipped instead of being mistaken for a failure.
+    """
+    if isinstance(raw, dict):
+        labels = raw
+    elif isinstance(raw, str):
+        try:
+            labels = json.loads(raw)
+        except Exception as e:
+            return {"__error__": f"Invalid JSON: {e}"}
+    else:
+        return {"__error__": "Labels must be JSON object"}
+    if not isinstance(labels, dict):
+        return {"__error__": "Top-level JSON must be object"}
+    clean = {}
     for label, examples in labels.items():
+        # "__error__" is the sentinel key used for failures; never let a
+        # user-defined label collide with it.
+        if not isinstance(label, str) or label == "__error__":
+            continue
+        if not isinstance(examples, list):
             continue
+        cleaned_examples = [
+            str(x).strip()
+            for x in examples
+            # bool is a subclass of int; exclude it so JSON true/false does
+            # not turn into the examples "True"/"False".
+            if isinstance(x, (str, int, float))
+            and not isinstance(x, bool)
+            and str(x).strip()
+        ][:MAX_EXAMPLES_PER_LABEL]
+        if cleaned_examples:
+            clean[label] = cleaned_examples
+    if not clean:
+        return {"__error__": "No valid labels found"}
+    return clean
+# ============================================================
+# 🧠 ROUTER CORE
+# ============================================================
+def classify_intent(
+    text: str,
+    labels: Dict[str, List[str]],
+) -> Dict[str, Any]:
+    """Score ``text`` against every label and pick the best match.
+
+    A label's score is the highest cosine similarity between the input
+    embedding and any of that label's example embeddings. Labels scoring
+    below MIN_CONFIDENCE are dropped; if NORMALIZE_SCORES is set, the
+    survivors are then passed through softmax().
+
+    Returns ``{"error": ...}`` for blank input. Otherwise returns a dict with
+    "text", "top_intent" (None when no label survives the threshold),
+    "scores" (sorted high to low) and "latency_ms".
+    """
+    start = time.time()
+    if not text or not text.strip():
+        return {"error": "Input text is empty"}
+    text = text.strip()
+    # Encode input
+    input_emb = model.encode(text, convert_to_tensor=True)
+    label_scores: Dict[str, float] = {}
+    for label, examples in labels.items():
+        # Examples are re-encoded on every call; nothing is cached here.
+        example_embs = model.encode(examples, convert_to_tensor=True)
+        # [0] takes the first row: the scores for the one input text.
+        sims = util.cos_sim(input_emb, example_embs)[0]
+        # The best-matching example decides the label's score.
+        score = float(torch.max(sims).item())
+        # The threshold is applied to the raw cosine score, before any
+        # normalisation.
+        if score >= MIN_CONFIDENCE:
+            label_scores[label] = score
+    if not label_scores:
+        return {
+            "text": text,
+            "top_intent": None,
+            "scores": {},
+            "latency_ms": round((time.time() - start) * 1000, 2),
+        }
+    # NOTE(review): after softmax the reported "scores" are relative weights
+    # among the surviving labels, not cosine similarities.
+    if NORMALIZE_SCORES:
+        label_scores = softmax(label_scores)
+    top_intent = max(label_scores, key=label_scores.get)
     return {
         "text": text,
         "top_intent": top_intent,
+        "scores": dict(sorted(label_scores.items(), key=lambda x: -x[1])),
+        "latency_ms": round((time.time() - start) * 1000, 2),
     }
+# ============================================================
+# 🌐 API WRAPPER (Gradio-compatible)
+# ============================================================
+def api_route(text: str, raw_labels: Any) -> Dict[str, Any]:
+    """Validate the labels, then classify ``text``; used as the click handler.
+
+    Returns ``{"error": <message>}`` when parse_labels() reports a problem
+    (signalled by the "__error__" key in its result); otherwise returns
+    whatever classify_intent() produces.
+    """
+    labels = parse_labels(raw_labels)
+    if "__error__" in labels:
+        return {"error": labels["__error__"]}
+    return classify_intent(text, labels)
+# ============================================================
+# 🖥️ DEFAULT LABEL SET
+# ============================================================
+# Starter label set: each key is an intent name mapped to a list of example
+# phrases. It is the initial value of the labels_json editor in the UI below.
+DEFAULT_LABELS = {
+    "chat": [
+        "say hello",
+        "casual conversation",
+        "how are you",
+    ],
+    "search": [
+        "find information",
+        "look this up",
+        "search online",
+    ],
+    "image_generation": [
+        "generate an image",
+        "draw a picture",
+        "create artwork",
+    ],
+    "code": [
+        "write code",
+        "debug this program",
+        "fix this bug",
+    ],
+    "research": [
+        "deep analysis",
+        "investigate topic",
+        "academic research",
+    ],
+    "study": [
+        "teach me",
+        "explain concept",
+        "help me learn",
+    ],
+    "project": [
+        "build a project",
+        "start a new app",
+        "create software",
+    ],
+    "action": [
+        "set a timer",
+        "create reminder",
+        "schedule event",
+    ],
+}
+# ============================================================
+# 🎛️ GRADIO UI
+# ============================================================
+# Build the UI: a text box, an editable label set, a result panel and a
+# button that routes both inputs through api_route().
+with gr.Blocks(title="⚡ Semantic Intent Router") as demo:
+    # The markdown text is flush-left on purpose: any indentation inside the
+    # triple-quoted string would be part of the string itself.
+    gr.Markdown(
+        """
+# ⚡ Semantic Intent Router Builder
+**MiniLM semantic classifier**
+No training · CPU-only · Sub-second · API ready
+• Edit labels & examples freely
+• Add unlimited labels
+• Used for system-prompt injection & MPC routing
+"""
     )
+    with gr.Row():
+        user_input = gr.Textbox(
+            label="User Input",
+            placeholder="Type anything...",
+            lines=2,
+        )
+    with gr.Row():
+        labels_json = gr.JSON(
+            label="Labels & Examples (edit freely, add/remove)",
+            value=DEFAULT_LABELS,
+        )
+    with gr.Row():
+        output = gr.JSON(label="Routing Result")
+    classify_btn = gr.Button("Classify", variant="primary")
+    # Clicking sends (text, labels) to api_route and shows the dict it returns.
     classify_btn.click(
+        fn=api_route,
+        inputs=[user_input, labels_json],
+        outputs=output,
     )
+    # Each JSON sample gets its own fenced block, and the string is closed
+    # with plain ASCII triple quotes.
+    gr.Markdown(
+        """
+### 🔌 API Usage
+**POST** to this Space URL:
+```json
+{
+  "data": [
+    "Set a timer for 10 minutes",
+    {
+      "chat": ["hello", "hi"],
+      "action": ["set a timer", "remind me"]
+    }
+  ]
+}
+```
+Response:
+```json
+{
+  "top_intent": "action",
+  "scores": { ... }
+}
+```
+"""
+    )
+# ============================================================
+# 🚀 LAUNCH
+# ============================================================
+# Start the server only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch(
+        share=True,
+        # NOTE(review): newer Gradio releases no longer accept enable_queue in
+        # launch() -- confirm against the Gradio version this Space pins and
+        # drop the argument if it is rejected.
+        enable_queue=False,
+    )