sherdd committed on
Commit
016dbc6
·
verified ·
1 Parent(s): e1377d6
Files changed (1) hide show
  1. app.py +193 -15
app.py CHANGED
@@ -1,33 +1,211 @@
1
- import os
2
  import gradio as gr
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- MODEL_ID = os.getenv("MODEL_ID", "cardiffnlp/twitter-xlm-roberta-base-sentiment")
6
- LABEL_MAP = {0: "negative", 1: "neutral", 2: "positive"} # modelin etiket sirasi
 
 
 
 
 
 
 
 
7
 
8
- # modeli ve tokenizer'i bir kez yukle
9
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
10
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
11
- pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True, framework="pt", device=-1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def analyze(text: str):
14
  text = (text or "").strip()
15
  if not text:
16
  return {"label": "neutral", "score": 1.0}
17
- scores = pipe(text)[0] # [{label: "...", score: ...}, ...]
18
- max_idx = max(range(len(scores)), key=lambda i: scores[i]["score"])
19
- label = LABEL_MAP.get(max_idx, scores[max_idx]["label"]).lower()
20
- score = float(scores[max_idx]["score"])
21
- return {"label": label, "score": round(score, 4)}
22
 
23
- demo = gr.Interface(
 
 
 
 
 
 
 
 
 
24
  fn=analyze,
25
  inputs=gr.Textbox(lines=3, placeholder="type a message..."),
26
  outputs=gr.JSON(),
27
  title="chat sentiment api",
28
  description="returns json: {label: positive|neutral|negative, score: 0..1}",
29
  )
30
- demo.api_name = "analyze" # endpoint: /api/predict/analyze
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  if __name__ == "__main__":
33
  demo.launch()
 
1
+ import os, re, time
2
  import gradio as gr
3
+ from typing import List, Dict, Tuple
4
+ from transformers import (
5
+ AutoTokenizer, AutoModelForSequenceClassification,
6
+ TextClassificationPipeline, AutoConfig
7
+ )
8
+
9
# ----------------------------
# MODEL REGISTRY
# ----------------------------
# One entry per candidate model: display name, hub id, label scheme
# ("3class" / "5star" / "2class"), and whether the benchmark UI
# preselects it by default.
MODELS: Dict[str, Dict] = {
    "xlmr": {
        "id": "cardiffnlp/twitter-xlm-roberta-base-sentiment",
        "name": "XLM-R (3-class)",
        "kind": "3class",
        "default": True,
    },
    "distilmulti": {
        "id": "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
        "name": "DistilBERT (5-star)",
        "kind": "5star",
        "default": True,
    },
    "mbert5": {
        "id": "nlptown/bert-base-multilingual-uncased-sentiment",
        "name": "mBERT (5-star)",
        "kind": "5star",
        "default": False,
    },
    "turkish2": {
        "id": "savasy/bert-base-turkish-sentiment-cased",
        "name": "Turkish BERT (2-class)",
        "kind": "2class",
        "default": False,
    },
}
38
+
39
# Configuration for the single-model API tab (overridable via env var).
MODEL_ID = os.getenv("MODEL_ID", MODELS["xlmr"]["id"])
# Index -> human-readable label for the default 3-class model.
LABEL_MAP_3CLS = {0: "negative", 1: "neutral", 2: "positive"}
42
+
43
+
44
# Lazy per-model caches: each model is loaded at most once per process.
_PIPE_CACHE: Dict[str, TextClassificationPipeline] = {}
_CFG_CACHE: Dict[str, AutoConfig] = {}

def get_pipe_and_cfg(model_id: str) -> Tuple[TextClassificationPipeline, AutoConfig]:
    """Return a cached (pipeline, config) pair for *model_id*, loading on first use.

    The config is taken from the already-loaded model (``mdl.config``)
    instead of a second ``AutoConfig.from_pretrained`` call, which avoided
    nothing and triggered a redundant hub lookup.
    """
    if model_id not in _PIPE_CACHE:
        tok = AutoTokenizer.from_pretrained(model_id)
        mdl = AutoModelForSequenceClassification.from_pretrained(model_id)
        # NOTE(review): return_all_scores is deprecated in newer transformers
        # (top_k=None is the replacement), but switching it changes the output
        # nesting that the callers' `pipe(t)[0]` indexing relies on — left as-is.
        _PIPE_CACHE[model_id] = TextClassificationPipeline(
            model=mdl, tokenizer=tok, return_all_scores=True, framework="pt", device=-1
        )
        _CFG_CACHE[model_id] = mdl.config
    return _PIPE_CACHE[model_id], _CFG_CACHE[model_id]
56
+
57
# ----------------------------
# LABEL NORMALIZATION
# ----------------------------
def normalize_label(raw_label: str, cfg, kind: str) -> str:
    """Collapse a model's raw label into "positive" / "neutral" / "negative".

    ``cfg`` only needs an ``id2label`` mapping (e.g. a transformers model
    config); the annotation is deliberately duck-typed so this pure helper
    does not require transformers to be importable.
    ``kind`` is the label scheme: "3class", "5star", or "2class".
    """
    lbl = raw_label.lower()

    # Generic "LABEL_<i>" ids: resolve through the config's id2label table.
    if lbl.startswith("label_") and hasattr(cfg, "id2label"):
        try:
            idx = int(lbl.split("_")[-1])
            lbl = str(cfg.id2label[idx]).lower()
        except Exception:
            pass  # unknown/unparsable index: fall through with the raw text

    # 5-star models: map 1..5 stars onto neg/neu/pos.
    if kind == "5star":
        m = re.search(r"([1-5])", lbl)
        if m:
            s = int(m.group(1))
            if s <= 2:
                return "negative"
            if s == 3:
                return "neutral"
            return "positive"

    # Textual matching ("negative", "NEG", "positive", ...).
    if "neg" in lbl:
        return "negative"
    if "neu" in lbl:
        return "neutral"
    if "pos" in lbl:
        return "positive"

    # Some 2-class models expose only pos/neg; anything else maps to neutral.
    return "neutral"
93
+
94
# ----------------------------
# SINGLE-TEXT ANALYSIS (API)
# ----------------------------
# endpoint: /api/predict/analyze
# Route through the shared cache so the default model is not loaded a
# second time when it is also selected in the benchmark tab.
_pipe, _cfg = get_pipe_and_cfg(MODEL_ID)
_tokenizer = _pipe.tokenizer  # kept for backward compatibility
_model = _pipe.model
101
 
102
def analyze(text: str):
    """Classify one message with the default model.

    Returns ``{"label": positive|neutral|negative, "score": 0..1}``;
    blank or missing input short-circuits to neutral with score 1.0.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return {"label": "neutral", "score": 1.0}

    candidates = _pipe(cleaned)[0]
    best = max(candidates, key=lambda c: c["score"])

    # Generic LABEL_0/1/2 ids -> readable labels; anything else lowercased.
    raw_label = best["label"]
    label = raw_label.lower()
    if raw_label.startswith("LABEL_"):
        index = int(raw_label.rsplit("_", 1)[-1])
        label = LABEL_MAP_3CLS.get(index, raw_label).lower()
    return {"label": label, "score": round(float(best["score"]), 4)}
117
+
118
# Assigning `.api_name` after construction does not rename the endpoint —
# the event is wired in __init__, so the name must be passed to the
# constructor for /api/analyze to be registered.
api_intf = gr.Interface(
    fn=analyze,
    inputs=gr.Textbox(lines=3, placeholder="type a message..."),
    outputs=gr.JSON(),
    title="chat sentiment api",
    description="returns json: {label: positive|neutral|negative, score: 0..1}",
    api_name="analyze",
)
126
+
127
# ----------------------------
# MULTI-MODEL COMPARISON (UI)
# ----------------------------
def run_benchmark(texts_blob: str, selected_keys: List[str]):
    """Run every selected model over every non-blank input line.

    Always returns a 3-tuple ``(summary_markdown, rows, headers)`` so the
    caller can unpack it unconditionally — the original error branches
    returned only 2 values, which crashed the caller's 3-way unpack.
    """
    headers = ["text", "model", "label", "score", "latency_ms"]

    texts = [t.strip() for t in (texts_blob or "").splitlines() if t.strip()]
    if not texts:
        return " Metin alanı boş. Her satıra bir örnek yaz.", [], headers
    if not selected_keys:
        return " En az bir model seç.", [], headers

    rows = []
    for t in texts:
        for key in selected_keys:
            spec = MODELS[key]
            pipe, cfg = get_pipe_and_cfg(spec["id"])

            t0 = time.perf_counter()
            out = pipe(t)[0]  # list of {"label": ..., "score": ...} dicts
            top = max(out, key=lambda s: s["score"])
            latency = (time.perf_counter() - t0) * 1000.0

            label = normalize_label(top["label"], cfg, spec["kind"])
            score = float(top["score"])
            rows.append([t, spec["name"], label, round(score, 4), round(latency, 1)])

    # Per-model summary: average latency and label distribution.
    by_model: Dict[str, Dict] = {}
    for _t, m, lab, _sc, lat in rows:
        d = by_model.setdefault(m, {"n": 0, "lat_sum": 0.0, "neg": 0, "neu": 0, "pos": 0})
        d["n"] += 1
        d["lat_sum"] += lat
        d[lab[:3]] += 1  # neg/neu/pos counter (labels are normalized above)

    lines = ["### Summary"]
    for m, d in by_model.items():
        avg_lat = d["lat_sum"] / max(d["n"], 1)
        lines.append(f"- **{m}** → avg latency: **{avg_lat:.1f} ms**, counts: neg={d['neg']}, neu={d['neu']}, pos={d['pos']}")

    return "\n".join(lines), rows, headers
174
+
175
with gr.Blocks(title="sentiment multi-model bench") as bench_ui:
    gr.Markdown("## Compare models on the same inputs\nEnter one sentence per line. Select models and run.")
    txt = gr.Textbox(lines=8, label="Sentences (one per line)", placeholder="bugün hava harika\nama içim biraz buruk\nnötr bir cümle örneği")

    # CheckboxGroup choices must be plain strings, not gr.Checkbox
    # components — build it directly from the display names and preselect
    # the models flagged as default (replaces the invalid construction
    # that was patched post-hoc via attribute assignment).
    model_names = [v["name"] for v in MODELS.values()]
    default_names = [v["name"] for v in MODELS.values() if v["default"]]
    choices = gr.CheckboxGroup(
        choices=model_names,
        value=default_names,
        label="Models to test",
    )

    run_btn = gr.Button("Run benchmark")
    out_md = gr.Markdown()
    out_tbl = gr.Dataframe(row_count=(0, "dynamic"), col_count=(5, "fixed"), wrap=True)

    def _resolve_keys(selected_names: List[str]) -> List[str]:
        # Map display names back to MODELS registry keys.
        rev = {v["name"]: k for k, v in MODELS.items()}
        return [rev[n] for n in (selected_names or []) if n in rev]

    def _runner(texts_blob, selected_names):
        keys = _resolve_keys(selected_names)
        result = run_benchmark(texts_blob, keys)
        # run_benchmark returns (summary, rows, headers) on success but some
        # versions return only (message, rows) on validation errors —
        # tolerate both shapes instead of crashing on empty input.
        if len(result) == 3:
            summary_md, rows, headers = result
        else:
            summary_md, rows = result
            headers = ["text", "model", "label", "score", "latency_ms"]
        return summary_md, gr.update(value=rows, headers=headers)

    run_btn.click(_runner, inputs=[txt, choices], outputs=[out_md, out_tbl])
203
+
204
+
205
# Combine the single-model API tab and the multi-model comparison tab
# into one app.
demo = gr.TabbedInterface(
    [api_intf, bench_ui],
    tab_names=["API (single model)", "Compare models"],
)

if __name__ == "__main__":
    demo.launch()