Spaces:

sherdd
/

chat-sentiment-api

Sleeping

App Files Files Community

sherdd committed on Sep 29, 2025

Commit

9b370ba

verified ·

1 Parent(s): 93578fc

updating and adding error handling

Browse files

Files changed (1) hide show

app.py +268 -62

app.py CHANGED Viewed

@@ -1,38 +1,57 @@
-import os, re, time
 import gradio as gr
-from typing import List, Dict, Tuple
 from transformers import (
     AutoTokenizer, AutoModelForSequenceClassification,
     TextClassificationPipeline, AutoConfig
 )
 # ============================
-#  MODEL LİSTESİ (Adaylar)
 # ============================
 MODELS: Dict[str, Dict] = {
     "xlmr": {
-        "name": "XLM-R (3-class)",
         "id": "cardiffnlp/twitter-xlm-roberta-base-sentiment",
         "kind": "3class",
-        "default": True,   # varsayılan seçili
     },
     "distilmulti": {
-        "name": "DistilBERT (5-star)",
         "id": "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
         "kind": "5star",
-        "default": True,
     },
     "mbert5": {
         "name": "mBERT (5-star)",
         "id": "nlptown/bert-base-multilingual-uncased-sentiment",
         "kind": "5star",
         "default": False,
     },
     "turkish2": {
         "name": "Turkish BERT (2-class)",
         "id": "savasy/bert-base-turkish-sentiment-cased",
         "kind": "2class",
         "default": False,
     },
 }
@@ -41,20 +60,39 @@ MODEL_ID = os.getenv("MODEL_ID", MODELS["xlmr"]["id"])
 LABEL_MAP_3CLS = {0: "negative", 1: "neutral", 2: "positive"}
 # ============================
-#  LAZY PIPELINE CACHE
 # ============================
 _PIPE_CACHE: Dict[str, TextClassificationPipeline] = {}
 _CFG_CACHE: Dict[str, AutoConfig] = {}
-def get_pipe_and_cfg(model_id: str) -> Tuple[TextClassificationPipeline, AutoConfig]:
-    if model_id not in _PIPE_CACHE:
-        tok = AutoTokenizer.from_pretrained(model_id)
-        mdl = AutoModelForSequenceClassification.from_pretrained(model_id)
-        _PIPE_CACHE[model_id] = TextClassificationPipeline(
-            model=mdl, tokenizer=tok, return_all_scores=True, framework="pt", device=-1
-        )
-        _CFG_CACHE[model_id] = AutoConfig.from_pretrained(model_id)
-    return _PIPE_CACHE[model_id], _CFG_CACHE[model_id]
 # ============================
 #  LABEL NORMALIZATION
@@ -93,6 +131,32 @@ def normalize_label(raw_label: str, cfg: AutoConfig, kind: str) -> str:
     # 2-class modellerde nötr yoksa güvenli varsayılan
     return "neutral"
 # ============================
 #  TEK METİN ANALİZ (API)
 #  endpoint: /api/predict/analyze
@@ -104,22 +168,61 @@ _pipe = TextClassificationPipeline(
 )
 def analyze(text: str):
     text = (text or "").strip()
     if not text:
-        return {"label": "neutral", "score": 1.0}
-    scores = _pipe(text)[0]               # [{"label":"LABEL_0", "score": ...}, ...]
-    top = max(scores, key=lambda s: s["score"])
-    raw = top["label"]
-    # LABEL_0/1/2 -> okunabilir etiket (XLM-R için)
-    if raw.startswith("LABEL_"):
-        idx = int(raw.split("_")[-1])
-        label = LABEL_MAP_3CLS.get(idx, raw).lower()
-    else:
-        label = raw.lower()
-    return {"label": label, "score": round(float(top["score"]), 4)}
 api_intf = gr.Interface(
     fn=analyze,
@@ -134,6 +237,7 @@ api_intf.api_name = "analyze"  # /api/predict/analyze
 #  ÇOKLU MODEL KARŞILAŞTIRMA (UI)
 # ============================
 def run_benchmark(texts_blob: str, selected_keys: List[str]):
     texts = [t.strip() for t in (texts_blob or "").splitlines() if t.strip()]
     if not texts:
         return "⚠️ Metin alanı boş. Her satıra bir örnek yaz.", []
@@ -141,46 +245,143 @@ def run_benchmark(texts_blob: str, selected_keys: List[str]):
     if not selected_keys:
         return "⚠️ En az bir model seç.", []
-    rows = []  # ["text", "model", "label", "score", "latency_ms"]
-    for t in texts:
-        for key in selected_keys:
-            spec = MODELS[key]
-            pipe, cfg = get_pipe_and_cfg(spec["id"])
-            t0 = time.perf_counter()
-            out = pipe(t)[0]  # list of dicts
-            top = max(out, key=lambda s: s["score"])
-            latency = (time.perf_counter() - t0) * 1000.0
-            label = normalize_label(top["label"], cfg, spec["kind"])
-            score = float(top["score"])
-            rows.append([t, spec["name"], label, round(score, 4), round(latency, 1)])
-    # Özet: model bazında ortalama gecikme ve label sayıları
     by_model: Dict[str, Dict] = {}
-    for text_val, mname, lab, sc, lat in rows:
-        agg = by_model.setdefault(mname, {"n": 0, "lat_sum": 0.0, "neg": 0, "neu": 0, "pos": 0})
         agg["n"] += 1
         agg["lat_sum"] += lat
-        agg[lab[:3]] += 1  # neg/neu/pos
-    lines = ["### Summary"]
-    for mname, agg in by_model.items():
-        avg_lat = agg["lat_sum"] / max(agg["n"], 1)
-        lines.append(f"- **{mname}** → avg latency: **{avg_lat:.1f} ms**, counts: neg={agg['neg']}, neu={agg['neu']}, pos={agg['pos']}")
     summary_md = "\n".join(lines)
     return summary_md, rows
-with gr.Blocks(title="sentiment multi-model bench") as bench_ui:
-    gr.Markdown("## Compare models on the same inputs\nEnter one sentence per line. Select models and run.")
     txt = gr.Textbox(
         lines=8,
-        label="Sentences (one per line)",
-        placeholder="bugün hava harika\nama içim biraz buruk\nnötr bir cümle örneği",
     )
     # CheckboxGroup: sadece isim listesi ve varsayılan seçili isimler
@@ -196,9 +397,9 @@ with gr.Blocks(title="sentiment multi-model bench") as bench_ui:
     run_btn = gr.Button("Run benchmark")
     out_md = gr.Markdown()
     out_tbl = gr.Dataframe(
-        headers=["text", "model", "label", "score", "latency_ms"],
         row_count=(0, "dynamic"),
-        col_count=(5, "fixed"),
         interactive=False,
         wrap=True,
     )
@@ -220,8 +421,13 @@ with gr.Blocks(title="sentiment multi-model bench") as bench_ui:
 # ============================
 demo = gr.TabbedInterface(
     [api_intf, bench_ui],
-    tab_names=["API (single model)", "Compare models"],
 )
 if __name__ == "__main__":
-    demo.launch()

+import os, re, time, traceback, gc
 import gradio as gr
+from typing import List, Dict, Tuple, Optional
 from transformers import (
     AutoTokenizer, AutoModelForSequenceClassification,
     TextClassificationPipeline, AutoConfig
 )
+import torch
 # ============================
+#  MODEL LİSTESİ (Genişletilmiş)
 # ============================
 # Registry of candidate sentiment models, keyed by a short internal name.
 # Per-entry fields:
 #   name    - display name shown in the UI and in benchmark rows
 #   id      - Hugging Face Hub checkpoint id
 #   kind    - label scheme of the checkpoint ("3class", "5star" or "2class");
 #             it is passed to normalize_label(), so it must match `id`
 #   default - whether the model is pre-selected in the comparison UI
 #   size_mb - approximate model size in MB (display only)
 MODELS: Dict[str, Dict] = {
     "xlmr": {
         "name": "XLM-R Twitter (3-class)",
         "id": "cardiffnlp/twitter-xlm-roberta-base-sentiment",
         "kind": "3class",
         "default": True,
         "size_mb": 278,
     },
     "berturk": {
         "name": "BERTurk Sentiment",
         # NOTE(review): the checkpoint name ("mean-nli-stsb") suggests a
         # sentence-embedding model rather than a sentiment classifier --
         # confirm it actually ships a classification head before keeping it
         # enabled by default.
         "id": "emrecan/bert-base-turkish-cased-mean-nli-stsb-tr",
         "kind": "3class",
         "default": True,
         "size_mb": 442,
     },
     "distilmulti": {
         "name": "DistilBERT Multi (5-star)",
         # NOTE(review): verify against the model card that this checkpoint
         # really emits 5-star labels; if it is a 3-class model, `kind` and
         # `name` should be updated together.
         "id": "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
         "kind": "5star",
         "default": False,
         "size_mb": 252,
     },
     "mbert5": {
         "name": "mBERT (5-star)",
         "id": "nlptown/bert-base-multilingual-uncased-sentiment",
         "kind": "5star",
         "default": False,
         "size_mb": 425,
     },
     "turkish2": {
         "name": "Turkish BERT (2-class)",
         "id": "savasy/bert-base-turkish-sentiment-cased",
         "kind": "2class",
         "default": False,
         "size_mb": 442,
     },
     "electra_tr": {
         "name": "Turkish Electra",
         # Fallback checkpoint until a real Turkish Electra sentiment model is
         # found. It is the same checkpoint as "mbert5", so `kind` and
         # `size_mb` mirror that entry instead of describing the future model.
         "id": "nlptown/bert-base-multilingual-uncased-sentiment",
         "kind": "5star",
         "default": False,
         "size_mb": 425,
     },
 }
 LABEL_MAP_3CLS = {0: "negative", 1: "neutral", 2: "positive"}
 # ============================
+#  LAZY PIPELINE CACHE & MEMORY MGMT
 # ============================
 _PIPE_CACHE: Dict[str, TextClassificationPipeline] = {}
 _CFG_CACHE: Dict[str, AutoConfig] = {}
+MAX_CACHE_SIZE = 3  # Maksimum 3 model bellekte tut
def cleanup_cache():
    """Evict the oldest cached models so that one more can be loaded.

    This is called right before a new pipeline is inserted into the cache,
    so entries are dropped until fewer than ``MAX_CACHE_SIZE`` remain; after
    the insert the cache then holds at most ``MAX_CACHE_SIZE`` models.
    Eviction follows dict insertion order (first loaded, first evicted).
    """
    evicted = False
    while _PIPE_CACHE and len(_PIPE_CACHE) >= MAX_CACHE_SIZE:
        oldest_key = next(iter(_PIPE_CACHE))
        del _PIPE_CACHE[oldest_key]
        # The config entry may be missing if an earlier load failed half-way;
        # pop() keeps eviction from raising in that case.
        _CFG_CACHE.pop(oldest_key, None)
        evicted = True

    if evicted:
        # Only pay for a GC pass / CUDA cache flush when something was freed.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
def get_pipe_and_cfg(model_id: str) -> Tuple[Optional[TextClassificationPipeline], Optional[AutoConfig]]:
    """Return the cached (pipeline, config) pair for *model_id*, loading on demand.

    Args:
        model_id: Hugging Face Hub checkpoint id.

    Returns:
        ``(pipeline, config)`` on success, or ``(None, None)`` if anything
        goes wrong while loading; the error is printed, not raised.
    """
    cached_pipe = _PIPE_CACHE.get(model_id)
    cached_cfg = _CFG_CACHE.get(model_id)
    if cached_pipe is not None and cached_cfg is not None:
        return cached_pipe, cached_cfg

    try:
        cleanup_cache()  # free memory before loading another model
        tok = AutoTokenizer.from_pretrained(model_id)
        mdl = AutoModelForSequenceClassification.from_pretrained(model_id)
        cfg = AutoConfig.from_pretrained(model_id)
        pipe = TextClassificationPipeline(
            model=mdl, tokenizer=tok, return_all_scores=True,
            framework="pt", device=-1  # run on CPU
        )
    except Exception as e:
        print(f"Error loading model {model_id}: {str(e)}")
        return None, None

    # Publish both entries only after every load step succeeded, so the two
    # caches can never disagree about which models are available.
    _PIPE_CACHE[model_id] = pipe
    _CFG_CACHE[model_id] = cfg
    return pipe, cfg
 # ============================
 #  LABEL NORMALIZATION
     # 2-class modellerde nötr yoksa güvenli varsayılan
     return "neutral"
+# ============================
+#  TÜRKÇE METİN ÖN İŞLEME
+# ============================
def preprocess_turkish(text: str) -> str:
    """Normalize a chat message before it is passed to a sentiment model.

    Steps, in order: lowercase, collapse whitespace, shorten character runs
    longer than three to exactly three, replace URLs with ``[URL]`` and
    @-mentions with ``[USER]``. Emoji and punctuation are kept because they
    can carry sentiment.

    Args:
        text: Raw input; falsy values (``""``, ``None``) are returned as-is.

    Returns:
        The normalized text.
    """
    if not text:
        return text

    # str.lower() turns 'İ' (U+0130) into 'i' followed by a combining dot
    # (U+0307), which leaves a stray code point in every such word. Map it to
    # a plain 'i' first. ASCII 'I' is left to str.lower() because the input
    # may be mixed-language.
    text = text.replace("\u0130", "i").lower()

    # Collapse runs of whitespace (including newlines) into single spaces.
    text = re.sub(r'\s+', ' ', text).strip()

    # Shorten exaggerated repeats ("aaaaaaa" -> "aaa"). Digits are excluded
    # so that numbers such as "100000" keep their value.
    text = re.sub(r'(\D)\1{3,}', r'\1\1\1', text)

    # Generalize URLs and @-mentions into placeholder tokens.
    text = re.sub(r'http[s]?://\S+', '[URL]', text)
    text = re.sub(r'@\w+', '[USER]', text)
    return text
 # ============================
 #  TEK METİN ANALİZ (API)
 #  endpoint: /api/predict/analyze
 )
 def analyze(text: str):
     """Classify one text with the default model and return a JSON-friendly dict.

     Every return path carries the same core keys (``label``, ``score``,
     ``confidence``, ``all_scores``, ``model_used``, ``processing_time_ms``,
     ``original_text``, ``processed_text``) so callers can parse the response
     without branching; the error path additionally carries ``error``.

     Args:
         text: Raw user text; ``None`` and whitespace-only input are treated
             as empty and reported as neutral.

     Returns:
         A dict describing the prediction. On failure ``label`` is
         ``"error"`` and ``error`` holds the exception message.
     """
     model_name = MODEL_ID.split("/")[-1]
     text = (text or "").strip()
     if not text:
         return {
             "label": "neutral",
             "score": 1.0,
             "confidence": "high",
             "all_scores": {},
             "model_used": model_name,
             "processing_time_ms": 0,
             "original_text": "",
             "processed_text": "",
         }

     try:
         processed_text = preprocess_turkish(text)

         # Time only the model call, not the preprocessing.
         start_time = time.perf_counter()
         scores = _pipe(processed_text)[0]
         processing_time = (time.perf_counter() - start_time) * 1000

         top = max(scores, key=lambda s: s["score"])
         raw = top["label"]

         # Generic "LABEL_<n>" ids are mapped to readable names; any other
         # label is passed through lowercased.
         if raw.startswith("LABEL_"):
             idx = int(raw.split("_")[-1])
             label = LABEL_MAP_3CLS.get(idx, raw).lower()
         else:
             label = raw.lower()

         score_val = float(top["score"])
         confidence = "high" if score_val > 0.8 else "medium" if score_val > 0.6 else "low"

         # Per-class scores, keyed by the raw model labels.
         all_scores = {s["label"]: round(s["score"], 4) for s in scores}

         return {
             "label": label,
             "score": round(score_val, 4),
             "confidence": confidence,
             "all_scores": all_scores,
             "model_used": model_name,
             "processing_time_ms": round(processing_time, 2),
             "original_text": text[:100],  # first 100 characters only
             "processed_text": processed_text[:100],
         }

     except Exception as e:
         # Keep the endpoint from crashing, but leave a trace in the logs so
         # the failure can be diagnosed.
         traceback.print_exc()
         return {
             "label": "error",
             "score": 0.0,
             "confidence": "N/A",
             "all_scores": {},
             "error": str(e),
             "model_used": model_name,
             "processing_time_ms": 0,
             "original_text": text[:100],
             "processed_text": "",
         }
 api_intf = gr.Interface(
     fn=analyze,
 #  ÇOKLU MODEL KARŞILAŞTIRMA (UI)
 # ============================
 def run_benchmark(texts_blob: str, selected_keys: List[str]):
     """Run every selected model over every input line and summarize the results.

     Args:
         texts_blob: Newline-separated input; blank lines are ignored.
         selected_keys: Keys into ``MODELS`` naming the models to run.

     Returns:
         ``(summary_markdown, rows)`` where each row is
         ``[text, model, label, score, latency_ms, confidence]``. A model that
         fails to load or predict contributes one ``"ERROR"`` row per text.
     """
     texts = [t.strip() for t in (texts_blob or "").splitlines() if t.strip()]
     if not texts:
         return "⚠️ Metin alanı boş. Her satıra bir örnek yaz.", []
     if not selected_keys:
         return "⚠️ En az bir model seç.", []

     rows = []  # ["text", "model", "label", "score", "latency_ms", "confidence"]
     errors = []

     for key in selected_keys:
         spec = MODELS[key]
         pipe, cfg = get_pipe_and_cfg(spec["id"])

         if pipe is None or cfg is None:
             errors.append(f"❌ {spec['name']} modeli yüklenemedi")
             rows.extend(_benchmark_error_rows(texts, spec["name"]))
             continue

         # Stage this model's rows separately so that a failure part-way
         # through never leaves partial results next to the ERROR rows.
         model_rows = []
         try:
             processed_texts = [preprocess_turkish(t) for t in texts]

             # One batched call per model; the reported latency is the batch
             # time divided evenly across the inputs.
             t0 = time.perf_counter()
             batch_outputs = pipe(processed_texts)
             total_latency = (time.perf_counter() - t0) * 1000.0
             avg_latency = total_latency / len(processed_texts)

             for orig_text, out in zip(texts, batch_outputs):
                 top = max(out, key=lambda s: s["score"])
                 label = normalize_label(top["label"], cfg, spec["kind"])
                 score = float(top["score"])
                 model_rows.append([
                     _benchmark_preview(orig_text),
                     spec["name"],
                     label,
                     round(score, 4),
                     round(avg_latency, 1),
                     _benchmark_confidence(score),
                 ])
         except Exception as e:
             errors.append(f"⚠️ {spec['name']}: {str(e)[:100]}")
             model_rows = _benchmark_error_rows(texts, spec["name"])
         rows.extend(model_rows)

     # Aggregate per model. ERROR rows are counted but excluded from the
     # latency/score averages: their 0.0 placeholders would otherwise make a
     # broken model look like the fastest one.
     by_model: Dict[str, Dict] = {}
     for _text, mname, lab, sc, lat, conf in rows:
         agg = by_model.setdefault(mname, {
             "n": 0, "lat_sum": 0.0, "score_sum": 0.0,
             "neg": 0, "neu": 0, "pos": 0, "err": 0,
             "high_conf": 0, "med_conf": 0, "low_conf": 0
         })
         if lab == "ERROR":
             agg["err"] += 1
             continue

         agg["n"] += 1
         agg["lat_sum"] += lat
         agg["score_sum"] += sc

         if lab.startswith("neg"):
             agg["neg"] += 1
         elif lab.startswith("neu"):
             agg["neu"] += 1
         elif lab.startswith("pos"):
             agg["pos"] += 1

         if conf == "high":
             agg["high_conf"] += 1
         elif conf == "medium":
             agg["med_conf"] += 1
         elif conf == "low":
             agg["low_conf"] += 1

     lines = ["## 📊 Benchmark Sonuçları\n"]

     if errors:
         lines.append("### ⚠️ Hatalar:")
         for err in errors:
             lines.append(f"- {err}")
         lines.append("")

     lines.append("### 🏆 Model Performansları:")

     # Fastest first; models without a single successful prediction go last.
     sorted_models = sorted(
         by_model.items(),
         key=lambda x: (x[1]["n"] == 0, x[1]["lat_sum"] / max(x[1]["n"], 1)),
     )

     for mname, agg in sorted_models:
         model_info = next((m for m in MODELS.values() if m["name"] == mname), None)
         size_info = f" (~{model_info['size_mb']}MB)" if model_info and "size_mb" in model_info else ""

         lines.append(f"\n#### {mname}{size_info}")
         if agg["n"]:
             avg_lat = agg["lat_sum"] / agg["n"]
             avg_score = agg["score_sum"] / agg["n"]
             lines.append(f"- **Hız:** {avg_lat:.1f} ms (ortalama)")
             lines.append(f"- **Ortalama Güven:** {avg_score:.2%}")
         else:
             lines.append("- **Hız:** N/A")
             lines.append("- **Ortalama Güven:** N/A")
         lines.append(f"- **Duygu Dağılımı:** 😞 {agg['neg']} | 😐 {agg['neu']} | 😊 {agg['pos']}" +
                     (f" | ❌ {agg['err']}" if agg['err'] > 0 else ""))
         lines.append(f"- **Güven Dağılımı:** Yüksek: {agg['high_conf']}, Orta: {agg['med_conf']}, Düşük: {agg['low_conf']}")

     summary_md = "\n".join(lines)
     return summary_md, rows


 def _benchmark_confidence(score: float) -> str:
     """Bucket a top-class score into the "high" / "medium" / "low" bands."""
     if score > 0.8:
         return "high"
     if score > 0.6:
         return "medium"
     return "low"


 def _benchmark_preview(text: str, limit: int = 50) -> str:
     """Shorten *text* for the results table, marking truncation with "..."."""
     return text[:limit] + ("..." if len(text) > limit else "")


 def _benchmark_error_rows(texts: List[str], model_name: str) -> list:
     """Build one placeholder ERROR row per input text for a failed model."""
     return [[_benchmark_preview(t), model_name, "ERROR", 0.0, 0.0, "N/A"] for t in texts]
+with gr.Blocks(title="Sentiment Analysis Benchmark") as bench_ui:
+    gr.Markdown("""
+    ## 🎯 Çoklu Model Karşılaştırma (Sentiment Analysis)
+    **Amaç:** Chat uygulamanız için en uygun sentiment analysis modelini seçin.
+    - Her satıra bir test cümlesi yazın
+    - Test etmek istediğiniz modelleri seçin
+    - Sonuçları hız, doğruluk ve güven açısından karşılaştırın
+    **Not:** İlk çalıştırmada modeller indirilirken biraz bekleyebilirsiniz.
+    """)
     txt = gr.Textbox(
         lines=8,
+        label="Test Metinleri (her satıra bir cümle)",
+        placeholder="""Bugün hava gerçekten harika! 😊
+Bu ürün beklentilerimi karşılamadı.
+Normal bir gün, ne iyi ne kötü.
+Mükemmel bir deneyimdi, kesinlikle tavsiye ederim!
+Berbat bir hizmet, çok kötü.
+Fiyat performans olarak idare eder.
+@user teşekkürler, çok yardımcı oldun!
+Bu site tam bir hayal kırıklığı 😔
+Ortalama, ne beğendim ne beğenmedim.""",
     )
     # CheckboxGroup: sadece isim listesi ve varsayılan seçili isimler
     run_btn = gr.Button("Run benchmark")
     out_md = gr.Markdown()
     out_tbl = gr.Dataframe(
+        headers=["text", "model", "label", "score", "latency_ms", "confidence"],
         row_count=(0, "dynamic"),
+        col_count=(6, "fixed"),
         interactive=False,
         wrap=True,
     )
 # ============================
 demo = gr.TabbedInterface(
     [api_intf, bench_ui],
+    tab_names=["🔌 API (Production)", "🧪 Model Karşılaştırma"],
 )
 if __name__ == "__main__":
+    # Hugging Face Spaces için optimize edilmiş ayarlar
+    demo.launch(
+        server_name="0.0.0.0",
+        share=False,  # Spaces'de otomatik paylaşım yapılır
+        debug=False,  # Production'da False olmalı
+    )