sherdd committed on
Commit
4912b0b
·
verified ·
1 Parent(s): 721b9e7
Files changed (1) hide show
  1. app.py +285 -426
app.py CHANGED
@@ -1,232 +1,172 @@
1
- # app.py Hugging Face Space (Gradio) for 2-Aşamalı (Dil Tespiti ➜ Model Yönlendirme)
2
- # + EN/TR en iyi 3 model ile Benchmark
3
- #
4
- # Gereksinimler (Spaces > Files > "requirements.txt"):
5
- # transformers>=4.43.0
6
- # torch
7
- # gradio>=4.44.0
8
- # langdetect
9
- #
10
- # Çalışma Modları:
11
- # 1) 🔌 API (Production): /api/predict/analyze
12
- # Body örn:
13
- # {
14
- # "text": "Harika bir ürün!",
15
- # "force_lang": null, // opsiyonel: "en" | "tr" | "other"
16
- # "benchmark": false // true ise en iyi 3 adaydan mini-benchmark sonucu döner
17
- # }
18
- #
19
- # 2) 🧪 Benchmark (Auto EN/TR/Other): Çoklu metni satır satır test edip özetler
20
- #
21
- # Notlar:
22
- # - EN için basit ön-işleme uygulanır (TR için uygulanmaz).
23
- # - Label standardizasyonu: positive/neutral/negative
24
- # - Cache + lazy load: Modeller ihtiyaç oldukça yüklenir, bellek sınırı aşıldığında eskiler çıkarılır.
25
-
26
- import os, re, time, gc, traceback
27
- from typing import List, Dict, Tuple, Optional
28
-
29
- import gradio as gr
30
- import torch
31
- from transformers import (
32
- AutoTokenizer,
33
- AutoModelForSequenceClassification,
34
- TextClassificationPipeline,
35
- AutoConfig,
36
- )
37
-
38
- # =========================================
39
- # MODEL HAVUZU (ID’ler Hugging Face’ten)
40
- # =========================================
41
  MODELS: Dict[str, Dict] = {
42
- # ——— EN Önerilen 3 ———
43
- "roberta": {
44
- "name": "RoBERTa Twitter (3-class) [EN]",
45
- "id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
46
- "kind": "3class",
47
- "size_mb": 476,
48
  },
49
- "distilbert": {
50
- "name": "DistilBERT SST-2 (2-class) [EN]",
51
- "id": "distilbert-base-uncased-finetuned-sst-2-english",
52
- "kind": "2class",
53
- "size_mb": 255,
54
  },
55
- "bertweet": {
56
- "name": "BERTweet Sentiment (3-class) [EN]",
57
- "id": "finiteautomata/bertweet-base-sentiment-analysis",
58
- "kind": "3class",
59
- "size_mb": 540,
60
  },
61
-
62
- # ——— TR (şimdilik çok dilli ağırlıklı 3) ———
63
- "xlmr": {
64
- "name": "XLM-R Multilingual (3-class) [TR/Multi]",
65
- "id": "cardiffnlp/twitter-xlm-roberta-base-sentiment",
66
- "kind": "3class",
67
- "size_mb": 278,
68
  },
69
- "bert_5star": {
70
- "name": "BERT Multilingual Reviews (5-star) [TR/Multi]",
71
- "id": "nlptown/bert-base-multilingual-uncased-sentiment",
72
- "kind": "5star",
73
- "size_mb": 425,
74
  },
75
- # Hafif bir seçenek (EN verisiyle eğitilmiş olsa da fallback amaçlı)
76
- "albert": {
77
- "name": "ALBERT v2 (3-class) [Light/Fallback]",
78
- "id": "barissayil/bert-sentiment-analysis-sst",
79
- "kind": "3class",
80
- "size_mb": 46,
81
  },
82
  }
83
 
84
- # Dil bazlı “top-3” kümeleri (ileride TR-özel model eklersen burayı güncelle)
85
- LANG_TOP3 = {
86
- "en": ["roberta", "distilbert", "bertweet"],
87
- "tr": ["xlmr", "bert_5star", "albert"], # TR için: çok dilli + hafif model
88
- "other": ["xlmr", "bert_5star", "roberta"], # fallback
89
  }
90
 
91
- # =========================================
92
- # LAZY CACHE (Bellek Yönetimi)
93
- # =========================================
94
- _PIPE_CACHE: Dict[str, TextClassificationPipeline] = {}
95
- _CFG_CACHE: Dict[str, AutoConfig] = {}
96
- MAX_CACHE_SIZE = int(os.getenv("MAX_CACHE_SIZE", "4")) # Hugging Face CPU/VRAM'e göre ayarla
97
-
98
- def _cleanup_cache():
99
- """Eski modelleri temizle (FIFO)."""
 
 
 
 
 
 
 
 
 
 
100
  try:
101
- while len(_PIPE_CACHE) > MAX_CACHE_SIZE:
102
- oldest_key = next(iter(_PIPE_CACHE.keys()))
103
- _PIPE_CACHE.pop(oldest_key, None)
104
- _CFG_CACHE.pop(oldest_key, None)
105
- gc.collect()
106
- if torch.cuda.is_available():
107
- torch.cuda.empty_cache()
108
- except Exception:
109
- pass
110
-
111
- def get_pipe_and_cfg(model_key: str) -> Tuple[Optional[TextClassificationPipeline], Optional[AutoConfig]]:
112
- """model_key -> (pipeline, config). Hata varsa (None, None)."""
113
- try:
114
- spec = MODELS[model_key]
115
- model_id = spec["id"]
116
- if model_id not in _PIPE_CACHE:
117
- _cleanup_cache()
118
- tok = AutoTokenizer.from_pretrained(model_id)
119
- mdl = AutoModelForSequenceClassification.from_pretrained(model_id)
120
- pipe = TextClassificationPipeline(
121
- model=mdl,
122
- tokenizer=tok,
123
- framework="pt",
124
- return_all_scores=True,
125
- device=-1 # CPU
126
- )
127
- _PIPE_CACHE[model_id] = pipe
128
- _CFG_CACHE[model_id] = AutoConfig.from_pretrained(model_id)
129
- return _PIPE_CACHE[model_id], _CFG_CACHE[model_id]
130
- except Exception as e:
131
- print(f"[load-error] {model_key} -> {e}")
132
- return None, None
133
-
134
- # =========================================
135
- # DİL TESPİTİ
136
- # =========================================
137
  try:
138
- from langdetect import detect # hızlı ve hafif; kısa metinlerde bazen şaşabilir
139
  except Exception:
140
- detect = None
141
-
142
- def detect_lang(text: str) -> str:
143
- """'en' | 'tr' | 'other'. Boş/çok kısa metin 'other' döner."""
144
- txt = (text or "").strip()
145
- if not txt or len(txt) < 2:
146
- return "other"
147
- if detect is None:
148
- return "other"
149
  try:
150
- lang = detect(txt)
151
- return lang if lang in ("en", "tr") else "other"
152
  except Exception:
153
- return "other"
154
-
155
- # =========================================
156
- # LABEL NORMALİZASYONU
157
- # =========================================
158
- def normalize_label(raw_label: str, cfg: Optional[AutoConfig], kind: str) -> str:
159
- """Çeşitli model etiketlerini: negative / neutral / positive standardına çevir."""
160
- lbl = (raw_label or "").lower()
161
 
162
- # Bazı modeller LABEL_0/1/2 verir -> id2label ile çöz
163
- if lbl.startswith("label_") and cfg is not None and hasattr(cfg, "id2label"):
 
 
164
  try:
165
- idx = int(lbl.split("_")[-1])
166
- lbl = str(cfg.id2label[idx]).lower()
167
  except Exception:
168
- pass
169
-
170
- # 5-yıldızlılar: 1..5
171
- if kind == "5star":
172
- m = re.search(r"([1-5])", lbl)
173
- if m:
174
- s = int(m.group(1))
175
- if s <= 2: return "negative"
176
- if s == 3: return "neutral"
177
- return "positive"
178
-
179
- # Metin tabanlı eşleşmeler
180
- if "neg" in lbl: return "negative"
181
- if "neu" in lbl: return "neutral"
182
- if "pos" in lbl: return "positive"
183
-
184
- # 2-class modellerde nötr yoksa temkinli varsayılan
185
- return "neutral"
186
-
187
- # =========================================
188
- # ÖN-İŞLEME (ENGLISH)
189
- # =========================================
190
- def preprocess_en(text: str) -> str:
191
- """EN için hafif normalize. TR için dokunmuyoruz."""
192
- if not text:
193
- return text
194
- t = re.sub(r"\s+", " ", text).strip()
195
- t = re.sub(r"(.)\1{3,}", r"\1\1", t) # goooood -> good
196
- t = re.sub(r"http[s]?://\S+", "URL", t) # linkleri normalize
197
- t = re.sub(r"@\w+", "@USER", t) # mention normalize
198
- t = re.sub(r"#(\w+)", r"\1", t) # hashtag kelimesini koru
199
- # Basit contraction açma
200
- for old, new in {
201
- "won't":"will not","can't":"cannot","n't":" not",
202
- "'re":" are","'ve":" have","'ll":" will","'d":" would","'m":" am"
203
- }.items():
204
- t = t.replace(old, new)
205
- return t
206
-
207
- # =========================================
208
- # ANALYZE (API) — 2 Aşama: Dil Tespiti ➜ Uygun Model
209
- # =========================================
210
- def _pick_default_key_for_lang(lang: str) -> str:
211
- if lang == "en":
212
- return "roberta"
213
- if lang == "tr":
214
- return "xlmr"
215
- return "xlmr" # other -> çok dilli güvenli seçim
216
-
217
- def analyze(
218
- text: str,
219
- force_lang: Optional[str] = None,
220
- benchmark: bool = False
221
- ):
222
- """
223
- Production API.
224
- - force_lang: "en" | "tr" | "other" | None
225
- - benchmark: True ise dil kümesindeki en iyi 3 adaydan mini karşılaştırma döner
226
- """
227
- text = (text or "").strip()
228
- if not text:
229
- return {
230
  "label": "neutral",
231
  "score": 1.0,
232
  "confidence": "high",
@@ -234,235 +174,154 @@ def analyze(
234
  "model_used": "none",
235
  "processing_time_ms": 0.0
236
  }
237
-
238
- lang = force_lang if force_lang in ("en","tr","other") else detect_lang(text)
239
- text_proc = preprocess_en(text) if lang == "en" else text
240
-
241
- # Mini-benchmark istendiyse: LANG_TOP3'te dolaş
242
- candidates = []
243
- if benchmark:
244
- for key in LANG_TOP3.get(lang, LANG_TOP3["other"]):
245
- pipe, cfg = get_pipe_and_cfg(key)
246
- if pipe is None:
247
- continue
248
- t0 = time.perf_counter()
249
- out = pipe(text_proc)[0]
250
- latency_ms = (time.perf_counter() - t0) * 1000.0
251
- top = max(out, key=lambda s: s["score"])
252
- label = normalize_label(top["label"], cfg, MODELS[key]["kind"])
253
- candidates.append({
254
- "model": key,
255
- "label": label,
256
- "score": round(float(top["score"]), 4),
257
- "latency_ms": round(latency_ms, 2)
258
  })
259
-
260
- # En iyi aday: skor farkı <0.03 ise daha hızlı olanı seç
261
- if candidates:
262
- best = sorted(
263
- candidates,
264
- key=lambda c: (-c["score"], c["latency_ms"])
265
- )[0]
266
- final_model_key = best["model"]
267
- else:
268
- final_model_key = _pick_default_key_for_lang(lang)
269
- else:
270
- final_model_key = _pick_default_key_for_lang(lang)
271
-
272
- # Tek atış (veya benchmark sonrası kazanan)
273
- pipe, cfg = get_pipe_and_cfg(final_model_key)
274
- if pipe is None:
275
- return {
276
  "label": "error",
277
  "score": 0.0,
278
  "confidence": "low",
279
  "lang": lang,
280
- "model_used": final_model_key,
281
  "processing_time_ms": 0.0,
282
- "error": f"model_load_failed:{final_model_key}"
283
  }
284
-
285
- t0 = time.perf_counter()
286
- out = pipe(text_proc)[0]
287
- latency_ms = (time.perf_counter() - t0) * 1000.0
288
- top = max(out, key=lambda s: s["score"])
289
- label = normalize_label(top["label"], cfg, MODELS[final_model_key]["kind"])
290
- score = float(top["score"])
291
- conf = "high" if score > 0.8 else "medium" if score > 0.6 else "low"
292
-
293
- resp = {
294
- "label": label,
295
- "score": round(score, 4),
296
- "confidence": conf,
297
- "lang": lang,
298
- "model_used": MODELS[final_model_key]["id"].split("/")[-1],
299
- "processing_time_ms": round(latency_ms, 2),
300
- "text_len": len(text)
301
  }
302
- if benchmark and candidates:
303
- resp["candidates"] = candidates
304
- return resp
305
-
306
- # =========================================
307
- # BENCHMARK (UI) EN/TR/OTHER otomatik kova
308
- # =========================================
309
- def _summarize_rows(rows: List[List], errors: List[str]) -> str:
310
- # rows: ["text", "bucket/modelname", "label", "score", "latency_ms", "confidence"]
311
- by_model: Dict[str, Dict] = {}
312
- for r in rows:
313
- if len(r) < 6:
314
- continue
315
- _, mname, lab, sc, lat, conf = r
316
- agg = by_model.setdefault(mname, {
317
- "n": 0, "lat_sum": 0.0, "score_sum": 0.0,
318
- "neg": 0, "neu": 0, "pos": 0, "err": 0,
319
- "high": 0, "med": 0, "low": 0
320
- })
321
- agg["n"] += 1
322
- agg["lat_sum"] += (lat or 0.0)
323
- agg["score_sum"] += (sc or 0.0)
324
- if lab == "ERROR":
325
- agg["err"] += 1
326
- elif lab.startswith("neg"):
327
- agg["neg"] += 1
328
- elif lab.startswith("neu"):
329
- agg["neu"] += 1
330
- elif lab.startswith("pos"):
331
- agg["pos"] += 1
332
- if conf == "high":
333
- agg["high"] += 1
334
- elif conf == "medium":
335
- agg["med"] += 1
336
- elif conf == "low":
337
- agg["low"] += 1
338
-
339
- lines = ["## 📊 Benchmark Results\n"]
340
- if errors:
341
- lines.append("### ⚠️ Errors:")
342
- for e in errors:
343
- lines.append(f"- {e}")
344
- lines.append("")
345
-
346
- lines.append("### 🏆 Model Performance (sorted by avg latency):")
347
- order = sorted(by_model.items(), key=lambda kv: kv[1]["lat_sum"]/max(1,kv[1]["n"]))
348
- for mname, agg in order:
349
- n = max(1, agg["n"])
350
- avg_lat = agg["lat_sum"]/n
351
- avg_score = agg["score_sum"]/n
352
- lines.append(f"\n#### {mname}")
353
- lines.append(f"- **Speed:** {avg_lat:.1f} ms (avg)")
354
- lines.append(f"- **Avg Confidence:** {avg_score:.2%}")
355
- lines.append(f"- **Sentiment:** 😞 {agg['neg']} | 😐 {agg['neu']} | 😊 {agg['pos']}" +
356
- (f" | ❌ {agg['err']}" if agg['err'] else ""))
357
- lines.append(f"- **Conf:** High {agg['high']} / Med {agg['med']} / Low {agg['low']}")
358
-
359
- return "\n".join(lines)
360
-
361
- def run_benchmark_auto(texts_blob: str):
362
- texts = [t.strip() for t in (texts_blob or "").splitlines() if t.strip()]
363
- if not texts:
364
- return "⚠️ Metin alanı boş. Her satıra bir örnek yaz.", []
365
-
366
- buckets = {"en": [], "tr": [], "other": []}
367
- for t in texts:
368
- buckets[detect_lang(t)].append(t)
369
-
370
- rows, errors = [], []
371
-
372
- def bench_set(text_list: List[str], keys: List[str], tag: str):
373
- nonlocal rows, errors
374
- if not text_list:
375
- return
376
- for key in keys:
377
- spec = MODELS[key]
378
- pipe, cfg = get_pipe_and_cfg(key)
379
- modelname = f"{tag}/{spec['name']}"
380
- if pipe is None:
381
- errors.append(f"❌ {modelname} yüklenemedi")
382
- for t in text_list:
383
- rows.append([t[:50], modelname, "ERROR", 0.0, 0.0, "N/A"])
384
- continue
385
- # EN kümesi için basit ön-işleme
386
- proc = [preprocess_en(x) if tag=="EN" else x for x in text_list]
387
- t0 = time.perf_counter()
388
- outs = pipe(proc)
389
- avg_ms = (time.perf_counter() - t0) * 1000.0 / max(1, len(proc))
390
- for orig, out in zip(text_list, outs):
391
  try:
392
- top = max(out, key=lambda s: s["score"])
393
- lab = normalize_label(top["label"], cfg, spec["kind"])
394
- sc = float(top["score"])
395
- conf = "high" if sc > 0.8 else "medium" if sc > 0.6 else "low"
396
- rows.append([
397
- orig[:50] + ("..." if len(orig) > 50 else ""),
398
- modelname,
399
- lab,
400
- round(sc, 4),
401
- round(avg_ms, 1),
402
- conf
403
  ])
404
- except Exception as ex:
405
- errors.append(f"⚠️ {modelname}: {str(ex)[:100]}")
406
- rows.append([orig[:50], modelname, "ERROR", 0.0, 0.0, "N/A"])
407
-
408
- bench_set(buckets["en"], LANG_TOP3["en"], "EN")
409
- bench_set(buckets["tr"], LANG_TOP3["tr"], "TR")
410
- bench_set(buckets["other"], LANG_TOP3["other"], "OTHER")
411
-
412
- summary = _summarize_rows(rows, errors)
413
- return summary, rows
414
-
415
- # =========================================
416
- # GRADIO ARAYÜZLERİ
417
- # =========================================
418
- api_intf = gr.Interface(
419
- fn=analyze,
420
- inputs=[
421
- gr.Textbox(lines=3, label="Text", placeholder="Type a message..."),
422
- gr.Textbox(lines=1, label="force_lang (optional: en|tr|other)", value="", visible=False),
423
- gr.Checkbox(label="benchmark (return candidates)", value=False, visible=False),
 
 
 
 
 
424
  ],
425
- outputs=gr.JSON(label="Result"),
426
- title="🔌 Sentiment API (Production)",
427
- description="POST /api/predict/analyze Returns {label, score, confidence, lang, model_used, processing_time_ms[, candidates]}",
428
  )
429
- api_intf.api_name = "analyze" # /api/predict/analyze
430
-
431
- with gr.Blocks(title="Sentiment Analysis — EN/TR Auto Routing") as bench_ui:
432
- gr.Markdown("""
433
- ## 🧪 Multi-Model Benchmark (Auto EN/TR/Other)
434
- Her satıra bir cümle gir. Uygulama her cümlenin dilini otomatik saptar:
435
- - **EN** en iyi 3 EN modeli
436
- - **TR** ➜ en iyi 3 TR (çok dilli ağırlıklı) modeli
437
- - **Other** fallback 3'lüsü
438
-
439
- **Önerilen hedefler:**
440
- - P95 latency < 200ms
441
- - Doğruluk/Skor yüksek, fark < 0.03 ise daha hızlı modeli seç
442
- """)
443
- txt = gr.Textbox(
444
- lines=12,
445
- label="Test Sentences (one per line, TR ve EN karışık olabilir)",
446
- placeholder="I absolutely love this product!\nHizmet çok yavaş, memnun kalmadım.\nIt's okay, not great.\nFiyatına göre idare eder.\nWorst experience ever."
447
- )
448
- run_btn = gr.Button("Run benchmark (auto EN/TR/Other)")
449
- out_md = gr.Markdown()
450
- out_tbl = gr.Dataframe(
451
- headers=["text", "bucket/model", "label", "score", "latency_ms", "confidence"],
452
- row_count=(0, "dynamic"),
453
- col_count=(6, "fixed"),
454
- interactive=False,
455
- wrap=True,
456
  )
457
- run_btn.click(fn=run_benchmark_auto, inputs=[txt], outputs=[out_md, out_tbl])
458
 
459
- demo = gr.TabbedInterface(
460
- [api_intf, bench_ui],
461
- tab_names=["🔌 API (Production)", "🧪 Model Comparison"]
462
  )
463
 
464
- if __name__ == "__main__":
465
- # İsteğe bağlı: Sıcak başlatma için en olası 2-3 modeli önceden dokundurabilirsin
466
- # for k in ["roberta", "xlmr"]:
467
- # get_pipe_and_cfg(k)
468
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), show_error=True)
 
1
+ import os # isletim sistemi degiskenlerine erismek icin
2
+ import re # metin isleme icin regexp kutuphanesi
3
+ import time # gecikme olcumu icin zaman fonksiyonlari
4
+ import gc # bellek temizligi icin garbage collector
5
+ from typing import Dict, Tuple, Optional, List # tip ipuclari icin
6
+ import gradio as gr # Hugging Face Spaces arayuzunu kurmak icin
7
+ import torch # pytorch modellerini calistirmak icin
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline, AutoConfig
9
+
10
+ # ====== MODEL KAYITLARI (sade ve ogrenci dostu) ======
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
# ====== MODEL REGISTRY ======
# Maps a short internal key to a Hugging Face model spec.
# Fields: "name" (display label), "id" (HF hub identifier),
# "kind" (label scheme: 2class / 3class / 5star).
MODELS: Dict[str, Dict] = {
    # Three candidates for English input.
    "roberta": {
        "name": "RoBERTa Twitter 3class EN",
        "id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "kind": "3class",
    },
    "distilbert": {
        "name": "DistilBERT SST2 2class EN",
        "id": "distilbert-base-uncased-finetuned-sst-2-english",
        "kind": "2class",
    },
    "bertweet": {
        "name": "BERTweet 3class EN",
        "id": "finiteautomata/bertweet-base-sentiment-analysis",
        "kind": "3class",
    },
    # Three candidates for Turkish and other languages (multilingual-heavy).
    "xlmr": {
        "name": "XLM-R 3class Multi",
        "id": "cardiffnlp/twitter-xlm-roberta-base-sentiment",
        "kind": "3class",
    },
    "bert_5star": {
        "name": "BERT Multi 5star",
        "id": "nlptown/bert-base-multilingual-uncased-sentiment",
        "kind": "5star",
    },
    "albert": {
        "name": "ALBERT v2 3class Light",
        "id": "barissayil/bert-sentiment-analysis-sst",
        "kind": "3class",
    },
}
45
 
46
+ # ====== DIL BAZLI TOP3 SECIMLERI ======
47
# ====== PER-LANGUAGE TOP-3 CANDIDATE SETS ======
# For each language bucket, the three model keys tried during benchmarking.
LANG_TOP3 = {
    "en": ["roberta", "distilbert", "bertweet"],    # best three for English
    "tr": ["xlmr", "bert_5star", "albert"],         # multilingual-heavy picks for Turkish
    "other": ["xlmr", "bert_5star", "roberta"],     # fallback set for everything else
}
52
 
53
# ====== LAZY CACHE (load models only on demand) ======
_PIPE_CACHE: Dict[str, "TextClassificationPipeline"] = {}  # model_id -> loaded pipeline
_CFG_CACHE: Dict[str, "AutoConfig"] = {}  # model_id -> model config
# Maximum number of distinct models kept in memory at once.
# Configurable via the MAX_CACHE_SIZE env var (default 4) so the limit can
# be tuned to the host's RAM without editing code.
MAX_CACHE_SIZE = int(os.getenv("MAX_CACHE_SIZE", "4"))

def cleanup_cache() -> None:
    """Evict the oldest cached models (FIFO) until the cache fits MAX_CACHE_SIZE.

    Dicts preserve insertion order, so the first key is the least recently
    loaded model. After each eviction, Python garbage collection runs and,
    when CUDA is available, the GPU allocator cache is released too.
    """
    while len(_PIPE_CACHE) > MAX_CACHE_SIZE:
        oldest_key = next(iter(_PIPE_CACHE))  # first inserted = oldest entry
        _PIPE_CACHE.pop(oldest_key, None)
        _CFG_CACHE.pop(oldest_key, None)
        gc.collect()  # reclaim the dropped model's memory promptly
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # also release cached GPU blocks
66
+
67
def get_pipe_and_cfg(model_key: str) -> Tuple[Optional[TextClassificationPipeline], Optional[AutoConfig]]:
    """Resolve a registry key to a (pipeline, config) pair, loading lazily.

    Results are cached by Hugging Face model id. On any load failure the
    error is printed and (None, None) is returned instead of raising, so
    callers can skip the broken model.
    """
    model_id = MODELS[model_key]["id"]  # look up the HF hub id for this key

    # Serve from the cache when the pipeline was loaded before.
    if model_id in _PIPE_CACHE:
        return _PIPE_CACHE[model_id], _CFG_CACHE.get(model_id)

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSequenceClassification.from_pretrained(model_id)
        pipeline = TextClassificationPipeline(
            model=model,
            tokenizer=tokenizer,
            framework="pt",          # run with PyTorch
            return_all_scores=True,  # keep scores for every class
            device=-1,               # CPU only
        )
        _PIPE_CACHE[model_id] = pipeline
        _CFG_CACHE[model_id] = AutoConfig.from_pretrained(model_id)
        cleanup_cache()  # evict older entries if the cache grew past its limit
        return pipeline, _CFG_CACHE[model_id]
    except Exception as e:
        print(f"model yukleme hatasi: {model_key} -> {e}")
        return None, None
89
+
90
# ====== LANGUAGE DETECTION ======
try:
    from langdetect import detect  # lightweight language detector
except Exception:
    detect = None  # keep running without it; detect_lang then always says "other"

def detect_lang(text: str) -> str:
    """Classify *text* as "en", "tr" or "other" (the only routed buckets)."""
    stripped = (text or "").strip()
    if len(stripped) < 2 or detect is None:
        # Too short to detect reliably, or langdetect is unavailable.
        return "other"
    try:
        code = detect(stripped)
    except Exception:
        return "other"  # detector choked on odd input
    return code if code in ("en", "tr") else "other"
 
 
 
 
 
 
 
107
 
108
# ====== LABEL NORMALIZATION ======
def normalize_label(raw_label: str, cfg: Optional["AutoConfig"], kind: str) -> str:
    """Map a model-specific label onto negative / neutral / positive.

    Handles three schemes: opaque "LABEL_<n>" ids (resolved through the
    config's id2label mapping), 1-5 star ratings (kind == "5star"), and
    plain-text labels containing neg/neu/pos. Anything unrecognized becomes
    "neutral" — a safe default for 2-class models without a neutral class.
    """
    label = (raw_label or "").lower()

    # Resolve "LABEL_<n>" to the config's human-readable label when possible.
    if label.startswith("label_") and cfg is not None and hasattr(cfg, "id2label"):
        try:
            label = str(cfg.id2label[int(label.split("_")[-1])]).lower()
        except Exception:
            pass  # keep the raw label and fall through

    # Star-rating models: 1-2 -> negative, 3 -> neutral, 4-5 -> positive.
    if kind == "5star":
        match = re.search(r"([1-5])", label)
        if match:
            stars = int(match.group(1))
            if stars <= 2:
                return "negative"
            return "neutral" if stars == 3 else "positive"

    # Plain-text labels: substring match, checked in this priority order.
    for needle, normalized in (("neg", "negative"), ("neu", "neutral"), ("pos", "positive")):
        if needle in label:
            return normalized

    return "neutral"  # conservative default for anything else
133
+
134
# ====== ENGLISH PREPROCESSING ======
def preprocess_en(text: str) -> str:
    """Lightly normalize English text before classification.

    Collapses whitespace and runs of repeated characters, replaces URLs and
    @mentions with placeholder tokens, strips hashtag signs, and expands
    common contractions. Turkish input is deliberately left untouched by
    the callers of this function.
    """
    if not text:
        return text  # nothing to do for empty / None input

    cleaned = re.sub(r"\s+", " ", text).strip()          # squeeze whitespace
    cleaned = re.sub(r"(.)\1{3,}", r"\1\1", cleaned)     # "goooood" -> "good"
    cleaned = re.sub(r"http[s]?://\S+", "URL", cleaned)  # normalize links
    cleaned = re.sub(r"@\w+", "@USER", cleaned)          # normalize mentions
    cleaned = re.sub(r"#(\w+)", r"\1", cleaned)          # keep the hashtag word only

    # Expand frequent contractions; "won't"/"can't" must run before "n't".
    contractions = (
        ("won't", "will not"),
        ("can't", "cannot"),
        ("n't", " not"),
        ("'re", " are"),
        ("'ve", " have"),
        ("'ll", " will"),
        ("'d", " would"),
        ("'m", " am"),
    )
    for contraction, expansion in contractions:
        cleaned = cleaned.replace(contraction, expansion)
    return cleaned
156
+
157
# ====== LANGUAGE -> DEFAULT MODEL RULE ======
def pick_default_key_for_lang(lang: str) -> str:
    """Return the default model key for a language bucket.

    English gets the dedicated RoBERTa model; Turkish and every other
    language fall back to the multilingual XLM-R model.
    """
    return "roberta" if lang == "en" else "xlmr"
164
+
165
# ====== MAIN API FUNCTION (/api/predict/analyze) ======
def analyze(text: str, force_lang: Optional[str] = None, benchmark: bool = False):
    """Two-stage sentiment analysis: language detection, then model routing.

    Args:
        text: input sentence to classify.
        force_lang: optional override ("en" | "tr" | "other"); any other
            value falls back to automatic detection.
        benchmark: when True, run the language's TOP-3 candidate models and
            pick the winner (highest score, ties broken by lower latency);
            the per-candidate results are attached to the response.

    Returns:
        dict with label / score / confidence / lang / model_used /
        processing_time_ms, plus "candidates" when benchmark produced any,
        or an error payload when the chosen model could not be loaded.
    """
    txt = (text or "").strip()
    if not txt:
        # Empty input: return a neutral stub. Include "lang" so the response
        # schema matches the success and error paths below.
        return {
            "label": "neutral",
            "score": 1.0,
            "confidence": "high",
            "lang": "other",
            "model_used": "none",
            "processing_time_ms": 0.0
        }

    # Stage 1: decide the language (a valid explicit override wins).
    lang = force_lang if force_lang in ("en", "tr", "other") else detect_lang(txt)
    proc = preprocess_en(txt) if lang == "en" else txt  # EN-only preprocessing

    # Stage 2: optionally mini-benchmark the language's TOP-3 candidates.
    candidates: List[Dict] = []
    if benchmark:
        for key in LANG_TOP3.get(lang, LANG_TOP3["other"]):
            pipe, cfg = get_pipe_and_cfg(key)
            if pipe is None:
                continue  # skip candidates that failed to load
            t0 = time.perf_counter()
            out = pipe(proc)[0]
            ms = (time.perf_counter() - t0) * 1000.0
            top = max(out, key=lambda s: s["score"])
            candidates.append({
                "model": key,
                "label": normalize_label(top["label"], cfg, MODELS[key]["kind"]),
                # Round like the final response so the payload is consistent.
                "score": round(float(top["score"]), 4),
                "latency_ms": round(ms, 2)
            })

    if candidates:
        # Highest score first; ties broken by lower latency.
        candidates.sort(key=lambda c: (-c["score"], c["latency_ms"]))
        winner_key = candidates[0]["model"]
    else:
        winner_key = pick_default_key_for_lang(lang)

    pipe, cfg = get_pipe_and_cfg(winner_key)
    if pipe is None:
        return {
            "label": "error",
            "score": 0.0,
            "confidence": "low",
            "lang": lang,
            "model_used": winner_key,
            "processing_time_ms": 0.0,
            "error": "model_load_failed"
        }

    # Final single-shot prediction with the winning model.
    t0 = time.perf_counter()
    out = pipe(proc)[0]
    ms = (time.perf_counter() - t0) * 1000.0
    top = max(out, key=lambda s: s["score"])
    label = normalize_label(top["label"], cfg, MODELS[winner_key]["kind"])
    score = float(top["score"])
    confidence = "high" if score > 0.8 else ("medium" if score > 0.6 else "low")

    resp = {
        "label": label,
        "score": round(score, 4),
        "confidence": confidence,
        "lang": lang,
        "model_used": MODELS[winner_key]["id"].split("/")[-1],  # short model name
        "processing_time_ms": round(ms, 2)
    }
    if benchmark and candidates:
        resp["candidates"] = candidates
    return resp
233
+
234
# ====== BENCHMARK UI (automatic EN/TR/OTHER bucketing) ======
def run_benchmark_auto(texts_blob: str):
    """Benchmark every non-empty input line against its language's TOP-3 models.

    Lines are bucketed by detected language, each bucket is run in batch
    through its candidate models, and a (summary_text, table_rows) pair is
    returned for the Gradio UI.
    """
    texts = [line.strip() for line in (texts_blob or "").splitlines() if line.strip()]
    if not texts:
        return "Uyari: metin alani bos.", []

    # Split the inputs into per-language buckets.
    buckets = {"en": [], "tr": [], "other": []}
    for sentence in texts:
        buckets[detect_lang(sentence)].append(sentence)

    rows: List[List] = []   # table rows for the UI dataframe
    errors: List[str] = []  # human-readable load/prediction failures

    def bench_set(text_list: List[str], keys: List[str], tag: str):
        """Run one language bucket through its candidate models."""
        if not text_list:
            return
        for key in keys:
            spec = MODELS[key]
            pipe, cfg = get_pipe_and_cfg(key)
            modelname = f"{tag}/{spec['name']}"
            if pipe is None:
                # Record the load failure once, plus an ERROR row per input.
                errors.append(f"yuklenemedi: {modelname}")
                for sentence in text_list:
                    rows.append([sentence[:50], modelname, "ERROR", 0.0, 0.0, "N/A"])
                continue
            # Only the English bucket gets the light preprocessing pass.
            proc = [preprocess_en(s) if tag == "EN" else s for s in text_list]
            t0 = time.perf_counter()
            outs = pipe(proc)  # batched prediction for the whole bucket
            avg_ms = (time.perf_counter() - t0) * 1000.0 / max(1, len(proc))
            for orig, out in zip(text_list, outs):
                try:
                    top = max(out, key=lambda s: s["score"])
                    lab = normalize_label(top["label"], cfg, spec["kind"])
                    sc = float(top["score"])
                    conf = "high" if sc > 0.8 else ("medium" if sc > 0.6 else "low")
                    rows.append([
                        orig[:50] + ("..." if len(orig) > 50 else ""),  # truncated text
                        modelname,
                        lab,
                        round(sc, 4),
                        round(avg_ms, 1),
                        conf
                    ])
                except Exception as ex:
                    errors.append(f"hata: {modelname}: {str(ex)[:80]}")
                    rows.append([orig[:50], modelname, "ERROR", 0.0, 0.0, "N/A"])

    bench_set(buckets["en"], LANG_TOP3["en"], "EN")
    bench_set(buckets["tr"], LANG_TOP3["tr"], "TR")
    bench_set(buckets["other"], LANG_TOP3["other"], "OTHER")

    # Build the plain-text summary: list errors, or a simple done message.
    summary_lines: List[str] = []
    if errors:
        summary_lines.append("Hatalar:")
        summary_lines.extend(f"- {e}" for e in errors)
    if not summary_lines:
        summary_lines.append("Benchmark tamamlandi.")
    return "\n".join(summary_lines), rows
292
+
293
# ====== GRADIO INTERFACES ======
# Production API interface. NOTE: the endpoint name must be passed to the
# gr.Interface constructor — assigning `api_intf.api_name = "analyze"` after
# construction does not rename the already-registered /api/predict route in
# Gradio 4, so the documented /api/predict/analyze path would not exist.
api_intf = gr.Interface(
    fn=analyze,
    inputs=[
        gr.Textbox(lines=3, label="Text"),
        gr.Textbox(lines=1, label="force_lang (en|tr|other, opsiyonel)", value=""),
        gr.Checkbox(label="benchmark (kisa TOP3 karsilastirma)", value=False),
    ],
    outputs=gr.JSON(label="Result"),
    title="Sentiment API (Production)",
    description="POST /api/predict/analyze doner: {label, score, confidence, lang, model_used, processing_time_ms[, candidates]}",
    api_name="analyze",  # endpoint path: /api/predict/analyze
)
306
+
307
# Benchmark tab: paste many sentences, one per line, and compare models.
with gr.Blocks(title="Sentiment Benchmark") as bench_ui:
    gr.Markdown("Coklu metin benchmark. Her satir ayri bir ornek olmalidir.")
    txt = gr.Textbox(lines=10, label="Ornekler (satir satir)")  # one example per line
    btn = gr.Button("Calistir")  # run button
    out_md = gr.Markdown()       # summary text output
    out_tbl = gr.Dataframe(      # result table
        headers=["text", "bucket/model", "label", "score", "latency_ms", "confidence"],
        row_count=(0, "dynamic"),  # rows grow with the results
        col_count=(6, "fixed"),    # always six columns
        interactive=False,         # read-only for the user
        wrap=True,                 # wrap long text in cells
    )
    btn.click(fn=run_benchmark_auto, inputs=[txt], outputs=[out_md, out_tbl])
320
 
321
# Combine both tabs into the final two-tab app.
demo = gr.TabbedInterface(
    [api_intf, bench_ui],      # first tab: API, second tab: benchmark
    tab_names=["API", "Benchmark"],
)

if __name__ == "__main__":
    # Bind to all interfaces; Spaces injects the port via the PORT env var.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), show_error=True)