"""Gradio app that scores social media text for sentiment, toxicity and keywords.

Input is either a single text typed into the text box or a CSV file whose
first text-like column is analysed row by row. Results are shown in a table
and written to a temporary CSV file offered for download.
"""

import os
import re
import tempfile
from collections import Counter

import gradio as gr
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# ── Model loading ──────────────────────────────────────────────────────────────
# NOTE: both models are loaded at import time, so importing this module
# downloads/loads the weights before the UI is built.
print("Loading sentiment model (savasy/bert-base-turkish-sentiment-cased)...")
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="savasy/bert-base-turkish-sentiment-cased",
    tokenizer="savasy/bert-base-turkish-sentiment-cased",
    truncation=True,
    max_length=512,
)

print("Loading toxicity model (unitary/toxic-bert)...")
toxicity_pipeline = pipeline(
    "text-classification",
    model="unitary/toxic-bert",
    tokenizer="unitary/toxic-bert",
    truncation=True,
    max_length=512,
)

# A text is reported as toxic when its toxicity score reaches this value.
TOXICITY_THRESHOLD = 0.5

# ── Keyword extraction (simple TF-based, no external API) ─────────────────────
STOPWORDS_TR = {
    "bir", "bu", "ve", "ile", "da", "de", "mi", "mu", "mü", "mı",
    "ki", "ne", "için", "ama", "fakat", "çok", "daha", "en", "gibi",
    "kadar", "ben", "sen", "o", "biz", "siz", "onlar", "şu", "her",
    "hiç", "bazı", "tüm", "bütün", "var", "yok", "olan", "olarak",
}
STOPWORDS_EN = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "this", "that", "these", "those", "it", "its",
    "i", "you", "he", "she", "we", "they", "not", "no", "so", "as", "if",
}
STOPWORDS = STOPWORDS_TR | STOPWORDS_EN

# Words of three or more ASCII or Turkish letters; compiled once for reuse.
_WORD_RE = re.compile(r"\b[a-zA-ZçğışöüÇĞİŞÖÜ]{3,}\b")


def extract_keywords(text: str, top_n: int = 5) -> str:
    """Return the ``top_n`` most frequent non-stopword words of ``text``.

    Args:
        text: Text to scan.
        top_n: Maximum number of keywords to return.

    Returns:
        The keywords joined with ``", "``, most frequent first, or an empty
        string when no word survives the stopword filter.
    """
    # str.lower() turns "İ" into "i" followed by a combining dot (U+0307),
    # which is not matched by the word pattern and would cut the word in two
    # ("İstanbul" -> "stanbul"). Map it to a plain "i" before lowercasing.
    normalized = text.replace("İ", "i").lower()
    counts = Counter(
        word for word in _WORD_RE.findall(normalized) if word not in STOPWORDS
    )
    return ", ".join(word for word, _ in counts.most_common(top_n))


# ── Core analysis ──────────────────────────────────────────────────────────────
def _toxicity_probability(text: str) -> float:
    """Return the highest per-category toxicity score for ``text``.

    All category scores are requested (``top_k=None``) with an explicit
    sigmoid so each score is an independent probability.
    """
    scores = toxicity_pipeline(text, top_k=None, function_to_apply="sigmoid")
    # Depending on the transformers version / call form the result is a dict,
    # a list of dicts, or a list containing one list of dicts.
    if isinstance(scores, dict):
        scores = [scores]
    elif scores and isinstance(scores[0], list):
        scores = scores[0]
    return max(item["score"] for item in scores)


def analyze_text(text: str) -> dict:
    """Run sentiment, toxicity and keyword analysis on one text.

    Args:
        text: The text to analyse; ``None`` is treated like an empty string.

    Returns:
        A dict with the keys ``Text`` (truncated to 120 characters),
        ``Sentiment``, ``Sentiment Score``, ``Toxicity``, ``Toxicity Score``
        and ``Keywords``; an empty dict when the text is blank.
    """
    text = (text or "").strip()
    if not text:
        return {}

    # Sentiment
    sent_result = sentiment_pipeline(text)[0]
    sentiment_label = sent_result["label"]  # e.g. "positive" / "negative"
    sentiment_score = round(sent_result["score"], 4)

    # Toxicity: decide on the score, not on the label. Per the model card the
    # labels of unitary/toxic-bert are all toxicity categories, so the
    # top-ranked label alone says nothing about whether the text is safe.
    toxicity_probability = _toxicity_probability(text)
    is_toxic = toxicity_probability >= TOXICITY_THRESHOLD
    toxicity_label = "Toxic 🚨" if is_toxic else "Safe ✅"

    return {
        "Text": text[:120] + ("…" if len(text) > 120 else ""),
        "Sentiment": sentiment_label.capitalize(),
        "Sentiment Score": sentiment_score,
        "Toxicity": toxicity_label,
        "Toxicity Score": round(toxicity_probability, 4),
        "Keywords": extract_keywords(text),
    }


# ── Gradio handlers ────────────────────────────────────────────────────────────
def _pick_text_column(df: pd.DataFrame):
    """Return the first object/string-typed column label, else the first column."""
    for col, dtype in df.dtypes.items():
        # Check both: string columns may be stored as object or as a dedicated
        # string dtype depending on the pandas version/configuration.
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
            return col
    return df.columns[0]


def _save_results_csv(df: pd.DataFrame) -> str:
    """Write ``df`` to a new temporary ``.csv`` file and return its path."""
    # delete=False: the file must outlive this function so it can be served
    # for download. newline="" lets the csv writer control line endings.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", encoding="utf-8", newline=""
    ) as tmp:
        df.to_csv(tmp, index=False)
    return tmp.name


def run_analysis(text_input: str, csv_file):
    """Analyse an uploaded CSV, or the text box when no CSV is given.

    Args:
        text_input: Content of the text box.
        csv_file: Uploaded file (a path or an object with a ``name``
            attribute holding the path), or ``None``.

    Returns:
        A ``(dataframe, path)`` tuple: the results table and the path of the
        CSV written for download. On error or when there is nothing to
        analyse, a one-row ``Error``/``Info`` dataframe and ``None``.
    """
    rows = []

    # CSV path takes priority; fall back to text box
    if csv_file is not None:
        try:
            df_in = pd.read_csv(getattr(csv_file, "name", csv_file))
        except Exception as e:  # UI boundary: report any read/parse failure
            return pd.DataFrame([{"Error": f"Could not read CSV: {e}"}]), None

        if len(df_in.columns) > 0:
            # Use first column that looks like text
            text_col = _pick_text_column(df_in)
            # dropna() skips missing cells before they become the string "nan".
            cells = df_in[text_col].dropna().astype(str).str.strip()
            for cell in cells:
                result = analyze_text(cell)
                if result:
                    rows.append(result)
    elif text_input and text_input.strip():
        result = analyze_text(text_input)
        if result:
            rows.append(result)
    else:
        return pd.DataFrame([{"Info": "Please enter text or upload a CSV file."}]), None

    if not rows:
        return pd.DataFrame([{"Info": "No valid text found to analyze."}]), None

    df_out = pd.DataFrame(rows)

    # Save CSV for download
    return df_out, _save_results_csv(df_out)


# ── UI ─────────────────────────────────────────────────────────────────────────
CSS = """
/* ── Palette ─────────────────────────────── */
:root {
    --bg: #0d0f14;
    --surface: #161a22;
    --border: #252a35;
    --accent: #5b8dee;
    --accent2: #e05b8d;
    --text: #e8ecf4;
    --muted: #7a8399;
    --safe: #3ecf78;
    --toxic: #ff4f64;
    --font-head: 'Space Mono', monospace;
    --font-body: 'DM Sans', sans-serif;
    --radius: 10px;
}

/* ── Reset & base ─────────────────────────── */
body, .gradio-container {
    background: var(--bg) !important;
    color: var(--text) !important;
    font-family: var(--font-body) !important;
}

/* ── Header ───────────────────────────────── */
#app-header {
    text-align: center;
    padding: 36px 20px 20px;
    background: linear-gradient(135deg, #0d0f14 0%, #161a22 100%);
    border-bottom: 1px solid var(--border);
    margin-bottom: 28px;
}
#app-header h1 {
    font-family: var(--font-head);
    font-size: clamp(1.4rem, 4vw, 2.2rem);
    letter-spacing: -0.5px;
    background: linear-gradient(90deg, var(--accent), var(--accent2));
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin: 0 0 8px;
}
#app-header p {
    color: var(--muted);
    font-size: 0.9rem;
    margin: 0;
}

/* ── Panels ───────────────────────────────── */
.gr-group, .gr-box, .gr-form {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--radius) !important;
}

/* ── Labels ───────────────────────────────── */
label span, .gr-form label {
    color: var(--muted) !important;
    font-size: 0.78rem !important;
    letter-spacing: 0.06em !important;
    text-transform: uppercase !important;
    font-family: var(--font-head) !important;
}

/* ── Textbox ──────────────────────────────── */
textarea, input[type="text"] {
    background: #1c2130 !important;
    border: 1px solid var(--border) !important;
    border-radius: 8px !important;
    color: var(--text) !important;
    font-family: var(--font-body) !important;
    font-size: 0.95rem !important;
}
textarea:focus, input[type="text"]:focus {
    border-color: var(--accent) !important;
    box-shadow: 0 0 0 3px rgba(91,141,238,0.15) !important;
}

/* ── Buttons ──────────────────────────────── */
button.primary {
    background: linear-gradient(135deg, var(--accent), #3d6dcf) !important;
    border: none !important;
    border-radius: 8px !important;
    color: #fff !important;
    font-family: var(--font-head) !important;
    font-size: 0.85rem !important;
    letter-spacing: 0.08em !important;
    padding: 10px 28px !important;
    cursor: pointer !important;
    transition: opacity 0.2s, transform 0.1s !important;
}
button.primary:hover {
    opacity: 0.88 !important;
    transform: translateY(-1px) !important;
}
button.primary:active {
    transform: translateY(0) !important;
}
button.secondary {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 8px !important;
    color: var(--muted) !important;
    font-family: var(--font-head) !important;
    font-size: 0.78rem !important;
}

/* ── Dataframe ────────────────────────────── */
.gr-dataframe, .dataframe-container {
    background: var(--surface) !important;
    border-radius: var(--radius) !important;
    overflow: hidden !important;
}
table {
    width: 100%;
    border-collapse: collapse;
}
thead tr {
    background: #1c2130 !important;
}
thead th {
    color: var(--accent) !important;
    font-family: var(--font-head) !important;
    font-size: 0.72rem !important;
    letter-spacing: 0.07em !important;
    text-transform: uppercase !important;
    padding: 10px 14px !important;
    border-bottom: 1px solid var(--border) !important;
}
tbody tr:nth-child(even) {
    background: rgba(255,255,255,0.02) !important;
}
tbody tr:hover {
    background: rgba(91,141,238,0.06) !important;
}
tbody td {
    color: var(--text) !important;
    font-size: 0.87rem !important;
    padding: 9px 14px !important;
    border-bottom: 1px solid var(--border) !important;
}

/* ── File upload ──────────────────────────── */
.gr-file-upload {
    border: 2px dashed var(--border) !important;
    border-radius: var(--radius) !important;
    background: #13171f !important;
}
.gr-file-upload:hover {
    border-color: var(--accent) !important;
}

/* ── Download file ────────────────────────── */
.gr-file {
    background: var(--surface) !important;
    border-color: var(--border) !important;
}

/* ── Divider ──────────────────────────────── */
hr {
    border-color: var(--border) !important;
    margin: 20px 0 !important;
}

/* ── Scrollbar ────────────────────────────── */
::-webkit-scrollbar {
    width: 6px;
    height: 6px;
}
::-webkit-scrollbar-track {
    background: var(--bg);
}
::-webkit-scrollbar-thumb {
    background: var(--border);
    border-radius: 4px;
}
"""

# The wrapper element and the h1/p tags are what the "#app-header" rules in
# CSS target; without them the header styling never applies.
HEADER_HTML = """
<div id="app-header">
  <h1>⚡ Social Media Analytics</h1>
  <p>Sentiment · Toxicity · Keywords  |  Turkish & English supported</p>
</div>
"""

with gr.Blocks(css=CSS, title="Social Media Analytics") as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Single Text Analysis",
                placeholder="Paste a tweet, comment, or any social media post here…",
                lines=5,
            )
            csv_file = gr.File(
                label="Bulk CSV Upload (first text column is used)",
                file_types=[".csv"],
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary")

        # Right column: results table and the downloadable CSV.
        with gr.Column(scale=2):
            result_table = gr.Dataframe(
                label="Results",
                interactive=False,
                wrap=True,
            )
            download_btn = gr.File(label="⬇ Download CSV", interactive=False)

    analyze_btn.click(
        fn=run_analysis,
        inputs=[text_input, csv_file],
        outputs=[result_table, download_btn],
    )

    gr.HTML("""
Models: savasy/bert-base-turkish-sentiment-cased · unitary/toxic-bert
""")

if __name__ == "__main__":
    demo.launch()