Spaces:

GoshawkVortexAI
/

Goshawk-Tiktok

Running

File size: 12,325 Bytes

9864a36

import gradio as gr
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import tempfile
import os

# ── Model loading ──────────────────────────────────────────────────────────────

# Hub ids of the two checkpoints and the tokenizer options both pipelines
# receive (truncation enabled, max_length of 512).
_SENTIMENT_MODEL = "savasy/bert-base-turkish-sentiment-cased"
_TOXICITY_MODEL = "unitary/toxic-bert"
_TOKENIZER_KWARGS = {"truncation": True, "max_length": 512}

# Both pipelines are built once at import time and reused for every request.
print(f"Loading sentiment model ({_SENTIMENT_MODEL})...")
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=_SENTIMENT_MODEL,
    tokenizer=_SENTIMENT_MODEL,
    **_TOKENIZER_KWARGS,
)

print(f"Loading toxicity model ({_TOXICITY_MODEL})...")
toxicity_pipeline = pipeline(
    "text-classification",
    model=_TOXICITY_MODEL,
    tokenizer=_TOXICITY_MODEL,
    **_TOKENIZER_KWARGS,
)

# ── Keyword extraction (simple TF-based, no external API) ─────────────────────

import re
from collections import Counter

# Function words that carry no topical signal; they are dropped before counting.
STOPWORDS_TR = {
    "bir", "bu", "ve", "ile", "da", "de", "mi", "mu", "mü", "mı", "ki",
    "ne", "için", "ama", "fakat", "çok", "daha", "en", "gibi", "kadar",
    "ben", "sen", "o", "biz", "siz", "onlar", "şu", "her",
    "hiç", "bazı", "tüm", "bütün", "var", "yok", "olan", "olarak",
}

STOPWORDS_EN = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "this", "that", "these", "those", "it", "its",
    "i", "you", "he", "she", "we", "they", "not", "no", "so", "as", "if",
}

STOPWORDS = STOPWORDS_TR | STOPWORDS_EN

# Runs of at least three ASCII or Turkish letters; compiled once at import.
_WORD_RE = re.compile(r"\b[a-zA-ZçğışöüÇĞİŞÖÜ]{3,}\b")


def extract_keywords(text: str, top_n: int = 5) -> str:
    """Return the most frequent non-stopword words of *text*.

    Args:
        text: Text to scan. Only words of three or more ASCII/Turkish
            letters are considered.
        top_n: Maximum number of keywords to return.

    Returns:
        The keywords, lowercased and joined with ", ", most frequent first
        (ties keep first-occurrence order). An empty string when no
        candidate word survives filtering.
    """
    # str.lower() turns "İ" (U+0130) into "i" followed by the combining dot
    # U+0307, which the regex sees as a word break ("İstanbul" -> "stanbul").
    # Fold it to a plain "i" first so such words stay intact.
    lowered = text.replace("İ", "i").lower()
    counts = Counter(
        word for word in _WORD_RE.findall(lowered) if word not in STOPWORDS
    )
    return ", ".join(word for word, _ in counts.most_common(top_n))


# ── Core analysis ──────────────────────────────────────────────────────────────

def analyze_text(text: str, toxicity_threshold: float = 0.5) -> dict:
    """Run sentiment, toxicity and keyword analysis on one piece of text.

    Args:
        text: Raw input; surrounding whitespace is ignored.
        toxicity_threshold: Minimum score of the toxicity model's top
            category for the text to be flagged as toxic.

    Returns:
        An empty dict for blank input. Otherwise a dict with the keys
        "Text" (input cut to 120 characters, with "…" appended when cut),
        "Sentiment", "Sentiment Score", "Toxicity", "Toxicity Score" and
        "Keywords". Scores are rounded to four decimals.
    """
    text = text.strip()
    if not text:
        return {}

    # Sentiment: the pipeline returns its top label and that label's score.
    top_sentiment = sentiment_pipeline(text)[0]
    sentiment_label = top_sentiment["label"]        # e.g. "positive" / "negative"
    sentiment_score = round(top_sentiment["score"], 4)

    # Toxicity: per its model card, unitary/toxic-bert is multi-label and every
    # label it emits (toxic, severe_toxic, obscene, ...) is a toxicity category.
    # The top label alone therefore says nothing about whether the text is
    # toxic: benign text commonly comes back as "toxic" with a near-zero score.
    # Ask for independent sigmoid probabilities and decide on the score.
    top_toxicity = toxicity_pipeline(text, function_to_apply="sigmoid")[0]
    is_toxic = top_toxicity["score"] >= toxicity_threshold
    toxicity_score = round(top_toxicity["score"], 4)
    toxicity_label = "Toxic 🚨" if is_toxic else "Safe ✅"

    return {
        "Text": text[:120] + ("…" if len(text) > 120 else ""),
        "Sentiment": sentiment_label.capitalize(),
        "Sentiment Score": sentiment_score,
        "Toxicity": toxicity_label,
        "Toxicity Score": toxicity_score,
        "Keywords": extract_keywords(text),
    }


# ── Gradio handlers ────────────────────────────────────────────────────────────

def run_analysis(text_input: str, csv_file):
    rows = []

    # CSV path takes priority; fall back to text box
    if csv_file is not None:
        try:
            df_in = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file)
        except Exception as e:
            return pd.DataFrame([{"Error": f"Could not read CSV: {e}"}]), None

        # Use first column that looks like text
        text_col = df_in.columns[0]
        for col in df_in.columns:
            if df_in[col].dtype == object:
                text_col = col
                break

        for _, row in df_in.iterrows():
            cell = str(row[text_col]).strip()
            if cell and cell.lower() != "nan":
                result = analyze_text(cell)
                if result:
                    rows.append(result)

    elif text_input and text_input.strip():
        result = analyze_text(text_input.strip())
        if result:
            rows.append(result)
    else:
        return pd.DataFrame([{"Info": "Please enter text or upload a CSV file."}]), None

    if not rows:
        return pd.DataFrame([{"Info": "No valid text found to analyze."}]), None

    df_out = pd.DataFrame(rows)

    # Save CSV for download
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8")
    df_out.to_csv(tmp.name, index=False)
    tmp.close()

    return df_out, tmp.name


# ── UI ─────────────────────────────────────────────────────────────────────────

# Custom stylesheet handed to gr.Blocks(css=...). It defines a dark palette as
# CSS variables and overrides the look of panels, labels, text inputs, buttons,
# the results table, file widgets and scrollbars. The --font-head / --font-body
# families are the web fonts that HEADER_HTML links in.
# NOTE(review): selectors such as .gr-box / .gr-form / .gr-file-upload depend on
# Gradio's generated class names — confirm they match the installed version.
CSS = """
/* ── Palette ─────────────────────────────── */
:root {
    --bg:        #0d0f14;
    --surface:   #161a22;
    --border:    #252a35;
    --accent:    #5b8dee;
    --accent2:   #e05b8d;
    --text:      #e8ecf4;
    --muted:     #7a8399;
    --safe:      #3ecf78;
    --toxic:     #ff4f64;
    --font-head: 'Space Mono', monospace;
    --font-body: 'DM Sans', sans-serif;
    --radius:    10px;
}

/* ── Reset & base ─────────────────────────── */
body, .gradio-container {
    background: var(--bg) !important;
    color: var(--text) !important;
    font-family: var(--font-body) !important;
}

/* ── Header ───────────────────────────────── */
#app-header {
    text-align: center;
    padding: 36px 20px 20px;
    background: linear-gradient(135deg, #0d0f14 0%, #161a22 100%);
    border-bottom: 1px solid var(--border);
    margin-bottom: 28px;
}
#app-header h1 {
    font-family: var(--font-head);
    font-size: clamp(1.4rem, 4vw, 2.2rem);
    letter-spacing: -0.5px;
    background: linear-gradient(90deg, var(--accent), var(--accent2));
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin: 0 0 8px;
}
#app-header p {
    color: var(--muted);
    font-size: 0.9rem;
    margin: 0;
}

/* ── Panels ───────────────────────────────── */
.gr-group, .gr-box, .gr-form {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--radius) !important;
}

/* ── Labels ───────────────────────────────── */
label span, .gr-form label {
    color: var(--muted) !important;
    font-size: 0.78rem !important;
    letter-spacing: 0.06em !important;
    text-transform: uppercase !important;
    font-family: var(--font-head) !important;
}

/* ── Textbox ──────────────────────────────── */
textarea, input[type="text"] {
    background: #1c2130 !important;
    border: 1px solid var(--border) !important;
    border-radius: 8px !important;
    color: var(--text) !important;
    font-family: var(--font-body) !important;
    font-size: 0.95rem !important;
}
textarea:focus, input[type="text"]:focus {
    border-color: var(--accent) !important;
    box-shadow: 0 0 0 3px rgba(91,141,238,0.15) !important;
}

/* ── Buttons ──────────────────────────────── */
button.primary {
    background: linear-gradient(135deg, var(--accent), #3d6dcf) !important;
    border: none !important;
    border-radius: 8px !important;
    color: #fff !important;
    font-family: var(--font-head) !important;
    font-size: 0.85rem !important;
    letter-spacing: 0.08em !important;
    padding: 10px 28px !important;
    cursor: pointer !important;
    transition: opacity 0.2s, transform 0.1s !important;
}
button.primary:hover { opacity: 0.88 !important; transform: translateY(-1px) !important; }
button.primary:active { transform: translateY(0) !important; }

button.secondary {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 8px !important;
    color: var(--muted) !important;
    font-family: var(--font-head) !important;
    font-size: 0.78rem !important;
}

/* ── Dataframe ────────────────────────────── */
.gr-dataframe, .dataframe-container {
    background: var(--surface) !important;
    border-radius: var(--radius) !important;
    overflow: hidden !important;
}
table { width: 100%; border-collapse: collapse; }
thead tr { background: #1c2130 !important; }
thead th {
    color: var(--accent) !important;
    font-family: var(--font-head) !important;
    font-size: 0.72rem !important;
    letter-spacing: 0.07em !important;
    text-transform: uppercase !important;
    padding: 10px 14px !important;
    border-bottom: 1px solid var(--border) !important;
}
tbody tr:nth-child(even) { background: rgba(255,255,255,0.02) !important; }
tbody tr:hover { background: rgba(91,141,238,0.06) !important; }
tbody td {
    color: var(--text) !important;
    font-size: 0.87rem !important;
    padding: 9px 14px !important;
    border-bottom: 1px solid var(--border) !important;
}

/* ── File upload ──────────────────────────── */
.gr-file-upload {
    border: 2px dashed var(--border) !important;
    border-radius: var(--radius) !important;
    background: #13171f !important;
}
.gr-file-upload:hover { border-color: var(--accent) !important; }

/* ── Download file ────────────────────────── */
.gr-file { background: var(--surface) !important; border-color: var(--border) !important; }

/* ── Divider ──────────────────────────────── */
hr { border-color: var(--border) !important; margin: 20px 0 !important; }

/* ── Scrollbar ────────────────────────────── */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg); }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
"""

# Banner markup rendered at the top of the page via gr.HTML. It links the
# Google Fonts families (Space Mono, DM Sans) named by the stylesheet's font
# variables and provides the #app-header element that the stylesheet targets.
HEADER_HTML = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;600&display=swap" rel="stylesheet">
<div id="app-header">
  <h1>⚡ Social Media Analytics</h1>
  <p>Sentiment · Toxicity · Keywords &nbsp;|&nbsp; Turkish &amp; English supported</p>
</div>
"""

# Page layout: header banner, then one row with an input column (text box,
# CSV upload, analyze button) and a wider results column (table + download),
# followed by a footer naming the models. Components appear in the order they
# are created inside each context manager.
with gr.Blocks(css=CSS, title="Social Media Analytics") as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        # Input column (scale=1 against the results column's scale=2).
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Single Text Analysis",
                placeholder="Paste a tweet, comment, or any social media post here…",
                lines=5,
            )
            csv_file = gr.File(
                label="Bulk CSV Upload  (first text column is used)",
                file_types=[".csv"],
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary")

        # Results column: read-only table plus the generated CSV for download.
        with gr.Column(scale=2):
            result_table = gr.Dataframe(
                label="Results",
                interactive=False,
                wrap=True,
            )
            download_btn = gr.File(label="⬇ Download CSV", interactive=False)

    # run_analysis returns (DataFrame, file path or None), matching the two outputs.
    analyze_btn.click(
        fn=run_analysis,
        inputs=[text_input, csv_file],
        outputs=[result_table, download_btn],
    )

    # Footer crediting the two model checkpoints.
    gr.HTML("""
    <div style="text-align:center;padding:20px 0 10px;color:#4a5268;font-size:0.78rem;font-family:'Space Mono',monospace;">
      Models: savasy/bert-base-turkish-sentiment-cased · unitary/toxic-bert
    </div>
    """)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()