# Goshawk-Tiktok / app.py
# GoshawkVortexAI's picture
# Create app.py
# 9864a36 verified
import gradio as gr
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import tempfile
import os
# ── Model loading ──────────────────────────────────────────────────────────────
# Hub checkpoints backing the two classifiers used by the app.
_SENTIMENT_CHECKPOINT = "savasy/bert-base-turkish-sentiment-cased"
_TOXICITY_CHECKPOINT = "unitary/toxic-bert"

# Tokenizer options shared by both pipelines: truncate inputs to 512 tokens.
_TOKENIZER_OPTIONS = {"truncation": True, "max_length": 512}

# Both pipelines are built once at import time and reused for every request.
print("Loading sentiment model (savasy/bert-base-turkish-sentiment-cased)...")
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
    **_TOKENIZER_OPTIONS,
)

print("Loading toxicity model (unitary/toxic-bert)...")
toxicity_pipeline = pipeline(
    task="text-classification",
    model=_TOXICITY_CHECKPOINT,
    tokenizer=_TOXICITY_CHECKPOINT,
    **_TOKENIZER_OPTIONS,
)
# ── Keyword extraction (simple TF-based, no external API) ─────────────────────
import re
from collections import Counter
# Turkish function words that carry no topical meaning.
STOPWORDS_TR = {
    "bir", "bu", "ve", "ile", "da", "de", "mi", "mu", "mü", "mı", "ki",
    "ne", "için", "ama", "fakat", "çok", "daha", "en", "gibi", "kadar",
    "ben", "sen", "o", "biz", "siz", "onlar", "şu", "her",
    "hiç", "bazı", "tüm", "bütün", "var", "yok", "olan", "olarak",
}

# English function words that carry no topical meaning.
STOPWORDS_EN = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "this", "that", "these", "those", "it", "its",
    "i", "you", "he", "she", "we", "they", "not", "no", "so", "as", "if",
}

STOPWORDS = STOPWORDS_TR | STOPWORDS_EN

# A keyword candidate is a run of three or more ASCII or Turkish letters.
# Only lowercase letters are listed because the text is lowercased first.
_WORD_RE = re.compile(r"\b[a-zçğıöşü]{3,}\b")


def extract_keywords(text: str, top_n: int = 5) -> str:
    """Return the most frequent non-stopword words of *text*.

    Args:
        text: Free-form Turkish or English text.
        top_n: Maximum number of keywords to return.

    Returns:
        Up to ``top_n`` lowercase words joined by ``", "``, most frequent
        first (ties keep first-seen order); ``""`` when nothing qualifies.
    """
    # str.lower() maps "İ" to "i" followed by a combining dot (U+0307), which
    # would split the word at that mark, so fold it to a plain "i" up front.
    # "I" is left to the default lowering so English text keeps working.
    normalized = text.replace("İ", "i").lower()
    counts = Counter(
        word for word in _WORD_RE.findall(normalized) if word not in STOPWORDS
    )
    return ", ".join(word for word, _ in counts.most_common(top_n))
# ── Core analysis ──────────────────────────────────────────────────────────────
# Labels that denote a toxicity category. NOTE(review): taken from the
# unitary/toxic-bert and related model cards — confirm against the loaded
# model's id2label mapping.
_TOXIC_LABELS = frozenset({
    "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate",
})


def analyze_text(text: str, toxicity_threshold: float = 0.5) -> dict:
    """Run sentiment, toxicity and keyword analysis on a single text.

    Args:
        text: The post or comment to analyze; surrounding whitespace is
            ignored. ``None`` is treated like an empty string.
        toxicity_threshold: Minimum toxic probability at which the text is
            flagged as toxic.

    Returns:
        A dict with the keys ``Text`` (truncated to 120 characters),
        ``Sentiment``, ``Sentiment Score``, ``Toxicity``, ``Toxicity Score``
        and ``Keywords``; an empty dict when there is no text to analyze.
    """
    text = (text or "").strip()
    if not text:
        return {}

    # Sentiment: top label and its confidence.
    sent_result = sentiment_pipeline(text)[0]
    sentiment_label = sent_result["label"]
    sentiment_score = round(sent_result["score"], 4)

    # Toxicity. The top label alone is not a verdict: a multi-label toxicity
    # model can report "toxic" as its best label with a near-zero score for
    # harmless text, so the decision is made on the probability instead.
    tox_result = toxicity_pipeline(text)[0]
    tox_label = tox_result["label"].lower()
    tox_score = tox_result["score"]
    # Any other label is assumed to mean "not toxic", so its score is inverted.
    toxic_probability = tox_score if tox_label in _TOXIC_LABELS else 1 - tox_score
    is_toxic = toxic_probability >= toxicity_threshold

    return {
        "Text": text[:120] + ("…" if len(text) > 120 else ""),
        "Sentiment": sentiment_label.capitalize(),
        "Sentiment Score": sentiment_score,
        "Toxicity": "Toxic 🚨" if is_toxic else "Safe ✅",
        "Toxicity Score": round(toxic_probability, 4),
        "Keywords": extract_keywords(text),
    }
# ── Gradio handlers ────────────────────────────────────────────────────────────
def _pick_text_column(df_in):
    """Return the first text-like column label of *df_in*, else its first column."""
    for col in df_in.columns:
        series = df_in[col]
        # Accept both the classic object dtype and pandas' string dtypes.
        if pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series):
            return col
    return df_in.columns[0]


def _write_results_csv(df_out):
    """Write *df_out* to a temporary UTF-8 CSV file and return its path."""
    # Write through the open handle instead of reopening the file by name:
    # the handle is always closed, and the file is not opened twice.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", mode="w", encoding="utf-8", newline=""
    ) as tmp:
        df_out.to_csv(tmp, index=False)
    return tmp.name


def run_analysis(text_input: str, csv_file):
    """Analyze an uploaded CSV or, failing that, the text box content.

    Args:
        text_input: Text typed into the single-text box; used only when no
            CSV is supplied.
        csv_file: Uploaded CSV as a path or an object with a ``name``
            attribute holding the path, or ``None``. Takes priority over
            ``text_input``.

    Returns:
        A ``(results, csv_path)`` tuple. ``results`` is a DataFrame with one
        row per analyzed text and ``csv_path`` is a temporary CSV file holding
        the same rows. When nothing could be analyzed, ``results`` is a
        one-row DataFrame with an ``Info`` or ``Error`` column and
        ``csv_path`` is ``None``.
    """
    if csv_file is not None:
        csv_path = csv_file.name if hasattr(csv_file, "name") else csv_file
        try:
            df_in = pd.read_csv(csv_path)
        except Exception as e:
            # UI boundary: report any read/parse failure in the results table.
            return pd.DataFrame([{"Error": f"Could not read CSV: {e}"}]), None

        if df_in.columns.empty:
            texts = []
        else:
            column = df_in[_pick_text_column(df_in)]
            # dropna() skips missing cells regardless of how they print.
            texts = [str(cell).strip() for cell in column.dropna()]
    elif text_input and text_input.strip():
        texts = [text_input.strip()]
    else:
        return pd.DataFrame([{"Info": "Please enter text or upload a CSV file."}]), None

    rows = []
    for text in texts:
        if not text:
            continue
        result = analyze_text(text)
        if result:
            rows.append(result)

    if not rows:
        return pd.DataFrame([{"Info": "No valid text found to analyze."}]), None

    df_out = pd.DataFrame(rows)
    return df_out, _write_results_csv(df_out)
# ── UI ─────────────────────────────────────────────────────────────────────────
# Custom stylesheet passed to gr.Blocks(css=...). It defines a dark colour
# palette as CSS variables and styles the header banner (#app-header), panels,
# labels, text inputs, primary/secondary buttons, the results table, the file
# upload/download widgets, dividers and WebKit scrollbars. The two font
# families named in the palette are loaded by the <link> tags in HEADER_HTML.
CSS = """
/* ── Palette ─────────────────────────────── */
:root {
--bg: #0d0f14;
--surface: #161a22;
--border: #252a35;
--accent: #5b8dee;
--accent2: #e05b8d;
--text: #e8ecf4;
--muted: #7a8399;
--safe: #3ecf78;
--toxic: #ff4f64;
--font-head: 'Space Mono', monospace;
--font-body: 'DM Sans', sans-serif;
--radius: 10px;
}
/* ── Reset & base ─────────────────────────── */
body, .gradio-container {
background: var(--bg) !important;
color: var(--text) !important;
font-family: var(--font-body) !important;
}
/* ── Header ───────────────────────────────── */
#app-header {
text-align: center;
padding: 36px 20px 20px;
background: linear-gradient(135deg, #0d0f14 0%, #161a22 100%);
border-bottom: 1px solid var(--border);
margin-bottom: 28px;
}
#app-header h1 {
font-family: var(--font-head);
font-size: clamp(1.4rem, 4vw, 2.2rem);
letter-spacing: -0.5px;
background: linear-gradient(90deg, var(--accent), var(--accent2));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin: 0 0 8px;
}
#app-header p {
color: var(--muted);
font-size: 0.9rem;
margin: 0;
}
/* ── Panels ───────────────────────────────── */
.gr-group, .gr-box, .gr-form {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
}
/* ── Labels ───────────────────────────────── */
label span, .gr-form label {
color: var(--muted) !important;
font-size: 0.78rem !important;
letter-spacing: 0.06em !important;
text-transform: uppercase !important;
font-family: var(--font-head) !important;
}
/* ── Textbox ──────────────────────────────── */
textarea, input[type="text"] {
background: #1c2130 !important;
border: 1px solid var(--border) !important;
border-radius: 8px !important;
color: var(--text) !important;
font-family: var(--font-body) !important;
font-size: 0.95rem !important;
}
textarea:focus, input[type="text"]:focus {
border-color: var(--accent) !important;
box-shadow: 0 0 0 3px rgba(91,141,238,0.15) !important;
}
/* ── Buttons ──────────────────────────────── */
button.primary {
background: linear-gradient(135deg, var(--accent), #3d6dcf) !important;
border: none !important;
border-radius: 8px !important;
color: #fff !important;
font-family: var(--font-head) !important;
font-size: 0.85rem !important;
letter-spacing: 0.08em !important;
padding: 10px 28px !important;
cursor: pointer !important;
transition: opacity 0.2s, transform 0.1s !important;
}
button.primary:hover { opacity: 0.88 !important; transform: translateY(-1px) !important; }
button.primary:active { transform: translateY(0) !important; }
button.secondary {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: 8px !important;
color: var(--muted) !important;
font-family: var(--font-head) !important;
font-size: 0.78rem !important;
}
/* ── Dataframe ────────────────────────────── */
.gr-dataframe, .dataframe-container {
background: var(--surface) !important;
border-radius: var(--radius) !important;
overflow: hidden !important;
}
table { width: 100%; border-collapse: collapse; }
thead tr { background: #1c2130 !important; }
thead th {
color: var(--accent) !important;
font-family: var(--font-head) !important;
font-size: 0.72rem !important;
letter-spacing: 0.07em !important;
text-transform: uppercase !important;
padding: 10px 14px !important;
border-bottom: 1px solid var(--border) !important;
}
tbody tr:nth-child(even) { background: rgba(255,255,255,0.02) !important; }
tbody tr:hover { background: rgba(91,141,238,0.06) !important; }
tbody td {
color: var(--text) !important;
font-size: 0.87rem !important;
padding: 9px 14px !important;
border-bottom: 1px solid var(--border) !important;
}
/* ── File upload ──────────────────────────── */
.gr-file-upload {
border: 2px dashed var(--border) !important;
border-radius: var(--radius) !important;
background: #13171f !important;
}
.gr-file-upload:hover { border-color: var(--accent) !important; }
/* ── Download file ────────────────────────── */
.gr-file { background: var(--surface) !important; border-color: var(--border) !important; }
/* ── Divider ──────────────────────────────── */
hr { border-color: var(--border) !important; margin: 20px 0 !important; }
/* ── Scrollbar ────────────────────────────── */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg); }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
"""
# Banner markup rendered at the top of the page through gr.HTML. It also
# links the Google Fonts stylesheet for the Space Mono and DM Sans families.
# The title emoji and the "·" separators were mis-encoded and are restored.
HEADER_HTML = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;600&display=swap" rel="stylesheet">
<div id="app-header">
<h1>⚡ Social Media Analytics</h1>
<p>Sentiment · Toxicity · Keywords &nbsp;|&nbsp; Turkish &amp; English supported</p>
</div>
"""
# Page layout: header banner, then a two-column row with the inputs on the
# left and the results on the right, followed by a footer naming the models.
with gr.Blocks(css=CSS, title="Social Media Analytics") as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        # Left column: single-text box, bulk CSV upload and the trigger button.
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Single Text Analysis",
                placeholder="Paste a tweet, comment, or any social media post here…",
                lines=5,
            )
            csv_file = gr.File(
                label="Bulk CSV Upload (first text column is used)",
                file_types=[".csv"],
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary")

        # Right column: read-only results table and the CSV download slot.
        with gr.Column(scale=2):
            result_table = gr.Dataframe(
                label="Results",
                interactive=False,
                wrap=True,
            )
            download_btn = gr.File(label="⬇ Download CSV", interactive=False)

    # run_analysis returns (results DataFrame, CSV path), matching the outputs.
    analyze_btn.click(
        fn=run_analysis,
        inputs=[text_input, csv_file],
        outputs=[result_table, download_btn],
    )

    gr.HTML("""
<div style="text-align:center;padding:20px 0 10px;color:#4a5268;font-size:0.78rem;font-family:'Space Mono',monospace;">
Models: savasy/bert-base-turkish-sentiment-cased · unitary/toxic-bert
</div>
""")

if __name__ == "__main__":
    demo.launch()