SlangGPT / app.py
AdhamAshraf's picture
make it mobile compatible
67510b3 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import logging
from huggingface_hub import login
from datetime import datetime
import pandas as pd
from datasets import Dataset, load_dataset, Features, Value
import os
logging.basicConfig(level=logging.INFO)
# ==============================================
# CONFIGURATION
# ==============================================
# Hub repositories: where feedback rows are pushed, and which checkpoint is served.
HF_DATASET_NAME = "AdhamAshraf/slanggpt-feedback-dataset"
MODEL_NAME = "AdhamAshraf/SlangGPT"

# The token is read from the environment; the app refuses to start without it.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN environment variable not set. "
        + "Please add a secret named 'HF_TOKEN' with your Hugging Face write token."
    )

login(token=HF_TOKEN)
print("✅ Logged in to Hugging Face Hub")
# ==============================================
# EXPLICIT SCHEMA — prevents column-mismatch errors
# ==============================================
# Column order here is the column order of the pushed dataset.
# Every column is a string except the integer rating.
FEEDBACK_FEATURES = Features(
    {
        column: Value(dtype)
        for column, dtype in (
            ("egyptian_arabic", "string"),
            ("generated_msa", "string"),
            ("user_label", "string"),
            ("user_rating", "int64"),
            ("corrected_msa", "string"),
            ("timestamp", "string"),
        )
    }
)
# ==============================================
# LOAD GENERATION MODEL
# ==============================================
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Fall back to the EOS token for padding when the checkpoint defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

print("Loading model...")
# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    dtype = torch.float16
else:
    dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=dtype,
    device_map="auto",
    low_cpu_mem_usage=True,
)
model.eval()
print("✅ Model loaded successfully")
# ==============================================
# CACHE FEEDBACK DATASET AT STARTUP
# ==============================================
def _load_feedback_frame():
    """Return the existing feedback rows as a DataFrame, or an empty one.

    The Hub dataset's "train" split is loaded and normalised: a missing
    ``corrected_msa`` column is added, text columns become plain strings
    with "" for missing values, and ``user_rating`` becomes int64 with -1
    for missing values. Any exception along the way yields an empty frame
    that only carries the schema's column names.
    """
    text_columns = (
        "egyptian_arabic",
        "generated_msa",
        "user_label",
        "corrected_msa",
        "timestamp",
    )
    try:
        frame = load_dataset(HF_DATASET_NAME, split="train").to_pandas()
        if "corrected_msa" not in frame.columns:
            frame["corrected_msa"] = ""
        for name in text_columns:
            frame[name] = frame[name].fillna("").astype(str)
        frame["user_rating"] = frame["user_rating"].fillna(-1).astype("int64")
        print(f"✅ Feedback dataset loaded ({len(frame)} existing rows)")
        return frame
    except Exception as e:
        # NOTE(review): every failure is treated as "no dataset yet". If the
        # failure is transient and a later save pushes this frame back to the
        # same split, existing Hub rows may be replaced — confirm.
        print(f"⚠️ No existing feedback dataset — starting fresh ({e})")
        return pd.DataFrame(columns=list(FEEDBACK_FEATURES.keys()))


print("Loading feedback dataset...")
_feedback_df = _load_feedback_frame()
# ==============================================
# TRANSLATION
# ==============================================
def _extract_translation(completion):
    """Return the first translation in *completion*, without any run-on prompt.

    The prompt format is ``dialect: <text> ↔ msa:``, so a model that keeps
    generating past its answer starts a new pair with "dialect:" and/or "↔".
    Everything from the first such marker onwards is discarded.
    """
    for marker in ("↔", "dialect:"):
        completion = completion.split(marker)[0]
    return completion.strip()


def translate_to_msa(egyptian_text):
    """Translate an Egyptian Arabic phrase into MSA with the loaded causal LM.

    Args:
        egyptian_text: The phrase typed by the user. May be None or blank.

    Returns:
        The generated MSA text; "Please enter an Egyptian Arabic phrase."
        when the input is empty; or "[No translation generated.]" when the
        model produced nothing usable.
    """
    if not egyptian_text or not egyptian_text.strip():
        return "Please enter an Egyptian Arabic phrase."

    prompt = f"dialect: {egyptian_text.strip()} ↔ msa:"
    # NOTE(review): truncation applies to the whole prompt. If the tokenizer
    # truncates from the right (the usual default — confirm), inputs longer
    # than 64 tokens lose the trailing "↔ msa:" cue.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=64)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_length = inputs["input_ids"].shape[1]

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=48,
            do_sample=False,
            repetition_penalty=1.3,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Decoding prompt + completion and
    # splitting on the last "msa:" returned the wrong segment whenever the
    # model continued with another "dialect: … ↔ msa: …" pair.
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    result = _extract_translation(completion)
    return result if result else "[No translation generated.]"
# ==============================================
# FEEDBACK SAVING
# ==============================================
def save_feedback(egyptian_input, generated_output, correct_incorrect, rating, corrected_translation):
    """Validate one feedback submission and append it to the Hub dataset.

    Args:
        egyptian_input: Source Egyptian Arabic text that was translated.
        generated_output: The model's MSA translation for that text.
        correct_incorrect: Radio choice; "❌ Incorrect" marks the translation
            as wrong, any other value is stored as "correct".
        rating: Quality score, converted with ``int()``. None means the user
            has not rated yet.
        corrected_translation: User-supplied better translation. Required
            when the translation is marked incorrect or the rating is <= 2;
            discarded otherwise.

    Returns:
        A status message: it starts with "✅" when the row was pushed, and
        with "⚠️" when validation failed or the push raised.
    """
    global _feedback_df

    # A row without a source sentence or a real translation is useless as
    # training data. Outputs starting with "[" are placeholders.
    if not egyptian_input or not egyptian_input.strip():
        return "⚠️ No valid translation to rate. Please translate something first."
    if not generated_output or not generated_output.strip() or generated_output.startswith("["):
        return "⚠️ No valid translation to rate. Please translate something first."
    if rating is None:
        return "⚠️ Please provide a quality rating before submitting."

    rating = int(rating)
    is_incorrect = correct_incorrect == "❌ Incorrect"
    low_rating = rating <= 2
    needs_correction = is_incorrect or low_rating
    has_correction = bool(corrected_translation and corrected_translation.strip())

    if needs_correction and not has_correction:
        if is_incorrect:
            return "⚠️ Please provide the correct MSA translation before submitting."
        return f"⚠️ Rating of {rating}/5 is low — please provide a better translation before submitting."

    # Local import keeps this block self-contained. datetime.utcnow() is
    # deprecated, so take an aware UTC time and drop tzinfo to keep the
    # stored "...Z" format unchanged.
    from datetime import timezone

    new_row = {
        "egyptian_arabic": egyptian_input.strip(),
        "generated_msa": generated_output.strip(),
        "user_label": "incorrect" if is_incorrect else "correct",
        "user_rating": rating,
        "corrected_msa": corrected_translation.strip() if needs_correction and has_correction else "",
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
    }

    # Build the updated frame on the side and publish it to the global cache
    # only after the push succeeds. A failed push then needs no rollback.
    candidate = pd.concat([_feedback_df, pd.DataFrame([new_row])], ignore_index=True)
    candidate["user_rating"] = candidate["user_rating"].fillna(-1).astype("int64")
    try:
        Dataset.from_pandas(candidate, features=FEEDBACK_FEATURES).push_to_hub(
            HF_DATASET_NAME, split="train", private=False
        )
    except Exception as e:
        return f"⚠️ Could not save feedback: {str(e)}"

    _feedback_df = candidate
    return "✅ Feedback recorded — شكراً!"
# ==============================================
# RESET UI
# ==============================================
def reset_feedback_ui():
    """Build the updates that put the feedback form back in its initial state.

    Returns:
        A 5-tuple of ``gr.update`` payloads, in positional order: hide the
        panel, blank the correction text, select "✅ Correct", clear the
        rating, blank the status text. The caller's ``outputs`` list decides
        which component receives each one.
    """
    hide_panel = gr.update(visible=False)
    blank_correction = gr.update(value="")
    default_label = gr.update(value="✅ Correct")
    cleared_rating = gr.update(value=None)
    blank_status = gr.update(value="")
    return (hide_panel, blank_correction, default_label, cleared_rating, blank_status)
# ==============================================
# MOBILE-RESPONSIVE CSS
# ==============================================
# Stylesheet handed to gr.Blocks(css=CSS). It declares the colour/font
# variables in :root, styles the header, inputs, buttons, feedback panel and
# examples table, and ends with two breakpoints (max-width 768px and 480px).
# NOTE(review): the `.gr-*` class selectors only take effect if the installed
# Gradio version renders those class names — confirm.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Noto+Naskh+Arabic:wght@400;600&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500;600&display=swap');
:root {
--bg: #0f1117;
--surface: #181c27;
--border: #2a2f3d;
--accent: #4f8ef7;
--accent2: #a78bfa;
--text: #e8eaf0;
--muted: #6b7280;
--success: #34d399;
--warn: #f87171;
--radius: 12px;
--mono: 'DM Mono', monospace;
--sans: 'DM Sans', sans-serif;
--arabic: 'Noto Naskh Arabic', serif;
}
/* ── Base ── */
body, .gradio-container {
background: var(--bg) !important;
font-family: var(--sans) !important;
color: var(--text) !important;
/* prevent horizontal overflow on mobile */
overflow-x: hidden !important;
}
/* ── Header ── */
#header {
text-align: center;
padding: 2rem 1rem 1rem;
border-bottom: 1px solid var(--border);
margin-bottom: 1.5rem;
}
#header h1 {
font-family: var(--mono);
font-size: clamp(1.4rem, 5vw, 2rem);
letter-spacing: -0.02em;
background: linear-gradient(135deg, var(--accent), var(--accent2));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin: 0 0 0.25rem;
}
#header p {
color: var(--muted);
font-size: clamp(0.75rem, 2.5vw, 0.9rem);
margin: 0;
line-height: 1.5;
}
/* ── Inputs ── */
textarea, input[type=text] {
background: #0f1117 !important;
border: 1px solid var(--border) !important;
border-radius: 8px !important;
color: var(--text) !important;
font-family: var(--arabic) !important;
font-size: clamp(1rem, 3.5vw, 1.1rem) !important;
line-height: 1.7 !important;
direction: rtl;
transition: border-color 0.2s;
/* Prevent zoom on focus in iOS (font-size must be >= 16px equivalent) */
touch-action: manipulation;
}
textarea:focus, input[type=text]:focus {
border-color: var(--accent) !important;
outline: none !important;
box-shadow: 0 0 0 3px rgba(79,142,247,0.15) !important;
}
#output-box textarea {
background: #13161f !important;
color: #a5f3c0 !important;
font-size: clamp(1rem, 3.5vw, 1.15rem) !important;
}
/* ── Buttons ── */
button.primary, #translate-btn {
background: linear-gradient(135deg, var(--accent), var(--accent2)) !important;
border: none !important;
border-radius: 8px !important;
color: #fff !important;
font-family: var(--sans) !important;
font-weight: 600 !important;
font-size: clamp(0.85rem, 3vw, 0.95rem) !important;
/* taller tap target on mobile */
padding: 0.75rem 1.4rem !important;
min-height: 48px !important;
width: 100% !important;
cursor: pointer !important;
transition: opacity 0.15s, transform 0.1s !important;
touch-action: manipulation;
}
button.primary:hover, #translate-btn:hover {
opacity: 0.88 !important;
transform: translateY(-1px) !important;
}
#submit-btn {
background: var(--surface) !important;
border: 1px solid var(--accent) !important;
border-radius: 8px !important;
color: var(--accent) !important;
font-family: var(--sans) !important;
font-weight: 500 !important;
min-height: 48px !important;
width: 100% !important;
transition: background 0.15s !important;
touch-action: manipulation;
}
#submit-btn:hover {
background: rgba(79,142,247,0.1) !important;
}
/* ── Radio & Slider ── */
.gr-radio-item label {
color: var(--text) !important;
font-family: var(--sans) !important;
/* larger touch target */
padding: 0.4rem 0 !important;
min-height: 44px !important;
display: flex !important;
align-items: center !important;
}
.gr-radio-item input[type=radio] {
width: 20px !important;
height: 20px !important;
}
input[type=range] {
accent-color: var(--accent) !important;
height: 6px !important;
/* taller hit area */
padding: 12px 0 !important;
cursor: pointer;
touch-action: manipulation;
}
/* ── Labels ── */
label span, .gr-form label {
color: var(--muted) !important;
font-family: var(--sans) !important;
font-size: clamp(0.72rem, 2vw, 0.82rem) !important;
text-transform: uppercase !important;
letter-spacing: 0.06em !important;
}
/* ── Status ── */
#status-box textarea {
background: transparent !important;
border: none !important;
color: var(--success) !important;
font-family: var(--mono) !important;
font-size: clamp(0.8rem, 2.5vw, 0.9rem) !important;
text-align: center;
}
/* ── Examples table ── */
.gr-samples-table td {
font-family: var(--arabic) !important;
font-size: clamp(0.9rem, 3vw, 1rem) !important;
direction: rtl;
color: var(--text) !important;
/* comfortable row height on mobile */
padding: 0.6rem 0.75rem !important;
}
.gr-samples-table tr:hover td {
background: rgba(79,142,247,0.07) !important;
cursor: pointer;
}
/* ── Section labels ── */
.section-label {
font-family: var(--mono);
font-size: clamp(0.68rem, 2vw, 0.75rem);
letter-spacing: 0.1em;
color: var(--muted);
text-transform: uppercase;
margin: 1.2rem 0 0.5rem;
display: flex;
align-items: center;
gap: 0.6rem;
}
.section-label::after {
content: '';
flex: 1;
height: 1px;
background: var(--border);
}
/* ── Feedback panel ── */
#feedback-panel {
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
padding: 1rem !important;
margin-top: 1rem !important;
}
/* ════════════════════════════════════════
RESPONSIVE BREAKPOINTS
════════════════════════════════════════ */
/* Tablet / large phone — stack the two columns */
@media (max-width: 768px) {
/* Gradio Row becomes a single column */
.gr-row {
flex-direction: column !important;
gap: 0 !important;
}
.gr-column {
width: 100% !important;
min-width: 0 !important;
flex: none !important;
}
/* Give textareas a comfortable height on phone */
textarea {
min-height: 100px !important;
}
/* Feedback radio stack vertically */
.gr-radio-group {
flex-direction: column !important;
}
}
/* Small phones */
@media (max-width: 480px) {
.gradio-container {
padding: 0 0.5rem !important;
}
#header {
padding: 1.25rem 0.5rem 0.75rem;
}
textarea {
min-height: 90px !important;
font-size: 1rem !important; /* prevents iOS zoom */
}
/* Make slider label wrap gracefully */
.gr-form label span {
white-space: normal !important;
}
}
"""
# ==============================================
# GRADIO INTERFACE
# ==============================================
def _on_translated(source_text, translation):
    """Decide what the feedback area shows after a translation attempt.

    Args:
        source_text: Text that was in the input box when "Translate" ran.
        translation: Text now shown in the output box.

    Returns:
        A 4-tuple: visibility update for the feedback panel, the source text
        and the translation to remember for the later submission, and an
        update that clears any status message left from an earlier one.
    """
    # Offer rating only for a real translation: a non-blank source and an
    # output that is not a "[...]" placeholder.
    rateable = bool(
        source_text
        and source_text.strip()
        and translation
        and not translation.startswith("[")
    )
    return gr.update(visible=rateable), source_text, translation, gr.update(value="")


def _on_feedback_submitted(source_text, translation, label, rating, correction):
    """Save feedback and reset the form only when the save succeeded.

    Returns:
        Updates for (panel, correction box, correctness radio, rating slider,
        status box). On a validation or push failure the form is left exactly
        as the user filled it, and only the status message changes.
    """
    status = save_feedback(source_text, translation, label, rating, correction)
    if not status.startswith("✅"):
        return gr.update(), gr.update(), gr.update(), gr.update(), status
    # Reuse the standard reset, but show the save result in place of its
    # blank status. Otherwise the user would never see the confirmation.
    panel, correction_box, label_radio, rating_input, _ = reset_feedback_ui()
    return panel, correction_box, label_radio, rating_input, status


with gr.Blocks(title="SlangGPT", css=CSS, theme=gr.themes.Base()) as demo:
    gr.HTML("""
<div id="header">
<h1>SlangGPT</h1>
<p>Egyptian Arabic dialect → Modern Standard Arabic (MSA)<br>اللهجة المصرية ← الفصحى</p>
</div>
""")

    # ── Translation area: input on one side, read-only output on the other ──
    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            gr.HTML('<div class="section-label">Egyptian Arabic Input · اكتب بالمصري</div>')
            egyptian_input = gr.Textbox(
                show_label=False,
                placeholder="اكتب هنا باللهجة المصرية…",
                lines=4,
                rtl=True,
            )
            translate_btn = gr.Button(
                "Translate · ترجم →",
                variant="primary",
                elem_id="translate-btn",
            )
        with gr.Column(scale=1):
            gr.HTML('<div class="section-label">MSA Translation · الترجمة بالفصحى</div>')
            msa_output = gr.Textbox(
                show_label=False,
                lines=4,
                interactive=False,
                placeholder="ستظهر الترجمة هنا…",
                rtl=True,
                elem_id="output-box",
            )

    gr.HTML('<div class="section-label">Try an example · جرّب مثال</div>')
    gr.Examples(
        examples=[
            ["إنت رايح فين؟"],
            ["عايز اكل حاجة حلوة"],
            ["انا تعبان قوي النهارده"],
            ["الأكل ده كان تحفة"],
            ["ممكن تساعدني؟"],
        ],
        inputs=egyptian_input,
        label="",
    )

    # ── Feedback form: hidden until there is a translation to rate ──
    with gr.Group(visible=False, elem_id="feedback-panel") as feedback_group:
        gr.HTML('<div class="section-label">Rate this translation · قيّم الترجمة</div>')
        with gr.Row():
            correct_radio = gr.Radio(
                choices=["✅ Correct", "❌ Incorrect"],
                value="✅ Correct",
                label="Is the translation correct? · هل الترجمة صحيحة؟",
                scale=1,
            )
            rating_slider = gr.Slider(
                minimum=0, maximum=5, step=1,
                value=None,
                label="Quality · الجودة (0 = غير مفيدة · 5 = ممتازة) — required · مطلوب",
                scale=2,
            )
        correction_textbox = gr.Textbox(
            label="Better MSA translation · ترجمة أفضل — required if incorrect or rating ≤ 2 · مطلوب إذا كانت خاطئة أو التقييم ≤ 2",
            lines=2,
            visible=True,
            placeholder="الترجمة الصحيحة هنا…",
            rtl=True,
        )
        submit_feedback = gr.Button("Submit Feedback · أرسل التقييم", elem_id="submit-btn")

    # The status line sits outside the panel so the result of a submission
    # stays readable after a successful save hides the panel.
    feedback_status = gr.Textbox(
        show_label=False,
        interactive=False,
        elem_id="status-box",
        lines=1,
    )

    # Snapshot of the pair being rated. Feedback is saved against the text
    # that was actually translated, even if the input box is edited later.
    latest_source = gr.State("")
    latest_translation = gr.State("")

    translate_btn.click(
        fn=translate_to_msa,
        inputs=egyptian_input,
        outputs=msa_output,
    ).then(
        fn=_on_translated,
        inputs=[egyptian_input, msa_output],
        outputs=[feedback_group, latest_source, latest_translation, feedback_status],
    )

    # One handler does save + conditional reset. A separate unconditional
    # reset step would erase the status message and wipe the form even when
    # validation failed.
    submit_feedback.click(
        fn=_on_feedback_submitted,
        inputs=[latest_source, latest_translation, correct_radio, rating_slider, correction_textbox],
        outputs=[feedback_group, correction_textbox, correct_radio, rating_slider, feedback_status],
    )

if __name__ == "__main__":
    demo.launch()