# Source: Hugging Face Space "AdhamAshraf / SlangGPT" (status: Running) — file size: 16,924 bytes

import logging
import os
from datetime import datetime, timezone

import gradio as gr
import pandas as pd
import torch
from datasets import Dataset, Features, Value, load_dataset
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)

# ==============================================
#  CONFIGURATION
# ==============================================
# Hub repositories used by the app, plus the access token read from the
# process environment.
HF_DATASET_NAME = "AdhamAshraf/slanggpt-feedback-dataset"
MODEL_NAME = "AdhamAshraf/SlangGPT"
HF_TOKEN = os.environ.get("HF_TOKEN")

# Fail fast: an unset or empty token aborts startup before any Hub call is made.
if HF_TOKEN is None or HF_TOKEN == "":
    _missing_token_message = (
        "HF_TOKEN environment variable not set. "
        "Please add a secret named 'HF_TOKEN' with your Hugging Face write token."
    )
    raise RuntimeError(_missing_token_message)

# Authenticate this process with the Hub using the token read above.
login(token=HF_TOKEN)
print("✅ Logged in to Hugging Face Hub")

# ==============================================
#  EXPLICIT SCHEMA — prevents column-mismatch errors
# ==============================================
# One (column, dtype) pair per feedback field. The order of this table is the
# column order of the schema.
_FEEDBACK_COLUMNS = (
    ("egyptian_arabic", "string"),  # source text the user submitted
    ("generated_msa", "string"),    # translation produced by the model
    ("user_label", "string"),       # "correct" or "incorrect"
    ("user_rating", "int64"),       # slider rating; -1 stands in for missing values
    ("corrected_msa", "string"),    # user-supplied better translation, or ""
    ("timestamp", "string"),        # ISO-8601 UTC time with a trailing "Z"
)

# Explicit schema applied when the feedback frame is converted for upload.
FEEDBACK_FEATURES = Features(
    {column: Value(dtype) for column, dtype in _FEEDBACK_COLUMNS}
)

# ==============================================
#  LOAD GENERATION MODEL
# ==============================================
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

print("Loading model...")
# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    dtype = torch.float16
else:
    dtype = torch.float32

# Keyword options forwarded to from_pretrained, gathered in one place.
_model_load_options = {
    "torch_dtype": dtype,
    "device_map": "auto",
    "low_cpu_mem_usage": True,
}
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **_model_load_options)
# Switch to evaluation mode; the model is only used for generation here.
model.eval()
print("✅ Model loaded successfully")

# ==============================================
#  CACHE FEEDBACK DATASET AT STARTUP
# ==============================================
# Load the existing feedback rows once at startup; later submissions append to
# this in-memory frame and push the whole frame back to the Hub.
print("Loading feedback dataset...")
try:
    _feedback_df = load_dataset(HF_DATASET_NAME, split="train").to_pandas()
except FileNotFoundError as e:
    # `datasets` signals a missing or empty Hub repository with
    # FileNotFoundError (or a subclass). Only then is an empty cache correct.
    print(f"⚠️  No existing feedback dataset — starting fresh ({e})")
    _feedback_df = pd.DataFrame(columns=list(FEEDBACK_FEATURES.keys()))
except Exception:
    # Any other failure (network, auth, unexpected split) must not fall back
    # to an empty cache: the next push would replace the remote dataset with
    # only the new rows and silently discard all existing feedback.
    logging.exception(
        "Could not load feedback dataset %s; refusing to start with an empty cache",
        HF_DATASET_NAME,
    )
    raise
else:
    # Normalise to the explicit schema: backfill any column missing from older
    # uploads, coerce text columns to str and the rating to int64.
    for col in ["egyptian_arabic", "generated_msa", "user_label", "corrected_msa", "timestamp"]:
        if col not in _feedback_df.columns:
            _feedback_df[col] = ""
        _feedback_df[col] = _feedback_df[col].fillna("").astype(str)
    if "user_rating" not in _feedback_df.columns:
        _feedback_df["user_rating"] = -1
    _feedback_df["user_rating"] = _feedback_df["user_rating"].fillna(-1).astype("int64")
    print(f"✅ Feedback dataset loaded ({len(_feedback_df)} existing rows)")

# ==============================================
#  TRANSLATION
# ==============================================
def translate_to_msa(egyptian_text):
    """Translate an Egyptian Arabic phrase into Modern Standard Arabic.

    The text is wrapped as ``dialect: <text> ↔ msa:`` and completed greedily
    by the module-level causal LM. Only the newly generated tokens are
    decoded, so markers inside the user's own text (``msa:``, ``↔``) or a
    second generated pair cannot be mistaken for the answer.

    Args:
        egyptian_text: Raw user input; may be ``None`` or blank.

    Returns:
        The translation text, ``"Please enter an Egyptian Arabic phrase."``
        for blank input, or ``"[No translation generated.]"`` when the model
        produces nothing before the next ``↔`` separator.
    """
    if not egyptian_text or not egyptian_text.strip():
        return "Please enter an Egyptian Arabic phrase."

    max_prompt_tokens = 64
    prefix, suffix = "dialect: ", " ↔ msa:"
    source = egyptian_text.strip()

    inputs = tokenizer(f"{prefix}{source}{suffix}", return_tensors="pt")
    overflow = inputs["input_ids"].shape[-1] - max_prompt_tokens
    if overflow > 0:
        # Plain right-truncation would cut off the trailing "↔ msa:" cue the
        # model needs, so shorten the user text and rebuild the prompt.
        source_ids = tokenizer(source, add_special_tokens=False)["input_ids"]
        kept_ids = source_ids[: max(1, len(source_ids) - overflow)]
        # Cutting mid-character can leave U+FFFD replacement chars; drop them.
        source = (
            tokenizer.decode(kept_ids, skip_special_tokens=True)
            .replace("\ufffd", "")
            .strip()
        )
        # truncation stays on as a safety net in case re-tokenising the
        # shortened text yields slightly more tokens than estimated.
        inputs = tokenizer(
            f"{prefix}{source}{suffix}",
            return_tensors="pt",
            truncation=True,
            max_length=max_prompt_tokens,
        )

    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=48,
            do_sample=False,
            repetition_penalty=1.3,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # skip them and decode only the continuation.
    generated_ids = outputs[0][prompt_length:]
    result = tokenizer.decode(generated_ids, skip_special_tokens=True)
    # Keep the first segment only: anything after "↔" is the start of another pair.
    result = result.split("↔")[0].strip()
    return result if result else "[No translation generated.]"

# ==============================================
#  FEEDBACK SAVING
# ==============================================
def save_feedback(egyptian_input, generated_output, correct_incorrect, rating, corrected_translation):
    """Validate one feedback submission, append it to the cache and push it to the Hub.

    Args:
        egyptian_input: Source text the translation was produced from.
        generated_output: Model translation being rated. Values that are
            blank or start with "[" (placeholder messages) are rejected.
        correct_incorrect: Radio choice; "❌  Incorrect" marks the row incorrect.
        rating: Slider value, or ``None`` when no rating was given.
        corrected_translation: Optional better translation. Required when the
            row is marked incorrect or the rating is 2 or lower.

    Returns:
        A status message for the UI: "✅ ..." on success, "⚠️ ..." when
        validation fails or the upload raises.
    """
    global _feedback_df

    # A rating is meaningless without both the source text and a real translation.
    has_source = bool(egyptian_input and egyptian_input.strip())
    if (
        not has_source
        or not generated_output
        or not generated_output.strip()
        or generated_output.startswith("[")
    ):
        return "⚠️ No valid translation to rate. Please translate something first."

    if rating is None:
        return "⚠️ Please provide a quality rating before submitting."

    rating = int(rating)
    is_incorrect = correct_incorrect == "❌  Incorrect"
    low_rating = rating <= 2
    needs_correction = is_incorrect or low_rating
    correction = (corrected_translation or "").strip()
    has_correction = bool(correction)

    if needs_correction and not has_correction:
        if is_incorrect:
            return "⚠️ Please provide the correct MSA translation before submitting."
        return f"⚠️ Rating of {rating}/5 is low — please provide a better translation before submitting."

    new_row = {
        "egyptian_arabic": egyptian_input.strip(),
        "generated_msa":   generated_output.strip(),
        "user_label":      "incorrect" if is_incorrect else "correct",
        "user_rating":     rating,
        # A correction is stored only when one was required for this row.
        "corrected_msa":   correction if needs_correction else "",
        # Timezone-aware "now" (utcnow() is deprecated), rendered in the same
        # naive-ISO + "Z" format as existing rows.
        "timestamp":       datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
    }

    # Build the updated frame separately so the shared cache is only replaced
    # after the upload succeeds, with no rollback needed on failure.
    new_frame = pd.DataFrame([new_row])
    if _feedback_df.empty:
        # Concatenating with an empty frame is deprecated in recent pandas.
        candidate = new_frame
    else:
        candidate = pd.concat([_feedback_df, new_frame], ignore_index=True)
    candidate["user_rating"] = candidate["user_rating"].fillna(-1).astype("int64")

    try:
        Dataset.from_pandas(candidate, features=FEEDBACK_FEATURES).push_to_hub(
            HF_DATASET_NAME, split="train", private=False
        )
    except Exception as e:
        # UI boundary: record the traceback and report the failure to the user.
        logging.exception("Failed to push feedback to %s", HF_DATASET_NAME)
        return f"⚠️ Could not save feedback: {str(e)}"

    _feedback_df = candidate
    return "✅ Feedback recorded — شكراً!"

# ==============================================
#  RESET UI
# ==============================================
def reset_feedback_ui():
    """Build the updates that return the feedback widgets to their initial state.

    Returns:
        A 5-tuple of ``gr.update`` payloads, positionally intended for: the
        feedback panel (hidden), the correction textbox (cleared), the
        correct/incorrect radio (back to "✅  Correct"), the rating slider
        (value ``None``) and the status box (cleared).
    """
    hide_panel = gr.update(visible=False)
    cleared_correction = gr.update(value="")
    default_label = gr.update(value="✅  Correct")
    cleared_rating = gr.update(value=None)
    cleared_status = gr.update(value="")
    return hide_panel, cleared_correction, default_label, cleared_rating, cleared_status

# ==============================================
#  MOBILE-RESPONSIVE CSS
# ==============================================
# Stylesheet passed to gr.Blocks(css=CSS). It defines the dark colour palette
# as CSS variables, styles the elements addressed by id (#header, #output-box,
# #translate-btn, #submit-btn, #status-box, #feedback-panel) and the
# .section-label divider, and adds two breakpoints (768px and 480px) that stack
# the layout and enlarge touch targets.
# NOTE(review): the `.gr-*` selectors target class names that are not defined
# in this file — confirm they exist in the installed Gradio version.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Noto+Naskh+Arabic:wght@400;600&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500;600&display=swap');

:root {
    --bg:      #0f1117;
    --surface: #181c27;
    --border:  #2a2f3d;
    --accent:  #4f8ef7;
    --accent2: #a78bfa;
    --text:    #e8eaf0;
    --muted:   #6b7280;
    --success: #34d399;
    --warn:    #f87171;
    --radius:  12px;
    --mono:    'DM Mono', monospace;
    --sans:    'DM Sans', sans-serif;
    --arabic:  'Noto Naskh Arabic', serif;
}

/* ── Base ── */
body, .gradio-container {
    background: var(--bg) !important;
    font-family: var(--sans) !important;
    color: var(--text) !important;
    /* prevent horizontal overflow on mobile */
    overflow-x: hidden !important;
}

/* ── Header ── */
#header {
    text-align: center;
    padding: 2rem 1rem 1rem;
    border-bottom: 1px solid var(--border);
    margin-bottom: 1.5rem;
}
#header h1 {
    font-family: var(--mono);
    font-size: clamp(1.4rem, 5vw, 2rem);
    letter-spacing: -0.02em;
    background: linear-gradient(135deg, var(--accent), var(--accent2));
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin: 0 0 0.25rem;
}
#header p {
    color: var(--muted);
    font-size: clamp(0.75rem, 2.5vw, 0.9rem);
    margin: 0;
    line-height: 1.5;
}

/* ── Inputs ── */
textarea, input[type=text] {
    background: #0f1117 !important;
    border: 1px solid var(--border) !important;
    border-radius: 8px !important;
    color: var(--text) !important;
    font-family: var(--arabic) !important;
    font-size: clamp(1rem, 3.5vw, 1.1rem) !important;
    line-height: 1.7 !important;
    direction: rtl;
    transition: border-color 0.2s;
    /* Prevent zoom on focus in iOS (font-size must be >= 16px equivalent) */
    touch-action: manipulation;
}
textarea:focus, input[type=text]:focus {
    border-color: var(--accent) !important;
    outline: none !important;
    box-shadow: 0 0 0 3px rgba(79,142,247,0.15) !important;
}

#output-box textarea {
    background: #13161f !important;
    color: #a5f3c0 !important;
    font-size: clamp(1rem, 3.5vw, 1.15rem) !important;
}

/* ── Buttons ── */
button.primary, #translate-btn {
    background: linear-gradient(135deg, var(--accent), var(--accent2)) !important;
    border: none !important;
    border-radius: 8px !important;
    color: #fff !important;
    font-family: var(--sans) !important;
    font-weight: 600 !important;
    font-size: clamp(0.85rem, 3vw, 0.95rem) !important;
    /* taller tap target on mobile */
    padding: 0.75rem 1.4rem !important;
    min-height: 48px !important;
    width: 100% !important;
    cursor: pointer !important;
    transition: opacity 0.15s, transform 0.1s !important;
    touch-action: manipulation;
}
button.primary:hover, #translate-btn:hover {
    opacity: 0.88 !important;
    transform: translateY(-1px) !important;
}

#submit-btn {
    background: var(--surface) !important;
    border: 1px solid var(--accent) !important;
    border-radius: 8px !important;
    color: var(--accent) !important;
    font-family: var(--sans) !important;
    font-weight: 500 !important;
    min-height: 48px !important;
    width: 100% !important;
    transition: background 0.15s !important;
    touch-action: manipulation;
}
#submit-btn:hover {
    background: rgba(79,142,247,0.1) !important;
}

/* ── Radio & Slider ── */
.gr-radio-item label {
    color: var(--text) !important;
    font-family: var(--sans) !important;
    /* larger touch target */
    padding: 0.4rem 0 !important;
    min-height: 44px !important;
    display: flex !important;
    align-items: center !important;
}
.gr-radio-item input[type=radio] {
    width: 20px !important;
    height: 20px !important;
}

input[type=range] {
    accent-color: var(--accent) !important;
    height: 6px !important;
    /* taller hit area */
    padding: 12px 0 !important;
    cursor: pointer;
    touch-action: manipulation;
}

/* ── Labels ── */
label span, .gr-form label {
    color: var(--muted) !important;
    font-family: var(--sans) !important;
    font-size: clamp(0.72rem, 2vw, 0.82rem) !important;
    text-transform: uppercase !important;
    letter-spacing: 0.06em !important;
}

/* ── Status ── */
#status-box textarea {
    background: transparent !important;
    border: none !important;
    color: var(--success) !important;
    font-family: var(--mono) !important;
    font-size: clamp(0.8rem, 2.5vw, 0.9rem) !important;
    text-align: center;
}

/* ── Examples table ── */
.gr-samples-table td {
    font-family: var(--arabic) !important;
    font-size: clamp(0.9rem, 3vw, 1rem) !important;
    direction: rtl;
    color: var(--text) !important;
    /* comfortable row height on mobile */
    padding: 0.6rem 0.75rem !important;
}
.gr-samples-table tr:hover td {
    background: rgba(79,142,247,0.07) !important;
    cursor: pointer;
}

/* ── Section labels ── */
.section-label {
    font-family: var(--mono);
    font-size: clamp(0.68rem, 2vw, 0.75rem);
    letter-spacing: 0.1em;
    color: var(--muted);
    text-transform: uppercase;
    margin: 1.2rem 0 0.5rem;
    display: flex;
    align-items: center;
    gap: 0.6rem;
}
.section-label::after {
    content: '';
    flex: 1;
    height: 1px;
    background: var(--border);
}

/* ── Feedback panel ── */
#feedback-panel {
    border: 1px solid var(--border) !important;
    border-radius: var(--radius) !important;
    padding: 1rem !important;
    margin-top: 1rem !important;
}

/* ════════════════════════════════════════
   RESPONSIVE BREAKPOINTS
   ════════════════════════════════════════ */

/* Tablet / large phone — stack the two columns */
@media (max-width: 768px) {
    /* Gradio Row becomes a single column */
    .gr-row {
        flex-direction: column !important;
        gap: 0 !important;
    }
    .gr-column {
        width: 100% !important;
        min-width: 0 !important;
        flex: none !important;
    }

    /* Give textareas a comfortable height on phone */
    textarea {
        min-height: 100px !important;
    }

    /* Feedback radio stack vertically */
    .gr-radio-group {
        flex-direction: column !important;
    }
}

/* Small phones */
@media (max-width: 480px) {
    .gradio-container {
        padding: 0 0.5rem !important;
    }
    #header {
        padding: 1.25rem 0.5rem 0.75rem;
    }
    textarea {
        min-height: 90px !important;
        font-size: 1rem !important; /* prevents iOS zoom */
    }
    /* Make slider label wrap gracefully */
    .gr-form label span {
        white-space: normal !important;
    }
}
"""

# ==============================================
#  GRADIO INTERFACE
# ==============================================
# Page layout and event wiring: input/output columns, example phrases, a
# feedback panel that opens only for a rateable translation, and a status line
# that stays visible after the panel closes.
with gr.Blocks(title="SlangGPT", css=CSS, theme=gr.themes.Base()) as demo:

    gr.HTML("""
    <div id="header">
        <h1>SlangGPT</h1>
        <p>Egyptian Arabic dialect → Modern Standard Arabic (MSA)<br>اللهجة المصرية ← الفصحى</p>
    </div>
    """)

    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            gr.HTML('<div class="section-label">Egyptian Arabic Input · اكتب بالمصري</div>')
            egyptian_input = gr.Textbox(
                show_label=False,
                placeholder="اكتب هنا باللهجة المصرية…",
                lines=4,
                rtl=True,
            )
            translate_btn = gr.Button(
                "Translate · ترجم  →",
                variant="primary",
                elem_id="translate-btn",
            )

        with gr.Column(scale=1):
            gr.HTML('<div class="section-label">MSA Translation · الترجمة بالفصحى</div>')
            msa_output = gr.Textbox(
                show_label=False,
                lines=4,
                interactive=False,
                placeholder="ستظهر الترجمة هنا…",
                rtl=True,
                elem_id="output-box",
            )

    gr.HTML('<div class="section-label">Try an example · جرّب مثال</div>')
    gr.Examples(
        examples=[
            ["إنت رايح فين؟"],
            ["عايز اكل حاجة حلوة"],
            ["انا تعبان قوي النهارده"],
            ["الأكل ده كان تحفة"],
            ["ممكن تساعدني؟"],
        ],
        inputs=egyptian_input,
        label="",
    )

    with gr.Group(visible=False, elem_id="feedback-panel") as feedback_group:
        gr.HTML('<div class="section-label">Rate this translation · قيّم الترجمة</div>')

        with gr.Row():
            correct_radio = gr.Radio(
                choices=["✅  Correct", "❌  Incorrect"],
                value="✅  Correct",
                label="Is the translation correct? · هل الترجمة صحيحة؟",
                scale=1,
            )
            # NOTE(review): a Slider created with value=None presumably renders
            # at its minimum (0), in which case the handler's "rating is None"
            # guard never fires — confirm against the installed Gradio version.
            rating_slider = gr.Slider(
                minimum=0, maximum=5, step=1,
                value=None,
                label="Quality · الجودة  (0 = غير مفيدة · 5 = ممتازة)  — required · مطلوب",
                scale=2,
            )

        correction_textbox = gr.Textbox(
            label="Better MSA translation · ترجمة أفضل — required if incorrect or rating ≤ 2 · مطلوب إذا كانت خاطئة أو التقييم ≤ 2",
            lines=2,
            visible=True,
            placeholder="الترجمة الصحيحة هنا…",
            rtl=True,
        )

        submit_feedback = gr.Button("Submit Feedback · أرسل التقييم", elem_id="submit-btn")

    # The status line lives outside the feedback panel so the outcome of a
    # submission remains readable after the panel is hidden.
    feedback_status = gr.Textbox(
        show_label=False,
        interactive=False,
        elem_id="status-box",
        lines=1,
    )

    # Snapshot of the last translated pair. Feedback is stored against these
    # values rather than the live textbox, which may have been edited since.
    latest_source = gr.State("")
    latest_translation = gr.State("")

    def _after_translate(source_text, translation):
        """Open the feedback panel only when there is a real translation to rate."""
        rateable = bool(
            source_text
            and source_text.strip()
            and translation
            and translation.strip()
            and not translation.startswith("[")
        )
        return (
            gr.update(visible=rateable),
            source_text if rateable else "",
            translation if rateable else "",
            gr.update(value=""),  # clear any status left from a previous submission
        )

    def _reset_after_submit(status):
        """Reset and hide the panel after a successful save; otherwise leave it as is."""
        # save_feedback reports success with a message starting with "✅".
        if not (status or "").startswith("✅"):
            # Validation or upload failed: keep the panel open and the user's
            # entries in place so they can fix the problem and resubmit.
            return gr.update(), gr.update(), gr.update(), gr.update()
        panel, correction, label, rating, _status = reset_feedback_ui()
        return panel, correction, label, rating

    translate_btn.click(
        fn=translate_to_msa,
        inputs=egyptian_input,
        outputs=msa_output,
    ).then(
        fn=_after_translate,
        inputs=[egyptian_input, msa_output],
        outputs=[feedback_group, latest_source, latest_translation, feedback_status],
    )

    submit_feedback.click(
        fn=save_feedback,
        inputs=[latest_source, latest_translation, correct_radio, rating_slider, correction_textbox],
        outputs=feedback_status,
    ).then(
        fn=_reset_after_submit,
        inputs=feedback_status,
        outputs=[feedback_group, correction_textbox, correct_radio, rating_slider],
    )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()