Spaces:
Running
Running
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| import logging | |
| from huggingface_hub import login | |
| from datetime import datetime | |
| import pandas as pd | |
| from datasets import Dataset, load_dataset, Features, Value | |
| import os | |
logging.basicConfig(level=logging.INFO)

# ==============================================
# CONFIGURATION
# ==============================================
# Hub identifiers: the dataset repo that receives feedback rows and the
# checkpoint that is loaded for generation.
HF_DATASET_NAME = "AdhamAshraf/slanggpt-feedback-dataset"
MODEL_NAME = "AdhamAshraf/SlangGPT"

# The Hub token is read from the environment; an unset or empty value
# aborts startup before anything is loaded.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise RuntimeError(
        "HF_TOKEN environment variable not set. "
        "Please add a secret named 'HF_TOKEN' with your Hugging Face write token."
    )

login(token=HF_TOKEN)
print("✅ Logged in to Hugging Face Hub")
| # ============================================== | |
| # EXPLICIT SCHEMA — prevents column-mismatch errors | |
| # ============================================== | |
# (column, dtype) pairs for one feedback row. The order here is the column
# order of the schema, so keep it stable.
_FEEDBACK_COLUMN_DTYPES = (
    ("egyptian_arabic", "string"),
    ("generated_msa", "string"),
    ("user_label", "string"),
    ("user_rating", "int64"),
    ("corrected_msa", "string"),
    ("timestamp", "string"),
)
FEEDBACK_FEATURES = Features(
    {column: Value(dtype) for column, dtype in _FEEDBACK_COLUMN_DTYPES}
)
| # ============================================== | |
| # LOAD GENERATION MODEL | |
| # ============================================== | |
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Fall back to the EOS token for padding when the checkpoint defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): the source's indentation was lost; padding_side is assumed to be
# set unconditionally (not only when pad_token was missing) — confirm.
tokenizer.padding_side = "left"

print("Loading model...")
# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    dtype = torch.float16
else:
    dtype = torch.float32
_model_load_options = {
    "torch_dtype": dtype,
    "device_map": "auto",
    "low_cpu_mem_usage": True,
}
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **_model_load_options)
model.eval()  # inference only
print("✅ Model loaded successfully")
| # ============================================== | |
| # CACHE FEEDBACK DATASET AT STARTUP | |
| # ============================================== | |
# Load the existing feedback rows once at startup into the module-level
# `_feedback_df`, which save_feedback() appends to and pushes back to the Hub.
print("Loading feedback dataset...")
try:
    _existing_feedback = load_dataset(HF_DATASET_NAME, split="train")
except FileNotFoundError as e:
    # Only a missing or empty dataset may start from an empty frame.
    # `datasets` reports those cases with FileNotFoundError subclasses.
    # Any other failure (network outage, auth problem, ...) is deliberately
    # NOT swallowed: every save pushes the whole frame to the `train` split,
    # so starting empty after a transient load error would overwrite the
    # rows already stored on the Hub.
    print(f"⚠️ No existing feedback dataset — starting fresh ({e})")
    _feedback_df = pd.DataFrame(columns=list(FEEDBACK_FEATURES.keys()))
else:
    _feedback_df = _existing_feedback.to_pandas()
    # Normalise text columns: back-fill columns absent from older uploads and
    # replace nulls so every value is a plain string.
    for col in ["egyptian_arabic", "generated_msa", "user_label", "corrected_msa", "timestamp"]:
        if col not in _feedback_df.columns:
            _feedback_df[col] = ""
        _feedback_df[col] = _feedback_df[col].fillna("").astype(str)
    # -1 marks a missing rating so the column can be cast to int64.
    if "user_rating" not in _feedback_df.columns:
        _feedback_df["user_rating"] = -1
    _feedback_df["user_rating"] = _feedback_df["user_rating"].fillna(-1).astype("int64")
    print(f"✅ Feedback dataset loaded ({len(_feedback_df)} existing rows)")
| # ============================================== | |
| # TRANSLATION | |
| # ============================================== | |
def translate_to_msa(egyptian_text):
    """Translate an Egyptian Arabic phrase to MSA with the loaded causal LM.

    The text is wrapped in the prompt ``dialect: <text> ↔ msa:`` and decoded
    greedily (``do_sample=False``).

    Args:
        egyptian_text: User-supplied phrase; may be ``None`` or blank.

    Returns:
        The translation string, or a human-readable placeholder:
        a prompt to enter text when the input is blank, or a message in
        square brackets when the input is too long or nothing was generated.
        The feedback flow treats bracketed messages as "not a translation".
    """
    if not egyptian_text or not egyptian_text.strip():
        return "Please enter an Egyptian Arabic phrase."

    max_prompt_tokens = 64
    prompt = f"dialect: {egyptian_text.strip()} ↔ msa:"

    encoded = tokenizer(prompt, return_tensors="pt")
    prompt_length = encoded["input_ids"].shape[1]
    # Reject over-long prompts instead of truncating them: cutting the tail
    # would remove the trailing "↔ msa:" cue, and the model would then
    # continue the dialect text rather than produce a translation.
    if prompt_length > max_prompt_tokens:
        return "[Input is too long to translate. Please shorten it.]"

    inputs = {k: v.to(model.device) for k, v in encoded.items()}
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=48,
            do_sample=False,
            repetition_penalty=1.3,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens so the echoed prompt (and any
    # "msa:" the user typed) can never be mistaken for the translation.
    generated = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    # Keep the first translation only: drop anything from the point where the
    # model starts another "dialect: … ↔ msa: …" pair.
    for marker in ("↔", "dialect:"):
        generated = generated.split(marker)[0]

    result = generated.strip()
    return result if result else "[No translation generated.]"
| # ============================================== | |
| # FEEDBACK SAVING | |
| # ============================================== | |
def save_feedback(egyptian_input, generated_output, correct_incorrect, rating, corrected_translation):
    """Validate one piece of user feedback and push it to the Hub dataset.

    Args:
        egyptian_input: Source phrase the translation was produced from.
        generated_output: Model translation being rated. Blank values and
            bracketed placeholder messages are rejected.
        correct_incorrect: Radio choice; ``"❌ Incorrect"`` marks the
            translation as wrong, anything else counts as correct.
        rating: Quality score, or ``None`` when not provided.
        corrected_translation: User's better translation. Required when the
            translation is marked incorrect or the rating is 2 or lower,
            and stored only in those cases.

    Returns:
        A status string for the UI: it starts with ``✅`` on success and with
        ``⚠️`` for validation or upload failures.
    """
    # Function-scope import: the file-level import only brings in `datetime`.
    from datetime import timezone

    global _feedback_df

    source_text = (egyptian_input or "").strip()
    translation = (generated_output or "").strip()
    correction = (corrected_translation or "").strip()

    if not translation or translation.startswith("["):
        return "⚠️ No valid translation to rate. Please translate something first."
    # A blank source means the "translation" is only the placeholder returned
    # for empty input; it must not be recorded as a data row.
    if not source_text:
        return "⚠️ No source text to rate. Please translate something first."
    if rating is None:
        return "⚠️ Please provide a quality rating before submitting."

    rating = int(rating)
    is_incorrect = correct_incorrect == "❌ Incorrect"
    needs_correction = is_incorrect or rating <= 2
    if needs_correction and not correction:
        if is_incorrect:
            return "⚠️ Please provide the correct MSA translation before submitting."
        return f"⚠️ Rating of {rating}/5 is low — please provide a better translation before submitting."

    # Same "YYYY-MM-DDTHH:MM:SS[.ffffff]Z" format as before, taken from a
    # timezone-aware clock (datetime.utcnow() is deprecated).
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    new_row = {
        "egyptian_arabic": source_text,
        "generated_msa": translation,
        "user_label": "incorrect" if is_incorrect else "correct",
        "user_rating": rating,
        "corrected_msa": correction if needs_correction else "",
        "timestamp": timestamp,
    }

    # Build the updated frame on the side and publish it to the global only
    # after the push succeeds, so a failed upload leaves the cache untouched.
    candidate = pd.concat([_feedback_df, pd.DataFrame([new_row])], ignore_index=True)
    candidate["user_rating"] = candidate["user_rating"].fillna(-1).astype("int64")
    try:
        Dataset.from_pandas(candidate, features=FEEDBACK_FEATURES).push_to_hub(
            HF_DATASET_NAME, split="train", private=False
        )
    except Exception as e:
        # UI boundary: report the failure to the user, keep the traceback in the logs.
        logging.exception("Pushing feedback to the Hub failed")
        return f"⚠️ Could not save feedback: {e}"

    _feedback_df = candidate
    return "✅ Feedback recorded — شكراً!"
| # ============================================== | |
| # RESET UI | |
| # ============================================== | |
def reset_feedback_ui():
    """Return the five updates that put the feedback widgets back to their initial state.

    In order: hide the feedback panel, clear the correction text, select
    "✅ Correct" on the radio, clear the rating, and clear the status text.
    """
    hide_panel = gr.update(visible=False)
    clear_correction = gr.update(value="")
    default_label = gr.update(value="✅ Correct")
    clear_rating = gr.update(value=None)
    clear_status = gr.update(value="")
    return hide_panel, clear_correction, default_label, clear_rating, clear_status
| # ============================================== | |
| # MOBILE-RESPONSIVE CSS | |
| # ============================================== | |
# Stylesheet handed to gr.Blocks(css=CSS). It defines the dark colour palette
# and font variables, right-to-left Arabic text inputs, enlarged touch targets
# for buttons/radios/sliders, and two responsive breakpoints (768px and 480px).
# NOTE(review): selectors such as .gr-row, .gr-column, .gr-radio-item and
# .gr-samples-table presumably target Gradio's internal class names — confirm
# they exist in the Gradio version this app runs on.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Noto+Naskh+Arabic:wght@400;600&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500;600&display=swap');
:root {
--bg: #0f1117;
--surface: #181c27;
--border: #2a2f3d;
--accent: #4f8ef7;
--accent2: #a78bfa;
--text: #e8eaf0;
--muted: #6b7280;
--success: #34d399;
--warn: #f87171;
--radius: 12px;
--mono: 'DM Mono', monospace;
--sans: 'DM Sans', sans-serif;
--arabic: 'Noto Naskh Arabic', serif;
}
/* ── Base ── */
body, .gradio-container {
background: var(--bg) !important;
font-family: var(--sans) !important;
color: var(--text) !important;
/* prevent horizontal overflow on mobile */
overflow-x: hidden !important;
}
/* ── Header ── */
#header {
text-align: center;
padding: 2rem 1rem 1rem;
border-bottom: 1px solid var(--border);
margin-bottom: 1.5rem;
}
#header h1 {
font-family: var(--mono);
font-size: clamp(1.4rem, 5vw, 2rem);
letter-spacing: -0.02em;
background: linear-gradient(135deg, var(--accent), var(--accent2));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin: 0 0 0.25rem;
}
#header p {
color: var(--muted);
font-size: clamp(0.75rem, 2.5vw, 0.9rem);
margin: 0;
line-height: 1.5;
}
/* ── Inputs ── */
textarea, input[type=text] {
background: #0f1117 !important;
border: 1px solid var(--border) !important;
border-radius: 8px !important;
color: var(--text) !important;
font-family: var(--arabic) !important;
font-size: clamp(1rem, 3.5vw, 1.1rem) !important;
line-height: 1.7 !important;
direction: rtl;
transition: border-color 0.2s;
/* Prevent zoom on focus in iOS (font-size must be >= 16px equivalent) */
touch-action: manipulation;
}
textarea:focus, input[type=text]:focus {
border-color: var(--accent) !important;
outline: none !important;
box-shadow: 0 0 0 3px rgba(79,142,247,0.15) !important;
}
#output-box textarea {
background: #13161f !important;
color: #a5f3c0 !important;
font-size: clamp(1rem, 3.5vw, 1.15rem) !important;
}
/* ── Buttons ── */
button.primary, #translate-btn {
background: linear-gradient(135deg, var(--accent), var(--accent2)) !important;
border: none !important;
border-radius: 8px !important;
color: #fff !important;
font-family: var(--sans) !important;
font-weight: 600 !important;
font-size: clamp(0.85rem, 3vw, 0.95rem) !important;
/* taller tap target on mobile */
padding: 0.75rem 1.4rem !important;
min-height: 48px !important;
width: 100% !important;
cursor: pointer !important;
transition: opacity 0.15s, transform 0.1s !important;
touch-action: manipulation;
}
button.primary:hover, #translate-btn:hover {
opacity: 0.88 !important;
transform: translateY(-1px) !important;
}
#submit-btn {
background: var(--surface) !important;
border: 1px solid var(--accent) !important;
border-radius: 8px !important;
color: var(--accent) !important;
font-family: var(--sans) !important;
font-weight: 500 !important;
min-height: 48px !important;
width: 100% !important;
transition: background 0.15s !important;
touch-action: manipulation;
}
#submit-btn:hover {
background: rgba(79,142,247,0.1) !important;
}
/* ── Radio & Slider ── */
.gr-radio-item label {
color: var(--text) !important;
font-family: var(--sans) !important;
/* larger touch target */
padding: 0.4rem 0 !important;
min-height: 44px !important;
display: flex !important;
align-items: center !important;
}
.gr-radio-item input[type=radio] {
width: 20px !important;
height: 20px !important;
}
input[type=range] {
accent-color: var(--accent) !important;
height: 6px !important;
/* taller hit area */
padding: 12px 0 !important;
cursor: pointer;
touch-action: manipulation;
}
/* ── Labels ── */
label span, .gr-form label {
color: var(--muted) !important;
font-family: var(--sans) !important;
font-size: clamp(0.72rem, 2vw, 0.82rem) !important;
text-transform: uppercase !important;
letter-spacing: 0.06em !important;
}
/* ── Status ── */
#status-box textarea {
background: transparent !important;
border: none !important;
color: var(--success) !important;
font-family: var(--mono) !important;
font-size: clamp(0.8rem, 2.5vw, 0.9rem) !important;
text-align: center;
}
/* ── Examples table ── */
.gr-samples-table td {
font-family: var(--arabic) !important;
font-size: clamp(0.9rem, 3vw, 1rem) !important;
direction: rtl;
color: var(--text) !important;
/* comfortable row height on mobile */
padding: 0.6rem 0.75rem !important;
}
.gr-samples-table tr:hover td {
background: rgba(79,142,247,0.07) !important;
cursor: pointer;
}
/* ── Section labels ── */
.section-label {
font-family: var(--mono);
font-size: clamp(0.68rem, 2vw, 0.75rem);
letter-spacing: 0.1em;
color: var(--muted);
text-transform: uppercase;
margin: 1.2rem 0 0.5rem;
display: flex;
align-items: center;
gap: 0.6rem;
}
.section-label::after {
content: '';
flex: 1;
height: 1px;
background: var(--border);
}
/* ── Feedback panel ── */
#feedback-panel {
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
padding: 1rem !important;
margin-top: 1rem !important;
}
/* ════════════════════════════════════════
RESPONSIVE BREAKPOINTS
════════════════════════════════════════ */
/* Tablet / large phone — stack the two columns */
@media (max-width: 768px) {
/* Gradio Row becomes a single column */
.gr-row {
flex-direction: column !important;
gap: 0 !important;
}
.gr-column {
width: 100% !important;
min-width: 0 !important;
flex: none !important;
}
/* Give textareas a comfortable height on phone */
textarea {
min-height: 100px !important;
}
/* Feedback radio stack vertically */
.gr-radio-group {
flex-direction: column !important;
}
}
/* Small phones */
@media (max-width: 480px) {
.gradio-container {
padding: 0 0.5rem !important;
}
#header {
padding: 1.25rem 0.5rem 0.75rem;
}
textarea {
min-height: 90px !important;
font-size: 1rem !important; /* prevents iOS zoom */
}
/* Make slider label wrap gracefully */
.gr-form label span {
white-space: normal !important;
}
}
"""
| # ============================================== | |
| # GRADIO INTERFACE | |
| # ============================================== | |
# Two-column translate view, example phrases, and a feedback panel that is
# offered only after a rateable translation has been produced.
with gr.Blocks(title="SlangGPT", css=CSS, theme=gr.themes.Base()) as demo:
    gr.HTML("""
    <div id="header">
    <h1>SlangGPT</h1>
    <p>Egyptian Arabic dialect → Modern Standard Arabic (MSA)<br>اللهجة المصرية ← الفصحى</p>
    </div>
    """)

    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            gr.HTML('<div class="section-label">Egyptian Arabic Input · اكتب بالمصري</div>')
            egyptian_input = gr.Textbox(
                show_label=False,
                placeholder="اكتب هنا باللهجة المصرية…",
                lines=4,
                rtl=True,
            )
            translate_btn = gr.Button(
                "Translate · ترجم →",
                variant="primary",
                elem_id="translate-btn",
            )
        with gr.Column(scale=1):
            gr.HTML('<div class="section-label">MSA Translation · الترجمة بالفصحى</div>')
            msa_output = gr.Textbox(
                show_label=False,
                lines=4,
                interactive=False,
                placeholder="ستظهر الترجمة هنا…",
                rtl=True,
                elem_id="output-box",
            )

    gr.HTML('<div class="section-label">Try an example · جرّب مثال</div>')
    gr.Examples(
        examples=[
            ["إنت رايح فين؟"],
            ["عايز اكل حاجة حلوة"],
            ["انا تعبان قوي النهارده"],
            ["الأكل ده كان تحفة"],
            ["ممكن تساعدني؟"],
        ],
        inputs=egyptian_input,
        label="",
    )

    with gr.Group(visible=False, elem_id="feedback-panel") as feedback_group:
        gr.HTML('<div class="section-label">Rate this translation · قيّم الترجمة</div>')
        with gr.Row():
            correct_radio = gr.Radio(
                choices=["✅ Correct", "❌ Incorrect"],
                value="✅ Correct",
                label="Is the translation correct? · هل الترجمة صحيحة؟",
                scale=1,
            )
            rating_slider = gr.Slider(
                minimum=0, maximum=5, step=1,
                value=None,
                label="Quality · الجودة (0 = غير مفيدة · 5 = ممتازة) — required · مطلوب",
                scale=2,
            )
        correction_textbox = gr.Textbox(
            label="Better MSA translation · ترجمة أفضل — required if incorrect or rating ≤ 2 · مطلوب إذا كانت خاطئة أو التقييم ≤ 2",
            lines=2,
            visible=True,
            placeholder="الترجمة الصحيحة هنا…",
            rtl=True,
        )
        submit_feedback = gr.Button("Submit Feedback · أرسل التقييم", elem_id="submit-btn")

    # The status line sits outside the feedback panel so the confirmation
    # stays visible after a successful submission hides the panel.
    feedback_status = gr.Textbox(
        show_label=False,
        interactive=False,
        elem_id="status-box",
        lines=1,
    )

    # Source text and translation are snapshotted together at translate time,
    # so editing the input box afterwards cannot pair a different source
    # phrase with the translation being rated.
    latest_source = gr.State("")
    latest_translation = gr.State("")

    def _translate_and_offer_feedback(text):
        """Translate `text`; show the feedback panel only for a rateable result."""
        translation = translate_to_msa(text)
        # Placeholders (blank input, bracketed messages) are not translations,
        # so no rating is offered for them.
        rateable = (
            bool(text and text.strip())
            and bool(translation)
            and not translation.startswith("[")
        )
        return (
            translation,
            gr.update(visible=rateable),
            text if rateable else "",
            translation if rateable else "",
            gr.update(value=""),  # clear the status left by a previous submission
        )

    def _submit_feedback(source, translation, label, rating, correction):
        """Save feedback; reset the form only when the save succeeded."""
        status = save_feedback(source, translation, label, rating, correction)
        if status.startswith("✅"):
            panel, correction_box, radio, slider, _ = reset_feedback_ui()
            return panel, correction_box, radio, slider, status
        # Validation or upload failure: leave the form as the user filled it
        # and show the message, so nothing they typed is lost.
        return gr.update(), gr.update(), gr.update(), gr.update(), status

    translate_btn.click(
        fn=_translate_and_offer_feedback,
        inputs=egyptian_input,
        outputs=[msa_output, feedback_group, latest_source, latest_translation, feedback_status],
    )
    submit_feedback.click(
        fn=_submit_feedback,
        inputs=[latest_source, latest_translation, correct_radio, rating_slider, correction_textbox],
        outputs=[feedback_group, correction_textbox, correct_radio, rating_slider, feedback_status],
    )

if __name__ == "__main__":
    demo.launch()