import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM import torch import logging from huggingface_hub import login from datetime import datetime import pandas as pd from datasets import Dataset, load_dataset, Features, Value import os logging.basicConfig(level=logging.INFO) # ============================================== # CONFIGURATION # ============================================== HF_DATASET_NAME = "AdhamAshraf/slanggpt-feedback-dataset" MODEL_NAME = "AdhamAshraf/SlangGPT" HF_TOKEN = os.getenv("HF_TOKEN") if not HF_TOKEN: raise RuntimeError( "HF_TOKEN environment variable not set. " "Please add a secret named 'HF_TOKEN' with your Hugging Face write token." ) login(token=HF_TOKEN) print("✅ Logged in to Hugging Face Hub") # ============================================== # EXPLICIT SCHEMA — prevents column-mismatch errors # ============================================== FEEDBACK_FEATURES = Features({ "egyptian_arabic": Value("string"), "generated_msa": Value("string"), "user_label": Value("string"), "user_rating": Value("int64"), "corrected_msa": Value("string"), "timestamp": Value("string"), }) # ============================================== # LOAD GENERATION MODEL # ============================================== print("Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "left" print("Loading model...") dtype = torch.float16 if torch.cuda.is_available() else torch.float32 model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, torch_dtype=dtype, device_map="auto", low_cpu_mem_usage=True, ) model.eval() print("✅ Model loaded successfully") # ============================================== # CACHE FEEDBACK DATASET AT STARTUP # ============================================== print("Loading feedback dataset...") try: _feedback_df = load_dataset(HF_DATASET_NAME, split="train").to_pandas() if "corrected_msa" not in _feedback_df.columns: _feedback_df["corrected_msa"] = "" for col in ["egyptian_arabic", "generated_msa", "user_label", "corrected_msa", "timestamp"]: _feedback_df[col] = _feedback_df[col].fillna("").astype(str) _feedback_df["user_rating"] = _feedback_df["user_rating"].fillna(-1).astype("int64") print(f"✅ Feedback dataset loaded ({len(_feedback_df)} existing rows)") except Exception as e: print(f"⚠️ No existing feedback dataset — starting fresh ({e})") _feedback_df = pd.DataFrame(columns=list(FEEDBACK_FEATURES.keys())) # ============================================== # TRANSLATION # ============================================== def translate_to_msa(egyptian_text): if not egyptian_text or not egyptian_text.strip(): return "Please enter an Egyptian Arabic phrase." prompt = f"dialect: {egyptian_text.strip()} ↔ msa:" inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=64) inputs = {k: v.to(model.device) for k, v in inputs.items()} with torch.inference_mode(): outputs = model.generate( **inputs, max_new_tokens=48, do_sample=False, repetition_penalty=1.3, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, ) full_output = tokenizer.decode(outputs[0], skip_special_tokens=True) result = full_output.split("msa:")[-1].strip() if "msa:" in full_output else full_output result = result.split("↔")[0].strip() return result if result else "[No translation generated.]" # ============================================== # FEEDBACK SAVING # ============================================== def save_feedback(egyptian_input, generated_output, correct_incorrect, rating, corrected_translation): global _feedback_df if not generated_output or not generated_output.strip() or generated_output.startswith("["): return "⚠️ No valid translation to rate. Please translate something first." if rating is None: return "⚠️ Please provide a quality rating before submitting." rating = int(rating) is_incorrect = correct_incorrect == "❌ Incorrect" low_rating = rating <= 2 needs_correction = is_incorrect or low_rating has_correction = bool(corrected_translation and corrected_translation.strip()) if needs_correction and not has_correction: if is_incorrect: return "⚠️ Please provide the correct MSA translation before submitting." else: return f"⚠️ Rating of {rating}/5 is low — please provide a better translation before submitting." new_row = { "egyptian_arabic": egyptian_input.strip(), "generated_msa": generated_output.strip(), "user_label": "incorrect" if is_incorrect else "correct", "user_rating": rating, "corrected_msa": corrected_translation.strip() if needs_correction and has_correction else "", "timestamp": datetime.utcnow().isoformat() + "Z", } _feedback_df = pd.concat([_feedback_df, pd.DataFrame([new_row])], ignore_index=True) _feedback_df["user_rating"] = _feedback_df["user_rating"].fillna(-1).astype("int64") try: Dataset.from_pandas(_feedback_df, features=FEEDBACK_FEATURES).push_to_hub( HF_DATASET_NAME, split="train", private=False ) return "✅ Feedback recorded — شكراً!" except Exception as e: _feedback_df = _feedback_df.iloc[:-1].reset_index(drop=True) return f"⚠️ Could not save feedback: {str(e)}" # ============================================== # RESET UI # ============================================== def reset_feedback_ui(): return ( gr.update(visible=False), gr.update(value=""), gr.update(value="✅ Correct"), gr.update(value=None), gr.update(value=""), ) # ============================================== # MOBILE-RESPONSIVE CSS # ============================================== CSS = """ @import url('https://fonts.googleapis.com/css2?family=Noto+Naskh+Arabic:wght@400;600&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500;600&display=swap'); :root { --bg: #0f1117; --surface: #181c27; --border: #2a2f3d; --accent: #4f8ef7; --accent2: #a78bfa; --text: #e8eaf0; --muted: #6b7280; --success: #34d399; --warn: #f87171; --radius: 12px; --mono: 'DM Mono', monospace; --sans: 'DM Sans', sans-serif; --arabic: 'Noto Naskh Arabic', serif; } /* ── Base ── */ body, .gradio-container { background: var(--bg) !important; font-family: var(--sans) !important; color: var(--text) !important; /* prevent horizontal overflow on mobile */ overflow-x: hidden !important; } /* ── Header ── */ #header { text-align: center; padding: 2rem 1rem 1rem; border-bottom: 1px solid var(--border); margin-bottom: 1.5rem; } #header h1 { font-family: var(--mono); font-size: clamp(1.4rem, 5vw, 2rem); letter-spacing: -0.02em; background: linear-gradient(135deg, var(--accent), var(--accent2)); -webkit-background-clip: text; -webkit-text-fill-color: transparent; margin: 0 0 0.25rem; } #header p { color: var(--muted); font-size: clamp(0.75rem, 2.5vw, 0.9rem); margin: 0; line-height: 1.5; } /* ── Inputs ── */ textarea, input[type=text] { background: #0f1117 !important; border: 1px solid var(--border) !important; border-radius: 8px !important; color: var(--text) !important; font-family: var(--arabic) !important; font-size: clamp(1rem, 3.5vw, 1.1rem) !important; line-height: 1.7 !important; direction: rtl; transition: border-color 0.2s; /* Prevent zoom on focus in iOS (font-size must be >= 16px equivalent) */ touch-action: manipulation; } textarea:focus, input[type=text]:focus { border-color: var(--accent) !important; outline: none !important; box-shadow: 0 0 0 3px rgba(79,142,247,0.15) !important; } #output-box textarea { background: #13161f !important; color: #a5f3c0 !important; font-size: clamp(1rem, 3.5vw, 1.15rem) !important; } /* ── Buttons ── */ button.primary, #translate-btn { background: linear-gradient(135deg, var(--accent), var(--accent2)) !important; border: none !important; border-radius: 8px !important; color: #fff !important; font-family: var(--sans) !important; font-weight: 600 !important; font-size: clamp(0.85rem, 3vw, 0.95rem) !important; /* taller tap target on mobile */ padding: 0.75rem 1.4rem !important; min-height: 48px !important; width: 100% !important; cursor: pointer !important; transition: opacity 0.15s, transform 0.1s !important; touch-action: manipulation; } button.primary:hover, #translate-btn:hover { opacity: 0.88 !important; transform: translateY(-1px) !important; } #submit-btn { background: var(--surface) !important; border: 1px solid var(--accent) !important; border-radius: 8px !important; color: var(--accent) !important; font-family: var(--sans) !important; font-weight: 500 !important; min-height: 48px !important; width: 100% !important; transition: background 0.15s !important; touch-action: manipulation; } #submit-btn:hover { background: rgba(79,142,247,0.1) !important; } /* ── Radio & Slider ── */ .gr-radio-item label { color: var(--text) !important; font-family: var(--sans) !important; /* larger touch target */ padding: 0.4rem 0 !important; min-height: 44px !important; display: flex !important; align-items: center !important; } .gr-radio-item input[type=radio] { width: 20px !important; height: 20px !important; } input[type=range] { accent-color: var(--accent) !important; height: 6px !important; /* taller hit area */ padding: 12px 0 !important; cursor: pointer; touch-action: manipulation; } /* ── Labels ── */ label span, .gr-form label { color: var(--muted) !important; font-family: var(--sans) !important; font-size: clamp(0.72rem, 2vw, 0.82rem) !important; text-transform: uppercase !important; letter-spacing: 0.06em !important; } /* ── Status ── */ #status-box textarea { background: transparent !important; border: none !important; color: var(--success) !important; font-family: var(--mono) !important; font-size: clamp(0.8rem, 2.5vw, 0.9rem) !important; text-align: center; } /* ── Examples table ── */ .gr-samples-table td { font-family: var(--arabic) !important; font-size: clamp(0.9rem, 3vw, 1rem) !important; direction: rtl; color: var(--text) !important; /* comfortable row height on mobile */ padding: 0.6rem 0.75rem !important; } .gr-samples-table tr:hover td { background: rgba(79,142,247,0.07) !important; cursor: pointer; } /* ── Section labels ── */ .section-label { font-family: var(--mono); font-size: clamp(0.68rem, 2vw, 0.75rem); letter-spacing: 0.1em; color: var(--muted); text-transform: uppercase; margin: 1.2rem 0 0.5rem; display: flex; align-items: center; gap: 0.6rem; } .section-label::after { content: ''; flex: 1; height: 1px; background: var(--border); } /* ── Feedback panel ── */ #feedback-panel { border: 1px solid var(--border) !important; border-radius: var(--radius) !important; padding: 1rem !important; margin-top: 1rem !important; } /* ════════════════════════════════════════ RESPONSIVE BREAKPOINTS ════════════════════════════════════════ */ /* Tablet / large phone — stack the two columns */ @media (max-width: 768px) { /* Gradio Row becomes a single column */ .gr-row { flex-direction: column !important; gap: 0 !important; } .gr-column { width: 100% !important; min-width: 0 !important; flex: none !important; } /* Give textareas a comfortable height on phone */ textarea { min-height: 100px !important; } /* Feedback radio stack vertically */ .gr-radio-group { flex-direction: column !important; } } /* Small phones */ @media (max-width: 480px) { .gradio-container { padding: 0 0.5rem !important; } #header { padding: 1.25rem 0.5rem 0.75rem; } textarea { min-height: 90px !important; font-size: 1rem !important; /* prevents iOS zoom */ } /* Make slider label wrap gracefully */ .gr-form label span { white-space: normal !important; } } """ # ============================================== # GRADIO INTERFACE # ============================================== with gr.Blocks(title="SlangGPT", css=CSS, theme=gr.themes.Base()) as demo: gr.HTML("""
Egyptian Arabic dialect → Modern Standard Arabic (MSA)
اللهجة المصرية ← الفصحى