Spaces:

AdhamAshraf
/

SlangGPT

Running

App Files Files Community

SlangGPT / app.py

AdhamAshraf

make it mobile compatible

67510b3 verified 3 days ago

raw

history blame contribute delete

16.9 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import logging
	from huggingface_hub import login
	from datetime import datetime
	import pandas as pd
	from datasets import Dataset, load_dataset, Features, Value
	import os

	# Emit INFO-level (and above) records from the root logger.
	logging.basicConfig(level=logging.INFO)

	# ==============================================
	# CONFIGURATION
	# ==============================================
	# Hub repository that stores user feedback rows.
	HF_DATASET_NAME = "AdhamAshraf/slanggpt-feedback-dataset"
	# Hub repository of the causal LM used for translation.
	MODEL_NAME = "AdhamAshraf/SlangGPT"
	# Token read from the environment; None when the variable is absent.
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# Fail fast at import time: an unset or empty token aborts startup.
	if not HF_TOKEN:
	    _missing_token_message = (
	        "HF_TOKEN environment variable not set. "
	        "Please add a secret named 'HF_TOKEN' with your Hugging Face write token."
	    )
	    raise RuntimeError(_missing_token_message)

	# Authenticate this process against the Hugging Face Hub.
	login(token=HF_TOKEN)
	print("✅ Logged in to Hugging Face Hub")

	# ==============================================
	# EXPLICIT SCHEMA — prevents column-mismatch errors
	# ==============================================
	# Column name -> Arrow dtype for one feedback row. The order here is the
	# column order of the resulting schema.
	FEEDBACK_FEATURES = Features(
	    {
	        column: Value(dtype)
	        for column, dtype in (
	            ("egyptian_arabic", "string"),
	            ("generated_msa", "string"),
	            ("user_label", "string"),
	            ("user_rating", "int64"),
	            ("corrected_msa", "string"),
	            ("timestamp", "string"),
	        )
	    }
	)

	# ==============================================
	# LOAD GENERATION MODEL
	# ==============================================
	# --- Tokenizer -------------------------------------------------------
	print("Loading tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	# Fall back to the EOS token when the tokenizer defines no pad token.
	if tokenizer.pad_token is None:
	    tokenizer.pad_token = tokenizer.eos_token
	# Pad on the left so generated tokens directly follow the prompt.
	tokenizer.padding_side = "left"

	# --- Model -----------------------------------------------------------
	print("Loading model...")
	# Half precision when CUDA is available, full precision otherwise.
	if torch.cuda.is_available():
	    dtype = torch.float16
	else:
	    dtype = torch.float32

	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_NAME,
	    torch_dtype=dtype,
	    device_map="auto",
	    low_cpu_mem_usage=True,
	)
	# Switch to evaluation mode; the model is only used for inference here.
	model.eval()
	print("✅ Model loaded successfully")

	# ==============================================
	# CACHE FEEDBACK DATASET AT STARTUP
	# ==============================================
	# Load the existing feedback rows once at startup into a module-level
	# DataFrame. save_feedback() appends to this frame and pushes the whole
	# frame back to the Hub, so the frame must contain every existing row.
	print("Loading feedback dataset...")
	try:
	    _feedback_df = load_dataset(HF_DATASET_NAME, split="train").to_pandas()
	except FileNotFoundError as e:
	    # Only a missing or empty dataset repo means "start fresh". Any other
	    # failure (network, auth, ...) is deliberately left to propagate:
	    # continuing with an empty frame would make the next push replace the
	    # rows already stored on the Hub with only the newly submitted ones.
	    print(f"⚠️ No existing feedback dataset — starting fresh ({e})")
	    _feedback_df = pd.DataFrame(columns=list(FEEDBACK_FEATURES.keys()))
	else:
	    # Normalise to the explicit schema. Columns missing from older
	    # uploads are back-filled instead of raising KeyError.
	    for col in ["egyptian_arabic", "generated_msa", "user_label", "corrected_msa", "timestamp"]:
	        if col not in _feedback_df.columns:
	            _feedback_df[col] = ""
	        _feedback_df[col] = _feedback_df[col].fillna("").astype(str)
	    if "user_rating" not in _feedback_df.columns:
	        _feedback_df["user_rating"] = -1
	    # -1 marks "no rating" so the column can stay a plain int64.
	    _feedback_df["user_rating"] = _feedback_df["user_rating"].fillna(-1).astype("int64")
	    # Keep only schema columns, in schema order, so the frame always
	    # matches FEEDBACK_FEATURES when it is converted back to a Dataset.
	    _feedback_df = _feedback_df[list(FEEDBACK_FEATURES.keys())]
	    print(f"✅ Feedback dataset loaded ({len(_feedback_df)} existing rows)")

	# ==============================================
	# TRANSLATION
	# ==============================================
	def translate_to_msa(egyptian_text):
	    """Translate an Egyptian Arabic phrase to MSA with the loaded model.

	    The text is wrapped in the prompt ``dialect: <text> ↔ msa:`` and the
	    model continues it greedily (``do_sample=False``).

	    Args:
	        egyptian_text: Raw user input; may be ``None`` or blank.

	    Returns:
	        The generated translation, or one of two placeholder messages:
	        ``"Please enter an Egyptian Arabic phrase."`` for blank input and
	        ``"[No translation generated.]"`` when the model produced nothing
	        usable.
	    """
	    if not egyptian_text or not egyptian_text.strip():
	        return "Please enter an Egyptian Arabic phrase."

	    max_prompt_tokens = 64
	    text = egyptian_text.strip()

	    # Truncation removes tokens from the end of the prompt, which is where
	    # the "↔ msa:" cue sits. Trim the *user text* to a token budget first
	    # so that a long input cannot push the cue out of the prompt.
	    scaffold_len = len(tokenizer("dialect:  ↔ msa:")["input_ids"])
	    budget = max(max_prompt_tokens - scaffold_len, 1)
	    text_ids = tokenizer(text, add_special_tokens=False)["input_ids"]
	    if len(text_ids) > budget:
	        text = tokenizer.decode(text_ids[:budget], skip_special_tokens=True)
	        # Cutting inside a multi-token character can leave U+FFFD behind.
	        text = text.replace("\ufffd", "").strip()

	    prompt = f"dialect: {text} ↔ msa:"
	    # truncation stays on as a safety net for the rare case where the
	    # re-assembled prompt tokenizes slightly longer than estimated.
	    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_prompt_tokens)
	    inputs = {k: v.to(model.device) for k, v in inputs.items()}
	    prompt_len = inputs["input_ids"].shape[-1]

	    with torch.inference_mode():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=48,
	            do_sample=False,
	            repetition_penalty=1.3,
	            pad_token_id=tokenizer.pad_token_id,
	            eos_token_id=tokenizer.eos_token_id,
	        )

	    # A causal LM returns prompt + continuation. Decode only the newly
	    # generated tokens instead of searching the full text for "msa:",
	    # which picked the wrong segment whenever the continuation repeated
	    # the pattern or the cue was missing from the prompt.
	    continuation = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
	    # Anything after a "↔" belongs to a further generated pair; drop it.
	    result = continuation.split("↔")[0].strip()
	    return result if result else "[No translation generated.]"

	# ==============================================
	# FEEDBACK SAVING
	# ==============================================
	def save_feedback(egyptian_input, generated_output, correct_incorrect, rating, corrected_translation):
	    """Validate one feedback submission and push it to the Hub dataset.

	    Args:
	        egyptian_input: Source text the user typed.
	        generated_output: Translation the feedback refers to.
	        correct_incorrect: Radio value; ``"❌ Incorrect"`` marks the
	            translation as wrong, anything else counts as correct.
	        rating: Quality score; ``None`` means no rating was given.
	        corrected_translation: User-supplied better translation. Required
	            when the translation is marked incorrect or the rating is <= 2.

	    Returns:
	        A status message for the UI. Messages starting with ``"✅"``
	        indicate success; messages starting with ``"⚠️"`` indicate a
	        validation or upload failure.
	    """
	    global _feedback_df

	    # Function-scope import: the file only imports `datetime` itself.
	    from datetime import timezone

	    # Messages translate_to_msa() returns instead of a translation. The
	    # bracketed one is caught by the "[" check; the empty-input prompt is
	    # not, so it has to be listed explicitly or it could be rated and
	    # stored as if it were model output.
	    non_translations = ("Please enter an Egyptian Arabic phrase.",)

	    translation = (generated_output or "").strip()
	    if not translation or translation.startswith("[") or translation in non_translations:
	        return "⚠️ No valid translation to rate. Please translate something first."

	    if rating is None:
	        return "⚠️ Please provide a quality rating before submitting."

	    rating = int(rating)
	    is_incorrect = correct_incorrect == "❌ Incorrect"
	    low_rating = rating <= 2
	    needs_correction = is_incorrect or low_rating
	    has_correction = bool(corrected_translation and corrected_translation.strip())

	    if needs_correction and not has_correction:
	        if is_incorrect:
	            return "⚠️ Please provide the correct MSA translation before submitting."
	        return f"⚠️ Rating of {rating}/5 is low — please provide a better translation before submitting."

	    new_row = {
	        # Guard against a None source so .strip() cannot raise.
	        "egyptian_arabic": (egyptian_input or "").strip(),
	        "generated_msa": translation,
	        "user_label": "incorrect" if is_incorrect else "correct",
	        "user_rating": rating,
	        # A correction is stored only when one was required.
	        "corrected_msa": corrected_translation.strip() if needs_correction and has_correction else "",
	        # Same "...Z" format as before, without the deprecated utcnow().
	        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
	    }

	    # Build the candidate frame separately and adopt it only after the
	    # upload succeeded, so a failure never leaves a phantom row in the
	    # cached frame.
	    updated = pd.concat([_feedback_df, pd.DataFrame([new_row])], ignore_index=True)
	    updated["user_rating"] = updated["user_rating"].fillna(-1).astype("int64")

	    try:
	        Dataset.from_pandas(updated, features=FEEDBACK_FEATURES).push_to_hub(
	            HF_DATASET_NAME, split="train", private=False
	        )
	    except Exception as e:
	        # UI boundary: report the problem to the user instead of crashing
	        # the handler, but keep the traceback in the server log.
	        logging.exception("Failed to push feedback to the Hub")
	        return f"⚠️ Could not save feedback: {str(e)}"

	    _feedback_df = updated
	    return "✅ Feedback recorded — شكراً!"

	# ==============================================
	# RESET UI
	# ==============================================
	def reset_feedback_ui():
	    """Build the component updates that return the feedback form to its initial state.

	    Returns:
	        A 5-tuple of ``gr.update`` objects, in this order: hide the
	        feedback panel, clear the correction box, select "✅ Correct",
	        clear the rating, clear the status text.
	    """
	    hide_panel = gr.update(visible=False)
	    clear_correction = gr.update(value="")
	    default_verdict = gr.update(value="✅ Correct")
	    clear_rating = gr.update(value=None)
	    clear_status = gr.update(value="")
	    return hide_panel, clear_correction, default_verdict, clear_rating, clear_status

	# ==============================================
	# MOBILE-RESPONSIVE CSS
	# ==============================================
	# Stylesheet handed to gr.Blocks(css=...). It defines the colour and font
	# variables in :root, styles the elements addressed by elem_id in the
	# interface (#header, #translate-btn, #output-box, #submit-btn,
	# #status-box, #feedback-panel), and adds two responsive breakpoints
	# (max-width 768px and 480px).
	# NOTE(review): the `.gr-*` class selectors rely on class names in
	# Gradio's generated markup — confirm they exist in the installed version.
	CSS = """
	@import url('https://fonts.googleapis.com/css2?family=Noto+Naskh+Arabic:wght@400;600&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500;600&display=swap');

	:root {
	--bg: #0f1117;
	--surface: #181c27;
	--border: #2a2f3d;
	--accent: #4f8ef7;
	--accent2: #a78bfa;
	--text: #e8eaf0;
	--muted: #6b7280;
	--success: #34d399;
	--warn: #f87171;
	--radius: 12px;
	--mono: 'DM Mono', monospace;
	--sans: 'DM Sans', sans-serif;
	--arabic: 'Noto Naskh Arabic', serif;
	}

	/* ── Base ── */
	body, .gradio-container {
	background: var(--bg) !important;
	font-family: var(--sans) !important;
	color: var(--text) !important;
	/* prevent horizontal overflow on mobile */
	overflow-x: hidden !important;
	}

	/* ── Header ── */
	#header {
	text-align: center;
	padding: 2rem 1rem 1rem;
	border-bottom: 1px solid var(--border);
	margin-bottom: 1.5rem;
	}
	#header h1 {
	font-family: var(--mono);
	font-size: clamp(1.4rem, 5vw, 2rem);
	letter-spacing: -0.02em;
	background: linear-gradient(135deg, var(--accent), var(--accent2));
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	margin: 0 0 0.25rem;
	}
	#header p {
	color: var(--muted);
	font-size: clamp(0.75rem, 2.5vw, 0.9rem);
	margin: 0;
	line-height: 1.5;
	}

	/* ── Inputs ── */
	textarea, input[type=text] {
	background: #0f1117 !important;
	border: 1px solid var(--border) !important;
	border-radius: 8px !important;
	color: var(--text) !important;
	font-family: var(--arabic) !important;
	font-size: clamp(1rem, 3.5vw, 1.1rem) !important;
	line-height: 1.7 !important;
	direction: rtl;
	transition: border-color 0.2s;
	/* Prevent zoom on focus in iOS (font-size must be >= 16px equivalent) */
	touch-action: manipulation;
	}
	textarea:focus, input[type=text]:focus {
	border-color: var(--accent) !important;
	outline: none !important;
	box-shadow: 0 0 0 3px rgba(79,142,247,0.15) !important;
	}

	#output-box textarea {
	background: #13161f !important;
	color: #a5f3c0 !important;
	font-size: clamp(1rem, 3.5vw, 1.15rem) !important;
	}

	/* ── Buttons ── */
	button.primary, #translate-btn {
	background: linear-gradient(135deg, var(--accent), var(--accent2)) !important;
	border: none !important;
	border-radius: 8px !important;
	color: #fff !important;
	font-family: var(--sans) !important;
	font-weight: 600 !important;
	font-size: clamp(0.85rem, 3vw, 0.95rem) !important;
	/* taller tap target on mobile */
	padding: 0.75rem 1.4rem !important;
	min-height: 48px !important;
	width: 100% !important;
	cursor: pointer !important;
	transition: opacity 0.15s, transform 0.1s !important;
	touch-action: manipulation;
	}
	button.primary:hover, #translate-btn:hover {
	opacity: 0.88 !important;
	transform: translateY(-1px) !important;
	}

	#submit-btn {
	background: var(--surface) !important;
	border: 1px solid var(--accent) !important;
	border-radius: 8px !important;
	color: var(--accent) !important;
	font-family: var(--sans) !important;
	font-weight: 500 !important;
	min-height: 48px !important;
	width: 100% !important;
	transition: background 0.15s !important;
	touch-action: manipulation;
	}
	#submit-btn:hover {
	background: rgba(79,142,247,0.1) !important;
	}

	/* ── Radio & Slider ── */
	.gr-radio-item label {
	color: var(--text) !important;
	font-family: var(--sans) !important;
	/* larger touch target */
	padding: 0.4rem 0 !important;
	min-height: 44px !important;
	display: flex !important;
	align-items: center !important;
	}
	.gr-radio-item input[type=radio] {
	width: 20px !important;
	height: 20px !important;
	}

	input[type=range] {
	accent-color: var(--accent) !important;
	height: 6px !important;
	/* taller hit area */
	padding: 12px 0 !important;
	cursor: pointer;
	touch-action: manipulation;
	}

	/* ── Labels ── */
	label span, .gr-form label {
	color: var(--muted) !important;
	font-family: var(--sans) !important;
	font-size: clamp(0.72rem, 2vw, 0.82rem) !important;
	text-transform: uppercase !important;
	letter-spacing: 0.06em !important;
	}

	/* ── Status ── */
	#status-box textarea {
	background: transparent !important;
	border: none !important;
	color: var(--success) !important;
	font-family: var(--mono) !important;
	font-size: clamp(0.8rem, 2.5vw, 0.9rem) !important;
	text-align: center;
	}

	/* ── Examples table ── */
	.gr-samples-table td {
	font-family: var(--arabic) !important;
	font-size: clamp(0.9rem, 3vw, 1rem) !important;
	direction: rtl;
	color: var(--text) !important;
	/* comfortable row height on mobile */
	padding: 0.6rem 0.75rem !important;
	}
	.gr-samples-table tr:hover td {
	background: rgba(79,142,247,0.07) !important;
	cursor: pointer;
	}

	/* ── Section labels ── */
	.section-label {
	font-family: var(--mono);
	font-size: clamp(0.68rem, 2vw, 0.75rem);
	letter-spacing: 0.1em;
	color: var(--muted);
	text-transform: uppercase;
	margin: 1.2rem 0 0.5rem;
	display: flex;
	align-items: center;
	gap: 0.6rem;
	}
	.section-label::after {
	content: '';
	flex: 1;
	height: 1px;
	background: var(--border);
	}

	/* ── Feedback panel ── */
	#feedback-panel {
	border: 1px solid var(--border) !important;
	border-radius: var(--radius) !important;
	padding: 1rem !important;
	margin-top: 1rem !important;
	}

	/* ════════════════════════════════════════
	RESPONSIVE BREAKPOINTS
	════════════════════════════════════════ */

	/* Tablet / large phone — stack the two columns */
	@media (max-width: 768px) {
	/* Gradio Row becomes a single column */
	.gr-row {
	flex-direction: column !important;
	gap: 0 !important;
	}
	.gr-column {
	width: 100% !important;
	min-width: 0 !important;
	flex: none !important;
	}

	/* Give textareas a comfortable height on phone */
	textarea {
	min-height: 100px !important;
	}

	/* Feedback radio stack vertically */
	.gr-radio-group {
	flex-direction: column !important;
	}
	}

	/* Small phones */
	@media (max-width: 480px) {
	.gradio-container {
	padding: 0 0.5rem !important;
	}
	#header {
	padding: 1.25rem 0.5rem 0.75rem;
	}
	textarea {
	min-height: 90px !important;
	font-size: 1rem !important; /* prevents iOS zoom */
	}
	/* Make slider label wrap gracefully */
	.gr-form label span {
	white-space: normal !important;
	}
	}
	"""

	# ==============================================
	# GRADIO INTERFACE
	# ==============================================
	def _submit_feedback_and_refresh(source_text, translation, verdict, rating, correction):
	    """Save one feedback submission and decide how the form should react.

	    The status message is always shown. The form is reset (and the panel
	    hidden) only when saving succeeded, so a rejected submission keeps the
	    user's entries and the warning explaining what is missing.

	    Returns:
	        Updates for, in order: feedback panel, correction box,
	        correct/incorrect radio, rating slider, status box.
	    """
	    status = save_feedback(source_text, translation, verdict, rating, correction)
	    status_update = gr.update(value=status, visible=True)

	    # save_feedback() prefixes its success message with "✅"; every
	    # validation or upload problem starts with "⚠️" instead.
	    if not status.startswith("✅"):
	        # gr.update() with no arguments leaves a component untouched.
	        return gr.update(), gr.update(), gr.update(), gr.update(), status_update

	    panel, correction_reset, verdict_reset, rating_reset, _ = reset_feedback_ui()
	    return panel, correction_reset, verdict_reset, rating_reset, status_update


	with gr.Blocks(title="SlangGPT", css=CSS, theme=gr.themes.Base()) as demo:

	    gr.HTML("""
	    <div id="header">
	    <h1>SlangGPT</h1>
	    <p>Egyptian Arabic dialect → Modern Standard Arabic (MSA)<br>اللهجة المصرية ← الفصحى</p>
	    </div>
	    """)

	    # --- Input / output columns --------------------------------------
	    with gr.Row(equal_height=True):
	        with gr.Column(scale=1):
	            gr.HTML('<div class="section-label">Egyptian Arabic Input · اكتب بالمصري</div>')
	            egyptian_input = gr.Textbox(
	                show_label=False,
	                placeholder="اكتب هنا باللهجة المصرية…",
	                lines=4,
	                rtl=True,
	            )
	            translate_btn = gr.Button(
	                "Translate · ترجم →",
	                variant="primary",
	                elem_id="translate-btn",
	            )

	        with gr.Column(scale=1):
	            gr.HTML('<div class="section-label">MSA Translation · الترجمة بالفصحى</div>')
	            msa_output = gr.Textbox(
	                show_label=False,
	                lines=4,
	                interactive=False,
	                placeholder="ستظهر الترجمة هنا…",
	                rtl=True,
	                elem_id="output-box",
	            )

	    # --- Clickable examples that fill the input box ------------------
	    gr.HTML('<div class="section-label">Try an example · جرّب مثال</div>')
	    gr.Examples(
	        examples=[
	            ["إنت رايح فين؟"],
	            ["عايز اكل حاجة حلوة"],
	            ["انا تعبان قوي النهارده"],
	            ["الأكل ده كان تحفة"],
	            ["ممكن تساعدني؟"],
	        ],
	        inputs=egyptian_input,
	        label="",
	    )

	    # --- Feedback form, hidden until a translation has been produced -
	    with gr.Group(visible=False, elem_id="feedback-panel") as feedback_group:
	        gr.HTML('<div class="section-label">Rate this translation · قيّم الترجمة</div>')

	        with gr.Row():
	            correct_radio = gr.Radio(
	                choices=["✅ Correct", "❌ Incorrect"],
	                value="✅ Correct",
	                label="Is the translation correct? · هل الترجمة صحيحة؟",
	                scale=1,
	            )
	            rating_slider = gr.Slider(
	                minimum=0, maximum=5, step=1,
	                value=None,
	                label="Quality · الجودة (0 = غير مفيدة · 5 = ممتازة) — required · مطلوب",
	                scale=2,
	            )

	        correction_textbox = gr.Textbox(
	            label="Better MSA translation · ترجمة أفضل — required if incorrect or rating ≤ 2 · مطلوب إذا كانت خاطئة أو التقييم ≤ 2",
	            lines=2,
	            visible=True,
	            placeholder="الترجمة الصحيحة هنا…",
	            rtl=True,
	        )

	        submit_feedback = gr.Button("Submit Feedback · أرسل التقييم", elem_id="submit-btn")

	    # The status box lives outside the feedback group so the confirmation
	    # stays readable after the group is hidden on a successful submit.
	    feedback_status = gr.Textbox(
	        show_label=False,
	        interactive=False,
	        elem_id="status-box",
	        lines=1,
	        visible=False,
	    )

	    # Snapshot of the translation the feedback refers to; kept separately
	    # from the output box.
	    latest_translation = gr.State("")

	    # Translate, then reveal the feedback form, remember the translation
	    # and clear any status left over from a previous submission.
	    translate_btn.click(
	        fn=translate_to_msa,
	        inputs=egyptian_input,
	        outputs=msa_output,
	    ).then(
	        lambda out: (gr.update(visible=True), out, gr.update(value="", visible=False)),
	        inputs=msa_output,
	        outputs=[feedback_group, latest_translation, feedback_status],
	    )

	    # One handler saves and (conditionally) resets. Previously an
	    # unconditional follow-up step reset the form and blanked the status
	    # after every submit, so neither warnings nor the confirmation were
	    # ever visible and rejected submissions lost the user's entries.
	    submit_feedback.click(
	        fn=_submit_feedback_and_refresh,
	        inputs=[egyptian_input, latest_translation, correct_radio, rating_slider, correction_textbox],
	        outputs=[feedback_group, correction_textbox, correct_radio, rating_slider, feedback_status],
	    )

	# Start the Gradio app only when this file is run as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    demo.launch()