Spaces:

ishalr
/

urdu-sarcasm-interpreter

Sleeping

App Files Files Community

urdu-sarcasm-interpreter / app.py

ishalr

Update app.py

ff27bdf verified 4 months ago

raw

history blame contribute delete

10.3 kB

	"""
	Urdu Sarcasm Detection and Explanation System
	Group 16: Muhammad Yahya Rahim, Ishal Rahat, Ammara Haroon
	"""

	import streamlit as st
	import torch
	from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import PeftModel

	# Page config: browser-tab title and icon, plus the wide page layout.
	st.set_page_config(
	    layout="wide",
	    page_icon="🎭",
	    page_title="Urdu Sarcasm Interpreter",
	)

	# Dark theme stylesheet for the whole app; injected once as raw HTML below.
	_THEME_CSS = """
	<style>
	/* Global */
	html, body, [class*="css"] {
	background-color: #0e0e0e;
	color: #e6e6e6;
	}
	/* Headers */
	.main-header {
	font-size: 2.4rem;
	color: #a62d2d; /* maroon */
	text-align: center;
	margin-bottom: 1.5rem;
	font-weight: 700;
	letter-spacing: 1px;
	}
	/* Result boxes */
	.result-box {
	padding: 1.4rem;
	border-radius: 12px;
	margin: 1rem 0;
	background-color: #151515;
	}
	/* Sarcastic */
	.sarcastic {
	border-left: 5px solid #7b1e1e;
	}
	/* Not sarcastic */
	.not-sarcastic {
	border-left: 5px solid #2e7d32;
	}
	/* Urdu text */
	.urdu-text {
	font-family: 'Noto Nastaliq Urdu', serif;
	font-size: 1.35rem;
	direction: rtl;
	text-align: right;
	color: #f2f2f2;
	}
	/* Buttons */
	button[kind="primary"] {
	background-color: #7b1e1e !important;
	border: none;
	}
	button {
	background-color: #1c1c1c !important;
	color: #e6e6e6 !important;
	border-radius: 8px !important;
	border: 1px solid #333 !important;
	}
	/* Sidebar */
	section[data-testid="stSidebar"] {
	background-color: #121212;
	border-right: 1px solid #2a2a2a;
	}
	/* Inputs */
	textarea {
	background-color: #111 !important;
	color: #f2f2f2 !important;
	border-radius: 8px !important;
	border: 1px solid #333 !important;
	}
	/* Info box */
	div[data-testid="stInfo"] {
	background-color: #141414;
	border-left: 5px solid #555;
	}
	/* Footer */
	.footer {
	color: #888;
	font-size: 0.9rem;
	}
	</style>
	"""
	st.markdown(_THEME_CSS, unsafe_allow_html=True)


	# Cache model loading
	@st.cache_resource
	def load_models():
	    """Load the sarcasm detector and the LoRA-adapted explainer.

	    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
	    returned objects instead of reloading the weights on every script rerun.

	    Returns:
	        dict with the keys ``'detector_model'``, ``'detector_tokenizer'``,
	        ``'explainer_model'``, ``'explainer_tokenizer'`` and ``'device'``.
	        Both models are moved to ``device`` and switched to eval mode.
	    """
	    # Hub repositories, named once so each id is not repeated below.
	    detector_repo = "ishalr/urdu-sarcasm-detectorfin"
	    explainer_repo = "ishalr/urdu-sarcasm-explainer"
	    base_model_repo = "facebook/xglm-1.7B"

	    with st.spinner("Loading models... (this may take a minute)"):
	        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	        # Load detector
	        detector_tokenizer = XLMRobertaTokenizer.from_pretrained(detector_repo)
	        detector_model = XLMRobertaForSequenceClassification.from_pretrained(
	            detector_repo
	        )
	        detector_model.to(device).eval()

	        # Load explainer tokenizer (from adapter repo)
	        explainer_tokenizer = AutoTokenizer.from_pretrained(
	            explainer_repo,
	            use_fast=False,
	        )

	        # Load the BASE model the LoRA adapter was trained on
	        explainer_model = AutoModelForCausalLM.from_pretrained(base_model_repo)

	        # IMPORTANT: align vocab size with the adapter repo's tokenizer
	        # before the adapter weights are attached.
	        explainer_model.resize_token_embeddings(len(explainer_tokenizer))

	        # Load LoRA adapter on top of the resized base model
	        explainer_model = PeftModel.from_pretrained(
	            explainer_model,
	            explainer_repo,
	        )
	        explainer_model.to(device).eval()

	    return {
	        'detector_model': detector_model,
	        'detector_tokenizer': detector_tokenizer,
	        'explainer_model': explainer_model,
	        'explainer_tokenizer': explainer_tokenizer,
	        'device': device,
	    }

	def detect_sarcasm(text, models):
	    """Classify ``text`` with the detector and report both class probabilities.

	    Args:
	        text: The input string to classify.
	        models: Mapping that provides ``'detector_tokenizer'``,
	            ``'detector_model'`` and ``'device'``.

	    Returns:
	        dict with ``'is_sarcastic'`` (True when the top class index is 1),
	        ``'confidence'`` (probability of the top class), ``'sarcastic_prob'``
	        (class index 1) and ``'not_sarcastic_prob'`` (class index 0).
	    """
	    tokenize = models['detector_tokenizer']
	    batch = tokenize(
	        text,
	        truncation=True,
	        padding='max_length',
	        max_length=128,
	        return_tensors='pt',
	    )
	    batch = batch.to(models['device'])

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        raw_scores = models['detector_model'](**batch).logits
	        class_probs = torch.softmax(raw_scores, dim=-1)

	    top_prob, top_class = class_probs.max(dim=-1)
	    first_row = class_probs[0]

	    verdict = {}
	    verdict['is_sarcastic'] = top_class.item() == 1
	    verdict['confidence'] = top_prob.item()
	    verdict['sarcastic_prob'] = first_row[1].item()
	    verdict['not_sarcastic_prob'] = first_row[0].item()
	    return verdict

	def explain_sarcasm(text, models):
	    """Generate an explanation for a sarcastic text with the explainer model.

	    Args:
	        text: The text to explain; it is inserted into the instruction prompt.
	        models: Mapping that provides ``'explainer_tokenizer'``,
	            ``'explainer_model'`` and ``'device'``.

	    Returns:
	        The generated explanation as a stripped string. Only the tokens
	        produced after the prompt are decoded.
	    """
	    tokenizer = models['explainer_tokenizer']

	    prompt = f"""### Instruction:
	Explain why this Urdu tweet is sarcastic in Urdu. Provide:
	1) ظاہری معنی (Literal meaning)
	2) اصل مطلب (Intended meaning) No repitition
	### Input:
	{text}
	### Response:
	"""

	    inputs = tokenizer(
	        prompt,
	        return_tensors="pt",
	        max_length=512,
	        truncation=True,
	    ).to(models['device'])

	    with torch.no_grad():
	        outputs = models['explainer_model'].generate(
	            **inputs,
	            # max_new_tokens bounds only the generated part. The previous
	            # max_length=256 also counted the prompt, which may be up to
	            # 512 tokens, leaving little or no room for an answer.
	            max_new_tokens=256,
	            temperature=0.7,
	            do_sample=True,
	            top_p=0.9,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # Decode only what follows the prompt, so the result stays clean even
	    # when truncation removed the "### Response:" marker from the prompt.
	    prompt_length = inputs["input_ids"].shape[1]
	    generated_ids = outputs[0][prompt_length:]
	    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

	    # If the model repeats the marker, keep only the text after the last one.
	    return response.split("### Response:")[-1].strip()

	# Main app
	def _html_safe(text):
	    """Escape ``text`` for embedding in an HTML snippet, keeping line breaks."""
	    import html  # function-scope import keeps this block self-contained

	    # Newlines become <br> so blank lines cannot end the HTML block early.
	    return "<br>".join(html.escape(text).splitlines())


	def _render_sidebar():
	    """Draw the sidebar: a short description and clickable example inputs."""
	    with st.sidebar:
	        st.header("About")
	        st.write("""
	This tool detects sarcasm in Urdu text and provides detailed explanations
	to help bridge understanding gaps between generations.
	""")

	        st.header("Example Inputs")
	        examples = [
	            "جی واہ! آج پھر لوڈشیڈنگ کا نیا ریکارڈ بنا، حکومت کا شکریہ کہ ہمیں اندھیرے میں بیٹھنے کا موقع دیا 😂",
	            "آج موسم اچھا ہے",
	            "شاندار کارکردگی! پٹرول کی قیمت پھر بڑھا دی، عوام تو خوش ہو گئے ہوں گے",
	        ]

	        # Clicking an example stores it as the text area's initial value.
	        for ex in examples:
	            if st.button(ex, key=ex):
	                st.session_state['input_text'] = ex


	def _render_detection(result):
	    """Show the verdict box and a bar chart of both class probabilities."""
	    import pandas as pd

	    st.markdown("### Detection Result")

	    if result['is_sarcastic']:
	        st.markdown(f"""
	<div class="result-box sarcastic">
	<h3>✓ Sarcastic</h3>
	<p><strong>Confidence:</strong> {result['confidence']:.1%}</p>
	<p><strong>Sarcastic Probability:</strong> {result['sarcastic_prob']:.1%}</p>
	</div>
	""", unsafe_allow_html=True)
	    else:
	        st.markdown(f"""
	<div class="result-box not-sarcastic">
	<h3>✓ Not Sarcastic</h3>
	<p><strong>Confidence:</strong> {result['confidence']:.1%}</p>
	<p><strong>Not Sarcastic Probability:</strong> {result['not_sarcastic_prob']:.1%}</p>
	</div>
	""", unsafe_allow_html=True)

	    # Probability chart
	    prob_df = pd.DataFrame({
	        'Category': ['Not Sarcastic', 'Sarcastic'],
	        'Probability': [result['not_sarcastic_prob'], result['sarcastic_prob']]
	    })
	    st.bar_chart(prob_df.set_index('Category'))


	def _render_explanation(input_text, result, models):
	    """Show the generated explanation, or an info note for non-sarcastic text."""
	    st.markdown("### Explanation")

	    if not result['is_sarcastic']:
	        st.info("یہ پیغام سیدھا اور واضح ہے۔ اس میں طنز کی کوئی علامت نہیں ہے۔")
	        return

	    with st.spinner("Generating explanation..."):
	        explanation = explain_sarcasm(input_text, models)

	    # Model output is escaped before it is placed inside raw HTML.
	    st.markdown(
	        f"""
	<div class="urdu-text" style="
	background-color: #1a1a1a;
	padding: 1.2rem;
	border-radius: 10px;
	border-left: 4px solid #7b1e1e;
	">
	{_html_safe(explanation)}
	</div>
	""",
	        unsafe_allow_html=True
	    )


	def main():
	    """Render the Streamlit page and run detection/explanation on demand.

	    Draws the header, sidebar and input area, loads the models through
	    ``load_models()``, and when "Analyze Text" is pressed with non-blank
	    input shows the detection result next to the explanation. User text and
	    model output are HTML-escaped before being rendered as raw HTML.
	    """
	    # Header
	    st.markdown('<h1 class="main-header">اردو طنز کا تجزیہ کار</h1>', unsafe_allow_html=True)
	    st.markdown('<h1 class="main-header">Urdu Sarcasm Interpreter</h1>', unsafe_allow_html=True)

	    st.markdown("""
	<div style='text-align: center; margin-bottom: 2rem;'>
	<p style='font-size: 1.2rem;'>
	Cross-Generational Communication Tool<br>
	<strong>Not just detection - we explain WHY text is sarcastic</strong>
	</p>
	</div>
	""", unsafe_allow_html=True)

	    # Sidebar
	    _render_sidebar()

	    # Load models
	    models = load_models()

	    # Input area
	    st.markdown("### Enter Urdu Text")

	    input_text = st.text_area(
	        "Type or paste Urdu text here:",
	        value=st.session_state.get('input_text', ''),
	        height=150,
	        placeholder="یہاں اردو متن لکھیں یا چسپاں کریں...",
	        key='text_input'
	    )

	    analyze_col, clear_col = st.columns([1, 4])
	    with analyze_col:
	        analyze_button = st.button("Analyze Text", type="primary", use_container_width=True)

	    with clear_col:
	        if st.button("Clear", use_container_width=True):
	            st.session_state['input_text'] = ''
	            st.rerun()

	    # Analysis
	    if analyze_button and input_text.strip():
	        with st.spinner("Analyzing..."):
	            # Detect sarcasm
	            result = detect_sarcasm(input_text, models)

	        # Display input (escaped: it is user-supplied text shown as raw HTML)
	        st.markdown("Input Text:")
	        st.markdown(
	            f'<div class="urdu-text">{_html_safe(input_text)}</div>',
	            unsafe_allow_html=True,
	        )

	        # Results in columns
	        detection_col, explanation_col = st.columns(2)

	        with detection_col:
	            _render_detection(result)

	        with explanation_col:
	            _render_explanation(input_text, result, models)

	    elif analyze_button:
	        st.warning("Please enter some text to analyze")

	    # Footer
	    st.markdown("---")
	    st.markdown("""
	<div style='text-align: center; color: #666;'>
	<p>
	Cross-Generational Sarcasm Interpreter | Group 16<br>
	Muhammad Yahya Rahim, Ishal Rahat, Ammara Haroon
	</p>
	</div>
	""", unsafe_allow_html=True)

	# Script entry point: build the page only when this file is run as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	    main()