# Source: Hugging Face Space "ishalr" — app.py (commit ff27bdf, verified)
"""
Urdu Sarcasm Detection and Explanation System
Group 16: Muhammad Yahya Rahim, Ishal Rahat, Ammara Haroon
"""
import streamlit as st
import torch
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Page config
# Must be the first Streamlit call in the script: wide layout with a
# theatrical-masks favicon and a browser-tab title.
st.set_page_config(
    page_title="Urdu Sarcasm Interpreter",
    page_icon="🎭",
    layout="wide"
)
# Global dark theme injected as raw CSS: maroon accent colors, styled
# result boxes, RTL Nastaliq typography for Urdu text, and dark widgets.
st.markdown("""
<style>
/* Global */
html, body, [class*="css"] {
background-color: #0e0e0e;
color: #e6e6e6;
}
/* Headers */
.main-header {
font-size: 2.4rem;
color: #a62d2d; /* maroon */
text-align: center;
margin-bottom: 1.5rem;
font-weight: 700;
letter-spacing: 1px;
}
/* Result boxes */
.result-box {
padding: 1.4rem;
border-radius: 12px;
margin: 1rem 0;
background-color: #151515;
}
/* Sarcastic */
.sarcastic {
border-left: 5px solid #7b1e1e;
}
/* Not sarcastic */
.not-sarcastic {
border-left: 5px solid #2e7d32;
}
/* Urdu text */
.urdu-text {
font-family: 'Noto Nastaliq Urdu', serif;
font-size: 1.35rem;
direction: rtl;
text-align: right;
color: #f2f2f2;
}
/* Buttons */
button[kind="primary"] {
background-color: #7b1e1e !important;
border: none;
}
button {
background-color: #1c1c1c !important;
color: #e6e6e6 !important;
border-radius: 8px !important;
border: 1px solid #333 !important;
}
/* Sidebar */
section[data-testid="stSidebar"] {
background-color: #121212;
border-right: 1px solid #2a2a2a;
}
/* Inputs */
textarea {
background-color: #111 !important;
color: #f2f2f2 !important;
border-radius: 8px !important;
border: 1px solid #333 !important;
}
/* Info box */
div[data-testid="stInfo"] {
background-color: #141414;
border-left: 5px solid #555;
}
/* Footer */
.footer {
color: #888;
font-size: 0.9rem;
}
</style>
""", unsafe_allow_html=True)
# Cache model loading
@st.cache_resource
def load_models():
    """Download and initialize the detector and explainer models once.

    Returns:
        dict with keys:
            'detector_model'     -- fine-tuned XLM-RoBERTa sarcasm classifier (eval mode)
            'detector_tokenizer' -- matching XLM-R tokenizer
            'explainer_model'    -- XGLM-1.7B base with the LoRA adapter attached (eval mode)
            'explainer_tokenizer'-- tokenizer from the adapter repo
            'device'             -- torch.device used for both models

    Cached by @st.cache_resource, so the downloads happen once per process.
    (Fix: removed the unused `cache_dir` local — it was assigned but never
    passed to any `from_pretrained` call.)
    """
    with st.spinner("Loading models... (this may take a minute)"):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Load detector
        detector_tokenizer = XLMRobertaTokenizer.from_pretrained(
            "ishalr/urdu-sarcasm-detectorfin"
        )
        detector_model = XLMRobertaForSequenceClassification.from_pretrained(
            "ishalr/urdu-sarcasm-detectorfin"
        )
        detector_model.to(device).eval()
        # Load explainer tokenizer (from adapter repo) — it carries any tokens
        # added during LoRA fine-tuning.
        explainer_tokenizer = AutoTokenizer.from_pretrained(
            "ishalr/urdu-sarcasm-explainer",
            use_fast=False
        )
        # Load the BASE model the LoRA adapter was trained on.
        explainer_model = AutoModelForCausalLM.from_pretrained(
            "facebook/xglm-1.7B"
        )
        # IMPORTANT: align the embedding matrix with the tokenizer's vocab
        # BEFORE attaching the adapter, or added tokens would index out of range.
        explainer_model.resize_token_embeddings(len(explainer_tokenizer))
        # Attach the LoRA adapter weights.
        explainer_model = PeftModel.from_pretrained(
            explainer_model,
            "ishalr/urdu-sarcasm-explainer"
        )
        explainer_model.to(device).eval()
        return {
            'detector_model': detector_model,
            'detector_tokenizer': detector_tokenizer,
            'explainer_model': explainer_model,
            'explainer_tokenizer': explainer_tokenizer,
            'device': device
        }
def detect_sarcasm(text, models):
    """Classify `text` as sarcastic / not sarcastic with the XLM-R detector.

    Args:
        text: input string (Urdu tweet or sentence).
        models: dict from load_models(); uses 'detector_tokenizer',
            'detector_model', and 'device'.

    Returns:
        dict with 'is_sarcastic' (bool, class index 1 means sarcastic),
        'confidence' (top-class probability), 'sarcastic_prob', and
        'not_sarcastic_prob'.
    """
    # Tokenize to a fixed 128-token window and move tensors to the model device.
    batch = models['detector_tokenizer'](
        text,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt',
    ).to(models['device'])

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = models['detector_model'](**batch).logits

    probabilities = torch.softmax(logits, dim=-1)
    top_prob, top_class = torch.max(probabilities, dim=-1)

    return {
        'is_sarcastic': top_class.item() == 1,
        'confidence': top_prob.item(),
        'sarcastic_prob': probabilities[0][1].item(),
        'not_sarcastic_prob': probabilities[0][0].item(),
    }
def explain_sarcasm(text, models):
    """Generate an Urdu explanation of why `text` is sarcastic.

    Args:
        text: the (already detected as sarcastic) input string.
        models: dict from load_models(); uses 'explainer_tokenizer',
            'explainer_model', and 'device'.

    Returns:
        The generated explanation string (text after the final
        "### Response:" marker, stripped of surrounding whitespace).
    """
    prompt = f"""### Instruction:
Explain why this Urdu tweet is sarcastic in Urdu. Provide:
1) ظاہری معنی (Literal meaning)
2) اصل مطلب (Intended meaning) No repitition
### Input:
{text}
### Response:
"""
    inputs = models['explainer_tokenizer'](
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(models['device'])
    with torch.no_grad():
        outputs = models['explainer_model'].generate(
            **inputs,
            # FIX: use max_new_tokens, not max_length. max_length counts the
            # prompt tokens too, so a prompt near the 512-token input budget
            # would leave no room (or error out) under max_length=256.
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            # XGLM has no dedicated pad token; reuse EOS to silence warnings.
            pad_token_id=models['explainer_tokenizer'].eos_token_id
        )
    response = models['explainer_tokenizer'].decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the marker.
    explanation = response.split("### Response:")[-1].strip()
    return explanation
# Main app
def main():
    """Render the Streamlit UI: header, sidebar examples, input box,
    sarcasm detection result, probability chart, and (when sarcastic)
    a generated Urdu explanation."""
    # Header — bilingual titles styled by the injected .main-header CSS class.
    st.markdown('<h1 class="main-header">اردو طنز کا تجزیہ کار</h1>', unsafe_allow_html=True)
    st.markdown('<h1 class="main-header">Urdu Sarcasm Interpreter</h1>', unsafe_allow_html=True)
    st.markdown("""
    <div style='text-align: center; margin-bottom: 2rem;'>
    <p style='font-size: 1.2rem;'>
    Cross-Generational Communication Tool<br>
    <strong>Not just detection - we explain WHY text is sarcastic</strong>
    </p>
    </div>
    """, unsafe_allow_html=True)
    # Sidebar: project blurb plus one-click example inputs.
    with st.sidebar:
        st.header("About")
        st.write("""
        This tool detects sarcasm in Urdu text and provides detailed explanations
        to help bridge understanding gaps between generations.
        """)
        st.header("Example Inputs")
        examples = [
            "جی واہ! آج پھر لوڈشیڈنگ کا نیا ریکارڈ بنا، حکومت کا شکریہ کہ ہمیں اندھیرے میں بیٹھنے کا موقع دیا 😂",
            "آج موسم اچھا ہے",
            "شاندار کارکردگی! پٹرول کی قیمت پھر بڑھا دی، عوام تو خوش ہو گئے ہوں گے",
        ]
        for ex in examples:
            # key=ex keeps the three buttons distinct; clicking stashes the
            # example text so the main text area picks it up on rerun.
            if st.button(ex, key=ex):
                st.session_state['input_text'] = ex
    # Load models (cached across reruns via @st.cache_resource).
    models = load_models()
    # Input area
    st.markdown("### Enter Urdu Text")
    input_text = st.text_area(
        "Type or paste Urdu text here:",
        value=st.session_state.get('input_text', ''),
        height=150,
        placeholder="یہاں اردو متن لکھیں یا چسپاں کریں...",
        key='text_input'
    )
    col1, col2 = st.columns([1, 4])
    with col1:
        analyze_button = st.button("Analyze Text", type="primary", use_container_width=True)
    with col2:
        if st.button("Clear", use_container_width=True):
            # NOTE(review): this resets the stored example text, but a widget
            # with key='text_input' keeps its own session state across reruns —
            # verify the text area actually empties after Clear.
            st.session_state['input_text'] = ''
            st.rerun()
    # Analysis
    if analyze_button and input_text.strip():
        with st.spinner("Analyzing..."):
            # Detect sarcasm (fast classifier pass).
            result = detect_sarcasm(input_text, models)
        # Echo the input in RTL Nastaliq styling.
        st.markdown("**Input Text:**")
        st.markdown(f'<div class="urdu-text">{input_text}</div>', unsafe_allow_html=True)
        # Results in two columns: detection on the left, explanation on the right.
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("### Detection Result")
            if result['is_sarcastic']:
                st.markdown(f"""
                <div class="result-box sarcastic">
                <h3>✓ Sarcastic</h3>
                <p><strong>Confidence:</strong> {result['confidence']:.1%}</p>
                <p><strong>Sarcastic Probability:</strong> {result['sarcastic_prob']:.1%}</p>
                </div>
                """, unsafe_allow_html=True)
            else:
                st.markdown(f"""
                <div class="result-box not-sarcastic">
                <h3>✓ Not Sarcastic</h3>
                <p><strong>Confidence:</strong> {result['confidence']:.1%}</p>
                <p><strong>Not Sarcastic Probability:</strong> {result['not_sarcastic_prob']:.1%}</p>
                </div>
                """, unsafe_allow_html=True)
            # Probability chart (pandas imported lazily, only when charting).
            import pandas as pd
            prob_df = pd.DataFrame({
                'Category': ['Not Sarcastic', 'Sarcastic'],
                'Probability': [result['not_sarcastic_prob'], result['sarcastic_prob']]
            })
            st.bar_chart(prob_df.set_index('Category'))
        with col2:
            st.markdown("### Explanation")
            if result['is_sarcastic']:
                # Slow generative pass — only run for sarcastic inputs.
                with st.spinner("Generating explanation..."):
                    explanation = explain_sarcasm(input_text, models)
                st.markdown(
                    f"""
                    <div class="urdu-text" style="
                    background-color: #1a1a1a;
                    padding: 1.2rem;
                    border-radius: 10px;
                    border-left: 4px solid #7b1e1e;
                    ">
                    {explanation}
                    </div>
                    """,
                    unsafe_allow_html=True
                )
            else:
                st.info("یہ پیغام سیدھا اور واضح ہے۔ اس میں طنز کی کوئی علامت نہیں ہے۔")
    elif analyze_button:
        # Analyze pressed with empty/whitespace-only input.
        st.warning("Please enter some text to analyze")
    # Footer
    st.markdown("---")
    st.markdown("""
    <div style='text-align: center; color: #666;'>
    <p>
    Cross-Generational Sarcasm Interpreter | Group 16<br>
    Muhammad Yahya Rahim, Ishal Rahat, Ammara Haroon
    </p>
    </div>
    """, unsafe_allow_html=True)


if __name__ == "__main__":
    main()