# Uploaded by goalgamal -- commit message "Upload app.py" (commit 074ee41, verified)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import re
# --- 1. CONFIGURATION ---
# Hugging Face Hub repository id of the checkpoint to serve; point this at
# your own model to reuse the app.
MODEL_NAME = "goalgamal/AraBERT-Arabic-Sentiment"

# Display text for each class index. The indices have to line up with the
# label encoding used at training time (0: Negative, 1: Neutral, 2: Positive).
LABELS = {
    0: "Negative 😞",
    1: "Neutral 😐",
    2: "Positive 😃",
}
# --- 2. LOAD MODEL & TOKENIZER ---
# Both objects are created once, at import time, and are then used as
# module-level globals by predict().
print(f"Loading model: {MODEL_NAME}...")
try:
    # Only the two calls that can actually fail live inside the try body.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
except Exception as e:
    # Log the reason, then abort start-up: the app cannot serve predictions
    # without a model. A bare `raise` re-raises the active exception as-is,
    # without adding an extra traceback entry the way `raise e` does.
    print(f"Error loading model: {e}")
    raise
else:
    print("Model loaded successfully!")
# --- 3. PREPROCESSING FUNCTION ---
# We replicate the basic cleaning you did in training to ensure accuracy
def clean_text(text: object) -> str:
    """Normalize raw feedback text before it is tokenized.

    Steps, in order:

    1. Remove HTML tags, then URLs.
    2. Replace every character that is not a word character, whitespace, or
       inside the Arabic block U+0600-U+06FF with a single space. ``\\w`` is
       Unicode-aware for ``str`` patterns, so Latin letters, digits and
       underscores survive as well; ASCII punctuation and emoji do not.
    3. Fold the Alef variants (أ, إ, آ) to bare Alef (ا) and Teh Marbuta (ة)
       to Heh (ه).
    4. Strip leading and trailing whitespace. Interior runs of spaces are
       left as they are.

    Args:
        text: Raw user input. Anything that is not a ``str`` is treated as
            empty input.

    Returns:
        The cleaned string, or ``""`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ""

    # Tags have to go before URLs. `http\S+` is greedy up to the next
    # whitespace, so on input such as <a href="http://x">text</a> it would
    # otherwise swallow the closing `">`, the visible text and `</a>`,
    # leaving a dangling `<a href="` that the tag pattern can no longer match.
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'http\S+', '', text)

    # Basic noise removal: everything outside word chars / whitespace /
    # Arabic block becomes a space (not deleted) so adjacent words stay apart.
    text = re.sub(r'[^\w\s\u0600-\u06FF]', ' ', text)

    # Normalize Alef (أ, إ, آ -> ا)
    text = re.sub(r'[أإآ]', 'ا', text)

    # Normalize Teh Marbuta (ة -> ه)
    text = re.sub(r'ة', 'ه', text)

    return text.strip()
# --- 4. PREDICTION FUNCTION ---
def predict(text):
    """Score one piece of feedback and return a probability per label.

    The text is cleaned with ``clean_text``, tokenized (truncated to at most
    128 tokens), run through the module-level ``model`` without gradient
    tracking, and the logits are turned into probabilities with a softmax.

    Args:
        text: Raw feedback as typed by the user.

    Returns:
        A dict mapping each display label from ``LABELS`` to its probability
        as a Python float, e.g. ``{"Positive 😃": 0.9, "Negative 😞": 0.1}``.
        This is the label -> confidence shape the original code comments say
        Gradio expects.
    """
    # Same normalization as at training time, so the model sees familiar input.
    normalized = clean_text(text)

    encoded = tokenizer(
        normalized,
        max_length=128,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over dim 1; row 0 is the only row because a single text was
    # tokenized.
    class_probs = torch.nn.functional.softmax(logits, dim=1)[0]

    return {LABELS[i]: float(p) for i, p in enumerate(class_probs)}
# --- 5. BUILD INTERFACE ---
# The widgets and static content are built first, then wired into a
# single-function Interface with the Soft theme.
_feedback_box = gr.Textbox(
    label="أدخل تعليق الطالب (Enter Student Feedback)",
    placeholder="اكتب هنا... (مثال: الشرح كان ممتاز واستفدت جدا)",
    lines=3,
    text_align="right",  # right-align the text for Arabic input
)

_sentiment_label = gr.Label(label="Sentiment Analysis Result", num_top_classes=3)

_DESCRIPTION = """
This is an AI-powered tool to analyze student feedback using **Deep Learning (AraBERT)**.
It detects whether the sentiment is **Positive**, **Negative**, or **Neutral**.
"""

# One single-element row per example, matching the single text input.
_EXAMPLES = [
    ["الكورس ممتاز والشرح كان رائع جدا"],
    ["بصراحة ضيعت وقتي، المحتوى ضعيف"],
    ["الكورس عادي يعني لا وحش ولا حلو"],
    ["الشرح كويس بس الصوت كان واطي في بعض الفيديوهات"],
]

demo = gr.Interface(
    fn=predict,
    inputs=_feedback_box,
    outputs=_sentiment_label,
    title="📊 Arabic Course Feedback Analyzer",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    theme=gr.themes.Soft(),
)

# Launch only when executed as a script, so importing this module does not
# call launch().
if __name__ == "__main__":
    demo.launch()