Spaces:

goalgamal
/

Arabic-Course-Sentiment-Demo

Sleeping

App Files Files Community

Arabic-Course-Sentiment-Demo / app.py

goalgamal

Upload app.py

074ee41 verified 2 months ago

raw

history blame contribute delete

3.44 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import re

	# --- 1. CONFIGURATION ---
	# Replace this with your actual model path on Hugging Face
	MODEL_NAME = "goalgamal/AraBERT-Arabic-Sentiment"

	# Class index -> display label shown in the UI. The position of each name
	# is its class id, which must match the ids used when the model was trained
	# (0: Negative, 1: Neutral, 2: Positive).
	LABELS = dict(enumerate(("Negative 😞", "Neutral 😐", "Positive 😃")))

	# --- 2. LOAD MODEL & TOKENIZER ---
	# Load the tokenizer and the classifier once, at import time, so that every
	# prediction reuses the same objects. A failure is reported and re-raised:
	# the app cannot serve predictions without the model.
	print(f"Loading model: {MODEL_NAME}...")
	try:
	    # Only the two calls that can actually fail live inside the try body.
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
	except Exception as e:
	    print(f"Error loading model: {e}")
	    # Bare `raise` re-raises the active exception with its traceback intact.
	    raise
	else:
	    print("Model loaded successfully!")

	# --- 3. PREPROCESSING FUNCTION ---
	# We replicate the basic cleaning you did in training to ensure accuracy
	def clean_text(text):
	    """Normalize raw feedback text before it is tokenized.

	    Non-string input yields an empty string. Otherwise the following
	    substitutions run in order:

	    1. URLs (``http`` followed by non-space characters) are deleted.
	    2. ``<...>`` tags are deleted.
	    3. Every character that is not a word character, whitespace, or in the
	       range U+0600-U+06FF is replaced by a single space. Note that this
	       keeps Latin letters, digits and underscores as well as Arabic.
	    4. Alef variants (أ, إ, آ) are folded to bare Alef (ا).
	    5. Teh Marbuta (ة) is folded to Heh (ه).

	    Leading and trailing whitespace is stripped from the result; runs of
	    spaces inside the text are left as they are.
	    """
	    if not isinstance(text, str):
	        return ""

	    # (pattern, replacement) pairs, applied strictly in this order.
	    substitutions = (
	        (r'http\S+', ''),
	        (r'<.*?>', ''),
	        (r'[^\w\s\u0600-\u06FF]', ' '),
	        (r'[أإآ]', 'ا'),
	        (r'ة', 'ه'),
	    )

	    cleaned = text
	    for pattern, replacement in substitutions:
	        cleaned = re.sub(pattern, replacement, cleaned)
	    return cleaned.strip()

	# --- 4. PREDICTION FUNCTION ---
	def predict(text):
	    """Classify the sentiment of one piece of feedback text.

	    Args:
	        text: Raw text coming from the UI textbox.

	    Returns:
	        A dict mapping each display label from ``LABELS`` to its softmax
	        probability as a Python float, which is the mapping ``gr.Label``
	        is given as output. An empty dict is returned when nothing is left
	        after cleaning, so no score is reported for empty input.
	    """
	    cleaned_text = clean_text(text)
	    if not cleaned_text:
	        # Blank, non-string, or fully filtered input: skip inference rather
	        # than report a sentiment for text that carries no content.
	        return {}

	    inputs = tokenizer(
	        cleaned_text,
	        return_tensors="pt",
	        truncation=True,
	        padding=True,
	        max_length=128,
	    )

	    # Inference only, so gradient tracking is switched off.
	    with torch.no_grad():
	        logits = model(**inputs).logits
	        # A single text was passed in, so row 0 holds its class scores;
	        # tolist() converts them to plain Python floats in one step.
	        scores = torch.nn.functional.softmax(logits, dim=1)[0].tolist()

	    return {LABELS[idx]: score for idx, score in enumerate(scores)}

	# --- 5. BUILD INTERFACE ---
	# We use a clean, professional theme
	# Input widget: a three-line textbox with right-aligned text for Arabic.
	_feedback_box = gr.Textbox(
	    label="أدخل تعليق الطالب (Enter Student Feedback)",
	    placeholder="اكتب هنا... (مثال: الشرح كان ممتاز واستفدت جدا)",
	    lines=3,
	    text_align="right",
	)

	# Output widget: shows up to three classes with their scores.
	_sentiment_label = gr.Label(label="Sentiment Analysis Result", num_top_classes=3)

	# Clickable sample inputs; each inner list is one example row.
	_sample_feedback = [
	    ["الكورس ممتاز والشرح كان رائع جدا"],
	    ["بصراحة ضيعت وقتي، المحتوى ضعيف"],
	    ["الكورس عادي يعني لا وحش ولا حلو"],
	    ["الشرح كويس بس الصوت كان واطي في بعض الفيديوهات"],
	]

	# Wire the prediction function to the widgets above.
	demo = gr.Interface(
	    fn=predict,
	    inputs=_feedback_box,
	    outputs=_sentiment_label,
	    title="📊 Arabic Course Feedback Analyzer",
	    description="""
	This is an AI-powered tool to analyze student feedback using Deep Learning (AraBERT).
	It detects whether the sentiment is Positive, Negative, or Neutral.
	""",
	    examples=_sample_feedback,
	    theme=gr.themes.Soft(),
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()