"""Gradio demo that classifies the sentiment of Arabic course feedback.

At import time the script loads a sequence-classification checkpoint and its
tokenizer from the Hugging Face Hub, then builds a single-textbox Gradio
interface around :func:`predict`. Running the file as a script launches the
web UI.
"""

import re

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# --- 1. CONFIGURATION ---
# Replace this with your actual model path on Hugging Face
MODEL_NAME = "goalgamal/AraBERT-Arabic-Sentiment"

# Map your labels matching your training (0: Negative, 1: Neutral, 2: Positive)
LABELS = {
    0: "Negative 😞",
    1: "Neutral 😐",
    2: "Positive 😃",
}

# Longest token sequence handed to the model; longer inputs are truncated.
MAX_LENGTH = 128

# Patterns used by clean_text(), compiled once instead of on every request.
_URL_RE = re.compile(r'http\S+')
_HTML_TAG_RE = re.compile(r'<.*?>')
# Matches anything that is NOT a Unicode word character, whitespace, or a code
# point in the Arabic block U+0600-U+06FF.
_NOISE_RE = re.compile(r'[^\w\s\u0600-\u06FF]')
_ALEF_VARIANTS_RE = re.compile(r'[أإآ]')
_TEH_MARBUTA_RE = re.compile(r'ة')

# --- 2. LOAD MODEL & TOKENIZER ---
print(f"Loading model: {MODEL_NAME}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading model: {e}")
    # Bare raise re-raises the active exception with its traceback untouched.
    raise
else:
    print("Model loaded successfully!")


# --- 3. PREPROCESSING FUNCTION ---
def clean_text(text):
    """Normalize raw feedback text before tokenization.

    The steps are meant to mirror the cleaning applied to the training data,
    so keep them in sync with the training pipeline.

    Steps, in order:
      1. Drop URLs (``http`` followed by any run of non-whitespace).
      2. Drop HTML tags (``<...>``).
      3. Replace with a space every character that is neither a Unicode word
         character, whitespace, nor in the Arabic block U+0600-U+06FF. Note
         that Latin letters, digits and underscores therefore survive; only
         punctuation, symbols and emoji outside the Arabic block are removed.
      4. Normalize Alef variants (أ, إ, آ) to bare Alef (ا).
      5. Normalize Teh Marbuta (ة) to Heh (ه).

    Args:
        text: The raw user input. Anything that is not a ``str`` (for example
            ``None``) is treated as empty.

    Returns:
        The cleaned string with leading/trailing whitespace stripped, or
        ``""`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ""

    # Remove URLs first, then HTML tags (same order as the original cleaning).
    text = _URL_RE.sub('', text)
    text = _HTML_TAG_RE.sub('', text)

    # Basic noise removal: see _NOISE_RE for exactly what is kept.
    text = _NOISE_RE.sub(' ', text)

    # Normalize Alef (أ, إ, آ -> ا)
    text = _ALEF_VARIANTS_RE.sub('ا', text)

    # Normalize Teh Marbuta (ة -> ه)
    text = _TEH_MARBUTA_RE.sub('ه', text)

    return text.strip()


# --- 4. PREDICTION FUNCTION ---
def predict(text):
    """Classify the sentiment of one piece of feedback.

    Args:
        text: Raw text from the Gradio textbox.

    Returns:
        A dict mapping each display label in ``LABELS`` to its softmax
        probability, which is the format ``gr.Label`` expects. An empty dict
        is returned when nothing is left after cleaning (empty input, or input
        made only of stripped characters), so the UI shows no result instead
        of a score for an empty sequence.

    Raises:
        KeyError: If the model emits more classes than ``LABELS`` defines.
    """
    # 1. Clean
    cleaned_text = clean_text(text)
    if not cleaned_text:
        return {}

    # 2. Tokenize
    inputs = tokenizer(
        cleaned_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=MAX_LENGTH,
    )

    # 3. Inference (no gradients needed)
    with torch.no_grad():
        logits = model(**inputs).logits

    # 4. Get probabilities (softmax over the class dimension); the batch holds
    #    a single example, so take row 0.
    probs = torch.nn.functional.softmax(logits, dim=1)[0]

    # 5. Format output for Gradio (Label -> Probability)
    return {LABELS[idx]: score for idx, score in enumerate(probs.tolist())}


# --- 5. BUILD INTERFACE ---
# We use a clean, professional theme
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        label="أدخل تعليق الطالب (Enter Student Feedback)",
        placeholder="اكتب هنا... (مثال: الشرح كان ممتاز واستفدت جدا)",
        lines=3,
        text_align="right",  # RTL support for Arabic
    ),
    outputs=gr.Label(label="Sentiment Analysis Result", num_top_classes=3),
    title="📊 Arabic Course Feedback Analyzer",
    description="""
    This is an AI-powered tool to analyze student feedback using **Deep Learning (AraBERT)**.
    It detects whether the sentiment is **Positive**, **Negative**, or **Neutral**.
    """,
    examples=[
        ["الكورس ممتاز والشرح كان رائع جدا"],
        ["بصراحة ضيعت وقتي، المحتوى ضعيف"],
        ["الكورس عادي يعني لا وحش ولا حلو"],
        ["الشرح كويس بس الصوت كان واطي في بعض الفيديوهات"],
    ],
    theme=gr.themes.Soft(),
)

# Launch
if __name__ == "__main__":
    demo.launch()