File size: 3,442 Bytes
370e49c
 
 
 
 
 
 
074ee41
370e49c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import re

# --- 1. CONFIGURATION ---
# Replace this with your actual model path on Hugging Face
# Hugging Face Hub repo id the tokenizer and model are pulled from below.
MODEL_NAME = "goalgamal/AraBERT-Arabic-Sentiment"

# Map your labels matching your training (0: Negative, 1: Neutral, 2: Positive)
# Class index -> display string shown by the gr.Label output widget.
# NOTE(review): the index order must match the label ids used at fine-tuning
# time — confirm against the training script before swapping entries.
LABELS = {
    0: "Negative 😞", 
    1: "Neutral 😐", 
    2: "Positive 😃"
}

# --- 2. LOAD MODEL & TOKENIZER ---
# Download (or load from cache) the tokenizer and the fine-tuned classifier
# at import time so the first prediction request is not slowed by the load.
print(f"Loading model: {MODEL_NAME}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    print("Model loaded successfully!")
except Exception as e:
    # Surface the failure in the logs, then re-raise so the app does not
    # start with a half-initialized model.
    print(f"Error loading model: {e}")
    # Bare `raise` re-raises the active exception with its original traceback;
    # `raise e` would append this frame to the traceback for no benefit.
    raise

# --- 3. PREPROCESSING FUNCTION ---
# Mirrors the cleaning applied at training time so inference sees the same input.
def clean_text(text):
    """Normalize one feedback string to the form the model was trained on."""
    # Non-strings (None, numbers, NaN from a dataframe) normalize to "".
    if not isinstance(text, str):
        return ""

    # Drop URLs first, then any leftover HTML tags.
    for strip_pattern in (r'http\S+', r'<.*?>'):
        text = re.sub(strip_pattern, '', text)

    # Blank out everything that is not a word char, whitespace, or an
    # Arabic-block character (basic noise/punctuation removal).
    text = re.sub(r'[^\w\s\u0600-\u06FF]', ' ', text)

    # Unify Alef variants (أ إ آ -> ا) and Teh Marbuta (ة -> ه) in one pass.
    normalize_table = str.maketrans({'أ': 'ا', 'إ': 'ا', 'آ': 'ا', 'ة': 'ه'})
    text = text.translate(normalize_table)

    return text.strip()

# --- 4. PREDICTION FUNCTION ---
def predict(text):
    """Classify one feedback string; return {label: probability} for gr.Label."""
    # Apply the exact normalization used on the training data.
    normalized = clean_text(text)

    # Encode for the model: PyTorch tensors, padded/truncated to the
    # training max length of 128 tokens.
    encoded = tokenizer(
        normalized,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )

    # Forward pass with gradient tracking disabled (inference only).
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over the class dimension; take the single batch row.
    class_probs = torch.nn.functional.softmax(logits, dim=1)[0]

    # Gradio's Label component expects {"label text": confidence, ...}.
    return {LABELS[idx]: float(prob) for idx, prob in enumerate(class_probs)}

# --- 5. BUILD INTERFACE ---
# We use a clean, professional theme
# Declarative Gradio app: one Arabic textbox in, one probability label out,
# wired to predict() above. Examples populate the clickable sample inputs.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        label="أدخل تعليق الطالب (Enter Student Feedback)", 
        placeholder="اكتب هنا... (مثال: الشرح كان ممتاز واستفدت جدا)",
        lines=3,
        text_align="right"  # RTL support for Arabic
    ),
    # num_top_classes=3 shows all three sentiment classes with their scores.
    outputs=gr.Label(label="Sentiment Analysis Result", num_top_classes=3),
    title="📊 Arabic Course Feedback Analyzer",
    description="""

    This is an AI-powered tool to analyze student feedback using **Deep Learning (AraBERT)**.

    It detects whether the sentiment is **Positive**, **Negative**, or **Neutral**.

    """,
    examples=[
        ["الكورس ممتاز والشرح كان رائع جدا"],
        ["بصراحة ضيعت وقتي، المحتوى ضعيف"],
        ["الكورس عادي يعني لا وحش ولا حلو"],
        ["الشرح كويس بس الصوت كان واطي في بعض الفيديوهات"]
    ],
    theme=gr.themes.Soft()
)

# Launch
# Start the local web server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()