goalgamal committed on
Commit
370e49c
·
verified ·
1 Parent(s): 690f257

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ import re
5
+
6
+ # --- 1. CONFIGURATION ---
7
+ # Replace this with your actual model path on Hugging Face
8
# Hub repository id of the fine-tuned checkpoint; placeholder until the real
# repository name is filled in.
MODEL_NAME = "YOUR_USERNAME/AraBERT-Arabic-Sentiment"

# Display text for each class index the classifier emits. The order is meant
# to match the label encoding used at training time
# (0: Negative, 1: Neutral, 2: Positive).
LABELS = {
    0: "Negative 😞",
    1: "Neutral 😐",
    2: "Positive 😃",
}
16
+
17
+ # --- 2. LOAD MODEL & TOKENIZER ---
18
# Load the tokenizer and classifier once at import time so every request
# reuses the same objects.
print(f"Loading model: {MODEL_NAME}...")
try:
    # Keep the try body to the two calls that can actually fail.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
except Exception as e:
    # Startup boundary: log the reason, then re-raise the active exception
    # unchanged so the app does not start without a model.
    print(f"Error loading model: {e}")
    raise
else:
    print("Model loaded successfully!")
26
+
27
+ # --- 3. PREPROCESSING FUNCTION ---
28
+ # We replicate the basic cleaning you did in training to ensure accuracy
29
def clean_text(text):
    """Normalize raw feedback text before it is tokenized.

    Steps, in order: strip HTML tags, strip URLs, replace every character
    that is not a word character, whitespace, or inside the Arabic block
    (U+0600-U+06FF) with a space, fold the Alef variants to bare Alef, fold
    Teh Marbuta to Heh, and trim surrounding whitespace.

    Args:
        text: The raw input. Anything that is not a ``str`` is treated as
            missing.

    Returns:
        The cleaned string, or ``""`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ""

    # Tags are removed BEFORE URLs. The URL pattern runs up to the next
    # whitespace, so on '<a href="http://x">word</a>' it would swallow the
    # rest of the tag, the visible word and the closing tag, deleting the
    # real feedback and leaving a broken '<a href="' fragment behind.
    # For text that does not mix tags and URLs the result is unchanged.
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'http\S+', '', text)

    # Blank out symbols and punctuation. Note that \w is Unicode-aware, so
    # letters and digits of any script (not only Arabic) and underscores are
    # kept; the explicit range additionally keeps the rest of the Arabic
    # block, such as Arabic punctuation and diacritics.
    text = re.sub(r'[^\w\s\u0600-\u06FF]', ' ', text)

    # Normalize Alef (أ, إ, آ -> ا)
    text = re.sub(r'[أإآ]', 'ا', text)
    # Normalize Teh Marbuta (ة -> ه)
    text = re.sub(r'ة', 'ه', text)

    return text.strip()
47
+
48
+ # --- 4. PREDICTION FUNCTION ---
49
def predict(text):
    """Classify one piece of feedback and return per-label probabilities.

    Args:
        text: Raw feedback as entered by the user.

    Returns:
        A dict mapping each display label to its softmax probability, the
        ``{label: confidence}`` form a Gradio label output takes. An empty
        dict is returned when nothing is left after cleaning (empty input,
        non-string input, or input made only of stripped characters), so no
        prediction is shown for text that carries no content.
    """
    # 1. Clean
    cleaned_text = clean_text(text)
    if not cleaned_text:
        # Without this guard the model would still score a sequence made of
        # special tokens only and report a confident but meaningless label.
        return {}

    # 2. Tokenize
    inputs = tokenizer(
        cleaned_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )

    # 3. Inference (no gradients needed when only predicting)
    with torch.no_grad():
        logits = model(**inputs).logits

    # 4. Softmax over the class dimension; a single text was passed in, so
    #    row 0 is the only row. tolist() yields plain Python floats.
    scores = torch.nn.functional.softmax(logits, dim=1)[0].tolist()

    # 5. Label -> probability. Fall back to a generic name instead of raising
    #    KeyError if the checkpoint has more classes than LABELS describes.
    return {
        LABELS.get(idx, f"LABEL_{idx}"): score
        for idx, score in enumerate(scores)
    }
77
+
78
+ # --- 5. BUILD INTERFACE ---
79
+ # We use a clean, professional theme
80
# Single-input, single-output Gradio UI wired to predict().
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        label="أدخل تعليق الطالب (Enter Student Feedback)",
        placeholder="اكتب هنا... (مثال: الشرح كان ممتاز واستفدت جدا)",
        lines=3,
        text_align="right",  # right-aligned input for Arabic text
    ),
    outputs=gr.Label(label="Sentiment Analysis Result", num_top_classes=3),
    title="📊 Arabic Course Feedback Analyzer",
    # Markdown shown under the title; spelled out line by line so the
    # newlines in the text are explicit.
    description=(
        "\n"
        "This is an AI-powered tool to analyze student feedback using **Deep Learning (AraBERT)**.\n"
        "It detects whether the sentiment is **Positive**, **Negative**, or **Neutral**.\n"
    ),
    # One sample input per row.
    examples=[
        ["الكورس ممتاز والشرح كان رائع جدا"],
        ["بصراحة ضيعت وقتي، المحتوى ضعيف"],
        ["الكورس عادي يعني لا وحش ولا حلو"],
        ["الشرح كويس بس الصوت كان واطي في بعض الفيديوهات"],
    ],
    theme=gr.themes.Soft(),
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()