Maarij-Aqeel committed on
Commit
9391f2e
·
0 Parent(s):

Added file

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .venv
2
+ .gradio
__pycache__/app.cpython-312.pyc ADDED
Binary file (4.05 kB). View file
 
__pycache__/intent.cpython-312.pyc ADDED
Binary file (1.26 kB). View file
 
__pycache__/topic_model.cpython-312.pyc ADDED
Binary file (956 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import numpy as np
4
+
5
+ from intent import predict_intent
6
+ from topic_model import get_review_topic
7
+
8
+
9
+
10
# Load the serialized model artifacts once, at import time, so every request
# handled by the app reuses the same in-memory objects.
# NOTE(security): joblib.load unpickles arbitrary Python objects. Only load
# artifacts that come from a trusted source.
try:
    sentiment_pipeline = joblib.load("sentiment_pipeline.pkl")
except Exception as e:  # pragma: no cover - helpful when learning
    # Chain the original error explicitly so the traceback shows the root cause.
    raise RuntimeError("could not load sentiment pipeline: %s" % e) from e

try:
    vectorizer = joblib.load("tfidf_vectorizer.pkl")
    nmf_model = joblib.load("nmf_model.pkl")
except Exception as e:
    raise RuntimeError("could not load vectorizer/nmf model: %s" % e) from e
20
+
21
# The sentiment pipeline predicts integer labels (-1, 0, 1). Pair each label
# with the human-readable name that is shown to the user.
label_map = dict(
    zip(
        (-1, 0, 1),
        ("negative", "neutral", "positive"),
    )
)
29
+
30
+
31
def analyze_review(review_text: str):
    """Analyze one review and return the three strings shown in the UI.

    Args:
        review_text: Raw review text entered by the user.

    Returns:
        A 3-tuple ``(confidence_text, intent, topic)``:
        ``confidence_text`` holds the predicted sentiment label and the
        model's probability for that label, ``intent`` is the result of
        ``predict_intent`` and ``topic`` is the result of
        ``get_review_topic``. Empty or whitespace-only input yields three
        empty strings.
    """
    if not review_text or not review_text.strip():
        return "", "", ""

    # Predicted class label, translated to a display name when one is known.
    pred = sentiment_pipeline.predict([review_text])[0]
    sentiment = label_map.get(pred, str(pred))

    # Per-class probabilities for this single review.
    proba = sentiment_pipeline.predict_proba([review_text])[0]

    # predict_proba columns follow the estimator's ``classes_`` order, so look
    # the predicted label up there instead of assuming exactly three columns
    # ordered (-1, 0, 1). Fall back to that ordering only when the model does
    # not expose ``classes_``.
    classes = getattr(sentiment_pipeline, "classes_", None)
    if classes is None:
        classes = sorted(label_map)

    confidence_score = 0.0  # used when the predicted label has no column
    for class_label, class_proba in zip(classes, proba):
        if class_label == pred:
            confidence_score = float(class_proba)
            break

    confidence_text = f"Sentiment: {sentiment}\nConfidence: {confidence_score:.2%}"

    intent = predict_intent(review_text)
    topic = get_review_topic(review_text, vectorizer, nmf_model)

    return confidence_text, intent, topic
57
+
58
+
59
# Gradio front end: one multi-line review box in, three text boxes out
# (sentiment + confidence, intent, topic keywords).
iface = gr.Interface(
    title="Interactive NLP Sentiment Analysis System",
    description=(
        "Enter a customer review to analyze its sentiment, "
        "predicted intent, identified topics, and model confidence scores. "
        "This interactive demo showcases how the NLP system works in a "
        "real-world setting."
    ),
    fn=analyze_review,
    inputs=gr.Textbox(
        label="Customer Review",
        placeholder="Enter a customer review...",
        lines=4,
    ),
    outputs=[
        gr.Textbox(label=output_label)
        for output_label in (
            "Sentiment & Confidence Score",
            "Predicted Intent",
            "Topic / Keywords",
        )
    ],
    examples=[
        [sample_review]
        for sample_review in (
            "I love this product! Amazing quality and fast delivery.",
            "The package arrived damaged and the refund process was slow.",
            "Average product, nothing special.",
        )
    ],
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
intent.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

# Ordered (intent, keywords) rules: the first rule with a matching keyword wins.
_INTENT_RULES = (
    ("Refund Request", ("refund", "return", "money back", "replace")),
    ("Delivery Issue", ("late", "delayed", "never arrived", "lost", "shipping")),
    ("Complaint", ("wrong", "damaged", "broken", "poor", "bad", "defective")),
)

# One compiled pattern per intent. Each keyword is anchored at a word start so
# inflected forms still match ("refunded", "returns") while unrelated words
# that merely contain a keyword do not ("chocolate" no longer matches "late").
_INTENT_PATTERNS = tuple(
    (
        intent,
        re.compile(r"\b(?:" + "|".join(re.escape(word) for word in keywords) + ")"),
    )
    for intent, keywords in _INTENT_RULES
)


def predict_intent(text):
    """Classify a review into a coarse intent using keyword rules.

    Args:
        text: Review text. ``None`` or an empty string is treated as having
            no keywords.

    Returns:
        One of ``"Refund Request"``, ``"Delivery Issue"``, ``"Complaint"``
        or ``"General Query"``. Rules are checked in that order and the
        first match wins; ``"General Query"`` is the default.
    """
    if not text:
        return "General Query"

    lowered = text.lower()
    for intent, pattern in _INTENT_PATTERNS:
        if pattern.search(lowered):
            return intent

    return "General Query"
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.41.1
2
+ scikit-learn==1.5.1
3
+ joblib==1.4.2
4
+ numpy==1.26.4
topic_model.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_review_topic(review_text, vectorizer, nmf_model, top_n_words=5):
    """Return the top keywords of the topic that best matches a review.

    Args:
        review_text: Review text to analyze.
        vectorizer: Fitted vectorizer providing ``transform`` and
            ``get_feature_names_out``.
        nmf_model: Fitted topic model providing ``transform`` and
            ``components_``.
        top_n_words: Maximum number of keywords to return. Values of zero or
            less produce an empty string.

    Returns:
        The highest-weighted words of the best-scoring topic, joined with
        ``", "`` in descending weight order.
    """
    # A plain ``[-top_n_words:]`` slice would return the whole vocabulary for
    # 0 and a wrong subset for negative values, so handle those up front.
    if top_n_words <= 0:
        return ""

    # Project the review into the vectorizer's feature space.
    review_vect = vectorizer.transform([review_text])

    # Score the review against every topic; shape (1, num_topics), so the
    # flat argmax is the index of the best topic.
    topic_scores = nmf_model.transform(review_vect)
    topic_index = topic_scores.argmax()

    # Rank vocabulary indices by weight within that topic, heaviest first.
    words = vectorizer.get_feature_names_out()
    ranked = nmf_model.components_[topic_index].argsort()[::-1]

    return ", ".join(words[j] for j in ranked[:top_n_words])