File size: 3,311 Bytes
1d5af67
 
 
 
 
 
b60b60e
 
1d5af67
b60b60e
1d5af67
b60b60e
1d5af67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b60b60e
 
 
 
1d5af67
b60b60e
 
 
1d5af67
 
b60b60e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d690f5
b60b60e
 
 
 
1d5af67
b60b60e
 
 
1d5af67
 
 
 
 
e2e63da
1d5af67
 
 
e2e63da
1d5af67
 
 
 
0d2216c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import joblib
import os
import traceback

# ===============================
# Load assets
# ===============================
# All of the objects below are module-level globals consumed by
# classify_feedback(). Each load is followed by a print so startup
# progress is visible in the server log; any missing file raises here,
# failing fast before the UI launches.
print("🔄 Loading data and models...")
# Cleaned feedback corpus; must contain a "Sentence" column (read by
# classify_feedback) and its row order must match the FAISS index.
df = pd.read_csv("clean_feedback.csv")
print("✅ CSV loaded with columns:", df.columns.tolist())

# Precomputed sentence embeddings for the corpus (loaded but not used
# directly in this file — presumably kept for offline rebuilds; verify).
embeddings = np.load("embeddings.npy")
print("✅ Embeddings loaded with shape:", embeddings.shape)

# FAISS nearest-neighbour index over the corpus embeddings.
index = faiss.read_index("feedback.index")
print("✅ FAISS index loaded")

# Pretrained sentiment classifier (scikit-learn style: exposes
# predict_proba and classes_, as used in classify_feedback).
clf = joblib.load("feedback_model.pkl")
print("✅ Sentiment model loaded")

# Multilingual encoder used for query embeddings; pinned to CPU so the
# app runs on hosts without a GPU.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2", device="cpu")
print("✅ SentenceTransformer ready")

# File to store user submissions
# Created with the expected header row on first run so later
# pd.read_csv calls always succeed.
USER_FEEDBACK_FILE = "user_feedback.csv"
if not os.path.exists(USER_FEEDBACK_FILE):
    pd.DataFrame(columns=["Sentence", "Predicted_Sentiment", "Confidence"]).to_csv(USER_FEEDBACK_FILE, index=False)

# ===============================
# Core classification function
# ===============================
def classify_feedback(text, top_k=5):
    """Classify a feedback sentence and retrieve similar corpus examples.

    Args:
        text: Raw user feedback (Urdu / Roman Urdu / English).
        top_k: Number of similar corpus sentences to retrieve.

    Returns:
        A tuple ``(explanation, table)`` where ``explanation`` is a
        Markdown string with the predicted sentiment, confidence and the
        retrieved similar sentences, and ``table`` is a DataFrame of all
        stored user submissions (including this one).
    """
    try:
        if not text.strip():
            return "⚠️ Please enter a feedback text.", pd.read_csv(USER_FEEDBACK_FILE)

        # Embed the query. FAISS only accepts float32, so coerce
        # explicitly instead of relying on the encoder's default dtype.
        query_emb = np.asarray(model.encode([text]), dtype=np.float32)

        # Retrieve similar sentences. FAISS pads the result with -1 when
        # the index holds fewer than top_k vectors; -1 would silently
        # select the LAST corpus row via .iloc, so filter those out.
        distances, indices = index.search(query_emb, top_k)
        valid_ids = indices[0][indices[0] >= 0]
        retrieved = df.iloc[valid_ids]

        # Predict sentiment & probability from the same embedding.
        probs_all = clf.predict_proba(query_emb)[0]
        sentiment = clf.classes_[np.argmax(probs_all)]
        confidence = np.max(probs_all)

        examples = "\n".join(
            f"{i+1}. {s}" for i, s in enumerate(retrieved['Sentence'].tolist())
        )

        # Save user submission to the shared file.
        # NOTE(review): read-concat-write is not safe under concurrent
        # requests (two users can race and drop a row) — acceptable for a
        # demo, but confirm before multi-worker deployment.
        new_row = pd.DataFrame(
            [{"Sentence": text, "Predicted_Sentiment": sentiment, "Confidence": round(confidence, 2)}]
        )
        existing = pd.read_csv(USER_FEEDBACK_FILE)
        updated = pd.concat([existing, new_row], ignore_index=True)
        updated.to_csv(USER_FEEDBACK_FILE, index=False)

        print(f"✅ Prediction: {sentiment} ({confidence:.2f})")

        # Return both text output + table.
        explanation = (
            f"**Predicted Sentiment:** {sentiment}\n"
            f"**Confidence:** {confidence:.2f}\n\n"
            f"**Similar Feedbacks:**\n{examples}"
        )
        return explanation, updated

    except Exception as e:
        # Top-level boundary: surface the traceback in the UI instead of
        # crashing the Gradio worker, and keep the table visible.
        tb = traceback.format_exc()
        print("❌ Error:", tb)
        return f"❌ Error occurred:\n```\n{tb}\n```", pd.read_csv(USER_FEEDBACK_FILE)

# ===============================
# Gradio Interface
# ===============================
# classify_feedback returns (markdown_explanation, submissions_dataframe),
# mapped positionally onto the two output components below.
demo = gr.Interface(
    fn=classify_feedback,
    inputs=[gr.Textbox(label="Enter Student Feedback")],
    outputs=[
        gr.Markdown(label="Prediction & Explanation"),
        # Headers must match the columns of the DataFrame produced by
        # classify_feedback exactly (the originals had stray ": "
        # suffixes that mismatched the real column names).
        gr.Dataframe(headers=["Sentence", "Predicted_Sentiment", "Confidence"], label="🗂️ All User Feedback")
    ],
    title="🎓 Student Feedback RAG System",
    description=(
        "Classifies Urdu/Roman Urdu and English student feedback with context and reasoning.<br>"
        "All submissions are saved and visible to everyone below 👇"
    ),
)

demo.launch()