File size: 3,311 Bytes
1d5af67
 
 
 
 
 
b60b60e
 
1d5af67
b60b60e
1d5af67
b60b60e
1d5af67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b60b60e
 
 
 
1d5af67
b60b60e
 
 
1d5af67
 
b60b60e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d690f5
b60b60e
 
 
 
1d5af67
b60b60e
 
 
1d5af67
 
 
 
 
e2e63da
1d5af67
 
 
e2e63da
1d5af67
 
 
 
0d2216c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import joblib
import os
import traceback

# ===============================
# Load assets
# ===============================
# All of the objects below are module-level globals consumed by
# classify_feedback(). Each load is followed by a print so startup
# progress is visible in the server log; any missing file raises here,
# failing fast before the UI launches.
print("🔄 Loading data and models...")
# Cleaned feedback corpus; must contain a "Sentence" column (read by
# classify_feedback) and its row order must match the FAISS index.
df = pd.read_csv("clean_feedback.csv")
print("✅ CSV loaded with columns:", df.columns.tolist())

# Precomputed sentence embeddings for the corpus (loaded but not used
# directly in this file — presumably kept for offline rebuilds; verify).
embeddings = np.load("embeddings.npy")
print("✅ Embeddings loaded with shape:", embeddings.shape)

# FAISS nearest-neighbour index over the corpus embeddings.
index = faiss.read_index("feedback.index")
print("✅ FAISS index loaded")

# Pretrained sentiment classifier (scikit-learn style: exposes
# predict_proba and classes_, as used in classify_feedback).
clf = joblib.load("feedback_model.pkl")
print("✅ Sentiment model loaded")

# Multilingual encoder used for query embeddings; pinned to CPU so the
# app runs on hosts without a GPU.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2", device="cpu")
print("✅ SentenceTransformer ready")

# File to store user submissions
# Created with the expected header row on first run so later
# pd.read_csv calls always succeed.
USER_FEEDBACK_FILE = "user_feedback.csv"
if not os.path.exists(USER_FEEDBACK_FILE):
    pd.DataFrame(columns=["Sentence", "Predicted_Sentiment", "Confidence"]).to_csv(USER_FEEDBACK_FILE, index=False)

# ===============================
# Core classification function
# ===============================
def classify_feedback(text, top_k=5):
    """Classify a feedback sentence and retrieve similar corpus examples.

    Args:
        text: Raw user feedback (Urdu / Roman Urdu / English).
        top_k: Number of similar corpus sentences to retrieve.

    Returns:
        A tuple ``(explanation, table)`` where ``explanation`` is a
        Markdown string with the predicted sentiment, confidence and the
        retrieved similar sentences, and ``table`` is a DataFrame of all
        stored user submissions (including this one).
    """
    try:
        if not text.strip():
            return "⚠️ Please enter a feedback text.", pd.read_csv(USER_FEEDBACK_FILE)

        # Embed the query. FAISS only accepts float32, so coerce
        # explicitly instead of relying on the encoder's default dtype.
        query_emb = np.asarray(model.encode([text]), dtype=np.float32)

        # Retrieve similar sentences. FAISS pads the result with -1 when
        # the index holds fewer than top_k vectors; -1 would silently
        # select the LAST corpus row via .iloc, so filter those out.
        distances, indices = index.search(query_emb, top_k)
        valid_ids = indices[0][indices[0] >= 0]
        retrieved = df.iloc[valid_ids]

        # Predict sentiment & probability from the same embedding.
        probs_all = clf.predict_proba(query_emb)[0]
        sentiment = clf.classes_[np.argmax(probs_all)]
        confidence = np.max(probs_all)

        examples = "\n".join(
            f"{i+1}. {s}" for i, s in enumerate(retrieved['Sentence'].tolist())
        )

        # Save user submission to the shared file.
        # NOTE(review): read-concat-write is not safe under concurrent
        # requests (two users can race and drop a row) — acceptable for a
        # demo, but confirm before multi-worker deployment.
        new_row = pd.DataFrame(
            [{"Sentence": text, "Predicted_Sentiment": sentiment, "Confidence": round(confidence, 2)}]
        )
        existing = pd.read_csv(USER_FEEDBACK_FILE)
        updated = pd.concat([existing, new_row], ignore_index=True)
        updated.to_csv(USER_FEEDBACK_FILE, index=False)

        print(f"✅ Prediction: {sentiment} ({confidence:.2f})")

        # Return both text output + table.
        explanation = (
            f"**Predicted Sentiment:** {sentiment}\n"
            f"**Confidence:** {confidence:.2f}\n\n"
            f"**Similar Feedbacks:**\n{examples}"
        )
        return explanation, updated

    except Exception as e:
        # Top-level boundary: surface the traceback in the UI instead of
        # crashing the Gradio worker, and keep the table visible.
        tb = traceback.format_exc()
        print("❌ Error:", tb)
        return f"❌ Error occurred:\n```\n{tb}\n```", pd.read_csv(USER_FEEDBACK_FILE)

# ===============================
# Gradio Interface
# ===============================
# classify_feedback returns (markdown_explanation, submissions_dataframe),
# mapped positionally onto the two output components below.
demo = gr.Interface(
    fn=classify_feedback,
    inputs=[gr.Textbox(label="Enter Student Feedback")],
    outputs=[
        gr.Markdown(label="Prediction & Explanation"),
        # Headers must match the columns of the DataFrame produced by
        # classify_feedback exactly (the originals had stray ": "
        # suffixes that mismatched the real column names).
        gr.Dataframe(headers=["Sentence", "Predicted_Sentiment", "Confidence"], label="🗂️ All User Feedback")
    ],
    title="🎓 Student Feedback RAG System",
    description=(
        "Classifies Urdu/Roman Urdu and English student feedback with context and reasoning.<br>"
        "All submissions are saved and visible to everyone below 👇"
    ),
)

demo.launch()