# app.py — Student Feedback RAG System (Hugging Face Space by tahamueed23, commit 0d2216c)
import gradio as gr
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import joblib
import os
import traceback
# ===============================
# Load assets
# ===============================
# Everything here runs at import time; a missing file raises immediately,
# which is the desired fail-fast behavior for app startup.
print("🔄 Loading data and models...")

# Cleaned feedback corpus; must contain a "Sentence" column (shown in retrieval results).
df = pd.read_csv("clean_feedback.csv")
print("✅ CSV loaded with columns:", df.columns.tolist())

# Precomputed sentence embeddings, aligned row-for-row with `df`.
embeddings = np.load("embeddings.npy")
print("✅ Embeddings loaded with shape:", embeddings.shape)

# FAISS index built over `embeddings`, used for nearest-neighbour retrieval.
index = faiss.read_index("feedback.index")
print("✅ FAISS index loaded")

# Scikit-learn-style sentiment classifier (exposes predict_proba / classes_).
clf = joblib.load("feedback_model.pkl")
print("✅ Sentiment model loaded")

# Multilingual encoder (Urdu / Roman Urdu / English); pinned to CPU so the
# app runs on machines without a GPU.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2", device="cpu")
print("✅ SentenceTransformer ready")

# File to store user submissions (shared across all users of the app)
USER_FEEDBACK_FILE = "user_feedback.csv"
if not os.path.exists(USER_FEEDBACK_FILE):
    # Seed an empty CSV so later reads never fail on a missing file.
    pd.DataFrame(columns=["Sentence", "Predicted_Sentiment", "Confidence"]).to_csv(
        USER_FEEDBACK_FILE, index=False
    )
# ===============================
# Core classification function
# ===============================
def classify_feedback(text, top_k=5):
    """Classify one feedback sentence and log the submission.

    Embeds ``text``, retrieves the ``top_k`` most similar corpus sentences
    via FAISS, predicts a sentiment label with the trained classifier,
    appends the result to ``USER_FEEDBACK_FILE``, and returns a Markdown
    explanation plus the updated submissions table.

    Parameters
    ----------
    text : str
        Raw feedback text (Urdu / Roman Urdu / English).
    top_k : int, optional
        Number of similar corpus sentences to retrieve (default 5).

    Returns
    -------
    tuple[str, pandas.DataFrame]
        ``(markdown_explanation, all_user_submissions)``; on any error the
        first element carries the formatted traceback instead.
    """
    try:
        if not text.strip():
            return "⚠️ Please enter a feedback text.", pd.read_csv(USER_FEEDBACK_FILE)

        # Embed query. FAISS requires a contiguous float32 matrix; make the
        # dtype explicit rather than relying on the encoder's output.
        query_emb = np.ascontiguousarray(model.encode([text]), dtype="float32")

        # Retrieve similar sentences. FAISS pads `indices` with -1 when the
        # index holds fewer than top_k vectors; -1 would make .iloc silently
        # grab the last row, so drop those entries first.
        distances, indices = index.search(query_emb, top_k)
        valid = [i for i in indices[0] if i >= 0]
        retrieved = df.iloc[valid]

        # Predict sentiment & probability
        probs_all = clf.predict_proba(query_emb)[0]
        sentiment = clf.classes_[np.argmax(probs_all)]
        confidence = np.max(probs_all)

        examples = "\n".join(
            f"{i+1}. {s}" for i, s in enumerate(retrieved['Sentence'].tolist())
        )

        # Save user submission to shared file. NOTE(review): this
        # read-modify-write is not safe under concurrent users; acceptable
        # for a demo, but a real deployment should append under a lock or
        # use a database.
        new_row = pd.DataFrame(
            [{"Sentence": text, "Predicted_Sentiment": sentiment, "Confidence": round(confidence, 2)}]
        )
        existing = pd.read_csv(USER_FEEDBACK_FILE)
        updated = pd.concat([existing, new_row], ignore_index=True)
        updated.to_csv(USER_FEEDBACK_FILE, index=False)
        print(f"✅ Prediction: {sentiment} ({confidence:.2f})")

        # Return both text output + table
        explanation = (
            f"**Predicted Sentiment:** {sentiment}\n"
            f"**Confidence:** {confidence:.2f}\n\n"
            f"**Similar Feedbacks:**\n{examples}"
        )
        return explanation, updated
    except Exception:
        # Surface the traceback in the UI instead of failing silently.
        tb = traceback.format_exc()
        print("❌ Error:", tb)
        return f"❌ Error occurred:\n```\n{tb}\n```", pd.read_csv(USER_FEEDBACK_FILE)
# ===============================
# Gradio Interface
# ===============================
demo = gr.Interface(
    fn=classify_feedback,
    inputs=[gr.Textbox(label="Enter Student Feedback")],
    outputs=[
        gr.Markdown(label="Prediction & Explanation"),
        # Headers must match the columns of the DataFrame returned by
        # classify_feedback; the original trailing-": " variants
        # ("Sentence: ", ...) did not line up with the data columns.
        gr.Dataframe(
            headers=["Sentence", "Predicted_Sentiment", "Confidence"],
            label="🗂️ All User Feedback",
        ),
    ],
    title="🎓 Student Feedback RAG System",
    description=(
        "Classifies Urdu/Roman Urdu and English student feedback with context and reasoning.<br>"
        "All submissions are saved and visible to everyone below 👇"
    ),
)

if __name__ == "__main__":
    # Guard the launch so importing this module (e.g. from tests) does not
    # start a server; `python app.py` and HF Spaces still launch as before.
    demo.launch()