Spaces:

ESCP
/

C_2_SE21-SmartStudyPlanner3

Sleeping

App Files Files Community

C_2_SE21-SmartStudyPlanner3 / app.py

sashanouaille

Upload 8 files

41c9888 verified 30 days ago

raw

history blame contribute delete

14 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import matplotlib
	matplotlib.use("Agg")
	import seaborn as sns
	from pathlib import Path
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score
	import warnings
	warnings.filterwarnings("ignore")

	# ─── Load data ───────────────────────────────────────────────
	# Best-effort dataset loading: the app must still start (showing
	# "not loaded" messages) when the CSV files are missing or unreadable.
	try:
	    df = pd.read_csv("student_dataset_enriched.csv")
	    df_reviews = pd.read_csv("synthetic_student_reviews.csv")
	except Exception as load_error:
	    # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
	    # propagate, and the reason for the failure is reported.
	    print(f"Could not load datasets: {load_error!r}")
	    # Bind the names so a later access fails with a clear error instead
	    # of a NameError; every consumer checks DATA_LOADED first.
	    df = None
	    df_reviews = None
	    DATA_LOADED = False
	else:
	    DATA_LOADED = True

	# Single module-level VADER analyzer, shared by every sentiment
	# computation in this file (student report, chart labels, sentiment tab).
	analyzer = SentimentIntensityAnalyzer()

	# ─── Train RF model once at startup ──────────────────────────
	def train_model():
	    """Fit the random-forest risk classifier on the loaded student dataset.

	    Returns:
	        None when the datasets were not loaded; otherwise a tuple
	        ``(classifier, feature_names, accuracy)`` where ``accuracy`` is
	        measured on the held-out 20 % split.

	    Side effects:
	        Adds a 0/1 ``risk_label`` column to the module-level ``df``
	        (1 where ``risk_level`` equals ``"high"``).
	    """
	    if not DATA_LOADED:
	        return None

	    feature_names = [
	        "hours_studied",
	        "attendance",
	        "sleep_hours",
	        "previous_scores",
	        "motivation_level",
	        "tutoring_sessions",
	        "stress_score",
	        "focus_level",
	    ]

	    # Binary target: 1 for high-risk students, 0 for everyone else.
	    df["risk_label"] = (df["risk_level"] == "high").astype(int)
	    inputs = df[feature_names]
	    target = df["risk_label"]

	    # Fixed seeds keep the split and the forest reproducible across restarts.
	    train_x, test_x, train_y, test_y = train_test_split(
	        inputs, target, test_size=0.2, random_state=2025
	    )
	    forest = RandomForestClassifier(n_estimators=100, random_state=2025)
	    forest.fit(train_x, train_y)

	    accuracy = accuracy_score(test_y, forest.predict(test_x))
	    return forest, feature_names, accuracy


	MODEL_DATA = train_model()

	# ══════════════════════════════════════════════════════════════
	# TAB 1 — Student Risk Analyser
	# ══════════════════════════════════════════════════════════════
	def analyse_student(hours, attendance, sleep, prev_score,
	                    motivation, tutoring, comment, days_exam, difficulty):
	    """Build a Markdown risk report for a single student profile.

	    Args:
	        hours: Hours studied per week.
	        attendance: Attendance percentage (part of the UI interface; not
	            used by the rule-based scoring below).
	        sleep: Sleep hours per night.
	        prev_score: Previous score (0-100).
	        motivation: Motivation level (0 = low, 1 = medium, 2 = high).
	        tutoring: Tutoring sessions per week (part of the UI interface; not
	            used by the rule-based scoring below).
	        comment: Free-text student comment scored with VADER; ``None`` is
	            treated as an empty comment.
	        days_exam: Days until the exam.
	        difficulty: Exam difficulty (1-5).

	    Returns:
	        A Markdown string with an indicator table, the risk level, the
	        study priority, the comment sentiment and a list of study tips.
	    """
	    # Derived indicators: stress rises as sleep and motivation drop; focus
	    # scales motivation by the fraction of a 10-hour night actually slept.
	    stress = (10 - sleep) + (2 - motivation)
	    focus = motivation * (sleep / 10)
	    burnout = stress > 7 and hours < 15

	    # Risk level: weak previous results or the burnout pattern.
	    if prev_score < 60 or burnout:
	        risk = "🔴 HIGH RISK"
	    else:
	        risk = "🟢 LOW RISK"

	    # Priority: accumulate urgency points from exam proximity, difficulty,
	    # stress and sleep deprivation.
	    urgency = 0
	    if days_exam <= 1:
	        urgency += 3
	    elif days_exam <= 3:
	        urgency += 2
	    elif days_exam <= 7:
	        urgency += 1

	    if difficulty >= 4:
	        urgency += 2
	    elif difficulty >= 3:
	        urgency += 1

	    if stress > 8:
	        urgency += 2
	    elif stress > 5:
	        urgency += 1

	    if sleep < 5:
	        urgency += 1

	    if urgency >= 6:
	        priority = "🚨 CRITICAL"
	    elif urgency >= 4:
	        priority = "🔶 HIGH"
	    elif urgency >= 2:
	        priority = "🔷 MEDIUM"
	    else:
	        priority = "🟩 LOW"

	    # Sentiment of the free-text comment (empty when nothing was typed).
	    score = analyzer.polarity_scores(comment or "")["compound"]
	    if score >= 0.05:
	        sentiment = "😊 Positive"
	    elif score <= -0.05:
	        sentiment = "😟 Negative"
	    else:
	        sentiment = "😐 Neutral"

	    # Study tips
	    tips = []
	    if sleep < 6:
	        tips.append("💤 Prioritise sleep — aim for at least 7h to improve focus and memory consolidation.")
	    if hours < 15:
	        tips.append("📚 Increase daily study time gradually — start with +1h per day using the Pomodoro technique.")
	    if stress > 7:
	        tips.append("🧘 Practice stress management: 5-min breathing exercises before each study session.")
	    if burnout:
	        tips.append("⚠️ Burnout risk detected — take short breaks every 45 min and avoid all-nighters.")
	    if motivation < 1:
	        tips.append("🎯 Set small daily goals and reward yourself when you achieve them.")
	    if not tips:
	        tips.append("✅ You're on track! Keep your current routine and review key concepts regularly.")

	    # The report is assembled from explicit lines so the rendered Markdown
	    # never depends on how this source file happens to be indented, and the
	    # table uses plain "|" delimiters (an escaped pipe is not a column
	    # separator in Markdown and "\|" is not a valid Python escape).
	    burnout_cell = "⚠️ Yes" if burnout else "✅ No"
	    report_lines = [
	        "",
	        "## 📊 Student Analysis Report",
	        "",
	        "| Indicator | Value |",
	        "|-----------|-------|",
	        f"| Stress Score | {stress:.1f} / 12 |",
	        f"| Focus Level | {focus:.2f} |",
	        f"| Burnout Risk | {burnout_cell} |",
	        "",
	        f"## 🎯 Risk Level: {risk}",
	        f"## ⏱️ Study Priority: {priority}",
	        f"## 💬 Comment Sentiment: {sentiment} (score: {score:.2f})",
	        "",
	        "## 💡 Study Tips:",
	    ]
	    report_lines.extend(f"- {tip}" for tip in tips)
	    return "\n".join(report_lines)

	# ══════════════════════════════════════════════════════════════
	# TAB 2 — Data Visualisation
	# ══════════════════════════════════════════════════════════════
	def _vader_label(text):
	    """Classify one comment as positive/negative/neutral from its VADER compound score."""
	    compound = analyzer.polarity_scores(str(text))["compound"]
	    if compound >= 0.05:
	        return "positive"
	    if compound <= -0.05:
	        return "negative"
	    return "neutral"


	def plot_chart(chart_type):
	    """Render the chart selected in the Data Visualisation tab.

	    Args:
	        chart_type: One of the chart names offered by the dropdown
	            ("Study Hours vs Final Score", "Stress Score by Risk Level",
	            "Correlation Heatmap", "Sentiment Distribution",
	            "Feature Importance (Random Forest)").

	    Returns:
	        A matplotlib Figure. The figure is detached from pyplot before it
	        is returned, so repeated calls do not accumulate open figures.
	    """
	    if not DATA_LOADED:
	        fig, ax = plt.subplots()
	        ax.text(0.5, 0.5, "Data not loaded", ha="center")
	        plt.close(fig)
	        return fig

	    # Apply the theme before creating the axes so the very first chart is
	    # styled like all later ones.
	    sns.set_theme(style="whitegrid")
	    fig, ax = plt.subplots(figsize=(9, 5))

	    try:
	        if chart_type == "Study Hours vs Final Score":
	            points = ax.scatter(df["hours_studied"], df["final_exam_score"],
	                                c=df["stress_score"], cmap="RdYlGn_r", alpha=0.6)
	            # Figure-level calls avoid pyplot's global "current figure",
	            # which is shared between concurrent requests.
	            fig.colorbar(points, ax=ax, label="Stress Score")
	            ax.set_xlabel("Hours Studied")
	            ax.set_ylabel("Final Exam Score")
	            ax.set_title("Study Hours vs Final Score")

	        elif chart_type == "Stress Score by Risk Level":
	            df.boxplot(column="stress_score", by="risk_level", ax=ax,
	                       patch_artist=True,
	                       boxprops=dict(facecolor="steelblue", color="navy"),
	                       medianprops=dict(color="white", linewidth=2))
	            ax.set_title("Stress Score by Risk Level")
	            # Drop the automatic "Boxplot grouped by ..." super-title.
	            fig.suptitle("")

	        elif chart_type == "Correlation Heatmap":
	            cols = ["hours_studied", "attendance", "sleep_hours",
	                    "previous_scores", "stress_score", "focus_level",
	                    "final_exam_score"]
	            sns.heatmap(df[cols].corr(), annot=True, fmt=".2f",
	                        cmap="coolwarm", ax=ax)
	            ax.set_title("Correlation Heatmap")

	        elif chart_type == "Sentiment Distribution":
	            # Label the reviews once (one VADER call per comment) and keep
	            # the result on the DataFrame so later calls reuse it.
	            if "vader_label" not in df_reviews.columns:
	                df_reviews["vader_label"] = (
	                    df_reviews["student_comment"].apply(_vader_label)
	                )
	            counts = df_reviews["vader_label"].value_counts()
	            palette = {"positive": "mediumseagreen", "neutral": "gold",
	                       "negative": "tomato"}
	            colours = [palette.get(label, "gray") for label in counts.index]
	            counts.plot(kind="bar", ax=ax, color=colours, edgecolor="white")
	            ax.set_title("Sentiment Distribution of Student Comments")
	            ax.tick_params(axis="x", rotation=0)

	        elif chart_type == "Feature Importance (Random Forest)":
	            if MODEL_DATA:
	                clf, features, acc = MODEL_DATA
	                importances = pd.Series(clf.feature_importances_,
	                                        index=features).sort_values()
	                importances.plot(kind="barh", ax=ax, color="steelblue")
	                ax.set_title(f"Random Forest Feature Importance (Accuracy: {acc:.0%})")
	            else:
	                # Say why the chart is empty instead of showing blank axes.
	                ax.text(0.5, 0.5, "Model not available", ha="center",
	                        transform=ax.transAxes)

	        else:
	            ax.text(0.5, 0.5, "Unknown chart type", ha="center",
	                    transform=ax.transAxes)

	        fig.tight_layout()
	    finally:
	        # Unregister the figure from pyplot (even on error) so a
	        # long-running server does not leak one figure per request; the
	        # Figure object itself stays usable for rendering.
	        plt.close(fig)

	    return fig

	# ══════════════════════════════════════════════════════════════
	# TAB 3 — Sentiment Analyser
	# ══════════════════════════════════════════════════════════════
	def analyse_sentiment(text):
	    """Score a free-text comment with VADER and describe the result.

	    Args:
	        text: The comment to analyse. ``None``, empty or whitespace-only
	            input is rejected with a prompt instead of being scored.

	    Returns:
	        A text report with the sentiment label, the compound score, the
	        positive/neutral/negative proportions and a short interpretation,
	        or "Please enter a comment." when there is nothing to analyse.
	    """
	    if text is None or not text.strip():
	        return "Please enter a comment."

	    scores = analyzer.polarity_scores(text)
	    compound = scores["compound"]

	    # Label and interpretation share one set of thresholds so they can
	    # never disagree.
	    if compound >= 0.05:
	        label = "😊 Positive"
	        interpretation = "This student appears confident and motivated."
	    elif compound <= -0.05:
	        label = "😟 Negative"
	        interpretation = "This student may be struggling — consider flagging for support."
	    else:
	        label = "😐 Neutral"
	        interpretation = "This student has a neutral outlook — monitor progress."

	    # Built from explicit lines so the text does not pick up source
	    # indentation; plain "|" replaces the invalid "\|" escape sequence.
	    report_lines = [
	        "",
	        f"Sentiment: {label}",
	        f"Compound score: {compound:.3f}",
	        f"Positive: {scores['pos']:.3f} | Neutral: {scores['neu']:.3f} | Negative: {scores['neg']:.3f}",
	        "",
	        f"Interpretation: {interpretation}",
	        "",
	    ]
	    return "\n".join(report_lines)

	# ══════════════════════════════════════════════════════════════
	# TAB 4 — Dataset Explorer
	# ══════════════════════════════════════════════════════════════
	def explore_data():
	    """Return Markdown for the dataset overview shown in the explorer tab.

	    Returns:
	        A pair of Markdown strings: summary statistics (rounded to two
	        decimals) and the first ten rows. When the dataset was not loaded,
	        the pair is ``("Dataset not loaded.", "")``.
	    """
	    if DATA_LOADED:
	        stats_table = df.describe().round(2).to_markdown()
	        head_table = df.head(10).to_markdown()
	        return (
	            "### Summary Statistics\n" + stats_table,
	            "### First 10 rows\n" + head_table,
	        )
	    return "Dataset not loaded.", ""

	# ══════════════════════════════════════════════════════════════
	# BUILD THE APP
	# ══════════════════════════════════════════════════════════════
	# Layout of the four-tab Gradio interface. Markdown blocks are written as
	# explicit strings so their content carries no source-file indentation.
	with gr.Blocks(title="Smart Study Planner", theme=gr.themes.Soft()) as demo:

	    gr.Markdown(
	        "\n"
	        "# 🎓 Smart Study Planner\n"
	        "AI for Big Data Management — Group C2\n"
	        "Rosamaria Guadalupi · Sasha Nouaille · Alexia Beraud Valero · Aurora Vimercati · Luna Mariah Gallina\n"
	        "---\n"
	    )

	    with gr.Tabs():

	        # ── TAB 1 ─────────────────────────────────────────────
	        with gr.TabItem("🔍 Student Analyser"):
	            gr.Markdown("### Enter a student profile to get a risk assessment and study recommendations.")
	            with gr.Row():
	                # Left column: numeric profile of the student.
	                with gr.Column():
	                    hours = gr.Slider(0, 40, value=20, label="Hours Studied per Week")
	                    attendance = gr.Slider(0, 100, value=80, label="Attendance (%)")
	                    sleep = gr.Slider(3, 12, value=7, step=0.5, label="Sleep Hours per Night")
	                    prev_score = gr.Slider(0, 100, value=65, label="Previous Scores")
	                    motivation = gr.Slider(0, 2, value=1, step=1,
	                                           label="Motivation Level (0=Low, 1=Medium, 2=High)")
	                    tutoring = gr.Slider(0, 5, value=1, step=1, label="Tutoring Sessions per Week")
	                # Right column: comment, exam context and the action button.
	                with gr.Column():
	                    comment = gr.Textbox(label="Student Comment",
	                                         placeholder="e.g. I feel overwhelmed this week...",
	                                         lines=3)
	                    days_exam = gr.Slider(1, 30, value=5, step=1, label="Days Until Exam")
	                    difficulty = gr.Slider(1, 5, value=3, step=1, label="Exam Difficulty (1-5)")
	                    btn1 = gr.Button("🔍 Analyse Student", variant="primary")

	            output1 = gr.Markdown()
	            # Input order must match analyse_student's positional parameters.
	            btn1.click(analyse_student,
	                       inputs=[hours, attendance, sleep, prev_score,
	                               motivation, tutoring, comment, days_exam, difficulty],
	                       outputs=output1)

	        # ── TAB 2 ─────────────────────────────────────────────
	        with gr.TabItem("📊 Data Visualisation"):
	            gr.Markdown("### Explore the student dataset through interactive charts.")
	            # These choices are the exact strings plot_chart dispatches on.
	            chart_choice = gr.Dropdown(
	                choices=["Study Hours vs Final Score",
	                         "Stress Score by Risk Level",
	                         "Correlation Heatmap",
	                         "Sentiment Distribution",
	                         "Feature Importance (Random Forest)"],
	                value="Study Hours vs Final Score",
	                label="Select Chart")
	            btn2 = gr.Button("📊 Generate Chart", variant="primary")
	            plot = gr.Plot()
	            btn2.click(plot_chart, inputs=chart_choice, outputs=plot)

	        # ── TAB 3 ─────────────────────────────────────────────
	        with gr.TabItem("💬 Sentiment Analyser"):
	            gr.Markdown("### Analyse the sentiment of any student comment using VADER.")
	            comment_input = gr.Textbox(label="Enter a student comment",
	                                       placeholder="e.g. I can't focus and I'm exhausted...",
	                                       lines=4)
	            btn3 = gr.Button("💬 Analyse Sentiment", variant="primary")
	            output3 = gr.Markdown()
	            btn3.click(analyse_sentiment, inputs=comment_input, outputs=output3)

	        # ── TAB 4 ─────────────────────────────────────────────
	        with gr.TabItem("📂 Dataset Explorer"):
	            gr.Markdown("### Explore the enriched student dataset.")
	            btn4 = gr.Button("📂 Load Dataset", variant="primary")
	            stats = gr.Markdown()
	            sample = gr.Markdown()
	            btn4.click(explore_data, outputs=[stats, sample])

	    gr.Markdown(
	        "\n"
	        "---\n"
	        "Smart Study Planner · AI for Big Data Management · ESCP Business School\n"
	    )

	# Start the server only when the file is executed as a script, so the
	# module can be imported (e.g. for testing) without blocking on a server.
	if __name__ == "__main__":
	    demo.launch()