# Hugging Face file-page residue (not part of the program):
# sashanouaille's picture
# Upload 8 files
# 41c9888 verified
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
import seaborn as sns
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
# ─── Load data ───────────────────────────────────────────────
# Both CSV files are read from the current working directory. When either one
# is missing or unreadable the app still starts: DATA_LOADED is set to False
# and every data-dependent tab shows a "not loaded" message instead.
try:
    df = pd.read_csv("student_dataset_enriched.csv")
    df_reviews = pd.read_csv("synthetic_student_reviews.csv")
    DATA_LOADED = True
except (OSError, ValueError) as exc:
    # OSError covers missing/unreadable files; ValueError covers pandas'
    # EmptyDataError / ParserError and text-decoding failures. Anything else
    # (e.g. KeyboardInterrupt) is deliberately allowed to propagate.
    df = None
    df_reviews = None
    DATA_LOADED = False
    print(f"[Smart Study Planner] Dataset not loaded: {exc}")
# Single VADER analyzer instance, created once at import time and reused by
# analyse_student, plot_chart and analyse_sentiment.
analyzer = SentimentIntensityAnalyzer()
# ─── Train RF model once at startup ──────────────────────────
def train_model():
    """Fit a random-forest classifier that predicts the "high" risk level.

    Returns:
        ``None`` when the dataset could not be loaded; otherwise a tuple
        ``(classifier, feature_names, accuracy)`` where ``accuracy`` is
        measured on a 20 % hold-out split.

    Side effect:
        Adds a binary ``risk_label`` column (1 where ``risk_level`` equals
        ``"high"``) to the module-level ``df``.
    """
    if not DATA_LOADED:
        return None

    feature_names = [
        "hours_studied",
        "attendance",
        "sleep_hours",
        "previous_scores",
        "motivation_level",
        "tutoring_sessions",
        "stress_score",
        "focus_level",
    ]

    # The label is stored on the shared frame, exactly as before.
    df["risk_label"] = (df["risk_level"] == "high").astype(int)
    inputs = df[feature_names]
    target = df["risk_label"]

    # Fixed seeds keep both the split and the forest reproducible.
    split = train_test_split(inputs, target, test_size=0.2, random_state=2025)
    train_inputs, test_inputs, train_target, test_target = split

    forest = RandomForestClassifier(n_estimators=100, random_state=2025)
    forest.fit(train_inputs, train_target)

    predictions = forest.predict(test_inputs)
    accuracy = accuracy_score(test_target, predictions)
    return forest, feature_names, accuracy


MODEL_DATA = train_model()
# ══════════════════════════════════════════════════════════════
# TAB 1 — Student Risk Analyser
# ══════════════════════════════════════════════════════════════
def _priority_label(stress, sleep, days_exam, difficulty):
    """Turn exam proximity, difficulty, stress and sleep into a priority label."""
    urgency = 0

    # Exam proximity contributes up to 3 points.
    if days_exam <= 1:
        urgency += 3
    elif days_exam <= 3:
        urgency += 2
    elif days_exam <= 7:
        urgency += 1

    # Exam difficulty contributes up to 2 points.
    if difficulty >= 4:
        urgency += 2
    elif difficulty >= 3:
        urgency += 1

    # Stress contributes up to 2 points, short sleep one more.
    if stress > 8:
        urgency += 2
    elif stress > 5:
        urgency += 1
    if sleep < 5:
        urgency += 1

    if urgency >= 6:
        return "🚨 CRITICAL"
    if urgency >= 4:
        return "🔶 HIGH"
    if urgency >= 2:
        return "🔷 MEDIUM"
    return "🟩 LOW"


def _compound_to_sentiment(score):
    """Map a VADER compound score to a labelled sentiment (±0.05 thresholds)."""
    if score >= 0.05:
        return "😊 Positive"
    if score <= -0.05:
        return "😟 Negative"
    return "😐 Neutral"


def _study_tips(hours, sleep, motivation, stress, burnout):
    """Collect the study tips triggered by the student's profile."""
    tips = []
    if sleep < 6:
        tips.append("💤 Prioritise sleep — aim for at least 7h to improve focus and memory consolidation.")
    if hours < 15:
        tips.append("📚 Increase daily study time gradually — start with +1h per day using the Pomodoro technique.")
    if stress > 7:
        tips.append("🧘 Practice stress management: 5-min breathing exercises before each study session.")
    if burnout:
        tips.append("⚠️ Burnout risk detected — take short breaks every 45 min and avoid all-nighters.")
    if motivation < 1:
        tips.append("🎯 Set small daily goals and reward yourself when you achieve them.")
    if not tips:
        tips.append("✅ You're on track! Keep your current routine and review key concepts regularly.")
    return tips


def analyse_student(hours, attendance, sleep, prev_score,
                    motivation, tutoring, comment, days_exam, difficulty):
    """Build the Markdown risk report shown in the "Student Analyser" tab.

    Args:
        hours: Hours studied per week.
        attendance: Attendance percentage (accepted for interface
            compatibility; not used by the rules below).
        sleep: Sleep hours per night.
        prev_score: Previous score; below 60 marks the student as high risk.
        motivation: Motivation level (0 = low, 1 = medium, 2 = high).
        tutoring: Tutoring sessions per week (accepted; not used below).
        comment: Free-text student comment, scored with VADER. ``None`` is
            treated as an empty comment.
        days_exam: Days until the exam.
        difficulty: Exam difficulty from 1 to 5.

    Returns:
        A Markdown string with derived indicators, risk level, study
        priority, comment sentiment and a bullet list of study tips.
    """
    # Derived indicators.
    stress = (10 - sleep) + (2 - motivation)
    focus = motivation * (sleep / 10)
    burnout = stress > 7 and hours < 15

    # High risk: weak previous score, or the same condition that flags burnout.
    risk = "🔴 HIGH RISK" if (prev_score < 60 or burnout) else "🟢 LOW RISK"

    priority = _priority_label(stress, sleep, days_exam, difficulty)

    score = analyzer.polarity_scores(comment or "")["compound"]
    sentiment = _compound_to_sentiment(score)

    tips = _study_tips(hours, sleep, motivation, stress, burnout)

    # Blank lines between sections keep the table and headings as separate
    # Markdown blocks regardless of the renderer.
    report_lines = [
        "",
        "## 📊 Student Analysis Report",
        "",
        "| Indicator | Value |",
        "|-----------|-------|",
        f"| Stress Score | {stress:.1f} / 12 |",
        f"| Focus Level | {focus:.2f} |",
        f"| Burnout Risk | {'⚠️ Yes' if burnout else '✅ No'} |",
        "",
        f"## 🎯 Risk Level: {risk}",
        "",
        f"## ⏱️ Study Priority: {priority}",
        "",
        f"## 💬 Comment Sentiment: {sentiment} (score: {score:.2f})",
        "",
        "## 💡 Study Tips:",
        "",
    ]
    report_lines.extend(f"- {tip}" for tip in tips)
    return "\n".join(report_lines)
# ══════════════════════════════════════════════════════════════
# TAB 2 — Data Visualisation
# ══════════════════════════════════════════════════════════════
def _vader_label(text):
    """Classify one comment as positive/negative/neutral with a single VADER call."""
    compound = analyzer.polarity_scores(str(text))["compound"]
    if compound >= 0.05:
        return "positive"
    if compound <= -0.05:
        return "negative"
    return "neutral"


def plot_chart(chart_type):
    """Render the chart selected in the "Data Visualisation" tab.

    Args:
        chart_type: One of the dropdown labels ("Study Hours vs Final Score",
            "Stress Score by Risk Level", "Correlation Heatmap",
            "Sentiment Distribution", "Feature Importance (Random Forest)").
            Any other value yields empty axes.

    Returns:
        A matplotlib ``Figure``. The figure is detached from pyplot before it
        is returned so repeated clicks do not accumulate open figures.
    """
    if not DATA_LOADED:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Data not loaded", ha="center")
        plt.close(fig)
        return fig

    # The theme must be set before the axes exist, otherwise it only affects
    # charts created by later calls.
    sns.set_theme(style="whitegrid")
    fig, ax = plt.subplots(figsize=(9, 5))

    # Figure-level methods are used instead of pyplot's "current figure" API
    # so concurrent requests cannot draw on each other's figure.
    if chart_type == "Study Hours vs Final Score":
        sc = ax.scatter(df["hours_studied"], df["final_exam_score"],
                        c=df["stress_score"], cmap="RdYlGn_r", alpha=0.6)
        fig.colorbar(sc, ax=ax, label="Stress Score")
        ax.set_xlabel("Hours Studied")
        ax.set_ylabel("Final Exam Score")
        ax.set_title("Study Hours vs Final Score")
    elif chart_type == "Stress Score by Risk Level":
        df.boxplot(column="stress_score", by="risk_level", ax=ax,
                   patch_artist=True,
                   boxprops=dict(facecolor="steelblue", color="navy"),
                   medianprops=dict(color="white", linewidth=2))
        ax.set_title("Stress Score by Risk Level")
        # Drop the automatic "Boxplot grouped by ..." super-title.
        fig.suptitle("")
    elif chart_type == "Correlation Heatmap":
        cols = ["hours_studied", "attendance", "sleep_hours",
                "previous_scores", "stress_score", "focus_level", "final_exam_score"]
        corr = df[cols].corr()
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        ax.set_title("Correlation Heatmap")
    elif chart_type == "Sentiment Distribution":
        # Label the comments once and keep the column on the shared frame so
        # later calls reuse it instead of re-running VADER.
        if "vader_label" not in df_reviews.columns:
            df_reviews["vader_label"] = df_reviews["student_comment"].apply(_vader_label)
        counts = df_reviews["vader_label"].value_counts()
        palette = {"positive": "mediumseagreen", "neutral": "gold",
                   "negative": "tomato"}
        colours = [palette.get(label, "gray") for label in counts.index]
        counts.plot(kind="bar", ax=ax, color=colours, edgecolor="white")
        ax.set_title("Sentiment Distribution of Student Comments")
        ax.tick_params(axis="x", rotation=0)
    elif chart_type == "Feature Importance (Random Forest)":
        if MODEL_DATA:
            clf, features, acc = MODEL_DATA
            imp = pd.Series(clf.feature_importances_, index=features).sort_values()
            imp.plot(kind="barh", ax=ax, color="steelblue")
            ax.set_title(f"Random Forest Feature Importance (Accuracy: {acc:.0%})")

    fig.tight_layout()
    # Release pyplot's reference; the Figure object itself stays usable.
    plt.close(fig)
    return fig
# ══════════════════════════════════════════════════════════════
# TAB 3 — Sentiment Analyser
# ══════════════════════════════════════════════════════════════
def analyse_sentiment(text):
    """Score a single comment with VADER and describe the result in Markdown.

    Args:
        text: The comment to analyse. ``None``, empty and whitespace-only
            input are all treated as "nothing entered".

    Returns:
        ``"Please enter a comment."`` for blank input; otherwise a Markdown
        string with the sentiment label, the compound score, the
        positive/neutral/negative proportions and a short interpretation.
    """
    if not text or not text.strip():
        return "Please enter a comment."

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    # Label and interpretation share the same ±0.05 thresholds, so they are
    # chosen together.
    if compound >= 0.05:
        label = "😊 Positive"
        interpretation = "This student appears confident and motivated."
    elif compound <= -0.05:
        label = "😟 Negative"
        interpretation = "This student may be struggling — consider flagging for support."
    else:
        label = "😐 Neutral"
        interpretation = "This student has a neutral outlook — monitor progress."

    # Blank lines between entries make each one its own Markdown paragraph.
    parts = [
        f"**Sentiment:** {label}",
        f"**Compound score:** {compound:.3f}",
        f"**Positive:** {scores['pos']:.3f} | **Neutral:** {scores['neu']:.3f} | **Negative:** {scores['neg']:.3f}",
        f"*Interpretation:* {interpretation}",
    ]
    return "\n" + "\n\n".join(parts) + "\n"
# ══════════════════════════════════════════════════════════════
# TAB 4 — Dataset Explorer
# ══════════════════════════════════════════════════════════════
def explore_data():
    """Return two Markdown snippets describing the student dataset.

    Returns:
        A ``(summary, sample)`` pair: descriptive statistics rounded to two
        decimals, and the first ten rows. When the dataset is not loaded the
        pair is ``("Dataset not loaded.", "")``.
    """
    if DATA_LOADED:
        stats_table = df.describe().round(2).to_markdown()
        head_table = df.head(10).to_markdown()
        return (
            "### Summary Statistics\n" + stats_table,
            "### First 10 rows\n" + head_table,
        )
    return "Dataset not loaded.", ""
# ══════════════════════════════════════════════════════════════
# BUILD THE APP
# ══════════════════════════════════════════════════════════════
# Four tabs, each wiring one button to one of the handler functions above.
with gr.Blocks(title="Smart Study Planner", theme=gr.themes.Soft()) as demo:
    # Header. Blank lines keep the title, subtitle, author list and rule as
    # separate Markdown blocks (a "---" directly under text would otherwise
    # turn that text into a heading).
    gr.Markdown(
        "# 🎓 Smart Study Planner\n\n"
        "**AI for Big Data Management — Group C2**\n\n"
        "*Rosamaria Guadalupi · Sasha Nouaille · Alexia Beraud Valero · Aurora Vimercati · Luna Mariah Gallina*\n\n"
        "---\n"
    )

    with gr.Tabs():
        # ── TAB 1 ─────────────────────────────────────────────
        with gr.TabItem("🔍 Student Analyser"):
            gr.Markdown("### Enter a student profile to get a risk assessment and study recommendations.")
            with gr.Row():
                with gr.Column():
                    hours = gr.Slider(0, 40, value=20, label="Hours Studied per Week")
                    attendance = gr.Slider(0, 100, value=80, label="Attendance (%)")
                    sleep = gr.Slider(3, 12, value=7, step=0.5, label="Sleep Hours per Night")
                    prev_score = gr.Slider(0, 100, value=65, label="Previous Scores")
                    motivation = gr.Slider(0, 2, value=1, step=1,
                                           label="Motivation Level (0=Low, 1=Medium, 2=High)")
                    tutoring = gr.Slider(0, 5, value=1, step=1, label="Tutoring Sessions per Week")
                with gr.Column():
                    comment = gr.Textbox(label="Student Comment",
                                         placeholder="e.g. I feel overwhelmed this week...",
                                         lines=3)
                    days_exam = gr.Slider(1, 30, value=5, step=1, label="Days Until Exam")
                    difficulty = gr.Slider(1, 5, value=3, step=1, label="Exam Difficulty (1-5)")
            btn1 = gr.Button("🔍 Analyse Student", variant="primary")
            output1 = gr.Markdown()
            # Input order must match analyse_student's positional parameters.
            btn1.click(analyse_student,
                       inputs=[hours, attendance, sleep, prev_score,
                               motivation, tutoring, comment, days_exam, difficulty],
                       outputs=output1)

        # ── TAB 2 ─────────────────────────────────────────────
        with gr.TabItem("📊 Data Visualisation"):
            gr.Markdown("### Explore the student dataset through interactive charts.")
            # These labels are matched verbatim inside plot_chart.
            chart_choice = gr.Dropdown(
                choices=["Study Hours vs Final Score",
                         "Stress Score by Risk Level",
                         "Correlation Heatmap",
                         "Sentiment Distribution",
                         "Feature Importance (Random Forest)"],
                value="Study Hours vs Final Score",
                label="Select Chart")
            btn2 = gr.Button("📊 Generate Chart", variant="primary")
            plot = gr.Plot()
            btn2.click(plot_chart, inputs=chart_choice, outputs=plot)

        # ── TAB 3 ─────────────────────────────────────────────
        with gr.TabItem("💬 Sentiment Analyser"):
            gr.Markdown("### Analyse the sentiment of any student comment using VADER.")
            comment_input = gr.Textbox(label="Enter a student comment",
                                       placeholder="e.g. I can't focus and I'm exhausted...",
                                       lines=4)
            btn3 = gr.Button("💬 Analyse Sentiment", variant="primary")
            output3 = gr.Markdown()
            btn3.click(analyse_sentiment, inputs=comment_input, outputs=output3)

        # ── TAB 4 ─────────────────────────────────────────────
        with gr.TabItem("📂 Dataset Explorer"):
            gr.Markdown("### Explore the enriched student dataset.")
            btn4 = gr.Button("📂 Load Dataset", variant="primary")
            stats = gr.Markdown()
            sample = gr.Markdown()
            btn4.click(explore_data, outputs=[stats, sample])

    # Footer.
    gr.Markdown(
        "\n---\n\n"
        "*Smart Study Planner · AI for Big Data Management · ESCP Business School*\n"
    )

# Launch only when run as a script, so importing this module (e.g. for tests
# or Gradio's reload mode) does not start a server.
if __name__ == "__main__":
    demo.launch()