"""Smart Study Planner: a four-tab Gradio app.

Tabs:
    1. Student Analyser   - rule-based risk, priority, sentiment and tips.
    2. Data Visualisation - charts built from the two CSV datasets.
    3. Sentiment Analyser - VADER scores for a free-text comment.
    4. Dataset Explorer   - summary statistics and a sample of the dataset.

The CSV files are loaded once at import time. If loading fails, the
data-driven tabs degrade to a "not loaded" message instead of crashing.
"""
import logging
import warnings
from pathlib import Path

import matplotlib

# Select the headless backend before pyplot is imported.
matplotlib.use("Agg")

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.figure import Figure
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

warnings.filterwarnings("ignore")
logger = logging.getLogger(__name__)

# Apply the seaborn theme once, before any axes are created, so every chart
# (including the first one) is rendered with the same style.
sns.set_theme(style="whitegrid")

# ─── Load data ───────────────────────────────────────────────
try:
    df = pd.read_csv("student_dataset_enriched.csv")
    df_reviews = pd.read_csv("synthetic_student_reviews.csv")
    DATA_LOADED = True
except (OSError, ValueError) as exc:
    # OSError covers missing/unreadable files; pandas' parser and empty-file
    # errors derive from ValueError. The app still starts without data.
    logger.warning("Could not load datasets (%s); data tabs are disabled.", exc)
    df = None
    df_reviews = None
    DATA_LOADED = False

analyzer = SentimentIntensityAnalyzer()

# ─── Sentiment helpers ───────────────────────────────────────
# Compound-score cut-offs shared by every tab that classifies sentiment.
POSITIVE_THRESHOLD = 0.05
NEGATIVE_THRESHOLD = -0.05

SENTIMENT_DISPLAY = {
    "positive": "😊 Positive",
    "negative": "😟 Negative",
    "neutral": "😐 Neutral",
}

SENTIMENT_INTERPRETATION = {
    "positive": "This student appears confident and motivated.",
    "negative": "This student may be struggling — consider flagging for support.",
    "neutral": "This student has a neutral outlook — monitor progress.",
}

SENTIMENT_COLOURS = {
    "positive": "mediumseagreen",
    "neutral": "gold",
    "negative": "tomato",
}


def _classify_compound(compound):
    """Map a VADER compound score to "positive", "negative" or "neutral"."""
    if compound >= POSITIVE_THRESHOLD:
        return "positive"
    if compound <= NEGATIVE_THRESHOLD:
        return "negative"
    return "neutral"


# ─── Train RF model once at startup ──────────────────────────
def train_model():
    """Train the high-risk random-forest classifier on the loaded dataset.

    Returns:
        ``None`` when the datasets are not loaded, otherwise a tuple
        ``(classifier, feature_names, test_accuracy)``.
    """
    if not DATA_LOADED:
        return None
    features = [
        "hours_studied", "attendance", "sleep_hours", "previous_scores",
        "motivation_level", "tutoring_sessions", "stress_score", "focus_level",
    ]
    X = df[features]
    # Keep the binary target local so the shared DataFrame is not mutated
    # (a derived column would otherwise show up in the Dataset Explorer).
    y = (df["risk_level"] == "high").astype(int)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=2025)
    clf = RandomForestClassifier(n_estimators=100, random_state=2025)
    clf.fit(X_train, y_train)
    acc = accuracy_score(y_test, clf.predict(X_test))
    return clf, features, acc


MODEL_DATA = train_model()


# ══════════════════════════════════════════════════════════════
# TAB 1 — Student Risk Analyser
# ══════════════════════════════════════════════════════════════
def _study_priority(days_exam, difficulty, stress, sleep):
    """Turn exam proximity, difficulty, stress and sleep into a priority label."""
    urgency = 0
    if days_exam <= 1:
        urgency += 3
    elif days_exam <= 3:
        urgency += 2
    elif days_exam <= 7:
        urgency += 1
    if difficulty >= 4:
        urgency += 2
    elif difficulty >= 3:
        urgency += 1
    if stress > 8:
        urgency += 2
    elif stress > 5:
        urgency += 1
    if sleep < 5:
        urgency += 1

    if urgency >= 6:
        return "🚨 CRITICAL"
    if urgency >= 4:
        return "🔶 HIGH"
    if urgency >= 2:
        return "🔷 MEDIUM"
    return "🟩 LOW"


def _study_tips(hours, sleep, motivation, stress, burnout):
    """Collect the study tips whose trigger condition is met."""
    tips = []
    if sleep < 6:
        tips.append("💤 Prioritise sleep — aim for at least 7h to improve focus and memory consolidation.")
    if hours < 15:
        tips.append("📚 Increase daily study time gradually — start with +1h per day using the Pomodoro technique.")
    if stress > 7:
        tips.append("🧘 Practice stress management: 5-min breathing exercises before each study session.")
    if burnout:
        tips.append("⚠️ Burnout risk detected — take short breaks every 45 min and avoid all-nighters.")
    if motivation < 1:
        tips.append("🎯 Set small daily goals and reward yourself when you achieve them.")
    if not tips:
        tips.append("✅ You're on track! Keep your current routine and review key concepts regularly.")
    return tips


def analyse_student(hours, attendance, sleep, prev_score, motivation,
                    tutoring, comment, days_exam, difficulty):
    """Build the Markdown risk report for one student profile.

    Args:
        hours: Hours studied per week.
        attendance: Attendance percentage (accepted for the UI, not used
            by the current rules).
        sleep: Sleep hours per night.
        prev_score: Previous score (0-100).
        motivation: Motivation level (0=Low, 1=Medium, 2=High).
        tutoring: Tutoring sessions per week (accepted, not used).
        comment: Free-text student comment; ``None`` is treated as empty.
        days_exam: Days until the exam.
        difficulty: Exam difficulty (1-5).

    Returns:
        A Markdown string with indicators, risk level, study priority,
        comment sentiment and study tips.
    """
    # NOTE(review): with the UI slider ranges this yields roughly -2..9,
    # while the report labels it "/ 12" — confirm the intended scale.
    stress = (10 - sleep) + (2 - motivation)
    focus = motivation * (sleep / 10)
    burnout = stress > 7 and hours < 15

    # Risk level
    risk = "🔴 HIGH RISK" if prev_score < 60 or burnout else "🟢 LOW RISK"

    # Priority
    priority = _study_priority(days_exam, difficulty, stress, sleep)

    # Sentiment (guard against a missing comment)
    score = analyzer.polarity_scores(comment or "")["compound"]
    sentiment = SENTIMENT_DISPLAY[_classify_compound(score)]

    # Study tips
    tips = _study_tips(hours, sleep, motivation, stress, burnout)
    burnout_label = "⚠️ Yes" if burnout else "✅ No"

    result = f"""
## 📊 Student Analysis Report

| Indicator | Value |
|-----------|-------|
| Stress Score | {stress:.1f} / 12 |
| Focus Level | {focus:.2f} |
| Burnout Risk | {burnout_label} |

## 🎯 Risk Level: {risk}
## ⏱️ Study Priority: {priority}
## 💬 Comment Sentiment: {sentiment} (score: {score:.2f})

## 💡 Study Tips:
""" + "\n".join(f"- {t}" for t in tips)
    return result


# ══════════════════════════════════════════════════════════════
# TAB 2 — Data Visualisation
# ══════════════════════════════════════════════════════════════
def plot_chart(chart_type):
    """Render the chart selected in the dropdown.

    Figures are created as standalone ``Figure`` objects rather than through
    ``pyplot``, so they are not retained in pyplot's global registry after
    each request and no "current figure" state is shared between requests.

    Args:
        chart_type: One of the dropdown labels; an unrecognised label
            produces an empty set of axes.

    Returns:
        A matplotlib ``Figure``.
    """
    if not DATA_LOADED:
        fig = Figure()
        ax = fig.subplots()
        ax.text(0.5, 0.5, "Data not loaded", ha="center")
        return fig

    fig = Figure(figsize=(9, 5))
    ax = fig.subplots()

    if chart_type == "Study Hours vs Final Score":
        sc = ax.scatter(df["hours_studied"], df["final_exam_score"],
                        c=df["stress_score"], cmap="RdYlGn_r", alpha=0.6)
        fig.colorbar(sc, ax=ax, label="Stress Score")
        ax.set_xlabel("Hours Studied")
        ax.set_ylabel("Final Exam Score")
        ax.set_title("Study Hours vs Final Score")

    elif chart_type == "Stress Score by Risk Level":
        df.boxplot(column="stress_score", by="risk_level", ax=ax,
                   patch_artist=True,
                   boxprops=dict(facecolor="steelblue", color="navy"),
                   medianprops=dict(color="white", linewidth=2))
        ax.set_title("Stress Score by Risk Level")
        # pandas adds its own "grouped by" super-title; blank it out.
        fig.suptitle("")

    elif chart_type == "Correlation Heatmap":
        cols = ["hours_studied", "attendance", "sleep_hours", "previous_scores",
                "stress_score", "focus_level", "final_exam_score"]
        corr = df[cols].corr()
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        ax.set_title("Correlation Heatmap")

    elif chart_type == "Sentiment Distribution":
        if "vader_label" not in df_reviews.columns:
            # Score each comment once and cache the labels on the DataFrame
            # so later requests reuse them.
            df_reviews["vader_label"] = df_reviews["student_comment"].apply(
                lambda text: _classify_compound(
                    analyzer.polarity_scores(str(text))["compound"]))
        counts = df_reviews["vader_label"].value_counts()
        colours = [SENTIMENT_COLOURS.get(label, "gray") for label in counts.index]
        counts.plot(kind="bar", ax=ax, color=colours, edgecolor="white")
        ax.set_title("Sentiment Distribution of Student Comments")
        ax.tick_params(axis="x", rotation=0)

    elif chart_type == "Feature Importance (Random Forest)":
        if MODEL_DATA:
            clf, features, acc = MODEL_DATA
            imp = pd.Series(clf.feature_importances_, index=features).sort_values()
            imp.plot(kind="barh", ax=ax, color="steelblue")
            ax.set_title(f"Random Forest Feature Importance (Accuracy: {acc:.0%})")
        else:
            # Say why the chart is empty instead of showing blank axes.
            ax.text(0.5, 0.5, "Model not available", ha="center")

    fig.tight_layout()
    return fig


# ══════════════════════════════════════════════════════════════
# TAB 3 — Sentiment Analyser
# ══════════════════════════════════════════════════════════════
def analyse_sentiment(text):
    """Return a Markdown summary of the VADER scores for ``text``.

    Args:
        text: The comment to analyse; ``None`` or whitespace-only input
            yields a prompt to enter a comment.

    Returns:
        A Markdown string with the sentiment label, the compound and
        per-class scores, and a one-line interpretation.
    """
    if not text or not text.strip():
        return "Please enter a comment."
    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]
    category = _classify_compound(compound)
    label = SENTIMENT_DISPLAY[category]
    interpretation = SENTIMENT_INTERPRETATION[category]
    pos, neu, neg = scores["pos"], scores["neu"], scores["neg"]
    return f"""
**Sentiment:** {label}

**Compound score:** {compound:.3f}

**Positive:** {pos:.3f} | **Neutral:** {neu:.3f} | **Negative:** {neg:.3f}

*Interpretation:* {interpretation}
"""


# ══════════════════════════════════════════════════════════════
# TAB 4 — Dataset Explorer
# ══════════════════════════════════════════════════════════════
def explore_data():
    """Return Markdown for the dataset's summary statistics and first rows.

    Returns:
        A ``(summary_markdown, sample_markdown)`` pair; when the datasets
        are not loaded the pair is ``("Dataset not loaded.", "")``.

    Note:
        ``DataFrame.to_markdown`` relies on the optional ``tabulate``
        package being installed.
    """
    if not DATA_LOADED:
        return "Dataset not loaded.", ""
    summary = df.describe().round(2).to_markdown()
    sample = df.head(10).to_markdown()
    return f"### Summary Statistics\n{summary}", f"### First 10 rows\n{sample}"


# ══════════════════════════════════════════════════════════════
# BUILD THE APP
# ══════════════════════════════════════════════════════════════
with gr.Blocks(title="Smart Study Planner", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🎓 Smart Study Planner

**AI for Big Data Management — Group C2**

*Rosamaria Guadalupi · Sasha Nouaille · Alexia Beraud Valero · Aurora Vimercati · Luna Mariah Gallina*

---
""")

    with gr.Tabs():

        # ── TAB 1 ─────────────────────────────────────────────
        with gr.TabItem("🔍 Student Analyser"):
            gr.Markdown("### Enter a student profile to get a risk assessment and study recommendations.")
            with gr.Row():
                with gr.Column():
                    hours = gr.Slider(0, 40, value=20, label="Hours Studied per Week")
                    attendance = gr.Slider(0, 100, value=80, label="Attendance (%)")
                    sleep = gr.Slider(3, 12, value=7, step=0.5, label="Sleep Hours per Night")
                    prev_score = gr.Slider(0, 100, value=65, label="Previous Scores")
                    motivation = gr.Slider(0, 2, value=1, step=1,
                                           label="Motivation Level (0=Low, 1=Medium, 2=High)")
                    tutoring = gr.Slider(0, 5, value=1, step=1, label="Tutoring Sessions per Week")
                with gr.Column():
                    comment = gr.Textbox(label="Student Comment",
                                         placeholder="e.g. I feel overwhelmed this week...",
                                         lines=3)
                    days_exam = gr.Slider(1, 30, value=5, step=1, label="Days Until Exam")
                    difficulty = gr.Slider(1, 5, value=3, step=1, label="Exam Difficulty (1-5)")
            btn1 = gr.Button("🔍 Analyse Student", variant="primary")
            output1 = gr.Markdown()
            btn1.click(analyse_student,
                       inputs=[hours, attendance, sleep, prev_score, motivation,
                               tutoring, comment, days_exam, difficulty],
                       outputs=output1)

        # ── TAB 2 ─────────────────────────────────────────────
        with gr.TabItem("📊 Data Visualisation"):
            gr.Markdown("### Explore the student dataset through interactive charts.")
            chart_choice = gr.Dropdown(
                choices=["Study Hours vs Final Score",
                         "Stress Score by Risk Level",
                         "Correlation Heatmap",
                         "Sentiment Distribution",
                         "Feature Importance (Random Forest)"],
                value="Study Hours vs Final Score",
                label="Select Chart")
            btn2 = gr.Button("📊 Generate Chart", variant="primary")
            plot = gr.Plot()
            btn2.click(plot_chart, inputs=chart_choice, outputs=plot)

        # ── TAB 3 ─────────────────────────────────────────────
        with gr.TabItem("💬 Sentiment Analyser"):
            gr.Markdown("### Analyse the sentiment of any student comment using VADER.")
            comment_input = gr.Textbox(label="Enter a student comment",
                                       placeholder="e.g. I can't focus and I'm exhausted...",
                                       lines=4)
            btn3 = gr.Button("💬 Analyse Sentiment", variant="primary")
            output3 = gr.Markdown()
            btn3.click(analyse_sentiment, inputs=comment_input, outputs=output3)

        # ── TAB 4 ─────────────────────────────────────────────
        with gr.TabItem("📂 Dataset Explorer"):
            gr.Markdown("### Explore the enriched student dataset.")
            btn4 = gr.Button("📂 Load Dataset", variant="primary")
            stats = gr.Markdown()
            sample = gr.Markdown()
            btn4.click(explore_data, outputs=[stats, sample])

    gr.Markdown("""
---

*Smart Study Planner · AI for Big Data Management · ESCP Business School*
""")

# Launch only when run as a script, so importing the module (for tests or
# tooling) does not start a web server.
if __name__ == "__main__":
    demo.launch()