Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import seaborn as sns | |
| from pathlib import Path | |
| from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import accuracy_score | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# ─── Load data ───────────────────────────────────────────────
# Load both CSVs once at import time. A failure must not stop the app from
# starting: every consumer checks DATA_LOADED and falls back to a
# "not loaded" message instead.
df = None
df_reviews = None
try:
    df = pd.read_csv("student_dataset_enriched.csv")
    df_reviews = pd.read_csv("synthetic_student_reviews.csv")
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError,
        UnicodeDecodeError) as load_error:
    # Only I/O and parsing problems are expected here; anything else
    # (e.g. KeyboardInterrupt) should propagate rather than be swallowed.
    # Reset both frames so a half-loaded state can never be observed.
    df = df_reviews = None
    DATA_LOADED = False
    print(f"Dataset files could not be loaded: {load_error}")
else:
    DATA_LOADED = True
# Module-level VADER analyser instance, referenced by analyse_student,
# plot_chart and analyse_sentiment.
analyzer = SentimentIntensityAnalyzer()
# ─── Train RF model once at startup ──────────────────────────
def train_model():
    """Fit a random-forest classifier that separates high-risk students.

    Returns:
        ``None`` when the datasets were not loaded. Otherwise a tuple
        ``(clf, features, acc)``: the fitted ``RandomForestClassifier``, the
        list of feature column names it was trained on, and its accuracy on
        the 20 % hold-out split.
    """
    if not DATA_LOADED:
        return None

    features = ["hours_studied", "attendance", "sleep_hours",
                "previous_scores", "motivation_level",
                "tutoring_sessions", "stress_score", "focus_level"]

    # Keep the binary target in a local Series. Writing it back into the
    # shared DataFrame would silently add a "risk_label" column to every
    # later view of the data (summary statistics, row preview).
    target = (df["risk_level"] == "high").astype(int)
    predictors = df[features]

    # Fixed seeds keep the split and the forest reproducible across restarts.
    X_train, X_test, y_train, y_test = train_test_split(
        predictors, target, test_size=0.2, random_state=2025)
    clf = RandomForestClassifier(n_estimators=100, random_state=2025)
    clf.fit(X_train, y_train)
    acc = accuracy_score(y_test, clf.predict(X_test))
    return clf, features, acc


# Trained once at import time; None when the data could not be loaded.
MODEL_DATA = train_model()
# ─────────────────────────────────────────────────────────────
# TAB 1 — Student Risk Analyser
# ─────────────────────────────────────────────────────────────
def analyse_student(hours, attendance, sleep, prev_score,
                    motivation, tutoring, comment, days_exam, difficulty):
    """Build the Markdown report shown in the Student Analyser tab.

    The report is rule-based: it derives a stress score, focus level and
    burnout flag from the inputs, assigns a risk level and a study priority,
    scores the free-text comment with VADER, and lists matching study tips.

    Args:
        hours: Hours studied per week.
        attendance: Attendance percentage (accepted but not used by the rules).
        sleep: Sleep hours per night.
        prev_score: Previous score; below 60 marks the student high risk.
        motivation: Motivation level, 0 (low) to 2 (high).
        tutoring: Tutoring sessions per week (accepted but not used by the rules).
        comment: Free-text student comment; ``None`` is treated as empty.
        days_exam: Days until the exam.
        difficulty: Exam difficulty, 1 to 5.

    Returns:
        A Markdown string containing the indicator table, risk level,
        priority, sentiment and tips.
    """
    # Clamp at zero: with more than 10 h of sleep the raw formula goes
    # negative, which would be reported as a nonsensical "-2.0 / 12".
    stress = max(0, (10 - sleep) + (2 - motivation))
    focus = motivation * (sleep / 10)
    burnout = stress > 7 and hours < 15

    # Risk level: weak previous results, or the burnout pattern itself.
    if prev_score < 60 or burnout:
        risk = "π΄ HIGH RISK"
    else:
        risk = "π’ LOW RISK"

    # Priority: accumulate urgency points from exam proximity, difficulty,
    # stress and sleep deprivation, then bucket the total.
    urgency = 0
    if days_exam <= 1:
        urgency += 3
    elif days_exam <= 3:
        urgency += 2
    elif days_exam <= 7:
        urgency += 1
    if difficulty >= 4:
        urgency += 2
    elif difficulty >= 3:
        urgency += 1
    if stress > 8:
        urgency += 2
    elif stress > 5:
        urgency += 1
    if sleep < 5:
        urgency += 1

    if urgency >= 6:
        priority = "π¨ CRITICAL"
    elif urgency >= 4:
        priority = "πΆ HIGH"
    elif urgency >= 2:
        priority = "π· MEDIUM"
    else:
        priority = "π© LOW"

    # Sentiment: the usual VADER cut-offs of +/-0.05 on the compound score.
    # A missing comment is scored as an empty string instead of crashing.
    score = analyzer.polarity_scores(comment or "")["compound"]
    if score >= 0.05:
        sentiment = "π Positive"
    elif score <= -0.05:
        sentiment = "π Negative"
    else:
        sentiment = "π Neutral"

    # Study tips: one per triggered rule, with a fallback when none fire.
    tips = []
    if sleep < 6:
        tips.append("π€ Prioritise sleep β aim for at least 7h to improve focus and memory consolidation.")
    if hours < 15:
        tips.append("π Increase daily study time gradually β start with +1h per day using the Pomodoro technique.")
    if stress > 7:
        tips.append("π§ Practice stress management: 5-min breathing exercises before each study session.")
    if burnout:
        tips.append("β οΈ Burnout risk detected β take short breaks every 45 min and avoid all-nighters.")
    if motivation < 1:
        tips.append("π― Set small daily goals and reward yourself when you achieve them.")
    if not tips:
        tips.append("β You're on track! Keep your current routine and review key concepts regularly.")

    burnout_cell = "β οΈ Yes" if burnout else "β No"
    result = f"""
## π Student Analysis Report
| Indicator | Value |
|-----------|-------|
| Stress Score | {stress:.1f} / 12 |
| Focus Level | {focus:.2f} |
| Burnout Risk | {burnout_cell} |
## π― Risk Level: {risk}
## β±οΈ Study Priority: {priority}
## π¬ Comment Sentiment: {sentiment} (score: {score:.2f})
## π‘ Study Tips:
""" + "\n".join(f"- {t}" for t in tips)
    return result
# ─────────────────────────────────────────────────────────────
# TAB 2 — Data Visualisation
# ─────────────────────────────────────────────────────────────
def plot_chart(chart_type):
    """Draw the chart selected in the Data Visualisation tab.

    Args:
        chart_type: Label of the chart to draw. Recognised values are
            "Study Hours vs Final Score", "Stress Score by Risk Level",
            "Correlation Heatmap", "Sentiment Distribution" and
            "Feature Importance (Random Forest)". Any other value yields
            empty axes.

    Returns:
        A Matplotlib figure. It is detached from pyplot before being
        returned, so repeated calls do not accumulate open figures.
    """
    if not DATA_LOADED:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Data not loaded", ha="center")
        plt.close(fig)
        return fig

    # The theme only affects axes created afterwards, so it has to be set
    # before plt.subplots() for the very first chart to pick it up.
    sns.set_theme(style="whitegrid")
    fig, ax = plt.subplots(figsize=(9, 5))
    try:
        if chart_type == "Study Hours vs Final Score":
            sc = ax.scatter(df["hours_studied"], df["final_exam_score"],
                            c=df["stress_score"], cmap="RdYlGn_r", alpha=0.6)
            # Figure-level calls instead of plt.* so nothing depends on
            # pyplot's global "current figure".
            fig.colorbar(sc, ax=ax, label="Stress Score")
            ax.set_xlabel("Hours Studied")
            ax.set_ylabel("Final Exam Score")
            ax.set_title("Study Hours vs Final Score")
        elif chart_type == "Stress Score by Risk Level":
            df.boxplot(column="stress_score", by="risk_level", ax=ax,
                       patch_artist=True,
                       boxprops=dict(facecolor="steelblue", color="navy"),
                       medianprops=dict(color="white", linewidth=2))
            ax.set_title("Stress Score by Risk Level")
            # Blank out the figure-level title so only the axes title shows.
            fig.suptitle("")
        elif chart_type == "Correlation Heatmap":
            cols = ["hours_studied", "attendance", "sleep_hours",
                    "previous_scores", "stress_score", "focus_level",
                    "final_exam_score"]
            corr = df[cols].corr()
            sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
            ax.set_title("Correlation Heatmap")
        elif chart_type == "Sentiment Distribution":
            if "vader_label" not in df_reviews.columns:
                def _label(text):
                    # Score each comment once, then bucket the result.
                    compound = analyzer.polarity_scores(str(text))["compound"]
                    if compound >= 0.05:
                        return "positive"
                    if compound <= -0.05:
                        return "negative"
                    return "neutral"

                # Stored on the frame so later calls skip the scoring pass.
                df_reviews["vader_label"] = (
                    df_reviews["student_comment"].apply(_label))
            counts = df_reviews["vader_label"].value_counts()
            palette = {"positive": "mediumseagreen", "neutral": "gold",
                       "negative": "tomato"}
            colours = [palette.get(label, "gray") for label in counts.index]
            counts.plot(kind="bar", ax=ax, color=colours, edgecolor="white")
            ax.set_title("Sentiment Distribution of Student Comments")
            ax.tick_params(axis="x", rotation=0)
        elif chart_type == "Feature Importance (Random Forest)":
            if MODEL_DATA:
                clf, features, acc = MODEL_DATA
                imp = pd.Series(clf.feature_importances_,
                                index=features).sort_values()
                imp.plot(kind="barh", ax=ax, color="steelblue")
                ax.set_title(
                    f"Random Forest Feature Importance (Accuracy: {acc:.0%})")
        fig.tight_layout()
    finally:
        # Unregister from pyplot even if drawing failed; otherwise every
        # request would leave a figure alive for the life of the server.
        plt.close(fig)
    return fig
# ─────────────────────────────────────────────────────────────
# TAB 3 — Sentiment Analyser
# ─────────────────────────────────────────────────────────────
def analyse_sentiment(text):
    """Score a comment with VADER and format the result as Markdown.

    Args:
        text: The comment to analyse. ``None``, an empty string, or a
            whitespace-only string is treated as "no input".

    Returns:
        "Please enter a comment." when there is no input; otherwise a
        Markdown string with the sentiment label, the compound score, the
        positive/neutral/negative components and a one-line interpretation.
    """
    # `not text` first, so a None value never reaches .strip().
    if not text or not text.strip():
        return "Please enter a comment."

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    # Same +/-0.05 cut-offs drive both the label and the interpretation.
    if compound >= 0.05:
        label = "π Positive"
        interpretation = "This student appears confident and motivated."
    elif compound <= -0.05:
        label = "π Negative"
        interpretation = "This student may be struggling β consider flagging for support."
    else:
        label = "π Neutral"
        interpretation = "This student has a neutral outlook β monitor progress."

    return f"""
**Sentiment:** {label}
**Compound score:** {compound:.3f}
**Positive:** {scores['pos']:.3f} | **Neutral:** {scores['neu']:.3f} | **Negative:** {scores['neg']:.3f}
*Interpretation:* {interpretation}
"""
# ─────────────────────────────────────────────────────────────
# TAB 4 — Dataset Explorer
# ─────────────────────────────────────────────────────────────
def explore_data():
    """Produce the two Markdown panels of the Dataset Explorer tab.

    Returns:
        A pair of Markdown strings: the rounded ``describe()`` summary of
        the student dataset, and its first ten rows. When the dataset was
        not loaded, the pair is a short notice and an empty string.
    """
    if DATA_LOADED:
        stats_table = df.describe().round(2).to_markdown()
        preview_table = df.head(10).to_markdown()
        return ("### Summary Statistics\n" + stats_table,
                "### First 10 rows\n" + preview_table)
    return "Dataset not loaded.", ""
# ─────────────────────────────────────────────────────────────
# BUILD THE APP
# ─────────────────────────────────────────────────────────────
# Build the four-tab interface and bind each button to its handler function.
with gr.Blocks(title="Smart Study Planner", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("""
# π Smart Study Planner
**AI for Big Data Management β Group C2**
*Rosamaria Guadalupi Β· Sasha Nouaille Β· Alexia Beraud Valero Β· Aurora Vimercati Β· Luna Mariah Gallina*
---
""")
    with gr.Tabs():
        # Tab 1: student profile inputs, handled by analyse_student.
        with gr.TabItem("π Student Analyser"):
            gr.Markdown("### Enter a student profile to get a risk assessment and study recommendations.")
            with gr.Row():
                # Left column: numeric profile sliders.
                with gr.Column():
                    hours = gr.Slider(0, 40, value=20, label="Hours Studied per Week")
                    attendance = gr.Slider(0, 100, value=80, label="Attendance (%)")
                    sleep = gr.Slider(3, 12, value=7, step=0.5, label="Sleep Hours per Night")
                    prev_score = gr.Slider(0, 100, value=65, label="Previous Scores")
                    motivation = gr.Slider(0, 2, value=1, step=1,
                                           label="Motivation Level (0=Low, 1=Medium, 2=High)")
                    tutoring = gr.Slider(0, 5, value=1, step=1, label="Tutoring Sessions per Week")
                # Right column: free-text comment and exam context.
                with gr.Column():
                    comment = gr.Textbox(label="Student Comment",
                                         placeholder="e.g. I feel overwhelmed this week...",
                                         lines=3)
                    days_exam = gr.Slider(1, 30, value=5, step=1, label="Days Until Exam")
                    difficulty = gr.Slider(1, 5, value=3, step=1, label="Exam Difficulty (1-5)")
            btn1 = gr.Button("π Analyse Student", variant="primary")
            output1 = gr.Markdown()
            # Inputs are listed in the same order as analyse_student's parameters.
            btn1.click(analyse_student,
                       inputs=[hours, attendance, sleep, prev_score,
                               motivation, tutoring, comment, days_exam, difficulty],
                       outputs=output1)
        # Tab 2: chart picker, handled by plot_chart.
        with gr.TabItem("π Data Visualisation"):
            gr.Markdown("### Explore the student dataset through interactive charts.")
            # These labels are the exact strings plot_chart compares against.
            chart_choice = gr.Dropdown(
                choices=["Study Hours vs Final Score",
                         "Stress Score by Risk Level",
                         "Correlation Heatmap",
                         "Sentiment Distribution",
                         "Feature Importance (Random Forest)"],
                value="Study Hours vs Final Score",
                label="Select Chart")
            btn2 = gr.Button("π Generate Chart", variant="primary")
            plot = gr.Plot()
            btn2.click(plot_chart, inputs=chart_choice, outputs=plot)
        # Tab 3: single-comment sentiment check, handled by analyse_sentiment.
        with gr.TabItem("π¬ Sentiment Analyser"):
            gr.Markdown("### Analyse the sentiment of any student comment using VADER.")
            comment_input = gr.Textbox(label="Enter a student comment",
                                       placeholder="e.g. I can't focus and I'm exhausted...",
                                       lines=4)
            btn3 = gr.Button("π¬ Analyse Sentiment", variant="primary")
            output3 = gr.Markdown()
            btn3.click(analyse_sentiment, inputs=comment_input, outputs=output3)
        # Tab 4: dataset summary and preview, handled by explore_data.
        with gr.TabItem("π Dataset Explorer"):
            gr.Markdown("### Explore the enriched student dataset.")
            btn4 = gr.Button("π Load Dataset", variant="primary")
            stats = gr.Markdown()
            sample = gr.Markdown()
            # explore_data takes no inputs and returns one string per output.
            btn4.click(explore_data, outputs=[stats, sample])
    # Page footer.
    gr.Markdown("""
---
*Smart Study Planner Β· AI for Big Data Management Β· ESCP Business School*
""")
# Start the app as soon as the script is executed.
demo.launch()