Spaces:
Sleeping
Sleeping
File size: 13,990 Bytes
import logging
import warnings
from pathlib import Path

import gradio as gr
import matplotlib

matplotlib.use("Agg")  # select the non-interactive backend before pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

warnings.filterwarnings("ignore")
# ─── Load data ───────────────────────────────────────────────
# Load both CSV datasets once at import time. A failure must not stop the app
# from starting: DATA_LOADED gates every code path that touches the frames.
try:
    df = pd.read_csv("student_dataset_enriched.csv")
    df_reviews = pd.read_csv("synthetic_student_reviews.csv")
except (OSError, ValueError) as exc:
    # OSError covers a missing or unreadable file; pandas' EmptyDataError and
    # ParserError, as well as UnicodeDecodeError, all derive from ValueError.
    logging.getLogger(__name__).warning("Could not load datasets: %s", exc)
    # Bind both names so later references fail predictably instead of with
    # a NameError, and so a half-loaded state is never left behind.
    df = df_reviews = None
    DATA_LOADED = False
else:
    DATA_LOADED = True
# Module-level VADER analyzer, created once and reused by every sentiment call in this file.
analyzer = SentimentIntensityAnalyzer()
# ─── Train RF model once at startup ──────────────────────────
def train_model():
    """Fit the Random Forest risk classifier on the student dataset.

    The target is binary: 1 where ``risk_level`` equals ``"high"``, else 0.
    The 80/20 split and the forest both use ``random_state=2025`` so repeated
    start-ups give the same model.

    Returns:
        ``None`` when the datasets were not loaded; otherwise a tuple
        ``(clf, features, acc)`` holding the fitted classifier, the list of
        feature column names, and the accuracy on the held-out 20 %.
    """
    if not DATA_LOADED:
        return None

    features = [
        "hours_studied", "attendance", "sleep_hours",
        "previous_scores", "motivation_level",
        "tutoring_sessions", "stress_score", "focus_level",
    ]
    # Build the label as a standalone Series rather than writing a
    # "risk_label" column into the shared frame: training must not leak a
    # derived column into the Dataset Explorer tables.
    y = (df["risk_level"] == "high").astype(int)
    X = df[features]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=2025)
    clf = RandomForestClassifier(n_estimators=100, random_state=2025)
    clf.fit(X_train, y_train)
    acc = accuracy_score(y_test, clf.predict(X_test))
    return clf, features, acc
# Trained once at import time; None when the datasets were not loaded,
# otherwise the (clf, features, acc) tuple returned by train_model().
MODEL_DATA = train_model()
# ──────────────────────────────────────────────────────────────
# TAB 1 — Student Risk Analyser
# ──────────────────────────────────────────────────────────────
def _urgency_points(stress, sleep, days_exam, difficulty):
    """Add up the urgency points from exam proximity, difficulty, stress and sleep."""
    points = 0
    if days_exam <= 1:
        points += 3
    elif days_exam <= 3:
        points += 2
    elif days_exam <= 7:
        points += 1
    if difficulty >= 4:
        points += 2
    elif difficulty >= 3:
        points += 1
    if stress > 8:
        points += 2
    elif stress > 5:
        points += 1
    if sleep < 5:
        points += 1
    return points


def _study_tips(hours, sleep, motivation, stress, burnout):
    """Return every tip whose trigger applies, or a single all-clear message."""
    candidates = [
        (sleep < 6,
         "💤 Prioritise sleep — aim for at least 7h to improve focus and memory consolidation."),
        (hours < 15,
         "📚 Increase daily study time gradually — start with +1h per day using the Pomodoro technique."),
        (stress > 7,
         "🧘 Practice stress management: 5-min breathing exercises before each study session."),
        (burnout,
         "⚠️ Burnout risk detected — take short breaks every 45 min and avoid all-nighters."),
        (motivation < 1,
         "🎯 Set small daily goals and reward yourself when you achieve them."),
    ]
    tips = [tip for applies, tip in candidates if applies]
    return tips or [
        "✅ You're on track! Keep your current routine and review key concepts regularly."
    ]


def analyse_student(hours, attendance, sleep, prev_score,
                    motivation, tutoring, comment, days_exam, difficulty):
    """Build the Markdown risk report for one student profile.

    Args:
        hours: Hours studied per week.
        attendance: Attendance percentage. Accepted for the UI wiring but not
            used by the rule-based scoring below.
        sleep: Sleep hours per night.
        prev_score: Previous score; below 60 is flagged as high risk.
        motivation: Motivation level (0 = low, 1 = medium, 2 = high).
        tutoring: Tutoring sessions per week. Accepted but not used here.
        comment: Free-text student comment scored with VADER.
        days_exam: Days remaining until the exam.
        difficulty: Exam difficulty on a 1-5 scale.

    Returns:
        A Markdown string with the indicator table, the risk level, the study
        priority, the comment sentiment and a bullet list of study tips.
    """
    # Derived indicators. Stress reaches its maximum of 12 at sleep = 0 and
    # motivation = 0, which is where the "/ 12" in the report comes from.
    stress = (10 - sleep) + (2 - motivation)
    focus = motivation * (sleep / 10)
    burnout = stress > 7 and hours < 15

    # The burnout condition is also one of the two high-risk triggers.
    risk = "🔴 HIGH RISK" if prev_score < 60 or burnout else "🟢 LOW RISK"

    urgency = _urgency_points(stress, sleep, days_exam, difficulty)
    if urgency >= 6:
        priority = "🚨 CRITICAL"
    elif urgency >= 4:
        priority = "🔶 HIGH"
    elif urgency >= 2:
        priority = "🔷 MEDIUM"
    else:
        priority = "🟩 LOW"

    # Guard against a missing comment so the scorer always receives a string.
    score = analyzer.polarity_scores(comment or "")["compound"]
    if score >= 0.05:
        sentiment = "😊 Positive"
    elif score <= -0.05:
        sentiment = "😟 Negative"
    else:
        sentiment = "😐 Neutral"

    tips = _study_tips(hours, sleep, motivation, stress, burnout)
    burnout_text = "⚠️ Yes" if burnout else "✅ No"

    report_lines = [
        "",
        "## 📊 Student Analysis Report",
        "| Indicator | Value |",
        "|-----------|-------|",
        f"| Stress Score | {stress:.1f} / 12 |",
        f"| Focus Level | {focus:.2f} |",
        f"| Burnout Risk | {burnout_text} |",
        f"## 🎯 Risk Level: {risk}",
        f"## ⏱️ Study Priority: {priority}",
        f"## 💬 Comment Sentiment: {sentiment} (score: {score:.2f})",
        "## 💡 Study Tips:",
        *(f"- {tip}" for tip in tips),
    ]
    return "\n".join(report_lines)
# ──────────────────────────────────────────────────────────────
# TAB 2 — Data Visualisation
# ──────────────────────────────────────────────────────────────
def _vader_label(text):
    """Classify a comment as positive/negative/neutral from a single VADER pass."""
    compound = analyzer.polarity_scores(str(text))["compound"]
    if compound >= 0.05:
        return "positive"
    if compound <= -0.05:
        return "negative"
    return "neutral"


def plot_chart(chart_type):
    """Draw the chart selected in the Data Visualisation tab.

    Args:
        chart_type: One of the dropdown labels ("Study Hours vs Final Score",
            "Stress Score by Risk Level", "Correlation Heatmap",
            "Sentiment Distribution", "Feature Importance (Random Forest)").
            Any other value yields empty axes.

    Returns:
        A matplotlib Figure. When the datasets were not loaded, the figure
        only carries a "Data not loaded" message.
    """
    if not DATA_LOADED:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Data not loaded", ha="center")
        plt.close(fig)
        return fig

    # The theme must be set before the axes exist; otherwise the first chart
    # of the session is drawn with the default style.
    sns.set_theme(style="whitegrid")
    fig, ax = plt.subplots(figsize=(9, 5))

    if chart_type == "Study Hours vs Final Score":
        points = ax.scatter(df["hours_studied"], df["final_exam_score"],
                            c=df["stress_score"], cmap="RdYlGn_r", alpha=0.6)
        fig.colorbar(points, ax=ax, label="Stress Score")
        ax.set_xlabel("Hours Studied")
        ax.set_ylabel("Final Exam Score")
        ax.set_title("Study Hours vs Final Score")

    elif chart_type == "Stress Score by Risk Level":
        df.boxplot(column="stress_score", by="risk_level", ax=ax,
                   patch_artist=True,
                   boxprops=dict(facecolor="steelblue", color="navy"),
                   medianprops=dict(color="white", linewidth=2))
        ax.set_title("Stress Score by Risk Level")
        # Clear the figure-level title that the grouped boxplot adds.
        fig.suptitle("")

    elif chart_type == "Correlation Heatmap":
        cols = ["hours_studied", "attendance", "sleep_hours",
                "previous_scores", "stress_score", "focus_level",
                "final_exam_score"]
        sns.heatmap(df[cols].corr(), annot=True, fmt=".2f",
                    cmap="coolwarm", ax=ax)
        ax.set_title("Correlation Heatmap")

    elif chart_type == "Sentiment Distribution":
        if "vader_label" not in df_reviews.columns:
            # Score each comment once and keep the labels on the frame so
            # later requests reuse them.
            df_reviews["vader_label"] = (
                df_reviews["student_comment"].apply(_vader_label))
        counts = df_reviews["vader_label"].value_counts()
        palette = {"positive": "mediumseagreen", "neutral": "gold",
                   "negative": "tomato"}
        colours = [palette.get(label, "gray") for label in counts.index]
        counts.plot(kind="bar", ax=ax, color=colours, edgecolor="white")
        ax.set_title("Sentiment Distribution of Student Comments")
        ax.tick_params(axis="x", rotation=0)

    elif chart_type == "Feature Importance (Random Forest)":
        if MODEL_DATA:
            clf, features, acc = MODEL_DATA
            importance = pd.Series(clf.feature_importances_,
                                   index=features).sort_values()
            importance.plot(kind="barh", ax=ax, color="steelblue")
            ax.set_title(
                f"Random Forest Feature Importance (Accuracy: {acc:.0%})")

    # Use the figure's own methods instead of pyplot's "current figure" so
    # concurrent requests cannot touch each other's charts.
    fig.tight_layout()
    # Unregister from pyplot so figures do not pile up in a long-running app;
    # the returned Figure object can still be rendered.
    plt.close(fig)
    return fig
# ──────────────────────────────────────────────────────────────
# TAB 3 — Sentiment Analyser
# ──────────────────────────────────────────────────────────────
def analyse_sentiment(text):
    """Score one comment with VADER and return a Markdown summary.

    Args:
        text: The student comment. ``None``, empty or whitespace-only input
            produces a prompt to enter a comment instead of a score.

    Returns:
        A Markdown string with the sentiment label, the compound score, the
        positive/neutral/negative proportions and a one-line interpretation.
    """
    if not text or not text.strip():
        return "Please enter a comment."

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    # Label and interpretation share the same +/-0.05 cut-offs, so pick both
    # in one place.
    if compound >= 0.05:
        label = "😊 Positive"
        interpretation = "This student appears confident and motivated."
    elif compound <= -0.05:
        label = "😟 Negative"
        interpretation = ("This student may be struggling — "
                          "consider flagging for support.")
    else:
        label = "😐 Neutral"
        interpretation = ("This student has a neutral outlook — "
                          "monitor progress.")

    # Blank lines between entries keep Markdown from merging them into a
    # single paragraph.
    return "\n\n".join([
        f"**Sentiment:** {label}",
        f"**Compound score:** {compound:.3f}",
        f"**Positive:** {scores['pos']:.3f} | "
        f"**Neutral:** {scores['neu']:.3f} | "
        f"**Negative:** {scores['neg']:.3f}",
        f"*Interpretation:* {interpretation}",
    ])
# ──────────────────────────────────────────────────────────────
# TAB 4 — Dataset Explorer
# ──────────────────────────────────────────────────────────────
def explore_data():
    """Render the dataset overview shown in the Dataset Explorer tab.

    Returns:
        A pair of Markdown strings: the ``describe()`` summary rounded to two
        decimals, and the first ten rows. When the datasets were not loaded,
        the pair is ``("Dataset not loaded.", "")``.
    """
    if DATA_LOADED:
        stats_table = df.describe().round(2).to_markdown()
        head_table = df.head(10).to_markdown()
        return (
            f"### Summary Statistics\n{stats_table}",
            f"### First 10 rows\n{head_table}",
        )
    return "Dataset not loaded.", ""
# ──────────────────────────────────────────────────────────────
# BUILD THE APP
# ──────────────────────────────────────────────────────────────
# Assemble the four-tab Gradio interface. Each tab wires one button to one of
# the handler functions defined above.
with gr.Blocks(title="Smart Study Planner", theme=gr.themes.Soft()) as demo:
    # Blank lines between the header entries keep the trailing "---" a
    # horizontal rule instead of turning the preceding text into a heading.
    gr.Markdown(
        "# 📚 Smart Study Planner\n\n"
        "**AI for Big Data Management — Group C2**\n\n"
        "*Rosamaria Guadalupi · Sasha Nouaille · Alexia Beraud Valero · "
        "Aurora Vimercati · Luna Mariah Gallina*\n\n"
        "---"
    )

    with gr.Tabs():
        # ── TAB 1: rule-based risk report for a single student ──
        with gr.TabItem("🎓 Student Analyser"):
            gr.Markdown("### Enter a student profile to get a risk assessment and study recommendations.")
            with gr.Row():
                with gr.Column():
                    hours = gr.Slider(0, 40, value=20, label="Hours Studied per Week")
                    attendance = gr.Slider(0, 100, value=80, label="Attendance (%)")
                    sleep = gr.Slider(3, 12, value=7, step=0.5, label="Sleep Hours per Night")
                    prev_score = gr.Slider(0, 100, value=65, label="Previous Scores")
                    motivation = gr.Slider(0, 2, value=1, step=1,
                                           label="Motivation Level (0=Low, 1=Medium, 2=High)")
                    tutoring = gr.Slider(0, 5, value=1, step=1, label="Tutoring Sessions per Week")
                with gr.Column():
                    comment = gr.Textbox(label="Student Comment",
                                         placeholder="e.g. I feel overwhelmed this week...",
                                         lines=3)
                    days_exam = gr.Slider(1, 30, value=5, step=1, label="Days Until Exam")
                    difficulty = gr.Slider(1, 5, value=3, step=1, label="Exam Difficulty (1-5)")
            btn1 = gr.Button("🔍 Analyse Student", variant="primary")
            output1 = gr.Markdown()
            # Input order must match the parameter order of analyse_student.
            btn1.click(analyse_student,
                       inputs=[hours, attendance, sleep, prev_score,
                               motivation, tutoring, comment, days_exam, difficulty],
                       outputs=output1)

        # ── TAB 2: charts over the loaded datasets ──
        with gr.TabItem("📊 Data Visualisation"):
            gr.Markdown("### Explore the student dataset through interactive charts.")
            # These labels are matched verbatim inside plot_chart.
            chart_choice = gr.Dropdown(
                choices=["Study Hours vs Final Score",
                         "Stress Score by Risk Level",
                         "Correlation Heatmap",
                         "Sentiment Distribution",
                         "Feature Importance (Random Forest)"],
                value="Study Hours vs Final Score",
                label="Select Chart")
            btn2 = gr.Button("📈 Generate Chart", variant="primary")
            plot = gr.Plot()
            btn2.click(plot_chart, inputs=chart_choice, outputs=plot)

        # ── TAB 3: VADER sentiment for free text ──
        with gr.TabItem("💬 Sentiment Analyser"):
            gr.Markdown("### Analyse the sentiment of any student comment using VADER.")
            comment_input = gr.Textbox(label="Enter a student comment",
                                       placeholder="e.g. I can't focus and I'm exhausted...",
                                       lines=4)
            btn3 = gr.Button("💬 Analyse Sentiment", variant="primary")
            output3 = gr.Markdown()
            btn3.click(analyse_sentiment, inputs=comment_input, outputs=output3)

        # ── TAB 4: summary statistics and sample rows ──
        with gr.TabItem("📁 Dataset Explorer"):
            gr.Markdown("### Explore the enriched student dataset.")
            btn4 = gr.Button("📂 Load Dataset", variant="primary")
            stats = gr.Markdown()
            sample = gr.Markdown()
            btn4.click(explore_data, outputs=[stats, sample])

    gr.Markdown(
        "---\n\n"
        "*Smart Study Planner · AI for Big Data Management · ESCP Business School*"
    )

# Start the server only when run as a script, so importing this module
# does not launch it.
if __name__ == "__main__":
    demo.launch()