# File size: 13,990 Bytes
# 41c9888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
import seaborn as sns
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

# ─── Load data ───────────────────────────────────────────────
# Load both CSVs once at import time. Every tab checks DATA_LOADED before
# touching the frames, so the app can still start in a degraded mode when the
# files are missing. `df` and `df_reviews` are always bound (None on failure)
# so a failed or half-finished load can never surface later as a NameError.
df = None
df_reviews = None
try:
    df = pd.read_csv("student_dataset_enriched.csv")
    df_reviews = pd.read_csv("synthetic_student_reviews.csv")
except Exception as exc:
    # `Exception` (not a bare `except:`) keeps the best-effort startup while
    # letting KeyboardInterrupt / SystemExit propagate. Drop any frame that did
    # load so the two datasets are never used inconsistently, and report the
    # cause instead of hiding it.
    df = None
    df_reviews = None
    DATA_LOADED = False
    print(f"[Smart Study Planner] dataset not loaded: {exc!r}")
else:
    DATA_LOADED = True

# Shared VADER analyzer, created once at import time and reused by the
# sentiment code further down (student report, sentiment chart, sentiment tab).
analyzer = SentimentIntensityAnalyzer()

# ─── Train RF model once at startup ──────────────────────────
def train_model():
    """Fit the random-forest risk classifier on the enriched student dataset.

    Writes a binary ``risk_label`` column into the module-level ``df``
    (1 where ``risk_level == "high"``, otherwise 0), holds out 20% of the
    rows, and trains a 100-tree ``RandomForestClassifier`` on the rest.

    Returns:
        ``None`` when the dataset was not loaded; otherwise the tuple
        ``(classifier, feature_names, holdout_accuracy)``.
    """
    if not DATA_LOADED:
        return None

    feature_names = [
        "hours_studied",
        "attendance",
        "sleep_hours",
        "previous_scores",
        "motivation_level",
        "tutoring_sessions",
        "stress_score",
        "focus_level",
    ]

    # NOTE: this adds the target column to the shared DataFrame in place.
    df["risk_label"] = (df["risk_level"] == "high").astype(int)

    # Fixed seed so the split (and therefore the reported accuracy) is repeatable.
    train_X, test_X, train_y, test_y = train_test_split(
        df[feature_names], df["risk_label"], test_size=0.2, random_state=2025)

    forest = RandomForestClassifier(n_estimators=100, random_state=2025)
    forest.fit(train_X, train_y)

    holdout_accuracy = accuracy_score(test_y, forest.predict(test_X))
    return forest, feature_names, holdout_accuracy

MODEL_DATA = train_model()

# ══════════════════════════════════════════════════════════════
# TAB 1 — Student Risk Analyser
# ══════════════════════════════════════════════════════════════
def _study_priority(days_exam, difficulty, stress, sleep):
    """Score exam proximity, difficulty, stress and sleep, and map the total to a label."""
    urgency = 0
    if days_exam <= 1:
        urgency += 3
    elif days_exam <= 3:
        urgency += 2
    elif days_exam <= 7:
        urgency += 1
    if difficulty >= 4:
        urgency += 2
    elif difficulty >= 3:
        urgency += 1
    if stress > 8:
        urgency += 2
    elif stress > 5:
        urgency += 1
    if sleep < 5:
        urgency += 1

    if urgency >= 6:
        return "🚨 CRITICAL"
    if urgency >= 4:
        return "🔶 HIGH"
    if urgency >= 2:
        return "🔷 MEDIUM"
    return "🟩 LOW"


def _study_tips(hours, sleep, motivation, stress, burnout):
    """Collect the advice lines triggered by the profile; never returns an empty list."""
    tips = []
    if sleep < 6:
        tips.append("💤 Prioritise sleep — aim for at least 7h to improve focus and memory consolidation.")
    if hours < 15:
        tips.append("📚 Increase daily study time gradually — start with +1h per day using the Pomodoro technique.")
    if stress > 7:
        tips.append("🧘 Practice stress management: 5-min breathing exercises before each study session.")
    if burnout:
        tips.append("⚠️ Burnout risk detected — take short breaks every 45 min and avoid all-nighters.")
    if motivation < 1:
        tips.append("🎯 Set small daily goals and reward yourself when you achieve them.")
    if not tips:
        tips.append("✅ You're on track! Keep your current routine and review key concepts regularly.")
    return tips


def analyse_student(hours, attendance, sleep, prev_score,
                    motivation, tutoring, comment, days_exam, difficulty):
    """Build the Markdown risk report for one student profile.

    Args:
        hours: Hours studied per week.
        attendance: Attendance percentage. Accepted for the UI wiring but not
            used in any of the rules below.
        sleep: Sleep hours per night.
        prev_score: Previous score; below 60 marks the student as high risk.
        motivation: Motivation level (0=Low, 1=Medium, 2=High).
        tutoring: Tutoring sessions per week. Accepted but not used.
        comment: Free-text student comment scored with VADER. ``None`` or a
            blank string is treated as neutral.
        days_exam: Days until the exam.
        difficulty: Exam difficulty (1-5).

    Returns:
        A Markdown string with the indicator table, risk level, study
        priority, comment sentiment and a bullet list of study tips.
    """
    # Derived indicators: less sleep and lower motivation both raise stress.
    stress = (10 - sleep) + (2 - motivation)
    focus = motivation * (sleep / 10)
    burnout = stress > 7 and hours < 15

    # High risk on a weak track record or on the burnout pattern.
    risk = "🔴 HIGH RISK" if (prev_score < 60 or burnout) else "🟢 LOW RISK"

    priority = _study_priority(days_exam, difficulty, stress, sleep)

    # A missing/blank comment is scored 0.0 (neutral) without calling VADER,
    # which avoids a crash on None; VADER itself reports 0.0 for text with no
    # tokens, so the result for "" is unchanged.
    if comment is not None and str(comment).strip():
        score = analyzer.polarity_scores(comment)["compound"]
    else:
        score = 0.0
    if score >= 0.05:
        sentiment = "😊 Positive"
    elif score <= -0.05:
        sentiment = "😟 Negative"
    else:
        sentiment = "😐 Neutral"

    tips = _study_tips(hours, sleep, motivation, stress, burnout)
    burnout_text = "⚠️ Yes" if burnout else "✅ No"

    result = f"""
## 📊 Student Analysis Report

| Indicator | Value |
|-----------|-------|
| Stress Score | {stress:.1f} / 12 |
| Focus Level | {focus:.2f} |
| Burnout Risk | {burnout_text} |

## 🎯 Risk Level: {risk}
## ⏱️ Study Priority: {priority}
## 💬 Comment Sentiment: {sentiment} (score: {score:.2f})

## 💡 Study Tips:
""" + "\n".join(f"- {t}" for t in tips)

    return result

# ══════════════════════════════════════════════════════════════
# TAB 2 — Data Visualisation
# ══════════════════════════════════════════════════════════════
def _vader_label(text):
    """Classify *text* as positive / negative / neutral from one VADER pass."""
    compound = analyzer.polarity_scores(str(text))["compound"]
    if compound >= 0.05:
        return "positive"
    if compound <= -0.05:
        return "negative"
    return "neutral"


def plot_chart(chart_type):
    """Render the chart selected in the Data Visualisation tab.

    Args:
        chart_type: One of the dropdown labels ("Study Hours vs Final Score",
            "Stress Score by Risk Level", "Correlation Heatmap",
            "Sentiment Distribution", "Feature Importance (Random Forest)").

    Returns:
        A matplotlib ``Figure``. When the dataset is not loaded the figure
        only carries a "Data not loaded" message; an unrecognised
        ``chart_type`` yields a figure with an explanatory message.

    The figure is detached from pyplot before it is returned so a
    long-running server does not accumulate open figures, and all drawing
    goes through ``fig``/``ax`` rather than pyplot's "current figure" so
    concurrent requests cannot draw on each other's charts.
    """
    if not DATA_LOADED:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Data not loaded", ha="center")
        plt.close(fig)
        return fig

    # Apply the theme BEFORE creating the axes; set afterwards it would only
    # affect the next figure, leaving the first chart unstyled.
    sns.set_theme(style="whitegrid")
    fig, ax = plt.subplots(figsize=(9, 5))

    if chart_type == "Study Hours vs Final Score":
        sc = ax.scatter(df["hours_studied"], df["final_exam_score"],
                        c=df["stress_score"], cmap="RdYlGn_r", alpha=0.6)
        fig.colorbar(sc, ax=ax, label="Stress Score")
        ax.set_xlabel("Hours Studied")
        ax.set_ylabel("Final Exam Score")
        ax.set_title("Study Hours vs Final Score")

    elif chart_type == "Stress Score by Risk Level":
        df.boxplot(column="stress_score", by="risk_level", ax=ax,
                   patch_artist=True,
                   boxprops=dict(facecolor="steelblue", color="navy"),
                   medianprops=dict(color="white", linewidth=2))
        ax.set_title("Stress Score by Risk Level")
        # Drop the automatic "Boxplot grouped by ..." super-title.
        fig.suptitle("")

    elif chart_type == "Correlation Heatmap":
        cols = ["hours_studied", "attendance", "sleep_hours",
                "previous_scores", "stress_score", "focus_level", "final_exam_score"]
        corr = df[cols].corr()
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        ax.set_title("Correlation Heatmap")

    elif chart_type == "Sentiment Distribution":
        # Label the reviews on first use and cache the column on the shared
        # frame so later requests skip the VADER pass.
        if "vader_label" not in df_reviews.columns:
            df_reviews["vader_label"] = df_reviews["student_comment"].apply(_vader_label)
        counts = df_reviews["vader_label"].value_counts()
        palette = {"positive": "mediumseagreen", "neutral": "gold",
                   "negative": "tomato"}
        colours = [palette.get(label, "gray") for label in counts.index]
        counts.plot(kind="bar", ax=ax, color=colours, edgecolor="white")
        ax.set_title("Sentiment Distribution of Student Comments")
        ax.tick_params(axis="x", rotation=0)

    elif chart_type == "Feature Importance (Random Forest)":
        if MODEL_DATA:
            clf, features, acc = MODEL_DATA
            imp = pd.Series(clf.feature_importances_, index=features).sort_values()
            imp.plot(kind="barh", ax=ax, color="steelblue")
            ax.set_title(f"Random Forest Feature Importance (Accuracy: {acc:.0%})")

    else:
        # Not reachable from the dropdown; say so instead of returning a blank plot.
        ax.text(0.5, 0.5, f"Unknown chart type: {chart_type}", ha="center")

    fig.tight_layout()
    plt.close(fig)
    return fig

# ══════════════════════════════════════════════════════════════
# TAB 3 — Sentiment Analyser
# ══════════════════════════════════════════════════════════════
def analyse_sentiment(text):
    """Score a student comment with VADER and return a Markdown summary.

    Args:
        text: The comment to analyse. ``None`` or a blank/whitespace-only
            string is rejected with a prompt instead of being scored.

    Returns:
        ``"Please enter a comment."`` for missing input; otherwise a Markdown
        string with the sentiment label, the compound / positive / neutral /
        negative scores and a one-line interpretation.
    """
    # Guard against None as well as "" so an empty textbox never raises.
    if text is None or not text.strip():
        return "Please enter a comment."

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    # Same ±0.05 compound thresholds drive both the label and the interpretation.
    if compound >= 0.05:
        label = "😊 Positive"
        interpretation = "This student appears confident and motivated."
    elif compound <= -0.05:
        label = "😟 Negative"
        interpretation = "This student may be struggling — consider flagging for support."
    else:
        label = "😐 Neutral"
        interpretation = "This student has a neutral outlook — monitor progress."

    return f"""
**Sentiment:** {label}
**Compound score:** {compound:.3f}
**Positive:** {scores['pos']:.3f} | **Neutral:** {scores['neu']:.3f} | **Negative:** {scores['neg']:.3f}

*Interpretation:* {interpretation}
"""

# ══════════════════════════════════════════════════════════════
# TAB 4 — Dataset Explorer
# ══════════════════════════════════════════════════════════════
def explore_data():
    """Produce the two Markdown panels shown in the Dataset Explorer tab.

    Returns:
        A ``(summary, sample)`` pair of Markdown strings: rounded
        ``describe()`` statistics and the first ten rows of ``df``. When the
        dataset was not loaded, returns ``("Dataset not loaded.", "")``.
    """
    if DATA_LOADED:
        stats_table = df.describe().round(2).to_markdown()
        head_table = df.head(10).to_markdown()
        return (
            "### Summary Statistics\n" + stats_table,
            "### First 10 rows\n" + head_table,
        )
    return "Dataset not loaded.", ""

# ══════════════════════════════════════════════════════════════
# BUILD THE APP
# ══════════════════════════════════════════════════════════════
# Four-tab Gradio UI. Each tab wires one button to one of the handler
# functions defined above; all user-visible labels use real Unicode
# characters (emoji, em dashes, middle dots) rather than mis-decoded bytes.
with gr.Blocks(title="Smart Study Planner", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🎓 Smart Study Planner
    **AI for Big Data Management — Group C2**
    *Rosamaria Guadalupi · Sasha Nouaille · Alexia Beraud Valero · Aurora Vimercati · Luna Mariah Gallina*
    ---
    """)

    with gr.Tabs():

        # ── TAB 1 ─────────────────────────────────────────────
        with gr.TabItem("🔍 Student Analyser"):
            gr.Markdown("### Enter a student profile to get a risk assessment and study recommendations.")
            with gr.Row():
                with gr.Column():
                    hours      = gr.Slider(0, 40, value=20, label="Hours Studied per Week")
                    attendance = gr.Slider(0, 100, value=80, label="Attendance (%)")
                    sleep      = gr.Slider(3, 12, value=7, step=0.5, label="Sleep Hours per Night")
                    prev_score = gr.Slider(0, 100, value=65, label="Previous Scores")
                    motivation = gr.Slider(0, 2, value=1, step=1,
                                           label="Motivation Level (0=Low, 1=Medium, 2=High)")
                    tutoring   = gr.Slider(0, 5, value=1, step=1, label="Tutoring Sessions per Week")
                with gr.Column():
                    comment    = gr.Textbox(label="Student Comment",
                                            placeholder="e.g. I feel overwhelmed this week...",
                                            lines=3)
                    days_exam  = gr.Slider(1, 30, value=5, step=1, label="Days Until Exam")
                    difficulty = gr.Slider(1, 5, value=3, step=1, label="Exam Difficulty (1-5)")
                    btn1       = gr.Button("🔍 Analyse Student", variant="primary")

            output1 = gr.Markdown()
            # Input order must match analyse_student's positional parameters.
            btn1.click(analyse_student,
                       inputs=[hours, attendance, sleep, prev_score,
                                motivation, tutoring, comment, days_exam, difficulty],
                       outputs=output1)

        # ── TAB 2 ─────────────────────────────────────────────
        with gr.TabItem("📊 Data Visualisation"):
            gr.Markdown("### Explore the student dataset through interactive charts.")
            # These choice strings are matched literally inside plot_chart.
            chart_choice = gr.Dropdown(
                choices=["Study Hours vs Final Score",
                         "Stress Score by Risk Level",
                         "Correlation Heatmap",
                         "Sentiment Distribution",
                         "Feature Importance (Random Forest)"],
                value="Study Hours vs Final Score",
                label="Select Chart")
            btn2   = gr.Button("📊 Generate Chart", variant="primary")
            plot   = gr.Plot()
            btn2.click(plot_chart, inputs=chart_choice, outputs=plot)

        # ── TAB 3 ─────────────────────────────────────────────
        with gr.TabItem("💬 Sentiment Analyser"):
            gr.Markdown("### Analyse the sentiment of any student comment using VADER.")
            comment_input = gr.Textbox(label="Enter a student comment",
                                       placeholder="e.g. I can't focus and I'm exhausted...",
                                       lines=4)
            btn3     = gr.Button("💬 Analyse Sentiment", variant="primary")
            output3  = gr.Markdown()
            btn3.click(analyse_sentiment, inputs=comment_input, outputs=output3)

        # ── TAB 4 ─────────────────────────────────────────────
        with gr.TabItem("📂 Dataset Explorer"):
            gr.Markdown("### Explore the enriched student dataset.")
            btn4    = gr.Button("📂 Load Dataset", variant="primary")
            stats   = gr.Markdown()
            sample  = gr.Markdown()
            btn4.click(explore_data, outputs=[stats, sample])

    gr.Markdown("""
    ---
    *Smart Study Planner · AI for Big Data Management · ESCP Business School*
    """)

demo.launch()