Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,549 +1,360 @@
|
|
| 1 |
-
|
| 2 |
-
import
|
| 3 |
-
import gzip
|
| 4 |
-
import io
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
import pandas as pd
|
| 8 |
-
import
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
else:
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
print("Loading graph...")
|
| 34 |
-
with gzip.open(LOCAL_PATH, "rt") as f:
|
| 35 |
-
G = nx.read_edgelist(f, nodetype=int)
|
| 36 |
-
|
| 37 |
-
print(f"Graph loaded: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
|
| 38 |
-
|
| 39 |
-
# Ensure largest connected component (should already be connected in this dataset)
|
| 40 |
-
if not nx.is_connected(G):
|
| 41 |
-
largest_cc = max(nx.connected_components(G), key=len)
|
| 42 |
-
G = G.subgraph(largest_cc).copy()
|
| 43 |
-
print(f"After LCC: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
|
| 44 |
-
|
| 45 |
-
nodes = list(G.nodes())
|
| 46 |
-
node_index = {n: i for i, n in enumerate(nodes)}
|
| 47 |
-
N = len(nodes)
|
| 48 |
-
|
| 49 |
-
# -------------------------------------------------------
|
| 50 |
-
# 2. Real structural features from SNAP graph
|
| 51 |
-
# -------------------------------------------------------
|
| 52 |
-
|
| 53 |
-
# Degree
|
| 54 |
-
deg = np.array([G.degree(n) for n in nodes], dtype=float)
|
| 55 |
-
|
| 56 |
-
# Clustering coefficient
|
| 57 |
-
cc_dict = nx.clustering(G)
|
| 58 |
-
cc = np.array([cc_dict[n] for n in nodes], dtype=float)
|
| 59 |
-
|
| 60 |
-
# Average neighbor degree
|
| 61 |
-
avg_nd_dict = nx.average_neighbor_degree(G)
|
| 62 |
-
avg_nd = np.array([avg_nd_dict[n] for n in nodes], dtype=float)
|
| 63 |
-
|
| 64 |
-
# PageRank
|
| 65 |
-
pr_dict = nx.pagerank(G, alpha=0.85)
|
| 66 |
-
pr = np.array([pr_dict[n] for n in nodes], dtype=float)
|
| 67 |
-
|
| 68 |
-
def minmax(x):
|
| 69 |
-
x = np.asarray(x, dtype=float)
|
| 70 |
-
return (x - x.min()) / (x.max() - x.min() + 1e-8)
|
| 71 |
-
|
| 72 |
-
deg_norm = minmax(deg)
|
| 73 |
-
cc_norm = minmax(cc)
|
| 74 |
-
avg_nd_norm = minmax(avg_nd)
|
| 75 |
-
pr_norm = minmax(pr)
|
| 76 |
-
|
| 77 |
-
print("Sample structural features for first 5 nodes:")
|
| 78 |
-
for i in range(5):
|
| 79 |
-
print(
|
| 80 |
-
nodes[i],
|
| 81 |
-
"deg=", deg[i],
|
| 82 |
-
"deg_norm=", round(deg_norm[i], 3),
|
| 83 |
-
"cc_norm=", round(cc_norm[i], 3),
|
| 84 |
-
"avg_nd_norm=", round(avg_nd_norm[i], 3),
|
| 85 |
-
"pr_norm=", round(pr_norm[i], 3),
|
| 86 |
-
)
|
| 87 |
-
|
| 88 |
-
# -------------------------------------------------------
|
| 89 |
-
# 3. Paper-style behavioural features (synthetic but graph-driven)
|
| 90 |
-
# -------------------------------------------------------
|
| 91 |
-
|
| 92 |
-
rng = np.random.default_rng(42)
|
| 93 |
-
|
| 94 |
-
# Engagement: central users are more "engaged"
|
| 95 |
-
engagement = 50 * (0.6 * deg_norm + 0.4 * avg_nd_norm) + rng.normal(0, 3, size=N)
|
| 96 |
-
engagement = np.clip(engagement, 0, None)
|
| 97 |
-
eng_norm = minmax(engagement)
|
| 98 |
-
|
| 99 |
-
# Trust base: users with higher PageRank and clustering are more trusted
|
| 100 |
-
trust_base = (pr_norm + cc_norm) / 2.0
|
| 101 |
-
|
| 102 |
-
# Suspicious: high degree but low clustering and low PageRank
|
| 103 |
-
suspicious_raw = deg_norm * (1.0 - cc_norm) * (1.0 - pr_norm)
|
| 104 |
-
suspicious_raw += 0.1 * rng.random(N)
|
| 105 |
-
susp_norm = minmax(suspicious_raw)
|
| 106 |
-
|
| 107 |
-
# Activity regularity: more regular if clustering is high (stable community)
|
| 108 |
-
activity_reg = cc_norm + rng.normal(0, 0.05, size=N)
|
| 109 |
-
activity_reg = np.clip(activity_reg, 0.0, 1.0)
|
| 110 |
-
act_norm = minmax(activity_reg)
|
| 111 |
-
|
| 112 |
-
# Friend requests sent: more for high degree, but bounded
|
| 113 |
-
sent_requests = rng.poisson(lam=2 + 15 * deg_norm)
|
| 114 |
-
sent_requests = np.maximum(sent_requests, 1)
|
| 115 |
-
|
| 116 |
-
# Acceptance probability: higher for trusted, lower for suspicious
|
| 117 |
-
accepted_prob = 0.1 + 0.7 * ((trust_base + (1.0 - susp_norm)) / 2.0)
|
| 118 |
-
accepted_prob = np.clip(accepted_prob, 0.0, 1.0)
|
| 119 |
-
accepted_requests = rng.binomial(sent_requests, accepted_prob)
|
| 120 |
-
friend_request_ratio = accepted_requests / (sent_requests + 1e-8)
|
| 121 |
-
frr_norm = minmax(friend_request_ratio)
|
| 122 |
-
|
| 123 |
-
# Mutual friends ratio (approx): we use clustering coefficient as a proxy
|
| 124 |
-
# because high clustering means many mutual connections among friends.
|
| 125 |
-
mutual_friends_ratio = cc_norm.copy()
|
| 126 |
-
mfr_norm = minmax(mutual_friends_ratio)
|
| 127 |
-
|
| 128 |
-
friends_norm = minmax(deg) # total friends β degree
|
| 129 |
-
|
| 130 |
-
# -------------------------------------------------------
|
| 131 |
-
# 4. Build S, T, B scores (in spirit of your paper)
|
| 132 |
-
# -------------------------------------------------------
|
| 133 |
-
|
| 134 |
-
# S: social / structural (FRR, MFR, friends)
|
| 135 |
-
S_score = (frr_norm + mfr_norm + friends_norm) / 3.0
|
| 136 |
-
|
| 137 |
-
# T: trust (trust_base, FRR, inverse suspiciousness)
|
| 138 |
-
T_score = (trust_base + frr_norm + (1.0 - susp_norm)) / 3.0
|
| 139 |
-
|
| 140 |
-
# B: behaviour (engagement, regularity, suspiciousness)
|
| 141 |
-
B_score = (eng_norm + act_norm + susp_norm) / 3.0
|
| 142 |
-
|
| 143 |
-
# -------------------------------------------------------
|
| 144 |
-
# 5. Fuse S, T, B with variance-based weights
|
| 145 |
-
# -------------------------------------------------------
|
| 146 |
-
|
| 147 |
-
varS = np.var(S_score)
|
| 148 |
-
varT = np.var(T_score)
|
| 149 |
-
varB = np.var(B_score)
|
| 150 |
-
den = varS + varT + varB + 1e-8
|
| 151 |
-
wS, wT, wB = varS / den, varT / den, varB / den
|
| 152 |
-
|
| 153 |
-
F = np.vstack([
|
| 154 |
-
wS * S_score,
|
| 155 |
-
wT * T_score,
|
| 156 |
-
wB * B_score
|
| 157 |
-
]).T # shape (N, 3)
|
| 158 |
-
|
| 159 |
-
print("Fusion weights:", wS, wT, wB)
|
| 160 |
-
print("F shape:", F.shape)
|
| 161 |
-
|
| 162 |
-
# -------------------------------------------------------
|
| 163 |
-
# 6. KMeans clustering -> pseudo labels
|
| 164 |
-
# (0 = Trusted, 1 = Under Observation, 2 = Intruder)
|
| 165 |
-
# -------------------------------------------------------
|
| 166 |
-
|
| 167 |
-
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
|
| 168 |
-
cluster_raw = kmeans.fit_predict(F)
|
| 169 |
-
|
| 170 |
-
cluster_means = []
|
| 171 |
-
for c in range(3):
|
| 172 |
-
cluster_means.append((c, T_score[cluster_raw == c].mean()))
|
| 173 |
-
cluster_means_sorted = sorted(cluster_means, key=lambda x: x[1])
|
| 174 |
-
|
| 175 |
-
label_map = {
|
| 176 |
-
cluster_means_sorted[0][0]: 2, # lowest trust β Intruder
|
| 177 |
-
cluster_means_sorted[1][0]: 1, # medium β Under Observation
|
| 178 |
-
cluster_means_sorted[2][0]: 0 # highest β Trusted
|
| 179 |
-
}
|
| 180 |
-
|
| 181 |
-
cluster_labels = np.array([label_map[c] for c in cluster_raw], dtype=int)
|
| 182 |
-
|
| 183 |
-
label_names = {
|
| 184 |
-
0: "Trusted",
|
| 185 |
-
1: "Under Observation",
|
| 186 |
-
2: "Intruder"
|
| 187 |
-
}
|
| 188 |
-
|
| 189 |
-
status_counts = np.bincount(cluster_labels, minlength=3)
|
| 190 |
-
|
| 191 |
-
def make_status_bar_plot():
|
| 192 |
-
fig, ax = plt.subplots()
|
| 193 |
-
labels_txt = ["Trusted", "Under Observation", "Intruder"]
|
| 194 |
-
ax.bar(labels_txt, status_counts)
|
| 195 |
-
ax.set_ylabel("Number of users")
|
| 196 |
-
ax.set_title("Global distribution of user statuses (SNAP graph)")
|
| 197 |
-
fig.tight_layout()
|
| 198 |
-
return fig
|
| 199 |
-
|
| 200 |
-
# -------------------------------------------------------
|
| 201 |
-
# 7. Train small MLP on fused features -> status
|
| 202 |
-
# -------------------------------------------------------
|
| 203 |
-
|
| 204 |
-
X = torch.tensor(F, dtype=torch.float32)
|
| 205 |
-
y = torch.tensor(cluster_labels, dtype=torch.long)
|
| 206 |
-
|
| 207 |
-
dataset = TensorDataset(X, y)
|
| 208 |
-
loader = DataLoader(dataset, batch_size=128, shuffle=True)
|
| 209 |
-
|
| 210 |
-
class MLPClassifier(nn.Module):
|
| 211 |
-
def __init__(self, in_dim, hidden_dim=32, num_classes=3):
|
| 212 |
-
super().__init__()
|
| 213 |
-
self.net = nn.Sequential(
|
| 214 |
-
nn.Linear(in_dim, hidden_dim),
|
| 215 |
-
nn.ReLU(),
|
| 216 |
-
nn.Linear(hidden_dim, hidden_dim),
|
| 217 |
-
nn.ReLU(),
|
| 218 |
-
nn.Linear(hidden_dim, num_classes)
|
| 219 |
-
)
|
| 220 |
-
def forward(self, x):
|
| 221 |
-
return self.net(x)
|
| 222 |
-
|
| 223 |
-
model = MLPClassifier(in_dim=3)
|
| 224 |
-
criterion = nn.CrossEntropyLoss()
|
| 225 |
-
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
|
| 226 |
-
|
| 227 |
-
for epoch in range(20):
|
| 228 |
-
model.train()
|
| 229 |
-
total_loss = 0.0
|
| 230 |
-
for xb, yb in loader:
|
| 231 |
-
optimizer.zero_grad()
|
| 232 |
-
logits = model(xb)
|
| 233 |
-
loss = criterion(logits, yb)
|
| 234 |
-
loss.backward()
|
| 235 |
-
optimizer.step()
|
| 236 |
-
total_loss += loss.item() * xb.size(0)
|
| 237 |
-
# optional print, can be commented on HF to reduce logs
|
| 238 |
-
print(f"Epoch {epoch+1:02d} - loss = {total_loss / len(dataset):.4f}")
|
| 239 |
-
|
| 240 |
-
model.eval()
|
| 241 |
-
with torch.no_grad():
|
| 242 |
-
preds = model(X).argmax(dim=1)
|
| 243 |
-
acc = (preds == y).float().mean().item()
|
| 244 |
-
print(f"Training accuracy vs pseudo-labels: {acc:.3f}")
|
| 245 |
-
|
| 246 |
-
def predict_from_fused(S_val, T_val, B_val):
|
| 247 |
-
vec3 = np.array([wS * S_val, wT * T_val, wB * B_val], dtype=np.float32)
|
| 248 |
-
x = torch.tensor(vec3.reshape(1, -1), dtype=torch.float32)
|
| 249 |
-
model.eval()
|
| 250 |
-
with torch.no_grad():
|
| 251 |
-
logits = model(x)
|
| 252 |
-
probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
|
| 253 |
-
pred = int(np.argmax(probs))
|
| 254 |
-
return pred, probs
|
| 255 |
-
|
| 256 |
-
eng_min = engagement.min()
|
| 257 |
-
eng_max = engagement.max()
|
| 258 |
-
|
| 259 |
-
# -------------------------------------------------------
|
| 260 |
-
# 8. Map UI sliders -> S/T/B (paper-style logic)
|
| 261 |
-
# -------------------------------------------------------
|
| 262 |
-
|
| 263 |
-
def build_scores_from_user_input(
|
| 264 |
-
engagement_input,
|
| 265 |
-
suspicious_input,
|
| 266 |
-
activity_input,
|
| 267 |
-
frr_input,
|
| 268 |
-
mfr_input
|
| 269 |
-
):
|
| 270 |
-
# Normalize engagement using dataset range
|
| 271 |
-
eng_norm_ui = (engagement_input - eng_min) / (eng_max - eng_min + 1e-8)
|
| 272 |
-
eng_norm_ui = float(np.clip(eng_norm_ui, 0.0, 1.0))
|
| 273 |
-
|
| 274 |
-
susp_norm_ui = float(np.clip(suspicious_input, 0.0, 1.0))
|
| 275 |
-
act_norm_ui = float(np.clip(activity_input, 0.0, 1.0))
|
| 276 |
-
frr_norm_ui = float(np.clip(frr_input, 0.0, 1.0))
|
| 277 |
-
mfr_norm_ui = float(np.clip(mfr_input, 0.0, 1.0))
|
| 278 |
-
|
| 279 |
-
# Assume average number of friends ~ 0.5 normalized
|
| 280 |
-
friends_norm_ui = 0.5
|
| 281 |
-
|
| 282 |
-
# Trust estimate from engagement & suspiciousness
|
| 283 |
-
trust_norm_ui = (eng_norm_ui + (1.0 - susp_norm_ui)) / 2.0
|
| 284 |
-
|
| 285 |
-
# Construct S / T / B
|
| 286 |
-
S_ui = (frr_norm_ui + mfr_norm_ui + friends_norm_ui) / 3.0
|
| 287 |
-
T_ui = (trust_norm_ui + frr_norm_ui + (1.0 - susp_norm_ui)) / 3.0
|
| 288 |
-
B_ui = (eng_norm_ui + act_norm_ui + susp_norm_ui) / 3.0
|
| 289 |
-
|
| 290 |
-
return S_ui, T_ui, B_ui, eng_norm_ui, susp_norm_ui, act_norm_ui
|
| 291 |
-
|
| 292 |
-
# -------------------------------------------------------
|
| 293 |
-
# 9. Timeline (T1βT5) helpers
|
| 294 |
-
# -------------------------------------------------------
|
| 295 |
-
|
| 296 |
-
def make_timeline_plot(timeline_state):
|
| 297 |
-
fig, ax = plt.subplots()
|
| 298 |
-
if not timeline_state:
|
| 299 |
-
ax.text(0.5, 0.5, "No timeline yet", ha="center", va="center")
|
| 300 |
-
ax.set_axis_off()
|
| 301 |
-
fig.tight_layout()
|
| 302 |
-
return fig
|
| 303 |
-
|
| 304 |
-
steps = [i + 1 for i in range(len(timeline_state))]
|
| 305 |
-
trusted = [entry["probs"][0] for entry in timeline_state]
|
| 306 |
-
obs = [entry["probs"][1] for entry in timeline_state]
|
| 307 |
-
intr = [entry["probs"][2] for entry in timeline_state]
|
| 308 |
-
|
| 309 |
-
ax.plot(steps, trusted, marker="o", label="Trusted")
|
| 310 |
-
ax.plot(steps, obs, marker="o", label="Under Observation")
|
| 311 |
-
ax.plot(steps, intr, marker="o", label="Intruder")
|
| 312 |
-
|
| 313 |
-
ax.set_xticks(steps)
|
| 314 |
-
ax.set_xlabel("Time step (T1βT5)")
|
| 315 |
-
ax.set_ylabel("Probability")
|
| 316 |
-
ax.set_ylim(0, 1)
|
| 317 |
-
ax.set_title("User status probabilities over time")
|
| 318 |
-
ax.legend()
|
| 319 |
-
fig.tight_layout()
|
| 320 |
-
return fig
|
| 321 |
-
|
| 322 |
-
def simulate_week(
|
| 323 |
-
engagement_input,
|
| 324 |
-
suspicious_input,
|
| 325 |
-
activity_input,
|
| 326 |
-
frr_input,
|
| 327 |
-
mfr_input,
|
| 328 |
-
timeline_state
|
| 329 |
-
):
|
| 330 |
-
if timeline_state is None:
|
| 331 |
-
timeline_state = []
|
| 332 |
-
|
| 333 |
-
S_ui, T_ui, B_ui, eng_n, susp_n, act_n = build_scores_from_user_input(
|
| 334 |
-
engagement_input,
|
| 335 |
-
suspicious_input,
|
| 336 |
-
activity_input,
|
| 337 |
-
frr_input,
|
| 338 |
-
mfr_input
|
| 339 |
-
)
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
# Keep only last 5 time steps (T1βT5)
|
| 345 |
-
if len(timeline_state) >= 5:
|
| 346 |
-
timeline_state = timeline_state[1:]
|
| 347 |
-
timeline_state.append({
|
| 348 |
-
"status": status,
|
| 349 |
-
"probs": probs.tolist(),
|
| 350 |
-
"S": float(S_ui),
|
| 351 |
-
"T": float(T_ui),
|
| 352 |
-
"B": float(B_ui)
|
| 353 |
-
})
|
| 354 |
-
|
| 355 |
-
step_num = len(timeline_state)
|
| 356 |
-
|
| 357 |
-
# Current week summary
|
| 358 |
-
lines = []
|
| 359 |
-
lines.append(f"### Current Time Step: T{step_num}")
|
| 360 |
-
lines.append(f"**Predicted Status:** **{status}**")
|
| 361 |
-
lines.append("")
|
| 362 |
-
lines.append("**Probabilities:**")
|
| 363 |
-
lines.append(f"- Trusted: {probs[0]:.2f}")
|
| 364 |
-
lines.append(f"- Under Observation: {probs[1]:.2f}")
|
| 365 |
-
lines.append(f"- Intruder: {probs[2]:.2f}")
|
| 366 |
-
lines.append("")
|
| 367 |
-
lines.append("**Aggregated scores (0β1):**")
|
| 368 |
-
lines.append(f"- S (Social / Structural): `{S_ui:.2f}`")
|
| 369 |
-
lines.append(f"- T (Trust): `{T_ui:.2f}`")
|
| 370 |
-
lines.append(f"- B (Behaviour): `{B_ui:.2f}`")
|
| 371 |
-
lines.append("")
|
| 372 |
-
lines.append("**Inputs (normalized):**")
|
| 373 |
-
lines.append(f"- Engagement: `{eng_n:.2f}`")
|
| 374 |
-
lines.append(f"- Suspiciousness: `{susp_n:.2f}`")
|
| 375 |
-
lines.append(f"- Activity regularity: `{act_n:.2f}`")
|
| 376 |
-
|
| 377 |
-
current_md = "\n".join(lines)
|
| 378 |
-
|
| 379 |
-
# Timeline text
|
| 380 |
-
tl_lines = ["## Timeline (T1βT5)"]
|
| 381 |
-
for i, entry in enumerate(timeline_state):
|
| 382 |
-
p = entry["probs"]
|
| 383 |
-
tl_lines.append(
|
| 384 |
-
f"- **T{i+1}**: {entry['status']} | "
|
| 385 |
-
f"Trusted={p[0]:.2f}, Obs={p[1]:.2f}, Intruder={p[2]:.2f}"
|
| 386 |
-
)
|
| 387 |
-
timeline_md = "\n".join(tl_lines)
|
| 388 |
-
|
| 389 |
-
tl_fig = make_timeline_plot(timeline_state)
|
| 390 |
-
|
| 391 |
-
return current_md, timeline_md, tl_fig, timeline_state
|
| 392 |
-
|
| 393 |
-
def reset_timeline():
|
| 394 |
-
empty_fig = make_timeline_plot([])
|
| 395 |
-
return (
|
| 396 |
-
"Timeline reset. Adjust sliders and click **Next week (T+1)** to start from T1.",
|
| 397 |
-
"## Timeline (T1βT5)\n(No entries yet)",
|
| 398 |
-
empty_fig,
|
| 399 |
-
[]
|
| 400 |
-
)
|
| 401 |
|
| 402 |
-
#
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
else:
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
def
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
)
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
with
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
)
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
value=examples_df,
|
| 516 |
-
label="Sample nodes from SNAP Facebook",
|
| 517 |
-
interactive=False
|
| 518 |
-
)
|
| 519 |
-
refresh_button = gr.Button("Refresh examples")
|
| 520 |
-
|
| 521 |
-
timeline_state = gr.State([])
|
| 522 |
-
|
| 523 |
-
next_button.click(
|
| 524 |
-
fn=simulate_week,
|
| 525 |
-
inputs=[
|
| 526 |
-
engagement_slider,
|
| 527 |
-
suspicious_slider,
|
| 528 |
-
activity_slider,
|
| 529 |
-
frr_slider,
|
| 530 |
-
mfr_slider,
|
| 531 |
-
timeline_state
|
| 532 |
-
],
|
| 533 |
-
outputs=[current_box, timeline_box, timeline_plot, timeline_state]
|
| 534 |
-
)
|
| 535 |
-
|
| 536 |
-
reset_button.click(
|
| 537 |
-
fn=reset_timeline,
|
| 538 |
-
inputs=None,
|
| 539 |
-
outputs=[current_box, timeline_box, timeline_plot, timeline_state]
|
| 540 |
-
)
|
| 541 |
-
|
| 542 |
-
refresh_button.click(
|
| 543 |
-
fn=refresh_examples,
|
| 544 |
-
inputs=None,
|
| 545 |
-
outputs=[examples_table]
|
| 546 |
)
|
| 547 |
|
| 548 |
-
|
| 549 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import plotly.express as px
|
| 6 |
+
|
| 7 |
+
st.set_page_config(page_title="Excel β Management Insights (Power BI style)", layout="wide")
|
| 8 |
+
|
| 9 |
+
st.title("π Excel β Interactive Management Dashboard (Power BI style)")
|
| 10 |
+
st.caption("Grade-based decision rule: **PASS if Grade β₯ C (including C, C+, B-, etc.)** and **FAIL if below C (C-, D, F, etc.)**. Marks thresholds are not used.")
|
| 11 |
+
|
| 12 |
+
# -----------------------------
|
| 13 |
+
# Grade logic (FINAL as per you)
|
| 14 |
+
# -----------------------------
|
| 15 |
+
def grade_pass_fail(g):
    """Classify a letter grade as "Pass", "Fail", or "Unknown".

    Decision rule: PASS if grade >= C (C, C+, B-, etc.); FAIL if below C
    (C-, D, E, F families). Marks thresholds are intentionally not used.

    Parameters
    ----------
    g : str or NaN
        Raw grade cell value from the spreadsheet.

    Returns
    -------
    str
        "Pass", "Fail", or "Unknown" (missing or unrecognized grades).
    """
    if pd.isna(g):
        return "Unknown"
    g = str(g).strip().upper()

    # D/E/F families are always FAIL.
    if g.startswith(("D", "E", "F")):
        return "Fail"

    # C family: only C- fails; C and C+ pass.
    # (Fix: the original `g == "C-" or g.startswith("C-")` was redundant —
    # equality implies the prefix match.)
    if g.startswith("C"):
        return "Fail" if g.startswith("C-") else "Pass"

    # A/B families (with any +/- suffix) are PASS.
    if g.startswith(("A", "B")):
        return "Pass"

    return "Unknown"
|
| 35 |
+
|
| 36 |
+
def pick_grade_column(df: pd.DataFrame) -> str:
    """Return the name of the column holding the letter grade.

    Prefers the right-most column whose name contains "grade"
    (case-insensitive); otherwise falls back to the last column, since
    the user confirmed Grade is the final column.
    """
    chosen = None
    for col in df.columns:
        if "grade" in str(col).lower():
            chosen = col  # keep scanning so the last match wins
    if chosen is not None:
        return chosen
    return df.columns[-1]
|
| 42 |
+
|
| 43 |
+
def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose column labels have surrounding
    whitespace stripped (handles common trailing-space headers)."""
    cleaned = df.copy()
    cleaned.columns = [str(label).strip() for label in cleaned.columns]
    return cleaned
|
| 48 |
+
|
| 49 |
+
def coerce_numeric(df: pd.DataFrame, cols):
    """Convert each listed column of *df* to numeric, in place.

    Unparseable values become NaN (errors="coerce"); names in *cols*
    that are not columns of the frame are skipped. Returns the same
    DataFrame so callers can chain or reassign.
    """
    present = (name for name in cols if name in df.columns)
    for name in present:
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df
|
| 54 |
+
|
| 55 |
+
def describe_fail_reason(row, components):
    """Build a short, management-friendly hint for why a student failed.

    For a failing row, flags each assessment component (final/lab/mid/test)
    whose score falls in the lower quartile of that component across the
    whole cohort. Non-failing rows get an empty string. Quartiles are rough
    hinting only — pass/fail itself is decided purely by grade.

    NOTE(review): reads the module-level ``components_df`` frame, which is
    assigned later in the script before this is applied row-wise — confirm
    it is populated if this function is reused elsewhere.

    Parameters
    ----------
    row : Mapping or pd.Series
        One student record; must expose ``.get()``.
    components : list[str]
        Names of the numeric component columns to inspect.

    Returns
    -------
    str
        " | "-joined hints, or a generic message when nothing stands out.
    """
    if row.get("PassFail") != "Fail":
        return ""

    # Keyword -> hint text, checked in the original's order per component.
    keyword_messages = (
        ("final", "Final exam is in the lower quartile"),
        ("lab", "Lab total is in the lower quartile"),
        ("mid", "Mid exam is in the lower quartile"),
        ("test", "Test score is in the lower quartile"),
    )

    hints = []
    for c in components:
        v = row.get(c)
        if pd.notna(v):
            lowered = c.lower()
            q25 = None  # computed lazily, at most once per component
            for keyword, message in keyword_messages:
                if keyword in lowered:
                    if q25 is None:
                        # Hoisted: the original recomputed this percentile
                        # for every keyword check on the same column.
                        q25 = np.nanpercentile(components_df[c].dropna(), 25)
                    if v < q25:
                        hints.append(message)
    if not hints:
        return "Grade below C (check component performance & attendance/assessment issues)."
    return " | ".join(hints)
|
| 75 |
+
|
| 76 |
+
# -----------------------------
|
| 77 |
+
# Upload + read
|
| 78 |
+
# -----------------------------
|
| 79 |
+
uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
|
| 80 |
+
|
| 81 |
+
if uploaded is None:
|
| 82 |
+
st.info("Upload an Excel file to begin.")
|
| 83 |
+
st.stop()
|
| 84 |
+
|
| 85 |
+
xls = pd.ExcelFile(uploaded)
|
| 86 |
+
sheet = st.selectbox("Select sheet", xls.sheet_names, index=0)
|
| 87 |
+
raw = pd.read_excel(uploaded, sheet_name=sheet)
|
| 88 |
+
raw = normalize_headers(raw)
|
| 89 |
+
|
| 90 |
+
# Try to remove non-student rows (robust: keep rows with any numeric marks OR any grade-like text)
|
| 91 |
+
grade_col_name = pick_grade_column(raw)
|
| 92 |
+
tmp_grade = raw[grade_col_name].astype(str).str.strip()
|
| 93 |
+
grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
|
| 94 |
+
|
| 95 |
+
numeric_cols_guess = [c for c in raw.columns if c != grade_col_name]
|
| 96 |
+
numeric_signal = raw[numeric_cols_guess].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
|
| 97 |
+
|
| 98 |
+
df = raw[grade_like | numeric_signal].copy()
|
| 99 |
+
|
| 100 |
+
# Add Sno if exists, else create row id
|
| 101 |
+
sno_col = None
|
| 102 |
+
for c in df.columns:
|
| 103 |
+
if str(c).strip().lower() in ["sno", "sno.", "sr", "sr.", "id", "studentid", "student id"]:
|
| 104 |
+
sno_col = c
|
| 105 |
+
break
|
| 106 |
+
if sno_col is None:
|
| 107 |
+
df.insert(0, "Sno", range(1, len(df) + 1))
|
| 108 |
+
sno_col = "Sno"
|
| 109 |
+
|
| 110 |
+
# Grade column
|
| 111 |
+
df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
|
| 112 |
+
df["PassFail"] = df["Grade"].apply(grade_pass_fail)
|
| 113 |
+
df["Pass"] = df["PassFail"].eq("Pass")
|
| 114 |
+
df["Fail"] = df["PassFail"].eq("Fail")
|
| 115 |
+
|
| 116 |
+
# Identify likely mark columns (common names; if not found, pick numeric ones)
|
| 117 |
+
common_components = ["Test -1", "Test-1", "Test 1", "Mid Exam", "Mid", "Lab Total", "Final Exam", "Total"]
|
| 118 |
+
component_cols = [c for c in df.columns if c in common_components]
|
| 119 |
+
if not component_cols:
|
| 120 |
+
# fallback: all numeric columns except Sno
|
| 121 |
+
num_cols = df.columns[df.apply(lambda s: pd.to_numeric(s, errors="coerce").notna().mean() > 0.4)]
|
| 122 |
+
component_cols = [c for c in num_cols if c != sno_col]
|
| 123 |
+
|
| 124 |
+
# Coerce numerics (if present)
|
| 125 |
+
df = coerce_numeric(df, component_cols)
|
| 126 |
+
|
| 127 |
+
# Consistency score (std across available components)
|
| 128 |
+
if len(component_cols) >= 2:
|
| 129 |
+
df["Consistency_SD"] = df[component_cols].std(axis=1, skipna=True)
|
| 130 |
else:
|
| 131 |
+
df["Consistency_SD"] = np.nan
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
+
# Global for hinting
|
| 134 |
+
components_df = df.copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
+
# Optional βFail reasonβ (for drilldown / risk view)
|
| 137 |
+
if component_cols:
|
| 138 |
+
df["FailReasonHint"] = df.apply(lambda r: describe_fail_reason(r, component_cols), axis=1)
|
| 139 |
+
else:
|
| 140 |
+
df["FailReasonHint"] = np.where(df["Fail"], "Grade below C.", "")
|
| 141 |
+
|
| 142 |
+
# -----------------------------
|
| 143 |
+
# Sidebar: βPower BI pagesβ
|
| 144 |
+
# -----------------------------
|
| 145 |
+
st.sidebar.header("Perspective")
|
| 146 |
+
view = st.sidebar.radio(
|
| 147 |
+
"Choose a view",
|
| 148 |
+
["Executive (Management)", "Risk & Intervention", "Assessment Quality", "Student Drill-down", "Export for Power BI"],
|
| 149 |
+
index=0
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
st.sidebar.header("Filters")
|
| 153 |
+
pf = st.sidebar.multiselect("Pass/Fail", ["Pass", "Fail", "Unknown"], default=["Pass", "Fail", "Unknown"])
|
| 154 |
+
grade_unique = sorted([g for g in df["Grade"].dropna().unique()])
|
| 155 |
+
sel_grades = st.sidebar.multiselect("Grades", grade_unique, default=grade_unique)
|
| 156 |
+
|
| 157 |
+
filtered = df[df["PassFail"].isin(pf)]
|
| 158 |
+
filtered = filtered[filtered["Grade"].isin(sel_grades)]
|
| 159 |
+
|
| 160 |
+
# -----------------------------
|
| 161 |
+
# KPI Row
|
| 162 |
+
# -----------------------------
|
| 163 |
+
k1, k2, k3, k4, k5 = st.columns(5)
|
| 164 |
+
with k1: st.metric("Students", int(filtered.shape[0]))
|
| 165 |
+
with k2: st.metric("Pass", int(filtered["Pass"].sum()))
|
| 166 |
+
with k3: st.metric("Fail", int(filtered["Fail"].sum()))
|
| 167 |
+
with k4:
|
| 168 |
+
pr = (filtered["Pass"].mean() * 100) if filtered.shape[0] else 0
|
| 169 |
+
st.metric("Pass Rate", f"{pr:.1f}%")
|
| 170 |
+
with k5:
|
| 171 |
+
if "Total" in filtered.columns and pd.api.types.is_numeric_dtype(filtered["Total"]):
|
| 172 |
+
st.metric("Average Total", f"{filtered['Total'].mean():.2f}")
|
| 173 |
else:
|
| 174 |
+
st.metric("Average Total", "β")
|
| 175 |
+
|
| 176 |
+
st.divider()
|
| 177 |
+
|
| 178 |
+
# -----------------------------
|
| 179 |
+
# Views
|
| 180 |
+
# -----------------------------
|
| 181 |
+
def executive_view(d):
    """Render the "Executive (Management)" dashboard page for frame *d*.

    Shows grade/pass-fail distributions, three quick "hidden pattern"
    KPI metrics, and (when possible) a component-vs-Total correlation
    heatmap. Reads the module-level ``component_cols`` list and the
    ``st`` (Streamlit) / ``px`` (plotly.express) modules; renders via
    side effects only and returns None.
    """
    # Two equal-width columns for the headline distribution charts.
    left, right = st.columns([1, 1])

    with left:
        st.subheader("Grade Distribution")
        # value_counts(dropna=False) keeps missing grades visible as a bar.
        grade_counts = d["Grade"].value_counts(dropna=False).reset_index()
        grade_counts.columns = ["Grade", "Count"]
        fig = px.bar(grade_counts, x="Grade", y="Count")
        st.plotly_chart(fig, use_container_width=True)

    with right:
        st.subheader("Pass/Fail Distribution")
        pf_counts = d["PassFail"].value_counts(dropna=False).reset_index()
        pf_counts.columns = ["Status", "Count"]
        fig = px.pie(pf_counts, names="Status", values="Count")
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("Hidden Patterns (Quick Signals)")
    c1, c2, c3 = st.columns(3)

    # Pattern: Strong Lab but Fail (if lab exists)
    # "Strong" means at or above the cohort's 75th percentile for that lab column.
    if any("Lab" in c for c in component_cols):
        lab_col = [c for c in component_cols if "Lab" in c][0]
        strong_lab_fail = d[(d["Fail"]) & (d[lab_col].notna()) & (d[lab_col] >= d[lab_col].quantile(0.75))]
        with c1:
            st.metric("Fail with Strong Lab", int(strong_lab_fail.shape[0]))
    else:
        with c1:
            st.metric("Fail with Strong Lab", "β")

    # Pattern: Inconsistent high SD
    # Counts students whose across-component std-dev is in the top decile.
    if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
        top_incons = d["Consistency_SD"].quantile(0.90)
        with c2:
            st.metric("High Inconsistency (Top 10%)", int((d["Consistency_SD"] >= top_incons).sum()))
    else:
        with c2:
            st.metric("High Inconsistency (Top 10%)", "β")

    # Pattern: Fail with good Total (if Total exists)
    if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
        good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
        with c3:
            st.metric("Fail with High Total", int(good_total_fail.shape[0]))
    else:
        with c3:
            st.metric("Fail with High Total", "β")

    # Correlation heatmap only when components and a numeric Total exist.
    if component_cols and "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
        st.subheader("What Drives Total? (Correlation)")
        corr_cols = [c for c in component_cols if c in d.columns] + ["Total"]
        corr = d[corr_cols].corr(numeric_only=True)
        fig = px.imshow(corr, text_auto=True, aspect="auto")
        st.plotly_chart(fig, use_container_width=True)
|
| 235 |
+
|
| 236 |
+
def risk_view(d):
    """Render the failing-student list, a fail-type breakdown, and intervention tips.

    Args:
        d: Filtered marks DataFrame; must carry the boolean "Fail" column plus
           "Grade", "PassFail" and "FailReasonHint".

    Relies on module-level globals: sno_col (student id column name) and
    component_cols (numeric assessment columns).
    """
    st.subheader("Fail List (Grade < C)")
    fails = d[d["Fail"]].copy()

    # Bucket: C- vs D/F etc.
    # na=False: a missing Grade must NOT count as "C-" — NaN from
    # str.startswith is truthy inside np.where, which would misclassify it.
    fails["FailType"] = np.where(
        fails["Grade"].str.startswith("C-", na=False),
        "C- (Borderline Fail)",
        "Below C",
    )
    bucket = fails["FailType"].value_counts().reset_index()
    bucket.columns = ["Fail Type", "Count"]
    c1, c2 = st.columns([1, 2])
    with c1:
        fig = px.bar(bucket, x="Fail Type", y="Count")
        st.plotly_chart(fig, use_container_width=True)
    with c2:
        # Identifier + outcome columns first, then Total and each component.
        show_cols = [sno_col, "Grade", "PassFail"]
        for c in ["Total"] + component_cols:
            if c in fails.columns and c not in show_cols:
                show_cols.append(c)
        show_cols += ["FailReasonHint"]
        st.dataframe(fails[show_cols].sort_values(by=["Grade", sno_col]), use_container_width=True, height=420)

    st.subheader("Intervention Suggestions (Management-friendly)")
    st.markdown(
        """
- **Many C- failures** β run targeted revision + re-assessment readiness support (borderline group).
- **Failures concentrated with low Final** β strengthen exam preparation (mock exams + feedback).
- **Failures with strong Lab** β review exam alignment, study strategy, and assessment balance.
"""
    )
|
| 264 |
|
| 265 |
+
def assessment_quality_view(d):
    """Component distribution, component-vs-grade boxplot, data-quality flags,
    and (when a numeric "Total" exists) a correlation heatmap.

    Args:
        d: Filtered marks DataFrame with a "Grade" column.

    Relies on module-level global: component_cols (numeric assessment columns).
    Returns early with a warning when no component columns were detected.
    """
    st.subheader("Assessment Component Overview")
    if not component_cols:
        st.warning("No numeric component columns detected. Add columns like Test/Mid/Lab/Final/Total for deeper assessment analysis.")
        return

    # Component distributions.
    # component_cols is guaranteed non-empty here (guarded above), so index 0
    # is always valid — the original `min(0, len(component_cols) - 1)` could
    # only ever evaluate to 0 on this path and was dead/misleading code.
    comp = st.selectbox("Choose component", component_cols, index=0)
    fig = px.histogram(d, x=comp, nbins=20)
    st.plotly_chart(fig, use_container_width=True)

    # Component vs Grade
    st.subheader("Component vs Grade (Boxplot)")
    fig = px.box(d, x="Grade", y=comp)
    st.plotly_chart(fig, use_container_width=True)

    # Zero / missing checks per numeric component.
    st.subheader("Data Quality Flags")
    flags = []
    for c in component_cols:
        series = d[c]
        if pd.api.types.is_numeric_dtype(series):
            missing = int(series.isna().sum())
            zeros = int((series == 0).sum())
            flags.append({"Component": c, "Missing": missing, "Zeros": zeros})
    st.dataframe(pd.DataFrame(flags), use_container_width=True)

    # If Total exists: correlation heatmap of components against Total.
    if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
        st.subheader("Correlation Heatmap")
        corr_cols = [c for c in component_cols if c in d.columns] + ["Total"]
        corr = d[corr_cols].corr(numeric_only=True)
        fig = px.imshow(corr, text_auto=True, aspect="auto")
        st.plotly_chart(fig, use_container_width=True)
|
| 299 |
+
|
| 300 |
+
def student_drilldown_view(d):
    """Per-student breakdown: grade, pass/fail status, total, reason hint,
    component bar chart, and the raw record.

    Relies on module-level globals: sno_col and component_cols.
    """
    st.subheader("Student Drill-down")
    st.caption("Pick a student to view component breakdown and the grade-based decision.")
    sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
    row = d[d[sno_col] == sid].iloc[0]

    col_grade, col_status, col_total = st.columns(3)
    with col_grade:
        st.metric("Grade", str(row.get("Grade", "β")))
    with col_status:
        st.metric("Status", str(row.get("PassFail", "β")))
    with col_total:
        has_total = "Total" in d.columns and pd.notna(row.get("Total", np.nan))
        if has_total:
            st.metric("Total", f"{row['Total']:.2f}")
        else:
            st.metric("Total", "β")

    st.write("**Reason (simple hint):**", row.get("FailReasonHint", ""))

    # One bar per assessment component for this student.
    if component_cols:
        names = [c for c in component_cols if c in d.columns]
        scores = [row.get(c) for c in names]
        chart_df = pd.DataFrame({"Component": names, "Score": scores})
        st.plotly_chart(px.bar(chart_df, x="Component", y="Score"), use_container_width=True)

    st.subheader("Raw record")
    st.dataframe(pd.DataFrame(row).T, use_container_width=True)
|
| 326 |
+
|
| 327 |
+
def export_view(d):
    """CSV export for Power BI, suggested DAX measures, and summary tables."""
    st.subheader("Export for Power BI")
    st.caption("Download cleaned data with the computed PassFail fields. Load into Power BI (Get Data β Text/CSV).")

    st.download_button(
        "β¬οΈ Download Cleaned Data (CSV)",
        d.to_csv(index=False).encode("utf-8"),
        file_name="cleaned_marks_with_passfail.csv",
        mime="text/csv",
    )

    st.subheader("Recommended Power BI Measures (DAX)")
    st.code(r"""
Pass Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Pass")
Fail Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Fail")
Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
""", language="text")

    st.subheader("Summary Tables")
    # Grade counts first, then PassFail counts — same order as before.
    for col in ("Grade", "PassFail"):
        summary = d[col].value_counts(dropna=False).reset_index()
        summary.columns = [col, "Count"]
        st.dataframe(summary, use_container_width=True)
|
| 349 |
+
|
| 350 |
+
# Render the view chosen in the sidebar; Export is the fallback for any
# unrecognized selection (same behavior as the previous if/elif chain).
_VIEW_RENDERERS = {
    "Executive (Management)": executive_view,
    "Risk & Intervention": risk_view,
    "Assessment Quality": assessment_quality_view,
    "Student Drill-down": student_drilldown_view,
}
_VIEW_RENDERERS.get(view, export_view)(filtered)
|