Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -391,120 +391,146 @@ if not df_final.empty and source_choice == "(All)":
|
|
| 391 |
# ------------------- Output -------------------
|
| 392 |
# fig = plot_radar(df_final, grouped, chart_title)
|
| 393 |
# st.plotly_chart(fig, use_container_width=True)
|
| 394 |
-
# ============== Build per-stage vectors for comparisons ==============
|
|
|
|
| 395 |
# Columns to use based on mode
|
| 396 |
COLS = list(SKILL_GROUPS.keys()) if grouped else SKILLS
|
| 397 |
|
| 398 |
-
#
|
| 399 |
-
|
| 400 |
-
"""
|
| 401 |
-
use_merged=True -> read from df_final (after Likert merge)
|
| 402 |
-
use_merged=False -> read from df_resp (responses-only)
|
| 403 |
-
"""
|
| 404 |
-
df_base = df_final if use_merged else df_resp
|
| 405 |
-
if df_base.empty:
|
| 406 |
-
return {k: None for k in COLS}
|
| 407 |
-
|
| 408 |
-
if student and source:
|
| 409 |
-
label = f"{student} — {source}"
|
| 410 |
-
sub = df_base[df_base["label"] == label]
|
| 411 |
-
elif student and source is None:
|
| 412 |
-
# combined sources row (when overlay OFF)
|
| 413 |
-
sub = df_base[df_base["label"] == student]
|
| 414 |
-
else:
|
| 415 |
-
# cohort average across all rows in df_base
|
| 416 |
-
sub = df_base
|
| 417 |
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
|
| 426 |
-
def
|
|
|
|
| 427 |
if stage not in ("onboarding", "closing"):
|
| 428 |
return None
|
| 429 |
lg = mongo_get_likert_grouped(mongo_uri, db_name, summaries_coll, student, stage)
|
| 430 |
return lg if lg else None
|
| 431 |
|
| 432 |
-
def
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
elif stage == "combined_weeks":
|
| 439 |
-
srcs = ["week_2_responses", "week_3_responses", "closing_responses"]
|
| 440 |
-
else:
|
| 441 |
-
srcs = []
|
| 442 |
-
|
| 443 |
-
# Response-only mean across those sources
|
| 444 |
-
if not df_resp.empty:
|
| 445 |
-
if student and source_choice == "(All)":
|
| 446 |
-
# we may have aggregated to one row per student; compute from df_raw instead
|
| 447 |
-
# build per-source labels then average
|
| 448 |
-
rows = []
|
| 449 |
-
for s in srcs:
|
| 450 |
-
lbl = f"{student} — {s}"
|
| 451 |
-
sub = df_resp[df_resp["label"] == lbl]
|
| 452 |
-
if not sub.empty:
|
| 453 |
-
rows.append(sub[COLS].mean(numeric_only=True))
|
| 454 |
-
if rows:
|
| 455 |
-
m = pd.concat(rows, axis=1).mean(axis=1)
|
| 456 |
-
resp_vec = {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in COLS}
|
| 457 |
-
else:
|
| 458 |
-
resp_vec = {k: None for k in COLS}
|
| 459 |
-
elif student and source_choice != "(All)":
|
| 460 |
-
# if the UI is filtered to a specific source, ignore that and recompute from df_resp
|
| 461 |
-
rows = []
|
| 462 |
-
for s in srcs:
|
| 463 |
-
lbl = f"{student} — {s}"
|
| 464 |
-
sub = df_resp[df_resp["label"] == lbl]
|
| 465 |
-
if not sub.empty:
|
| 466 |
-
rows.append(sub[COLS].mean(numeric_only=True))
|
| 467 |
-
if rows:
|
| 468 |
-
m = pd.concat(rows, axis=1).mean(axis=1)
|
| 469 |
-
resp_vec = {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in COLS}
|
| 470 |
-
else:
|
| 471 |
-
resp_vec = {k: None for k in COLS}
|
| 472 |
-
else:
|
| 473 |
-
# cohort: average across all matching sources
|
| 474 |
-
sub = df_resp[df_resp["label"].str.contains(" — ", na=False)]
|
| 475 |
-
sub = sub[sub["label"].str.split(" — ").str[1].isin(srcs)]
|
| 476 |
-
if not sub.empty:
|
| 477 |
-
m = sub[COLS].mean(numeric_only=True)
|
| 478 |
-
resp_vec = {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in COLS}
|
| 479 |
-
else:
|
| 480 |
-
resp_vec = {k: None for k in COLS}
|
| 481 |
-
else:
|
| 482 |
-
resp_vec = {k: None for k in COLS}
|
| 483 |
|
| 484 |
-
#
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
|
| 490 |
-
|
| 491 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
|
| 493 |
-
#
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
else:
|
| 499 |
-
#
|
| 500 |
-
vec_onb = _stage_vector(None, "onboarding")
|
| 501 |
-
vec_cls = _stage_vector(None, "closing")
|
| 502 |
-
vec_combo = _stage_vector(None, "combined_weeks")
|
| 503 |
|
| 504 |
-
|
| 505 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 506 |
pct_onb_to_combo = {k: _percent_change(vec_combo.get(k), vec_onb.get(k)) for k in COLS}
|
| 507 |
|
|
|
|
| 508 |
df_plot = df_final.copy()
|
| 509 |
avg_label = None
|
| 510 |
|
|
@@ -524,6 +550,61 @@ st.plotly_chart(fig, use_container_width=True)
|
|
| 524 |
|
| 525 |
st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
|
| 526 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
|
| 528 |
# ================== Dynamic Stage Summaries (only if student answered that week) ==================
|
| 529 |
import re
|
|
|
|
| 391 |
# ------------------- Output -------------------
|
| 392 |
# fig = plot_radar(df_final, grouped, chart_title)
|
| 393 |
# st.plotly_chart(fig, use_container_width=True)
|
| 394 |
+
# ============== Build per-stage vectors for comparisons (LIKERT-AWARE) ==============
|
| 395 |
+
|
| 396 |
# Columns to use based on mode
|
| 397 |
COLS = list(SKILL_GROUPS.keys()) if grouped else SKILLS
|
| 398 |
|
| 399 |
+
# Map each skill to its group(s) once (used to project group Likert down to skills)
|
| 400 |
+
SKILL_TO_GROUPS = {s: [g for g, members in SKILL_GROUPS.items() if s in members] for s in SKILLS}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
+
def _project_likert_to_cols(likert_grouped: dict | None, cols: list[str], grouped_flag: bool) -> dict:
|
| 403 |
+
"""Return a vector aligned to COLS from group-level Likert. If ungrouped, project to skills."""
|
| 404 |
+
if not likert_grouped:
|
| 405 |
+
return {k: None for k in cols}
|
| 406 |
+
if grouped_flag:
|
| 407 |
+
return {k: (likert_grouped.get(k) if k in likert_grouped else None) for k in cols}
|
| 408 |
+
# ungrouped → average the groups a skill belongs to
|
| 409 |
+
out = {}
|
| 410 |
+
for s in cols:
|
| 411 |
+
gs = SKILL_TO_GROUPS.get(s, [])
|
| 412 |
+
vals = [likert_grouped.get(g) for g in gs if likert_grouped.get(g) is not None]
|
| 413 |
+
out[s] = float(np.mean(vals)) if vals else None
|
| 414 |
+
return out
|
| 415 |
|
| 416 |
+
def _merge_resp_and_likert(resp_vec: dict, likert_vec: dict) -> dict:
|
| 417 |
+
"""Average where both exist; else take whichever exists."""
|
| 418 |
+
out = {}
|
| 419 |
+
for k in resp_vec.keys():
|
| 420 |
+
rv = resp_vec.get(k, None)
|
| 421 |
+
lv = likert_vec.get(k, None)
|
| 422 |
+
if rv is not None and lv is not None:
|
| 423 |
+
out[k] = (rv + lv) / 2.0
|
| 424 |
+
elif rv is not None:
|
| 425 |
+
out[k] = rv
|
| 426 |
+
else:
|
| 427 |
+
out[k] = lv
|
| 428 |
+
return out
|
| 429 |
+
|
| 430 |
+
def _mean_vectors(vecs: list[dict]) -> dict:
|
| 431 |
+
"""Element-wise mean ignoring None; returns None if all Nones for a key."""
|
| 432 |
+
if not vecs:
|
| 433 |
+
return {}
|
| 434 |
+
keys = list(vecs[0].keys())
|
| 435 |
+
out = {}
|
| 436 |
+
for k in keys:
|
| 437 |
+
vals = [v.get(k) for v in vecs if v.get(k) is not None]
|
| 438 |
+
out[k] = (float(np.mean(vals)) if vals else None)
|
| 439 |
+
return out
|
| 440 |
+
|
| 441 |
+
def _resp_mean_for_sources(df_src: pd.DataFrame, student: str | None, sources: list[str], cols: list[str]) -> dict:
|
| 442 |
+
"""Mean of response scores across docs for (student,sources). If student None → cohort."""
|
| 443 |
+
if df_src.empty:
|
| 444 |
+
return {k: None for k in cols}
|
| 445 |
+
sub = df_src.copy()
|
| 446 |
+
if student:
|
| 447 |
+
sub = sub[sub["student"] == student]
|
| 448 |
+
sub = sub[sub["source"].isin(sources)]
|
| 449 |
+
if sub.empty:
|
| 450 |
+
return {k: None for k in cols}
|
| 451 |
+
m = sub[cols].mean(numeric_only=True)
|
| 452 |
+
return {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in cols}
|
| 453 |
|
| 454 |
+
def _likert_grouped_for_student_stage(student: str, stage: str) -> dict | None:
    """Fetch normalized (0-1) group-level Likert scores for one student.

    Only the onboarding and closing stages carry Likert summaries; any
    other stage yields None. An empty or missing summary also yields None.
    """
    if stage not in ("onboarding", "closing"):
        return None
    grouped_scores = mongo_get_likert_grouped(mongo_uri, db_name, summaries_coll, student, stage)
    return grouped_scores or None
|
| 460 |
|
| 461 |
+
def _student_stage_vectors(df_src: pd.DataFrame, stu: str, cols: list[str], grouped_flag: bool) -> dict:
    """Build onboarding/closing/combined vectors for a single student.

    Onboarding and closing each merge survey responses with that stage's
    Likert scores; "combined" is the element-wise mean of week 2, week 3
    and the (already Likert-merged) closing vector.
    """
    def _stage(sources: list[str], stage_name: str | None) -> dict:
        # Mean of responses over `sources`; merged with stage Likert when given.
        resp = _resp_mean_for_sources(df_src, stu, sources, cols)
        if stage_name is None:
            return resp
        likert = _project_likert_to_cols(
            _likert_grouped_for_student_stage(stu, stage_name), cols, grouped_flag
        )
        return _merge_resp_and_likert(resp, likert)

    onboarding_vec = _stage(["onboarding_responses"], "onboarding")
    closing_vec = _stage(["closing_responses"], "closing")
    week2_vec = _stage(["week_2_responses"], None)
    week3_vec = _stage(["week_3_responses"], None)
    # Closing enters the combined mean already merged with its Likert scores.
    combined_vec = _mean_vectors([week2_vec, week3_vec, closing_vec])
    return {"onboarding": onboarding_vec, "closing": closing_vec, "combined": combined_vec}
|
| 479 |
+
|
| 480 |
+
def _stage_vectors_for_current_selection(df_src: pd.DataFrame, student_choice: str | None, cols: list[str], grouped_flag: bool) -> dict:
|
| 481 |
+
"""
|
| 482 |
+
If a student is selected → return their vectors.
|
| 483 |
+
If cohort (“(All)”) → average per-student vectors (Likert included where available).
|
| 484 |
+
"""
|
| 485 |
+
if student_choice and student_choice != "(All)":
|
| 486 |
+
return _student_stage_vectors(df_src, student_choice, cols, grouped_flag)
|
| 487 |
+
|
| 488 |
+
# Cohort: compute for each student then average
|
| 489 |
+
if df_src.empty:
|
| 490 |
+
empty_vec = {k: None for k in cols}
|
| 491 |
+
return {"onboarding": empty_vec, "closing": empty_vec, "combined": empty_vec}
|
| 492 |
+
|
| 493 |
+
students = sorted(set(str(x) for x in df_src["student"].dropna().unique()))
|
| 494 |
+
per_student = [_student_stage_vectors(df_src, s, cols, grouped_flag) for s in students]
|
| 495 |
+
return {
|
| 496 |
+
"onboarding": _mean_vectors([p["onboarding"] for p in per_student]),
|
| 497 |
+
"closing": _mean_vectors([p["closing"] for p in per_student]),
|
| 498 |
+
"combined": _mean_vectors([p["combined"] for p in per_student]),
|
| 499 |
+
}
|
| 500 |
|
| 501 |
+
def _percent_change(new: float | None, old: float | None) -> float | None:
|
| 502 |
+
if new is None or old is None:
|
| 503 |
+
return None
|
| 504 |
+
if old == 0:
|
| 505 |
+
return None # or return 100.0 if you prefer
|
| 506 |
+
return (new - old) / old * 100.0
|
| 507 |
|
| 508 |
+
# Use df_raw (one row per doc) so overlay/aggregation doesn’t hide sources
|
| 509 |
+
# Ensure df_raw has the per-skill or per-group columns we need:
|
| 510 |
+
if grouped and not df_raw.empty:
|
| 511 |
+
# build grouped view just for comparisons
|
| 512 |
+
df_grouped_for_comp = df_raw.copy()
|
| 513 |
+
# aggregate per-doc row to grouped columns
|
| 514 |
+
df_grouped_for_comp = (
|
| 515 |
+
df_grouped_for_comp
|
| 516 |
+
.assign(**{
|
| 517 |
+
g: df_grouped_for_comp.apply(lambda r: safe_mean([r.get(s, np.nan) for s in SKILL_GROUPS[g]]), axis=1)
|
| 518 |
+
for g in SKILL_GROUPS.keys()
|
| 519 |
+
})
|
| 520 |
+
)
|
| 521 |
+
df_src_for_comp = df_grouped_for_comp[["student", "source"] + list(SKILL_GROUPS.keys())]
|
| 522 |
else:
|
| 523 |
+
df_src_for_comp = df_raw # already per-skill
|
|
|
|
|
|
|
|
|
|
| 524 |
|
| 525 |
+
stage_vecs = _stage_vectors_for_current_selection(df_src_for_comp, student_choice, COLS, grouped)
|
| 526 |
+
vec_onb = stage_vecs["onboarding"]
|
| 527 |
+
vec_cls = stage_vecs["closing"]
|
| 528 |
+
vec_combo = stage_vecs["combined"]
|
| 529 |
+
|
| 530 |
+
pct_onb_to_cls = {k: _percent_change(vec_cls.get(k), vec_onb.get(k)) for k in COLS}
|
| 531 |
pct_onb_to_combo = {k: _percent_change(vec_combo.get(k), vec_onb.get(k)) for k in COLS}
|
| 532 |
|
| 533 |
+
# ------------------- Plot + table above stays the same -------------------
|
| 534 |
df_plot = df_final.copy()
|
| 535 |
avg_label = None
|
| 536 |
|
|
|
|
| 550 |
|
| 551 |
st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
|
| 552 |
|
| 553 |
+
# ------------------- Comparisons Tab (LIKERT-AWARE) -------------------
|
| 554 |
+
tab_compare, = st.tabs(["📊 Comparisons"])
|
| 555 |
+
|
| 556 |
+
with tab_compare:
|
| 557 |
+
st.subheader("Onboarding → Closing — % Change (Likert-aware)")
|
| 558 |
+
df1 = pd.DataFrame({
|
| 559 |
+
"Dimension": COLS,
|
| 560 |
+
"Onboarding (merged)": [vec_onb.get(k) for k in COLS],
|
| 561 |
+
"Closing (merged)": [vec_cls.get(k) for k in COLS],
|
| 562 |
+
"% Change": [pct_onb_to_cls.get(k) for k in COLS],
|
| 563 |
+
})
|
| 564 |
+
st.dataframe(
|
| 565 |
+
df1.style.format({
|
| 566 |
+
"Onboarding (merged)": "{:.2f}",
|
| 567 |
+
"Closing (merged)": "{:.2f}",
|
| 568 |
+
"% Change": "{:+.1f}%"
|
| 569 |
+
}),
|
| 570 |
+
use_container_width=True
|
| 571 |
+
)
|
| 572 |
+
|
| 573 |
+
st.subheader("Onboarding → (Week2 + Week3 + ClosingMerged) — % Change")
|
| 574 |
+
df2 = pd.DataFrame({
|
| 575 |
+
"Dimension": COLS,
|
| 576 |
+
"Onboarding (merged)": [vec_onb.get(k) for k in COLS],
|
| 577 |
+
"Weeks 2+3 + Closing (closing merged)": [vec_combo.get(k) for k in COLS],
|
| 578 |
+
"% Change": [pct_onb_to_combo.get(k) for k in COLS],
|
| 579 |
+
})
|
| 580 |
+
st.dataframe(
|
| 581 |
+
df2.style.format({
|
| 582 |
+
"Onboarding (merged)": "{:.2f}",
|
| 583 |
+
"Weeks 2+3 + Closing (closing merged)": "{:.2f}",
|
| 584 |
+
"% Change": "{:+.1f}%"
|
| 585 |
+
}),
|
| 586 |
+
use_container_width=True
|
| 587 |
+
)
|
| 588 |
+
|
| 589 |
+
# Optional bar chart: %Δ Onboarding → Closing
|
| 590 |
+
try:
|
| 591 |
+
fig_delta = go.Figure()
|
| 592 |
+
fig_delta.add_bar(
|
| 593 |
+
x=COLS,
|
| 594 |
+
y=[(pct_onb_to_cls.get(k) if pct_onb_to_cls.get(k) is not None else 0.0) for k in COLS],
|
| 595 |
+
name="%Δ Onb→Closing"
|
| 596 |
+
)
|
| 597 |
+
fig_delta.update_layout(
|
| 598 |
+
title="% Change: Onboarding → Closing (Likert-aware)",
|
| 599 |
+
xaxis_title="Dimension",
|
| 600 |
+
yaxis_title="% change",
|
| 601 |
+
margin=dict(l=20, r=20, t=50, b=20)
|
| 602 |
+
)
|
| 603 |
+
st.plotly_chart(fig_delta, use_container_width=True)
|
| 604 |
+
except Exception:
|
| 605 |
+
pass
|
| 606 |
+
|
| 607 |
+
|
| 608 |
|
| 609 |
# ================== Dynamic Stage Summaries (only if student answered that week) ==================
|
| 610 |
import re
|