Tesneem committed on
Commit
a0dc00a
·
verified ·
1 Parent(s): 8cbfb34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -96
app.py CHANGED
@@ -391,120 +391,146 @@ if not df_final.empty and source_choice == "(All)":
391
  # ------------------- Output -------------------
392
  # fig = plot_radar(df_final, grouped, chart_title)
393
  # st.plotly_chart(fig, use_container_width=True)
394
- # ============== Build per-stage vectors for comparisons ==============
 
395
  # Columns to use based on mode
396
  COLS = list(SKILL_GROUPS.keys()) if grouped else SKILLS
397
 
398
- # Helper to extract the mean vector for (student, source) from df_resp/df_final
399
- def _mean_vector_for(student: str | None, source: str | None, use_merged: bool) -> dict:
400
- """
401
- use_merged=True -> read from df_final (after Likert merge)
402
- use_merged=False -> read from df_resp (responses-only)
403
- """
404
- df_base = df_final if use_merged else df_resp
405
- if df_base.empty:
406
- return {k: None for k in COLS}
407
-
408
- if student and source:
409
- label = f"{student} — {source}"
410
- sub = df_base[df_base["label"] == label]
411
- elif student and source is None:
412
- # combined sources row (when overlay OFF)
413
- sub = df_base[df_base["label"] == student]
414
- else:
415
- # cohort average across all rows in df_base
416
- sub = df_base
417
 
418
- if sub.empty:
419
- return {k: None for k in COLS}
420
- means = sub[COLS].mean(numeric_only=True)
421
- return {k: (None if pd.isna(means.get(k)) else float(means.get(k))) for k in COLS}
 
 
 
 
 
 
 
 
 
422
 
423
- # Build mapping skill->groups (you already used this in the Likert merge)
424
- SKILL_TO_GROUPS = {s: [g for g, members in SKILL_GROUPS.items() if s in members] for s in SKILLS}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
- def _likert_grouped_for(student: str, stage: str) -> dict | None:
 
427
  if stage not in ("onboarding", "closing"):
428
  return None
429
  lg = mongo_get_likert_grouped(mongo_uri, db_name, summaries_coll, student, stage)
430
  return lg if lg else None
431
 
432
- def _stage_vector(student: str | None, stage: str) -> dict:
433
- # Which sources make up this stage?
434
- if stage == "onboarding":
435
- srcs = ["onboarding_responses"]
436
- elif stage == "closing":
437
- srcs = ["closing_responses"]
438
- elif stage == "combined_weeks":
439
- srcs = ["week_2_responses", "week_3_responses", "closing_responses"]
440
- else:
441
- srcs = []
442
-
443
- # Response-only mean across those sources
444
- if not df_resp.empty:
445
- if student and source_choice == "(All)":
446
- # we may have aggregated to one row per student; compute from df_raw instead
447
- # build per-source labels then average
448
- rows = []
449
- for s in srcs:
450
- lbl = f"{student} — {s}"
451
- sub = df_resp[df_resp["label"] == lbl]
452
- if not sub.empty:
453
- rows.append(sub[COLS].mean(numeric_only=True))
454
- if rows:
455
- m = pd.concat(rows, axis=1).mean(axis=1)
456
- resp_vec = {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in COLS}
457
- else:
458
- resp_vec = {k: None for k in COLS}
459
- elif student and source_choice != "(All)":
460
- # if the UI is filtered to a specific source, ignore that and recompute from df_resp
461
- rows = []
462
- for s in srcs:
463
- lbl = f"{student} — {s}"
464
- sub = df_resp[df_resp["label"] == lbl]
465
- if not sub.empty:
466
- rows.append(sub[COLS].mean(numeric_only=True))
467
- if rows:
468
- m = pd.concat(rows, axis=1).mean(axis=1)
469
- resp_vec = {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in COLS}
470
- else:
471
- resp_vec = {k: None for k in COLS}
472
- else:
473
- # cohort: average across all matching sources
474
- sub = df_resp[df_resp["label"].str.contains(" — ", na=False)]
475
- sub = sub[sub["label"].str.split(" — ").str[1].isin(srcs)]
476
- if not sub.empty:
477
- m = sub[COLS].mean(numeric_only=True)
478
- resp_vec = {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in COLS}
479
- else:
480
- resp_vec = {k: None for k in COLS}
481
- else:
482
- resp_vec = {k: None for k in COLS}
483
 
484
- # Merge in Likert for onboarding/closing (projected to skills if ungrouped)
485
- if student:
486
- likert_g = _likert_grouped_for(student, "onboarding" if "onboarding_responses" in srcs else ("closing" if "closing_responses" in srcs and len(srcs)==1 else None))
487
- else:
488
- likert_g = None # no cohort Likert
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
 
490
- merged = _merge_resp_and_likert_vector(resp_vec, likert_g, grouped, SKILL_TO_GROUPS, SKILL_GROUPS)
491
- return merged
 
 
 
 
492
 
493
- # Build the vectors we need
494
- if student_choice != "(All)":
495
- vec_onb = _stage_vector(student_choice, "onboarding")
496
- vec_cls = _stage_vector(student_choice, "closing")
497
- vec_combo = _stage_vector(student_choice, "combined_weeks")
 
 
 
 
 
 
 
 
 
498
  else:
499
- # Cohort-wide comparison
500
- vec_onb = _stage_vector(None, "onboarding")
501
- vec_cls = _stage_vector(None, "closing")
502
- vec_combo = _stage_vector(None, "combined_weeks")
503
 
504
- # Compute % deltas
505
- pct_onb_to_cls = {k: _percent_change(vec_cls.get(k), vec_onb.get(k)) for k in COLS}
 
 
 
 
506
  pct_onb_to_combo = {k: _percent_change(vec_combo.get(k), vec_onb.get(k)) for k in COLS}
507
 
 
508
  df_plot = df_final.copy()
509
  avg_label = None
510
 
@@ -524,6 +550,61 @@ st.plotly_chart(fig, use_container_width=True)
524
 
525
  st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
526
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
 
528
  # ================== Dynamic Stage Summaries (only if student answered that week) ==================
529
  import re
 
391
  # ------------------- Output -------------------
392
  # fig = plot_radar(df_final, grouped, chart_title)
393
  # st.plotly_chart(fig, use_container_width=True)
394
+ # ============== Build per-stage vectors for comparisons (LIKERT-AWARE) ==============
395
+
396
  # Columns to use based on mode
397
  COLS = list(SKILL_GROUPS.keys()) if grouped else SKILLS
398
 
399
+ # Map each skill to its group(s) once (used to project group Likert down to skills)
400
+ SKILL_TO_GROUPS = {s: [g for g, members in SKILL_GROUPS.items() if s in members] for s in SKILLS}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
 
402
def _project_likert_to_cols(likert_grouped: dict | None, cols: list[str], grouped_flag: bool) -> dict:
    """Align group-level Likert scores to ``cols``.

    In grouped mode each column is looked up directly in ``likert_grouped``.
    In ungrouped mode each column is treated as a skill and receives the mean
    of the Likert scores of the groups listed for it in ``SKILL_TO_GROUPS``.
    Columns without any available score map to ``None``; an empty or missing
    ``likert_grouped`` yields ``None`` for every column.
    """
    if not likert_grouped:
        return dict.fromkeys(cols)

    if grouped_flag:
        return {col: likert_grouped.get(col) for col in cols}

    # Ungrouped: average the scores of every group the skill belongs to.
    projected = {}
    for skill in cols:
        scores = []
        for group in SKILL_TO_GROUPS.get(skill, []):
            score = likert_grouped.get(group)
            if score is not None:
                scores.append(score)
        projected[skill] = float(np.mean(scores)) if scores else None
    return projected
415
 
416
+ def _merge_resp_and_likert(resp_vec: dict, likert_vec: dict) -> dict:
417
+ """Average where both exist; else take whichever exists."""
418
+ out = {}
419
+ for k in resp_vec.keys():
420
+ rv = resp_vec.get(k, None)
421
+ lv = likert_vec.get(k, None)
422
+ if rv is not None and lv is not None:
423
+ out[k] = (rv + lv) / 2.0
424
+ elif rv is not None:
425
+ out[k] = rv
426
+ else:
427
+ out[k] = lv
428
+ return out
429
+
430
+ def _mean_vectors(vecs: list[dict]) -> dict:
431
+ """Element-wise mean ignoring None; returns None if all Nones for a key."""
432
+ if not vecs:
433
+ return {}
434
+ keys = list(vecs[0].keys())
435
+ out = {}
436
+ for k in keys:
437
+ vals = [v.get(k) for v in vecs if v.get(k) is not None]
438
+ out[k] = (float(np.mean(vals)) if vals else None)
439
+ return out
440
+
441
+ def _resp_mean_for_sources(df_src: pd.DataFrame, student: str | None, sources: list[str], cols: list[str]) -> dict:
442
+ """Mean of response scores across docs for (student,sources). If student None → cohort."""
443
+ if df_src.empty:
444
+ return {k: None for k in cols}
445
+ sub = df_src.copy()
446
+ if student:
447
+ sub = sub[sub["student"] == student]
448
+ sub = sub[sub["source"].isin(sources)]
449
+ if sub.empty:
450
+ return {k: None for k in cols}
451
+ m = sub[cols].mean(numeric_only=True)
452
+ return {k: (None if pd.isna(m.get(k)) else float(m.get(k))) for k in cols}
453
 
454
def _likert_grouped_for_student_stage(student: str, stage: str) -> dict | None:
    """Fetch the group-level Likert scores stored for ``student`` at ``stage``.

    Only the "onboarding" and "closing" stages are looked up; any other stage
    returns ``None`` without querying. A falsy result from
    ``mongo_get_likert_grouped`` is also returned as ``None``.

    NOTE(review): the scores are described upstream as normalized to 0-1;
    that is decided by ``mongo_get_likert_grouped``, not here - confirm.
    """
    if stage in ("onboarding", "closing"):
        found = mongo_get_likert_grouped(mongo_uri, db_name, summaries_coll, student, stage)
        if found:
            return found
    return None
460
 
461
def _student_stage_vectors(df_src: pd.DataFrame, stu: str, cols: list[str], grouped_flag: bool) -> dict:
    """Build the onboarding / closing / combined score vectors for one student.

    * onboarding = RESP(onboarding) ⊕ Likert(onboarding)
    * closing    = RESP(closing) ⊕ Likert(closing)
    * combined   = mean(RESP(week 2), RESP(week 3), closing)

    "⊕" is ``_merge_resp_and_likert``. The closing vector that feeds
    ``combined`` is the already-merged one, so it carries the Likert scores.

    Returns a dict with keys "onboarding", "closing" and "combined".
    """

    def _responses(source_name):
        return _resp_mean_for_sources(df_src, stu, [source_name], cols)

    def _likert(stage):
        return _project_likert_to_cols(_likert_grouped_for_student_stage(stu, stage), cols, grouped_flag)

    onboarding = _merge_resp_and_likert(_responses("onboarding_responses"), _likert("onboarding"))
    closing = _merge_resp_and_likert(_responses("closing_responses"), _likert("closing"))

    # Weeks 2 and 3 are responses-only; closing is reused in its merged form.
    weekly = [_responses(name) for name in ("week_2_responses", "week_3_responses")]
    combined = _mean_vectors(weekly + [closing])

    return {"onboarding": onboarding, "closing": closing, "combined": combined}
479
+
480
+ def _stage_vectors_for_current_selection(df_src: pd.DataFrame, student_choice: str | None, cols: list[str], grouped_flag: bool) -> dict:
481
+ """
482
+ If a student is selected → return their vectors.
483
+ If cohort (“(All)”) → average per-student vectors (Likert included where available).
484
+ """
485
+ if student_choice and student_choice != "(All)":
486
+ return _student_stage_vectors(df_src, student_choice, cols, grouped_flag)
487
+
488
+ # Cohort: compute for each student then average
489
+ if df_src.empty:
490
+ empty_vec = {k: None for k in cols}
491
+ return {"onboarding": empty_vec, "closing": empty_vec, "combined": empty_vec}
492
+
493
+ students = sorted(set(str(x) for x in df_src["student"].dropna().unique()))
494
+ per_student = [_student_stage_vectors(df_src, s, cols, grouped_flag) for s in students]
495
+ return {
496
+ "onboarding": _mean_vectors([p["onboarding"] for p in per_student]),
497
+ "closing": _mean_vectors([p["closing"] for p in per_student]),
498
+ "combined": _mean_vectors([p["combined"] for p in per_student]),
499
+ }
500
 
501
+ def _percent_change(new: float | None, old: float | None) -> float | None:
502
+ if new is None or old is None:
503
+ return None
504
+ if old == 0:
505
+ return None # or return 100.0 if you prefer
506
+ return (new - old) / old * 100.0
507
 
508
# Comparisons read from df_raw (one row per document) so that overlay /
# aggregation choices made for the chart cannot hide individual sources.
# In grouped mode the per-skill columns are first collapsed into one column
# per skill group; otherwise df_raw is used unchanged.
if not grouped or df_raw.empty:
    df_src_for_comp = df_raw  # already per-skill
else:
    # Every group column is computed from the untouched df_raw rows before
    # any of them is attached to the copy.
    df_grouped_for_comp = df_raw.copy().assign(**{
        g: df_raw.apply(
            lambda row, members=members: safe_mean([row.get(s, np.nan) for s in members]),
            axis=1,
        )
        for g, members in SKILL_GROUPS.items()
    })
    df_src_for_comp = df_grouped_for_comp[["student", "source"] + list(SKILL_GROUPS.keys())]

stage_vecs = _stage_vectors_for_current_selection(df_src_for_comp, student_choice, COLS, grouped)
vec_onb, vec_cls, vec_combo = (
    stage_vecs[stage] for stage in ("onboarding", "closing", "combined")
)

# Percent change of each dimension relative to onboarding.
pct_onb_to_cls = {dim: _percent_change(vec_cls.get(dim), vec_onb.get(dim)) for dim in COLS}
pct_onb_to_combo = {dim: _percent_change(vec_combo.get(dim), vec_onb.get(dim)) for dim in COLS}
532
 
533
+ # ------------------- Plot + table above stays the same -------------------
534
  df_plot = df_final.copy()
535
  avg_label = None
536
 
 
550
 
551
  st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
552
 
553
# ------------------- Comparisons Tab (LIKERT-AWARE) -------------------
# Two tables (onboarding vs. closing, onboarding vs. weeks 2+3+closing) and a
# bar chart of the onboarding -> closing percent change.
tab_compare, = st.tabs(["📊 Comparisons"])

with tab_compare:
    st.subheader("Onboarding → Closing — % Change (Likert-aware)")
    df1 = pd.DataFrame({
        "Dimension": COLS,
        "Onboarding (merged)": [vec_onb.get(k) for k in COLS],
        "Closing (merged)": [vec_cls.get(k) for k in COLS],
        "% Change": [pct_onb_to_cls.get(k) for k in COLS],
    })
    st.dataframe(
        df1.style.format(
            {
                "Onboarding (merged)": "{:.2f}",
                "Closing (merged)": "{:.2f}",
                "% Change": "{:+.1f}%",
            },
            # Missing scores arrive as None/NaN; without na_rep the numeric
            # format strings are applied to None and raise on render.
            na_rep="—",
        ),
        use_container_width=True,
    )

    st.subheader("Onboarding → (Week2 + Week3 + ClosingMerged) — % Change")
    df2 = pd.DataFrame({
        "Dimension": COLS,
        "Onboarding (merged)": [vec_onb.get(k) for k in COLS],
        "Weeks 2+3 + Closing (closing merged)": [vec_combo.get(k) for k in COLS],
        "% Change": [pct_onb_to_combo.get(k) for k in COLS],
    })
    st.dataframe(
        df2.style.format(
            {
                "Onboarding (merged)": "{:.2f}",
                "Weeks 2+3 + Closing (closing merged)": "{:.2f}",
                "% Change": "{:+.1f}%",
            },
            na_rep="—",
        ),
        use_container_width=True,
    )

    # Optional bar chart: %Δ Onboarding → Closing.
    # Dimensions without a computable change are drawn as 0.
    try:
        fig_delta = go.Figure()
        fig_delta.add_bar(
            x=COLS,
            y=[(pct_onb_to_cls.get(k) if pct_onb_to_cls.get(k) is not None else 0.0) for k in COLS],
            name="%Δ Onb→Closing",
        )
        fig_delta.update_layout(
            title="% Change: Onboarding → Closing (Likert-aware)",
            xaxis_title="Dimension",
            yaxis_title="% change",
            margin=dict(l=20, r=20, t=50, b=20),
        )
        st.plotly_chart(fig_delta, use_container_width=True)
    except Exception as exc:
        # The chart is best-effort, but say so instead of failing silently.
        st.caption(f"Could not render the % change chart: {exc}")
606
+
607
+
608
 
609
  # ================== Dynamic Stage Summaries (only if student answered that week) ==================
610
  import re