Student_Assessment

Sleeping

App Files Files Community

Tesneem committed on Aug 15, 2025

Commit

66b2287

verified ·

1 Parent(s): e99f760

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -60

app.py CHANGED Viewed

@@ -242,7 +242,9 @@ fig = plot_radar(df_final, grouped, chart_title)
 st.plotly_chart(fig, use_container_width=True)
 st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
-## ================== Dynamic Stage Summaries (only if student answered that week) ==================
 # Stage <-> Source mapping
 STAGE_TO_SOURCE = {
@@ -270,7 +272,6 @@ def _responses_for_student_stage(uri, db, responses_coll, student: str, stage: s
         return [d for d in docs if (d.get("answer") or "").strip()]
     except Exception:
         return []
-import re
 def _answer_total_score(resp: dict) -> float:
     skills = resp.get("skills") or {}
@@ -282,76 +283,110 @@ def _answer_total_score(resp: dict) -> float:
             pass
     return total
-def _norm_text(s: str) -> str:
-    # lower, collapse whitespace, strip surrounding punctuation/dots
-    return re.sub(r"\s+", " ", (s or "").lower()).strip(" .,\"'`”’“‘-–—()[]{}")
-def _fragments_in_order(answer_norm: str, frags_norm: list[str]) -> bool:
-    """Return True if all fragments appear in order anywhere in the answer."""
     start = 0
-    for frag in frags_norm:
-        idx = answer_norm.find(frag, start)
         if idx == -1:
             return False
         start = idx + len(frag)
     return True
 def _fix_cutoff_quotes(quotes: list[str], responses: list[dict]) -> list[str]:
     """
     Replace truncated quotes with full answers when possible.
-    Works for:
-      - '...' ellipsis in the middle (checks fragments in order, anywhere)
-      - plain middle fragments (substring match)
-    If multiple matches, pick the response with highest total skill score.
     """
     if not quotes:
         return []
-    # Precompute normalized answers + scores
-    norm_answers = []
     for r in responses:
-        ans = (r.get("answer") or "").strip()
-        if not ans:
             continue
-        norm_answers.append((
-            _norm_text(ans),
-            ans,
-            _answer_total_score(r)
-        ))
-    fulls = []
     for q in quotes:
         q_raw = (q or "").strip()
         if not q_raw:
             continue
-        q_norm = _norm_text(q_raw)
         candidates = []
-        if "..." in q_raw:
-            # split on ellipses, keep non-empty normalized fragments
-            parts = [p.strip() for p in re.split(r"\.\.\.|…", q_raw)]
-            parts_norm = [_norm_text(p) for p in parts if _norm_text(p)]
-            if parts_norm:
-                for ans_norm, ans_full, score in norm_answers:
-                    if _fragments_in_order(ans_norm, parts_norm):
-                        candidates.append((score, ans_full))
         else:
-            # plain substring anywhere in the answer
-            for ans_norm, ans_full, score in norm_answers:
-                if q_norm and q_norm in ans_norm:
-                    candidates.append((score, ans_full))
         if candidates:
-            # pick highest scoring answer
             candidates.sort(key=lambda x: x[0], reverse=True)
-            fulls.append(candidates[0][1])
         else:
-            # no match found → keep original
-            fulls.append(q_raw)
-    return fulls
-    return fulls
 def _top3_answers_by_skill_sum(responses: list[dict]) -> list[str]:
     """Pick up to 3 answers with the highest total skill score."""
@@ -360,13 +395,7 @@ def _top3_answers_by_skill_sum(responses: list[dict]) -> list[str]:
         ans = (r.get("answer") or "").strip()
         if not ans:
             continue
-        skills = r.get("skills") or {}
-        total = 0.0
-        for v in skills.values():
-            try:
-                total += float(v)
-            except Exception:
-                pass
         scored.append((total, ans))
     scored.sort(key=lambda x: x[0], reverse=True)
     return [ans for _, ans in scored[:3]]
@@ -383,10 +412,10 @@ def fetch_student_stage_summary(
     Return summary dict for a student+stage ONLY if the student has responses for that week.
     Otherwise, return None (so we don't render the panel).
     """
-    # 1) Require that the student answered that week
     responses = _responses_for_student_stage(uri, db, responses_coll, student, stage)
     if not responses:
-        return None  # <-- do not show summary panel
     # 2) Pull summary doc (patterns nested)
     patterns = {}
@@ -407,7 +436,7 @@ def fetch_student_stage_summary(
     most_consistent = patterns.get("most_consistent")
     most_developed = patterns.get("most_developed")
-    # 3) Fix cut-off quotes; if none after fixing, fallback to top 3 highest-scoring answers
     notable_quotes = _fix_cutoff_quotes(notable_quotes, responses)
     if not notable_quotes:
         notable_quotes = _top3_answers_by_skill_sum(responses)
@@ -441,12 +470,9 @@ if mongo_uri and student_choice != "(All)" and source_choice != "(All)":
                 st.markdown("**Top Strengths:** " + (", ".join(strengths) if strengths else "—"))
             st.markdown("**Notable Quotes:**")
-            quotes = summary.get("notable_quotes") or []
-            if quotes:
-                for q in quotes[:3]:
-                    st.markdown(f"> {q}")
-            else:
-                st.write("—")
 # # app.py — Student Skill Radar (MongoDB, secrets-based, no CSV)
 # import os

 st.plotly_chart(fig, use_container_width=True)
 st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
+# ================== Dynamic Stage Summaries (only if student answered that week) ==================
+import re
+import unicodedata
 # Stage <-> Source mapping
 STAGE_TO_SOURCE = {
         return [d for d in docs if (d.get("answer") or "").strip()]
     except Exception:
         return []
 def _answer_total_score(resp: dict) -> float:
     skills = resp.get("skills") or {}
             pass
     return total
+def _normalize_quotes_spaces(s: str) -> str:
+    """Normalize unicode punctuation (smart quotes, ellipsis), collapse spaces."""
+    if not s:
+        return ""
+    s = unicodedata.normalize("NFKC", s)
+    s = s.replace("…", "...")
+    s = re.sub(r"\s+", " ", s).strip()
+    return s
def _clean_for_loose_match(s: str) -> str:
    """Produce a lowercase, punctuation-free form of *s* for forgiving matching.

    The text is first passed through ``_normalize_quotes_spaces`` and
    lowercased. Every character that is neither a word character nor
    whitespace is then removed, and the remaining whitespace is collapsed to
    single spaces with the ends trimmed.
    """
    lowered = _normalize_quotes_spaces(s).lower()
    without_punct = re.sub(r"[^\w\s]", "", lowered)
    # Deleting punctuation can leave doubled spaces behind, so collapse again.
    return re.sub(r"\s+", " ", without_punct).strip()
+def _fragments_in_order_clean(ans_clean: str, frags_clean: list[str]) -> bool:
+    """True if all cleaned fragments appear in order anywhere in the cleaned answer."""
     start = 0
+    for frag in frags_clean:
+        idx = ans_clean.find(frag, start)
         if idx == -1:
             return False
         start = idx + len(frag)
     return True
def _build_relaxed_regex_from_fragments(parts: list[str]) -> re.Pattern:
    """
    Compile a case-insensitive pattern that finds the fragments in order.

    Each fragment is normalized with ``_normalize_quotes_spaces`` (punctuation
    is kept) and escaped so it matches literally. Consecutive fragments may be
    separated by at most 160 arbitrary characters, newlines included.
    Fragments that normalize to an empty string are ignored; if none remain,
    the returned pattern can never match.
    """
    flags = re.I | re.S
    escaped = []
    for part in parts:
        normalized = _normalize_quotes_spaces(part)
        if normalized:
            escaped.append(re.escape(normalized))
    if not escaped:
        return re.compile(r"(?!x)x", flags)  # match nothing
    # Lazy, bounded gap between neighbouring fragments.
    return re.compile(r"[\s\S]{0,160}?".join(escaped), flags)
 def _fix_cutoff_quotes(quotes: list[str], responses: list[dict]) -> list[str]:
     """
     Replace truncated quotes with full answers when possible.
+    Handles:
+      - Ellipses in the middle or end (fragments matched in order, anywhere)
+      - Smart quotes / punctuation differences
+      - Middle substrings (not necessarily prefix/suffix)
+    If multiple answers match, picks the one with highest total skill score.
+    Searches ONLY within the 'responses' provided (already filtered to student+stage).
     """
     if not quotes:
         return []
+    # Precompute answer variants + scores
+    resp_cache = []
     for r in responses:
+        full = (r.get("answer") or "").strip()
+        if not full:
             continue
+        full_norm = _normalize_quotes_spaces(full)
+        full_clean = _clean_for_loose_match(full_norm)
+        score = _answer_total_score(r)
+        resp_cache.append({"raw": full, "norm": full_norm, "clean": full_clean, "score": score})
+    results = []
     for q in quotes:
         q_raw = (q or "").strip()
         if not q_raw:
             continue
+        q_norm = _normalize_quotes_spaces(q_raw)
+        q_clean = _clean_for_loose_match(q_norm)
         candidates = []
+        if "..." in q_norm:
+            # Split into fragments and check in-order occurrence anywhere
+            parts = [p.strip() for p in q_norm.split("...") if p.strip()]
+            parts_clean = [_clean_for_loose_match(p) for p in parts if _clean_for_loose_match(p)]
+            # 1) Loose cleaned check
+            for rc in resp_cache:
+                if parts_clean and _fragments_in_order_clean(rc["clean"], parts_clean):
+                    candidates.append((rc["score"], rc["raw"]))
+            # 2) Relaxed regex on normalized text if needed
+            if not candidates and parts:
+                rx = _build_relaxed_regex_from_fragments(parts)
+                for rc in resp_cache:
+                    if rx.search(rc["norm"]):
+                        candidates.append((rc["score"], rc["raw"]))
         else:
+            # No ellipsis: loose substring match (cleaned), then normalized fallback
+            for rc in resp_cache:
+                if q_clean and q_clean in rc["clean"]:
+                    candidates.append((rc["score"], rc["raw"]))
+            if not candidates:
+                for rc in resp_cache:
+                    if q_norm and q_norm.lower() in rc["norm"].lower():
+                        candidates.append((rc["score"], rc["raw"]))
         if candidates:
             candidates.sort(key=lambda x: x[0], reverse=True)
+            results.append(candidates[0][1])
         else:
+            results.append(q_raw)  # keep original if no match
+    return results
 def _top3_answers_by_skill_sum(responses: list[dict]) -> list[str]:
     """Pick up to 3 answers with the highest total skill score."""
         ans = (r.get("answer") or "").strip()
         if not ans:
             continue
+        total = _answer_total_score(r)
         scored.append((total, ans))
     scored.sort(key=lambda x: x[0], reverse=True)
     return [ans for _, ans in scored[:3]]
     Return summary dict for a student+stage ONLY if the student has responses for that week.
     Otherwise, return None (so we don't render the panel).
     """
+    # 1) Require that the student answered that week (source derived from stage)
     responses = _responses_for_student_stage(uri, db, responses_coll, student, stage)
     if not responses:
+        return None
     # 2) Pull summary doc (patterns nested)
     patterns = {}
     most_consistent = patterns.get("most_consistent")
     most_developed = patterns.get("most_developed")
+    # 3) Repair cut-off quotes; if none after fixing, fallback to top 3 highest-scoring answers
     notable_quotes = _fix_cutoff_quotes(notable_quotes, responses)
     if not notable_quotes:
         notable_quotes = _top3_answers_by_skill_sum(responses)
                 st.markdown("**Top Strengths:** " + (", ".join(strengths) if strengths else "—"))
             st.markdown("**Notable Quotes:**")
+            for q in (summary.get("notable_quotes") or [])[:3]:
+                st.markdown(f"> {q}")
 # # app.py — Student Skill Radar (MongoDB, secrets-based, no CSV)
 # import os