Tesneem committed on
Commit
66b2287
·
verified ·
1 Parent(s): e99f760

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -60
app.py CHANGED
@@ -242,7 +242,9 @@ fig = plot_radar(df_final, grouped, chart_title)
242
  st.plotly_chart(fig, use_container_width=True)
243
  st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
244
 
245
- ## ================== Dynamic Stage Summaries (only if student answered that week) ==================
 
 
246
 
247
  # Stage <-> Source mapping
248
  STAGE_TO_SOURCE = {
@@ -270,7 +272,6 @@ def _responses_for_student_stage(uri, db, responses_coll, student: str, stage: s
270
  return [d for d in docs if (d.get("answer") or "").strip()]
271
  except Exception:
272
  return []
273
- import re
274
 
275
  def _answer_total_score(resp: dict) -> float:
276
  skills = resp.get("skills") or {}
@@ -282,76 +283,110 @@ def _answer_total_score(resp: dict) -> float:
282
  pass
283
  return total
284
 
285
- def _norm_text(s: str) -> str:
286
- # lower, collapse whitespace, strip surrounding punctuation/dots
287
- return re.sub(r"\s+", " ", (s or "").lower()).strip(" .,\"'`”’“‘-–—()[]{}")
288
-
289
- def _fragments_in_order(answer_norm: str, frags_norm: list[str]) -> bool:
290
- """Return True if all fragments appear in order anywhere in the answer."""
 
 
 
 
 
 
 
 
 
 
 
 
291
  start = 0
292
- for frag in frags_norm:
293
- idx = answer_norm.find(frag, start)
294
  if idx == -1:
295
  return False
296
  start = idx + len(frag)
297
  return True
298
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  def _fix_cutoff_quotes(quotes: list[str], responses: list[dict]) -> list[str]:
300
  """
301
  Replace truncated quotes with full answers when possible.
302
- Works for:
303
- - '...' ellipsis in the middle (checks fragments in order, anywhere)
304
- - plain middle fragments (substring match)
305
- If multiple matches, pick the response with highest total skill score.
 
 
306
  """
307
  if not quotes:
308
  return []
309
- # Precompute normalized answers + scores
310
- norm_answers = []
 
311
  for r in responses:
312
- ans = (r.get("answer") or "").strip()
313
- if not ans:
314
  continue
315
- norm_answers.append((
316
- _norm_text(ans),
317
- ans,
318
- _answer_total_score(r)
319
- ))
320
 
321
- fulls = []
322
  for q in quotes:
323
  q_raw = (q or "").strip()
324
  if not q_raw:
325
  continue
326
 
327
- q_norm = _norm_text(q_raw)
 
 
328
  candidates = []
329
 
330
- if "..." in q_raw:
331
- # split on ellipses, keep non-empty normalized fragments
332
- parts = [p.strip() for p in re.split(r"\.\.\.|…", q_raw)]
333
- parts_norm = [_norm_text(p) for p in parts if _norm_text(p)]
334
- if parts_norm:
335
- for ans_norm, ans_full, score in norm_answers:
336
- if _fragments_in_order(ans_norm, parts_norm):
337
- candidates.append((score, ans_full))
 
 
 
 
 
 
338
  else:
339
- # plain substring anywhere in the answer
340
- for ans_norm, ans_full, score in norm_answers:
341
- if q_norm and q_norm in ans_norm:
342
- candidates.append((score, ans_full))
 
 
 
 
343
 
344
  if candidates:
345
- # pick highest scoring answer
346
  candidates.sort(key=lambda x: x[0], reverse=True)
347
- fulls.append(candidates[0][1])
348
  else:
349
- # no match found keep original
350
- fulls.append(q_raw)
351
-
352
- return fulls
353
 
354
- return fulls
355
 
356
  def _top3_answers_by_skill_sum(responses: list[dict]) -> list[str]:
357
  """Pick up to 3 answers with the highest total skill score."""
@@ -360,13 +395,7 @@ def _top3_answers_by_skill_sum(responses: list[dict]) -> list[str]:
360
  ans = (r.get("answer") or "").strip()
361
  if not ans:
362
  continue
363
- skills = r.get("skills") or {}
364
- total = 0.0
365
- for v in skills.values():
366
- try:
367
- total += float(v)
368
- except Exception:
369
- pass
370
  scored.append((total, ans))
371
  scored.sort(key=lambda x: x[0], reverse=True)
372
  return [ans for _, ans in scored[:3]]
@@ -383,10 +412,10 @@ def fetch_student_stage_summary(
383
  Return summary dict for a student+stage ONLY if the student has responses for that week.
384
  Otherwise, return None (so we don't render the panel).
385
  """
386
- # 1) Require that the student answered that week
387
  responses = _responses_for_student_stage(uri, db, responses_coll, student, stage)
388
  if not responses:
389
- return None # <-- do not show summary panel
390
 
391
  # 2) Pull summary doc (patterns nested)
392
  patterns = {}
@@ -407,7 +436,7 @@ def fetch_student_stage_summary(
407
  most_consistent = patterns.get("most_consistent")
408
  most_developed = patterns.get("most_developed")
409
 
410
- # 3) Fix cut-off quotes; if none after fixing, fallback to top 3 highest-scoring answers
411
  notable_quotes = _fix_cutoff_quotes(notable_quotes, responses)
412
  if not notable_quotes:
413
  notable_quotes = _top3_answers_by_skill_sum(responses)
@@ -441,12 +470,9 @@ if mongo_uri and student_choice != "(All)" and source_choice != "(All)":
441
  st.markdown("**Top Strengths:** " + (", ".join(strengths) if strengths else "—"))
442
 
443
  st.markdown("**Notable Quotes:**")
444
- quotes = summary.get("notable_quotes") or []
445
- if quotes:
446
- for q in quotes[:3]:
447
- st.markdown(f"> {q}")
448
- else:
449
- st.write("—")
450
 
451
  # # app.py — Student Skill Radar (MongoDB, secrets-based, no CSV)
452
  # import os
 
242
  st.plotly_chart(fig, use_container_width=True)
243
  st.caption(f"{len(df_final)} line(s) aggregated." if not df_final.empty else "No data.")
244
 
245
+ # ================== Dynamic Stage Summaries (only if student answered that week) ==================
246
+ import re
247
+ import unicodedata
248
 
249
  # Stage <-> Source mapping
250
  STAGE_TO_SOURCE = {
 
272
  return [d for d in docs if (d.get("answer") or "").strip()]
273
  except Exception:
274
  return []
 
275
 
276
  def _answer_total_score(resp: dict) -> float:
277
  skills = resp.get("skills") or {}
 
283
  pass
284
  return total
285
 
286
+ def _normalize_quotes_spaces(s: str) -> str:
287
+ """Normalize unicode punctuation (smart quotes, ellipsis), collapse spaces."""
288
+ if not s:
289
+ return ""
290
+ s = unicodedata.normalize("NFKC", s)
291
+ s = s.replace("…", "...")
292
+ s = re.sub(r"\s+", " ", s).strip()
293
+ return s
294
+
295
+ def _clean_for_loose_match(s: str) -> str:
296
+ """Lowercase, remove punctuation for forgiving matching."""
297
+ s = _normalize_quotes_spaces(s).lower()
298
+ s = re.sub(r"[^\w\s]", "", s) # drop punctuation
299
+ s = re.sub(r"\s+", " ", s).strip()
300
+ return s
301
+
302
+ def _fragments_in_order_clean(ans_clean: str, frags_clean: list[str]) -> bool:
303
+ """True if all cleaned fragments appear in order anywhere in the cleaned answer."""
304
  start = 0
305
+ for frag in frags_clean:
306
+ idx = ans_clean.find(frag, start)
307
  if idx == -1:
308
  return False
309
  start = idx + len(frag)
310
  return True
311
 
312
+ def _build_relaxed_regex_from_fragments(parts: list[str]) -> re.Pattern:
313
+ """
314
+ Build a case-insensitive regex that matches fragments in order with up to ~160 chars between.
315
+ Uses normalized text (keeps punctuation in fragments).
316
+ """
317
+ esc_parts = [re.escape(_normalize_quotes_spaces(p)) for p in parts if _normalize_quotes_spaces(p)]
318
+ if not esc_parts:
319
+ return re.compile(r"(?!x)x", re.I | re.S) # match nothing
320
+ pattern = r".*?".join(esc_parts) # allow anything between fragments
321
+ pattern = pattern.replace(".*?", r"[\s\S]{0,160}?") # optional limiter
322
+ return re.compile(pattern, re.I | re.S)
323
+
324
  def _fix_cutoff_quotes(quotes: list[str], responses: list[dict]) -> list[str]:
325
  """
326
  Replace truncated quotes with full answers when possible.
327
+ Handles:
328
+ - Ellipses in the middle or end (fragments matched in order, anywhere)
329
+ - Smart quotes / punctuation differences
330
+ - Middle substrings (not necessarily prefix/suffix)
331
+ If multiple answers match, picks the one with highest total skill score.
332
+ Searches ONLY within the 'responses' provided (already filtered to student+stage).
333
  """
334
  if not quotes:
335
  return []
336
+
337
+ # Precompute answer variants + scores
338
+ resp_cache = []
339
  for r in responses:
340
+ full = (r.get("answer") or "").strip()
341
+ if not full:
342
  continue
343
+ full_norm = _normalize_quotes_spaces(full)
344
+ full_clean = _clean_for_loose_match(full_norm)
345
+ score = _answer_total_score(r)
346
+ resp_cache.append({"raw": full, "norm": full_norm, "clean": full_clean, "score": score})
 
347
 
348
+ results = []
349
  for q in quotes:
350
  q_raw = (q or "").strip()
351
  if not q_raw:
352
  continue
353
 
354
+ q_norm = _normalize_quotes_spaces(q_raw)
355
+ q_clean = _clean_for_loose_match(q_norm)
356
+
357
  candidates = []
358
 
359
+ if "..." in q_norm:
360
+ # Split into fragments and check in-order occurrence anywhere
361
+ parts = [p.strip() for p in q_norm.split("...") if p.strip()]
362
+ parts_clean = [_clean_for_loose_match(p) for p in parts if _clean_for_loose_match(p)]
363
+ # 1) Loose cleaned check
364
+ for rc in resp_cache:
365
+ if parts_clean and _fragments_in_order_clean(rc["clean"], parts_clean):
366
+ candidates.append((rc["score"], rc["raw"]))
367
+ # 2) Relaxed regex on normalized text if needed
368
+ if not candidates and parts:
369
+ rx = _build_relaxed_regex_from_fragments(parts)
370
+ for rc in resp_cache:
371
+ if rx.search(rc["norm"]):
372
+ candidates.append((rc["score"], rc["raw"]))
373
  else:
374
+ # No ellipsis: loose substring match (cleaned), then normalized fallback
375
+ for rc in resp_cache:
376
+ if q_clean and q_clean in rc["clean"]:
377
+ candidates.append((rc["score"], rc["raw"]))
378
+ if not candidates:
379
+ for rc in resp_cache:
380
+ if q_norm and q_norm.lower() in rc["norm"].lower():
381
+ candidates.append((rc["score"], rc["raw"]))
382
 
383
  if candidates:
 
384
  candidates.sort(key=lambda x: x[0], reverse=True)
385
+ results.append(candidates[0][1])
386
  else:
387
+ results.append(q_raw) # keep original if no match
 
 
 
388
 
389
+ return results
390
 
391
  def _top3_answers_by_skill_sum(responses: list[dict]) -> list[str]:
392
  """Pick up to 3 answers with the highest total skill score."""
 
395
  ans = (r.get("answer") or "").strip()
396
  if not ans:
397
  continue
398
+ total = _answer_total_score(r)
 
 
 
 
 
 
399
  scored.append((total, ans))
400
  scored.sort(key=lambda x: x[0], reverse=True)
401
  return [ans for _, ans in scored[:3]]
 
412
  Return summary dict for a student+stage ONLY if the student has responses for that week.
413
  Otherwise, return None (so we don't render the panel).
414
  """
415
+ # 1) Require that the student answered that week (source derived from stage)
416
  responses = _responses_for_student_stage(uri, db, responses_coll, student, stage)
417
  if not responses:
418
+ return None
419
 
420
  # 2) Pull summary doc (patterns nested)
421
  patterns = {}
 
436
  most_consistent = patterns.get("most_consistent")
437
  most_developed = patterns.get("most_developed")
438
 
439
+ # 3) Repair cut-off quotes; if none after fixing, fallback to top 3 highest-scoring answers
440
  notable_quotes = _fix_cutoff_quotes(notable_quotes, responses)
441
  if not notable_quotes:
442
  notable_quotes = _top3_answers_by_skill_sum(responses)
 
470
  st.markdown("**Top Strengths:** " + (", ".join(strengths) if strengths else "—"))
471
 
472
  st.markdown("**Notable Quotes:**")
473
+ for q in (summary.get("notable_quotes") or [])[:3]:
474
+ st.markdown(f"> {q}")
475
+
 
 
 
476
 
477
  # # app.py — Student Skill Radar (MongoDB, secrets-based, no CSV)
478
  # import os