cjc0013 committed on
Commit
627a427
·
verified ·
1 Parent(s): 4f45004

Align overview evidence with detail context

Browse files
dataset_bundle/evidence_audit/consistency_report.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "generated_at": "2026-04-19T21:54:59-04:00",
3
  "event_provenance": {
4
  "event_count": 3918,
5
  "events_with_artifacts": 3878,
 
1
  {
2
+ "generated_at": "2026-04-19T22:19:39-04:00",
3
  "event_provenance": {
4
  "event_count": 3918,
5
  "events_with_artifacts": 3878,
dataset_bundle/public_release_manifest.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "public_version": "congress-public-records-slice-2026-04-v1",
3
  "title": "Congress Public Records Slice",
4
- "release_date": "2026-04-19T21:56:12-04:00",
5
  "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
6
  "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
7
  "dataset_repo_id": "cjc0013/cmp-data",
 
1
  {
2
  "public_version": "congress-public-records-slice-2026-04-v1",
3
  "title": "Congress Public Records Slice",
4
+ "release_date": "2026-04-19T22:20:56-04:00",
5
  "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
6
  "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
7
  "dataset_repo_id": "cjc0013/cmp-data",
public_space_app.py CHANGED
@@ -1346,6 +1346,19 @@ def _window_overlap_text(row: Dict[str, Any]) -> str:
1346
  return "not explicit in this row"
1347
 
1348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1349
  def _member_activity_baselines(edges: pd.DataFrame) -> Dict[str, Dict[str, float]]:
1350
  if edges.empty:
1351
  return {}
@@ -1402,7 +1415,12 @@ def _relative_relationship_score(row: Dict[str, Any], baselines: Dict[str, Dict[
1402
  return max(0, min(100, int(round(relative))))
1403
 
1404
 
1405
- def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.DataFrame:
 
 
 
 
 
1406
  columns = [
1407
  "rank",
1408
  "relationship_id",
@@ -1435,7 +1453,12 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
1435
  if family == "recipient"
1436
  else row.get("weak_event_count", 0) or 0
1437
  )
1438
- chips = _edge_evidence_chips(row)
 
 
 
 
 
1439
  raw_score = _relationship_score(row)
1440
  relative_score = _relative_relationship_score(row, baselines)
1441
  sort_score = relative_score if normalized_mode == "relative" else raw_score
@@ -1452,12 +1475,12 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
1452
  "status_code": str(row.get("relationship_status", "") or ""),
1453
  "strength": _plain_status_label(str(row.get("relationship_status", "") or "")),
1454
  "evidence": " | ".join(chips) if chips else "published source support",
1455
- "time-window overlap": _window_overlap_text(row),
1456
  "supporting rows": int(row.get("link_count", 0) or 0),
1457
  "stronger support": stronger_support,
1458
  "needs caution": caution_support,
1459
  "unresolved refs": int(row.get("unresolved_source_ref_count", 0) or 0),
1460
- "source_examples": ", ".join(_split_pipe_values(row.get("source_urls", ""), limit=2)),
1461
  }
1462
  )
1463
  ranked = pd.DataFrame(rows).sort_values(
@@ -2394,7 +2417,7 @@ def build_app(copy_path: str | Path):
2394
  relationship_id: str | None = None,
2395
  ):
2396
  filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
2397
- ranked = _rank_relationships(filtered_edges, ranking_mode=ranking_mode)
2398
  options = _relationship_options(ranked)
2399
  valid_ids = {value for _, value in options}
2400
  selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
 
1346
  return "not explicit in this row"
1347
 
1348
 
1349
def _context_window_overlap_text(context: Dict[str, Any], row: Dict[str, Any]) -> str:
    """Summarize how a relationship's evidence lines up, based on its context chips.

    The chips under ``context["evidence_chips"]`` are normalized (stringified,
    stripped, lower-cased) and matched against fixed chip labels. When none of
    the chip-based rules apply, the row-level ``_window_overlap_text(row)``
    wording is used instead.

    Args:
        context: Relationship context mapping. ``evidence_chips`` may be an
            iterable of labels, a single ``|``-delimited string of labels, or
            missing/``None`` (treated as no chips).
        row: The edge row, consulted only for the fallback text.

    Returns:
        A short human-readable description of the overlap.
    """
    disclosure_chips = {"annual disclosure", "trade disclosure"}
    legislative_chips = {"bill record", "vote activity", "lobbying activity"}
    roster_chip = "committee roster"

    # ``or ()`` covers a key that is present but None; a plain default in
    # ``.get`` would only cover a missing key.
    raw_chips = context.get("evidence_chips") or ()
    if isinstance(raw_chips, str):
        # A bare string would otherwise be iterated character by character.
        raw_chips = raw_chips.split("|")
    chips = {str(item or "").strip().lower() for item in raw_chips}

    has_disclosure = not chips.isdisjoint(disclosure_chips)
    has_legislative = not chips.isdisjoint(legislative_chips)
    has_roster = roster_chip in chips

    if has_disclosure and has_legislative:
        return "published disclosure and legislative records line up in this released slice"
    if has_roster:
        if has_disclosure:
            return "disclosure records plus current committee context"
        return "current reference context only"
    # No chip-based rule matched: defer to the row-level description.
    return _window_overlap_text(row)
1360
+
1361
+
1362
  def _member_activity_baselines(edges: pd.DataFrame) -> Dict[str, Dict[str, float]]:
1363
  if edges.empty:
1364
  return {}
 
1415
  return max(0, min(100, int(round(relative))))
1416
 
1417
 
1418
+ def _rank_relationships(
1419
+ edges: pd.DataFrame,
1420
+ ranking_mode: str = "raw",
1421
+ links: pd.DataFrame | None = None,
1422
+ events: pd.DataFrame | None = None,
1423
+ ) -> pd.DataFrame:
1424
  columns = [
1425
  "rank",
1426
  "relationship_id",
 
1453
  if family == "recipient"
1454
  else row.get("weak_event_count", 0) or 0
1455
  )
1456
+ context = (
1457
+ _relationship_context(edges, links, events, str(row.get("edge_id") or ""), ranking_mode)
1458
+ if links is not None and events is not None
1459
+ else None
1460
+ )
1461
+ chips = context["evidence_chips"] if context else _edge_evidence_chips(row)
1462
  raw_score = _relationship_score(row)
1463
  relative_score = _relative_relationship_score(row, baselines)
1464
  sort_score = relative_score if normalized_mode == "relative" else raw_score
 
1475
  "status_code": str(row.get("relationship_status", "") or ""),
1476
  "strength": _plain_status_label(str(row.get("relationship_status", "") or "")),
1477
  "evidence": " | ".join(chips) if chips else "published source support",
1478
+ "time-window overlap": _context_window_overlap_text(context, row) if context else _window_overlap_text(row),
1479
  "supporting rows": int(row.get("link_count", 0) or 0),
1480
  "stronger support": stronger_support,
1481
  "needs caution": caution_support,
1482
  "unresolved refs": int(row.get("unresolved_source_ref_count", 0) or 0),
1483
+ "source_examples": ", ".join(context["surfaced_urls"][:2]) if context else ", ".join(_split_pipe_values(row.get("source_urls", ""), limit=2)),
1484
  }
1485
  )
1486
  ranked = pd.DataFrame(rows).sort_values(
 
2417
  relationship_id: str | None = None,
2418
  ):
2419
  filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
2420
+ ranked = _rank_relationships(filtered_edges, ranking_mode=ranking_mode, links=links, events=events)
2421
  options = _relationship_options(ranked)
2422
  valid_ids = {value for _, value in options}
2423
  selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)