Align overview evidence with detail context
Browse files
dataset_bundle/evidence_audit/consistency_report.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"generated_at": "2026-04-
|
| 3 |
"event_provenance": {
|
| 4 |
"event_count": 3918,
|
| 5 |
"events_with_artifacts": 3878,
|
|
|
|
| 1 |
{
|
| 2 |
+
"generated_at": "2026-04-19T22:19:39-04:00",
|
| 3 |
"event_provenance": {
|
| 4 |
"event_count": 3918,
|
| 5 |
"events_with_artifacts": 3878,
|
dataset_bundle/public_release_manifest.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"public_version": "congress-public-records-slice-2026-04-v1",
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
-
"release_date": "2026-04-
|
| 5 |
"slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
|
| 6 |
"source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
|
| 7 |
"dataset_repo_id": "cjc0013/cmp-data",
|
|
|
|
| 1 |
{
|
| 2 |
"public_version": "congress-public-records-slice-2026-04-v1",
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
+
"release_date": "2026-04-19T22:20:56-04:00",
|
| 5 |
"slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
|
| 6 |
"source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
|
| 7 |
"dataset_repo_id": "cjc0013/cmp-data",
|
public_space_app.py
CHANGED
|
@@ -1346,6 +1346,19 @@ def _window_overlap_text(row: Dict[str, Any]) -> str:
|
|
| 1346 |
return "not explicit in this row"
|
| 1347 |
|
| 1348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1349 |
def _member_activity_baselines(edges: pd.DataFrame) -> Dict[str, Dict[str, float]]:
|
| 1350 |
if edges.empty:
|
| 1351 |
return {}
|
|
@@ -1402,7 +1415,12 @@ def _relative_relationship_score(row: Dict[str, Any], baselines: Dict[str, Dict[
|
|
| 1402 |
return max(0, min(100, int(round(relative))))
|
| 1403 |
|
| 1404 |
|
| 1405 |
-
def _rank_relationships(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1406 |
columns = [
|
| 1407 |
"rank",
|
| 1408 |
"relationship_id",
|
|
@@ -1435,7 +1453,12 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
|
|
| 1435 |
if family == "recipient"
|
| 1436 |
else row.get("weak_event_count", 0) or 0
|
| 1437 |
)
|
| 1438 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1439 |
raw_score = _relationship_score(row)
|
| 1440 |
relative_score = _relative_relationship_score(row, baselines)
|
| 1441 |
sort_score = relative_score if normalized_mode == "relative" else raw_score
|
|
@@ -1452,12 +1475,12 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
|
|
| 1452 |
"status_code": str(row.get("relationship_status", "") or ""),
|
| 1453 |
"strength": _plain_status_label(str(row.get("relationship_status", "") or "")),
|
| 1454 |
"evidence": " | ".join(chips) if chips else "published source support",
|
| 1455 |
-
"time-window overlap": _window_overlap_text(row),
|
| 1456 |
"supporting rows": int(row.get("link_count", 0) or 0),
|
| 1457 |
"stronger support": stronger_support,
|
| 1458 |
"needs caution": caution_support,
|
| 1459 |
"unresolved refs": int(row.get("unresolved_source_ref_count", 0) or 0),
|
| 1460 |
-
"source_examples": ", ".join(_split_pipe_values(row.get("source_urls", ""), limit=2)),
|
| 1461 |
}
|
| 1462 |
)
|
| 1463 |
ranked = pd.DataFrame(rows).sort_values(
|
|
@@ -2394,7 +2417,7 @@ def build_app(copy_path: str | Path):
|
|
| 2394 |
relationship_id: str | None = None,
|
| 2395 |
):
|
| 2396 |
filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
|
| 2397 |
-
ranked = _rank_relationships(filtered_edges, ranking_mode=ranking_mode)
|
| 2398 |
options = _relationship_options(ranked)
|
| 2399 |
valid_ids = {value for _, value in options}
|
| 2400 |
selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
|
|
|
|
| 1346 |
return "not explicit in this row"
|
| 1347 |
|
| 1348 |
|
| 1349 |
+
def _context_window_overlap_text(context: Dict[str, Any], row: Dict[str, Any]) -> str:
|
| 1350 |
+
chips = {str(item or "").strip().lower() for item in context.get("evidence_chips", [])}
|
| 1351 |
+
has_disclosure = bool(chips.intersection({"annual disclosure", "trade disclosure"}))
|
| 1352 |
+
has_legislative = bool(chips.intersection({"bill record", "vote activity", "lobbying activity"}))
|
| 1353 |
+
if has_disclosure and has_legislative:
|
| 1354 |
+
return "published disclosure and legislative records line up in this released slice"
|
| 1355 |
+
if has_disclosure and "committee roster" in chips:
|
| 1356 |
+
return "disclosure records plus current committee context"
|
| 1357 |
+
if "committee roster" in chips:
|
| 1358 |
+
return "current reference context only"
|
| 1359 |
+
return _window_overlap_text(row)
|
| 1360 |
+
|
| 1361 |
+
|
| 1362 |
def _member_activity_baselines(edges: pd.DataFrame) -> Dict[str, Dict[str, float]]:
|
| 1363 |
if edges.empty:
|
| 1364 |
return {}
|
|
|
|
| 1415 |
return max(0, min(100, int(round(relative))))
|
| 1416 |
|
| 1417 |
|
| 1418 |
+
def _rank_relationships(
|
| 1419 |
+
edges: pd.DataFrame,
|
| 1420 |
+
ranking_mode: str = "raw",
|
| 1421 |
+
links: pd.DataFrame | None = None,
|
| 1422 |
+
events: pd.DataFrame | None = None,
|
| 1423 |
+
) -> pd.DataFrame:
|
| 1424 |
columns = [
|
| 1425 |
"rank",
|
| 1426 |
"relationship_id",
|
|
|
|
| 1453 |
if family == "recipient"
|
| 1454 |
else row.get("weak_event_count", 0) or 0
|
| 1455 |
)
|
| 1456 |
+
context = (
|
| 1457 |
+
_relationship_context(edges, links, events, str(row.get("edge_id") or ""), ranking_mode)
|
| 1458 |
+
if links is not None and events is not None
|
| 1459 |
+
else None
|
| 1460 |
+
)
|
| 1461 |
+
chips = context["evidence_chips"] if context else _edge_evidence_chips(row)
|
| 1462 |
raw_score = _relationship_score(row)
|
| 1463 |
relative_score = _relative_relationship_score(row, baselines)
|
| 1464 |
sort_score = relative_score if normalized_mode == "relative" else raw_score
|
|
|
|
| 1475 |
"status_code": str(row.get("relationship_status", "") or ""),
|
| 1476 |
"strength": _plain_status_label(str(row.get("relationship_status", "") or "")),
|
| 1477 |
"evidence": " | ".join(chips) if chips else "published source support",
|
| 1478 |
+
"time-window overlap": _context_window_overlap_text(context, row) if context else _window_overlap_text(row),
|
| 1479 |
"supporting rows": int(row.get("link_count", 0) or 0),
|
| 1480 |
"stronger support": stronger_support,
|
| 1481 |
"needs caution": caution_support,
|
| 1482 |
"unresolved refs": int(row.get("unresolved_source_ref_count", 0) or 0),
|
| 1483 |
+
"source_examples": ", ".join(context["surfaced_urls"][:2]) if context else ", ".join(_split_pipe_values(row.get("source_urls", ""), limit=2)),
|
| 1484 |
}
|
| 1485 |
)
|
| 1486 |
ranked = pd.DataFrame(rows).sort_values(
|
|
|
|
| 2417 |
relationship_id: str | None = None,
|
| 2418 |
):
|
| 2419 |
filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
|
| 2420 |
+
ranked = _rank_relationships(filtered_edges, ranking_mode=ranking_mode, links=links, events=events)
|
| 2421 |
options = _relationship_options(ranked)
|
| 2422 |
valid_ids = {value for _, value in options}
|
| 2423 |
selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
|