cjc0013 committed on
Commit
4f45004
·
verified ·
1 Parent(s): 9e68876

Tighten finance examples and overview signal chips

Browse files
dataset_bundle/evidence_audit/consistency_report.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "generated_at": "2026-04-19T21:24:27-04:00",
3
  "event_provenance": {
4
  "event_count": 3918,
5
  "events_with_artifacts": 3878,
 
1
  {
2
+ "generated_at": "2026-04-19T21:54:59-04:00",
3
  "event_provenance": {
4
  "event_count": 3918,
5
  "events_with_artifacts": 3878,
dataset_bundle/public_release_manifest.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "public_version": "congress-public-records-slice-2026-04-v1",
3
  "title": "Congress Public Records Slice",
4
- "release_date": "2026-04-19T21:25:32-04:00",
5
  "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
6
  "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
7
  "dataset_repo_id": "cjc0013/cmp-data",
 
1
  {
2
  "public_version": "congress-public-records-slice-2026-04-v1",
3
  "title": "Congress Public Records Slice",
4
+ "release_date": "2026-04-19T21:56:12-04:00",
5
  "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
6
  "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
7
  "dataset_repo_id": "cjc0013/cmp-data",
public_space_app.py CHANGED
@@ -953,6 +953,15 @@ def _source_family_for_url(url: str) -> str:
953
  def _edge_evidence_chips(row: Dict[str, Any], url_values: list[str] | None = None) -> list[str]:
954
  urls = url_values if url_values is not None else _split_pipe_values(row.get("source_urls", ""), limit=12)
955
  chips: list[str] = []
 
 
 
 
 
 
 
 
 
956
  for url in urls:
957
  chip = _source_family_for_url(url)
958
  if chip not in chips:
@@ -1236,7 +1245,7 @@ def _select_example_urls(
1236
  url = str(chosen.get("url") or "")
1237
  selected.append(url)
1238
  selected_set.add(url)
1239
- elif bill_like_records:
1240
  chosen = sorted(
1241
  bill_like_records,
1242
  key=lambda item: _source_record_priority(item, str(row.get("target_label") or "")),
 
953
  def _edge_evidence_chips(row: Dict[str, Any], url_values: list[str] | None = None) -> list[str]:
954
  urls = url_values if url_values is not None else _split_pipe_values(row.get("source_urls", ""), limit=12)
955
  chips: list[str] = []
956
+ if url_values is None:
957
+ count_backed = [
958
+ ("annual disclosure", int(row.get("annual_link_count", 0) or 0) > 0),
959
+ ("trade disclosure", int(row.get("trade_link_count", 0) or 0) > 0),
960
+ ("committee roster", int(row.get("profile_link_count", 0) or 0) > 0),
961
+ ]
962
+ for label, enabled in count_backed:
963
+ if enabled and label not in chips:
964
+ chips.append(label)
965
  for url in urls:
966
  chip = _source_family_for_url(url)
967
  if chip not in chips:
 
1245
  url = str(chosen.get("url") or "")
1246
  selected.append(url)
1247
  selected_set.add(url)
1248
+ elif bill_like_records and normalized_target not in TOPIC_AREA_PREFERRED_BILL_HINTS:
1249
  chosen = sorted(
1250
  bill_like_records,
1251
  key=lambda item: _source_record_priority(item, str(row.get("target_label") or "")),