File size: 3,013 Bytes
bfdd027
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
{
  "parse_failure_count": 45,
  "total_failure_count": 74,
  "source_unavailable_failure_count": 29,
  "parse_failures_by_group": {
    "house_clerk_disclosures": 28,
    "house_cpf": 17
  },
  "parse_failures_by_reason": {
    "parse_failed": 45
  },
  "failure_reason_counts": {
    "parse_failed": 45,
    "source_unavailable": 29
  },
  "parse_failures_by_detail": {
    "ocr_text_unparseable": 28,
    "pdf_text_unparseable_after_ocr": 4,
    "pdf_text_unparseable": 13
  },
  "top_parse_failure_members": [
    {
      "member_slug": "michael_t_mccaul",
      "count": 8
    },
    {
      "member_slug": "ro_khanna",
      "count": 6
    },
    {
      "member_slug": "harold_rogers",
      "count": 5
    },
    {
      "member_slug": "nicole_malliotakis",
      "count": 3
    },
    {
      "member_slug": "harriet_m_hageman",
      "count": 3
    },
    {
      "member_slug": "beth_van_duyne",
      "count": 3
    },
    {
      "member_slug": "keith_self",
      "count": 2
    },
    {
      "member_slug": "christopher_h_smith",
      "count": 2
    },
    {
      "member_slug": "ann_wagner",
      "count": 2
    },
    {
      "member_slug": "tony_wied",
      "count": 2
    },
    {
      "member_slug": "ken_calvert",
      "count": 1
    },
    {
      "member_slug": "byron_donalds",
      "count": 1
    },
    {
      "member_slug": "mike_ezell",
      "count": 1
    },
    {
      "member_slug": "charles_j_chuck_fleischmann",
      "count": 1
    },
    {
      "member_slug": "seth_moulton",
      "count": 1
    },
    {
      "member_slug": "mark_pocan",
      "count": 1
    },
    {
      "member_slug": "david_rouzer",
      "count": 1
    },
    {
      "member_slug": "michael_k_simpson",
      "count": 1
    },
    {
      "member_slug": "mike_thompson",
      "count": 1
    }
  ],
  "source_unavailable_by_group": {
    "house_votes": 9,
    "lda_public_search": 1,
    "usaspending_public_site": 233
  },
  "partial_recovery_counts": {
    "reused_existing_raw_artifact": 50702,
    "pdf_parse_failed_but_page_context_preserved": 150,
    "page_supported_unresolved_request_recovered": 100,
    "ocr_candidate_ptr_rows_recovered": 11,
    "pdf_url_returned_html_but_recipient_recovered": 11,
    "usaspending_award_public_page": 14016,
    "recovered_public_award_pages_prelink:14016": 1
  },
  "guidance_signal_counts": {
    "house_cpf needs deterministic fallback or parser hardening": 84,
    "house_clerk_disclosures needs deterministic fallback or parser hardening": 28,
    "house_votes needs deterministic fallback or parser hardening": 9,
    "lda_public_search needs deterministic fallback or parser hardening": 2,
    "usaspending_public_site needs deterministic fallback or parser hardening": 1634
  },
  "house_cpf_filename_recovered_requests": 10,
  "house_cpf_unresolved_requests": 1866,
  "house_cpf_page_supported_requests": 3173
}