GodsDevProject committed on
Commit
0295d8d
·
verified ·
1 Parent(s): af459fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -63
app.py CHANGED
@@ -1,31 +1,55 @@
1
  import gradio as gr
2
- import time, hashlib, io, zipfile, os, tempfile
3
  import xml.etree.ElementTree as ET
4
- from datetime import datetime
5
  from urllib.parse import quote_plus
 
6
 
7
- from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
 
 
8
  from reportlab.lib.styles import getSampleStyleSheet
9
  from reportlab.lib.pagesizes import LETTER
10
 
11
  # ======================================================
12
- # HARD FEATURE FLAGS (GOVERNANCE ENFORCED)
13
  # ======================================================
14
 
15
- ENABLE_FAISS_PHASE_4 = False
16
- ENABLE_AI = True
 
17
 
18
  # ======================================================
19
- # FIPS MODE
20
  # ======================================================
21
 
22
  FIPS_140_MODE = False
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # ======================================================
25
  # SESSION STATE
26
  # ======================================================
27
 
28
  LAST_RESULTS = []
 
29
 
30
  # ======================================================
31
  # CRYPTOGRAPHIC CORE
@@ -34,6 +58,11 @@ LAST_RESULTS = []
34
  def sha256_text(t: str):
35
  return hashlib.sha256(t.encode()).hexdigest()
36
 
 
 
 
 
 
37
  def provenance_headers(payload: str):
38
  return {
39
  "Tool-Version": "1.7.0",
@@ -66,7 +95,7 @@ DISTRICT_SCHEMAS = {
66
  }
67
 
68
  # ======================================================
69
- # COVER SHEET PDF
70
  # ======================================================
71
 
72
  def generate_cover_sheet_pdf(district, ecf_no):
@@ -79,10 +108,10 @@ def generate_cover_sheet_pdf(district, ecf_no):
79
  f"<b>CM/ECF PRE-FILING COVER SHEET</b><br/><br/>"
80
  f"<b>District:</b> {district}<br/>"
81
  f"<b>Reference No.:</b> {ecf_no}<br/><br/>"
82
- "This submission is a <b>pre-filing informational bundle</b> generated "
83
- "from publicly available FOIA electronic reading rooms.<br/><br/>"
84
- "No document in this bundle is filed, certified, or authenticated "
85
- "by any court, clerk, or agency."
86
  )
87
 
88
  doc.build([
@@ -102,8 +131,7 @@ def generate_proposed_exhibit_list():
102
  lines = ["PROPOSED EXHIBIT LIST\n"]
103
  for i, r in enumerate(LAST_RESULTS, 1):
104
  lines.append(
105
- f"Exhibit {i:03d}: {r['agency']} FOIA Reading Room "
106
- f"({r['url']})"
107
  )
108
  return "\n".join(lines)
109
 
@@ -114,11 +142,11 @@ def generate_proposed_exhibit_list():
114
  def clerk_verification_checklist():
115
  return (
116
  "CLERK VERIFICATION CHECKLIST\n\n"
117
- "☐ Confirm exhibit URLs resolve to issuing agency domains\n"
118
- "☐ Confirm SHA-256 hash matches downloaded agency document\n"
119
  "☐ Confirm document is publicly released\n"
120
- "☐ Note: Tool does NOT certify authenticity\n"
121
- "☐ Note: No sealed or restricted material included\n\n"
122
  "Relevant Rules:\n"
123
  "• FRE 902(5)\n"
124
  "• FRE 803(8)\n"
@@ -126,7 +154,7 @@ def clerk_verification_checklist():
126
  )
127
 
128
  # ======================================================
129
- # PDF GENERATION (WITH AI / ETHICS FOOTER)
130
  # ======================================================
131
 
132
  def generate_pdf(title, body, exhibit_no, ecf_no):
@@ -137,7 +165,7 @@ def generate_pdf(title, body, exhibit_no, ecf_no):
137
  canvas.setFont("Helvetica", 8)
138
  canvas.drawString(
139
  40, 20,
140
- "AI-assisted formatting only; no substantive analysis or factual assertions."
141
  )
142
  canvas.drawRightString(
143
  580, 20,
@@ -162,43 +190,147 @@ def generate_pdf(title, body, exhibit_no, ecf_no):
162
  return buf
163
 
164
  # ======================================================
165
- # FOIA ADAPTERS
166
  # ======================================================
167
 
168
  class FOIAAdapter:
169
- agency = ""
170
- url = ""
171
- def search(self, q):
 
 
 
 
172
  return [{
173
  "agency": self.agency,
174
  "title": f"{self.agency} FOIA Reading Room",
175
- "url": self.url.format(q=quote_plus(q)),
 
 
 
 
176
  }]
177
 
178
  class CIA(FOIAAdapter):
179
  agency = "CIA"
180
- url = "https://www.cia.gov/readingroom/search/site/{q}"
181
 
182
  class FBI(FOIAAdapter):
183
  agency = "FBI"
184
- url = "https://vault.fbi.gov/search?SearchableText={q}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- ALL_ADAPTERS = {"CIA": CIA(), "FBI": FBI()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  # ======================================================
189
  # SEARCH
190
  # ======================================================
191
 
192
  def run_search(query, agencies):
193
- global LAST_RESULTS
 
194
  LAST_RESULTS = []
195
  rows = []
196
- for a in agencies:
197
- for r in ALL_ADAPTERS[a].search(query):
198
- r["hash"] = sha256_text(r["url"])
 
 
 
 
 
 
 
199
  LAST_RESULTS.append(r)
200
- rows.append([r["agency"], r["title"], r["url"], r["hash"]])
201
- return rows
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
  # ======================================================
204
  # COURT BUNDLE
@@ -206,63 +338,63 @@ def run_search(query, agencies):
206
 
207
  def generate_court_bundle(district):
208
  ecf_no = generate_ecf_filing_number()
209
-
210
  with tempfile.TemporaryDirectory() as td:
211
  zpath = os.path.join(td, "court_bundle.zip")
212
-
213
  with zipfile.ZipFile(zpath, "w") as z:
214
- # Cover sheet
215
- cover = generate_cover_sheet_pdf(district, ecf_no)
216
- z.writestr("00_Cover_Sheet.pdf", cover.read())
217
-
218
- # Exhibits
219
  for i, r in enumerate(LAST_RESULTS, 1):
220
  pdf = generate_pdf(
221
  "Judicial Appendix",
222
- f"{r['agency']} FOIA Reading Room\n{r['url']}",
223
  f"{i:03d}",
224
- ecf_no,
225
  )
226
  z.writestr(f"Exhibit_{i:03d}.pdf", pdf.read())
227
- z.writestr(
228
- f"Exhibit_{i:03d}.sha256",
229
- sha256_text(r["url"])
230
- )
231
-
232
- # Support docs
233
  z.writestr("proposed_exhibit_list.txt", generate_proposed_exhibit_list())
234
  z.writestr("clerk_verification_checklist.txt", clerk_verification_checklist())
235
-
236
  return open(zpath, "rb")
237
 
238
  # ======================================================
239
  # UI
240
  # ======================================================
241
 
242
- with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
243
- gr.Markdown("## Federal FOIA Intelligence Search")
 
 
 
 
 
 
 
244
 
245
  with gr.Tab("Search"):
246
  agencies = gr.CheckboxGroup(
247
  list(ALL_ADAPTERS.keys()),
248
- value=list(ALL_ADAPTERS.keys())
 
249
  )
250
- query = gr.Textbox()
251
  table = gr.Dataframe(
252
- headers=["Agency", "Title", "URL", "SHA-256"]
253
  )
254
- gr.Button("Search").click(run_search, [query, agencies], table)
 
 
255
 
256
- with gr.Tab("Court Bundle"):
257
- district = gr.Dropdown(
258
- list(DISTRICT_SCHEMAS.keys()),
259
- value="Generic"
260
- )
261
  gr.File(label="Download Court Bundle").upload(
262
  lambda d=district: generate_court_bundle(d)
263
  )
 
264
 
265
- with gr.Tab("Clerk Checklist"):
266
- gr.Textbox(value=clerk_verification_checklist(), lines=16)
 
 
 
267
 
268
  app.launch()
 
1
  import gradio as gr
2
+ import time, hashlib, io, zipfile, os, tempfile, base64
3
  import xml.etree.ElementTree as ET
4
+ from datetime import datetime, timedelta
5
  from urllib.parse import quote_plus
6
+ import requests
7
 
8
+ from reportlab.platypus import (
9
+ SimpleDocTemplate, Paragraph, Spacer, PageBreak
10
+ )
11
  from reportlab.lib.styles import getSampleStyleSheet
12
  from reportlab.lib.pagesizes import LETTER
13
 
14
  # ======================================================
15
+ # HARD FEATURE FLAGS (GOVERNANCE — MUST NOT CHANGE)
16
  # ======================================================
17
 
18
+ ENABLE_FAISS_PHASE_4 = False # HARD DISABLED
19
+ ENABLE_AI = True # OPT-IN ONLY
20
+ ENABLE_PDF_EXTRACTION = True # OPT-IN ONLY
21
 
22
  # ======================================================
23
+ # FIPS MODE (DECLARATIVE)
24
  # ======================================================
25
 
26
  FIPS_140_MODE = False
27
 
28
+ # ======================================================
29
+ # OPTIONAL PDF SUPPORT
30
+ # ======================================================
31
+
32
# Optional PDF capabilities. Each flag ends up True only when the matching
# third-party import succeeds; any import-time failure leaves it False.
try:
    from pdfminer.high_level import extract_text
except Exception:
    PDF_TEXT_AVAILABLE = False
else:
    PDF_TEXT_AVAILABLE = True

try:
    from pdf2image import convert_from_bytes
except Exception:
    PDF_THUMBNAIL_AVAILABLE = False
else:
    PDF_THUMBNAIL_AVAILABLE = True
46
+
47
  # ======================================================
48
  # SESSION STATE
49
  # ======================================================
50
 
51
  LAST_RESULTS = []
52
+ SELECTED_INDEX = None
53
 
54
  # ======================================================
55
  # CRYPTOGRAPHIC CORE
 
58
def sha256_text(t: str):
    """Return the hexadecimal SHA-256 digest of ``t`` (encoded with the default codec)."""
    digest = hashlib.sha256()
    digest.update(t.encode())
    return digest.hexdigest()
60
 
61
def citation_hash(r):
    """Return a 16-hex-character citation id for the result record ``r``.

    The id is the first 16 characters of the SHA-256 hex digest of the
    record's ``agency``, ``resolved_url`` and ``timestamp`` values joined
    with ``|``. A missing key raises ``KeyError``.
    """
    parts = (r["agency"], r["resolved_url"], r["timestamp"])
    fingerprint = "|".join(f"{part}" for part in parts)
    full_digest = hashlib.sha256(fingerprint.encode()).hexdigest()
    return full_digest[:16]
65
+
66
  def provenance_headers(payload: str):
67
  return {
68
  "Tool-Version": "1.7.0",
 
95
  }
96
 
97
  # ======================================================
98
+ # COVER SHEET PDF (CM/ECF STYLE)
99
  # ======================================================
100
 
101
  def generate_cover_sheet_pdf(district, ecf_no):
 
108
  f"<b>CM/ECF PRE-FILING COVER SHEET</b><br/><br/>"
109
  f"<b>District:</b> {district}<br/>"
110
  f"<b>Reference No.:</b> {ecf_no}<br/><br/>"
111
+ "This submission is a <b>pre-filing informational bundle</b> "
112
+ "generated from publicly available FOIA electronic reading rooms.<br/><br/>"
113
+ "No document is filed, certified, or authenticated by any court, "
114
+ "clerk, or agency."
115
  )
116
 
117
  doc.build([
 
131
  lines = ["PROPOSED EXHIBIT LIST\n"]
132
  for i, r in enumerate(LAST_RESULTS, 1):
133
  lines.append(
134
+ f"Exhibit {i:03d}: {r['agency']} FOIA Reading Room ({r['resolved_url']})"
 
135
  )
136
  return "\n".join(lines)
137
 
 
142
  def clerk_verification_checklist():
143
  return (
144
  "CLERK VERIFICATION CHECKLIST\n\n"
145
+ "☐ Confirm exhibit URLs resolve to agency domains\n"
146
+ "☐ Confirm SHA-256 hash matches downloaded document\n"
147
  "☐ Confirm document is publicly released\n"
148
+ "☐ Tool does NOT certify authenticity\n"
149
+ "☐ No sealed or restricted material included\n\n"
150
  "Relevant Rules:\n"
151
  "• FRE 902(5)\n"
152
  "• FRE 803(8)\n"
 
154
  )
155
 
156
  # ======================================================
157
+ # PDF GENERATION (ETHICS FOOTER)
158
  # ======================================================
159
 
160
  def generate_pdf(title, body, exhibit_no, ecf_no):
 
165
  canvas.setFont("Helvetica", 8)
166
  canvas.drawString(
167
  40, 20,
168
+ "AI-assisted formatting only; no legal analysis or factual assertions."
169
  )
170
  canvas.drawRightString(
171
  580, 20,
 
190
  return buf
191
 
192
  # ======================================================
193
+ # FOIA ADAPTERS (LINK-OUT ONLY)
194
  # ======================================================
195
 
196
class FOIAAdapter:
    """Base class for link-out adapters to an agency FOIA search page.

    Subclasses override ``agency`` (display name) and ``search_url`` (a URL
    template containing a ``{q}`` placeholder for the encoded query).
    """

    agency = "UNKNOWN"
    search_url = ""

    def search(self, query):
        """Build the search link for ``query`` and return it as a one-item list.

        No network request is made here; ``latency_ms`` only covers the time
        spent building the URL. A ``None`` query is treated as an empty
        string instead of raising ``TypeError`` inside ``quote_plus``.

        Returns:
            A list with a single dict holding ``agency``, ``title``, ``url``,
            ``timestamp`` (timezone-aware UTC, ISO 8601), ``latency_ms``,
            ``sealed`` and ``redacted``.
        """
        # Local import: the file-level import only brings in datetime/timedelta.
        from datetime import timezone

        # perf_counter is monotonic, so the delta cannot go negative on a
        # wall-clock adjustment the way time.time() can.
        start = time.perf_counter()
        url = self.search_url.format(q=quote_plus(query or ""))
        latency = round((time.perf_counter() - start) * 1000, 1)
        return [{
            "agency": self.agency,
            "title": f"{self.agency} FOIA Reading Room",
            "url": url,
            # datetime.utcnow() is deprecated and returns a naive value;
            # record an explicit UTC offset instead.
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "latency_ms": latency,
            "sealed": False,
            "redacted": False,
        }]
213
 
214
class CIA(FOIAAdapter):
    """Link-out adapter for the CIA search URL."""

    agency = "CIA"
    search_url = "https://www.cia.gov/readingroom/search/site/{q}"


class FBI(FOIAAdapter):
    """Link-out adapter for the FBI search URL."""

    agency = "FBI"
    search_url = "https://vault.fbi.gov/search?SearchableText={q}"


class DOJ(FOIAAdapter):
    """Link-out adapter for the DOJ search URL."""

    agency = "DOJ"
    search_url = "https://www.justice.gov/foia/library?search={q}"


class DHS(FOIAAdapter):
    """Link-out adapter for the DHS search URL."""

    agency = "DHS"
    search_url = "https://www.dhs.gov/foia-library/search?search={q}"


class STATE(FOIAAdapter):
    """Link-out adapter for the State Department search URL."""

    agency = "State Department"
    search_url = "https://foia.state.gov/Search/Search.aspx?q={q}"


class NSA(FOIAAdapter):
    """Link-out adapter for the NSA search URL."""

    agency = "NSA"
    search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"


# Registry keyed by the label shown in the UI; insertion order is the
# order the choices are listed in.
ALL_ADAPTERS = {
    label: adapter_cls()
    for label, adapter_cls in (
        ("CIA", CIA),
        ("FBI", FBI),
        ("DOJ", DOJ),
        ("DHS", DHS),
        ("State", STATE),
        ("NSA", NSA),
    )
}
246
 
247
+ # ======================================================
248
+ # PDF RESOLUTION
249
+ # ======================================================
250
+
251
def resolve_pdf_url(url):
    """Follow redirects from ``url`` and report whether the target is a PDF.

    The target counts as a PDF when the final URL (or its path component,
    ignoring any query string) ends in ``.pdf``, or when the response's
    ``Content-Type`` header contains ``application/pdf``.

    Returns:
        ``(is_pdf, final_url)``. If the request fails, ``(False, url)`` with
        the original URL unchanged.
    """
    # Local import: the file-level import only brings in quote_plus.
    from urllib.parse import urlparse

    try:
        # stream=True defers the body download: only the headers and the
        # final URL are needed here. The context manager releases the
        # connection without reading the body.
        with requests.get(
            url, timeout=15, allow_redirects=True, stream=True
        ) as r:
            ct = r.headers.get("content-type", "").lower()
            final_url = r.url
    except (requests.RequestException, ValueError):
        # Best-effort: an unreachable or malformed link is reported as
        # "not a PDF" so the search can continue.
        return False, url

    lowered = final_url.lower()
    # Also test the path alone so "report.pdf?download=1" is recognised.
    path = urlparse(lowered).path
    is_pdf = (
        lowered.endswith(".pdf")
        or path.endswith(".pdf")
        or "application/pdf" in ct
    )
    return is_pdf, final_url
259
+
260
def generate_pdf_thumbnails(url, max_pages=3):
    """Download the PDF at ``url`` and return base64 PNGs of its first pages.

    Returns an empty list when thumbnail support is unavailable
    (``PDF_THUMBNAIL_AVAILABLE`` is false) or when anything fails while
    downloading or converting. Otherwise returns one base64-encoded PNG
    string per converted page, covering pages 1 through ``max_pages``.
    """
    if not PDF_THUMBNAIL_AVAILABLE:
        return []

    def _encode_png(page):
        # Serialise one page image to PNG in memory, then base64-encode it.
        out = io.BytesIO()
        page.save(out, format="PNG")
        return base64.b64encode(out.getvalue()).decode()

    try:
        response = requests.get(url, timeout=15)
        pages = convert_from_bytes(
            response.content, first_page=1, last_page=max_pages
        )
        return [_encode_png(page) for page in pages]
    except Exception:
        return []
274
 
275
  # ======================================================
276
  # SEARCH
277
  # ======================================================
278
 
279
def run_search(query, agencies):
    """Run ``query`` against the selected agencies and refresh the result state.

    Resets the module-level ``LAST_RESULTS`` and ``SELECTED_INDEX``, then for
    every adapter result resolves the link, computes its citation hash and
    (for PDFs) its thumbnails.

    Args:
        query: Search terms passed to each adapter.
        agencies: Iterable of ``ALL_ADAPTERS`` keys; ``None`` or empty means
            no agencies. Labels missing from ``ALL_ADAPTERS`` are skipped.

    Returns:
        ``(rows, cards_html, status)``: the table rows, the HTML from
        ``render_cards()``, and the fixed status text.
    """
    # NOTE(review): results live in module globals, so they are presumably
    # shared by everyone using the same process; confirm that is intended.
    global LAST_RESULTS, SELECTED_INDEX
    SELECTED_INDEX = None
    LAST_RESULTS = []
    rows = []

    for name in agencies or []:
        adapter = ALL_ADAPTERS.get(name)
        if adapter is None:
            # Unknown label: skip it rather than abort the whole search
            # with a KeyError.
            continue
        for r in adapter.search(query):
            # The adapter only builds a URL, so its own latency figure is
            # effectively zero. Time the network resolution instead, since
            # that is the value shown in the "Latency" column.
            started = time.perf_counter()
            r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
            r["latency_ms"] = round((time.perf_counter() - started) * 1000, 1)
            r["hash"] = citation_hash(r)
            r["thumbnails"] = (
                generate_pdf_thumbnails(r["resolved_url"])
                if r["resolved_pdf"] else []
            )
            LAST_RESULTS.append(r)
            rows.append([
                r["agency"],
                r["title"],
                r["resolved_url"],
                r["hash"],
                f"{r['latency_ms']} ms",
            ])

    return rows, render_cards(), "No document selected"
304
+
305
+ # ======================================================
306
+ # RENDER CARDS
307
+ # ======================================================
308
+
309
def render_cards():
    """Render ``LAST_RESULTS`` as HTML cards for the results gallery.

    Each card shows the agency, a fixed "PUBLIC" badge, the title, either
    the thumbnails or an "Open Source" link, and Select / View actions.
    The agency, title and resolved URL are HTML-escaped before
    interpolation: the URL comes back from a remote redirect chain and
    embeds the user's query, so it must not be able to inject markup or
    break out of the ``href`` attribute.

    Returns:
        The concatenated card HTML, or ``"<i>No results</i>"`` when there
        are no results.
    """
    # Local import keeps this block self-contained.
    from html import escape

    cards = []
    for idx, r in enumerate(LAST_RESULTS):
        badge = "PUBLIC"
        agency = escape(str(r["agency"]))
        title = escape(str(r["title"]))
        href = escape(str(r["resolved_url"]), quote=True)
        # Thumbnails are base64 text generated locally, so they are safe
        # to embed as data URIs.
        thumbs = "".join(
            f'<img src="data:image/png;base64,{t}" '
            f'style="width:30%;margin:4px;border-radius:6px;border:1px solid #ccc" />'
            for t in r["thumbnails"]
        )
        preview = thumbs or (
            f'<a href="{href}" target="_blank" rel="noopener noreferrer">'
            f'Open Source</a>'
        )
        cards.append(f"""
<div class="card">
<div class="card-header">
<b>{agency}</b>
<span class="badge">{badge}</span>
</div>
<div>{title}</div>
<div>{preview}</div>
<div class="actions">
<button onclick="selectDoc({idx})">Select</button>
<a href="{href}" target="_blank" rel="noopener noreferrer">View</a>
</div>
</div>
""")
    return "".join(cards) or "<i>No results</i>"
334
 
335
  # ======================================================
336
  # COURT BUNDLE
 
338
 
339
def generate_court_bundle(district):
    """Build the court bundle ZIP for ``district`` and return it opened for reading.

    The archive contains the cover sheet, one exhibit PDF and one
    ``.sha256`` entry per item in ``LAST_RESULTS``, the proposed exhibit
    list and the clerk checklist.

    Returns:
        A binary file object opened on the archive. Its ``.name`` is a path
        that still exists after this function returns. The caller is
        responsible for closing the handle and removing the file.
    """
    ecf_no = generate_ecf_filing_number()

    # mkdtemp rather than TemporaryDirectory: the context manager deleted the
    # directory, and the archive with it, as the function returned, leaving
    # the caller a handle to an unlinked file with a dangling path.
    td = tempfile.mkdtemp(prefix="court_bundle_")
    zpath = os.path.join(td, "court_bundle.zip")

    with zipfile.ZipFile(zpath, "w") as z:
        z.writestr(
            "00_Cover_Sheet.pdf",
            generate_cover_sheet_pdf(district, ecf_no).read(),
        )

        for i, r in enumerate(LAST_RESULTS, 1):
            pdf = generate_pdf(
                "Judicial Appendix",
                f"{r['agency']} FOIA Reading Room\n{r['resolved_url']}",
                f"{i:03d}",
                ecf_no,
            )
            z.writestr(f"Exhibit_{i:03d}.pdf", pdf.read())
            # NOTE(review): this writes the record's stored "hash" field. If
            # that is the truncated citation id rather than a digest of the
            # document, the ".sha256" name overstates it; confirm.
            z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])

        z.writestr("proposed_exhibit_list.txt", generate_proposed_exhibit_list())
        z.writestr("clerk_verification_checklist.txt", clerk_verification_checklist())

    return open(zpath, "rb")
358
 
359
  # ======================================================
360
  # UI
361
  # ======================================================
362
 
363
CSS = """
.card { border:1px solid #ddd; border-radius:16px; padding:16px; margin-bottom:20px; }
.card-header { display:flex; justify-content:space-between; }
.actions { margin-top:10px; display:flex; gap:12px; }
.badge { background:#eef; padding:4px 10px; border-radius:999px; }
"""


def _export_bundle(district_name):
    """Build the court bundle and return a file path the File output can serve.

    The archive bytes are copied into a fresh temporary file, so the path
    handed to the UI is valid however ``generate_court_bundle`` manages its
    own working directory.
    """
    with generate_court_bundle(district_name) as src:
        payload = src.read()
    fd, path = tempfile.mkstemp(prefix="court_bundle_", suffix=".zip")
    with os.fdopen(fd, "wb") as dst:
        dst.write(payload)
    return path


with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
    gr.Markdown("## Federal FOIA Intelligence Search\nPublic FOIA reading rooms only")

    with gr.Tab("Search"):
        agencies = gr.CheckboxGroup(
            list(ALL_ADAPTERS.keys()),
            value=list(ALL_ADAPTERS.keys()),
            label="Agencies",
        )
        query = gr.Textbox(placeholder="Search terms")
        table = gr.Dataframe(
            headers=["Agency", "Title", "Resolved URL", "Hash", "Latency"]
        )
        gallery = gr.HTML()
        status = gr.Textbox(label="Selection Status")
        gr.Button("Search").click(run_search, [query, agencies], [table, gallery, status])

    with gr.Tab("Court / Clerk"):
        district = gr.Dropdown(list(DISTRICT_SCHEMAS.keys()), value="Generic")
        # The bundle used to hang off File.upload with a lambda whose default
        # argument captured the Dropdown component itself, and with no inputs
        # or outputs, so nothing was ever offered for download. Pass the
        # selected value as an input and route the result to the File output.
        bundle_file = gr.File(label="Download Court Bundle", interactive=False)
        gr.Button("Generate Court Bundle").click(
            _export_bundle, district, bundle_file
        )
        gr.Textbox(value=clerk_verification_checklist(), lines=14)

    with gr.Tab("Governance & Trust"):
        gr.HTML("""
<iframe src="/governance-site/index.html"
style="width:100%;height:700px;border:1px solid #ccc;border-radius:12px;"></iframe>
""")

app.launch()