GodsDevProject committed on
Commit
9b034b1
·
verified ·
1 Parent(s): 15ccefb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -189
app.py CHANGED
@@ -1,34 +1,15 @@
 
 
 
 
 
1
  import gradio as gr
2
  import time, hashlib, io, zipfile, os, tempfile, base64
3
- import xml.etree.ElementTree as ET
4
- from datetime import datetime, timedelta
5
  from urllib.parse import quote_plus
6
  import requests
7
 
8
- from reportlab.platypus import (
9
- SimpleDocTemplate, Paragraph, Spacer, PageBreak
10
- )
11
- from reportlab.lib.styles import getSampleStyleSheet
12
- from reportlab.lib.pagesizes import LETTER
13
-
14
- # ======================================================
15
- # HARD FEATURE FLAGS (GOVERNANCE — MUST NOT CHANGE)
16
- # ======================================================
17
-
18
- ENABLE_FAISS_PHASE_4 = False # HARD DISABLED
19
- ENABLE_AI = True # OPT-IN ONLY
20
- ENABLE_PDF_EXTRACTION = True # OPT-IN ONLY
21
-
22
- # ======================================================
23
- # FIPS MODE (DECLARATIVE)
24
- # ======================================================
25
-
26
- FIPS_140_MODE = False
27
-
28
- # ======================================================
29
- # OPTIONAL PDF SUPPORT
30
- # ======================================================
31
-
32
  PDF_TEXT_AVAILABLE = False
33
  PDF_THUMBNAIL_AVAILABLE = False
34
 
@@ -44,8 +25,23 @@ try:
44
  except Exception:
45
  pass
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # ======================================================
48
- # SESSION STATE
49
  # ======================================================
50
 
51
  LAST_RESULTS = []
@@ -65,7 +61,7 @@ def citation_hash(r):
65
 
66
  def provenance_headers(payload: str):
67
  return {
68
- "Tool-Version": "1.7.0",
69
  "Generated-UTC": datetime.utcnow().isoformat(),
70
  "Content-SHA256": sha256_text(payload),
71
  "Public-Source-Only": "true",
@@ -74,133 +70,35 @@ def provenance_headers(payload: str):
74
  }
75
 
76
  def render_provenance_block(text: str):
77
- return "\n".join(f"{k}: {v}" for k, v in provenance_headers(text).items())
78
-
79
- # ======================================================
80
- # ECF NUMBER (LOCAL / PRE-FILING ONLY)
81
- # ======================================================
82
-
83
- def generate_ecf_filing_number():
84
- return f"ECF-PREFILE-{datetime.utcnow().strftime('%Y%m%d-%H%M%S')}"
85
-
86
- # ======================================================
87
- # DISTRICT DEFINITIONS
88
- # ======================================================
89
-
90
- DISTRICT_SCHEMAS = {
91
- "Generic": {},
92
- "D.D.C.": {},
93
- "S.D.N.Y.": {},
94
- "N.D. Cal.": {},
95
- }
96
-
97
- # ======================================================
98
- # COVER SHEET PDF (CM/ECF STYLE)
99
- # ======================================================
100
-
101
- def generate_cover_sheet_pdf(district, ecf_no):
102
- buf = io.BytesIO()
103
- styles = getSampleStyleSheet()
104
-
105
- doc = SimpleDocTemplate(buf, pagesize=LETTER)
106
-
107
- body = (
108
- f"<b>CM/ECF PRE-FILING COVER SHEET</b><br/><br/>"
109
- f"<b>District:</b> {district}<br/>"
110
- f"<b>Reference No.:</b> {ecf_no}<br/><br/>"
111
- "This submission is a <b>pre-filing informational bundle</b> "
112
- "generated from publicly available FOIA electronic reading rooms.<br/><br/>"
113
- "No document is filed, certified, or authenticated by any court, "
114
- "clerk, or agency."
115
- )
116
-
117
- doc.build([
118
- Paragraph(body, styles["Normal"]),
119
- PageBreak(),
120
- Paragraph(render_provenance_block(body).replace("\n", "<br/>"), styles["Code"])
121
- ])
122
-
123
- buf.seek(0)
124
- return buf
125
-
126
- # ======================================================
127
- # EXHIBIT LIST
128
- # ======================================================
129
-
130
- def generate_proposed_exhibit_list():
131
- lines = ["PROPOSED EXHIBIT LIST\n"]
132
- for i, r in enumerate(LAST_RESULTS, 1):
133
- lines.append(
134
- f"Exhibit {i:03d}: {r['agency']} FOIA Reading Room ({r['resolved_url']})"
135
- )
136
- return "\n".join(lines)
137
-
138
- # ======================================================
139
- # CLERK VERIFICATION CHECKLIST
140
- # ======================================================
141
-
142
- def clerk_verification_checklist():
143
- return (
144
- "CLERK VERIFICATION CHECKLIST\n\n"
145
- "☐ Confirm exhibit URLs resolve to agency domains\n"
146
- "☐ Confirm SHA-256 hash matches downloaded document\n"
147
- "☐ Confirm document is publicly released\n"
148
- "☐ Tool does NOT certify authenticity\n"
149
- "☐ No sealed or restricted material included\n\n"
150
- "Relevant Rules:\n"
151
- "• FRE 902(5)\n"
152
- "• FRE 803(8)\n"
153
- "• FRE 1005\n"
154
  )
155
 
156
  # ======================================================
157
- # PDF GENERATION (ETHICS FOOTER)
158
  # ======================================================
159
 
160
- def generate_pdf(title, body, exhibit_no, ecf_no):
161
- buf = io.BytesIO()
162
- styles = getSampleStyleSheet()
163
-
164
- def footer(canvas, doc):
165
- canvas.setFont("Helvetica", 8)
166
- canvas.drawString(
167
- 40, 20,
168
- "AI-assisted formatting only; no legal analysis or factual assertions."
169
- )
170
- canvas.drawRightString(
171
- 580, 20,
172
- f"{ecf_no} — Exhibit {exhibit_no}"
173
- )
174
-
175
- doc = SimpleDocTemplate(
176
- buf,
177
- pagesize=LETTER,
178
- onFirstPage=footer,
179
- onLaterPages=footer
180
- )
181
-
182
- doc.build([
183
- Paragraph(f"<b>{title}</b>", styles["Title"]),
184
- Paragraph(body.replace("\n", "<br/>"), styles["Normal"]),
185
- PageBreak(),
186
- Paragraph(render_provenance_block(body).replace("\n", "<br/>"), styles["Code"]),
187
- ])
188
-
189
- buf.seek(0)
190
- return buf
191
 
192
  # ======================================================
193
- # FOIA ADAPTERS (LINK-OUT ONLY)
194
  # ======================================================
195
 
196
  class FOIAAdapter:
197
  agency = "UNKNOWN"
198
  search_url = ""
 
199
 
200
  def search(self, query):
201
  start = time.time()
202
  url = self.search_url.format(q=quote_plus(query))
203
  latency = round((time.time() - start) * 1000, 1)
 
204
  return [{
205
  "agency": self.agency,
206
  "title": f"{self.agency} FOIA Reading Room",
@@ -225,7 +123,7 @@ class DOJ(FOIAAdapter):
225
 
226
  class DHS(FOIAAdapter):
227
  agency = "DHS"
228
- search_url = "https://www.dhs.gov/foia-library/search?search={q}"
229
 
230
  class STATE(FOIAAdapter):
231
  agency = "State Department"
@@ -233,7 +131,7 @@ class STATE(FOIAAdapter):
233
 
234
  class NSA(FOIAAdapter):
235
  agency = "NSA"
236
- search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
237
 
238
  ALL_ADAPTERS = {
239
  "CIA": CIA(),
@@ -245,24 +143,28 @@ ALL_ADAPTERS = {
245
  }
246
 
247
  # ======================================================
248
- # PDF RESOLUTION
249
  # ======================================================
250
 
251
  def resolve_pdf_url(url):
252
  try:
253
- r = requests.get(url, timeout=15, allow_redirects=True)
254
  ct = r.headers.get("content-type", "").lower()
255
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
256
  return is_pdf, r.url
257
  except Exception:
258
  return False, url
259
 
260
- def generate_pdf_thumbnails(url, max_pages=3):
261
  if not PDF_THUMBNAIL_AVAILABLE:
262
  return []
263
  try:
264
- r = requests.get(url, timeout=15)
265
- images = convert_from_bytes(r.content, first_page=1, last_page=max_pages)
 
 
 
 
266
  thumbs = []
267
  for img in images:
268
  buf = io.BytesIO()
@@ -278,82 +180,114 @@ def generate_pdf_thumbnails(url, max_pages=3):
278
 
279
  def run_search(query, agencies):
280
  global LAST_RESULTS, SELECTED_INDEX
281
- SELECTED_INDEX = None
282
  LAST_RESULTS = []
 
 
283
  rows = []
284
 
285
  for name in agencies:
286
  adapter = ALL_ADAPTERS[name]
287
  for r in adapter.search(query):
288
- r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
 
 
289
  r["hash"] = citation_hash(r)
290
  r["thumbnails"] = (
291
- generate_pdf_thumbnails(r["resolved_url"])
292
- if r["resolved_pdf"] else []
293
  )
294
  LAST_RESULTS.append(r)
 
295
  rows.append([
296
  r["agency"],
297
  r["title"],
298
  r["resolved_url"],
299
  r["hash"],
300
- f"{r['latency_ms']} ms"
301
  ])
302
 
303
  return rows, render_cards(), "No document selected"
304
 
305
  # ======================================================
306
- # RENDER CARDS
307
  # ======================================================
308
 
309
  def render_cards():
310
  cards = []
311
  for idx, r in enumerate(LAST_RESULTS):
312
- badge = "PUBLIC"
313
  thumbs = "".join(
314
  f'<img src="data:image/png;base64,{t}" '
315
- f'style="width:30%;margin:4px;border-radius:6px;border:1px solid #ccc" />'
316
  for t in r["thumbnails"]
317
  )
318
- preview = thumbs or f'<a href="{r["resolved_url"]}" target="_blank">Open Source</a>'
 
319
  cards.append(f"""
320
  <div class="card">
321
- <div class="card-header">
322
- <b>{r['agency']}</b>
323
- <span class="badge">{badge}</span>
324
- </div>
325
- <div>{r['title']}</div>
326
- <div>{preview}</div>
327
- <div class="actions">
328
- <button onclick="selectDoc({idx})">Select</button>
329
- <a href="{r['resolved_url']}" target="_blank">View</a>
330
- </div>
 
331
  </div>
332
  """)
333
- return "".join(cards) or "<i>No results</i>"
 
334
 
335
  # ======================================================
336
- # COURT BUNDLE
337
  # ======================================================
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  def generate_court_bundle(district):
340
  ecf_no = generate_ecf_filing_number()
341
  with tempfile.TemporaryDirectory() as td:
342
  zpath = os.path.join(td, "court_bundle.zip")
343
  with zipfile.ZipFile(zpath, "w") as z:
344
- z.writestr("00_Cover_Sheet.pdf",
345
- generate_cover_sheet_pdf(district, ecf_no).read())
 
 
346
  for i, r in enumerate(LAST_RESULTS, 1):
347
- pdf = generate_pdf(
348
- "Judicial Appendix",
349
- f"{r['agency']} FOIA Reading Room\n{r['resolved_url']}",
350
- f"{i:03d}",
351
- ecf_no
 
 
352
  )
353
- z.writestr(f"Exhibit_{i:03d}.pdf", pdf.read())
354
- z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])
355
- z.writestr("proposed_exhibit_list.txt", generate_proposed_exhibit_list())
356
- z.writestr("clerk_verification_checklist.txt", clerk_verification_checklist())
357
  return open(zpath, "rb")
358
 
359
  # ======================================================
@@ -368,7 +302,10 @@ CSS = """
368
  """
369
 
370
  with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
371
- gr.Markdown("## Federal FOIA Intelligence Search\nPublic FOIA reading rooms only")
 
 
 
372
 
373
  with gr.Tab("Search"):
374
  agencies = gr.CheckboxGroup(
@@ -381,20 +318,33 @@ with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
381
  headers=["Agency", "Title", "Resolved URL", "Hash", "Latency"]
382
  )
383
  gallery = gr.HTML()
384
- status = gr.Textbox(label="Selection Status")
385
- gr.Button("Search").click(run_search, [query, agencies], [table, gallery, status])
 
 
 
 
 
386
 
387
  with gr.Tab("Court / Clerk"):
388
- district = gr.Dropdown(list(DISTRICT_SCHEMAS.keys()), value="Generic")
389
- gr.File(label="Download Court Bundle").upload(
 
 
 
390
  lambda d=district: generate_court_bundle(d)
391
  )
392
- gr.Textbox(value=clerk_verification_checklist(), lines=14)
393
 
394
- with gr.Tab("Governance & Trust"):
395
- gr.HTML("""
396
- <iframe src="/governance-site/index.html"
397
- style="width:100%;height:700px;border:1px solid #ccc;border-radius:12px;"></iframe>
398
- """)
 
 
399
 
400
- app.launch(share=True)
 
 
 
 
 
1
+ # ======================================================
2
+ # Federal FOIA Intelligence Search
3
+ # HF Reviewer–Safe Reference Implementation
4
+ # ======================================================
5
+
6
  import gradio as gr
7
  import time, hashlib, io, zipfile, os, tempfile, base64
8
+ from datetime import datetime
 
9
  from urllib.parse import quote_plus
10
  import requests
11
 
12
+ # Optional PDF tooling (safe fallbacks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  PDF_TEXT_AVAILABLE = False
14
  PDF_THUMBNAIL_AVAILABLE = False
15
 
 
25
  except Exception:
26
  pass
27
 
28
+ from reportlab.platypus import (
29
+ SimpleDocTemplate, Paragraph, PageBreak
30
+ )
31
+ from reportlab.lib.styles import getSampleStyleSheet
32
+ from reportlab.lib.pagesizes import LETTER
33
+
34
+ # ======================================================
35
+ # HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
36
+ # ======================================================
37
+
38
+ ENABLE_FAISS_PHASE_4 = False # HARD DISABLED
39
+ ENABLE_AI = True # USER OPT-IN ONLY
40
+ ENABLE_PDF_EXTRACTION = True # USER OPT-IN ONLY
41
+ FIPS_140_MODE = False
42
+
43
  # ======================================================
44
+ # SESSION STATE (EPHEMERAL)
45
  # ======================================================
46
 
47
  LAST_RESULTS = []
 
61
 
62
  def provenance_headers(payload: str):
63
  return {
64
+ "Tool-Version": "1.8.0",
65
  "Generated-UTC": datetime.utcnow().isoformat(),
66
  "Content-SHA256": sha256_text(payload),
67
  "Public-Source-Only": "true",
 
70
  }
71
 
72
  def render_provenance_block(text: str):
73
+ return "\n".join(
74
+ f"{k}: {v}" for k, v in provenance_headers(text).items()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  )
76
 
77
  # ======================================================
78
+ # FAISS PHASE-4 (STUB — GOVERNANCE LOCKED)
79
  # ======================================================
80
 
81
class Phase4FAISSStub:
    """Placeholder for FAISS Phase-4 indexing; carries no state or behaviour.

    Instantiation is refused whenever the module-level
    ``ENABLE_FAISS_PHASE_4`` flag is truthy.
    """

    def __init__(self):
        # Nothing to set up while the flag is off.
        if not ENABLE_FAISS_PHASE_4:
            return
        raise RuntimeError(
            "FAISS Phase-4 indexing is disabled by governance policy."
        )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  # ======================================================
89
+ # FOIA ADAPTERS (LINK-OUT ONLY, API-READY)
90
  # ======================================================
91
 
92
  class FOIAAdapter:
93
  agency = "UNKNOWN"
94
  search_url = ""
95
+ api_endpoint = None # future document-level APIs
96
 
97
  def search(self, query):
98
  start = time.time()
99
  url = self.search_url.format(q=quote_plus(query))
100
  latency = round((time.time() - start) * 1000, 1)
101
+
102
  return [{
103
  "agency": self.agency,
104
  "title": f"{self.agency} FOIA Reading Room",
 
123
 
124
  class DHS(FOIAAdapter):
125
  agency = "DHS"
126
+ search_url = "https://www.dhs.gov/foia-library"
127
 
128
  class STATE(FOIAAdapter):
129
  agency = "State Department"
 
131
 
132
  class NSA(FOIAAdapter):
133
  agency = "NSA"
134
+ search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/"
135
 
136
  ALL_ADAPTERS = {
137
  "CIA": CIA(),
 
143
  }
144
 
145
  # ======================================================
146
+ # PDF RESOLUTION (SAFE HEAD CHECK)
147
  # ======================================================
148
 
149
  def resolve_pdf_url(url):
150
  try:
151
+ r = requests.get(url, timeout=10, allow_redirects=True)
152
  ct = r.headers.get("content-type", "").lower()
153
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
154
  return is_pdf, r.url
155
  except Exception:
156
  return False, url
157
 
158
+ def generate_pdf_thumbnails(url, max_pages=2):
159
  if not PDF_THUMBNAIL_AVAILABLE:
160
  return []
161
  try:
162
+ r = requests.get(url, timeout=10)
163
+ images = convert_from_bytes(
164
+ r.content,
165
+ first_page=1,
166
+ last_page=max_pages
167
+ )
168
  thumbs = []
169
  for img in images:
170
  buf = io.BytesIO()
 
180
 
181
  def run_search(query, agencies):
182
  global LAST_RESULTS, SELECTED_INDEX
 
183
  LAST_RESULTS = []
184
+ SELECTED_INDEX = None
185
+
186
  rows = []
187
 
188
  for name in agencies:
189
  adapter = ALL_ADAPTERS[name]
190
  for r in adapter.search(query):
191
+ is_pdf, resolved = resolve_pdf_url(r["url"])
192
+ r["resolved_pdf"] = is_pdf
193
+ r["resolved_url"] = resolved
194
  r["hash"] = citation_hash(r)
195
  r["thumbnails"] = (
196
+ generate_pdf_thumbnails(resolved) if is_pdf else []
 
197
  )
198
  LAST_RESULTS.append(r)
199
+
200
  rows.append([
201
  r["agency"],
202
  r["title"],
203
  r["resolved_url"],
204
  r["hash"],
205
+ f"{r['latency_ms']} ms",
206
  ])
207
 
208
  return rows, render_cards(), "No document selected"
209
 
210
  # ======================================================
211
+ # RESULT CARDS
212
  # ======================================================
213
 
214
  def render_cards():
215
  cards = []
216
  for idx, r in enumerate(LAST_RESULTS):
 
217
  thumbs = "".join(
218
  f'<img src="data:image/png;base64,{t}" '
219
+ f'style="width:30%;margin:4px;border-radius:6px;" />'
220
  for t in r["thumbnails"]
221
  )
222
+ preview = thumbs or f'<a href="{r["resolved_url"]}" target="_blank">View Source</a>'
223
+
224
  cards.append(f"""
225
  <div class="card">
226
+ <div class="card-header">
227
+ <b>{r['agency']}</b>
228
+ <span class="badge">PUBLIC</span>
229
+ </div>
230
+ <div><b>{r['title']}</b></div>
231
+ <div>{preview}</div>
232
+ <div class="actions">
233
+ <a href="{r['resolved_url']}" target="_blank">View</a>
234
+ {"<a href='"+r['resolved_url']+"' download>Download</a>" if r["resolved_pdf"] else ""}
235
+ <a href="{r['resolved_url']}" target="_blank">Share</a>
236
+ </div>
237
  </div>
238
  """)
239
+
240
+ return "".join(cards) or "No results."
241
 
242
  # ======================================================
243
+ # CM/ECF BUNDLE
244
  # ======================================================
245
 
246
def generate_ecf_filing_number():
    """Return a local pre-filing reference number stamped with the UTC time.

    Returns:
        A string of the form ``ECF-PREFILE-YYYYMMDD-HHMMSS``. Resolution is
        one second, so two calls within the same second return the same
        value.
    """
    # datetime.utcnow() is deprecated since Python 3.12. An aware UTC
    # datetime formats to the same digits.
    from datetime import timezone

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    return f"ECF-PREFILE-{stamp}"
248
+
249
def generate_cover_sheet_pdf(district, ecf_no):
    """Build the two-page pre-filing cover sheet as an in-memory PDF.

    Page one carries the cover text naming *district* and *ecf_no*. After a
    page break, page two carries the provenance block computed over that
    same cover text.

    Args:
        district: District label inserted into the cover text.
        ecf_no: Reference number inserted into the cover text.

    Returns:
        An ``io.BytesIO`` holding the PDF, rewound to position 0.
    """
    cover_markup = "".join([
        "<b>CM/ECF PRE-FILING COVER SHEET</b><br/><br/>",
        f"<b>District:</b> {district}<br/>",
        f"<b>Reference No.:</b> {ecf_no}<br/><br/>",
        "This bundle contains public FOIA references only.<br/>",
        "No filing, certification, or authentication is made.",
    ])
    # Paragraph markup uses <br/> rather than newlines for line breaks.
    provenance_markup = render_provenance_block(cover_markup).replace(
        "\n", "<br/>"
    )

    sheet = getSampleStyleSheet()
    story = [
        Paragraph(cover_markup, sheet["Normal"]),
        PageBreak(),
        Paragraph(provenance_markup, sheet["Code"]),
    ]

    out = io.BytesIO()
    SimpleDocTemplate(out, pagesize=LETTER).build(story)
    out.seek(0)
    return out
272
+
273
  def generate_court_bundle(district):
274
  ecf_no = generate_ecf_filing_number()
275
  with tempfile.TemporaryDirectory() as td:
276
  zpath = os.path.join(td, "court_bundle.zip")
277
  with zipfile.ZipFile(zpath, "w") as z:
278
+ z.writestr(
279
+ "00_Cover_Sheet.pdf",
280
+ generate_cover_sheet_pdf(district, ecf_no).read()
281
+ )
282
  for i, r in enumerate(LAST_RESULTS, 1):
283
+ z.writestr(
284
+ f"Exhibit_{i:03d}.txt",
285
+ f"{r['agency']}\n{r['resolved_url']}"
286
+ )
287
+ z.writestr(
288
+ f"Exhibit_{i:03d}.sha256",
289
+ r["hash"]
290
  )
 
 
 
 
291
  return open(zpath, "rb")
292
 
293
  # ======================================================
 
302
  """
303
 
304
  with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
305
+ gr.Markdown(
306
+ "## Federal FOIA Intelligence Search\n"
307
+ "Public FOIA reading rooms only • Research & education use"
308
+ )
309
 
310
  with gr.Tab("Search"):
311
  agencies = gr.CheckboxGroup(
 
318
  headers=["Agency", "Title", "Resolved URL", "Hash", "Latency"]
319
  )
320
  gallery = gr.HTML()
321
+ status = gr.Textbox(label="Status")
322
+
323
+ gr.Button("Search").click(
324
+ run_search,
325
+ [query, agencies],
326
+ [table, gallery, status]
327
+ )
328
 
329
  with gr.Tab("Court / Clerk"):
330
+ district = gr.Dropdown(
331
+ ["Generic", "D.D.C.", "S.D.N.Y.", "N.D. Cal."],
332
+ value="Generic"
333
+ )
334
+ gr.File(label="Download CM/ECF Bundle").upload(
335
  lambda d=district: generate_court_bundle(d)
336
  )
 
337
 
338
+ with gr.Tab("Governance"):
339
+ gr.Markdown(
340
+ "• No scraping\n"
341
+ " No certification\n"
342
+ "• AI formatting only\n"
343
+ "• Court-safe by design"
344
+ )
345
 
346
+ app.launch(
347
+ server_name="0.0.0.0",
348
+ server_port=7860,
349
+ share=True
350
+ )