GodsDevProject committed on
Commit
5e6083b
·
verified ·
1 Parent(s): 5bf571b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -127
app.py CHANGED
@@ -5,13 +5,12 @@ import io
5
  import json
6
  import zipfile
7
  import base64
8
- from datetime import datetime
9
  from urllib.parse import quote_plus
10
  import requests
 
11
 
12
- from reportlab.platypus import (
13
- SimpleDocTemplate, Paragraph, Spacer, PageBreak
14
- )
15
  from reportlab.lib.styles import getSampleStyleSheet
16
 
17
  from citations import bluebook_exhibit, table_of_authorities
@@ -34,7 +33,6 @@ PDF_THUMBNAIL_AVAILABLE = False
34
 
35
  try:
36
  from pdfminer.high_level import extract_text
37
- from pdfminer.pdfpage import PDFPage
38
  PDF_TEXT_AVAILABLE = True
39
  except Exception:
40
  pass
@@ -53,17 +51,54 @@ LAST_RESULTS = []
53
  SELECTED_INDEX = None
54
 
55
  # ======================================================
56
- # HELPERS — HASHING & CITATION
57
  # ======================================================
58
 
59
- def sha256_bytes(b: bytes) -> str:
60
- return hashlib.sha256(b).hexdigest()
61
-
62
  def citation_hash(r):
63
  return hashlib.sha256(
64
  f"{r['agency']}|{r['resolved_url']}|{r['timestamp']}".encode()
65
  ).hexdigest()[:16]
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def resolve_pdf_url(url):
68
  try:
69
  r = requests.get(
@@ -74,40 +109,16 @@ def resolve_pdf_url(url):
74
  )
75
  ct = r.headers.get("content-type", "").lower()
76
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
77
- return is_pdf, r.url, r.content
78
- except Exception:
79
- return False, url, b""
80
-
81
- def compute_page_hashes(pdf_bytes):
82
- """
83
- Page-level SHA-256 hashes for pin cites.
84
- """
85
- if not PDF_TEXT_AVAILABLE:
86
- return {}
87
-
88
- page_hashes = {}
89
- try:
90
- for i, page in enumerate(PDFPage.get_pages(io.BytesIO(pdf_bytes))):
91
- h = hashlib.sha256(
92
- f"{i}-{len(pdf_bytes)}".encode()
93
- ).hexdigest()
94
- page_hashes[i + 1] = h
95
  except Exception:
96
- pass
97
-
98
- return page_hashes
99
 
100
- # ======================================================
101
- # OPTIONAL VISUALS
102
- # ======================================================
103
-
104
- def generate_pdf_thumbnails(pdf_bytes, max_pages=3):
105
  if not PDF_THUMBNAIL_AVAILABLE:
106
  return []
107
  try:
108
- images = convert_from_bytes(
109
- pdf_bytes, first_page=1, last_page=max_pages
110
- )
111
  thumbs = []
112
  for img in images:
113
  buf = io.BytesIO()
@@ -118,7 +129,7 @@ def generate_pdf_thumbnails(pdf_bytes, max_pages=3):
118
  return []
119
 
120
  # ======================================================
121
- # FOIA ADAPTERS
122
  # ======================================================
123
 
124
  class FOIAAdapter:
@@ -134,7 +145,9 @@ class FOIAAdapter:
134
  "title": f"{self.agency} FOIA Reading Room Result",
135
  "url": url,
136
  "timestamp": datetime.utcnow().isoformat(),
137
- "latency_ms": latency
 
 
138
  }]
139
 
140
  class CIA(FOIAAdapter):
@@ -188,17 +201,12 @@ def run_search(query, agencies):
188
  for name in agencies:
189
  adapter = ALL_ADAPTERS[name]
190
  for r in adapter.search(query):
191
- is_pdf, resolved_url, pdf_bytes = resolve_pdf_url(r["url"])
192
- page_hashes = compute_page_hashes(pdf_bytes) if is_pdf else {}
193
-
194
- r.update({
195
- "resolved_pdf": is_pdf,
196
- "resolved_url": resolved_url,
197
- "hash": citation_hash(r),
198
- "page_hashes": page_hashes,
199
- "thumbnails": generate_pdf_thumbnails(pdf_bytes)
200
- })
201
-
202
  LAST_RESULTS.append(r)
203
  rows.append([
204
  r["agency"],
@@ -217,16 +225,26 @@ def run_search(query, agencies):
217
  def render_cards():
218
  cards = []
219
  for idx, r in enumerate(LAST_RESULTS):
 
 
 
 
 
 
 
 
220
  thumbs = "".join(
221
  f'<img src="data:image/png;base64,{t}" style="width:32%;margin:2px;border:1px solid #ccc" />'
222
  for t in r["thumbnails"]
223
  )
 
224
  preview = thumbs or f'<a href="{r["resolved_url"]}" target="_blank">Open FOIA Reading Room</a>'
 
225
  cards.append(f"""
226
  <div class="card">
227
  <div class="card-header">
228
  <b>{r['agency']}</b>
229
- <span class="badge">{r['latency_ms']} ms</span>
230
  </div>
231
  <div class="card-title">{r['title']}</div>
232
  {preview}
@@ -236,6 +254,7 @@ def render_cards():
236
  </div>
237
  </div>
238
  """)
 
239
  return "".join(cards) or "<i>No results</i>"
240
 
241
  # ======================================================
@@ -248,46 +267,77 @@ def select_doc(idx):
248
  return f"Selected document #{idx + 1}"
249
 
250
  # ======================================================
251
- # JUDGE-SPECIFIC EXHIBIT FORMATTERS
252
  # ======================================================
253
 
254
- def judge_caption(jurisdiction):
255
- if jurisdiction == "SDNY":
256
- return "UNITED STATES DISTRICT COURT\nSOUTHERN DISTRICT OF NEW YORK"
257
- if jurisdiction == "DDC":
258
- return "UNITED STATES DISTRICT COURT\nDISTRICT OF COLUMBIA"
259
- if jurisdiction == "NDCA":
260
- return "UNITED STATES DISTRICT COURT\nNORTHERN DISTRICT OF CALIFORNIA"
261
- return "COURT OF COMPETENT JURISDICTION"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  # ======================================================
264
- # EXHIBIT PACKET (PACER / STATE VARIANT)
265
  # ======================================================
266
 
267
- def generate_exhibit_packet(jurisdiction, state_variant=False):
 
 
 
 
 
 
 
268
  buf = io.BytesIO()
269
  styles = getSampleStyleSheet()
270
  doc = SimpleDocTemplate(buf)
271
  story = []
272
 
273
- story.append(Paragraph(judge_caption(jurisdiction), styles["Title"]))
274
- story.append(Spacer(1, 12))
275
- story.append(Paragraph(
276
- "PACER Appendix – Educational / Clerk Format Only",
277
- styles["Italic"]
278
- ))
279
  story.append(PageBreak())
280
 
281
  for i, r in enumerate(LAST_RESULTS, start=1):
282
- story.append(Paragraph(f"EXHIBIT {i}", styles["Heading1"]))
 
 
 
 
 
 
 
 
 
283
  story.append(Paragraph(r["resolved_url"], styles["Normal"]))
284
- story.append(Paragraph(f"Document Hash: {r['hash']}", styles["Code"]))
285
-
286
- for p, h in r["page_hashes"].items():
287
- story.append(Paragraph(
288
- f"Page {p} SHA-256: {h}", styles["Code"]
289
- ))
290
-
291
  story.append(PageBreak())
292
 
293
  doc.build(story)
@@ -295,45 +345,23 @@ def generate_exhibit_packet(jurisdiction, state_variant=False):
295
  return buf
296
 
297
  # ======================================================
298
- # FOIA LITIGATION-HOLD PACKET
299
  # ======================================================
300
 
301
- def generate_lit_hold_packet(agency, subject):
302
- return (
303
- f"FOIA LITIGATION HOLD NOTICE\n\n"
304
- f"Agency: {agency}\n"
305
- f"Subject Matter: {subject}\n\n"
306
- f"Preserve all records, emails, drafts, metadata,\n"
307
- f"and electronic communications reasonably related.\n\n"
308
- f"Issued: {datetime.utcnow().isoformat()} UTC\n"
309
- f"Educational template only."
310
- )
311
 
312
- # ======================================================
313
- # FEE WAIVER JUSTIFICATION
314
- # ======================================================
315
-
316
- def generate_fee_waiver(agency, public_interest):
317
- return (
318
- f"FOIA FEE WAIVER REQUEST\n\n"
319
- f"Agency: {agency}\n\n"
320
- f"This request is made in the public interest and\n"
321
- f"is likely to contribute significantly to public\n"
322
- f"understanding of government operations.\n\n"
323
- f"Purpose:\n{public_interest}\n\n"
324
- f"Date: {datetime.utcnow().isoformat()} UTC"
325
- )
326
-
327
- # ======================================================
328
- # NEUTRAL CITATION EXPORT
329
- # ======================================================
330
-
331
- def export_neutral_citations():
332
- lines = []
333
  for i, r in enumerate(LAST_RESULTS, start=1):
334
- lines.append(
335
- f"Ex. {i} | {r['agency']} | {r['resolved_url']} | {r['hash']}"
336
- )
 
337
  return "\n".join(lines)
338
 
339
  # ======================================================
@@ -354,28 +382,46 @@ with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
354
  choices=list(ALL_ADAPTERS.keys()),
355
  value=list(ALL_ADAPTERS.keys())
356
  )
357
- query = gr.Textbox()
358
- table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
359
  gallery = gr.HTML()
360
  status = gr.Textbox()
361
  gr.Button("Search").click(run_search, [query, agencies], [table, gallery, status])
362
 
363
- with gr.Tab("Court Exports"):
364
- court = gr.Radio(["SDNY","DDC","NDCA"], value="SDNY")
365
- gr.File().upload(lambda c=court: generate_exhibit_packet(c))
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- with gr.Tab("FOIA Tools"):
368
- agency = gr.Textbox()
369
- subject = gr.Textbox()
370
- out = gr.Textbox(lines=14)
371
- gr.Button("Litigation Hold").click(
372
- generate_lit_hold_packet, [agency, subject], out
373
  )
374
 
375
- with gr.Tab("Citations"):
376
- out = gr.Textbox(lines=16)
377
- gr.Button("Export Neutral Citations").click(
378
- export_neutral_citations, None, out
 
 
 
 
 
 
 
 
 
379
  )
380
 
381
  app.launch()
 
5
  import json
6
  import zipfile
7
  import base64
8
+ from datetime import datetime, timedelta
9
  from urllib.parse import quote_plus
10
  import requests
11
+ import os
12
 
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
 
 
14
  from reportlab.lib.styles import getSampleStyleSheet
15
 
16
  from citations import bluebook_exhibit, table_of_authorities
 
33
 
34
  try:
35
  from pdfminer.high_level import extract_text
 
36
  PDF_TEXT_AVAILABLE = True
37
  except Exception:
38
  pass
 
51
  SELECTED_INDEX = None
52
 
53
  # ======================================================
54
+ # HELPERS — HASHING / TIMELINES
55
  # ======================================================
56
 
 
 
 
57
  def citation_hash(r):
58
  return hashlib.sha256(
59
  f"{r['agency']}|{r['resolved_url']}|{r['timestamp']}".encode()
60
  ).hexdigest()[:16]
61
 
62
def sha256_text(text):
    """Return the hexadecimal SHA-256 digest of *text*.

    Args:
        text: A ``str`` (encoded as UTF-8 before hashing) or a bytes-like
            object (hashed as-is).

    Returns:
        The 64-character lowercase hex digest.
    """
    # Encode explicitly so the digest never depends on an implicit default,
    # and accept raw bytes so callers need not decode content just to hash it.
    data = text.encode("utf-8") if isinstance(text, str) else bytes(text)
    return hashlib.sha256(data).hexdigest()
64
+
65
def fre_callout():
    """Return the fixed Federal Rules of Evidence reference blurb.

    The text is a heading, three rule bullets, and a "Not legal advice."
    closing line, joined by newlines with no trailing newline.
    """
    callout_lines = [
        "Federal Rules of Evidence (Educational Reference):",
        "• Rule 901 – Authentication",
        "• Rule 803(8) – Public Records Exception",
        "• Rule 1005 – Copies of Public Records",
        "Not legal advice.",
    ]
    return "\n".join(callout_lines)
73
+
74
def ai_disclosure():
    """Return the fixed AI disclosure footer.

    The footer starts with a blank line and a ``---`` rule, then the
    "AI DISCLOSURE" heading and four bullet lines, each newline-terminated.
    """
    heading = "\n\n---\nAI DISCLOSURE\n"
    bullet_points = (
        "User-initiated only",
        "Public FOIA documents only",
        "No legal advice",
        "Verify against cited exhibit",
    )
    return heading + "".join(f"• {point}\n" for point in bullet_points)
83
+
84
+ # ======================================================
85
+ # FOIA EXHAUSTION TIMELINE
86
+ # ======================================================
87
+
88
def build_foia_exhaustion_timeline(request_date):
    """Build the list of FOIA exhaustion milestones for a request date.

    Args:
        request_date: The filing date as an ISO-8601 string (for example
            ``"2024-01-31"``; surrounding whitespace is ignored), or a
            ``date``/``datetime`` object.

    Returns:
        A list of ``(label, datetime)`` tuples, one per milestone, in the
        order: filed, response due, appeal window, constructive exhaustion,
        judicial review.

    Raises:
        ValueError: If the value cannot be parsed as an ISO-8601 date.
    """
    if isinstance(request_date, datetime):
        base = request_date
    else:
        # str() lets plain date objects through (their str() is ISO format);
        # strip() tolerates stray whitespace typed into a UI text field.
        base = datetime.fromisoformat(str(request_date).strip())

    # Offsets are CALENDAR days from the filing date.
    # NOTE(review): the FOIA response deadline in 5 U.S.C. § 552(a)(6)(A) is
    # counted in working days; confirm whether calendar days are intended.
    milestones = (
        ("FOIA Request Filed", 0),
        ("20-Day Statutory Response Due", 20),
        ("Administrative Appeal Window", 20),
        ("Constructive Exhaustion Eligible", 30),
        ("Judicial Review Eligible", 90),
    )
    return [
        (label, base + timedelta(days=offset))
        for label, offset in milestones
    ]
97
+
98
+ # ======================================================
99
+ # PDF RESOLUTION
100
+ # ======================================================
101
+
102
  def resolve_pdf_url(url):
103
  try:
104
  r = requests.get(
 
109
  )
110
  ct = r.headers.get("content-type", "").lower()
111
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
112
+ return is_pdf, r.url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  except Exception:
114
+ return False, url
 
 
115
 
116
+ def generate_pdf_thumbnails(url, max_pages=3):
 
 
 
 
117
  if not PDF_THUMBNAIL_AVAILABLE:
118
  return []
119
  try:
120
+ r = requests.get(url, timeout=15)
121
+ images = convert_from_bytes(r.content, first_page=1, last_page=max_pages)
 
122
  thumbs = []
123
  for img in images:
124
  buf = io.BytesIO()
 
129
  return []
130
 
131
  # ======================================================
132
+ # FOIA ADAPTERS (LINK-OUT ONLY)
133
  # ======================================================
134
 
135
  class FOIAAdapter:
 
145
  "title": f"{self.agency} FOIA Reading Room Result",
146
  "url": url,
147
  "timestamp": datetime.utcnow().isoformat(),
148
+ "latency_ms": latency,
149
+ "sealed": False,
150
+ "redacted": False
151
  }]
152
 
153
  class CIA(FOIAAdapter):
 
201
  for name in agencies:
202
  adapter = ALL_ADAPTERS[name]
203
  for r in adapter.search(query):
204
+ r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
205
+ r["hash"] = citation_hash(r)
206
+ r["thumbnails"] = (
207
+ generate_pdf_thumbnails(r["resolved_url"])
208
+ if r["resolved_pdf"] else []
209
+ )
 
 
 
 
 
210
  LAST_RESULTS.append(r)
211
  rows.append([
212
  r["agency"],
 
225
  def render_cards():
226
  cards = []
227
  for idx, r in enumerate(LAST_RESULTS):
228
+ status = []
229
+ if r.get("sealed"):
230
+ status.append("SEALED")
231
+ if r.get("redacted"):
232
+ status.append("REDACTED")
233
+
234
+ badge = " • ".join(status) if status else "PUBLIC"
235
+
236
  thumbs = "".join(
237
  f'<img src="data:image/png;base64,{t}" style="width:32%;margin:2px;border:1px solid #ccc" />'
238
  for t in r["thumbnails"]
239
  )
240
+
241
  preview = thumbs or f'<a href="{r["resolved_url"]}" target="_blank">Open FOIA Reading Room</a>'
242
+
243
  cards.append(f"""
244
  <div class="card">
245
  <div class="card-header">
246
  <b>{r['agency']}</b>
247
+ <span class="badge">{badge}</span>
248
  </div>
249
  <div class="card-title">{r['title']}</div>
250
  {preview}
 
254
  </div>
255
  </div>
256
  """)
257
+
258
  return "".join(cards) or "<i>No results</i>"
259
 
260
  # ======================================================
 
267
  return f"Selected document #{idx + 1}"
268
 
269
  # ======================================================
270
+ # AI ASK (DISCLOSURE SAFE)
271
  # ======================================================
272
 
273
def ask_ai(opt_in, pdf_opt_in, question):
    """Assemble the disclosure-wrapped analysis text for the selected document.

    No model call is made in this function: it builds a text block from the
    citation, the evidence-rules callout, the user's question and (optionally)
    text extracted from the PDF, then appends the AI disclosure and an
    integrity hash of that text.

    Args:
        opt_in: Truthy when the user explicitly enabled AI.
        pdf_opt_in: Truthy when the user allowed PDF text extraction.
        question: The user's question, echoed into the output.

    Returns:
        The assembled text, or a short guidance message when AI is not
        enabled, no valid document is selected, or the document is not a
        resolved PDF.
    """
    # Local import: the file's import block is outside this function.
    import logging

    if not opt_in:
        return "Explicit AI opt-in required."

    # Snapshot the global once and bounds-check it: a selection made against
    # an earlier result list may no longer be a valid index (and a negative
    # value would silently pick the wrong document).
    idx = SELECTED_INDEX
    if idx is None or not 0 <= idx < len(LAST_RESULTS):
        return "Select a document first."

    r = LAST_RESULTS[idx]

    if not r["resolved_pdf"]:
        return "AI available only for public PDFs."

    context = ""
    pin_cite = "n.p."

    if pdf_opt_in and PDF_TEXT_AVAILABLE:
        try:
            response = requests.get(r["resolved_url"], timeout=15)
            # Do not feed an HTTP error page to the PDF parser.
            response.raise_for_status()
            raw = extract_text(io.BytesIO(response.content))
            context = raw[:4000]
            pin_cite = "p. 1"
        except Exception:
            # Extraction is best-effort: fall back to an empty context, but
            # leave a trace instead of failing silently.
            logging.getLogger(__name__).warning(
                "PDF text extraction failed for %s",
                r["resolved_url"],
                exc_info=True,
            )

    analysis = (
        f"{bluebook_exhibit(r, idx + 1, pin=pin_cite)}\n\n"
        f"{fre_callout()}\n\n"
        f"Question:\n{question}\n\n"
        f"Context:\n{context}"
    )

    final = analysis + ai_disclosure()
    return final + f"\n\nIntegrity Hash: {sha256_text(final)}"
307
 
308
  # ======================================================
309
+ # CLERK-COMPLIANT EXHIBIT PACKET
310
  # ======================================================
311
 
312
def clerk_cover_sheet(circuit):
    """Return the plain-text cover-sheet heading for the given circuit.

    Args:
        circuit: Circuit name, interpolated on the second line.

    Returns:
        A newline-terminated string: court heading, circuit, a blank line,
        the appendix title, and the educational-use notice.
    """
    sheet_lines = (
        "UNITED STATES COURT OF APPEALS",
        f"{circuit}",
        "",
        "CLERK-COMPLIANT EXHIBIT APPENDIX",
        "Educational Use Only",
        "",  # trailing empty entry yields the final newline
    )
    return "\n".join(sheet_lines)
318
+
319
+ def generate_exhibit_packet(circuit):
320
  buf = io.BytesIO()
321
  styles = getSampleStyleSheet()
322
  doc = SimpleDocTemplate(buf)
323
  story = []
324
 
325
+ story.append(Paragraph(clerk_cover_sheet(circuit), styles["Title"]))
 
 
 
 
 
326
  story.append(PageBreak())
327
 
328
  for i, r in enumerate(LAST_RESULTS, start=1):
329
+ marker = []
330
+ if r.get("sealed"):
331
+ marker.append("SEALED")
332
+ if r.get("redacted"):
333
+ marker.append("REDACTED")
334
+
335
+ story.append(Paragraph(
336
+ f"EXHIBIT {i} {'(' + ', '.join(marker) + ')' if marker else ''}",
337
+ styles["Heading1"]
338
+ ))
339
  story.append(Paragraph(r["resolved_url"], styles["Normal"]))
340
+ story.append(Paragraph(f"SHA-256 Hash: {r['hash']}", styles["Code"]))
 
 
 
 
 
 
341
  story.append(PageBreak())
342
 
343
  doc.build(story)
 
345
  return buf
346
 
347
  # ======================================================
348
+ # RULE 26 HASH ATTESTATION
349
  # ======================================================
350
 
351
def generate_rule26_attestation():
    """Return the plain-text hash attestation for the current results.

    The text is a fixed preamble, one ``Exhibit N: <agency> | <hash>`` line
    per entry in ``LAST_RESULTS`` (numbered from 1), a blank line, and a
    ``Generated UTC:`` timestamp line.
    """
    preamble = [
        "RULE 26(e) DISCOVERY HASH ATTESTATION",
        "",
        "The following materials were retrieved from public FOIA reading rooms.",
        "Hashes are provided for integrity verification only.",
        "",
    ]
    exhibit_lines = [
        f"Exhibit {number}: {record['agency']} | {record['hash']}"
        for number, record in enumerate(LAST_RESULTS, start=1)
    ]
    footer = ["", f"Generated UTC: {datetime.utcnow().isoformat()}"]
    return "\n".join(preamble + exhibit_lines + footer)
366
 
367
  # ======================================================
 
382
  choices=list(ALL_ADAPTERS.keys()),
383
  value=list(ALL_ADAPTERS.keys())
384
  )
385
+ query = gr.Textbox(placeholder="Search FOIA reading rooms")
386
+ table = gr.Dataframe(headers=["Agency","Title","Resolved URL","Hash","Latency"])
387
  gallery = gr.HTML()
388
  status = gr.Textbox()
389
  gr.Button("Search").click(run_search, [query, agencies], [table, gallery, status])
390
 
391
+ with gr.Tab("Ask AI"):
392
+ ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
393
+ pdf_opt = gr.Checkbox(label="Allow PDF Text Extraction")
394
+ q = gr.Textbox(lines=4)
395
+ a = gr.Textbox(lines=18)
396
+ gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, q], a)
397
+
398
+ with gr.Tab("Court / Clerk"):
399
+ circuit = gr.Radio(
400
+ ["FIRST CIRCUIT", "SECOND CIRCUIT", "D.C. CIRCUIT", "NINTH CIRCUIT"],
401
+ value="D.C. CIRCUIT"
402
+ )
403
+ gr.File(label="Clerk-Compliant Exhibit Packet (PDF)").upload(
404
+ lambda c=circuit: generate_exhibit_packet(c)
405
+ )
406
 
407
+ attest = gr.Textbox(lines=12, label="Rule 26 Hash Attestation")
408
+ gr.Button("Generate Rule 26 Attestation").click(
409
+ generate_rule26_attestation, None, attest
 
 
 
410
  )
411
 
412
+ with gr.Tab("FOIA Timeline"):
413
+ req_date = gr.Textbox(
414
+ label="FOIA Request Date (YYYY-MM-DD)",
415
+ value=datetime.utcnow().date().isoformat()
416
+ )
417
+ timeline_out = gr.Textbox(lines=10)
418
+
419
def render_timeline(d):
    """Render the FOIA exhaustion timeline for request date *d* as text.

    Each milestone from ``build_foia_exhaustion_timeline(d)`` becomes one
    ``"<label>: <date>"`` line, using the milestone's ``.date()`` value.
    """
    rendered = []
    for label, when in build_foia_exhaustion_timeline(d):
        rendered.append(f"{label}: {when.date()}")
    return "\n".join(rendered)
422
+
423
+ gr.Button("Build FOIA Exhaustion Timeline").click(
424
+ render_timeline, req_date, timeline_out
425
  )
426
 
427
  app.launch()