GodsDevProject committed on
Commit
5bf571b
·
verified ·
1 Parent(s): f35a4a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -148
app.py CHANGED
@@ -8,9 +8,10 @@ import base64
8
  from datetime import datetime
9
  from urllib.parse import quote_plus
10
  import requests
11
- import os
12
 
13
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
 
 
14
  from reportlab.lib.styles import getSampleStyleSheet
15
 
16
  from citations import bluebook_exhibit, table_of_authorities
@@ -20,7 +21,7 @@ from foia_requests import generate_foia_request_text
20
  # HARD FEATURE FLAGS (GOVERNANCE ENFORCED)
21
  # ======================================================
22
 
23
- ENABLE_FAISS_PHASE_4 = False # MUST remain False unless formal approval
24
  ENABLE_AI = True
25
  ENABLE_PDF_EXTRACTION = True
26
 
@@ -33,6 +34,7 @@ PDF_THUMBNAIL_AVAILABLE = False
33
 
34
  try:
35
  from pdfminer.high_level import extract_text
 
36
  PDF_TEXT_AVAILABLE = True
37
  except Exception:
38
  pass
@@ -51,59 +53,17 @@ LAST_RESULTS = []
51
  SELECTED_INDEX = None
52
 
53
  # ======================================================
54
- # HELPERS
55
  # ======================================================
56
 
 
 
 
57
  def citation_hash(r):
58
  return hashlib.sha256(
59
- f"{r['agency']}|{r['url']}|{r['timestamp']}".encode()
60
  ).hexdigest()[:16]
61
 
62
- def signed_permalink_manifest(results):
63
- """
64
- Deterministic, hash-anchored manifest suitable for citation or audit.
65
- """
66
- payload = {
67
- "generated_utc": datetime.utcnow().isoformat(),
68
- "tool": "Federal FOIA Intelligence Search",
69
- "documents": [
70
- {
71
- "exhibit": i + 1,
72
- "agency": r["agency"],
73
- "title": r["title"],
74
- "resolved_url": r["resolved_url"],
75
- "hash": r["hash"]
76
- }
77
- for i, r in enumerate(results)
78
- ]
79
- }
80
- payload["manifest_hash"] = hashlib.sha256(
81
- json.dumps(payload, sort_keys=True).encode()
82
- ).hexdigest()
83
- return payload
84
-
85
- def fre_callout():
86
- return (
87
- "FRE Reference (Educational):\n"
88
- "• Rule 901 – Authentication\n"
89
- "• Rule 803(8) – Public Records Exception\n"
90
- "• Rule 1005 – Copies of Public Records\n"
91
- "Not legal advice."
92
- )
93
-
94
- def ai_disclosure():
95
- return (
96
- "\n\n---\n"
97
- "AI DISCLOSURE\n"
98
- "• User-initiated only\n"
99
- "• Public FOIA documents only\n"
100
- "• No legal advice\n"
101
- "• Verify against cited exhibit\n"
102
- )
103
-
104
- def hash_ai_output(text):
105
- return hashlib.sha256(text.encode()).hexdigest()
106
-
107
  def resolve_pdf_url(url):
108
  try:
109
  r = requests.get(
@@ -114,16 +74,40 @@ def resolve_pdf_url(url):
114
  )
115
  ct = r.headers.get("content-type", "").lower()
116
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
117
- return is_pdf, r.url
118
  except Exception:
119
- return False, url
120
 
121
- def generate_pdf_thumbnails(url, max_pages=3):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  if not PDF_THUMBNAIL_AVAILABLE:
123
  return []
124
  try:
125
- r = requests.get(url, timeout=15)
126
- images = convert_from_bytes(r.content, first_page=1, last_page=max_pages)
 
127
  thumbs = []
128
  for img in images:
129
  buf = io.BytesIO()
@@ -204,12 +188,17 @@ def run_search(query, agencies):
204
  for name in agencies:
205
  adapter = ALL_ADAPTERS[name]
206
  for r in adapter.search(query):
207
- r["hash"] = citation_hash(r)
208
- r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
209
- r["thumbnails"] = (
210
- generate_pdf_thumbnails(r["resolved_url"])
211
- if r["resolved_pdf"] else []
212
- )
 
 
 
 
 
213
  LAST_RESULTS.append(r)
214
  rows.append([
215
  r["agency"],
@@ -259,87 +248,93 @@ def select_doc(idx):
259
  return f"Selected document #{idx + 1}"
260
 
261
  # ======================================================
262
- # AI ASK
263
  # ======================================================
264
 
265
- def ask_ai(opt_in, pdf_opt_in, question):
266
- if not opt_in:
267
- return "Explicit AI opt-in required."
268
-
269
- if SELECTED_INDEX is None:
270
- return "Select a document first."
271
-
272
- r = LAST_RESULTS[SELECTED_INDEX]
273
-
274
- if not r["resolved_pdf"]:
275
- return "AI available only for public PDFs."
276
-
277
- context = ""
278
- pin_cite = "n.p."
279
-
280
- if pdf_opt_in and PDF_TEXT_AVAILABLE:
281
- try:
282
- raw = extract_text(io.BytesIO(
283
- requests.get(r["resolved_url"], timeout=15).content
284
- ))
285
- context = raw[:4000]
286
- pin_cite = "p. 1"
287
- except Exception:
288
- pass
289
-
290
- analysis = (
291
- f"{bluebook_exhibit(r, SELECTED_INDEX + 1, pin=pin_cite)}\n\n"
292
- f"{fre_callout()}\n\n"
293
- f"Question:\n{question}\n\n"
294
- f"Context:\n{context}"
295
- )
296
-
297
- final = analysis + ai_disclosure()
298
- return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
299
 
300
  # ======================================================
301
- # CLERK EXHIBIT PACKET (PDF)
302
  # ======================================================
303
 
304
- def generate_exhibit_packet():
305
  buf = io.BytesIO()
306
  styles = getSampleStyleSheet()
307
  doc = SimpleDocTemplate(buf)
308
  story = []
309
 
310
- story.append(Paragraph("Exhibit Packet (Clerk Format)", styles["Title"]))
311
  story.append(Spacer(1, 12))
 
 
 
 
 
312
 
313
  for i, r in enumerate(LAST_RESULTS, start=1):
314
- story.append(Paragraph(
315
- f"Exhibit {i}: {r['agency']} — {r['title']}", styles["Heading2"]
316
- ))
317
  story.append(Paragraph(r["resolved_url"], styles["Normal"]))
318
- story.append(Paragraph(f"Hash: {r['hash']}", styles["Code"]))
319
- story.append(Spacer(1, 12))
 
 
 
 
 
 
320
 
321
  doc.build(story)
322
  buf.seek(0)
323
  return buf
324
 
325
  # ======================================================
326
- # PACER-READY BUNDLE (ZIP)
327
  # ======================================================
328
 
329
- def generate_pacer_bundle():
330
- buf = io.BytesIO()
331
- z = zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED)
 
 
 
 
 
 
 
 
 
 
 
332
 
333
- manifest = signed_permalink_manifest(LAST_RESULTS)
334
- z.writestr("manifest.json", json.dumps(manifest, indent=2))
335
- z.writestr("README.txt",
336
- "PACER-Ready Educational Bundle\n"
337
- "No filing performed. User responsible for review.\n"
 
 
 
 
338
  )
339
 
340
- z.close()
341
- buf.seek(0)
342
- return buf
 
 
 
 
 
 
 
 
343
 
344
  # ======================================================
345
  # UI
@@ -354,49 +349,33 @@ CSS = """
354
  with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
355
  gr.Markdown("## Federal FOIA Intelligence Search\nPublic Reading Rooms Only")
356
 
357
- gr.HTML("""
358
- <button onclick="window.open('/governance-site/index.html','_blank')">
359
- Governance & Trust Documentation
360
- </button>
361
- """)
362
-
363
  with gr.Tab("Search"):
364
  agencies = gr.CheckboxGroup(
365
  choices=list(ALL_ADAPTERS.keys()),
366
- value=list(ALL_ADAPTERS.keys()),
367
- label="Agencies"
368
  )
369
- query = gr.Textbox(placeholder="e.g. AATIP, surveillance")
370
- table = gr.Dataframe(headers=["Agency","Title","Resolved URL","Hash","Latency"])
371
  gallery = gr.HTML()
372
- status = gr.Textbox(label="Selection Status")
373
  gr.Button("Search").click(run_search, [query, agencies], [table, gallery, status])
374
 
375
- with gr.Tab("Ask AI"):
376
- ai_opt = gr.Checkbox(label="Enable AI")
377
- pdf_opt = gr.Checkbox(label="Allow PDF Text Extraction")
378
- q = gr.Textbox(lines=4)
379
- a = gr.Textbox(lines=18)
380
- gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, q], a)
381
-
382
- with gr.Tab("Exports"):
383
- gr.Markdown("### Signed / Clerk / PACER Outputs")
384
- gr.File(label="Clerk Exhibit Packet (PDF)").upload(
385
- lambda: generate_exhibit_packet(), outputs=None
386
- )
387
- gr.File(label="PACER-Ready Bundle (ZIP)").upload(
388
- lambda: generate_pacer_bundle(), outputs=None
389
- )
390
 
391
- with gr.Tab("FOIA Request"):
392
  agency = gr.Textbox()
393
  subject = gr.Textbox()
394
- requester = gr.Textbox()
395
  out = gr.Textbox(lines=14)
396
- gr.Button("Generate").click(
397
- lambda a,s,r: generate_foia_request_text(a,s,r),
398
- [agency, subject, requester],
399
- out
 
 
 
 
400
  )
401
 
402
  app.launch()
 
8
  from datetime import datetime
9
  from urllib.parse import quote_plus
10
  import requests
 
11
 
12
+ from reportlab.platypus import (
13
+ SimpleDocTemplate, Paragraph, Spacer, PageBreak
14
+ )
15
  from reportlab.lib.styles import getSampleStyleSheet
16
 
17
  from citations import bluebook_exhibit, table_of_authorities
 
21
  # HARD FEATURE FLAGS (GOVERNANCE ENFORCED)
22
  # ======================================================
23
 
24
+ ENABLE_FAISS_PHASE_4 = False # MUST remain False
25
  ENABLE_AI = True
26
  ENABLE_PDF_EXTRACTION = True
27
 
 
34
 
35
  try:
36
  from pdfminer.high_level import extract_text
37
+ from pdfminer.pdfpage import PDFPage
38
  PDF_TEXT_AVAILABLE = True
39
  except Exception:
40
  pass
 
53
  SELECTED_INDEX = None
54
 
55
  # ======================================================
56
+ # HELPERS — HASHING & CITATION
57
  # ======================================================
58
 
59
def sha256_bytes(b: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of the byte string *b*."""
    hasher = hashlib.sha256()
    hasher.update(b)
    return hasher.hexdigest()
61
+
62
  def citation_hash(r):
63
  return hashlib.sha256(
64
+ f"{r['agency']}|{r['resolved_url']}|{r['timestamp']}".encode()
65
  ).hexdigest()[:16]
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def resolve_pdf_url(url):
68
  try:
69
  r = requests.get(
 
74
  )
75
  ct = r.headers.get("content-type", "").lower()
76
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
77
+ return is_pdf, r.url, r.content
78
  except Exception:
79
+ return False, url, b""
80
 
81
def compute_page_hashes(pdf_bytes):
    """Return ``{page_number: sha256_hex}`` for each page of a PDF.

    Page numbers are 1-based. Each digest covers the decoded content
    stream(s) of that page, so it changes when the page's content changes.
    The previous implementation hashed only the page index and the total
    file length, which made hashes identical across unrelated documents of
    equal size.

    Returns an empty dict when PDF support is unavailable or *pdf_bytes* is
    empty. Parsing is best-effort: if the document fails part-way through,
    the hashes collected so far are returned.
    """
    if not PDF_TEXT_AVAILABLE or not pdf_bytes:
        return {}

    page_hashes = {}
    try:
        # Imported lazily: only reachable when the pdfminer import at module
        # load succeeded (PDF_TEXT_AVAILABLE is True).
        from pdfminer.pdftypes import stream_value

        pages = PDFPage.get_pages(io.BytesIO(pdf_bytes))
        for number, page in enumerate(pages, start=1):
            digest = hashlib.sha256()
            # A page may carry several content streams, some of them given as
            # indirect references; stream_value() resolves those.
            for stream in page.contents:
                digest.update(stream_value(stream).get_data())
            page_hashes[number] = digest.hexdigest()
    except Exception:
        # Deliberate best-effort: a malformed PDF must not abort the search;
        # keep whatever pages were hashed before the failure.
        pass

    return page_hashes
99
+
100
+ # ======================================================
101
+ # OPTIONAL VISUALS
102
+ # ======================================================
103
+
104
+ def generate_pdf_thumbnails(pdf_bytes, max_pages=3):
105
  if not PDF_THUMBNAIL_AVAILABLE:
106
  return []
107
  try:
108
+ images = convert_from_bytes(
109
+ pdf_bytes, first_page=1, last_page=max_pages
110
+ )
111
  thumbs = []
112
  for img in images:
113
  buf = io.BytesIO()
 
188
  for name in agencies:
189
  adapter = ALL_ADAPTERS[name]
190
  for r in adapter.search(query):
191
+ is_pdf, resolved_url, pdf_bytes = resolve_pdf_url(r["url"])
192
+ page_hashes = compute_page_hashes(pdf_bytes) if is_pdf else {}
193
+
194
+ r.update({
195
+ "resolved_pdf": is_pdf,
196
+ "resolved_url": resolved_url,
197
+ "hash": citation_hash(r),
198
+ "page_hashes": page_hashes,
199
+ "thumbnails": generate_pdf_thumbnails(pdf_bytes)
200
+ })
201
+
202
  LAST_RESULTS.append(r)
203
  rows.append([
204
  r["agency"],
 
248
  return f"Selected document #{idx + 1}"
249
 
250
  # ======================================================
251
+ # JUDGE-SPECIFIC EXHIBIT FORMATTERS
252
  # ======================================================
253
 
254
def judge_caption(jurisdiction):
    """Return the caption text for a district-court abbreviation.

    Recognised codes are ``"SDNY"``, ``"DDC"`` and ``"NDCA"``; any other
    value yields a generic caption.
    """
    districts = (
        ("SDNY", "SOUTHERN DISTRICT OF NEW YORK"),
        ("DDC", "DISTRICT OF COLUMBIA"),
        ("NDCA", "NORTHERN DISTRICT OF CALIFORNIA"),
    )
    for code, district in districts:
        if jurisdiction == code:
            return "UNITED STATES DISTRICT COURT\n" + district
    return "COURT OF COMPETENT JURISDICTION"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  # ======================================================
264
+ # EXHIBIT PACKET (PACER / STATE VARIANT)
265
  # ======================================================
266
 
267
def generate_exhibit_packet(jurisdiction, state_variant=False):
    """Build the exhibit packet PDF for the current search results.

    The packet starts with a caption page for *jurisdiction* (see
    ``judge_caption``), followed by one page per entry in ``LAST_RESULTS``
    listing its resolved URL, document hash and per-page hashes.

    Args:
        jurisdiction: Court abbreviation passed to ``judge_caption``.
        state_variant: Accepted for interface compatibility; currently it
            does not alter the output.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing the PDF.
    """
    from xml.sax.saxutils import escape

    buf = io.BytesIO()
    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(buf)

    # Paragraph text is parsed as markup: raw newlines collapse to spaces, so
    # the two-line caption needs explicit <br/> tags to stay on two lines.
    caption = escape(judge_caption(jurisdiction)).replace("\n", "<br/>")

    story = [
        Paragraph(caption, styles["Title"]),
        Spacer(1, 12),
        Paragraph(
            "PACER Appendix – Educational / Clerk Format Only",
            styles["Italic"]
        ),
        PageBreak(),
    ]

    for i, r in enumerate(LAST_RESULTS, start=1):
        story.append(Paragraph(f"EXHIBIT {i}", styles["Heading1"]))
        # URLs routinely contain '&' (and may contain '<'); escape them so
        # they are rendered literally instead of being read as markup.
        story.append(Paragraph(escape(str(r["resolved_url"])), styles["Normal"]))
        story.append(Paragraph(f"Document Hash: {r['hash']}", styles["Code"]))

        # Tolerate results that were stored without page hashes.
        for p, h in r.get("page_hashes", {}).items():
            story.append(Paragraph(
                f"Page {p} SHA-256: {h}", styles["Code"]
            ))

        story.append(PageBreak())

    doc.build(story)
    buf.seek(0)
    return buf
296
 
297
  # ======================================================
298
+ # FOIA LITIGATION-HOLD PACKET
299
  # ======================================================
300
 
301
def generate_lit_hold_packet(agency, subject):
    """Return the text of an educational FOIA litigation-hold notice.

    Args:
        agency: Agency name inserted after ``Agency:``.
        subject: Subject matter inserted after ``Subject Matter:``.

    Returns:
        The notice as a single string, stamped with the current UTC time in
        ISO 8601 form followed by ``" UTC"``.
    """
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the rendered value keeps its original shape
    # (no "+00:00" suffix before the literal " UTC").
    issued = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return (
        "FOIA LITIGATION HOLD NOTICE\n\n"
        f"Agency: {agency}\n"
        f"Subject Matter: {subject}\n\n"
        "Preserve all records, emails, drafts, metadata,\n"
        "and electronic communications reasonably related.\n\n"
        f"Issued: {issued} UTC\n"
        "Educational template only."
    )
311
+
312
+ # ======================================================
313
+ # FEE WAIVER JUSTIFICATION
314
+ # ======================================================
315
 
316
def generate_fee_waiver(agency, public_interest):
    """Return the text of an educational FOIA fee-waiver request.

    Args:
        agency: Agency name inserted after ``Agency:``.
        public_interest: Free-text purpose statement inserted after
            ``Purpose:``.

    Returns:
        The request as a single string, ending with the current UTC time in
        ISO 8601 form followed by ``" UTC"``.
    """
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the rendered value keeps its original shape
    # (no "+00:00" suffix before the literal " UTC").
    dated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return (
        "FOIA FEE WAIVER REQUEST\n\n"
        f"Agency: {agency}\n\n"
        "This request is made in the public interest and\n"
        "is likely to contribute significantly to public\n"
        "understanding of government operations.\n\n"
        f"Purpose:\n{public_interest}\n\n"
        f"Date: {dated} UTC"
    )
326
 
327
+ # ======================================================
328
+ # NEUTRAL CITATION EXPORT
329
+ # ======================================================
330
+
331
def export_neutral_citations():
    """Return one pipe-delimited citation line per entry in ``LAST_RESULTS``.

    Each line carries the 1-based exhibit number, agency, resolved URL and
    citation hash; lines are joined with newlines. An empty result list
    yields an empty string.
    """
    return "\n".join(
        f"Ex. {number} | {doc['agency']} | {doc['resolved_url']} | {doc['hash']}"
        for number, doc in enumerate(LAST_RESULTS, start=1)
    )
338
 
339
  # ======================================================
340
  # UI
 
349
  with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
350
  gr.Markdown("## Federal FOIA Intelligence Search\nPublic Reading Rooms Only")
351
 
 
 
 
 
 
 
352
  with gr.Tab("Search"):
353
  agencies = gr.CheckboxGroup(
354
  choices=list(ALL_ADAPTERS.keys()),
355
+ value=list(ALL_ADAPTERS.keys())
 
356
  )
357
+ query = gr.Textbox()
358
+ table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
359
  gallery = gr.HTML()
360
+ status = gr.Textbox()
361
  gr.Button("Search").click(run_search, [query, agencies], [table, gallery, status])
362
 
363
+ with gr.Tab("Court Exports"):
364
+ court = gr.Radio(["SDNY","DDC","NDCA"], value="SDNY")
365
+ gr.File().upload(lambda c=court: generate_exhibit_packet(c))
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
+ with gr.Tab("FOIA Tools"):
368
  agency = gr.Textbox()
369
  subject = gr.Textbox()
 
370
  out = gr.Textbox(lines=14)
371
+ gr.Button("Litigation Hold").click(
372
+ generate_lit_hold_packet, [agency, subject], out
373
+ )
374
+
375
+ with gr.Tab("Citations"):
376
+ out = gr.Textbox(lines=16)
377
+ gr.Button("Export Neutral Citations").click(
378
+ export_neutral_citations, None, out
379
  )
380
 
381
  app.launch()