GodsDevProject committed on
Commit
f71dcd6
·
verified ·
1 Parent(s): d883f15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -308
app.py CHANGED
@@ -1,44 +1,25 @@
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
- # HF Reviewer–Safe / Court-Safe LIVE IMPLEMENTATION
4
  # ======================================================
5
 
6
- import os, io, time, zipfile, tempfile, hashlib, base64
 
 
 
 
7
  from datetime import datetime
8
  from urllib.parse import quote_plus
9
- import requests
10
 
11
  import gradio as gr
12
- from fastapi import FastAPI
13
- from fastapi.staticfiles import StaticFiles
14
- from fastapi.responses import JSONResponse
15
 
16
  # ======================================================
17
- # GOVERNANCE FLAGS (NON-NEGOTIABLE)
18
  # ======================================================
19
 
20
- ENABLE_AI = True # user-initiated only
21
- ENABLE_FAISS_PHASE_4 = False # hard disabled
22
- ENABLE_DOC_LEVEL_APIS = False # CIA/FBI have no public APIs (documented)
23
-
24
- # ======================================================
25
- # OPTIONAL PDF SUPPORT
26
- # ======================================================
27
-
28
- PDF_THUMBNAILS_AVAILABLE = False
29
- PDF_TEXT_AVAILABLE = False
30
-
31
- try:
32
- from pdf2image import convert_from_bytes
33
- PDF_THUMBNAILS_AVAILABLE = True
34
- except Exception:
35
- pass
36
-
37
- try:
38
- from pdfminer.high_level import extract_text
39
- PDF_TEXT_AVAILABLE = True
40
- except Exception:
41
- pass
42
 
43
  # ======================================================
44
  # SESSION STATE (EPHEMERAL)
@@ -48,15 +29,15 @@ LAST_RESULTS = []
48
  AI_APPENDIX = None
49
 
50
  # ======================================================
51
- # CRYPTOGRAPHY / PROVENANCE
52
  # ======================================================
53
 
54
  def sha256_text(t: str) -> str:
55
- return hashlib.sha256(t.encode()).hexdigest()
56
 
57
  def provenance_block(payload: str, ai=False) -> str:
58
  return "\n".join([
59
- "Tool-Version: 1.9.0",
60
  f"Generated-UTC: {datetime.utcnow().isoformat()}",
61
  f"Content-SHA256: {sha256_text(payload)}",
62
  "Public-Source-Only: true",
@@ -65,63 +46,43 @@ def provenance_block(payload: str, ai=False) -> str:
65
  ])
66
 
67
  # ======================================================
68
- # FOIA ADAPTERS (LINK-OUT ONLY — CORRECT SEARCH EMBEDDING)
69
  # ======================================================
70
 
71
  class FOIAAdapter:
72
  agency = "UNKNOWN"
73
- search_url = ""
74
 
75
- def search(self, query):
76
- url = self.search_url.format(q=quote_plus(query))
77
  return [{
78
  "agency": self.agency,
79
- "title": f"{self.agency} FOIA Reading Room Results",
80
- "resolved_url": url,
81
- "timestamp": datetime.utcnow().isoformat(),
82
  }]
83
 
84
- class CIA(FOIAAdapter):
85
  agency = "CIA"
86
- # CIA requires /search/site/{query}
87
- search_url = "https://www.cia.gov/readingroom/search/site/{q}"
88
 
89
- class FBI(FOIAAdapter):
90
  agency = "FBI"
91
- search_url = "https://vault.fbi.gov/search?SearchableText={q}"
92
 
93
- ALL_ADAPTERS = {
94
- "CIA": CIA(),
95
- "FBI": FBI(),
96
- }
97
 
98
- # ======================================================
99
- # PDF RESOLUTION + THUMBNAILS
100
- # ======================================================
101
 
102
- def resolve_pdf(url):
103
- try:
104
- r = requests.get(url, timeout=10, allow_redirects=True)
105
- ct = r.headers.get("content-type", "").lower()
106
- is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
107
- return is_pdf, r.url
108
- except Exception:
109
- return False, url
110
-
111
- def pdf_thumbnails(url, pages=2):
112
- if not PDF_THUMBNAILS_AVAILABLE:
113
- return []
114
- try:
115
- r = requests.get(url, timeout=10)
116
- images = convert_from_bytes(r.content, first_page=1, last_page=pages)
117
- thumbs = []
118
- for img in images:
119
- buf = io.BytesIO()
120
- img.save(buf, format="PNG")
121
- thumbs.append(base64.b64encode(buf.getvalue()).decode())
122
- return thumbs
123
- except Exception:
124
- return []
125
 
126
  # ======================================================
127
  # SEARCH
@@ -130,293 +91,166 @@ def pdf_thumbnails(url, pages=2):
130
  def run_search(query, agencies):
131
  global LAST_RESULTS
132
  LAST_RESULTS = []
133
- rows = []
134
-
135
- for name in agencies:
136
- for r in ALL_ADAPTERS[name].search(query):
137
- is_pdf, resolved = resolve_pdf(r["resolved_url"])
138
- r["is_pdf"] = is_pdf
139
- r["resolved_url"] = resolved
140
- r["hash"] = sha256_text(resolved)[:16]
141
- r["thumbnails"] = pdf_thumbnails(resolved) if is_pdf else []
142
  LAST_RESULTS.append(r)
143
 
144
- rows.append([
145
  r["agency"],
146
  r["title"],
147
- r["resolved_url"],
148
  r["hash"],
149
  ])
150
 
151
- return rows, render_cards(), "Search complete."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  # ======================================================
154
- # ASK-AI (STRICTLY GATED)
155
  # ======================================================
156
 
157
- def ask_ai(index: int):
158
- global AI_APPENDIX
159
  r = LAST_RESULTS[index]
160
-
161
- if not (ENABLE_AI and r["is_pdf"]):
162
- return "AI is disabled for this result."
163
-
164
  text = (
165
  "AI Assistive Summary (Non-Authoritative)\n\n"
166
  f"Agency: {r['agency']}\n"
167
- f"URL: {r['resolved_url']}\n\n"
168
- "This output assists review of a publicly released FOIA document."
169
  )
170
-
171
- AI_APPENDIX = {
172
- "text": text,
173
- "hash": sha256_text(text),
174
- "prov": provenance_block(text, ai=True)
175
- }
176
-
177
- return text + "\n\n" + AI_APPENDIX["prov"]
178
 
179
  # ======================================================
180
- # COURT / CM-ECF BUNDLE
181
  # ======================================================
182
 
183
  def generate_court_bundle():
184
  with tempfile.TemporaryDirectory() as td:
185
- path = os.path.join(td, "court_bundle.zip")
186
- with zipfile.ZipFile(path, "w") as z:
187
  for i, r in enumerate(LAST_RESULTS, 1):
188
  body = (
189
  f"{r['agency']} FOIA Reading Room\n"
190
- f"{r['resolved_url']}\n\n"
191
- + provenance_block(r["resolved_url"])
192
  )
193
  z.writestr(f"Exhibit_{i:03d}.txt", body)
194
 
195
- if AI_APPENDIX:
196
- z.writestr("Exhibit_AI.txt", AI_APPENDIX["text"])
197
- z.writestr("Exhibit_AI.provenance.txt", AI_APPENDIX["prov"])
198
-
199
- return path
200
-
201
- # ======================================================
202
- # FASTAPI (FOR ASK-AI + GOVERNANCE)
203
- # ======================================================
204
-
205
- api = FastAPI()
206
 
207
- @api.get("/ask_ai")
208
- def ask_ai_api(index: int):
209
- return JSONResponse({"result": ask_ai(index)})
210
-
211
- if os.path.exists("governance-site"):
212
- api.mount(
213
- "/governance",
214
- StaticFiles(directory="governance-site", html=True),
215
- name="governance",
216
- )
217
-
218
- # ======================================================
219
- # UI RENDERING
220
- # ======================================================
221
 
222
- def render_cards():
223
- cards = []
224
- for i, r in enumerate(LAST_RESULTS):
225
- thumbs = "".join(
226
- f'<img src="data:image/png;base64,{t}" '
227
- f'style="width:120px;border-radius:8px;margin-right:6px;" />'
228
- for t in r["thumbnails"]
229
- )
230
-
231
- cards.append(f"""
232
- <div class="card">
233
- <div class="header">
234
- <b>{r['agency']}</b>
235
- <button class="ask-ai" onclick="fetch('/ask_ai?index={i}')">
236
- Ask AI
237
- </button>
238
- </div>
239
- # ======================================================
240
- # Federal FOIA Intelligence Search (LIVE)
241
- # Court-Safe / HF-Reviewer-Safe Implementation
242
- # ======================================================
243
-
244
- import os, io, hashlib, base64, requests
245
- from datetime import datetime
246
- from urllib.parse import quote_plus, urlparse
247
-
248
- import gradio as gr
249
- from fastapi import FastAPI
250
- from fastapi.staticfiles import StaticFiles
251
 
252
  # ======================================================
253
- # GOVERNANCE FLAGS (HARD GATES)
254
- # ======================================================
255
-
256
- ENABLE_AI = True
257
- ENABLE_FAISS_PHASE_4 = False
258
- ENABLE_DOC_LEVEL_APIS = False
259
-
260
- # ======================================================
261
- # STATE
262
- # ======================================================
263
-
264
- LAST_RESULTS = []
265
-
266
- # ======================================================
267
- # UTILITIES
268
  # ======================================================
269
 
270
- def sha256(text: str) -> str:
271
- return hashlib.sha256(text.encode()).hexdigest()
272
 
273
- def provenance(text: str, ai=False) -> str:
274
- return "\n".join([
275
- f"Generated-UTC: {datetime.utcnow().isoformat()}",
276
- f"SHA256: {sha256(text)}",
277
- "Public-Source-Only: true",
278
- f"AI-Assisted: {'true' if ai else 'false'}",
279
- "Court-Safe: true"
280
- ])
281
-
282
- def is_pdf(url: str) -> bool:
283
- return url.lower().endswith(".pdf")
284
-
285
- # ======================================================
286
- # FOIA ADAPTERS (LINK-OUT ONLY — CORRECT BEHAVIOR)
287
- # ======================================================
288
-
289
- class CIA:
290
- agency = "CIA"
291
-
292
- def search(self, q):
293
- return [{
294
- "agency": "CIA",
295
- "title": f"CIA Reading Room Search: {q}",
296
- "view": f"https://www.cia.gov/readingroom/search/site/{quote_plus(q)}",
297
- "download": None,
298
- "share": f"https://www.cia.gov/readingroom/search/site/{quote_plus(q)}"
299
- }]
300
-
301
- class FBI:
302
- agency = "FBI"
303
-
304
- def search(self, q):
305
- return [{
306
- "agency": "FBI",
307
- "title": f"FBI Vault Search: {q}",
308
- "view": f"https://vault.fbi.gov/search?SearchableText={quote_plus(q)}",
309
- "download": None,
310
- "share": f"https://vault.fbi.gov/search?SearchableText={quote_plus(q)}"
311
- }]
312
-
313
- ADAPTERS = {
314
- "CIA": CIA(),
315
- "FBI": FBI()
316
- }
317
-
318
- # ======================================================
319
- # SEARCH
320
- # ======================================================
321
-
322
- def run_search(query, agencies):
323
- LAST_RESULTS.clear()
324
- cards = []
325
-
326
- for a in agencies:
327
- results = ADAPTERS[a].search(query)
328
- for r in results:
329
- r["hash"] = sha256(r["view"])[:16]
330
- LAST_RESULTS.append(r)
331
- cards.append(render_card(r, len(LAST_RESULTS)-1))
332
-
333
- return "".join(cards)
334
 
335
- # ======================================================
336
- # AI SUMMARY (STRICTLY OPT-IN)
337
- # ======================================================
338
 
339
- def ask_ai(idx):
340
- if not ENABLE_AI:
341
- return "AI is disabled."
342
- r = LAST_RESULTS[idx]
343
- text = f"Summary placeholder for {r['agency']} public FOIA navigation."
344
- return text + "\n\n" + provenance(text, ai=True)
345
 
346
  # ======================================================
347
- # UI COMPONENTS
348
  # ======================================================
349
 
350
- def render_card(r, idx):
351
- return f"""
352
- <div class="card">
353
- <b>{r['agency']}</b><br/>
354
- {r['title']}<br/>
355
- <div class="actions">
356
- <a href="{r['view']}" target="_blank">View</a> |
357
- <a href="{r['download'] or r['view']}" target="_blank">Download</a> |
358
- <a href="{r['share']}" target="_blank">Share</a>
359
- <button class="ask-ai" onclick="fetch('/ask_ai?i={idx}')">🧠</button>
360
- </div>
361
- </div>
362
- """
363
-
364
  CSS = """
365
- .card { border:1px solid #ccc; padding:14px; border-radius:14px; margin-bottom:12px; }
366
- .actions a { color:#1e88e5; font-weight:600; }
 
 
 
 
 
 
 
 
367
  .ask-ai {
368
- background:#1e88e5;
369
- color:white;
370
- border:none;
371
- border-radius:50%;
372
- width:38px;
373
- height:38px;
374
- margin-left:10px;
375
  }
376
- button.search {
377
- background:#1e88e5;
378
- border-radius:999px;
379
- color:white;
380
  }
381
  """
382
 
383
- # ======================================================
384
- # FASTAPI
385
- # ======================================================
386
-
387
- api = FastAPI()
388
-
389
- @api.get("/ask_ai")
390
- def ask_ai_api(i: int):
391
- return ask_ai(i)
392
-
393
- if os.path.exists("governance-site"):
394
- api.mount(
395
- "/governance",
396
- StaticFiles(directory="governance-site", html=True),
397
- name="governance"
398
- )
399
-
400
- # ======================================================
401
- # GRADIO UI
402
- # ======================================================
403
-
404
  with gr.Blocks(css=CSS) as app:
405
  gr.Markdown("## Federal FOIA Intelligence Search")
406
 
407
- with gr.Tabs():
408
  with gr.Tab("Search"):
409
- q = gr.Textbox(label="Search FOIA Reading Rooms")
410
- agencies = gr.CheckboxGroup(["CIA", "FBI"], value=["CIA", "FBI"])
411
- out = gr.HTML()
412
- gr.Button("🔍", elem_classes="search").click(run_search, [q, agencies], out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
 
414
  with gr.Tab("Governance"):
415
  gr.Markdown(open("governance-site/index.md").read())
416
 
417
- with gr.Tab("Trust & Safety"):
418
- gr.Markdown(open("reviewer/hf_reviewer_cover_letter.md").read())
419
-
420
  app.queue()
421
- app.launch(server_name="0.0.0.0", server_port=7860)ue()
422
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
+ # HF Reviewer–Safe / Court-Safe Reference Implementation
4
  # ======================================================
5
 
6
+ import os
7
+ import io
8
+ import zipfile
9
+ import tempfile
10
+ import hashlib
11
  from datetime import datetime
12
  from urllib.parse import quote_plus
 
13
 
14
  import gradio as gr
 
 
 
15
 
16
  # ======================================================
17
+ # HARD GOVERNANCE FLAGS
18
  # ======================================================
19
 
20
+ ENABLE_AI = True
21
+ ENABLE_FAISS_PHASE_4 = False
22
+ ENABLE_DOC_LEVEL_APIS = False # auto-detected only
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ======================================================
25
  # SESSION STATE (EPHEMERAL)
 
29
  AI_APPENDIX = None
30
 
31
  # ======================================================
32
+ # UTILITIES
33
  # ======================================================
34
 
def sha256_text(t: str) -> str:
    """Return the lowercase hexadecimal SHA-256 digest of ``t``.

    The text is encoded as UTF-8 before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(t.encode("utf-8"))
    return hasher.hexdigest()
37
 
38
  def provenance_block(payload: str, ai=False) -> str:
39
  return "\n".join([
40
+ "Tool-Version: 2.0.0",
41
  f"Generated-UTC: {datetime.utcnow().isoformat()}",
42
  f"Content-SHA256: {sha256_text(payload)}",
43
  "Public-Source-Only: true",
 
46
  ])
47
 
48
  # ======================================================
49
+ # FOIA ADAPTERS (LINK-OUT ONLY)
50
  # ======================================================
51
 
class FOIAAdapter:
    """Base class for link-out adapters.

    Subclasses set ``agency`` and ``base_search_url``; the URL template
    must contain a ``{q}`` placeholder that receives the URL-encoded query.
    """

    agency = "UNKNOWN"
    base_search_url = ""

    def search(self, query: str):
        """Return a one-element list describing the search link for ``query``.

        The entry is a dict with the keys ``agency``, ``title``, ``url`` and
        ``explanation``. No network request is made; only the URL is built.
        """
        encoded_query = quote_plus(query)
        entry = {
            "agency": self.agency,
            "title": f"{self.agency} FOIA Reading Room",
            "url": self.base_search_url.format(q=encoded_query),
            "explanation": "This is a public FOIA reading-room result.",
        }
        return [entry]
63
 
class CIAAdapter(FOIAAdapter):
    """Link-out adapter for the CIA reading-room site search."""

    base_search_url = "https://www.cia.gov/readingroom/search/site/{q}"
    agency = "CIA"


class FBIAdapter(FOIAAdapter):
    """Link-out adapter for the FBI Vault search."""

    base_search_url = "https://vault.fbi.gov/search?SearchableText={q}"
    agency = "FBI"


class DOJAdapter(FOIAAdapter):
    """Link-out adapter for the DOJ FOIA search."""

    base_search_url = "https://www.justice.gov/foia/search?search={q}"
    agency = "DOJ"


class DHSAdapter(FOIAAdapter):
    """Link-out adapter for the DHS FOIA library search."""

    base_search_url = "https://www.dhs.gov/foia-library?search={q}"
    agency = "DHS"


# Registry of adapter instances keyed by agency name; insertion order is the
# order in which the agencies are listed.
ALL_ADAPTERS = dict(
    CIA=CIAAdapter(),
    FBI=FBIAdapter(),
    DOJ=DOJAdapter(),
    DHS=DHSAdapter(),
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  # ======================================================
88
  # SEARCH
 
def run_search(query, agencies):
    """Build link-out search results for the selected agencies.

    Resets the module-level ``LAST_RESULTS`` list and refills it with one
    entry per adapter result, each tagged with a 16-character hash of its URL.

    Args:
        query: Search term typed by the user. ``None`` or a blank string
            produces no results.
        agencies: Iterable of agency names (keys of ``ALL_ADAPTERS``).
            ``None`` is treated as an empty selection; names that are not
            registered are skipped.

    Returns:
        A ``(table_rows, cards_html, status)`` tuple: rows of
        ``[agency, title, url, hash]``, the rendered result cards joined by
        newlines, and a status message.
    """
    # Local import: the file's import block does not provide ``html``.
    import html

    # NOTE(review): LAST_RESULTS is module-level, so it appears to be shared
    # by every caller of this function - confirm that is intended.
    global LAST_RESULTS
    LAST_RESULTS = []

    # A blank query would only yield links to empty searches, so stop early.
    query = (query or "").strip()
    if not query:
        return [], "", "Please enter a search term."

    table_rows = []
    cards_html = []

    for agency in agencies or []:
        adapter = ALL_ADAPTERS.get(agency)
        if adapter is None:
            # Unknown agency name: skip it instead of raising KeyError.
            continue

        for r in adapter.search(query):
            r["hash"] = sha256_text(r["url"])[:16]
            LAST_RESULTS.append(r)

            table_rows.append([
                r["agency"],
                r["title"],
                r["url"],
                r["hash"],
            ])

            # Escape every value placed into markup so adapter-provided text
            # cannot break out of the attribute or element it is inserted in.
            agency_html = html.escape(str(r["agency"]))
            title_html = html.escape(str(r["title"]))
            url_attr = html.escape(str(r["url"]), quote=True)
            why_html = html.escape(str(r["explanation"]))

            cards_html.append(f"""
<div class="card">
  <b>{agency_html}</b><br/>
  {title_html}<br/>
  <div class="links">
    <a href="{url_attr}" target="_blank">View</a>
    <span>|</span>
    <a href="{url_attr}" target="_blank">Download</a>
    <span>|</span>
    <a href="{url_attr}" target="_blank">Share</a>
    <button class="ask-ai">Ask AI</button>
  </div>
  <div class="why">
    Why am I seeing this? {why_html}
  </div>
</div>
""")

    return table_rows, "\n".join(cards_html), "Search complete."
130
 
131
  # ======================================================
132
+ # AI (USER-INITIATED ONLY)
133
  # ======================================================
134
 
def ask_ai(index):
    """Return the assistive summary text for one of the last search results.

    Args:
        index: Position of the result in the module-level ``LAST_RESULTS``.

    Returns:
        The summary text followed by its provenance block, or a short
        explanatory message when AI is disabled by the ``ENABLE_AI`` flag or
        ``index`` does not refer to a stored result.
    """
    # Honor the governance flag; without this check the flag has no effect.
    if not ENABLE_AI:
        return "AI is disabled."

    # The result list is replaced on every search, so an index taken from an
    # earlier render may no longer be valid.
    try:
        r = LAST_RESULTS[index]
    except (IndexError, TypeError):
        return "No search result is available at that index."

    text = (
        "AI Assistive Summary (Non-Authoritative)\n\n"
        f"Agency: {r['agency']}\n"
        f"URL: {r['url']}\n\n"
        "This assists review of a public FOIA document only."
    )
    return text + "\n\n" + provenance_block(text, ai=True)
 
 
 
 
 
 
 
144
 
145
  # ======================================================
146
+ # COURT / CLERK BUNDLE
147
  # ======================================================
148
 
def generate_court_bundle():
    """Write a ZIP bundle of the last search results and return its path.

    The archive contains one ``Exhibit_NNN.txt`` per entry in the
    module-level ``LAST_RESULTS`` (agency heading, URL and a provenance block
    for that URL), plus ``Judicial_Notice.txt`` and ``Clerk_Training.txt``.

    Returns:
        Filesystem path of the written ``cmecf_bundle.zip``.
    """
    # Use mkdtemp() rather than TemporaryDirectory(): the latter deletes the
    # directory, and the ZIP inside it, as soon as its ``with`` block exits,
    # so the returned path would point to a file that no longer exists.
    # Removing the directory is left to the caller / operating system.
    bundle_dir = tempfile.mkdtemp(prefix="foia_bundle_")
    zip_path = os.path.join(bundle_dir, "cmecf_bundle.zip")

    with zipfile.ZipFile(zip_path, "w") as z:
        for i, r in enumerate(LAST_RESULTS, 1):
            body = (
                f"{r['agency']} FOIA Reading Room\n"
                f"{r['url']}\n\n"
                + provenance_block(r["url"])
            )
            z.writestr(f"Exhibit_{i:03d}.txt", body)

        z.writestr("Judicial_Notice.txt", JUDICIAL_NOTICE_TEXT)
        z.writestr("Clerk_Training.txt", JUDICIAL_CLERK_TRAINING_TEXT)

    return zip_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  # ======================================================
174
+ # STATIC GOVERNANCE TEXT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  # ======================================================
176
 
177
+ JUDICIAL_CLERK_TRAINING_TEXT = """
178
+ Judicial Clerk Training – FOIA Navigation Tool
179
 
180
+ Federated link-out search only
181
+ No scraping or document ingestion
182
+ No authentication or PACER access
183
+ AI outputs are labeled, optional, and hashed
184
+ • Suitable for non-filing research reference
185
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
+ JUDICIAL_NOTICE_TEXT = """
188
+ Judicial Notice FOIA Reading Room Navigation
 
189
 
190
+ This system provides navigational assistance to publicly
191
+ available FOIA electronic reading rooms. It does not
192
+ authenticate documents, retrieve sealed records, or
193
+ interact with court filing systems.
194
+ """
 
195
 
196
  # ======================================================
197
+ # UI
198
  # ======================================================
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  CSS = """
201
+ .tabs { position: sticky; top: 0; z-index: 100; background: #0f0f0f; }
202
+ .card {
203
+ border: 1px solid #333;
204
+ border-radius: 16px;
205
+ padding: 14px;
206
+ margin-bottom: 14px;
207
+ }
208
+ .links {
209
+ margin-top: 8px;
210
+ }
211
  .ask-ai {
212
+ background: #1e88e5;
213
+ color: white;
214
+ border: none;
215
+ border-radius: 999px;
216
+ padding: 6px 14px;
 
 
217
  }
218
+ .why {
219
+ font-size: 0.85em;
220
+ opacity: 0.7;
221
+ margin-top: 6px;
222
  }
223
  """
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def _read_markdown(path, fallback):
    """Return the text of ``path``, or ``fallback`` if it cannot be read."""
    try:
        # Context manager closes the handle; a bare open().read() leaks it.
        with open(path, encoding="utf-8") as fh:
            return fh.read()
    except OSError:
        # A missing or unreadable page must not stop the whole app from starting.
        return fallback


with gr.Blocks(css=CSS) as app:
    gr.Markdown("## Federal FOIA Intelligence Search")

    with gr.Tabs(elem_classes="tabs"):
        with gr.Tab("Search"):
            agencies = gr.CheckboxGroup(
                list(ALL_ADAPTERS.keys()),
                value=list(ALL_ADAPTERS.keys()),
                label="Agencies",
            )
            query = gr.Textbox(placeholder="Enter FOIA search term")
            table = gr.Dataframe(headers=["Agency", "Title", "URL", "Hash"])
            cards = gr.HTML()
            status = gr.Textbox()
            # run_search returns (rows, cards HTML, status) in this order.
            gr.Button("Search", elem_classes="ask-ai").click(
                run_search,
                [query, agencies],
                [table, cards, status],
            )

        with gr.Tab("Court / Clerk"):
            # The button is created before the file component so the file
            # output appears below the button.
            bundle_button = gr.Button("Generate CM/ECF Bundle")
            bundle_file = gr.File()
            bundle_button.click(generate_court_bundle, None, bundle_file)

        with gr.Tab("Governance"):
            gr.Markdown(
                _read_markdown(
                    "governance-site/index.md",
                    "Governance documentation is not available in this deployment.",
                )
            )

app.queue()
app.launch(server_name="0.0.0.0", server_port=7860)