GodsDevProject committed on
Commit
5daaf8c
·
verified ·
1 Parent(s): 59314d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -167
app.py CHANGED
@@ -4,33 +4,28 @@
4
  # ======================================================
5
 
6
  import gradio as gr
7
- import time, hashlib, io, zipfile, os, tempfile, base64, json
8
  from datetime import datetime
9
  from urllib.parse import quote_plus
10
- import requests
11
 
12
- from fastapi import FastAPI, Response
 
13
  from fastapi.staticfiles import StaticFiles
14
-
15
- from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
16
- from reportlab.lib.styles import getSampleStyleSheet
17
- from reportlab.lib.pagesizes import LETTER
18
 
19
  # ======================================================
20
  # HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
21
  # ======================================================
22
 
23
- ENABLE_FAISS_PHASE_4 = False # DEFAULT OFF – requires formal approval
24
- ENABLE_AI = True # USER OPT-IN ONLY
25
- ENABLE_PDF_EXTRACTION = True # USER OPT-IN ONLY
26
- ENABLE_DOC_LEVEL_APIS = False # API-ONLY, OFF BY DEFAULT
27
 
28
  # ======================================================
29
  # SESSION STATE (EPHEMERAL)
30
  # ======================================================
31
 
32
  LAST_RESULTS = []
33
- AI_APPENDICES = []
34
  SELECTED_INDEX = None
35
 
36
  # ======================================================
@@ -38,7 +33,7 @@ SELECTED_INDEX = None
38
  # ======================================================
39
 
40
  def sha256_text(t: str):
41
- return hashlib.sha256(t.encode("utf-8")).hexdigest()
42
 
43
  def citation_hash(r):
44
  return hashlib.sha256(
@@ -47,43 +42,23 @@ def citation_hash(r):
47
 
48
  def provenance_headers(payload: str):
49
  return {
50
- "Tool-Version": "1.8.0",
51
  "Generated-UTC": datetime.utcnow().isoformat(),
52
  "Content-SHA256": sha256_text(payload),
53
  "Public-Source-Only": "true",
54
  "AI-Assisted": "formatting-only",
55
- "Court-Safe": "true",
56
  }
57
 
58
  def render_provenance_block(text: str):
59
  return "\n".join(f"{k}: {v}" for k, v in provenance_headers(text).items())
60
 
61
  # ======================================================
62
- # PHASE-4 FAISS (HARD GATED)
63
- # ======================================================
64
-
65
- class Phase4FAISS:
66
- def __init__(self):
67
- if not ENABLE_FAISS_PHASE_4:
68
- raise RuntimeError(
69
- "Phase-4 FAISS indexing is disabled pending formal approval."
70
- )
71
- self.index = {}
72
-
73
- def add_document(self, doc_id, text):
74
- self.index[doc_id] = text
75
-
76
- def search(self, query):
77
- return [] # intentionally non-operational until approved
78
-
79
- # ======================================================
80
- # FOIA ADAPTERS (LIVE LINK-OUT + API-READY)
81
  # ======================================================
82
 
83
  class FOIAAdapter:
84
  agency = "UNKNOWN"
85
  search_url = ""
86
- api_endpoint = None # API-ONLY when available
87
 
88
  def search(self, query):
89
  start = time.time()
@@ -98,24 +73,13 @@ class FOIAAdapter:
98
  "sealed": False,
99
  }]
100
 
101
- def api_ingest(self, query):
102
- if not ENABLE_DOC_LEVEL_APIS or not self.api_endpoint:
103
- return []
104
- try:
105
- r = requests.get(self.api_endpoint, params={"q": query}, timeout=10)
106
- return r.json().get("documents", [])
107
- except Exception:
108
- return []
109
-
110
  class CIA(FOIAAdapter):
111
  agency = "CIA"
112
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
113
- api_endpoint = None # published when CIA releases API
114
 
115
  class FBI(FOIAAdapter):
116
  agency = "FBI"
117
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
118
- api_endpoint = None # placeholder for FBI API
119
 
120
  class DOJ(FOIAAdapter):
121
  agency = "DOJ"
@@ -148,7 +112,7 @@ ALL_ADAPTERS = {
148
 
149
  def resolve_pdf_url(url):
150
  try:
151
- r = requests.get(url, timeout=10, allow_redirects=True)
152
  ct = r.headers.get("content-type", "").lower()
153
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
154
  return is_pdf, r.url
@@ -161,8 +125,8 @@ def resolve_pdf_url(url):
161
 
162
  def run_search(query, agencies):
163
  global LAST_RESULTS, SELECTED_INDEX
164
- LAST_RESULTS = []
165
  SELECTED_INDEX = None
 
166
  rows = []
167
 
168
  for name in agencies:
@@ -179,52 +143,30 @@ def run_search(query, agencies):
179
  f"{r['latency_ms']} ms",
180
  ])
181
 
182
- return rows, render_cards(), "No document selected"
183
 
184
  # ======================================================
185
- # AI GOVERNANCE + HASHED OUTPUT
186
  # ======================================================
187
 
188
  def can_enable_ai(r):
189
  return (
190
  ENABLE_AI
191
  and r.get("resolved_pdf", False)
 
192
  and not r.get("sealed", False)
193
  )
194
 
195
- def ask_ai_for_document(index):
196
- global SELECTED_INDEX, AI_APPENDICES
197
- SELECTED_INDEX = index
198
  r = LAST_RESULTS[index]
199
-
200
- ai_text = (
201
- "AI-ASSISTED REFERENCE SUMMARY\n\n"
202
- f"Agency: {r['agency']}\n"
203
- f"Source URL: {r['resolved_url']}\n\n"
204
- "This content is assistive, non-authoritative, "
205
- "and not offered as evidence or legal analysis."
206
- )
207
-
208
- ai_hash = sha256_text(ai_text)
209
- provenance = render_provenance_block(ai_text)
210
-
211
- appendix = {
212
- "index": index,
213
- "text": ai_text,
214
- "hash": ai_hash,
215
- "provenance": provenance,
216
  }
217
 
218
- AI_APPENDICES.append(appendix)
219
-
220
- return (
221
- ai_text
222
- + "\n\n---\nAI HASH:\n"
223
- + ai_hash
224
- + "\n\nPROVENANCE:\n"
225
- + provenance
226
- )
227
-
228
  # ======================================================
229
  # RENDER RESULT CARDS
230
  # ======================================================
@@ -232,81 +174,60 @@ def ask_ai_for_document(index):
232
  def render_cards():
233
  cards = []
234
  for idx, r in enumerate(LAST_RESULTS):
235
- enabled = can_enable_ai(r)
236
  cards.append(f"""
237
  <div class="card">
238
  <div class="card-header">
239
  <strong>{r['agency']}</strong>
240
  <button class="ask-ai"
241
- onclick="window.askAI({idx})"
242
- {"disabled" if not enabled else ""}>
243
  Ask AI
244
  </button>
245
  </div>
246
  <div><b>{r['title']}</b></div>
247
  <div class="actions">
248
  <a href="{r['resolved_url']}" target="_blank">View Source</a>
 
 
 
 
249
  </div>
250
  </div>
251
  """)
252
- return "".join(cards) or "No results found."
253
 
254
  # ======================================================
255
- # COURT / CM-ECF BUNDLE (AI SEPARATED)
256
  # ======================================================
257
 
258
  def generate_court_bundle():
259
- ecf_no = f"ECF-PREFILE-{datetime.utcnow().strftime('%Y%m%d-%H%M%S')}"
260
- with tempfile.TemporaryDirectory() as td:
261
- zpath = os.path.join(td, "court_bundle.zip")
262
- with zipfile.ZipFile(zpath, "w") as z:
263
-
264
  for i, r in enumerate(LAST_RESULTS, 1):
265
- content = (
266
- f"{r['agency']} FOIA Reading Room\n"
267
- f"{r['resolved_url']}\n\n"
268
- f"{render_provenance_block(r['resolved_url'])}"
269
- )
270
- z.writestr(f"Exhibit_{i:03d}.txt", content)
271
- z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])
272
-
273
- for j, a in enumerate(AI_APPENDICES, 1):
274
  z.writestr(
275
- f"AI_Appendix_{j:03d}.txt",
276
- a["text"] + "\n\n" + a["provenance"],
277
  )
278
- z.writestr(
279
- f"AI_Appendix_{j:03d}.sha256",
280
- a["hash"],
281
- )
282
-
283
- z.writestr(
284
- "HF_Reviewer_Cover_Letter.txt",
285
- "This application indexes public FOIA materials only.\n"
286
- "AI output is segregated, hashed, disclosed, and non-evidentiary."
287
- )
288
-
289
- z.writestr(
290
- "Judicial_Clerk_Training_Notes.txt",
291
- "• FOIA sources only\n"
292
- "• Verify URL + hash\n"
293
- "• AI appendices are informational only\n"
294
- )
295
-
296
- z.writestr(
297
- "Trust_and_Safety_Justification.txt",
298
- "HF Trust & Safety Review:\n"
299
- "No private data, no training on user content, no deception."
300
- )
301
-
302
- return zpath
303
 
304
  # ======================================================
305
- # FASTAPI MOUNT (GOVERNANCE SITE)
306
  # ======================================================
307
 
308
  fastapi_app = FastAPI()
309
 
 
 
 
 
 
 
 
 
 
 
 
310
  if os.path.exists("governance-site"):
311
  fastapi_app.mount(
312
  "/gov",
@@ -314,51 +235,53 @@ if os.path.exists("governance-site"):
314
  name="governance",
315
  )
316
 
317
- @fastapi_app.get("/ask_ai")
318
- def ask_ai_endpoint(index: int):
319
- return Response(ask_ai_for_document(index), media_type="text/plain")
320
-
321
  # ======================================================
322
  # UI
323
  # ======================================================
324
 
325
  CSS = """
326
- .card { border:1px solid #2a2a2a; border-radius:18px; padding:18px;
327
- margin-bottom:22px; background:#0f0f0f; }
328
- .card-header { display:flex; justify-content:space-between; }
329
- .ask-ai { background:#1e88e5; color:white; border:none;
330
- padding:6px 16px; border-radius:999px; }
331
- .ask-ai:disabled { background:#555; }
332
  """
333
 
334
- with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
335
- gr.Markdown("## Federal FOIA Intelligence Search\nPublic FOIA sources only")
336
-
337
- with gr.Tab("Search"):
338
- agencies = gr.CheckboxGroup(list(ALL_ADAPTERS.keys()),
339
- value=list(ALL_ADAPTERS.keys()))
340
- query = gr.Textbox()
341
- table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
342
- gallery = gr.HTML()
343
- status = gr.Textbox(lines=10)
344
- gr.Button("Search").click(run_search, [query, agencies],
345
- [table, gallery, status])
346
-
347
- with gr.Tab("Court / CM-ECF"):
348
- gr.File(label="Download Court Bundle").upload(
349
- lambda: generate_court_bundle()
350
- )
351
-
352
- with gr.Tab("Trust & Governance"):
353
- gr.HTML(
354
- "<iframe src='/gov/index.html' "
355
- "style='width:100%;height:700px;border:1px solid #ccc'></iframe>"
356
- )
357
-
358
- app = gr.mount_gradio_app(fastapi_app, app, path="/")
359
-
360
- app.js = """
361
- window.askAI = function(idx) {
362
- fetch('/ask_ai?index=' + idx)
363
  }
364
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  # ======================================================
5
 
6
  import gradio as gr
7
+ import time, hashlib, zipfile, os, tempfile
8
  from datetime import datetime
9
  from urllib.parse import quote_plus
 
10
 
11
+ import requests
12
+ from fastapi import FastAPI, Query
13
  from fastapi.staticfiles import StaticFiles
14
+ from fastapi.responses import JSONResponse, FileResponse
 
 
 
15
 
16
  # ======================================================
17
  # HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
18
  # ======================================================
19
 
20
+ ENABLE_FAISS_PHASE_4 = False # HARD DISABLED
21
+ ENABLE_AI = True # USER OPT-IN ONLY
22
+ ENABLE_PDF_EXTRACTION = True # USER OPT-IN ONLY
 
23
 
24
  # ======================================================
25
  # SESSION STATE (EPHEMERAL)
26
  # ======================================================
27
 
28
  LAST_RESULTS = []
 
29
  SELECTED_INDEX = None
30
 
31
  # ======================================================
 
33
  # ======================================================
34
 
35
  def sha256_text(t: str):
36
+ return hashlib.sha256(t.encode()).hexdigest()
37
 
38
  def citation_hash(r):
39
  return hashlib.sha256(
 
42
 
43
  def provenance_headers(payload: str):
44
  return {
45
+ "Tool-Version": "1.7.1",
46
  "Generated-UTC": datetime.utcnow().isoformat(),
47
  "Content-SHA256": sha256_text(payload),
48
  "Public-Source-Only": "true",
49
  "AI-Assisted": "formatting-only",
 
50
  }
51
 
52
  def render_provenance_block(text: str):
53
  return "\n".join(f"{k}: {v}" for k, v in provenance_headers(text).items())
54
 
55
  # ======================================================
56
+ # FOIA ADAPTERS (LINK-OUT ONLY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # ======================================================
58
 
59
  class FOIAAdapter:
60
  agency = "UNKNOWN"
61
  search_url = ""
 
62
 
63
  def search(self, query):
64
  start = time.time()
 
73
  "sealed": False,
74
  }]
75
 
 
 
 
 
 
 
 
 
 
76
  class CIA(FOIAAdapter):
77
  agency = "CIA"
78
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
 
79
 
80
  class FBI(FOIAAdapter):
81
  agency = "FBI"
82
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
 
83
 
84
  class DOJ(FOIAAdapter):
85
  agency = "DOJ"
 
112
 
113
  def resolve_pdf_url(url):
114
  try:
115
+ r = requests.get(url, timeout=8, allow_redirects=True)
116
  ct = r.headers.get("content-type", "").lower()
117
  is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
118
  return is_pdf, r.url
 
125
 
126
  def run_search(query, agencies):
127
  global LAST_RESULTS, SELECTED_INDEX
 
128
  SELECTED_INDEX = None
129
+ LAST_RESULTS = []
130
  rows = []
131
 
132
  for name in agencies:
 
143
  f"{r['latency_ms']} ms",
144
  ])
145
 
146
+ return rows, render_cards(), "No document selected."
147
 
148
  # ======================================================
149
+ # ASK-AI GOVERNANCE GATE
150
  # ======================================================
151
 
152
  def can_enable_ai(r):
153
  return (
154
  ENABLE_AI
155
  and r.get("resolved_pdf", False)
156
+ and r.get("resolved_url", "").lower().endswith(".pdf")
157
  and not r.get("sealed", False)
158
  )
159
 
160
+ def ask_ai_for_document(index: int):
 
 
161
  r = LAST_RESULTS[index]
162
+ return {
163
+ "status": "enabled",
164
+ "agency": r["agency"],
165
+ "title": r["title"],
166
+ "url": r["resolved_url"],
167
+ "provenance": provenance_headers(r["resolved_url"]),
 
 
 
 
 
 
 
 
 
 
 
168
  }
169
 
 
 
 
 
 
 
 
 
 
 
170
  # ======================================================
171
  # RENDER RESULT CARDS
172
  # ======================================================
 
174
  def render_cards():
175
  cards = []
176
  for idx, r in enumerate(LAST_RESULTS):
 
177
  cards.append(f"""
178
  <div class="card">
179
  <div class="card-header">
180
  <strong>{r['agency']}</strong>
181
  <button class="ask-ai"
182
+ onclick="askAI({idx})"
183
+ {"disabled" if not can_enable_ai(r) else ""}>
184
  Ask AI
185
  </button>
186
  </div>
187
  <div><b>{r['title']}</b></div>
188
  <div class="actions">
189
  <a href="{r['resolved_url']}" target="_blank">View Source</a>
190
+ {"<a href='"+r['resolved_url']+"' target='_blank'>Preview PDF</a>" if r["resolved_pdf"] else ""}
191
+ </div>
192
+ <div class="helper">
193
+ Why am I seeing this? This is a publicly released FOIA document.
194
  </div>
195
  </div>
196
  """)
197
+ return "".join(cards) or "No results."
198
 
199
  # ======================================================
200
+ # COURT BUNDLE
201
  # ======================================================
202
 
203
  def generate_court_bundle():
204
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tf:
205
+ with zipfile.ZipFile(tf.name, "w") as z:
 
 
 
206
  for i, r in enumerate(LAST_RESULTS, 1):
 
 
 
 
 
 
 
 
 
207
  z.writestr(
208
+ f"Exhibit_{i:03d}.txt",
209
+ f"{r['agency']}\n{r['resolved_url']}\n\n{render_provenance_block(r['resolved_url'])}"
210
  )
211
+ z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])
212
+ return tf.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  # ======================================================
215
+ # FASTAPI CORE
216
  # ======================================================
217
 
218
  fastapi_app = FastAPI()
219
 
220
+ @fastapi_app.get("/ask_ai")
221
+ def ask_ai(index: int = Query(...)):
222
+ if index >= len(LAST_RESULTS):
223
+ return JSONResponse({"error": "Invalid index"}, status_code=400)
224
+ return ask_ai_for_document(index)
225
+
226
+ @fastapi_app.get("/court_bundle")
227
+ def court_bundle():
228
+ path = generate_court_bundle()
229
+ return FileResponse(path, filename="court_bundle.zip")
230
+
231
  if os.path.exists("governance-site"):
232
  fastapi_app.mount(
233
  "/gov",
 
235
  name="governance",
236
  )
237
 
 
 
 
 
238
  # ======================================================
239
  # UI
240
  # ======================================================
241
 
242
  CSS = """
243
+ .card {border:1px solid #2a2a2a;border-radius:18px;padding:18px;margin-bottom:22px;background:#0f0f0f;}
244
+ .card-header {display:flex;justify-content:space-between;align-items:center;}
245
+ .ask-ai {background:linear-gradient(135deg,#1e88e5,#1565c0);color:white;border:none;padding:6px 16px;border-radius:999px;font-weight:600;}
246
+ .ask-ai:disabled {background:#555;}
247
+ .helper {font-size:0.75rem;color:#aaa;margin-top:6px;}
248
+ .actions {margin-top:12px;display:flex;gap:16px;}
249
  """
250
 
251
+ JS = """
252
+ function askAI(idx){
253
+ fetch('/ask_ai?index=' + idx)
254
+ .then(r=>r.json())
255
+ .then(d=>alert('AI Enabled for: ' + d.title));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  }
257
+ """
258
+
259
+ with gr.Blocks() as gradio_ui:
260
+ gr.Markdown("## Federal FOIA Intelligence Search")
261
+ agencies = gr.CheckboxGroup(choices=list(ALL_ADAPTERS.keys()), value=list(ALL_ADAPTERS.keys()))
262
+ query = gr.Textbox(placeholder="Search terms")
263
+ table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
264
+ gallery = gr.HTML()
265
+ status = gr.Textbox(lines=4)
266
+ gr.Button("Search").click(run_search,[query,agencies],[table,gallery,status])
267
+
268
+ gr.Markdown("### Court")
269
+ gr.Button("Download Court Bundle").click(
270
+ lambda: "/court_bundle",
271
+ None,
272
+ gr.File()
273
+ )
274
+
275
+ # ======================================================
276
+ # MOUNT + LAUNCH
277
+ # ======================================================
278
+
279
+ app = gr.mount_gradio_app(fastapi_app, gradio_ui, path="/")
280
+
281
+ if __name__ == "__main__":
282
+ gradio_ui.launch(
283
+ css=CSS,
284
+ js=JS,
285
+ title="Federal FOIA Intelligence Search",
286
+ show_error=True,
287
+ )