GodsDevProject committed on
Commit
737ab27
·
verified ·
1 Parent(s): ec3be23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -103
app.py CHANGED
@@ -3,61 +3,86 @@
3
  # HF Reviewer–Safe Reference Implementation
4
  # ======================================================
5
 
6
- import gradio as gr
7
- import time, hashlib, zipfile, os, tempfile
8
  from datetime import datetime
9
  from urllib.parse import quote_plus
10
-
11
  import requests
12
- from fastapi import FastAPI, Query
 
 
13
  from fastapi.staticfiles import StaticFiles
14
- from fastapi.responses import JSONResponse, FileResponse
 
 
 
 
15
 
16
  # ======================================================
17
  # HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
18
  # ======================================================
19
 
20
- ENABLE_FAISS_PHASE_4 = False
21
- ENABLE_AI = True
 
 
22
 
23
  # ======================================================
24
- # SESSION STATE
25
  # ======================================================
26
 
27
  LAST_RESULTS = []
 
 
28
 
29
  # ======================================================
30
  # CRYPTOGRAPHIC CORE
31
  # ======================================================
32
 
33
- def sha256_text(t: str):
34
  return hashlib.sha256(t.encode()).hexdigest()
35
 
36
- def citation_hash(r):
37
  return hashlib.sha256(
38
  f"{r['agency']}|{r['resolved_url']}|{r['timestamp']}".encode()
39
  ).hexdigest()[:16]
40
 
41
- def provenance_headers(payload: str):
42
  return {
43
- "Tool-Version": "1.7.2",
44
  "Generated-UTC": datetime.utcnow().isoformat(),
45
  "Content-SHA256": sha256_text(payload),
46
  "Public-Source-Only": "true",
47
- "AI-Assisted": "user-initiated-only",
 
48
  }
49
 
 
 
 
 
 
50
  # ======================================================
51
- # FOIA ADAPTERS (LINK-OUT ONLY)
 
 
 
 
 
 
 
 
 
 
 
52
  # ======================================================
53
 
54
  class FOIAAdapter:
55
- agency = ""
56
  search_url = ""
57
 
58
- def search(self, q):
59
  start = time.time()
60
- url = self.search_url.format(q=quote_plus(q))
61
  latency = round((time.time() - start) * 1000, 1)
62
  return [{
63
  "agency": self.agency,
@@ -80,21 +105,37 @@ class DOJ(FOIAAdapter):
80
  agency = "DOJ"
81
  search_url = "https://www.justice.gov/foia/library?search={q}"
82
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  ALL_ADAPTERS = {
84
  "CIA": CIA(),
85
  "FBI": FBI(),
86
  "DOJ": DOJ(),
 
 
 
87
  }
88
 
89
  # ======================================================
90
- # PDF RESOLUTION (SAFE HEAD REQUEST)
91
  # ======================================================
92
 
93
- def resolve_pdf_url(url):
94
  try:
95
- r = requests.head(url, timeout=8, allow_redirects=True)
96
  ct = r.headers.get("content-type", "").lower()
97
- return ("pdf" in ct or url.lower().endswith(".pdf")), r.url
 
98
  except Exception:
99
  return False, url
100
 
@@ -103,16 +144,17 @@ def resolve_pdf_url(url):
103
  # ======================================================
104
 
105
  def run_search(query, agencies):
106
- global LAST_RESULTS
 
107
  LAST_RESULTS = []
108
  rows = []
109
 
110
  for name in agencies:
111
- for r in ALL_ADAPTERS[name].search(query):
 
112
  r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
113
  r["hash"] = citation_hash(r)
114
  LAST_RESULTS.append(r)
115
-
116
  rows.append([
117
  r["agency"],
118
  r["title"],
@@ -121,139 +163,167 @@ def run_search(query, agencies):
121
  f"{r['latency_ms']} ms",
122
  ])
123
 
124
- return rows, render_cards()
125
 
126
  # ======================================================
127
- # ASK-AI GOVERNANCE GATE
128
  # ======================================================
129
 
130
  def can_enable_ai(r):
131
  return (
132
  ENABLE_AI
133
- and r["resolved_pdf"]
134
- and r["resolved_url"].lower().endswith(".pdf")
135
- and not r["sealed"]
136
  )
137
 
138
- def ask_ai_for_document(index: int):
 
139
  r = LAST_RESULTS[index]
140
- return {
141
- "status": "enabled",
142
- "agency": r["agency"],
143
- "title": r["title"],
144
- "url": r["resolved_url"],
145
- "provenance": provenance_headers(r["resolved_url"]),
 
 
 
 
 
 
 
 
 
146
  }
147
 
 
 
148
  # ======================================================
149
- # RESULT CARDS
150
  # ======================================================
151
 
152
  def render_cards():
153
- html = []
154
- for i, r in enumerate(LAST_RESULTS):
155
- html.append(f"""
 
156
  <div class="card">
157
  <div class="card-header">
158
- <b>{r['agency']}</b>
159
  <button class="ask-ai"
160
- onclick="askAI({i})"
161
- {"disabled" if not can_enable_ai(r) else ""}>
162
  Ask AI
163
  </button>
164
  </div>
165
- <div>{r['title']}</div>
166
  <div class="actions">
167
- <a href="{r['resolved_url']}" target="_blank">View</a>
168
- <a href="{r['resolved_url']}" download>Download</a>
169
- <a href="{r['resolved_url']}" target="_blank">Share</a>
170
  </div>
171
  </div>
172
  """)
173
- return "".join(html) or "No results."
174
 
175
  # ======================================================
176
- # COURT BUNDLE
177
  # ======================================================
178
 
179
  def generate_court_bundle():
180
- with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tf:
181
- with zipfile.ZipFile(tf.name, "w") as z:
 
 
182
  for i, r in enumerate(LAST_RESULTS, 1):
183
- z.writestr(
184
- f"Exhibit_{i:03d}.txt",
185
- f"{r['agency']}\n{r['resolved_url']}"
 
186
  )
 
187
  z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])
188
- return tf.name
 
 
 
 
 
 
 
 
189
 
190
  # ======================================================
191
- # FASTAPI CORE
192
  # ======================================================
193
 
194
  fastapi_app = FastAPI()
195
 
196
  @fastapi_app.get("/ask_ai")
197
- def ask_ai(index: int = Query(...)):
198
- if index >= len(LAST_RESULTS):
199
- return JSONResponse({"error": "Invalid index"}, status_code=400)
200
- return ask_ai_for_document(index)
201
-
202
- @fastapi_app.get("/court_bundle")
203
- def court_bundle():
204
- path = generate_court_bundle()
205
- return FileResponse(path, filename="court_bundle.zip")
206
 
207
  # ======================================================
208
  # UI
209
  # ======================================================
210
 
211
  CSS = """
212
- .card {border:1px solid #2a2a2a;border-radius:16px;padding:16px;margin-bottom:16px;}
213
- .card-header {display:flex;justify-content:space-between;}
214
- .ask-ai {background:#1e88e5;color:white;border:none;border-radius:999px;padding:6px 14px;}
215
- .actions {margin-top:8px;display:flex;gap:16px;}
216
- """
217
-
218
- JS = """
219
- function askAI(idx){
220
- fetch('/ask_ai?index=' + idx)
221
- .then(r=>r.json())
222
- .then(d=>alert('AI ready for: ' + d.title));
223
  }
 
 
224
  """
225
 
226
  with gr.Blocks() as gradio_ui:
227
- gr.Markdown("## Federal FOIA Intelligence Search")
228
- agencies = gr.CheckboxGroup(
229
- choices=list(ALL_ADAPTERS.keys()),
230
- value=list(ALL_ADAPTERS.keys())
231
- )
232
- query = gr.Textbox(placeholder="Search FOIA reading rooms")
233
- table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
234
- gallery = gr.HTML()
235
-
236
- gr.Button("Search").click(
237
- run_search,
238
- inputs=[query, agencies],
239
- outputs=[table, gallery]
240
  )
241
 
242
- gr.Button("Download Court Bundle").click(
243
- lambda: "/court_bundle",
244
- None,
245
- gr.File()
246
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
  # ======================================================
249
- # MOUNT + LAUNCH
250
  # ======================================================
251
 
252
- app = gr.mount_gradio_app(fastapi_app, gradio_ui, path="/")
253
-
254
- if __name__ == "__main__":
255
- gradio_ui.launch(
256
- css=CSS,
257
- js=JS,
258
- show_error=True,
259
- )
 
3
  # HF Reviewer–Safe Reference Implementation
4
  # ======================================================
5
 
6
+ import time, hashlib, io, zipfile, os, tempfile, base64
 
7
  from datetime import datetime
8
  from urllib.parse import quote_plus
 
9
  import requests
10
+
11
+ import gradio as gr
12
+ from fastapi import FastAPI
13
  from fastapi.staticfiles import StaticFiles
14
+ from fastapi.responses import JSONResponse
15
+
16
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
17
+ from reportlab.lib.styles import getSampleStyleSheet
18
+ from reportlab.lib.pagesizes import LETTER
19
 
20
  # ======================================================
21
  # HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
22
  # ======================================================
23
 
24
+ ENABLE_FAISS_PHASE_4 = False # REQUIRES FORMAL APPROVAL
25
+ ENABLE_AI = True # USER OPT-IN ONLY
26
+ ENABLE_PDF_EXTRACTION = True # USER OPT-IN ONLY
27
+ ENABLE_DOC_LEVEL_APIS = False # API-ONLY (CIA/FBI WHEN PUBLISHED)
28
 
29
  # ======================================================
30
+ # SESSION STATE (EPHEMERAL)
31
  # ======================================================
32
 
33
  LAST_RESULTS = []
34
+ SELECTED_INDEX = None
35
+ AI_APPENDIX = None
36
 
37
  # ======================================================
38
  # CRYPTOGRAPHIC CORE
39
  # ======================================================
40
 
41
def sha256_text(t: str) -> str:
    """Return the hex-encoded SHA-256 digest of *t* (encoded as UTF-8)."""
    hasher = hashlib.sha256()
    hasher.update(t.encode())
    return hasher.hexdigest()
43
 
44
def citation_hash(r: dict) -> str:
    """Return a 16-hex-character citation fingerprint for a result record.

    The fingerprint is the truncated SHA-256 of the record's ``agency``,
    ``resolved_url`` and ``timestamp`` values joined with ``|``.
    """
    material = f"{r['agency']}|{r['resolved_url']}|{r['timestamp']}"
    digest = hashlib.sha256(material.encode()).hexdigest()
    return digest[:16]
48
 
49
def provenance_headers(payload: str, ai: bool = False) -> dict:
    """Build the provenance header mapping for *payload*.

    Args:
        payload: Text whose SHA-256 digest is recorded under
            ``Content-SHA256``.
        ai: When true, ``AI-Assisted`` is reported as ``"formatting-only"``;
            otherwise as ``"false"``.

    Returns:
        A dict of header name -> string value, in a fixed key order.
    """
    # Local import: the file's top-level import only brings in ``datetime``.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated; take an aware UTC "now" and drop
    # the tzinfo so the emitted ISO string keeps its historical (offset-less)
    # format.
    generated = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        "Tool-Version": "1.7.0",
        "Generated-UTC": generated.isoformat(),
        "Content-SHA256": sha256_text(payload),
        "Public-Source-Only": "true",
        "AI-Assisted": "formatting-only" if ai else "false",
        "Court-Safe": "true",
    }
58
 
59
def render_provenance_block(text: str, ai: bool = False) -> str:
    """Render the provenance headers for *text* as ``Key: value`` lines.

    One header per line, joined with newlines, in the order that
    ``provenance_headers`` returns them.
    """
    headers = provenance_headers(text, ai)
    lines = []
    for key, value in headers.items():
        lines.append(f"{key}: {value}")
    return "\n".join(lines)
63
+
64
  # ======================================================
65
+ # FAISS PHASE-4 (APPROVAL-GATED)
66
+ # ======================================================
67
+
68
class Phase4FAISS:
    """Placeholder for the Phase-4 FAISS feature.

    Construction fails unless the module-level ``ENABLE_FAISS_PHASE_4`` flag
    is truthy.
    """

    def __init__(self):
        if ENABLE_FAISS_PHASE_4:
            return
        raise RuntimeError("Phase-4 FAISS disabled pending formal approval.")
74
+
75
+ # ======================================================
76
+ # FOIA ADAPTERS (LINK-OUT + API STUBS)
77
  # ======================================================
78
 
79
  class FOIAAdapter:
80
+ agency = "UNKNOWN"
81
  search_url = ""
82
 
83
+ def search(self, query: str):
84
  start = time.time()
85
+ url = self.search_url.format(q=quote_plus(query))
86
  latency = round((time.time() - start) * 1000, 1)
87
  return [{
88
  "agency": self.agency,
 
105
  agency = "DOJ"
106
  search_url = "https://www.justice.gov/foia/library?search={q}"
107
 
108
class DHS(FOIAAdapter):
    """Adapter whose search links point at the DHS FOIA library."""

    agency = "DHS"
    # ``{q}`` is filled with the URL-quoted query by ``FOIAAdapter.search``.
    search_url = "https://www.dhs.gov/foia-library/search?search={q}"
111
+
112
class STATE(FOIAAdapter):
    """Adapter whose search links point at the State Department FOIA site."""

    agency = "State Department"
    # ``{q}`` is filled with the URL-quoted query by ``FOIAAdapter.search``.
    search_url = "https://foia.state.gov/Search/Search.aspx?q={q}"
115
+
116
class NSA(FOIAAdapter):
    """Adapter whose search links point at the NSA FOIA reading room."""

    agency = "NSA"
    # ``{q}`` is filled with the URL-quoted query by ``FOIAAdapter.search``.
    search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
119
+
120
# Registry of adapter instances keyed by the label shown in the agency
# checkbox group; insertion order is the display order.
ALL_ADAPTERS = dict(
    CIA=CIA(),
    FBI=FBI(),
    DOJ=DOJ(),
    DHS=DHS(),
    State=STATE(),
    NSA=NSA(),
)
128
 
129
  # ======================================================
130
+ # PDF RESOLUTION (SAFE)
131
  # ======================================================
132
 
133
def resolve_pdf_url(url: str):
    """Follow redirects for *url* and report whether it resolves to a PDF.

    Args:
        url: The URL to probe.

    Returns:
        A ``(is_pdf, final_url)`` tuple. ``is_pdf`` is true when the final
        URL ends in ``.pdf`` or the response content type contains
        ``application/pdf``. On any failure the result is ``(False, url)``
        with the original URL unchanged.
    """
    try:
        # stream=True defers the body download: only the status line and
        # headers are needed here. The context manager releases the
        # connection without reading the (possibly large) body.
        with requests.get(
            url, timeout=10, allow_redirects=True, stream=True
        ) as r:
            ct = r.headers.get("content-type", "").lower()
            final_url = r.url
    except Exception:
        # Deliberate best-effort: resolution problems must never break a
        # search, so fall back to "not a PDF" with the original URL.
        return False, url

    is_pdf = final_url.lower().endswith(".pdf") or "application/pdf" in ct
    return is_pdf, final_url
141
 
 
144
  # ======================================================
145
 
146
  def run_search(query, agencies):
147
+ global LAST_RESULTS, SELECTED_INDEX
148
+ SELECTED_INDEX = None
149
  LAST_RESULTS = []
150
  rows = []
151
 
152
  for name in agencies:
153
+ adapter = ALL_ADAPTERS[name]
154
+ for r in adapter.search(query):
155
  r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
156
  r["hash"] = citation_hash(r)
157
  LAST_RESULTS.append(r)
 
158
  rows.append([
159
  r["agency"],
160
  r["title"],
 
163
  f"{r['latency_ms']} ms",
164
  ])
165
 
166
+ return rows, render_cards(), "No document selected"
167
 
168
  # ======================================================
169
+ # ASK-AI (GOVERNANCE-GATED)
170
  # ======================================================
171
 
172
def can_enable_ai(r):
    """Decide whether the "Ask AI" action may be offered for result *r*.

    Truthy only when the ``ENABLE_AI`` flag is on, the record's
    ``resolved_pdf`` value is truthy, and the record is not marked
    ``sealed``. Missing keys are treated as ``False``.
    """
    if not ENABLE_AI:
        return ENABLE_AI

    resolved_pdf = r.get("resolved_pdf", False)
    if not resolved_pdf:
        return resolved_pdf

    return not r.get("sealed", False)
178
 
179
def ask_ai(index: int):
    """Produce the AI-assistive notice for the search result at *index*.

    Records the selection in ``SELECTED_INDEX`` and stores the generated
    text, its SHA-256 and its provenance block in ``AI_APPENDIX``.

    Args:
        index: Zero-based position in ``LAST_RESULTS``.

    Returns:
        The notice text followed by a blank line and its provenance block.

    Raises:
        IndexError: If *index* is negative or past the end of
            ``LAST_RESULTS``.
        PermissionError: If ``can_enable_ai`` rejects the selected result.
    """
    global SELECTED_INDEX, AI_APPENDIX

    # Plain list indexing would let a negative index wrap around and
    # silently select a different document, so validate explicitly.
    if not 0 <= index < len(LAST_RESULTS):
        raise IndexError(f"No search result at index {index}")

    r = LAST_RESULTS[index]

    # Enforce the governance gate here as well: the disabled button in the
    # rendered card is only a client-side hint.
    if not can_enable_ai(r):
        raise PermissionError("AI assistance is not enabled for this document")

    SELECTED_INDEX = index

    response = (
        "AI ASSISTIVE SUMMARY (NON-AUTHORITATIVE)\n\n"
        f"Agency: {r['agency']}\n"
        f"Source: {r['resolved_url']}\n\n"
        "This AI output is generated solely to assist review of a "
        "public FOIA document. It makes no factual assertions "
        "and carries no evidentiary weight."
    )

    AI_APPENDIX = {
        "text": response,
        "hash": sha256_text(response),
        "provenance": render_provenance_block(response, ai=True),
    }

    return response + "\n\n" + AI_APPENDIX["provenance"]
200
+
201
  # ======================================================
202
+ # RENDER CARDS
203
  # ======================================================
204
 
205
def render_cards():
    """Render ``LAST_RESULTS`` as a string of HTML result cards.

    Each card shows the agency, the title, a "View Source" link to the
    resolved URL and an "Ask AI" button that is disabled when
    ``can_enable_ai`` rejects the record.

    Returns:
        The concatenated card markup, or ``"No results."`` when there are
        no results.
    """
    # Local import keeps this block self-contained.
    from html import escape

    cards = []
    for idx, r in enumerate(LAST_RESULTS):
        disabled_attr = "" if can_enable_ai(r) else "disabled"

        # Record fields originate from remote sites and user queries;
        # escape them before interpolating into markup and attributes.
        agency = escape(str(r['agency']))
        title = escape(str(r['title']))
        source_url = escape(str(r['resolved_url']), quote=True)

        cards.append(f"""
<div class="card">
  <div class="card-header">
    <strong>{agency}</strong>
    <button class="ask-ai"
            onclick="askAI({idx})"
            {disabled_attr}>
      Ask AI
    </button>
  </div>
  <div><b>{title}</b></div>
  <div class="actions">
    <a href="{source_url}" target="_blank" rel="noopener noreferrer">View Source</a>
  </div>
</div>
""")
    return "".join(cards) or "No results."
226
 
227
  # ======================================================
228
+ # COURT BUNDLE (CM/ECF-READY)
229
  # ======================================================
230
 
231
def generate_court_bundle():
    """Write the current results into a ZIP bundle and return its path.

    For every record in ``LAST_RESULTS`` the archive contains
    ``Exhibit_NNN.txt`` (agency line, resolved URL and a provenance block)
    and ``Exhibit_NNN.sha256`` (the record's stored ``hash``). When
    ``AI_APPENDIX`` is set, its text, hash and provenance are added as three
    ``Exhibit_AI_Appendix`` files.

    Returns:
        Filesystem path of ``court_bundle.zip``. The file lives in a fresh
        temporary directory that is NOT removed automatically, so it is
        still present when the caller reads it.
    """
    # mkdtemp (unlike a TemporaryDirectory context manager) leaves the
    # directory in place after this function returns; a context manager
    # would delete the archive before the caller could open it.
    bundle_dir = tempfile.mkdtemp(prefix="court_bundle_")
    zpath = os.path.join(bundle_dir, "court_bundle.zip")

    with zipfile.ZipFile(zpath, "w") as z:
        for i, r in enumerate(LAST_RESULTS, 1):
            content = (
                f"{r['agency']} FOIA Reading Room\n"
                f"{r['resolved_url']}\n\n"
                f"{render_provenance_block(r['resolved_url'])}"
            )
            z.writestr(f"Exhibit_{i:03d}.txt", content)
            z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])

        if AI_APPENDIX:
            z.writestr("Exhibit_AI_Appendix.txt", AI_APPENDIX["text"])
            z.writestr("Exhibit_AI_Appendix.sha256", AI_APPENDIX["hash"])
            z.writestr(
                "Exhibit_AI_Appendix.provenance.txt",
                AI_APPENDIX["provenance"],
            )

    return zpath
253
 
254
  # ======================================================
255
+ # FASTAPI APP
256
  # ======================================================
257
 
258
  fastapi_app = FastAPI()
259
 
260
@fastapi_app.get("/ask_ai")
def ask_ai_endpoint(index: int):
    """HTTP wrapper around ``ask_ai`` for the result at *index*.

    Responds with ``{"result": ...}`` on success, HTTP 400 when *index* does
    not address an entry of ``LAST_RESULTS``, and HTTP 403 when
    ``can_enable_ai`` rejects the addressed record.
    """
    # Validate before indexing: an out-of-range value would otherwise
    # surface as an unhandled IndexError (HTTP 500), and a negative value
    # would silently address a different document.
    if not 0 <= index < len(LAST_RESULTS):
        return JSONResponse({"error": "Invalid index"}, status_code=400)

    # The disabled button in the UI is only advisory; enforce the gate for
    # direct HTTP callers too.
    if not can_enable_ai(LAST_RESULTS[index]):
        return JSONResponse(
            {"error": "AI is not enabled for this document"},
            status_code=403,
        )

    return JSONResponse({"result": ask_ai(index)})
263
+
264
# Serve the static governance site under /gov only when the directory is
# present. isdir (rather than exists) avoids handing StaticFiles a regular
# file of the same name, which it would reject at startup.
if os.path.isdir("governance-site"):
    fastapi_app.mount(
        "/gov",
        StaticFiles(directory="governance-site", html=True),
        name="governance",
    )
270
 
271
  # ======================================================
272
  # UI
273
  # ======================================================
274
 
275
  CSS = """
276
+ .card { border:1px solid #333; border-radius:16px; padding:16px; margin-bottom:18px; }
277
+ .card-header { display:flex; justify-content:space-between; }
278
+ .ask-ai {
279
+ background:#1e88e5; color:white; border:none;
280
+ padding:6px 16px; border-radius:999px;
 
 
 
 
 
 
281
  }
282
+ .ask-ai:disabled { background:#666; }
283
+ .actions { margin-top:10px; }
284
  """
285
 
286
# Gradio layout: a Search tab (inputs, results table, result cards, AI
# status box), a Court tab (bundle download) and a Trust tab (embedded
# governance site).
with gr.Blocks() as gradio_ui:
    gr.Markdown(
        "## Federal FOIA Intelligence Search\n"
        "Public FOIA reading rooms only • Research & education use"
    )

    with gr.Tab("Search"):
        # Every registered adapter is offered and pre-selected.
        agencies = gr.CheckboxGroup(
            choices=list(ALL_ADAPTERS),
            value=list(ALL_ADAPTERS),
            label="Agencies",
        )
        query = gr.Textbox(placeholder="Search terms")
        table = gr.Dataframe(
            headers=["Agency", "Title", "Resolved URL", "Hash", "Latency"]
        )
        gallery = gr.HTML()
        status = gr.Textbox(label="AI Status", lines=6)

        # run_search returns (rows, cards HTML, status text) in this order.
        gr.Button("Search").click(
            fn=run_search,
            inputs=[query, agencies],
            outputs=[table, gallery, status],
        )

    with gr.Tab("Court"):
        # The handler returns the bundle's file path for the File output.
        gr.Button("Generate Court Bundle").click(
            fn=lambda: generate_court_bundle(),
            inputs=None,
            outputs=gr.File(),
        )

    with gr.Tab("Trust"):
        gr.HTML(
            '<iframe src="/gov/index.html" '
            'style="width:100%;height:700px;border:1px solid #ccc;"></iframe>'
        )
320
 
321
  # ======================================================
322
+ # MOUNT (HF-SAFE)
323
  # ======================================================
324
 
325
# ASGI entry point: the Gradio UI is mounted at the root of the FastAPI app.
app = gr.mount_gradio_app(fastapi_app, gradio_ui, path="/")