GodsDevProject committed on
Commit
c752812
·
verified ·
1 Parent(s): 2caebe4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -157
app.py CHANGED
@@ -1,90 +1,127 @@
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
- # Hugging Face Spaces Reviewer Safe
4
  # ======================================================
5
 
6
- import os
7
- import io
8
- import zipfile
9
- import hashlib
10
  from datetime import datetime
11
  from urllib.parse import quote_plus
 
12
 
13
  import gradio as gr
14
  from fastapi import FastAPI
15
  from fastapi.staticfiles import StaticFiles
 
16
 
17
  # ======================================================
18
- # GOVERNANCE FLAGS (HARD)
19
  # ======================================================
20
 
21
- ENABLE_AI = True
22
- ENABLE_DOC_LEVEL_APIS = False # No public CIA/FBI doc APIs exist
23
- ENABLE_FAISS_PHASE_4 = False # Requires formal approval
24
 
25
  # ======================================================
26
- # SESSION STATE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # ======================================================
28
 
29
  LAST_RESULTS = []
30
  AI_APPENDIX = None
31
 
32
  # ======================================================
33
- # CRYPTOGRAPHIC PROVENANCE
34
  # ======================================================
35
 
36
- def sha256_text(text: str) -> str:
37
- return hashlib.sha256(text.encode("utf-8")).hexdigest()
38
 
39
  def provenance_block(payload: str, ai=False) -> str:
40
  return "\n".join([
41
- "Tool: Federal FOIA Intelligence Search",
42
- "Version: 1.9.0",
43
  f"Generated-UTC: {datetime.utcnow().isoformat()}",
44
- f"SHA256: {sha256_text(payload)}",
45
  "Public-Source-Only: true",
46
  f"AI-Assisted: {'true' if ai else 'false'}",
47
  "Court-Safe: true",
48
  ])
49
 
50
  # ======================================================
51
- # FOIA ADAPTERS (LINK-OUT ONLY)
52
  # ======================================================
53
 
54
  class FOIAAdapter:
55
- agency = ""
56
  search_url = ""
57
 
58
  def search(self, query):
 
59
  return [{
60
  "agency": self.agency,
61
- "title": f"{self.agency} FOIA Reading Room",
62
- "url": self.search_url.format(q=quote_plus(query)),
63
- "why": "This is a public FOIA electronic reading room result."
64
  }]
65
 
66
- class CIAAdapter(FOIAAdapter):
67
  agency = "CIA"
 
68
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
69
 
70
- class FBIAdapter(FOIAAdapter):
71
  agency = "FBI"
72
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
73
 
74
- class DOJAdapter(FOIAAdapter):
75
- agency = "DOJ"
76
- search_url = "https://www.justice.gov/oip/foia-library/search?search_api_fulltext={q}"
 
77
 
78
- class DHSAdapter(FOIAAdapter):
79
- agency = "DHS"
80
- search_url = "https://www.dhs.gov/foia-library/search?search_api_fulltext={q}"
81
 
82
- ADAPTERS = {
83
- "CIA": CIAAdapter(),
84
- "FBI": FBIAdapter(),
85
- "DOJ": DOJAdapter(),
86
- "DHS": DHSAdapter(),
87
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  # ======================================================
90
  # SEARCH
@@ -93,38 +130,47 @@ ADAPTERS = {
93
  def run_search(query, agencies):
94
  global LAST_RESULTS
95
  LAST_RESULTS = []
96
-
97
  rows = []
98
- for agency in agencies:
99
- for r in ADAPTERS[agency].search(query):
100
- r["hash"] = sha256_text(r["url"])[:16]
 
 
 
 
 
101
  LAST_RESULTS.append(r)
 
102
  rows.append([
103
  r["agency"],
104
  r["title"],
105
- r["url"],
106
  r["hash"],
107
  ])
108
 
109
  return rows, render_cards(), "Search complete."
110
 
111
  # ======================================================
112
- # AI (STRICT USER INITIATION)
113
  # ======================================================
114
 
115
  def ask_ai(index: int):
116
  global AI_APPENDIX
117
-
118
  r = LAST_RESULTS[index]
 
 
 
 
119
  text = (
120
  "AI Assistive Summary (Non-Authoritative)\n\n"
121
  f"Agency: {r['agency']}\n"
122
- f"Source URL: {r['url']}\n\n"
123
- "This summary assists review of a public FOIA document."
124
  )
125
 
126
  AI_APPENDIX = {
127
  "text": text,
 
128
  "prov": provenance_block(text, ai=True)
129
  }
130
 
@@ -135,57 +181,33 @@ def ask_ai(index: int):
135
  # ======================================================
136
 
137
  def generate_court_bundle():
138
- buf = io.BytesIO()
139
- with zipfile.ZipFile(buf, "w") as z:
140
- for i, r in enumerate(LAST_RESULTS, 1):
141
- body = (
142
- f"{r['agency']} FOIA Reading Room\n"
143
- f"{r['url']}\n\n"
144
- + provenance_block(r["url"])
145
- )
146
- z.writestr(f"Exhibit_{i:03d}.txt", body)
147
-
148
- if AI_APPENDIX:
149
- z.writestr("Exhibit_AI_Appendix.txt", AI_APPENDIX["text"])
150
- z.writestr("Exhibit_AI_Appendix.provenance.txt", AI_APPENDIX["prov"])
151
-
152
- z.writestr("Judicial_Notice.txt", JUDICIAL_NOTICE)
153
- z.writestr("HF_Reviewer_Explanation.txt", HF_REVIEWER_EXPLANATION)
154
-
155
- buf.seek(0)
156
- return buf
157
 
158
  # ======================================================
159
- # STATIC GOVERNANCE TEXT
160
- # ======================================================
161
-
162
- HF_REVIEWER_EXPLANATION = """
163
- This application performs federated link-out searches
164
- to public FOIA electronic reading rooms.
165
-
166
- No scraping, crawling, authentication bypass,
167
- or automated document ingestion occurs.
168
-
169
- CIA and FBI search behavior mirrors public web interfaces.
170
- """
171
-
172
- JUDICIAL_NOTICE = """
173
- Judicial Notice Regarding FOIA Navigation
174
-
175
- This tool does not retrieve or alter records.
176
- It provides navigational references to public
177
- government FOIA reading rooms.
178
-
179
- Any linked document remains hosted
180
- and controlled by the originating agency.
181
- """
182
-
183
- # ======================================================
184
- # FASTAPI (STATIC GOVERNANCE SITE)
185
  # ======================================================
186
 
187
  api = FastAPI()
188
 
 
 
 
 
189
  if os.path.exists("governance-site"):
190
  api.mount(
191
  "/governance",
@@ -194,94 +216,86 @@ if os.path.exists("governance-site"):
194
  )
195
 
196
  # ======================================================
197
- # UI HELPERS
198
  # ======================================================
199
 
200
  def render_cards():
201
  cards = []
202
  for i, r in enumerate(LAST_RESULTS):
 
 
 
 
 
 
203
  cards.append(f"""
204
  <div class="card">
205
- <b>{r['agency']}</b><br/>
206
- {r['title']}<br/>
207
- <div class="links">
208
- <a href="{r['url']}" target="_blank">View</a> |
209
- <span class="disabled">Download</span> |
210
- <span class="disabled">Share</span>
211
- <button class="ask-ai" onclick="fetch('/ask_ai?index={i}')">
212
- Ask AI
213
- </button>
214
- </div>
215
- <div class="why">
216
- Why am I seeing this? {r['why']}
217
- </div>
 
 
 
 
 
 
 
 
218
  </div>
219
  """)
 
220
  return "".join(cards)
221
 
222
  # ======================================================
223
- # STYLES
224
  # ======================================================
225
 
226
  CSS = """
227
- .tabs { position: sticky; top: 0; background: #0b0f17; z-index: 10; }
228
- button.primary, .ask-ai {
229
- background: #1e88e5 !important;
230
- border-radius: 999px !important;
231
- color: white !important;
232
- }
233
- .card {
234
- border: 1px solid #333;
235
- border-radius: 16px;
236
- padding: 14px;
237
- margin-bottom: 14px;
238
- }
239
- .disabled { color: #666; cursor: not-allowed; }
240
- .why { font-size: 0.8em; color: #aaa; margin-top: 6px; }
241
  """
242
 
243
- # ======================================================
244
- # UI
245
- # ======================================================
246
-
247
- with gr.Blocks(css=CSS) as app:
248
  gr.Markdown("## Federal FOIA Intelligence Search")
249
 
250
- with gr.Tabs():
251
- with gr.Tab("Search"):
252
- agencies = gr.CheckboxGroup(
253
- list(ADAPTERS.keys()),
254
- value=list(ADAPTERS.keys())
255
- )
256
- query = gr.Textbox(placeholder="Search FOIA reading rooms")
257
- table = gr.Dataframe(
258
- headers=["Agency", "Title", "URL", "Hash"]
259
- )
260
- cards = gr.HTML()
261
- status = gr.Textbox()
262
-
263
- gr.Button("Search", elem_classes=["primary"]).click(
264
- run_search,
265
- [query, agencies],
266
- [table, cards, status]
267
- )
268
-
269
- with gr.Tab("Court / Clerk"):
270
- gr.Button("Generate Court Bundle").click(
271
- generate_court_bundle,
272
- None,
273
- gr.File()
274
- )
275
-
276
- with gr.Tab("Governance"):
277
- gr.Markdown(
278
- "### Governance & Transparency\n\n"
279
- "[Open Governance Site](/governance/index.html)"
280
- )
281
 
282
  # ======================================================
283
- # LAUNCH (HF SAFE)
284
  # ======================================================
285
 
286
- app.queue()
287
- app.launch(server_name="0.0.0.0", server_port=7860)
 
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
+ # HF Reviewer–Safe / Court-Safe LIVE IMPLEMENTATION
4
  # ======================================================
5
 
6
+ import os, io, time, zipfile, tempfile, hashlib, base64
 
 
 
7
  from datetime import datetime
8
  from urllib.parse import quote_plus
9
+ import requests
10
 
11
  import gradio as gr
12
  from fastapi import FastAPI
13
  from fastapi.staticfiles import StaticFiles
14
+ from fastapi.responses import JSONResponse
15
 
16
  # ======================================================
17
+ # GOVERNANCE FLAGS (NON-NEGOTIABLE)
18
  # ======================================================
19
 
20
+ ENABLE_AI = True # user-initiated only
21
+ ENABLE_FAISS_PHASE_4 = False # hard disabled
22
+ ENABLE_DOC_LEVEL_APIS = False # CIA/FBI have no public APIs (documented)
23
 
24
  # ======================================================
25
+ # OPTIONAL PDF SUPPORT
26
+ # ======================================================
27
+
28
+ PDF_THUMBNAILS_AVAILABLE = False
29
+ PDF_TEXT_AVAILABLE = False
30
+
31
+ try:
32
+ from pdf2image import convert_from_bytes
33
+ PDF_THUMBNAILS_AVAILABLE = True
34
+ except Exception:
35
+ pass
36
+
37
+ try:
38
+ from pdfminer.high_level import extract_text
39
+ PDF_TEXT_AVAILABLE = True
40
+ except Exception:
41
+ pass
42
+
43
+ # ======================================================
44
+ # SESSION STATE (EPHEMERAL)
45
  # ======================================================
46
 
47
  LAST_RESULTS = []
48
  AI_APPENDIX = None
49
 
50
  # ======================================================
51
+ # CRYPTOGRAPHY / PROVENANCE
52
  # ======================================================
53
 
def sha256_text(t: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``t``.

    The text is encoded with ``str.encode()``'s default codec (UTF-8)
    before it is hashed.
    """
    hasher = hashlib.sha256()
    hasher.update(t.encode())
    return hasher.hexdigest()
56
 
def provenance_block(payload: str, ai=False) -> str:
    """Build the newline-separated provenance header for an exported artifact.

    Args:
        payload: Text whose SHA-256 digest is recorded in ``Content-SHA256``.
        ai: When truthy the block is marked ``AI-Assisted: true``.

    Returns:
        Six ``Key: value`` lines joined with ``"\\n"`` (no trailing newline).
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the emitted string keeps the exact naive
    # ISO-8601 shape the previous implementation produced.
    from datetime import timezone

    generated_utc = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return "\n".join([
        "Tool-Version: 1.9.0",
        f"Generated-UTC: {generated_utc}",
        f"Content-SHA256: {sha256_text(payload)}",
        "Public-Source-Only: true",
        f"AI-Assisted: {'true' if ai else 'false'}",
        "Court-Safe: true",
    ])
66
 
67
  # ======================================================
68
+ # FOIA ADAPTERS (LINK-OUT ONLY — CORRECT SEARCH EMBEDDING)
69
  # ======================================================
70
 
class FOIAAdapter:
    """Base class for link-out adapters to a public FOIA reading room.

    Subclasses set ``agency`` (display name) and ``search_url`` (a
    ``str.format`` template containing a ``{q}`` placeholder). The adapter
    only builds a search URL; it performs no network access itself.
    """

    agency = "UNKNOWN"
    search_url = ""

    def search(self, query):
        """Return a one-element list describing the agency's search page.

        Args:
            query: Free-text search string; it is form-encoded with
                ``quote_plus`` before being substituted into ``search_url``.

        Returns:
            ``[{"agency", "title", "resolved_url", "timestamp"}]`` where
            ``timestamp`` is the current UTC time as a naive ISO-8601 string.
        """
        # datetime.utcnow() is deprecated since Python 3.12; derive the same
        # naive-UTC string from an aware timestamp instead.
        from datetime import timezone

        url = self.search_url.format(q=quote_plus(query))
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        return [{
            "agency": self.agency,
            "title": f"{self.agency} FOIA Reading Room Results",
            "resolved_url": url,
            "timestamp": now_utc.isoformat(),
        }]
83
 
class CIA(FOIAAdapter):
    """Link-out adapter for the CIA reading-room search page."""

    # CIA requires /search/site/{query}
    search_url = "https://www.cia.gov/readingroom/search/site/{q}"
    agency = "CIA"
88
 
class FBI(FOIAAdapter):
    """Link-out adapter for the FBI Vault search page."""

    # The query is passed in the ``SearchableText`` query-string parameter.
    search_url = "https://vault.fbi.gov/search?SearchableText={q}"
    agency = "FBI"
92
 
# Registry of adapter instances keyed by agency name ("CIA", then "FBI");
# the UI builds its checkbox list from these keys.
ALL_ADAPTERS = {
    adapter.agency: adapter
    for adapter in (CIA(), FBI())
}
97
 
98
+ # ======================================================
99
+ # PDF RESOLUTION + THUMBNAILS
100
+ # ======================================================
101
 
def resolve_pdf(url):
    """Follow redirects for ``url`` and report whether it points at a PDF.

    Args:
        url: Address to probe.

    Returns:
        ``(is_pdf, final_url)``. ``is_pdf`` is true when the final URL ends
        in ``.pdf`` or the response ``Content-Type`` mentions
        ``application/pdf``. On a request failure the result is
        ``(False, url)`` with the original, unresolved URL.
    """
    try:
        # stream=True defers the body download: only the headers and the
        # post-redirect URL are needed here. The ``with`` block releases the
        # connection, which the previous implementation never did.
        with requests.get(
            url, timeout=10, allow_redirects=True, stream=True
        ) as r:
            ct = r.headers.get("content-type", "").lower()
            final_url = r.url
    except (requests.RequestException, ValueError):
        # Best-effort probe: network errors and malformed URLs simply mean
        # "not known to be a PDF".
        return False, url

    is_pdf = final_url.lower().endswith(".pdf") or "application/pdf" in ct
    return is_pdf, final_url
110
+
def pdf_thumbnails(url, pages=2):
    """Render the first ``pages`` pages of a remote PDF as base64 PNG strings.

    Args:
        url: Address of the PDF to download.
        pages: Last page (1-based) to render; passed to
            ``convert_from_bytes`` as ``last_page``.

    Returns:
        A list of base64-encoded PNG images, one per rendered page. The list
        is empty when thumbnail support is unavailable, when ``pages`` is a
        number below 1, or when the download or rendering fails.
    """
    if not PDF_THUMBNAILS_AVAILABLE:
        return []
    if pages is not None and pages < 1:
        # Nothing to render; avoid a pointless download.
        return []
    try:
        r = requests.get(url, timeout=10)
        # Do not hand an HTTP error page to the PDF renderer.
        r.raise_for_status()
        images = convert_from_bytes(r.content, first_page=1, last_page=pages)
        thumbs = []
        for img in images:
            buf = io.BytesIO()
            img.save(buf, format="PNG")
            thumbs.append(base64.b64encode(buf.getvalue()).decode())
        return thumbs
    except Exception:
        # Deliberately broad: thumbnails are optional decoration, so any
        # download or rendering failure degrades to "no thumbnails".
        return []
125
 
126
  # ======================================================
127
  # SEARCH
 
def run_search(query, agencies):
    """Run a link-out search against the selected agencies.

    Resets the module-level ``LAST_RESULTS`` list, then fills it with one
    enriched result dict per adapter hit (``is_pdf``, ``resolved_url``,
    ``hash`` and ``thumbnails`` are added to each).

    Args:
        query: Free-text search string from the UI.
        agencies: Iterable of ``ALL_ADAPTERS`` keys; ``None`` is treated as
            no selection and unknown names are skipped.

    Returns:
        ``(rows, cards_html, status)`` where ``rows`` is a list of
        ``[agency, title, url, hash]`` lists for the results table.
    """
    # NOTE(review): LAST_RESULTS is module-level state, so it is presumably
    # shared by every visitor of the process - confirm that is intended.
    global LAST_RESULTS
    LAST_RESULTS = []
    rows = []

    query = (query or "").strip()
    if not query:
        # A blank query would only trigger pointless network lookups.
        return rows, render_cards(), "Enter a search query."

    for name in agencies or []:
        adapter = ALL_ADAPTERS.get(name)
        if adapter is None:
            continue
        for r in adapter.search(query):
            is_pdf, resolved = resolve_pdf(r["resolved_url"])
            r["is_pdf"] = is_pdf
            r["resolved_url"] = resolved
            # Short identifier: first 16 hex digits of the URL's SHA-256.
            r["hash"] = sha256_text(resolved)[:16]
            r["thumbnails"] = pdf_thumbnails(resolved) if is_pdf else []
            LAST_RESULTS.append(r)

            rows.append([
                r["agency"],
                r["title"],
                r["resolved_url"],
                r["hash"],
            ])

    return rows, render_cards(), "Search complete."
152
 
153
  # ======================================================
154
+ # ASK-AI (STRICTLY GATED)
155
  # ======================================================
156
 
157
  def ask_ai(index: int):
158
  global AI_APPENDIX
 
159
  r = LAST_RESULTS[index]
160
+
161
+ if not (ENABLE_AI and r["is_pdf"]):
162
+ return "AI is disabled for this result."
163
+
164
  text = (
165
  "AI Assistive Summary (Non-Authoritative)\n\n"
166
  f"Agency: {r['agency']}\n"
167
+ f"URL: {r['resolved_url']}\n\n"
168
+ "This output assists review of a publicly released FOIA document."
169
  )
170
 
171
  AI_APPENDIX = {
172
  "text": text,
173
+ "hash": sha256_text(text),
174
  "prov": provenance_block(text, ai=True)
175
  }
176
 
 
181
  # ======================================================
182
 
def generate_court_bundle():
    """Write the current results to a ZIP archive and return its path.

    The archive holds one ``Exhibit_NNN.txt`` per entry in ``LAST_RESULTS``
    (agency line, resolved URL and a provenance block) and, when
    ``AI_APPENDIX`` is set, ``Exhibit_AI.txt`` plus its provenance file.

    Returns:
        Filesystem path of ``court_bundle.zip``.
    """
    # mkdtemp instead of TemporaryDirectory: the context-manager form deletes
    # the directory as soon as the ``with`` block exits, so the returned path
    # pointed at a file that no longer existed. The directory created here is
    # intentionally left in place for the caller to read.
    bundle_dir = tempfile.mkdtemp(prefix="court_bundle_")
    path = os.path.join(bundle_dir, "court_bundle.zip")

    with zipfile.ZipFile(path, "w") as z:
        for i, r in enumerate(LAST_RESULTS, 1):
            body = (
                f"{r['agency']} FOIA Reading Room\n"
                f"{r['resolved_url']}\n\n"
                + provenance_block(r["resolved_url"])
            )
            z.writestr(f"Exhibit_{i:03d}.txt", body)

        if AI_APPENDIX:
            z.writestr("Exhibit_AI.txt", AI_APPENDIX["text"])
            z.writestr("Exhibit_AI.provenance.txt", AI_APPENDIX["prov"])

    return path
 
 
 
200
 
201
  # ======================================================
202
+ # FASTAPI (FOR ASK-AI + GOVERNANCE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  # ======================================================
204
 
205
  api = FastAPI()
206
 
@api.get("/ask_ai")
def ask_ai_api(index: int):
    """HTTP wrapper around ``ask_ai`` for the card "Ask AI" button.

    Args:
        index: Position of the result in ``LAST_RESULTS``.

    Returns:
        ``{"result": ...}`` on success, or a 404 ``{"error": ...}`` response
        when ``index`` does not refer to a current search result.
    """
    try:
        result = ask_ai(index)
    except IndexError:
        # ask_ai indexes LAST_RESULTS directly; a stale or out-of-range index
        # previously surfaced as an unhandled server error.
        return JSONResponse(
            {"error": f"No search result at index {index}."},
            status_code=404,
        )
    return JSONResponse({"result": result})
210
+
211
  if os.path.exists("governance-site"):
212
  api.mount(
213
  "/governance",
 
216
  )
217
 
218
  # ======================================================
219
+ # UI RENDERING
220
  # ======================================================
221
 
def render_cards():
    """Render every entry of ``LAST_RESULTS`` as an HTML result card.

    Each card shows the agency, an "Ask AI" button, the title, any
    thumbnails, and View / Download (PDF results only) / Share links.

    Returns:
        The concatenated card markup, or ``""`` when there are no results.
    """
    # Local import keeps this block self-contained.
    from html import escape

    cards = []
    for i, r in enumerate(LAST_RESULTS):
        # The resolved URL comes back from a remote redirect chain, so it and
        # the other text fields are escaped before being placed in markup or
        # attribute values.
        agency = escape(str(r["agency"]))
        title = escape(str(r["title"]))
        url = escape(str(r["resolved_url"]), quote=True)

        thumbs = "".join(
            f'<img src="data:image/png;base64,{t}" '
            f'style="width:120px;border-radius:8px;margin-right:6px;" />'
            for t in r["thumbnails"]
        )

        links = [f'<a href="{url}" target="_blank" rel="noopener noreferrer">View</a>']
        if r["is_pdf"]:
            links.append(f'<a href="{url}" download>Download</a>')
        links.append(f'<a href="{url}" target="_blank" rel="noopener noreferrer">Share</a>')
        links_html = " | ".join(links)

        cards.append(f"""
<div class="card">
  <div class="header">
    <b>{agency}</b>
    <button class="ask-ai" onclick="fetch('/ask_ai?index={i}')">
      Ask AI
    </button>
  </div>

  <div class="title">{title}</div>

  {thumbs}

  <div class="links">
    {links_html}
  </div>

  <div class="why">
    Why am I seeing this?
    This links to a publicly released FOIA reading-room result.
  </div>
</div>
""")

    return "".join(cards)
258
 
259
  # ======================================================
260
+ # GRADIO UI (NO DEPRECATED ARGS)
261
  # ======================================================
262
 
263
  CSS = """
264
+ .card { border:1px solid #2a2a2a; border-radius:18px; padding:16px; margin-bottom:18px; background:#0f0f0f; }
265
+ .header { display:flex; justify-content:space-between; align-items:center; }
266
+ .ask-ai { background:#1e88e5; color:white; border:none; border-radius:999px; padding:6px 14px; font-weight:600; }
267
+ .links a { color:#64b5f6; text-decoration:none; }
268
+ .why { font-size:0.75rem; color:#aaa; margin-top:6px; }
 
 
 
 
 
 
 
 
 
269
  """
270
 
# Three-tab Gradio layout: search, court bundle export, governance pages.
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("## Federal FOIA Intelligence Search")

    with gr.Tab("Search"):
        agencies = gr.CheckboxGroup(
            list(ALL_ADAPTERS),
            value=list(ALL_ADAPTERS),
        )
        query = gr.Textbox(placeholder="Search FOIA reading rooms")
        table = gr.Dataframe(headers=["Agency", "Title", "URL", "Hash"])
        cards = gr.HTML()
        status = gr.Textbox()

        # run_search fills the table, the card HTML and the status line.
        search_button = gr.Button("Search", variant="primary")
        search_button.click(
            run_search,
            inputs=[query, agencies],
            outputs=[table, cards, status],
        )

    with gr.Tab("Court / Clerk"):
        bundle_button = gr.Button("Generate CM/ECF Bundle")
        bundle_file = gr.File()
        bundle_button.click(
            lambda: generate_court_bundle(),
            inputs=None,
            outputs=bundle_file,
        )

    with gr.Tab("Governance"):
        # NOTE(review): /governance is mounted on the FastAPI ``api`` object;
        # the visible code never attaches ``api`` to ``demo`` - confirm the
        # route is actually reachable from this UI.
        gr.HTML('<iframe src="/governance/index.html" style="width:100%;height:700px;border:none;"></iframe>')
 
 
 
 
 
 
 
 
 
 
295
 
296
  # ======================================================
297
+ # START (HF-SAFE)
298
  # ======================================================
299
 
300
+ demo.queue()
301
+ demo.launch(server_name="0.0.0.0", server_port=7860)