GodsDevProject committed on
Commit
2caebe4
·
verified ·
1 Parent(s): 4f4b376

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -165
app.py CHANGED
@@ -1,56 +1,47 @@
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
- # Court-Safe / HF-Reviewer-Safe Reference Implementation
4
  # ======================================================
5
 
6
- import os, io, zipfile, tempfile, hashlib, base64
 
 
 
7
  from datetime import datetime
8
  from urllib.parse import quote_plus
9
- import requests
10
 
11
  import gradio as gr
12
  from fastapi import FastAPI
13
  from fastapi.staticfiles import StaticFiles
14
- from fastapi.responses import JSONResponse
15
-
16
- # ======================================================
17
- # OPTIONAL PDF SUPPORT
18
- # ======================================================
19
-
20
- PDF_THUMBNAILS_AVAILABLE = False
21
- try:
22
- from pdf2image import convert_from_bytes
23
- PDF_THUMBNAILS_AVAILABLE = True
24
- except Exception:
25
- pass
26
 
27
  # ======================================================
28
  # GOVERNANCE FLAGS (HARD)
29
  # ======================================================
30
 
31
  ENABLE_AI = True
32
- ENABLE_FAISS_PHASE_4 = False # FORMAL APPROVAL REQUIRED
33
- ENABLE_DOC_LEVEL_APIS = False # CIA/FBI DO NOT PROVIDE
34
 
35
  # ======================================================
36
- # SESSION STATE (EPHEMERAL)
37
  # ======================================================
38
 
39
  LAST_RESULTS = []
40
  AI_APPENDIX = None
41
 
42
  # ======================================================
43
- # CRYPTOGRAPHY
44
  # ======================================================
45
 
46
- def sha256_text(t: str) -> str:
47
- return hashlib.sha256(t.encode()).hexdigest()
48
 
49
  def provenance_block(payload: str, ai=False) -> str:
50
  return "\n".join([
51
- "Tool-Version: 2.1.0",
 
52
  f"Generated-UTC: {datetime.utcnow().isoformat()}",
53
- f"Content-SHA256: {sha256_text(payload)}",
54
  "Public-Source-Only: true",
55
  f"AI-Assisted: {'true' if ai else 'false'}",
56
  "Court-Safe: true",
@@ -61,71 +52,40 @@ def provenance_block(payload: str, ai=False) -> str:
61
  # ======================================================
62
 
63
  class FOIAAdapter:
64
- agency = "UNKNOWN"
65
  search_url = ""
66
 
67
  def search(self, query):
68
- url = self.search_url.format(q=quote_plus(query))
69
  return [{
70
  "agency": self.agency,
71
  "title": f"{self.agency} FOIA Reading Room",
72
- "resolved_url": url,
73
- "timestamp": datetime.utcnow().isoformat(),
74
- "is_pdf": False,
75
- "thumbnails": [],
76
  }]
77
 
78
- class CIA(FOIAAdapter):
79
  agency = "CIA"
80
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
81
 
82
- class FBI(FOIAAdapter):
83
  agency = "FBI"
84
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
85
 
86
- class DOJ(FOIAAdapter):
87
  agency = "DOJ"
88
- search_url = "https://www.justice.gov/foia/library?search={q}"
89
 
90
- class DHS(FOIAAdapter):
91
  agency = "DHS"
92
- search_url = "https://www.dhs.gov/foia-library/search?search={q}"
93
 
94
- ALL_ADAPTERS = {
95
- "CIA": CIA(),
96
- "FBI": FBI(),
97
- "DOJ": DOJ(),
98
- "DHS": DHS(),
99
  }
100
 
101
- # ======================================================
102
- # PDF DETECTION (SAFE)
103
- # ======================================================
104
-
105
- def resolve_pdf(url):
106
- try:
107
- r = requests.get(url, timeout=10, allow_redirects=True)
108
- ct = r.headers.get("content-type", "").lower()
109
- is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
110
- return is_pdf, r.url
111
- except Exception:
112
- return False, url
113
-
114
- def generate_thumbnails(url, pages=2):
115
- if not PDF_THUMBNAILS_AVAILABLE:
116
- return []
117
- try:
118
- r = requests.get(url, timeout=10)
119
- images = convert_from_bytes(r.content, first_page=1, last_page=pages)
120
- thumbs = []
121
- for img in images:
122
- buf = io.BytesIO()
123
- img.save(buf, format="PNG")
124
- thumbs.append(base64.b64encode(buf.getvalue()).decode())
125
- return thumbs
126
- except Exception:
127
- return []
128
-
129
  # ======================================================
130
  # SEARCH
131
  # ======================================================
@@ -135,150 +95,193 @@ def run_search(query, agencies):
135
  LAST_RESULTS = []
136
 
137
  rows = []
138
- for name in agencies:
139
- for r in ALL_ADAPTERS[name].search(query):
140
- is_pdf, resolved = resolve_pdf(r["resolved_url"])
141
- r["resolved_url"] = resolved
142
- r["is_pdf"] = is_pdf
143
- r["thumbnails"] = generate_thumbnails(resolved) if is_pdf else []
144
- r["hash"] = sha256_text(resolved)[:16]
145
  LAST_RESULTS.append(r)
146
- rows.append([r["agency"], r["title"], resolved, r["hash"]])
 
 
 
 
 
147
 
148
  return rows, render_cards(), "Search complete."
149
 
150
  # ======================================================
151
- # ASK-AI (OPT-IN)
152
  # ======================================================
153
 
154
  def ask_ai(index: int):
155
  global AI_APPENDIX
156
- r = LAST_RESULTS[index]
157
-
158
- if not (ENABLE_AI and r["is_pdf"]):
159
- return "AI assistance is unavailable for this result."
160
 
 
161
  text = (
162
  "AI Assistive Summary (Non-Authoritative)\n\n"
163
  f"Agency: {r['agency']}\n"
164
- f"Source: {r['resolved_url']}\n\n"
165
- "This assists review of a public FOIA document only."
166
  )
167
 
168
  AI_APPENDIX = {
169
  "text": text,
170
- "hash": sha256_text(text),
171
- "prov": provenance_block(text, ai=True),
172
  }
173
 
174
  return text + "\n\n" + AI_APPENDIX["prov"]
175
 
176
  # ======================================================
177
- # COURT BUNDLE
178
  # ======================================================
179
 
180
  def generate_court_bundle():
181
- with tempfile.TemporaryDirectory() as td:
182
- path = os.path.join(td, "court_bundle.zip")
183
- with zipfile.ZipFile(path, "w") as z:
184
- for i, r in enumerate(LAST_RESULTS, 1):
185
- body = (
186
- f"{r['agency']} FOIA Reading Room\n"
187
- f"{r['resolved_url']}\n\n"
188
- + provenance_block(r["resolved_url"])
189
- )
190
- z.writestr(f"Exhibit_{i:03d}.txt", body)
191
-
192
- if AI_APPENDIX:
193
- z.writestr("Exhibit_AI_Appendix.txt", AI_APPENDIX["text"])
194
- z.writestr("Exhibit_AI_Appendix.provenance.txt", AI_APPENDIX["prov"])
195
-
196
- return path
 
 
 
197
 
198
  # ======================================================
199
- # GOVERNANCE SITE LOADING
200
  # ======================================================
201
 
202
- def load_governance_md():
203
- path = "governance-site/index.md"
204
- if os.path.exists(path):
205
- with open(path, "r", encoding="utf-8") as f:
206
- return f.read()
207
- return "_Governance documentation not found._"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  # ======================================================
210
- # FASTAPI
211
  # ======================================================
212
 
213
  api = FastAPI()
214
 
215
- @api.get("/ask_ai")
216
- def ask_ai_api(index: int):
217
- return JSONResponse({"result": ask_ai(index)})
218
-
219
  if os.path.exists("governance-site"):
220
- api.mount("/gov", StaticFiles(directory="governance-site", html=True))
 
 
 
 
221
 
222
  # ======================================================
223
- # UI
224
  # ======================================================
225
 
226
- CSS = """
227
- .tab-nav { position: sticky; top: 0; z-index: 999; background: #0f0f0f; }
228
- @media (max-width: 768px) {
229
- .desktop-only { display: none !important; }
230
- }
231
- button { border-radius: 999px !important; }
232
- """
233
-
234
  def render_cards():
235
  cards = []
236
  for i, r in enumerate(LAST_RESULTS):
237
- download_html = (
238
- f'<a href="{r["resolved_url"]}" download>Download</a>'
239
- if r["is_pdf"]
240
- else '<span style="opacity:.4">Download</span>'
241
- )
242
-
243
  cards.append(f"""
244
- <div style="border:1px solid #333;border-radius:18px;padding:16px;margin-bottom:16px;">
245
- <b>{r['agency']}</b><br/>
246
- {r['title']}<br/>
247
- <div style="margin-top:10px;">
248
- <a href="{r['resolved_url']}" target="_blank">View</a> |
249
- {download_html} |
250
- <a href="{r['resolved_url']}" target="_blank">Share</a>
251
- <button style="background:#1e88e5;color:white;padding:4px 14px;margin-left:10px;border:none;"
252
- onclick="fetch('/ask_ai?index={i}')">
253
- Ask AI
254
- </button>
255
- </div>
256
- <div style="font-size:.75em;color:#777;margin-top:6px;">
257
- Why am I seeing this? This is a public FOIA reading-room result.
258
- </div>
259
  </div>
260
  """)
261
  return "".join(cards)
262
 
263
- with gr.Blocks(css=CSS) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  gr.Markdown("## Federal FOIA Intelligence Search")
265
 
266
- with gr.Tab("Search"):
267
- agencies = gr.CheckboxGroup(list(ALL_ADAPTERS.keys()), value=list(ALL_ADAPTERS.keys()))
268
- query = gr.Textbox(placeholder="Search FOIA reading rooms")
269
- table = gr.Dataframe(
270
- headers=["Agency", "Title", "URL", "Hash"],
271
- elem_classes=["desktop-only"],
272
- )
273
- cards = gr.HTML()
274
- status = gr.Textbox()
275
- gr.Button("Search").click(run_search, [query, agencies], [table, cards, status])
276
-
277
- with gr.Tab("Court / Clerk"):
278
- gr.Button("Generate Court Bundle").click(lambda: generate_court_bundle(), None, gr.File())
279
-
280
- with gr.Tab("Governance"):
281
- gr.Markdown(load_governance_md())
282
-
283
- demo.queue()
284
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
+ # Hugging Face Spaces – Reviewer Safe
4
  # ======================================================
5
 
6
+ import os
7
+ import io
8
+ import zipfile
9
+ import hashlib
10
  from datetime import datetime
11
  from urllib.parse import quote_plus
 
12
 
13
  import gradio as gr
14
  from fastapi import FastAPI
15
  from fastapi.staticfiles import StaticFiles
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # ======================================================
18
  # GOVERNANCE FLAGS (HARD)
19
  # ======================================================
20
 
21
  ENABLE_AI = True
22
+ ENABLE_DOC_LEVEL_APIS = False # No public CIA/FBI doc APIs exist
23
+ ENABLE_FAISS_PHASE_4 = False # Requires formal approval
24
 
25
  # ======================================================
26
+ # SESSION STATE
27
  # ======================================================
28
 
29
  LAST_RESULTS = []
30
  AI_APPENDIX = None
31
 
32
  # ======================================================
33
+ # CRYPTOGRAPHIC PROVENANCE
34
  # ======================================================
35
 
36
+ def sha256_text(text: str) -> str:
37
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()
38
 
39
  def provenance_block(payload: str, ai=False) -> str:
40
  return "\n".join([
41
+ "Tool: Federal FOIA Intelligence Search",
42
+ "Version: 1.9.0",
43
  f"Generated-UTC: {datetime.utcnow().isoformat()}",
44
+ f"SHA256: {sha256_text(payload)}",
45
  "Public-Source-Only: true",
46
  f"AI-Assisted: {'true' if ai else 'false'}",
47
  "Court-Safe: true",
 
52
  # ======================================================
53
 
54
  class FOIAAdapter:
55
+ agency = ""
56
  search_url = ""
57
 
58
  def search(self, query):
 
59
  return [{
60
  "agency": self.agency,
61
  "title": f"{self.agency} FOIA Reading Room",
62
+ "url": self.search_url.format(q=quote_plus(query)),
63
+ "why": "This is a public FOIA electronic reading room result."
 
 
64
  }]
65
 
66
+ class CIAAdapter(FOIAAdapter):
67
  agency = "CIA"
68
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
69
 
70
+ class FBIAdapter(FOIAAdapter):
71
  agency = "FBI"
72
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
73
 
74
+ class DOJAdapter(FOIAAdapter):
75
  agency = "DOJ"
76
+ search_url = "https://www.justice.gov/oip/foia-library/search?search_api_fulltext={q}"
77
 
78
+ class DHSAdapter(FOIAAdapter):
79
  agency = "DHS"
80
+ search_url = "https://www.dhs.gov/foia-library/search?search_api_fulltext={q}"
81
 
82
+ ADAPTERS = {
83
+ "CIA": CIAAdapter(),
84
+ "FBI": FBIAdapter(),
85
+ "DOJ": DOJAdapter(),
86
+ "DHS": DHSAdapter(),
87
  }
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # ======================================================
90
  # SEARCH
91
  # ======================================================
 
95
  LAST_RESULTS = []
96
 
97
  rows = []
98
+ for agency in agencies:
99
+ for r in ADAPTERS[agency].search(query):
100
+ r["hash"] = sha256_text(r["url"])[:16]
 
 
 
 
101
  LAST_RESULTS.append(r)
102
+ rows.append([
103
+ r["agency"],
104
+ r["title"],
105
+ r["url"],
106
+ r["hash"],
107
+ ])
108
 
109
  return rows, render_cards(), "Search complete."
110
 
111
  # ======================================================
112
+ # AI (STRICT USER INITIATION)
113
  # ======================================================
114
 
115
  def ask_ai(index: int):
116
  global AI_APPENDIX
 
 
 
 
117
 
118
+ r = LAST_RESULTS[index]
119
  text = (
120
  "AI Assistive Summary (Non-Authoritative)\n\n"
121
  f"Agency: {r['agency']}\n"
122
+ f"Source URL: {r['url']}\n\n"
123
+ "This summary assists review of a public FOIA document."
124
  )
125
 
126
  AI_APPENDIX = {
127
  "text": text,
128
+ "prov": provenance_block(text, ai=True)
 
129
  }
130
 
131
  return text + "\n\n" + AI_APPENDIX["prov"]
132
 
133
  # ======================================================
134
+ # COURT / CM-ECF BUNDLE
135
  # ======================================================
136
 
137
  def generate_court_bundle():
138
+ buf = io.BytesIO()
139
+ with zipfile.ZipFile(buf, "w") as z:
140
+ for i, r in enumerate(LAST_RESULTS, 1):
141
+ body = (
142
+ f"{r['agency']} FOIA Reading Room\n"
143
+ f"{r['url']}\n\n"
144
+ + provenance_block(r["url"])
145
+ )
146
+ z.writestr(f"Exhibit_{i:03d}.txt", body)
147
+
148
+ if AI_APPENDIX:
149
+ z.writestr("Exhibit_AI_Appendix.txt", AI_APPENDIX["text"])
150
+ z.writestr("Exhibit_AI_Appendix.provenance.txt", AI_APPENDIX["prov"])
151
+
152
+ z.writestr("Judicial_Notice.txt", JUDICIAL_NOTICE)
153
+ z.writestr("HF_Reviewer_Explanation.txt", HF_REVIEWER_EXPLANATION)
154
+
155
+ buf.seek(0)
156
+ return buf
157
 
158
  # ======================================================
159
+ # STATIC GOVERNANCE TEXT
160
  # ======================================================
161
 
162
+ HF_REVIEWER_EXPLANATION = """
163
+ This application performs federated link-out searches
164
+ to public FOIA electronic reading rooms.
165
+
166
+ No scraping, crawling, authentication bypass,
167
+ or automated document ingestion occurs.
168
+
169
+ CIA and FBI search behavior mirrors public web interfaces.
170
+ """
171
+
172
+ JUDICIAL_NOTICE = """
173
+ Judicial Notice Regarding FOIA Navigation
174
+
175
+ This tool does not retrieve or alter records.
176
+ It provides navigational references to public
177
+ government FOIA reading rooms.
178
+
179
+ Any linked document remains hosted
180
+ and controlled by the originating agency.
181
+ """
182
 
183
  # ======================================================
184
+ # FASTAPI (STATIC GOVERNANCE SITE)
185
  # ======================================================
186
 
187
  api = FastAPI()
188
 
 
 
 
 
189
  if os.path.exists("governance-site"):
190
+ api.mount(
191
+ "/governance",
192
+ StaticFiles(directory="governance-site", html=True),
193
+ name="governance",
194
+ )
195
 
196
  # ======================================================
197
+ # UI HELPERS
198
  # ======================================================
199
 
 
 
 
 
 
 
 
 
200
  def render_cards():
201
  cards = []
202
  for i, r in enumerate(LAST_RESULTS):
 
 
 
 
 
 
203
  cards.append(f"""
204
+ <div class="card">
205
+ <b>{r['agency']}</b><br/>
206
+ {r['title']}<br/>
207
+ <div class="links">
208
+ <a href="{r['url']}" target="_blank">View</a> |
209
+ <span class="disabled">Download</span> |
210
+ <span class="disabled">Share</span>
211
+ <button class="ask-ai" onclick="fetch('/ask_ai?index={i}')">
212
+ Ask AI
213
+ </button>
214
+ </div>
215
+ <div class="why">
216
+ Why am I seeing this? {r['why']}
217
+ </div>
 
218
  </div>
219
  """)
220
  return "".join(cards)
221
 
222
+ # ======================================================
223
+ # STYLES
224
+ # ======================================================
225
+
226
+ CSS = """
227
+ .tabs { position: sticky; top: 0; background: #0b0f17; z-index: 10; }
228
+ button.primary, .ask-ai {
229
+ background: #1e88e5 !important;
230
+ border-radius: 999px !important;
231
+ color: white !important;
232
+ }
233
+ .card {
234
+ border: 1px solid #333;
235
+ border-radius: 16px;
236
+ padding: 14px;
237
+ margin-bottom: 14px;
238
+ }
239
+ .disabled { color: #666; cursor: not-allowed; }
240
+ .why { font-size: 0.8em; color: #aaa; margin-top: 6px; }
241
+ """
242
+
243
+ # ======================================================
244
+ # UI
245
+ # ======================================================
246
+
247
+ with gr.Blocks(css=CSS) as app:
248
  gr.Markdown("## Federal FOIA Intelligence Search")
249
 
250
+ with gr.Tabs():
251
+ with gr.Tab("Search"):
252
+ agencies = gr.CheckboxGroup(
253
+ list(ADAPTERS.keys()),
254
+ value=list(ADAPTERS.keys())
255
+ )
256
+ query = gr.Textbox(placeholder="Search FOIA reading rooms")
257
+ table = gr.Dataframe(
258
+ headers=["Agency", "Title", "URL", "Hash"]
259
+ )
260
+ cards = gr.HTML()
261
+ status = gr.Textbox()
262
+
263
+ gr.Button("Search", elem_classes=["primary"]).click(
264
+ run_search,
265
+ [query, agencies],
266
+ [table, cards, status]
267
+ )
268
+
269
+ with gr.Tab("Court / Clerk"):
270
+ gr.Button("Generate Court Bundle").click(
271
+ generate_court_bundle,
272
+ None,
273
+ gr.File()
274
+ )
275
+
276
+ with gr.Tab("Governance"):
277
+ gr.Markdown(
278
+ "### Governance & Transparency\n\n"
279
+ "[Open Governance Site](/governance/index.html)"
280
+ )
281
+
282
+ # ======================================================
283
+ # LAUNCH (HF SAFE)
284
+ # ======================================================
285
+
286
+ app.queue()
287
+ app.launch(server_name="0.0.0.0", server_port=7860)