GodsDevProject committed on
Commit
5790830
·
verified ·
1 Parent(s): cc720ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -68
app.py CHANGED
@@ -1,9 +1,9 @@
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
- # HF Reviewer–Safe / Court-Safe Reference Implementation
4
  # ======================================================
5
 
6
- import os, io, zipfile, tempfile, hashlib, base64, time
7
  from datetime import datetime
8
  from urllib.parse import quote_plus
9
  import requests
@@ -13,33 +13,24 @@ from fastapi import FastAPI
13
  from fastapi.staticfiles import StaticFiles
14
  from fastapi.responses import JSONResponse
15
 
16
- # Optional PDF support
17
- PDF_THUMBNAILS_AVAILABLE = False
18
- PDF_TEXT_AVAILABLE = False
19
 
 
20
  try:
21
  from pdf2image import convert_from_bytes
22
  PDF_THUMBNAILS_AVAILABLE = True
23
  except Exception:
24
  pass
25
 
26
- try:
27
- from pdfminer.high_level import extract_text
28
- PDF_TEXT_AVAILABLE = True
29
- except Exception:
30
- pass
31
-
32
- from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
33
- from reportlab.lib.styles import getSampleStyleSheet
34
- from reportlab.lib.pagesizes import LETTER
35
-
36
  # ======================================================
37
- # HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
38
  # ======================================================
39
 
40
- ENABLE_AI = True # USER-INITIATED ONLY
41
- ENABLE_FAISS_PHASE_4 = False # FORMAL APPROVAL REQUIRED
42
- ENABLE_DOC_LEVEL_APIS = False # CIA/FBI DO NOT CURRENTLY PROVIDE
43
 
44
  # ======================================================
45
  # SESSION STATE (EPHEMERAL)
@@ -49,7 +40,7 @@ LAST_RESULTS = []
49
  AI_APPENDIX = None
50
 
51
  # ======================================================
52
- # CRYPTOGRAPHIC CORE
53
  # ======================================================
54
 
55
  def sha256_text(t: str) -> str:
@@ -57,7 +48,7 @@ def sha256_text(t: str) -> str:
57
 
58
  def provenance_block(payload: str, ai=False) -> str:
59
  return "\n".join([
60
- "Tool-Version: 2.0.0",
61
  f"Generated-UTC: {datetime.utcnow().isoformat()}",
62
  f"Content-SHA256: {sha256_text(payload)}",
63
  "Public-Source-Only: true",
@@ -66,7 +57,7 @@ def provenance_block(payload: str, ai=False) -> str:
66
  ])
67
 
68
  # ======================================================
69
- # FOIA ADAPTERS (LINK-OUT ONLY — ACCURATE)
70
  # ======================================================
71
 
72
  class FOIAAdapter:
@@ -81,7 +72,7 @@ class FOIAAdapter:
81
  "resolved_url": url,
82
  "timestamp": datetime.utcnow().isoformat(),
83
  "is_pdf": False,
84
- "thumbnails": []
85
  }]
86
 
87
  class CIA(FOIAAdapter):
@@ -108,7 +99,7 @@ ALL_ADAPTERS = {
108
  }
109
 
110
  # ======================================================
111
- # PDF DETECTION (SAFE — NO SCRAPING)
112
  # ======================================================
113
 
114
  def resolve_pdf(url):
@@ -157,7 +148,7 @@ def run_search(query, agencies):
157
  return rows, render_cards(), "Search complete."
158
 
159
  # ======================================================
160
- # ASK-AI (STRICTLY GATED)
161
  # ======================================================
162
 
163
  def ask_ai(index: int):
@@ -165,25 +156,25 @@ def ask_ai(index: int):
165
  r = LAST_RESULTS[index]
166
 
167
  if not (ENABLE_AI and r["is_pdf"]):
168
- return "AI is disabled for this result."
169
 
170
- summary = (
171
  "AI Assistive Summary (Non-Authoritative)\n\n"
172
  f"Agency: {r['agency']}\n"
173
  f"Source: {r['resolved_url']}\n\n"
174
- "This summary assists review of a public FOIA document only."
175
  )
176
 
177
  AI_APPENDIX = {
178
- "text": summary,
179
- "hash": sha256_text(summary),
180
- "prov": provenance_block(summary, ai=True)
181
  }
182
 
183
- return summary + "\n\n" + AI_APPENDIX["prov"]
184
 
185
  # ======================================================
186
- # COURT BUNDLE (CM/ECF-READY)
187
  # ======================================================
188
 
189
  def generate_court_bundle():
@@ -202,30 +193,18 @@ def generate_court_bundle():
202
  z.writestr("Exhibit_AI_Appendix.txt", AI_APPENDIX["text"])
203
  z.writestr("Exhibit_AI_Appendix.provenance.txt", AI_APPENDIX["prov"])
204
 
205
- z.writestr("Judicial_Notice.txt", JUDICIAL_NOTICE)
206
- z.writestr("HF_Reviewer_Cover_Letter.txt", HF_REVIEWER_COVER_LETTER)
207
-
208
  return path
209
 
210
  # ======================================================
211
- # STATIC GOVERNANCE TEXT
212
  # ======================================================
213
 
214
- JUDICIAL_NOTICE = """
215
- This system provides navigation to public FOIA reading rooms only.
216
- It does not host, certify, authenticate, or modify records.
217
- Authoritative documents remain with issuing agencies.
218
- """
219
-
220
- HF_REVIEWER_COVER_LETTER = """
221
- This Hugging Face Space is a governance-first reference implementation.
222
-
223
- • Link-out only
224
- • Public FOIA sources only
225
- • AI is opt-in, hashed, and user-initiated
226
- • No document scraping or indexing
227
- • Court-safe by design
228
- """
229
 
230
  # ======================================================
231
  # FASTAPI
@@ -245,33 +224,36 @@ if os.path.exists("governance-site"):
245
  # ======================================================
246
 
247
  CSS = """
248
- button { border-radius:999px !important; }
249
- .tab-nav { position:sticky; top:0; background:#fff; z-index:999; }
 
 
 
250
  """
251
 
252
  def render_cards():
253
  cards = []
254
  for i, r in enumerate(LAST_RESULTS):
255
- thumbs = "".join(
256
- f'<img src="data:image/png;base64,{t}" style="width:120px;border-radius:8px;margin-right:6px;" />'
257
- for t in r["thumbnails"]
 
258
  )
259
- disabled = "" if r["is_pdf"] else "opacity:0.4;pointer-events:none;"
260
  cards.append(f"""
261
- <div style="border:1px solid #ddd;border-radius:16px;padding:16px;margin-bottom:16px;">
262
  <b>{r['agency']}</b><br/>
263
  {r['title']}<br/>
264
- {thumbs}
265
  <div style="margin-top:10px;">
266
  <a href="{r['resolved_url']}" target="_blank">View</a> |
267
- <a href="{r['resolved_url']}" download style="{disabled}">Download</a> |
268
  <a href="{r['resolved_url']}" target="_blank">Share</a>
269
- <button style="background:#1e88e5;color:white;padding:4px 12px;margin-left:10px;border:none;"
270
  onclick="fetch('/ask_ai?index={i}')">
271
  Ask AI
272
  </button>
273
  </div>
274
- <div style="font-size:0.75em;color:#666;margin-top:6px;">
275
  Why am I seeing this? This is a public FOIA reading-room result.
276
  </div>
277
  </div>
@@ -284,18 +266,19 @@ with gr.Blocks(css=CSS) as demo:
284
  with gr.Tab("Search"):
285
  agencies = gr.CheckboxGroup(list(ALL_ADAPTERS.keys()), value=list(ALL_ADAPTERS.keys()))
286
  query = gr.Textbox(placeholder="Search FOIA reading rooms")
287
- table = gr.Dataframe(headers=["Agency", "Title", "URL", "Hash"])
 
 
 
288
  cards = gr.HTML()
289
  status = gr.Textbox()
290
- gr.Button("Search", elem_classes=["primary"]).click(
291
- run_search, [query, agencies], [table, cards, status]
292
- )
293
 
294
  with gr.Tab("Court / Clerk"):
295
  gr.Button("Generate Court Bundle").click(lambda: generate_court_bundle(), None, gr.File())
296
 
297
  with gr.Tab("Governance"):
298
- gr.Markdown(HF_REVIEWER_COVER_LETTER)
299
 
300
  demo.queue()
301
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  # ======================================================
2
  # Federal FOIA Intelligence Search
3
+ # Court-Safe / HF-Reviewer-Safe Reference Implementation
4
  # ======================================================
5
 
6
+ import os, io, zipfile, tempfile, hashlib, base64
7
  from datetime import datetime
8
  from urllib.parse import quote_plus
9
  import requests
 
13
  from fastapi.staticfiles import StaticFiles
14
  from fastapi.responses import JSONResponse
15
 
16
+ # ======================================================
17
+ # OPTIONAL PDF SUPPORT
18
+ # ======================================================
19
 
20
+ PDF_THUMBNAILS_AVAILABLE = False
21
  try:
22
  from pdf2image import convert_from_bytes
23
  PDF_THUMBNAILS_AVAILABLE = True
24
  except Exception:
25
  pass
26
 
 
 
 
 
 
 
 
 
 
 
27
  # ======================================================
28
+ # GOVERNANCE FLAGS (HARD)
29
  # ======================================================
30
 
31
+ ENABLE_AI = True
32
+ ENABLE_FAISS_PHASE_4 = False # FORMAL APPROVAL REQUIRED
33
+ ENABLE_DOC_LEVEL_APIS = False # CIA/FBI DO NOT PROVIDE
34
 
35
  # ======================================================
36
  # SESSION STATE (EPHEMERAL)
 
40
  AI_APPENDIX = None
41
 
42
  # ======================================================
43
+ # CRYPTOGRAPHY
44
  # ======================================================
45
 
46
  def sha256_text(t: str) -> str:
 
48
 
49
  def provenance_block(payload: str, ai=False) -> str:
50
  return "\n".join([
51
+ "Tool-Version: 2.1.0",
52
  f"Generated-UTC: {datetime.utcnow().isoformat()}",
53
  f"Content-SHA256: {sha256_text(payload)}",
54
  "Public-Source-Only: true",
 
57
  ])
58
 
59
  # ======================================================
60
+ # FOIA ADAPTERS (LINK-OUT ONLY)
61
  # ======================================================
62
 
63
  class FOIAAdapter:
 
72
  "resolved_url": url,
73
  "timestamp": datetime.utcnow().isoformat(),
74
  "is_pdf": False,
75
+ "thumbnails": [],
76
  }]
77
 
78
  class CIA(FOIAAdapter):
 
99
  }
100
 
101
  # ======================================================
102
+ # PDF DETECTION (SAFE)
103
  # ======================================================
104
 
105
  def resolve_pdf(url):
 
148
  return rows, render_cards(), "Search complete."
149
 
150
  # ======================================================
151
+ # ASK-AI (OPT-IN)
152
  # ======================================================
153
 
154
  def ask_ai(index: int):
 
156
  r = LAST_RESULTS[index]
157
 
158
  if not (ENABLE_AI and r["is_pdf"]):
159
+ return "AI assistance is unavailable for this result."
160
 
161
+ text = (
162
  "AI Assistive Summary (Non-Authoritative)\n\n"
163
  f"Agency: {r['agency']}\n"
164
  f"Source: {r['resolved_url']}\n\n"
165
+ "This assists review of a public FOIA document only."
166
  )
167
 
168
  AI_APPENDIX = {
169
+ "text": text,
170
+ "hash": sha256_text(text),
171
+ "prov": provenance_block(text, ai=True),
172
  }
173
 
174
+ return text + "\n\n" + AI_APPENDIX["prov"]
175
 
176
  # ======================================================
177
+ # COURT BUNDLE
178
  # ======================================================
179
 
180
  def generate_court_bundle():
 
193
  z.writestr("Exhibit_AI_Appendix.txt", AI_APPENDIX["text"])
194
  z.writestr("Exhibit_AI_Appendix.provenance.txt", AI_APPENDIX["prov"])
195
 
 
 
 
196
  return path
197
 
198
  # ======================================================
199
+ # GOVERNANCE SITE LOADING
200
  # ======================================================
201
 
202
+ def load_governance_md():
203
+ path = "governance-site/index.md"
204
+ if os.path.exists(path):
205
+ with open(path, "r", encoding="utf-8") as f:
206
+ return f.read()
207
+ return "_Governance documentation not found._"
 
 
 
 
 
 
 
 
 
208
 
209
  # ======================================================
210
  # FASTAPI
 
224
  # ======================================================
225
 
226
  CSS = """
227
+ .tab-nav { position: sticky; top: 0; z-index: 999; background: #0f0f0f; }
228
+ @media (max-width: 768px) {
229
+ .desktop-only { display: none !important; }
230
+ }
231
+ button { border-radius: 999px !important; }
232
  """
233
 
234
  def render_cards():
235
  cards = []
236
  for i, r in enumerate(LAST_RESULTS):
237
+ download_html = (
238
+ f'<a href="{r["resolved_url"]}" download>Download</a>'
239
+ if r["is_pdf"]
240
+ else '<span style="opacity:.4">Download</span>'
241
  )
242
+
243
  cards.append(f"""
244
+ <div style="border:1px solid #333;border-radius:18px;padding:16px;margin-bottom:16px;">
245
  <b>{r['agency']}</b><br/>
246
  {r['title']}<br/>
 
247
  <div style="margin-top:10px;">
248
  <a href="{r['resolved_url']}" target="_blank">View</a> |
249
+ {download_html} |
250
  <a href="{r['resolved_url']}" target="_blank">Share</a>
251
+ <button style="background:#1e88e5;color:white;padding:4px 14px;margin-left:10px;border:none;"
252
  onclick="fetch('/ask_ai?index={i}')">
253
  Ask AI
254
  </button>
255
  </div>
256
+ <div style="font-size:.75em;color:#777;margin-top:6px;">
257
  Why am I seeing this? This is a public FOIA reading-room result.
258
  </div>
259
  </div>
 
266
  with gr.Tab("Search"):
267
  agencies = gr.CheckboxGroup(list(ALL_ADAPTERS.keys()), value=list(ALL_ADAPTERS.keys()))
268
  query = gr.Textbox(placeholder="Search FOIA reading rooms")
269
+ table = gr.Dataframe(
270
+ headers=["Agency", "Title", "URL", "Hash"],
271
+ elem_classes=["desktop-only"],
272
+ )
273
  cards = gr.HTML()
274
  status = gr.Textbox()
275
+ gr.Button("Search").click(run_search, [query, agencies], [table, cards, status])
 
 
276
 
277
  with gr.Tab("Court / Clerk"):
278
  gr.Button("Generate Court Bundle").click(lambda: generate_court_bundle(), None, gr.File())
279
 
280
  with gr.Tab("Governance"):
281
+ gr.Markdown(load_governance_md())
282
 
283
  demo.queue()
284
  demo.launch(server_name="0.0.0.0", server_port=7860)