GodsDevProject committed on
Commit
ce2d379
·
verified ·
1 Parent(s): caa6583

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -97
app.py CHANGED
@@ -27,10 +27,12 @@ except Exception:
27
  # CONFIG / FEATURE GATES
28
  # ======================================================
29
 
30
- ENABLE_SEMANTIC = False
 
31
  ENABLE_PDF_THUMBNAILS = True
32
  ENABLE_ENTITY_GRAPHS = True
33
  ENABLE_TIMELINES = True
 
34
 
35
  # ======================================================
36
  # BASE ADAPTER
@@ -45,6 +47,7 @@ class FOIAAdapter:
45
  start = time.time()
46
  url = self.search_url.format(q=quote_plus(query))
47
  latency = round(time.time() - start, 3)
 
48
  return [{
49
  "agency": self.agency,
50
  "title": f"{self.agency} FOIA Search Results",
@@ -86,17 +89,27 @@ class NSA(FOIAAdapter):
86
  agency = "NSA"
87
  search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
88
 
89
- LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
 
 
 
 
 
 
 
 
90
 
91
  # ======================================================
92
- # STUB ADAPTERS (NON-EXPORTABLE)
93
  # ======================================================
94
 
95
  class StubAdapter(FOIAAdapter):
96
  is_live = False
 
97
  def __init__(self, agency):
98
  self.agency = agency
99
  self.search_url = ""
 
100
  def search(self, query):
101
  return [{
102
  "agency": self.agency,
@@ -131,6 +144,9 @@ def bluebook_full(r):
131
  f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
132
  )
133
 
 
 
 
134
  # ======================================================
135
  # GLOBAL STATE
136
  # ======================================================
@@ -154,6 +170,7 @@ def run_search(query, include_stubs, semantic_mode):
154
  coverage[r["agency"]] += 1
155
  if r["is_live"]:
156
  LAST_LIVE_RECORDS.append(r)
 
157
  rows.append([
158
  r["agency"],
159
  "LIVE" if r["is_live"] else "STUB",
@@ -169,119 +186,120 @@ def run_search(query, include_stubs, semantic_mode):
169
  if coverage.get(agency, 0) == 0:
170
  gap_md += f"- ❌ **{agency}**: no public results found\n"
171
 
172
- return rows, gap_md, build_pdf_thumbnail_gallery()
173
 
174
  # ======================================================
175
- # PDF THUMBNAIL GALLERY
176
  # ======================================================
177
 
178
- def build_pdf_thumbnail_gallery():
179
- cards = []
180
- for r in LAST_LIVE_RECORDS:
181
- url = r["url"]
182
- if not url.lower().endswith(".pdf"):
183
- continue
184
- cards.append(f"""
185
- <div style="border:1px solid #ddd;border-radius:8px;padding:12px;margin-bottom:16px;">
186
- <b>{r['agency']} — {r['title']}</b><br><br>
187
- <iframe src="{url}" width="100%" height="220"></iframe>
188
- <div style="margin-top:8px;">
189
- <a href="{url}" target="_blank">View</a> |
190
- <a href="{url}" download>Download</a> |
191
- <a href="#" onclick="shareDoc('{url}')">Share</a> |
192
- <a href="#" onclick="askAI('{r['agency']}', '{r['title']}', '{url}')">Ask AI</a>
193
- </div>
194
- </div>
195
- """)
196
- return "".join(cards) if cards else "<i>No PDF documents found.</i>"
197
 
198
  # ======================================================
199
- # JOURNALIST ZIP EXPORT (LINKS + CITATIONS ONLY)
200
  # ======================================================
201
 
202
- def generate_journalist_zip():
203
- if not LAST_LIVE_RECORDS:
204
- return None
205
-
206
- mem = io.BytesIO()
207
- with zipfile.ZipFile(mem, "w", zipfile.ZIP_DEFLATED) as z:
208
- z.writestr(
209
- "README.txt",
210
- "Public FOIA sources only.\n"
211
- "This ZIP contains links and citations only.\n"
212
- "No documents are hosted or redistributed.\n"
213
- )
214
-
215
- z.writestr(
216
- "citations.txt",
217
- "\n\n".join(bluebook_full(r) for r in LAST_LIVE_RECORDS)
218
- )
219
-
220
- csv = "agency,title,url,retrieved\n"
221
- for r in LAST_LIVE_RECORDS:
222
- csv += f"{r['agency']},{r['title']},{r['url']},{r['timestamp']}\n"
223
- z.writestr("links.csv", csv)
224
 
225
- pdf_links = "\n".join(
226
- r["url"] for r in LAST_LIVE_RECORDS if r["url"].lower().endswith(".pdf")
227
- )
228
- z.writestr("pdf_links.txt", pdf_links)
229
 
230
- mem.seek(0)
231
- return mem
 
 
 
 
 
 
 
 
 
 
 
232
 
233
  # ======================================================
234
- # PUBLIC SHAREABLE RESULT PAGE (STATIC HTML)
235
  # ======================================================
236
 
237
- def generate_share_page():
238
- if not LAST_LIVE_RECORDS:
239
- return None
 
 
 
 
 
 
 
 
240
 
241
- html = """
242
- <html><head><title>FOIA Search Results</title></head><body>
243
- <h1>Federal FOIA Search Results</h1>
244
- <p>Public electronic reading rooms only.</p><hr>
 
 
 
 
 
 
 
 
 
 
 
245
  """
246
 
247
- for r in LAST_LIVE_RECORDS:
248
- html += f"""
249
- <h3>{r['agency']} — {r['title']}</h3>
250
- <p><a href="{r['url']}" target="_blank">{r['url']}</a></p>
251
- <p><b>Citation:</b> {bluebook_full(r)}</p>
252
- <p><b>Hash:</b> {citation_hash(r)}</p>
253
- <hr>
254
- """
255
 
256
- html += "</body></html>"
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- buf = io.BytesIO(html.encode("utf-8"))
259
- return buf
 
 
 
 
 
 
 
260
 
261
  # ======================================================
262
  # UI
263
  # ======================================================
264
 
265
- JS_HELPERS = """
266
- <script>
267
- function shareDoc(url) {
268
- if (navigator.share) {
269
- navigator.share({ title: "FOIA Document", url: url });
270
- } else {
271
- navigator.clipboard.writeText(url);
272
- alert("Link copied to clipboard");
273
- }
274
- }
275
- function askAI(a,t,u){
276
- alert("AI analysis placeholder for: " + t);
277
- }
278
- </script>
279
- """
280
-
281
  with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
282
- gr.HTML(JS_HELPERS)
 
 
283
 
284
- gr.Markdown("# 🏛️ Federal FOIA Intelligence Search")
 
 
285
 
286
  query = gr.Textbox(label="Search FOIA Libraries")
287
  include_stubs = gr.Checkbox(label="Include Extended Coverage (STUB)", value=False)
@@ -295,19 +313,34 @@ with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
295
  )
296
 
297
  gap_panel = gr.Markdown()
298
- pdf_gallery = gr.HTML()
 
299
 
300
  search_btn.click(
301
  run_search,
302
  inputs=[query, include_stubs, semantic_toggle],
303
- outputs=[results, gap_panel, pdf_gallery]
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  )
305
 
306
- gr.Markdown("## 📄 PDF Document Previews")
307
- pdf_gallery.render()
 
308
 
309
- gr.Markdown("## 🗂 Journalist Tools")
310
- gr.Button("Download Journalist ZIP").click(generate_journalist_zip, outputs=gr.File())
311
- gr.Button("Generate Shareable Result Page").click(generate_share_page, outputs=gr.File())
312
 
313
  app.launch()
 
27
  # CONFIG / FEATURE GATES
28
  # ======================================================
29
 
30
+ ENABLE_SEMANTIC = False # user opt-in only
31
+ ENABLE_PDF_EXPORT = True # LIVE results only
32
  ENABLE_PDF_THUMBNAILS = True
33
  ENABLE_ENTITY_GRAPHS = True
34
  ENABLE_TIMELINES = True
35
+ ENABLE_JOURNALIST_ZIP = True
36
 
37
  # ======================================================
38
  # BASE ADAPTER
 
47
  start = time.time()
48
  url = self.search_url.format(q=quote_plus(query))
49
  latency = round(time.time() - start, 3)
50
+
51
  return [{
52
  "agency": self.agency,
53
  "title": f"{self.agency} FOIA Search Results",
 
89
  agency = "NSA"
90
  search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
91
 
92
+ LIVE_ADAPTERS = [
93
+ CIA(),
94
+ FBI(),
95
+ DOJ(),
96
+ DHS(),
97
+ STATE(),
98
+ GSA(),
99
+ NSA()
100
+ ]
101
 
102
  # ======================================================
103
+ # STUB ADAPTERS (CLEARLY LABELED)
104
  # ======================================================
105
 
106
  class StubAdapter(FOIAAdapter):
107
  is_live = False
108
+
109
  def __init__(self, agency):
110
  self.agency = agency
111
  self.search_url = ""
112
+
113
  def search(self, query):
114
  return [{
115
  "agency": self.agency,
 
144
  f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
145
  )
146
 
147
+ def bluebook_short(r):
148
+ return f"{r['agency']}, FOIA Reading Room, {r['url']}."
149
+
150
  # ======================================================
151
  # GLOBAL STATE
152
  # ======================================================
 
170
  coverage[r["agency"]] += 1
171
  if r["is_live"]:
172
  LAST_LIVE_RECORDS.append(r)
173
+
174
  rows.append([
175
  r["agency"],
176
  "LIVE" if r["is_live"] else "STUB",
 
186
  if coverage.get(agency, 0) == 0:
187
  gap_md += f"- ❌ **{agency}**: no public results found\n"
188
 
189
+ return rows, gap_md
190
 
191
  # ======================================================
192
+ # SEMANTIC STATUS
193
  # ======================================================
194
 
195
+ def semantic_status(enabled):
196
+ if enabled and not FAISS_AVAILABLE:
197
+ return "⚠ Semantic mode unavailable (optional dependencies missing)"
198
+ if enabled:
199
+ return "🧠 Semantic mode enabled (metadata only)"
200
+ return "Semantic mode off"
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  # ======================================================
203
+ # FOIA REQUEST GENERATOR (PDF)
204
  # ======================================================
205
 
206
+ def generate_foia_request(requester, description):
207
+ buffer = io.BytesIO()
208
+ doc = SimpleDocTemplate(buffer)
209
+ styles = getSampleStyleSheet()
210
+ story = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
+ story.append(Paragraph("<b>Freedom of Information Act Request</b>", styles["Title"]))
213
+ story.append(Spacer(1, 12))
 
 
214
 
215
+ story.append(Paragraph(f"<b>Requester:</b> {requester}", styles["Normal"]))
216
+ story.append(Spacer(1, 8))
217
+
218
+ story.append(Paragraph("<b>Description of Records Requested:</b>", styles["Normal"]))
219
+ story.append(Paragraph(description, styles["Normal"]))
220
+ story.append(Spacer(1, 12))
221
+
222
+ agencies = ", ".join(sorted({r["agency"] for r in LAST_LIVE_RECORDS}))
223
+ story.append(Paragraph(f"<b>Agencies Referenced:</b> {agencies}", styles["Normal"]))
224
+
225
+ doc.build(story)
226
+ buffer.seek(0)
227
+ return buffer
228
 
229
  # ======================================================
230
+ # ENTITY GRAPH + TIMELINE
231
  # ======================================================
232
 
233
+ def build_entity_graph():
234
+ domains = Counter(urlparse(r["url"]).netloc for r in LAST_LIVE_RECORDS if r["url"])
235
+ return go.Figure([go.Bar(x=list(domains.keys()), y=list(domains.values()))])
236
+
237
+ def build_timeline():
238
+ dates = Counter(r["timestamp"][:10] for r in LAST_LIVE_RECORDS if r["timestamp"])
239
+ return go.Figure([go.Bar(x=list(dates.keys()), y=list(dates.values()))])
240
+
241
+ # ======================================================
242
+ # PDF PREVIEW + ACTION BUTTONS
243
+ # ======================================================
244
 
245
+ def preview_selected(row):
246
+ if not row:
247
+ return "<i>Select a result</i>"
248
+
249
+ url = row[3]
250
+ if not url:
251
+ return "<i>No preview available (STUB)</i>"
252
+
253
+ buttons = f"""
254
+ <div style="margin-bottom:8px">
255
+ <a href="{url}" target="_blank">View</a> |
256
+ <a href="{url}" download>Download</a> |
257
+ <a href="{url}" target="_blank">Share</a> |
258
+ <i>Ask AI (link-out only)</i>
259
+ </div>
260
  """
261
 
262
+ if url.lower().endswith(".pdf"):
263
+ return buttons + f"<iframe src='{url}' width='100%' height='520'></iframe>"
264
+
265
+ return buttons + f"<a href='{url}' target='_blank'>Open link</a>"
 
 
 
 
266
 
267
+ # ======================================================
268
+ # JOURNALIST ZIP EXPORT
269
+ # ======================================================
270
+
271
+ def journalist_zip():
272
+ buffer = io.BytesIO()
273
+ with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as z:
274
+ citations = []
275
+ links = []
276
+
277
+ for r in LAST_LIVE_RECORDS:
278
+ citations.append(bluebook_full(r))
279
+ links.append(f"{r['agency']},{r['title']},{r['url']},{r['timestamp']}")
280
 
281
+ z.writestr("README.txt",
282
+ "Public FOIA links only.\nNo documents are included.\n")
283
+ z.writestr("citations.txt", "\n".join(citations))
284
+ z.writestr("links.csv", "agency,title,url,timestamp\n" + "\n".join(links))
285
+ z.writestr("pdf_links.txt",
286
+ "\n".join(r["url"] for r in LAST_LIVE_RECORDS if r["url"].lower().endswith(".pdf")))
287
+
288
+ buffer.seek(0)
289
+ return buffer
290
 
291
  # ======================================================
292
  # UI
293
  # ======================================================
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
296
+ gr.Markdown("""
297
+ # 🏛️ Federal FOIA Intelligence Search
298
+ **Public Electronic Reading Rooms Only**
299
 
300
+ LIVE results are exportable
301
+ ⚠ STUB results are informational only
302
+ """)
303
 
304
  query = gr.Textbox(label="Search FOIA Libraries")
305
  include_stubs = gr.Checkbox(label="Include Extended Coverage (STUB)", value=False)
 
313
  )
314
 
315
  gap_panel = gr.Markdown()
316
+ preview_panel = gr.HTML()
317
+ semantic_status_md = gr.Markdown()
318
 
319
  search_btn.click(
320
  run_search,
321
  inputs=[query, include_stubs, semantic_toggle],
322
+ outputs=[results, gap_panel]
323
+ )
324
+
325
+ semantic_toggle.change(semantic_status, semantic_toggle, semantic_status_md)
326
+
327
+ results.select(lambda e: preview_selected(e.value), outputs=preview_panel)
328
+
329
+ gr.Markdown("## FOIA Request Generator")
330
+ requester = gr.Textbox(label="Your Name / Organization")
331
+ description = gr.Textbox(label="Describe the records requested", lines=4)
332
+ gr.Button("Generate FOIA Request PDF").click(
333
+ generate_foia_request,
334
+ inputs=[requester, description],
335
+ outputs=gr.File()
336
  )
337
 
338
+ gr.Markdown("## Analysis Tools")
339
+ gr.Button("Show Entity Graph").click(build_entity_graph, outputs=gr.Plot())
340
+ gr.Button("Show Timeline").click(build_timeline, outputs=gr.Plot())
341
 
342
+ if ENABLE_JOURNALIST_ZIP:
343
+ gr.Markdown("## Journalist Export")
344
+ gr.Button("Download Journalist ZIP").click(journalist_zip, outputs=gr.File())
345
 
346
  app.launch()