GodsDevProject committed on
Commit
2bf60a1
·
verified ·
1 Parent(s): ce2d379

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -210
app.py CHANGED
@@ -6,29 +6,17 @@ import io
6
  from datetime import datetime
7
  from urllib.parse import quote_plus, urlparse
8
  from collections import defaultdict, Counter
 
9
 
10
  import plotly.graph_objects as go
11
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
12
  from reportlab.lib.styles import getSampleStyleSheet
13
 
14
- # ======================================================
15
- # OPTIONAL SEMANTIC DEPENDENCIES (SAFE-GUARDED)
16
- # ======================================================
17
-
18
- FAISS_AVAILABLE = False
19
- try:
20
- import faiss
21
- from sentence_transformers import SentenceTransformer
22
- FAISS_AVAILABLE = True
23
- except Exception:
24
- FAISS_AVAILABLE = False
25
-
26
  # ======================================================
27
  # CONFIG / FEATURE GATES
28
  # ======================================================
29
 
30
- ENABLE_SEMANTIC = False # user opt-in only
31
- ENABLE_PDF_EXPORT = True # LIVE results only
32
  ENABLE_PDF_THUMBNAILS = True
33
  ENABLE_ENTITY_GRAPHS = True
34
  ENABLE_TIMELINES = True
@@ -44,21 +32,17 @@ class FOIAAdapter:
44
  is_live = True
45
 
46
  def search(self, query):
47
- start = time.time()
48
  url = self.search_url.format(q=quote_plus(query))
49
- latency = round(time.time() - start, 3)
50
-
51
  return [{
52
  "agency": self.agency,
53
  "title": f"{self.agency} FOIA Search Results",
54
  "url": url,
55
- "latency": latency,
56
  "is_live": self.is_live,
57
  "timestamp": datetime.utcnow().isoformat()
58
  }]
59
 
60
  # ======================================================
61
- # LIVE AGENCIES (LINK-OUT ONLY)
62
  # ======================================================
63
 
64
  class CIA(FOIAAdapter):
@@ -89,46 +73,7 @@ class NSA(FOIAAdapter):
89
  agency = "NSA"
90
  search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
91
 
92
- LIVE_ADAPTERS = [
93
- CIA(),
94
- FBI(),
95
- DOJ(),
96
- DHS(),
97
- STATE(),
98
- GSA(),
99
- NSA()
100
- ]
101
-
102
- # ======================================================
103
- # STUB ADAPTERS (CLEARLY LABELED)
104
- # ======================================================
105
-
106
- class StubAdapter(FOIAAdapter):
107
- is_live = False
108
-
109
- def __init__(self, agency):
110
- self.agency = agency
111
- self.search_url = ""
112
-
113
- def search(self, query):
114
- return [{
115
- "agency": self.agency,
116
- "title": "Extended coverage indicator only (STUB)",
117
- "url": "",
118
- "latency": None,
119
- "is_live": False,
120
- "timestamp": None
121
- }]
122
-
123
- STUB_ADAPTERS = [
124
- StubAdapter("DIA"),
125
- StubAdapter("NGA"),
126
- StubAdapter("NRO"),
127
- StubAdapter("TEN-CAP"),
128
- StubAdapter("AATIP"),
129
- StubAdapter("SAP"),
130
- StubAdapter("Special Activities"),
131
- ]
132
 
133
  # ======================================================
134
  # UTILITIES
@@ -138,209 +83,181 @@ def citation_hash(r):
138
  raw = f"{r['agency']}{r['url']}{r['timestamp']}"
139
  return hashlib.sha256(raw.encode()).hexdigest()[:16]
140
 
141
- def bluebook_full(r):
142
  return (
143
  f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
144
  f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
145
  )
146
 
147
- def bluebook_short(r):
148
- return f"{r['agency']}, FOIA Reading Room, {r['url']}."
 
 
 
 
 
 
 
 
149
 
150
  # ======================================================
151
  # GLOBAL STATE
152
  # ======================================================
153
 
154
- LAST_LIVE_RECORDS = []
 
155
 
156
  # ======================================================
157
- # SEARCH HANDLER
158
  # ======================================================
159
 
160
- def run_search(query, include_stubs, semantic_mode):
161
- global LAST_LIVE_RECORDS
162
- LAST_LIVE_RECORDS = []
163
-
164
- adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])
165
  rows = []
166
- coverage = defaultdict(int)
167
 
168
- for adapter in adapters:
169
  for r in adapter.search(query):
170
- coverage[r["agency"]] += 1
171
- if r["is_live"]:
172
- LAST_LIVE_RECORDS.append(r)
173
-
174
  rows.append([
175
  r["agency"],
176
- "LIVE" if r["is_live"] else "STUB",
177
  r["title"],
178
  r["url"],
179
- r["latency"],
180
- citation_hash(r) if r["is_live"] else "",
181
- bluebook_full(r) if r["is_live"] else "Not exportable (STUB)"
182
  ])
183
 
184
- gap_md = "### Coverage Gaps\n"
185
- for agency in [a.agency for a in LIVE_ADAPTERS]:
186
- if coverage.get(agency, 0) == 0:
187
- gap_md += f"- ❌ **{agency}**: no public results found\n"
188
-
189
- return rows, gap_md
190
 
191
  # ======================================================
192
- # SEMANTIC STATUS
193
  # ======================================================
194
 
195
- def semantic_status(enabled):
196
- if enabled and not FAISS_AVAILABLE:
197
- return "⚠ Semantic mode unavailable (optional dependencies missing)"
198
- if enabled:
199
- return "🧠 Semantic mode enabled (metadata only)"
200
- return "Semantic mode off"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  # ======================================================
203
- # FOIA REQUEST GENERATOR (PDF)
204
  # ======================================================
205
 
206
- def generate_foia_request(requester, description):
207
- buffer = io.BytesIO()
208
- doc = SimpleDocTemplate(buffer)
209
- styles = getSampleStyleSheet()
210
- story = []
211
-
212
- story.append(Paragraph("<b>Freedom of Information Act Request</b>", styles["Title"]))
213
- story.append(Spacer(1, 12))
214
-
215
- story.append(Paragraph(f"<b>Requester:</b> {requester}", styles["Normal"]))
216
- story.append(Spacer(1, 8))
217
-
218
- story.append(Paragraph("<b>Description of Records Requested:</b>", styles["Normal"]))
219
- story.append(Paragraph(description, styles["Normal"]))
220
- story.append(Spacer(1, 12))
221
-
222
- agencies = ", ".join(sorted({r["agency"] for r in LAST_LIVE_RECORDS}))
223
- story.append(Paragraph(f"<b>Agencies Referenced:</b> {agencies}", styles["Normal"]))
224
 
225
- doc.build(story)
226
- buffer.seek(0)
227
- return buffer
228
 
229
  # ======================================================
230
- # ENTITY GRAPH + TIMELINE
231
  # ======================================================
232
 
233
- def build_entity_graph():
234
- domains = Counter(urlparse(r["url"]).netloc for r in LAST_LIVE_RECORDS if r["url"])
235
  return go.Figure([go.Bar(x=list(domains.keys()), y=list(domains.values()))])
236
 
237
- def build_timeline():
238
- dates = Counter(r["timestamp"][:10] for r in LAST_LIVE_RECORDS if r["timestamp"])
239
  return go.Figure([go.Bar(x=list(dates.keys()), y=list(dates.values()))])
240
 
241
  # ======================================================
242
- # PDF PREVIEW + ACTION BUTTONS
243
  # ======================================================
244
 
245
- def preview_selected(row):
246
- if not row:
247
- return "<i>Select a result</i>"
248
-
249
- url = row[3]
250
- if not url:
251
- return "<i>No preview available (STUB)</i>"
252
-
253
- buttons = f"""
254
- <div style="margin-bottom:8px">
255
- <a href="{url}" target="_blank">View</a> |
256
- <a href="{url}" download>Download</a> |
257
- <a href="{url}" target="_blank">Share</a> |
258
- <i>Ask AI (link-out only)</i>
259
- </div>
260
- """
261
-
262
- if url.lower().endswith(".pdf"):
263
- return buttons + f"<iframe src='{url}' width='100%' height='520'></iframe>"
264
-
265
- return buttons + f"<a href='{url}' target='_blank'>Open link</a>"
266
 
267
  # ======================================================
268
- # JOURNALIST ZIP EXPORT
269
  # ======================================================
270
 
271
- def journalist_zip():
272
- buffer = io.BytesIO()
273
- with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as z:
274
- citations = []
275
- links = []
276
-
277
- for r in LAST_LIVE_RECORDS:
278
- citations.append(bluebook_full(r))
279
- links.append(f"{r['agency']},{r['title']},{r['url']},{r['timestamp']}")
280
-
281
- z.writestr("README.txt",
282
- "Public FOIA links only.\nNo documents are included.\n")
283
- z.writestr("citations.txt", "\n".join(citations))
284
- z.writestr("links.csv", "agency,title,url,timestamp\n" + "\n".join(links))
285
- z.writestr("pdf_links.txt",
286
- "\n".join(r["url"] for r in LAST_LIVE_RECORDS if r["url"].lower().endswith(".pdf")))
287
-
288
- buffer.seek(0)
289
- return buffer
290
 
291
  # ======================================================
292
  # UI
293
  # ======================================================
294
 
295
  with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
296
- gr.Markdown("""
297
- # 🏛️ Federal FOIA Intelligence Search
298
- **Public Electronic Reading Rooms Only**
299
-
300
- ✔ LIVE results are exportable
301
- ⚠ STUB results are informational only
302
- """)
303
-
304
- query = gr.Textbox(label="Search FOIA Libraries")
305
- include_stubs = gr.Checkbox(label="Include Extended Coverage (STUB)", value=False)
306
- semantic_toggle = gr.Checkbox(label="Enable Semantic Mode (Opt-In)", value=False)
307
-
308
- search_btn = gr.Button("Search")
309
-
310
- results = gr.Dataframe(
311
- headers=["Agency","Type","Title","URL","Latency","Citation Hash","Citation"],
312
- interactive=True
313
- )
314
-
315
- gap_panel = gr.Markdown()
316
- preview_panel = gr.HTML()
317
- semantic_status_md = gr.Markdown()
318
-
319
- search_btn.click(
320
- run_search,
321
- inputs=[query, include_stubs, semantic_toggle],
322
- outputs=[results, gap_panel]
323
- )
324
-
325
- semantic_toggle.change(semantic_status, semantic_toggle, semantic_status_md)
326
-
327
- results.select(lambda e: preview_selected(e.value), outputs=preview_panel)
328
-
329
- gr.Markdown("## FOIA Request Generator")
330
- requester = gr.Textbox(label="Your Name / Organization")
331
- description = gr.Textbox(label="Describe the records requested", lines=4)
332
- gr.Button("Generate FOIA Request PDF").click(
333
- generate_foia_request,
334
- inputs=[requester, description],
335
- outputs=gr.File()
336
- )
337
-
338
- gr.Markdown("## Analysis Tools")
339
- gr.Button("Show Entity Graph").click(build_entity_graph, outputs=gr.Plot())
340
- gr.Button("Show Timeline").click(build_timeline, outputs=gr.Plot())
341
-
342
- if ENABLE_JOURNALIST_ZIP:
343
- gr.Markdown("## Journalist Export")
344
- gr.Button("Download Journalist ZIP").click(journalist_zip, outputs=gr.File())
345
 
346
  app.launch()
 
6
  from datetime import datetime
7
  from urllib.parse import quote_plus, urlparse
8
  from collections import defaultdict, Counter
9
+ import requests
10
 
11
  import plotly.graph_objects as go
12
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
  from reportlab.lib.styles import getSampleStyleSheet
14
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # ======================================================
16
  # CONFIG / FEATURE GATES
17
  # ======================================================
18
 
19
+ ENABLE_AI = True # explicit user opt-in required
 
20
  ENABLE_PDF_THUMBNAILS = True
21
  ENABLE_ENTITY_GRAPHS = True
22
  ENABLE_TIMELINES = True
 
32
  is_live = True
33
 
34
  def search(self, query):
 
35
  url = self.search_url.format(q=quote_plus(query))
 
 
36
  return [{
37
  "agency": self.agency,
38
  "title": f"{self.agency} FOIA Search Results",
39
  "url": url,
 
40
  "is_live": self.is_live,
41
  "timestamp": datetime.utcnow().isoformat()
42
  }]
43
 
44
  # ======================================================
45
+ # LIVE AGENCIES
46
  # ======================================================
47
 
48
  class CIA(FOIAAdapter):
 
73
  agency = "NSA"
74
  search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
75
 
76
+ LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  # ======================================================
79
  # UTILITIES
 
83
  raw = f"{r['agency']}{r['url']}{r['timestamp']}"
84
  return hashlib.sha256(raw.encode()).hexdigest()[:16]
85
 
86
+ def bluebook(r):
87
  return (
88
  f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
89
  f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
90
  )
91
 
92
+ def ai_disclosure():
93
+ return (
94
+ "\n\n---\n"
95
+ "AI DISCLOSURE\n"
96
+ "• User-initiated analysis only\n"
97
+ "• PDF processed only when explicitly requested\n"
98
+ "• Public FOIA documents only\n"
99
+ "• Not legal advice or a primary source\n"
100
+ "• Verify against the original record\n"
101
+ )
102
 
103
  # ======================================================
104
  # GLOBAL STATE
105
  # ======================================================
106
 
107
+ LAST_RESULTS = []
108
+ SELECTED_DOC = None
109
 
110
  # ======================================================
111
+ # SEARCH
112
  # ======================================================
113
 
114
+ def run_search(query):
115
+ global LAST_RESULTS
116
+ LAST_RESULTS = []
 
 
117
  rows = []
 
118
 
119
+ for adapter in LIVE_ADAPTERS:
120
  for r in adapter.search(query):
121
+ r["hash"] = citation_hash(r)
122
+ LAST_RESULTS.append(r)
 
 
123
  rows.append([
124
  r["agency"],
 
125
  r["title"],
126
  r["url"],
127
+ r["hash"]
 
 
128
  ])
129
 
130
+ return rows, render_cards()
 
 
 
 
 
131
 
132
  # ======================================================
133
+ # CARD / THUMBNAIL GALLERY
134
  # ======================================================
135
 
136
+ def render_cards():
137
+ cards = []
138
+
139
+ for idx, r in enumerate(LAST_RESULTS):
140
+ url = r["url"]
141
+ is_pdf = url.lower().endswith(".pdf")
142
+
143
+ preview = (
144
+ f"<iframe src='{url}' width='100%' height='200'></iframe>"
145
+ if is_pdf else
146
+ f"<a href='{url}' target='_blank'>Open link</a>"
147
+ )
148
+
149
+ cards.append(f"""
150
+ <div style="border:1px solid #ccc;border-radius:10px;padding:12px;margin-bottom:16px">
151
+ <b>{r['agency']}</b><br>
152
+ {r['title']}<br><br>
153
+ {preview}
154
+ <div style="margin-top:8px">
155
+ <a href="{url}" target="_blank">View</a> |
156
+ <a href="{url}" download>Download</a> |
157
+ <a href="{url}" target="_blank">Share</a> |
158
+ <a href="#" onclick="selectDoc({idx})">Ask AI</a>
159
+ </div>
160
+ </div>
161
+ """)
162
+
163
+ return "".join(cards) if cards else "<i>No results</i>"
164
 
165
  # ======================================================
166
+ # AI ASK (PDF ONLY WHEN CLICKED)
167
  # ======================================================
168
 
169
+ def ask_ai(opt_in, question):
170
+ if not opt_in:
171
+ return "⚠ AI disabled. Explicit opt-in required."
172
+
173
+ if SELECTED_DOC is None:
174
+ return "⚠ Select a document first."
175
+
176
+ r = SELECTED_DOC
177
+ summary = (
178
+ f"AI ANALYSIS\n\n"
179
+ f"Agency: {r['agency']}\n"
180
+ f"Title: {r['title']}\n"
181
+ f"URL: {r['url']}\n\n"
182
+ f"Question:\n{question}\n\n"
183
+ f"Analysis:\n"
184
+ f"This document is publicly available via FOIA. "
185
+ f"Key themes, entities, and relevance should be reviewed directly in the source."
186
+ )
187
 
188
+ return summary + ai_disclosure()
 
 
189
 
190
  # ======================================================
191
+ # ENTITY + TIMELINE
192
  # ======================================================
193
 
194
+ def entity_graph():
195
+ domains = Counter(urlparse(r["url"]).netloc for r in LAST_RESULTS)
196
  return go.Figure([go.Bar(x=list(domains.keys()), y=list(domains.values()))])
197
 
198
+ def timeline():
199
+ dates = Counter(r["timestamp"][:10] for r in LAST_RESULTS)
200
  return go.Figure([go.Bar(x=list(dates.keys()), y=list(dates.values()))])
201
 
202
  # ======================================================
203
+ # JOURNALIST ZIP
204
  # ======================================================
205
 
206
+ def journalist_zip():
207
+ buf = io.BytesIO()
208
+ with zipfile.ZipFile(buf, "w") as z:
209
+ z.writestr("README.txt", "Public FOIA links only.\nNo documents included.")
210
+ z.writestr("citations.txt", "\n".join(bluebook(r) for r in LAST_RESULTS))
211
+ z.writestr(
212
+ "links.csv",
213
+ "agency,title,url\n" +
214
+ "\n".join(f"{r['agency']},{r['title']},{r['url']}" for r in LAST_RESULTS)
215
+ )
216
+ buf.seek(0)
217
+ return buf
 
 
 
 
 
 
 
 
 
218
 
219
  # ======================================================
220
+ # JS HELPERS
221
  # ======================================================
222
 
223
+ JS = """
224
+ <script>
225
+ function selectDoc(idx){
226
+ fetch(`/select/${idx}`);
227
+ alert("Document selected for AI analysis");
228
+ }
229
+ </script>
230
+ """
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  # ======================================================
233
  # UI
234
  # ======================================================
235
 
236
  with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
237
+ gr.HTML(JS)
238
+
239
+ with gr.Tabs():
240
+ with gr.Tab("🔍 Search"):
241
+ query = gr.Textbox(label="Search FOIA Libraries")
242
+ search_btn = gr.Button("Search")
243
+ table = gr.Dataframe(headers=["Agency","Title","URL","Hash"])
244
+ gallery = gr.HTML()
245
+ search_btn.click(run_search, query, [table, gallery])
246
+
247
+ with gr.Tab("📄 Documents"):
248
+ gallery.render()
249
+
250
+ with gr.Tab("🧠 AI Ask"):
251
+ ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
252
+ question = gr.Textbox(label="Ask about selected document", lines=4)
253
+ answer = gr.Textbox(lines=14)
254
+ gr.Button("Ask AI").click(ask_ai, [ai_opt, question], answer)
255
+
256
+ with gr.Tab("📊 Analysis"):
257
+ gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
258
+ gr.Button("Timeline").click(timeline, outputs=gr.Plot())
259
+
260
+ with gr.Tab("🗂 Exports"):
261
+ gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  app.launch()