GodsDevProject commited on
Commit
6a02a5b
ยท
verified ยท
1 Parent(s): ef5f53e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +276 -60
app.py CHANGED
@@ -1,25 +1,44 @@
1
  import gradio as gr
 
2
  import hashlib
3
- import io
4
  import zipfile
 
 
5
  from datetime import datetime
6
  from urllib.parse import quote_plus, urlparse
7
- from collections import Counter
 
8
 
9
  import plotly.graph_objects as go
 
 
 
 
 
 
10
 
11
- from bluebook import bluebook_full
12
- from appendix import build_litigation_appendix
 
 
 
 
13
 
14
  # ======================================================
15
- # CONFIG
16
  # ======================================================
17
 
18
- ENABLE_AI = True
19
- ENABLE_PUBLIC_SHARE = True
 
 
 
 
 
 
20
 
21
  # ======================================================
22
- # ADAPTER
23
  # ======================================================
24
 
25
  class FOIAAdapter:
@@ -27,13 +46,22 @@ class FOIAAdapter:
27
  search_url = ""
28
 
29
  def search(self, query):
 
 
 
 
30
  return [{
31
  "agency": self.agency,
32
- "title": f"{self.agency} FOIA Search Results",
33
- "url": self.search_url.format(q=quote_plus(query)),
34
- "timestamp": datetime.utcnow().isoformat()
 
35
  }]
36
 
 
 
 
 
37
  class CIA(FOIAAdapter):
38
  agency = "CIA"
39
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
@@ -42,110 +70,298 @@ class FBI(FOIAAdapter):
42
  agency = "FBI"
43
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
44
 
45
- LIVE = [CIA(), FBI()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  # ======================================================
48
- # STATE
49
  # ======================================================
50
 
51
- RESULTS = []
52
- SHARES = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # ======================================================
55
  # SEARCH
56
  # ======================================================
57
 
58
  def run_search(query):
59
- global RESULTS
60
- RESULTS = []
61
  rows = []
62
 
63
- for a in LIVE:
64
- for r in a.search(query):
65
- r["hash"] = hashlib.sha256(r["url"].encode()).hexdigest()[:16]
66
- RESULTS.append(r)
67
- rows.append([r["agency"], r["title"], r["url"], r["hash"]])
 
 
 
 
 
 
68
 
69
  return rows, render_cards()
70
 
71
  # ======================================================
72
- # CARDS
73
  # ======================================================
74
 
75
  def render_cards():
76
  cards = []
77
- for i, r in enumerate(RESULTS):
 
 
 
 
 
 
 
 
 
 
 
78
  cards.append(f"""
79
- <div style="border:1px solid #ddd;border-radius:12px;padding:14px;margin-bottom:16px">
80
- <b>{r['agency']}</b><br>
81
- {r['title']}<br><br>
82
- <a href="{r['url']}" target="_blank">View</a> |
83
- <a href="{r['url']}" download>Download</a> |
84
- <a href="#" onclick="share({i})">Share</a> |
85
- <i>Ask AI (opt-in)</i>
 
 
 
 
86
  </div>
87
  """)
88
- return "".join(cards)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  # ======================================================
91
- # SHARE PAGE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  # ======================================================
93
 
94
  def create_share():
95
- sid = hashlib.sha256(str(RESULTS).encode()).hexdigest()[:12]
96
- SHARES[sid] = RESULTS.copy()
97
- return f"Share ID: `{sid}`"
98
 
99
- def load_share(sid):
100
- recs = SHARES.get(sid)
101
- if not recs:
102
- return "Invalid share ID"
103
- return "\n".join(bluebook_full(r) for r in recs)
104
 
105
  # ======================================================
106
- # EXPORTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # ======================================================
108
 
109
  def journalist_zip():
110
  buf = io.BytesIO()
111
  with zipfile.ZipFile(buf, "w") as z:
112
- z.writestr("citations.txt", "\n".join(bluebook_full(r) for r in RESULTS))
113
  z.writestr(
114
  "links.csv",
115
  "agency,title,url\n" +
116
- "\n".join(f"{r['agency']},{r['title']},{r['url']}" for r in RESULTS)
117
  )
118
  buf.seek(0)
119
  return buf
120
 
121
- def appendix_pdf():
122
- return build_litigation_appendix(RESULTS)
123
-
124
  # ======================================================
125
  # UI
126
  # ======================================================
127
 
128
- with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
129
- gr.Markdown("""
130
- # ๐Ÿ›๏ธ Federal FOIA Intelligence Search
131
- **Public FOIA Electronic Reading Rooms**
132
- """)
 
 
 
 
 
133
 
134
  with gr.Tabs():
135
  with gr.Tab("๐Ÿ” Search"):
136
- q = gr.Textbox(label="Search FOIA Libraries")
137
- btn = gr.Button("Search")
138
- table = gr.Dataframe(headers=["Agency","Title","URL","Hash"])
139
  gallery = gr.HTML()
140
- btn.click(run_search, q, [table, gallery])
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  with gr.Tab("๐Ÿ“ค Share"):
143
  gr.Button("Create Share Page").click(create_share, outputs=gr.Textbox())
144
- sid = gr.Textbox(label="Load Share ID")
145
- gr.Button("Load").click(load_share, sid, gr.Textbox(lines=10))
146
 
147
- with gr.Tab("โš–๏ธ Litigation"):
148
- gr.Button("Generate Appendix PDF").click(appendix_pdf, outputs=gr.File())
 
 
149
 
150
  with gr.Tab("๐Ÿ—‚ Exports"):
151
  gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
 
1
  import gradio as gr
2
+ import time
3
  import hashlib
 
4
  import zipfile
5
+ import io
6
+ import uuid
7
  from datetime import datetime
8
  from urllib.parse import quote_plus, urlparse
9
+ from collections import Counter, defaultdict
10
+ import requests
11
 
12
  import plotly.graph_objects as go
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
14
+ from reportlab.lib.styles import getSampleStyleSheet
15
+
16
# ======================================================
# OPTIONAL PDF TEXT EXTRACTION (SAFE / GUARDED)
# ======================================================

# pdfminer.six is optional: if it cannot be imported, extraction is
# silently disabled instead of crashing the app at startup.
PDF_TEXT_AVAILABLE = False
try:
    from pdfminer.high_level import extract_text
    PDF_TEXT_AVAILABLE = True
except Exception:
    PDF_TEXT_AVAILABLE = False

# ======================================================
# CONFIG / FEATURE GATES
# ======================================================

# NOTE(review): only ENABLE_PDF_EXTRACTION is consulted in the visible
# code; the other gates are declared but never checked — confirm wiring.
ENABLE_AI = True  # explicit opt-in required
ENABLE_PDF_EXTRACTION = True  # checkbox gated
ENABLE_ENTITY_GRAPHS = True
ENABLE_TIMELINES = True
ENABLE_JOURNALIST_ZIP = True
ENABLE_LITIGATION_PDF = True
ENABLE_COVERAGE_HEATMAP = True
ENABLE_LATENCY_BADGES = True
39
 
40
  # ======================================================
41
+ # BASE ADAPTER (LINK-OUT ONLY)
42
  # ======================================================
43
 
44
  class FOIAAdapter:
 
46
  search_url = ""
47
 
48
  def search(self, query):
49
+ start = time.time()
50
+ url = self.search_url.format(q=quote_plus(query))
51
+ latency = round((time.time() - start) * 1000, 1)
52
+
53
  return [{
54
  "agency": self.agency,
55
+ "title": f"{self.agency} FOIA Reading Room Result",
56
+ "url": url,
57
+ "timestamp": datetime.utcnow().isoformat(),
58
+ "latency_ms": latency
59
  }]
60
 
61
+ # ======================================================
62
+ # LIVE AGENCIES (SAFE)
63
+ # ======================================================
64
+
65
# Each subclass only supplies a display name and a public search-URL
# template; all behavior lives in FOIAAdapter.search.

class CIA(FOIAAdapter):
    agency = "CIA"
    search_url = "https://www.cia.gov/readingroom/search/site/{q}"

class FBI(FOIAAdapter):
    agency = "FBI"
    search_url = "https://vault.fbi.gov/search?SearchableText={q}"

class DOJ(FOIAAdapter):
    agency = "DOJ"
    search_url = "https://www.justice.gov/foia/library?search={q}"

class DHS(FOIAAdapter):
    agency = "DHS"
    search_url = "https://www.dhs.gov/foia-library/search?search={q}"

class STATE(FOIAAdapter):
    agency = "State Department"
    search_url = "https://foia.state.gov/Search/Search.aspx?q={q}"

class GSA(FOIAAdapter):
    agency = "GSA"
    search_url = "https://www.gsa.gov/reference/freedom-of-information-act-foia/foia-library?search={q}"

class NSA(FOIAAdapter):
    agency = "NSA"
    search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"

# Registry of adapters queried on every search.
LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
94
 
95
  # ======================================================
96
+ # GLOBAL STATE (IN-MEMORY ONLY)
97
  # ======================================================
98
 
99
# In-memory only: results of the most recent search.
LAST_RESULTS = []
# Index into LAST_RESULTS chosen for AI analysis.
# NOTE(review): nothing in the visible Python ever assigns this — the card
# button calls a client-side selectDoc() with no backend hook; confirm wiring.
SELECTED_INDEX = None
# Maps share token -> snapshot of LAST_RESULTS at share time.
SHARE_REGISTRY = {}
102
+
103
+ # ======================================================
104
+ # UTILITIES
105
+ # ======================================================
106
+
107
def citation_hash(r):
    """Return a 16-hex-char stable fingerprint of a result record.

    The fingerprint covers agency, URL and timestamp, joined with '|'.
    """
    fingerprint = "|".join((r["agency"], r["url"], r["timestamp"]))
    return hashlib.sha256(fingerprint.encode()).hexdigest()[:16]
110
+
111
def bluebook(r):
    """Render a result record as a Bluebook-style citation line."""
    retrieved = datetime.utcnow().strftime('%b %d, %Y')
    return (
        f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
        f"{r['url']} (retrieved {retrieved})."
    )
116
+
117
def ai_disclosure():
    """Return the court-ready disclosure footer appended to every AI answer."""
    lines = [
        "",
        "",
        "---",
        "AI DISCLOSURE (Court-Ready)",
        "โ€ข Analysis initiated only by user",
        "โ€ข PDF text extracted only with explicit opt-in",
        "โ€ข Public FOIA materials only",
        "โ€ข AI output is not evidence or legal advice",
        "โ€ข Verify against the original source",
        "",
    ]
    return "\n".join(lines)
127
+
128
def hash_ai_output(text):
    """SHA-256 hex digest of *text*; used as an integrity stamp on AI output."""
    digest = hashlib.sha256()
    digest.update(text.encode())
    return digest.hexdigest()
130
 
131
  # ======================================================
132
  # SEARCH
133
  # ======================================================
134
 
135
def run_search(query):
    """Query every live adapter and return (table_rows, cards_html).

    Side effect: repopulates the module-level LAST_RESULTS cache, which the
    share / export / analysis features all read.
    """
    global LAST_RESULTS
    LAST_RESULTS = []
    rows = []

    for adapter in LIVE_ADAPTERS:
        for record in adapter.search(query):
            record["hash"] = citation_hash(record)
            LAST_RESULTS.append(record)
            rows.append([
                record["agency"],
                record["title"],
                record["url"],
                record["hash"],
                f"{record['latency_ms']} ms",
            ])

    return rows, render_cards()
153
 
154
  # ======================================================
155
+ # CARD GALLERY
156
  # ======================================================
157
 
158
def render_cards():
    """Render LAST_RESULTS as an HTML card gallery.

    PDF links get an inline <iframe> preview; everything else gets a plain
    link out to the agency page.
    NOTE(review): the Analyze button calls a client-side selectDoc() that is
    not defined in the visible Python/JS — confirm it is wired up elsewhere.
    """
    cards = []
    for idx, r in enumerate(LAST_RESULTS):
        url = r["url"]

        if url.lower().endswith(".pdf"):
            preview = f"<iframe src='{url}' height='220' width='100%'></iframe>"
        else:
            preview = f"<a href='{url}' target='_blank'>Open FOIA page</a>"

        latency = f"<span class='badge'>โฑ {r['latency_ms']} ms</span>"

        cards.append(f"""
    <div class="card">
      <div class="card-header">
        <b>{r['agency']}</b> {latency}
      </div>
      <div class="card-title">{r['title']}</div>
      {preview}
      <div class="actions">
        <a href="{url}" target="_blank">View</a>
        <a href="{url}" download>Download</a>
        <button onclick="selectDoc({idx})">Analyze / Ask AI</button>
      </div>
    </div>
    """)

    return "".join(cards) if cards else "<i>No results</i>"
188
+
189
+ # ======================================================
190
+ # PDF TEXT EXTRACTION (OPT-IN)
191
+ # ======================================================
192
+
193
def extract_pdf_text(url):
    """Download a PDF from *url* and return up to 6000 chars of extracted text.

    Returns "" when extraction is disabled, pdfminer is unavailable, or any
    network / HTTP / parse error occurs (best-effort by design).
    """
    if not (PDF_TEXT_AVAILABLE and ENABLE_PDF_EXTRACTION):
        return ""

    try:
        resp = requests.get(url, timeout=15)
        # Treat HTTP errors as "no text" instead of parsing an error page.
        resp.raise_for_status()
        # Parse in memory: the previous fixed "/tmp/tmp.pdf" path was a race
        # condition between concurrent users and not portable off Unix.
        return extract_text(io.BytesIO(resp.content))[:6000]
    except Exception:
        return ""
204
 
205
  # ======================================================
206
+ # AI ASK (STRICTLY OPT-IN)
207
+ # ======================================================
208
+
209
def ask_ai(opt_in, extract_opt_in, question):
    """Answer *question* about the currently selected document.

    Guard rails: refuses unless the AI checkbox is ticked, and only pulls PDF
    text when the extraction checkbox is also ticked. The answer carries a
    disclosure footer plus a SHA-256 integrity hash of its own text.

    NOTE(review): SELECTED_INDEX is never assigned by any visible Python
    handler (the cards' selectDoc() is client-side only), so this path
    currently always reports "Select a document first" — confirm the wiring.
    """
    if not opt_in:
        return "โš  AI disabled. Explicit opt-in required."

    if SELECTED_INDEX is None:
        return "โš  Select a document first."

    record = LAST_RESULTS[SELECTED_INDEX]

    context = ""
    if extract_opt_in and record["url"].lower().endswith(".pdf"):
        context = extract_pdf_text(record["url"])

    analysis = (
        f"AI ANALYSIS\n\n"
        f"Agency: {record['agency']}\n"
        f"Title: {record['title']}\n"
        f"URL: {record['url']}\n\n"
        f"User Question:\n{question}\n\n"
        f"Extracted Context (if any):\n{context[:1500]}\n\n"
        f"Summary:\nThis material is publicly available via FOIA."
    )

    final = analysis + ai_disclosure()
    return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
234
+
235
+ # ======================================================
236
+ # PERSISTENT SHARE PAGES (LINK-ONLY)
237
  # ======================================================
238
 
239
def create_share():
    """Snapshot the current results under a short content-derived token."""
    token = hashlib.sha256(str(LAST_RESULTS).encode()).hexdigest()[:12]
    # .copy() is shallow: later mutation of the result dicts would leak
    # into the stored share.
    SHARE_REGISTRY[token] = LAST_RESULTS.copy()
    return f"Share ID: {token}"
243
 
244
def load_share(token):
    """Return Bluebook citations for a stored share, or an error string.

    An unknown token — or a share that was created while no results were
    loaded — both report as invalid (falsy check, preserved deliberately).
    """
    records = SHARE_REGISTRY.get(token)
    if not records:
        return "Invalid or expired share ID."
    return "\n".join(bluebook(r) for r in records)
249
 
250
  # ======================================================
251
+ # LITIGATION APPENDIX (PDF)
252
+ # ======================================================
253
+
254
def litigation_appendix():
    """Build a court-style appendix PDF of the current results.

    Returns the path of a temporary .pdf file. (Previously this returned an
    io.BytesIO, but gr.File output components expect a filepath, so the
    download button could not serve the buffer.)
    """
    import tempfile

    styles = getSampleStyleSheet()
    story = [
        Paragraph("<b>Litigation Appendix</b>", styles["Title"]),
        Spacer(1, 12),
        Paragraph(
            f"Generated {datetime.utcnow().strftime('%B %d, %Y UTC')}",
            styles["Normal"],
        ),
        Spacer(1, 12),
    ]

    # One exhibit per result, cited in Bluebook form.
    for i, r in enumerate(LAST_RESULTS, start=1):
        story.append(Paragraph(f"<b>Exhibit A-{i}</b>", styles["Heading2"]))
        story.append(Paragraph(bluebook(r), styles["Normal"]))
        story.append(Spacer(1, 8))

    out = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    out.close()
    SimpleDocTemplate(out.name).build(story)
    return out.name
276
+
277
+ # ======================================================
278
+ # COVERAGE HEATMAP
279
+ # ======================================================
280
+
281
def coverage_heatmap():
    """Single-column heatmap: result count per live agency."""
    per_agency = Counter(r["agency"] for r in LAST_RESULTS)
    agencies = [a.agency for a in LIVE_ADAPTERS]
    fig = go.Figure(
        data=go.Heatmap(
            z=[[per_agency.get(name, 0)] for name in agencies],
            x=["Results"],
            y=agencies,
            colorscale="Blues",
        ),
        layout=go.Layout(title="Agency Coverage Heatmap"),
    )
    return fig
292
+
293
+ # ======================================================
294
+ # ENTITY / TIMELINE
295
+ # ======================================================
296
+
297
def entity_graph():
    """Bar chart of result counts per source domain (netloc)."""
    by_domain = Counter(urlparse(r["url"]).netloc for r in LAST_RESULTS)
    bars = go.Bar(x=list(by_domain.keys()), y=list(by_domain.values()))
    return go.Figure([bars])
300
+
301
def timeline():
    """Bar chart of result counts per retrieval date (YYYY-MM-DD prefix)."""
    by_date = Counter(r["timestamp"][:10] for r in LAST_RESULTS)
    bars = go.Bar(x=list(by_date.keys()), y=list(by_date.values()))
    return go.Figure([bars])
304
+
305
+ # ======================================================
306
+ # JOURNALIST ZIP
307
  # ======================================================
308
 
309
def journalist_zip(results=None):
    """Bundle Bluebook citations and a CSV of links into a ZIP for journalists.

    results: list of result records; defaults to the module-level
        LAST_RESULTS (keeps the zero-arg Gradio binding working).
    Returns the path of a temporary .zip file. (Previously this returned an
    io.BytesIO, but gr.File output components expect a filepath, so the
    download button could not serve the buffer.)
    """
    import tempfile

    if results is None:
        results = LAST_RESULTS

    out = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    with zipfile.ZipFile(out, "w") as z:
        z.writestr("citations.txt", "\n".join(bluebook(r) for r in results))
        z.writestr(
            "links.csv",
            "agency,title,url\n" +
            "\n".join(f"{r['agency']},{r['title']},{r['url']}" for r in results)
        )
    out.close()
    return out.name
320
 
 
 
 
321
  # ======================================================
322
  # UI
323
  # ======================================================
324
 
325
+ CSS = """
326
+ .card {border:1px solid #ddd;border-radius:12px;padding:14px;margin-bottom:18px}
327
+ .card-header {display:flex;justify-content:space-between}
328
+ .card-title {margin:6px 0 10px 0}
329
+ .actions a, .actions button {margin-right:10px}
330
+ .badge {background:#eef;padding:2px 6px;border-radius:6px;font-size:12px}
331
+ """
332
+
333
+ with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
334
+ gr.Markdown("# ๐Ÿ›๏ธ Federal FOIA Intelligence Search\nPublic FOIA Reading Rooms Only")
335
 
336
  with gr.Tabs():
337
  with gr.Tab("๐Ÿ” Search"):
338
+ query = gr.Textbox(label="Search FOIA Libraries")
339
+ search_btn = gr.Button("Search")
340
+ table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
341
  gallery = gr.HTML()
342
+ search_btn.click(run_search, query, [table, gallery])
343
+
344
+ with gr.Tab("๐Ÿง  Ask AI"):
345
+ ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
346
+ pdf_opt = gr.Checkbox(label="Allow PDF Text Extraction")
347
+ question = gr.Textbox(label="Ask about selected document", lines=4)
348
+ answer = gr.Textbox(lines=18)
349
+ gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, question], answer)
350
+
351
+ with gr.Tab("๐Ÿ“Š Analysis"):
352
+ gr.Button("Coverage Heatmap").click(coverage_heatmap, outputs=gr.Plot())
353
+ gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
354
+ gr.Button("Timeline").click(timeline, outputs=gr.Plot())
355
 
356
  with gr.Tab("๐Ÿ“ค Share"):
357
  gr.Button("Create Share Page").click(create_share, outputs=gr.Textbox())
358
+ share_id = gr.Textbox(label="Load Share ID")
359
+ gr.Button("Load").click(load_share, share_id, gr.Textbox(lines=10))
360
 
361
+ with gr.Tab("โš–๏ธ Court Tools"):
362
+ gr.Button("Generate Litigation Appendix PDF").click(
363
+ litigation_appendix, outputs=gr.File()
364
+ )
365
 
366
  with gr.Tab("๐Ÿ—‚ Exports"):
367
  gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())