GodsDevProject committed on
Commit
88900ac
·
verified ·
1 Parent(s): 6a02a5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -148
app.py CHANGED
@@ -6,15 +6,18 @@ import io
6
  import uuid
7
  from datetime import datetime
8
  from urllib.parse import quote_plus, urlparse
9
- from collections import Counter, defaultdict
10
  import requests
11
 
12
  import plotly.graph_objects as go
13
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
14
  from reportlab.lib.styles import getSampleStyleSheet
15
 
 
 
 
16
  # ======================================================
17
- # OPTIONAL PDF TEXT EXTRACTION (SAFE / GUARDED)
18
  # ======================================================
19
 
20
  PDF_TEXT_AVAILABLE = False
@@ -25,17 +28,13 @@ except Exception:
25
  PDF_TEXT_AVAILABLE = False
26
 
27
  # ======================================================
28
- # CONFIG / FEATURE GATES
29
  # ======================================================
30
 
31
- ENABLE_AI = True # explicit opt-in required
32
- ENABLE_PDF_EXTRACTION = True # checkbox gated
33
- ENABLE_ENTITY_GRAPHS = True
34
- ENABLE_TIMELINES = True
35
- ENABLE_JOURNALIST_ZIP = True
36
  ENABLE_LITIGATION_PDF = True
37
  ENABLE_COVERAGE_HEATMAP = True
38
- ENABLE_LATENCY_BADGES = True
39
 
40
  # ======================================================
41
  # BASE ADAPTER (LINK-OUT ONLY)
@@ -49,7 +48,6 @@ class FOIAAdapter:
49
  start = time.time()
50
  url = self.search_url.format(q=quote_plus(query))
51
  latency = round((time.time() - start) * 1000, 1)
52
-
53
  return [{
54
  "agency": self.agency,
55
  "title": f"{self.agency} FOIA Reading Room Result",
@@ -59,7 +57,7 @@ class FOIAAdapter:
59
  }]
60
 
61
  # ======================================================
62
- # LIVE AGENCIES (SAFE)
63
  # ======================================================
64
 
65
  class CIA(FOIAAdapter):
@@ -93,36 +91,29 @@ class NSA(FOIAAdapter):
93
  LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
94
 
95
  # ======================================================
96
- # GLOBAL STATE (IN-MEMORY ONLY)
97
  # ======================================================
98
 
99
  LAST_RESULTS = []
100
  SELECTED_INDEX = None
101
- SHARE_REGISTRY = {}
102
 
103
  # ======================================================
104
  # UTILITIES
105
  # ======================================================
106
 
107
  def citation_hash(r):
108
- raw = f"{r['agency']}|{r['url']}|{r['timestamp']}"
109
- return hashlib.sha256(raw.encode()).hexdigest()[:16]
110
-
111
- def bluebook(r):
112
- return (
113
- f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
114
- f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
115
- )
116
 
117
  def ai_disclosure():
118
  return (
119
  "\n\n---\n"
120
- "AI DISCLOSURE (Court-Ready)\n"
121
- "• Analysis initiated only by user\n"
122
- "• PDF text extracted only with explicit opt-in\n"
123
  "• Public FOIA materials only\n"
124
  "• AI output is not evidence or legal advice\n"
125
- "• Verify against the original source\n"
126
  )
127
 
128
  def hash_ai_output(text):
@@ -152,63 +143,58 @@ def run_search(query):
152
  return rows, render_cards()
153
 
154
  # ======================================================
155
- # CARD GALLERY
156
  # ======================================================
157
 
158
  def render_cards():
159
  cards = []
160
  for idx, r in enumerate(LAST_RESULTS):
161
- url = r["url"]
162
- is_pdf = url.lower().endswith(".pdf")
163
-
164
  preview = (
165
- f"<iframe src='{url}' height='220' width='100%'></iframe>"
166
- if is_pdf else
167
- f"<a href='{url}' target='_blank'>Open FOIA page</a>"
168
  )
169
 
170
- latency = f"<span class='badge'>⏱ {r['latency_ms']} ms</span>"
171
-
172
  cards.append(f"""
173
  <div class="card">
174
- <div class="card-header">
175
- <b>{r['agency']}</b> {latency}
176
- </div>
177
- <div class="card-title">{r['title']}</div>
178
- {preview}
179
- <div class="actions">
180
- <a href="{url}" target="_blank">View</a>
181
- <a href="{url}" download>Download</a>
182
- <button onclick="selectDoc({idx})">Analyze / Ask AI</button>
183
- </div>
 
184
  </div>
185
  """)
186
 
187
  return "".join(cards) if cards else "<i>No results</i>"
188
 
189
  # ======================================================
190
- # PDF TEXT EXTRACTION (OPT-IN)
191
  # ======================================================
192
 
193
  def extract_pdf_text(url):
194
  if not (PDF_TEXT_AVAILABLE and ENABLE_PDF_EXTRACTION):
195
  return ""
196
-
197
  try:
198
  r = requests.get(url, timeout=15)
199
- with open("/tmp/tmp.pdf", "wb") as f:
200
  f.write(r.content)
201
- return extract_text("/tmp/tmp.pdf")[:6000]
202
  except Exception:
203
  return ""
204
 
205
  # ======================================================
206
- # AI ASK (STRICTLY OPT-IN)
207
  # ======================================================
208
 
209
- def ask_ai(opt_in, extract_opt_in, question):
210
  if not opt_in:
211
- return "⚠ AI disabled. Explicit opt-in required."
212
 
213
  if SELECTED_INDEX is None:
214
  return "⚠ Select a document first."
@@ -216,58 +202,42 @@ def ask_ai(opt_in, extract_opt_in, question):
216
  r = LAST_RESULTS[SELECTED_INDEX]
217
  context = ""
218
 
219
- if extract_opt_in and r["url"].lower().endswith(".pdf"):
220
  context = extract_pdf_text(r["url"])
221
 
222
  analysis = (
223
- f"AI ANALYSIS\n\n"
224
- f"Agency: {r['agency']}\n"
225
- f"Title: {r['title']}\n"
226
- f"URL: {r['url']}\n\n"
227
  f"User Question:\n{question}\n\n"
228
- f"Extracted Context (if any):\n{context[:1500]}\n\n"
229
- f"Summary:\nThis material is publicly available via FOIA."
 
230
  )
231
 
232
  final = analysis + ai_disclosure()
233
  return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
234
 
235
  # ======================================================
236
- # PERSISTENT SHARE PAGES (LINK-ONLY)
237
- # ======================================================
238
-
239
- def create_share():
240
- token = hashlib.sha256(str(LAST_RESULTS).encode()).hexdigest()[:12]
241
- SHARE_REGISTRY[token] = LAST_RESULTS.copy()
242
- return f"Share ID: {token}"
243
-
244
- def load_share(token):
245
- records = SHARE_REGISTRY.get(token)
246
- if not records:
247
- return "Invalid or expired share ID."
248
- return "\n".join(bluebook(r) for r in records)
249
-
250
- # ======================================================
251
- # LITIGATION APPENDIX (PDF)
252
  # ======================================================
253
 
254
  def litigation_appendix():
255
  buf = io.BytesIO()
256
  doc = SimpleDocTemplate(buf)
257
  styles = getSampleStyleSheet()
258
- story = [
259
- Paragraph("<b>Litigation Appendix</b>", styles["Title"]),
260
- Spacer(1, 12),
261
- Paragraph(
262
- f"Generated {datetime.utcnow().strftime('%B %d, %Y UTC')}",
263
- styles["Normal"]
264
- ),
265
- Spacer(1, 12),
266
- ]
 
267
 
268
  for i, r in enumerate(LAST_RESULTS, start=1):
269
- story.append(Paragraph(f"<b>Exhibit A-{i}</b>", styles["Heading2"]))
270
- story.append(Paragraph(bluebook(r), styles["Normal"]))
271
  story.append(Spacer(1, 8))
272
 
273
  doc.build(story)
@@ -291,79 +261,61 @@ def coverage_heatmap():
291
  )
292
 
293
  # ======================================================
294
- # ENTITY / TIMELINE
295
  # ======================================================
296
 
297
- def entity_graph():
298
- domains = Counter(urlparse(r["url"]).netloc for r in LAST_RESULTS)
299
- return go.Figure([go.Bar(x=list(domains.keys()), y=list(domains.values()))])
300
-
301
- def timeline():
302
- dates = Counter(r["timestamp"][:10] for r in LAST_RESULTS)
303
- return go.Figure([go.Bar(x=list(dates.keys()), y=list(dates.values()))])
304
-
305
- # ======================================================
306
- # JOURNALIST ZIP
307
- # ======================================================
308
-
309
- def journalist_zip():
310
- buf = io.BytesIO()
311
- with zipfile.ZipFile(buf, "w") as z:
312
- z.writestr("citations.txt", "\n".join(bluebook(r) for r in LAST_RESULTS))
313
- z.writestr(
314
- "links.csv",
315
- "agency,title,url\n" +
316
- "\n".join(f"{r['agency']},{r['title']},{r['url']}" for r in LAST_RESULTS)
317
- )
318
- buf.seek(0)
319
- return buf
320
 
321
  # ======================================================
322
  # UI
323
  # ======================================================
324
 
325
  CSS = """
326
- .card {border:1px solid #ddd;border-radius:12px;padding:14px;margin-bottom:18px}
 
327
  .card-header {display:flex;justify-content:space-between}
328
- .card-title {margin:6px 0 10px 0}
329
- .actions a, .actions button {margin-right:10px}
330
- .badge {background:#eef;padding:2px 6px;border-radius:6px;font-size:12px}
331
  """
332
 
333
  with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
334
- gr.Markdown("# 🏛️ Federal FOIA Intelligence Search\nPublic FOIA Reading Rooms Only")
335
-
336
- with gr.Tabs():
337
- with gr.Tab("🔍 Search"):
338
- query = gr.Textbox(label="Search FOIA Libraries")
339
- search_btn = gr.Button("Search")
340
- table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
341
- gallery = gr.HTML()
342
- search_btn.click(run_search, query, [table, gallery])
343
-
344
- with gr.Tab("🧠 Ask AI"):
345
- ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
346
- pdf_opt = gr.Checkbox(label="Allow PDF Text Extraction")
347
- question = gr.Textbox(label="Ask about selected document", lines=4)
348
- answer = gr.Textbox(lines=18)
349
- gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, question], answer)
350
-
351
- with gr.Tab("📊 Analysis"):
352
- gr.Button("Coverage Heatmap").click(coverage_heatmap, outputs=gr.Plot())
353
- gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
354
- gr.Button("Timeline").click(timeline, outputs=gr.Plot())
355
-
356
- with gr.Tab("📤 Share"):
357
- gr.Button("Create Share Page").click(create_share, outputs=gr.Textbox())
358
- share_id = gr.Textbox(label="Load Share ID")
359
- gr.Button("Load").click(load_share, share_id, gr.Textbox(lines=10))
360
-
361
- with gr.Tab("⚖️ Court Tools"):
362
- gr.Button("Generate Litigation Appendix PDF").click(
363
- litigation_appendix, outputs=gr.File()
364
- )
365
-
366
- with gr.Tab("🗂 Exports"):
367
- gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
 
 
368
 
369
  app.launch()
 
6
  import uuid
7
  from datetime import datetime
8
  from urllib.parse import quote_plus, urlparse
9
+ from collections import Counter
10
  import requests
11
 
12
  import plotly.graph_objects as go
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
14
  from reportlab.lib.styles import getSampleStyleSheet
15
 
16
+ from citations import bluebook_exhibit, table_of_authorities
17
+ from foia_requests import generate_foia_request_text
18
+
19
  # ======================================================
20
+ # OPTIONAL PDF TEXT EXTRACTION (STRICTLY OPT-IN)
21
  # ======================================================
22
 
23
  PDF_TEXT_AVAILABLE = False
 
28
  PDF_TEXT_AVAILABLE = False
29
 
30
  # ======================================================
31
+ # FEATURE GATES (HF SAFE)
32
  # ======================================================
33
 
34
+ ENABLE_AI = True
35
+ ENABLE_PDF_EXTRACTION = True
 
 
 
36
  ENABLE_LITIGATION_PDF = True
37
  ENABLE_COVERAGE_HEATMAP = True
 
38
 
39
  # ======================================================
40
  # BASE ADAPTER (LINK-OUT ONLY)
 
48
  start = time.time()
49
  url = self.search_url.format(q=quote_plus(query))
50
  latency = round((time.time() - start) * 1000, 1)
 
51
  return [{
52
  "agency": self.agency,
53
  "title": f"{self.agency} FOIA Reading Room Result",
 
57
  }]
58
 
59
  # ======================================================
60
+ # LIVE AGENCIES (PUBLIC READING ROOMS)
61
  # ======================================================
62
 
63
  class CIA(FOIAAdapter):
 
91
  LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
92
 
93
  # ======================================================
94
+ # GLOBAL STATE (SESSION MEMORY ONLY)
95
  # ======================================================
96
 
97
  LAST_RESULTS = []
98
  SELECTED_INDEX = None
 
99
 
100
  # ======================================================
101
  # UTILITIES
102
  # ======================================================
103
 
104
  def citation_hash(r):
105
+ return hashlib.sha256(
106
+ f"{r['agency']}|{r['url']}|{r['timestamp']}".encode()
107
+ ).hexdigest()[:16]
 
 
 
 
 
108
 
109
  def ai_disclosure():
110
  return (
111
  "\n\n---\n"
112
+ "AI DISCLOSURE\n"
113
+ "• User-initiated analysis only\n"
 
114
  "• Public FOIA materials only\n"
115
  "• AI output is not evidence or legal advice\n"
116
+ "• Verify against original sources\n"
117
  )
118
 
119
  def hash_ai_output(text):
 
143
  return rows, render_cards()
144
 
145
  # ======================================================
146
+ # RESULTS CARDS (POLISHED)
147
  # ======================================================
148
 
149
  def render_cards():
150
  cards = []
151
  for idx, r in enumerate(LAST_RESULTS):
 
 
 
152
  preview = (
153
+ f"<iframe src='{r['url']}' height='220' width='100%'></iframe>"
154
+ if r["url"].lower().endswith(".pdf")
155
+ else f"<a href='{r['url']}' target='_blank'>Open FOIA Page</a>"
156
  )
157
 
 
 
158
  cards.append(f"""
159
  <div class="card">
160
+ <div class="card-header">
161
+ <b>{r['agency']}</b>
162
+ <span class="badge">⏱ {r['latency_ms']} ms</span>
163
+ </div>
164
+ <div class="card-title">{r['title']}</div>
165
+ {preview}
166
+ <div class="actions">
167
+ <button onclick="selectDoc({idx})">Ask AI</button>
168
+ <a href="{r['url']}" target="_blank">View</a>
169
+ <a href="{r['url']}" download>Download</a>
170
+ </div>
171
  </div>
172
  """)
173
 
174
  return "".join(cards) if cards else "<i>No results</i>"
175
 
176
  # ======================================================
177
+ # PDF EXTRACTION (OPT-IN)
178
  # ======================================================
179
 
180
  def extract_pdf_text(url):
181
  if not (PDF_TEXT_AVAILABLE and ENABLE_PDF_EXTRACTION):
182
  return ""
 
183
  try:
184
  r = requests.get(url, timeout=15)
185
+ with open("/tmp/doc.pdf", "wb") as f:
186
  f.write(r.content)
187
+ return extract_text("/tmp/doc.pdf")[:6000]
188
  except Exception:
189
  return ""
190
 
191
  # ======================================================
192
+ # AI ASK + CITATION CROSS-CHECK
193
  # ======================================================
194
 
195
+ def ask_ai(opt_in, pdf_opt_in, question):
196
  if not opt_in:
197
+ return "⚠ AI requires explicit opt-in."
198
 
199
  if SELECTED_INDEX is None:
200
  return "⚠ Select a document first."
 
202
  r = LAST_RESULTS[SELECTED_INDEX]
203
  context = ""
204
 
205
+ if pdf_opt_in and r["url"].lower().endswith(".pdf"):
206
  context = extract_pdf_text(r["url"])
207
 
208
  analysis = (
209
+ f"{bluebook_exhibit(r, SELECTED_INDEX + 1)}\n\n"
 
 
 
210
  f"User Question:\n{question}\n\n"
211
+ f"Extracted Context:\n{context[:1500]}\n\n"
212
+ f"AI Summary:\nThis is a public FOIA document. "
213
+ f"Assertions should be verified against the cited exhibit."
214
  )
215
 
216
  final = analysis + ai_disclosure()
217
  return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
218
 
219
  # ======================================================
220
+ # LITIGATION APPENDIX (WITH TOA)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  # ======================================================
222
 
223
  def litigation_appendix():
224
  buf = io.BytesIO()
225
  doc = SimpleDocTemplate(buf)
226
  styles = getSampleStyleSheet()
227
+ story = []
228
+
229
+ story.append(Paragraph("Litigation Appendix", styles["Title"]))
230
+ story.append(Spacer(1, 12))
231
+
232
+ story.append(Paragraph("Table of Authorities", styles["Heading1"]))
233
+ for line in table_of_authorities(LAST_RESULTS):
234
+ story.append(Paragraph(line, styles["Normal"]))
235
+
236
+ story.append(PageBreak())
237
 
238
  for i, r in enumerate(LAST_RESULTS, start=1):
239
+ story.append(Paragraph(f"Exhibit A-{i}", styles["Heading2"]))
240
+ story.append(Paragraph(bluebook_exhibit(r, i), styles["Normal"]))
241
  story.append(Spacer(1, 8))
242
 
243
  doc.build(story)
 
261
  )
262
 
263
  # ======================================================
264
+ # FOIA REQUEST GENERATOR
265
  # ======================================================
266
 
267
+ def foia_request(agency, subject, requester):
268
+ return generate_foia_request_text(agency, subject, requester)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
  # ======================================================
271
  # UI
272
  # ======================================================
273
 
274
  CSS = """
275
+ .search textarea {font-size:18px;padding:14px}
276
+ .card {border:1px solid #ddd;border-radius:14px;padding:16px;margin-bottom:18px}
277
  .card-header {display:flex;justify-content:space-between}
278
+ .card-title {margin:8px 0 12px}
279
+ .actions button, .actions a {margin-right:10px}
280
+ .badge {background:#eef;padding:4px 8px;border-radius:8px;font-size:12px}
281
  """
282
 
283
  with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
284
+ gr.Markdown("## 🏛️ Federal FOIA Intelligence Search\nPublic Reading Rooms Only")
285
+
286
+ with gr.Tab("🔍 Search"):
287
+ query = gr.Textbox(
288
+ label="Search FOIA Reading Rooms",
289
+ elem_classes=["search"],
290
+ placeholder="e.g. procurement, AATIP, surveillance"
291
+ )
292
+ search_btn = gr.Button("Search", variant="primary")
293
+ table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
294
+ gallery = gr.HTML()
295
+ search_btn.click(run_search, query, [table, gallery])
296
+
297
+ with gr.Tab("🧠 Ask AI"):
298
+ ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
299
+ pdf_opt = gr.Checkbox(label="Allow PDF Text Extraction")
300
+ question = gr.Textbox(lines=4)
301
+ answer = gr.Textbox(lines=18)
302
+ gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, question], answer)
303
+
304
+ with gr.Tab("📊 Analysis"):
305
+ gr.Button("Coverage Heatmap").click(coverage_heatmap, outputs=gr.Plot())
306
+
307
+ with gr.Tab("⚖️ Court Tools"):
308
+ gr.Button("Generate Litigation Appendix PDF").click(
309
+ litigation_appendix, outputs=gr.File()
310
+ )
311
+
312
+ with gr.Tab("📝 FOIA Request"):
313
+ agency = gr.Textbox(label="Agency")
314
+ subject = gr.Textbox(label="Records Requested")
315
+ requester = gr.Textbox(label="Requester Name")
316
+ output = gr.Textbox(lines=14)
317
+ gr.Button("Generate FOIA Request").click(
318
+ foia_request, [agency, subject, requester], output
319
+ )
320
 
321
  app.launch()