GodsDevProject committed on
Commit
f9878e1
·
verified ·
1 Parent(s): a0acd7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -123
app.py CHANGED
@@ -1,25 +1,42 @@
1
  import gradio as gr
2
- import hashlib
3
  import time
 
 
 
4
  from datetime import datetime
5
- from urllib.parse import quote_plus
6
- from sentence_transformers import SentenceTransformer
7
- import faiss
8
- from reportlab.lib.pagesizes import LETTER
9
- from reportlab.pdfgen import canvas
10
- import tempfile
11
- import os
12
- import webbrowser
13
-
14
- # =====================================================
15
- # CONFIG
16
- # =====================================================
17
-
18
- SEMANTIC_MODEL = "all-MiniLM-L6-v2"
19
-
20
- # =====================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # BASE ADAPTER
22
- # =====================================================
23
 
24
  class FOIAAdapter:
25
  agency = "UNKNOWN"
@@ -33,16 +50,16 @@ class FOIAAdapter:
33
 
34
  return [{
35
  "agency": self.agency,
36
- "title": f"{self.agency} FOIA Reading Room Result",
37
  "url": url,
38
  "latency": latency,
39
  "is_live": self.is_live,
40
  "timestamp": datetime.utcnow().isoformat()
41
  }]
42
 
43
- # =====================================================
44
- # LIVE AGENCIES (PUBLIC FOIA)
45
- # =====================================================
46
 
47
  class CIA(FOIAAdapter):
48
  agency = "CIA"
@@ -74,16 +91,15 @@ class NSA(FOIAAdapter):
74
 
75
  LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
76
 
77
- # =====================================================
78
- # STUB ADAPTERS (OPT-IN)
79
- # =====================================================
80
 
81
  class StubAdapter(FOIAAdapter):
82
  is_live = False
83
- search_url = ""
84
-
85
  def __init__(self, agency):
86
  self.agency = agency
 
87
 
88
  def search(self, query):
89
  return [{
@@ -105,30 +121,44 @@ STUB_ADAPTERS = [
105
  StubAdapter("Special Activities"),
106
  ]
107
 
108
- # =====================================================
109
  # UTILITIES
110
- # =====================================================
111
 
112
  def citation_hash(r):
113
  raw = f"{r['agency']}{r['url']}{r['timestamp']}"
114
  return hashlib.sha256(raw.encode()).hexdigest()[:16]
115
 
116
- def bluebook(r):
117
  return (
118
  f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
119
  f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
120
  )
121
 
122
- # =====================================================
123
- # SEARCH
124
- # =====================================================
 
 
 
 
 
 
 
 
 
 
125
 
126
- def run_search(query, include_stubs):
127
- rows = []
128
  adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])
 
 
 
 
 
 
 
 
129
 
130
- for a in adapters:
131
- for r in a.search(query):
132
  rows.append([
133
  r["agency"],
134
  "LIVE" if r["is_live"] else "STUB",
@@ -136,107 +166,161 @@ def run_search(query, include_stubs):
136
  r["url"],
137
  r["latency"],
138
  citation_hash(r) if r["is_live"] else "",
139
- bluebook(r) if r["is_live"] else "Not exportable (STUB)",
140
- r
141
  ])
142
 
143
- return rows
144
-
145
- # =====================================================
146
- # PREVIEW / THUMBNAIL
147
- # =====================================================
148
-
149
- def preview(row):
150
- if not row:
151
- return "<i>Select a result</i>"
152
-
153
- url = row[3]
154
- if isinstance(url, str) and url.lower().endswith(".pdf"):
155
- return f"<iframe src='{url}' width='100%' height='520'></iframe>"
156
-
157
- return "<i>No PDF preview available</i>"
158
-
159
- # =====================================================
160
- # ANALYZE WITH AI (FREE, LOCAL)
161
- # =====================================================
162
-
163
- model = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
- def analyze(row):
166
- global model
167
- if not row or row[1] != "LIVE":
168
- return "Analysis unavailable."
169
 
170
- if model is None:
171
- model = SentenceTransformer(SEMANTIC_MODEL)
 
 
172
 
173
- return (
174
- f"AI Summary (Metadata-based):\n\n"
175
- f"Agency: {row[0]}\n"
176
- f"Title: {row[2]}\n"
177
- f"Public URL: {row[3]}\n\n"
178
- f"This document originates from a public FOIA reading room. "
179
- f"Use the View or Download buttons to inspect the source."
180
  )
181
 
182
- # =====================================================
183
- # COURT-READY PDF EXPORT
184
- # =====================================================
185
-
186
- def export_pdf(rows):
187
- live = [r for r in rows if r[1] == "LIVE"]
188
- if not live:
189
- return None
190
-
191
- fd, path = tempfile.mkstemp(suffix=".pdf")
192
- os.close(fd)
193
-
194
- c = canvas.Canvas(path, pagesize=LETTER)
195
- width, height = LETTER
196
 
197
- for i, r in enumerate(live):
198
- c.setFont("Helvetica-Bold", 12)
199
- c.drawString(40, height - 40, f"Exhibit A-{i+1}")
200
- c.setFont("Helvetica", 10)
201
- c.drawString(40, height - 70, r[2])
202
- c.drawString(40, height - 90, r[3])
203
- c.drawString(40, 40, f"Integrity Hash: {r[5]}")
204
- c.showPage()
205
 
206
- c.save()
207
- return path
 
 
208
 
209
- # =====================================================
210
- # UI
211
- # =====================================================
212
 
213
- with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
214
- gr.Markdown("""
215
- # 🏛️ Federal FOIA Intelligence Search
216
- **Public Electronic Reading Rooms Only**
217
- """)
218
 
219
- q = gr.Textbox(label="Search FOIA Libraries")
220
- include_stubs = gr.Checkbox(label="Include Extended Coverage (STUB)", value=False)
221
- semantic = gr.Checkbox(label="Enable Semantic Refine (Opt-In)", value=False)
222
 
223
- btn = gr.Button("Search")
 
 
224
 
225
- table = gr.Dataframe(
226
- headers=["Agency", "Type", "Title", "URL", "Latency", "Hash", "Citation", "_raw"],
227
- interactive=True,
228
- visible=True
229
  )
230
 
231
- preview_panel = gr.HTML()
232
- analysis = gr.Textbox(label="AI Analysis", lines=6)
233
-
234
- export_btn = gr.Button("Export Court PDF (LIVE ONLY)")
235
- export_file = gr.File()
236
-
237
- btn.click(run_search, [q, include_stubs], table)
238
- table.select(lambda e: preview(e.value), None, preview_panel)
239
- table.select(lambda e: analyze(e.value), None, analysis)
240
- export_btn.click(export_pdf, table, export_file)
241
 
242
  app.launch()
 
1
  import gradio as gr
 
2
  import time
3
+ import hashlib
4
+ import zipfile
5
+ import io
6
  from datetime import datetime
7
+ from urllib.parse import quote_plus, urlparse
8
+ from collections import defaultdict, Counter
9
+
10
+ import plotly.graph_objects as go
11
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
12
+ from reportlab.lib.styles import getSampleStyleSheet
13
+
14
# ======================================================
# OPTIONAL SEMANTIC DEPENDENCIES (SAFE-GUARDED)
# ======================================================

# Probe for the optional semantic-search stack at import time; the app
# degrades gracefully when faiss / sentence-transformers are missing.
FAISS_AVAILABLE = False
try:
    import faiss
    from sentence_transformers import SentenceTransformer
    FAISS_AVAILABLE = True
except Exception:
    # Broad on purpose: any import-time failure (missing wheel, ABI
    # mismatch, etc.) simply disables the semantic feature.
    FAISS_AVAILABLE = False

# ======================================================
# CONFIG / FEATURE GATES
# ======================================================

ENABLE_SEMANTIC = False  # Opt-in only
# NOTE(review): of the flags below, only ENABLE_PDF_THUMBNAILS is consulted
# in the visible code (by build_pdf_thumbnail_gallery); the others appear to
# be declared for future gating — verify before removing.
ENABLE_PDF_EXPORT = True
ENABLE_HEATMAP = True
ENABLE_PDF_THUMBNAILS = True
ENABLE_ENTITY_GRAPHS = True
ENABLE_TIMELINES = True
36
+
37
+ # ======================================================
38
  # BASE ADAPTER
39
+ # ======================================================
40
 
41
  class FOIAAdapter:
42
  agency = "UNKNOWN"
 
50
 
51
  return [{
52
  "agency": self.agency,
53
+ "title": f"{self.agency} FOIA Search Results",
54
  "url": url,
55
  "latency": latency,
56
  "is_live": self.is_live,
57
  "timestamp": datetime.utcnow().isoformat()
58
  }]
59
 
60
+ # ======================================================
61
+ # LIVE AGENCIES (LINK-OUT ONLY)
62
+ # ======================================================
63
 
64
  class CIA(FOIAAdapter):
65
  agency = "CIA"
 
91
 
92
  LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
93
 
94
+ # ======================================================
95
+ # STUB ADAPTERS (NON-EXPORTABLE)
96
+ # ======================================================
97
 
98
  class StubAdapter(FOIAAdapter):
99
  is_live = False
 
 
100
  def __init__(self, agency):
101
  self.agency = agency
102
+ self.search_url = ""
103
 
104
  def search(self, query):
105
  return [{
 
121
  StubAdapter("Special Activities"),
122
  ]
123
 
124
+ # ======================================================
125
  # UTILITIES
126
+ # ======================================================
127
 
128
def citation_hash(r):
    """Return a short deterministic integrity hash for a result record.

    The hash covers agency, URL, and retrieval timestamp, so the same
    record always yields the same 16-hex-char fingerprint.
    """
    fingerprint = "".join((r["agency"], r["url"], r["timestamp"]))
    digest = hashlib.sha256(fingerprint.encode()).hexdigest()
    return digest[:16]
131
 
132
def bluebook_full(r):
    """Format a LIVE result record as a Bluebook-style citation string.

    Uses today's UTC date as the "retrieved" date. Expects r to carry
    'agency', 'title', and 'url' keys.
    """
    # datetime.utcnow() is deprecated (naive datetime); use an aware UTC
    # timestamp instead — the formatted date is identical.
    from datetime import timezone
    retrieved = datetime.now(timezone.utc).strftime("%b %d, %Y")
    return (
        f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
        f"{r['url']} (retrieved {retrieved})."
    )
137
 
138
# ======================================================
# GLOBAL STATE
# ======================================================

# Most recent LIVE result dicts from run_search(); read by the PDF
# gallery, entity graph, timeline, and FOIA-request builders.
# NOTE(review): module-level mutable state is shared across all Gradio
# sessions — concurrent users will clobber each other. Verify whether
# gr.State should be used instead.
LAST_LIVE_RECORDS = []
143
+
144
+ # ======================================================
145
+ # SEARCH HANDLER
146
+ # ======================================================
147
+
148
+ def run_search(query, include_stubs, semantic_mode):
149
+ global LAST_LIVE_RECORDS
150
+ LAST_LIVE_RECORDS = []
151
 
 
 
152
  adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])
153
+ rows = []
154
+ coverage = defaultdict(int)
155
+
156
+ for adapter in adapters:
157
+ for r in adapter.search(query):
158
+ coverage[r["agency"]] += 1
159
+ if r["is_live"]:
160
+ LAST_LIVE_RECORDS.append(r)
161
 
 
 
162
  rows.append([
163
  r["agency"],
164
  "LIVE" if r["is_live"] else "STUB",
 
166
  r["url"],
167
  r["latency"],
168
  citation_hash(r) if r["is_live"] else "",
169
+ bluebook_full(r) if r["is_live"] else "Not exportable (STUB)"
 
170
  ])
171
 
172
+ gap_md = "### Coverage Gaps\n"
173
+ for agency in [a.agency for a in LIVE_ADAPTERS]:
174
+ if coverage.get(agency, 0) == 0:
175
+ gap_md += f"- ❌ **{agency}**: no public results found\n"
176
+
177
+ return rows, gap_md, build_pdf_thumbnail_gallery()
178
+
179
# ======================================================
# PDF THUMBNAIL GALLERY
# ======================================================

def build_pdf_thumbnail_gallery():
    """Render an HTML card gallery of inline previews for LIVE .pdf results.

    Reads LAST_LIVE_RECORDS (populated by run_search). Non-PDF URLs are
    skipped. Returns an HTML string for a gr.HTML panel.
    """
    import html  # local: only needed for attribute escaping below

    if not ENABLE_PDF_THUMBNAILS:
        return "<i>PDF previews disabled</i>"

    cards = []

    for r in LAST_LIVE_RECORDS:
        url = r["url"]
        if not url or not url.lower().endswith(".pdf"):
            continue

        # Escape before interpolation: record fields flow into HTML
        # attributes and inline-JS string arguments, so a quote or angle
        # bracket in a URL/title would otherwise break out of context (XSS).
        safe_url = html.escape(url, quote=True)
        safe_agency = html.escape(r["agency"], quote=True)
        safe_title = html.escape(r["title"], quote=True)

        card = f"""
<div style="border:1px solid #ddd;border-radius:8px;padding:12px;margin-bottom:16px;">
<b>{safe_agency} — {safe_title}</b><br><br>
<iframe src="{safe_url}" width="100%" height="220"></iframe>
<div style="margin-top:8px;">
<a href="{safe_url}" target="_blank">View</a> |
<a href="{safe_url}" download>Download</a> |
<a href="#" onclick="shareDoc('{safe_url}')">Share</a> |
<a href="#" onclick="askAI('{safe_agency}', '{safe_title}', '{safe_url}')">Ask AI</a>
</div>
</div>
"""
        cards.append(card)

    return "".join(cards) if cards else "<i>No PDF documents found.</i>"
209
+
210
# ======================================================
# CLIENT JS
# ======================================================

# Injected via gr.HTML() in the UI below. NOTE(review): many Gradio
# versions sanitize/ignore <script> tags inside HTML components, so
# shareDoc/askAI may never be defined in the browser — verify at runtime
# (Blocks(head=...) or the js= parameter are the supported alternatives).
JS_HELPERS = """
<script>
function shareDoc(url) {
    if (navigator.share) {
        navigator.share({ title: "FOIA Document", url: url });
    } else {
        navigator.clipboard.writeText(url);
        alert("Link copied to clipboard");
    }
}

function askAI(agency, title, url) {
    alert(
        "AI Summary (Preview)\\n\\n" +
        "Agency: " + agency + "\\n" +
        "Title: " + title + "\\n\\n" +
        "Semantic analysis, entities, and citations can be enabled in opt-in AI mode."
    );
}
</script>
"""
235
+
236
# ======================================================
# ENTITY GRAPH + TIMELINE
# ======================================================

def build_entity_graph():
    """Bar chart of how many LIVE results came from each source domain."""
    tally = Counter()
    for rec in LAST_LIVE_RECORDS:
        tally[urlparse(rec["url"]).netloc] += 1
    return go.Figure([go.Bar(x=list(tally.keys()), y=list(tally.values()))])
243
+
244
def build_timeline():
    """Bar chart of LIVE result counts per timestamp date (YYYY-MM-DD)."""
    per_day = Counter()
    for rec in LAST_LIVE_RECORDS:
        per_day[rec["timestamp"][:10]] += 1
    return go.Figure([go.Bar(x=list(per_day.keys()), y=list(per_day.values()))])
247
+
248
# ======================================================
# FOIA REQUEST GENERATOR
# ======================================================

def generate_foia_request(requester, description):
    """Build a FOIA request letter PDF and return its filesystem path.

    Args:
        requester: name / organization string for the letter header.
        description: free-text description of the records requested.

    The agency list is taken from the most recent LIVE search results.

    Bug fix: the previous version returned an io.BytesIO, but the Gradio
    gr.File output component expects a filesystem path, so the download
    never worked. The document is now written to a temp file.
    """
    import os
    import tempfile

    fd, path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)  # SimpleDocTemplate reopens the path itself

    doc = SimpleDocTemplate(path)
    styles = getSampleStyleSheet()
    story = [
        Paragraph("<b>Freedom of Information Act Request</b>", styles["Title"]),
        Spacer(1, 12),
        Paragraph(f"<b>Requester:</b> {requester}", styles["Normal"]),
        Spacer(1, 8),
        Paragraph("<b>Description:</b>", styles["Normal"]),
        Paragraph(description, styles["Normal"]),
        Spacer(1, 12),
    ]

    agencies = ", ".join(sorted({r["agency"] for r in LAST_LIVE_RECORDS}))
    story.append(Paragraph(f"<b>Agencies:</b> {agencies}", styles["Normal"]))

    doc.build(story)
    return path
272
+
273
# ======================================================
# UI
# ======================================================

with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
    # NOTE(review): gr.HTML commonly strips <script> tags, so the
    # shareDoc/askAI helpers may not load — verify in the browser.
    gr.HTML(JS_HELPERS)

    gr.Markdown(
        """
        # 🏛️ Federal FOIA Intelligence Search
        **Public FOIA Electronic Reading Rooms Only**

        LIVE = court-ready
        STUB = informational only
        """
    )

    query = gr.Textbox(label="Search FOIA Libraries")
    include_stubs = gr.Checkbox(label="Include Extended Coverage (STUB)", value=False)
    semantic_toggle = gr.Checkbox(label="Enable Semantic Mode (Opt-In)", value=False)

    search_btn = gr.Button("Search")

    results = gr.Dataframe(
        headers=["Agency", "Type", "Title", "URL", "Latency", "Citation Hash", "Citation"],
        interactive=True
    )

    gap_panel = gr.Markdown()

    # Bug fix: instantiating gr.HTML() inside the Blocks context renders it
    # immediately, so the later .render() call was a duplicate render
    # (an error in current Gradio). Create it unrendered and place it
    # below the "PDF Document Previews" heading instead.
    pdf_gallery = gr.HTML(render=False)

    search_btn.click(
        run_search,
        inputs=[query, include_stubs, semantic_toggle],
        outputs=[results, gap_panel, pdf_gallery]
    )

    gr.Markdown("## 📄 PDF Document Previews")
    pdf_gallery.render()

    gr.Markdown("## FOIA Request Generator")
    requester = gr.Textbox(label="Your Name / Organization")
    description = gr.Textbox(label="Describe the records requested", lines=4)

    # gr.File() / gr.Plot() created inside .click(...) auto-render here.
    gr.Button("Generate FOIA Request PDF").click(
        generate_foia_request,
        inputs=[requester, description],
        outputs=gr.File()
    )

    gr.Button("Show Entity Graph").click(build_entity_graph, outputs=gr.Plot())
    gr.Button("Show Timeline").click(build_timeline, outputs=gr.Plot())

app.launch()