GodsDevProject committed on
Commit
449afb8
·
verified ·
1 Parent(s): a063941

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -233
app.py CHANGED
@@ -1,42 +1,27 @@
1
  import gradio as gr
2
- import time
3
  import hashlib
4
  import zipfile
5
  import io
6
- import uuid
7
  from datetime import datetime
8
  from urllib.parse import quote_plus, urlparse
9
  from collections import Counter
10
- import requests
11
 
12
  import plotly.graph_objects as go
13
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
14
  from reportlab.lib.styles import getSampleStyleSheet
15
 
16
- # ======================================================
17
- # OPTIONAL PDF TEXT EXTRACTION (SAFE / GUARDED)
18
- # ======================================================
19
-
20
- PDF_TEXT_AVAILABLE = False
21
- try:
22
- from pdfminer.high_level import extract_text
23
- PDF_TEXT_AVAILABLE = True
24
- except Exception:
25
- PDF_TEXT_AVAILABLE = False
26
 
27
  # ======================================================
28
- # CONFIG / FEATURE GATES
29
  # ======================================================
30
 
31
- ENABLE_AI = True # explicit opt-in required
32
- ENABLE_PDF_EXTRACTION = True # user + checkbox gated
33
- ENABLE_ENTITY_GRAPHS = True
34
- ENABLE_TIMELINES = True
35
  ENABLE_JOURNALIST_ZIP = True
36
- ENABLE_LITIGATION_PDF = True
37
 
38
  # ======================================================
39
- # BASE ADAPTER (LINK-OUT ONLY)
40
  # ======================================================
41
 
42
  class FOIAAdapter:
@@ -44,18 +29,13 @@ class FOIAAdapter:
44
  search_url = ""
45
 
46
  def search(self, query):
47
- url = self.search_url.format(q=quote_plus(query))
48
  return [{
49
  "agency": self.agency,
50
- "title": f"{self.agency} FOIA Reading Room Result",
51
- "url": url,
52
  "timestamp": datetime.utcnow().isoformat()
53
  }]
54
 
55
- # ======================================================
56
- # LIVE AGENCIES (SAFE)
57
- # ======================================================
58
-
59
  class CIA(FOIAAdapter):
60
  agency = "CIA"
61
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
@@ -64,212 +44,61 @@ class FBI(FOIAAdapter):
64
  agency = "FBI"
65
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
66
 
67
- class DOJ(FOIAAdapter):
68
- agency = "DOJ"
69
- search_url = "https://www.justice.gov/foia/library?search={q}"
70
-
71
- class DHS(FOIAAdapter):
72
- agency = "DHS"
73
- search_url = "https://www.dhs.gov/foia-library/search?search={q}"
74
-
75
- class STATE(FOIAAdapter):
76
- agency = "State Department"
77
- search_url = "https://foia.state.gov/Search/Search.aspx?q={q}"
78
 
79
- class GSA(FOIAAdapter):
80
- agency = "GSA"
81
- search_url = "https://www.gsa.gov/reference/freedom-of-information-act-foia/foia-library?search={q}"
82
-
83
- class NSA(FOIAAdapter):
84
- agency = "NSA"
85
- search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
86
-
87
- LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
88
-
89
- # ======================================================
90
- # GLOBAL STATE (IN-MEMORY ONLY)
91
  # ======================================================
92
-
93
- LAST_RESULTS = []
94
- SELECTED_INDEX = None
95
- SHARE_REGISTRY = {}
96
-
97
  # ======================================================
98
- # UTILITIES
99
- # ======================================================
100
-
101
- def citation_hash(r):
102
- raw = f"{r['agency']}|{r['url']}|{r['timestamp']}"
103
- return hashlib.sha256(raw.encode()).hexdigest()[:16]
104
 
105
- def bluebook(r):
106
- return (
107
- f"{r['agency']}, {r['title']}, FOIA Electronic Reading Room, "
108
- f"{r['url']} (retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
109
- )
110
-
111
- def ai_disclosure():
112
- return (
113
- "\n\n---\n"
114
- "AI DISCLOSURE\n"
115
- "• User-initiated analysis only\n"
116
- "• PDF text extracted only with explicit opt-in\n"
117
- "• Public FOIA documents only\n"
118
- "• AI output is not evidence or legal advice\n"
119
- "• Verify against the original source\n"
120
- )
121
-
122
- def hash_ai_output(text):
123
- return hashlib.sha256(text.encode()).hexdigest()
124
 
125
  # ======================================================
126
  # SEARCH
127
  # ======================================================
128
 
129
  def run_search(query):
130
- global LAST_RESULTS
131
- LAST_RESULTS = []
132
  rows = []
133
 
134
- for adapter in LIVE_ADAPTERS:
135
- for r in adapter.search(query):
136
- r["hash"] = citation_hash(r)
137
- LAST_RESULTS.append(r)
138
  rows.append([r["agency"], r["title"], r["url"], r["hash"]])
139
 
140
- return rows, render_cards()
141
-
142
- # ======================================================
143
- # CARD GALLERY
144
- # ======================================================
145
-
146
- def render_cards():
147
- cards = []
148
- for idx, r in enumerate(LAST_RESULTS):
149
- url = r["url"]
150
- is_pdf = url.lower().endswith(".pdf")
151
-
152
- preview = (
153
- f"<iframe src='{url}' height='220' width='100%'></iframe>"
154
- if is_pdf else
155
- f"<a href='{url}' target='_blank'>Open link</a>"
156
- )
157
-
158
- cards.append(f"""
159
- <div class="card">
160
- <b>{r['agency']}</b><br>
161
- {r['title']}<br><br>
162
- {preview}
163
- <div class="actions">
164
- <a href="{url}" target="_blank">View</a>
165
- <a href="{url}" download>Download</a>
166
- <a href="/share/{idx}" target="_blank">Share</a>
167
- <button onclick="selectDoc({idx})">Ask AI</button>
168
- </div>
169
- </div>
170
- """)
171
-
172
- return "".join(cards) if cards else "<i>No results</i>"
173
 
174
  # ======================================================
175
- # PDF TEXT EXTRACTION (OPT-IN)
176
  # ======================================================
177
 
178
- def extract_pdf_text(url):
179
- if not (PDF_TEXT_AVAILABLE and ENABLE_PDF_EXTRACTION):
180
- return ""
181
 
182
- try:
183
- r = requests.get(url, timeout=15)
184
- with open("/tmp/tmp.pdf", "wb") as f:
185
- f.write(r.content)
186
- return extract_text("/tmp/tmp.pdf")[:6000]
187
- except Exception:
188
- return ""
189
-
190
- # ======================================================
191
- # AI ASK (STRICTLY OPT-IN)
192
- # ======================================================
193
-
194
- def ask_ai(opt_in, extract_opt_in, question):
195
- if not opt_in:
196
- return "⚠ AI disabled. Explicit opt-in required."
197
-
198
- if SELECTED_INDEX is None:
199
- return "⚠ Select a document first."
200
-
201
- r = LAST_RESULTS[SELECTED_INDEX]
202
- context = ""
203
-
204
- if extract_opt_in and r["url"].lower().endswith(".pdf"):
205
- context = extract_pdf_text(r["url"])
206
-
207
- analysis = (
208
  f"AI ANALYSIS\n\n"
209
- f"Agency: {r['agency']}\n"
210
- f"Title: {r['title']}\n"
211
- f"URL: {r['url']}\n\n"
212
  f"Question:\n{question}\n\n"
213
- f"Context Extracted:\n{context[:1500]}\n\n"
214
- f"Analysis:\nThis document is publicly available via FOIA."
 
 
 
215
  )
216
 
217
- final = analysis + ai_disclosure()
218
- return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
219
-
220
- # ======================================================
221
- # SHARE PAGES (LINK-ONLY)
222
- # ======================================================
223
-
224
- def create_share(idx):
225
- token = str(uuid.uuid4())[:8]
226
- SHARE_REGISTRY[token] = LAST_RESULTS[idx]
227
- return f"Public Share Token: {token}"
228
-
229
  # ======================================================
230
- # LITIGATION APPENDIX (PDF)
231
- # ======================================================
232
-
233
- def litigation_appendix():
234
- buf = io.BytesIO()
235
- doc = SimpleDocTemplate(buf)
236
- styles = getSampleStyleSheet()
237
- story = [Paragraph("<b>Litigation Appendix</b>", styles["Title"]), Spacer(1, 12)]
238
-
239
- for i, r in enumerate(LAST_RESULTS, start=1):
240
- story.append(Paragraph(f"<b>Exhibit A-{i}</b>", styles["Heading2"]))
241
- story.append(Paragraph(bluebook(r), styles["Normal"]))
242
- story.append(Spacer(1, 8))
243
-
244
- doc.build(story)
245
- buf.seek(0)
246
- return buf
247
-
248
- # ======================================================
249
- # ENTITY / TIMELINE
250
- # ======================================================
251
-
252
- def entity_graph():
253
- domains = Counter(urlparse(r["url"]).netloc for r in LAST_RESULTS)
254
- return go.Figure([go.Bar(x=list(domains.keys()), y=list(domains.values()))])
255
-
256
- def timeline():
257
- dates = Counter(r["timestamp"][:10] for r in LAST_RESULTS)
258
- return go.Figure([go.Bar(x=list(dates.keys()), y=list(dates.values()))])
259
-
260
- # ======================================================
261
- # JOURNALIST ZIP
262
  # ======================================================
263
 
264
  def journalist_zip():
265
  buf = io.BytesIO()
266
  with zipfile.ZipFile(buf, "w") as z:
267
- z.writestr("citations.txt", "\n".join(bluebook(r) for r in LAST_RESULTS))
268
- z.writestr(
269
- "links.csv",
270
- "agency,title,url\n" +
271
- "\n".join(f"{r['agency']},{r['title']},{r['url']}" for r in LAST_RESULTS)
272
- )
273
  buf.seek(0)
274
  return buf
275
 
@@ -277,39 +106,21 @@ def journalist_zip():
277
  # UI
278
  # ======================================================
279
 
280
- CSS = """
281
- .card {border:1px solid #ddd;border-radius:10px;padding:12px;margin-bottom:16px}
282
- .actions a, .actions button {margin-right:8px}
283
- """
284
-
285
- with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
286
- gr.Markdown("# 🏛️ Federal FOIA Intelligence Search\nPublic Reading Rooms Only")
287
-
288
  with gr.Tabs():
289
- with gr.Tab("🔍 Search"):
290
- query = gr.Textbox(label="Search FOIA Libraries")
291
- search_btn = gr.Button("Search")
292
  table = gr.Dataframe(headers=["Agency","Title","URL","Hash"])
293
- gallery = gr.HTML()
294
- search_btn.click(run_search, query, [table, gallery])
295
-
296
- with gr.Tab("🧠 Ask AI"):
297
- ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
298
- pdf_opt = gr.Checkbox(label="Allow PDF text extraction (Explicit Opt-In)")
299
- question = gr.Textbox(label="Ask about selected document", lines=4)
300
- answer = gr.Textbox(lines=16)
301
- gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, question], answer)
302
-
303
- with gr.Tab("📊 Analysis"):
304
- gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
305
- gr.Button("Timeline").click(timeline, outputs=gr.Plot())
306
 
307
- with gr.Tab("⚖️ Court Tools"):
308
- gr.Button("Generate Litigation Appendix PDF").click(
309
- litigation_appendix, outputs=gr.File()
310
- )
 
311
 
312
- with gr.Tab("🗂 Exports"):
313
  gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
314
 
315
  app.launch()
 
1
  import gradio as gr
 
2
  import hashlib
3
  import zipfile
4
  import io
 
5
  from datetime import datetime
6
  from urllib.parse import quote_plus, urlparse
7
  from collections import Counter
 
8
 
9
  import plotly.graph_objects as go
10
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
11
  from reportlab.lib.styles import getSampleStyleSheet
12
 
13
+ from bluebook import bluebook_full
14
+ from semantic import SemanticIndex, FAISS_AVAILABLE
 
 
 
 
 
 
 
 
15
 
16
  # ======================================================
17
+ # CONFIG
18
  # ======================================================
19
 
20
# Feature switches for the optional parts of the app.
ENABLE_AI = True              # gate for the "AI Ask" feature
ENABLE_JOURNALIST_ZIP = True  # gate for the ZIP export feature
 
22
 
23
  # ======================================================
24
+ # FOIA ADAPTER
25
  # ======================================================
26
 
27
class FOIAAdapter:
    """Base class for link-out adapters to an agency's FOIA search page.

    Subclasses set ``agency`` and ``search_url``; ``search_url`` must contain a
    ``{q}`` placeholder that receives the URL-encoded query. No network
    request is made here: ``search`` only builds a link.
    """

    # NOTE(review): the diff view hides one line of this class; it presumably
    # declares this default. ``search`` reads ``self.agency`` -- confirm
    # against the full file.
    agency = ""
    search_url = ""

    def search(self, query):
        """Build the single search-link record for *query*.

        Args:
            query: Free-text search term. ``None`` is treated as an empty
                string instead of raising ``TypeError`` inside ``quote_plus``.

        Returns:
            A one-element list holding a dict with the keys ``agency``,
            ``title``, ``url`` and ``timestamp`` (ISO-8601 string).
        """
        term = "" if query is None else str(query)
        link = self.search_url.format(q=quote_plus(term))
        return [{
            "agency": self.agency,
            "title": f"{self.agency} FOIA Search Results",
            "url": link,
            # Naive UTC timestamp, kept as-is so the string format consumers
            # see does not change (no "+00:00" suffix).
            "timestamp": datetime.utcnow().isoformat(),
        }]
38
 
 
 
 
 
39
class CIA(FOIAAdapter):
    # Adapter configuration only; all behavior comes from FOIAAdapter.
    agency = "CIA"
    # "{q}" is replaced with the URL-encoded query by FOIAAdapter.search.
    search_url = "https://www.cia.gov/readingroom/search/site/{q}"
 
44
  agency = "FBI"
45
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
46
 
47
# Adapter instances queried, in this order, by every search.
LIVE_ADAPTERS = [
    CIA(),
    FBI(),
]
 
 
 
 
 
 
 
 
 
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # ======================================================
50
+ # STATE
 
 
 
 
51
  # ======================================================
 
 
 
 
 
 
52
 
53
# Module-level, in-memory state shared by the UI callbacks.
RESULTS = []     # result dicts from the most recent run_search call
SEMANTIC = None  # NOTE(review): never assigned in the visible code -- confirm intended use
SELECTED = None  # result dict that ask_ai reports on; None means nothing selected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # ======================================================
58
  # SEARCH
59
  # ======================================================
60
 
61
def run_search(query):
    """Run *query* against every live adapter and return rows for the table.

    Rebinds the module-level ``RESULTS`` to the result dicts of this search;
    each dict gains a ``"hash"`` key (first 16 hex chars of the SHA-256 of its
    URL).

    Args:
        query: Search text from the UI. ``None`` or whitespace-only input
            yields no results instead of links to an empty search.

    Returns:
        A list of ``[agency, title, url, hash]`` rows, one per result, in the
        column order of the results Dataframe.
    """
    global RESULTS

    term = "" if query is None else str(query).strip()
    if not term:
        # Clear stale results so exports never reflect an earlier search.
        RESULTS = []
        return []

    # Collect into a local list first so a failing adapter cannot leave
    # RESULTS half-populated.
    found = []
    for adapter in LIVE_ADAPTERS:
        for record in adapter.search(term):
            record["hash"] = hashlib.sha256(record["url"].encode()).hexdigest()[:16]
            found.append(record)

    RESULTS = found
    return [[r["agency"], r["title"], r["url"], r["hash"]] for r in found]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  # ======================================================
75
+ # AI ASK
76
  # ======================================================
77
 
78
def ask_ai(opt_in, question):
    """Return the canned "AI analysis" text for the selected document.

    The report is produced only when the user opted in, the module-level
    ``ENABLE_AI`` switch is on, and ``SELECTED`` holds a result dict;
    otherwise a fixed refusal message is returned.

    Args:
        opt_in: Truthy when the user ticked the opt-in checkbox.
        question: The user's question, echoed verbatim into the report.

    Returns:
        The report text, or ``"AI disabled or no document selected."``.
    """
    # opt_in is tested first so an un-opted request never touches shared state.
    if not (opt_in and ENABLE_AI and SELECTED):
        return "AI disabled or no document selected."

    doc = SELECTED
    sections = [
        "AI ANALYSIS\n\n",
        f"{doc['title']}\n{doc['url']}\n\n",
        f"Question:\n{question}\n\n",
        "Analysis is informational only.\n\n",
        "AI DISCLOSURE:\n",
        "• User-initiated\n",
        "• Public FOIA documents only\n",
        "• Verify against original source\n",
    ]
    return "".join(sections)
93
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  # ======================================================
95
+ # EXPORT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  # ======================================================
97
 
98
def journalist_zip():
    """Pack citations for the current results into an in-memory ZIP archive.

    The archive contains one member, ``citations.txt``, holding one
    ``bluebook_full(r)`` string per entry of the module-level ``RESULTS``,
    joined by newlines.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing the ZIP bytes.
    """
    # NOTE(review): this value is wired to a gr.File output, which is
    # documented to take a file path -- confirm a BytesIO is accepted.
    citations = "\n".join(map(bluebook_full, RESULTS))

    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode="w") as bundle:
        bundle.writestr("citations.txt", citations)

    archive.seek(0)  # rewind so the consumer reads from the start
    return archive
104
 
 
106
  # UI
107
  # ======================================================
108
 
109
# Build the three-tab interface and wire each button to its callback.
with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
    with gr.Tabs():
        # Search tab: query box -> run_search -> results table.
        with gr.Tab("Search"):
            q = gr.Textbox()
            btn = gr.Button("Search")
            table = gr.Dataframe(headers=["Agency", "Title", "URL", "Hash"])
            btn.click(fn=run_search, inputs=q, outputs=table)

        # AI Ask tab: opt-in checkbox + question -> ask_ai -> answer box.
        with gr.Tab("AI Ask"):
            opt = gr.Checkbox(label="Enable AI (Opt-In)")
            question = gr.Textbox(lines=4)
            answer = gr.Textbox(lines=12)
            ask_btn = gr.Button("Ask AI")
            ask_btn.click(fn=ask_ai, inputs=[opt, question], outputs=answer)

        # Exports tab: the button is created before the file slot, as before.
        with gr.Tab("Exports"):
            zip_btn = gr.Button("Journalist ZIP")
            zip_btn.click(fn=journalist_zip, outputs=gr.File())

# Start the server as soon as the module is executed.
app.launch()