GodsDevProject committed on
Commit
4394fda
·
verified ·
1 Parent(s): 2bf60a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -64
app.py CHANGED
@@ -3,46 +3,57 @@ import time
3
  import hashlib
4
  import zipfile
5
  import io
 
6
  from datetime import datetime
7
  from urllib.parse import quote_plus, urlparse
8
- from collections import defaultdict, Counter
9
  import requests
10
 
11
  import plotly.graph_objects as go
12
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
  from reportlab.lib.styles import getSampleStyleSheet
14
 
 
 
 
 
 
 
 
 
 
 
 
15
  # ======================================================
16
  # CONFIG / FEATURE GATES
17
  # ======================================================
18
 
19
- ENABLE_AI = True # explicit user opt-in required
20
- ENABLE_PDF_THUMBNAILS = True
21
  ENABLE_ENTITY_GRAPHS = True
22
  ENABLE_TIMELINES = True
23
  ENABLE_JOURNALIST_ZIP = True
 
24
 
25
  # ======================================================
26
- # BASE ADAPTER
27
  # ======================================================
28
 
29
  class FOIAAdapter:
30
  agency = "UNKNOWN"
31
  search_url = ""
32
- is_live = True
33
 
34
  def search(self, query):
35
  url = self.search_url.format(q=quote_plus(query))
36
  return [{
37
  "agency": self.agency,
38
- "title": f"{self.agency} FOIA Search Results",
39
  "url": url,
40
- "is_live": self.is_live,
41
  "timestamp": datetime.utcnow().isoformat()
42
  }]
43
 
44
  # ======================================================
45
- # LIVE AGENCIES
46
  # ======================================================
47
 
48
  class CIA(FOIAAdapter):
@@ -75,12 +86,20 @@ class NSA(FOIAAdapter):
75
 
76
  LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
77
 
 
 
 
 
 
 
 
 
78
  # ======================================================
79
  # UTILITIES
80
  # ======================================================
81
 
82
  def citation_hash(r):
83
- raw = f"{r['agency']}{r['url']}{r['timestamp']}"
84
  return hashlib.sha256(raw.encode()).hexdigest()[:16]
85
 
86
  def bluebook(r):
@@ -94,18 +113,14 @@ def ai_disclosure():
94
  "\n\n---\n"
95
  "AI DISCLOSURE\n"
96
  "โ€ข User-initiated analysis only\n"
97
- "โ€ข PDF processed only when explicitly requested\n"
98
  "โ€ข Public FOIA documents only\n"
99
- "โ€ข Not legal advice or a primary source\n"
100
- "โ€ข Verify against the original record\n"
101
  )
102
 
103
- # ======================================================
104
- # GLOBAL STATE
105
- # ======================================================
106
-
107
- LAST_RESULTS = []
108
- SELECTED_DOC = None
109
 
110
  # ======================================================
111
  # SEARCH
@@ -120,42 +135,36 @@ def run_search(query):
120
  for r in adapter.search(query):
121
  r["hash"] = citation_hash(r)
122
  LAST_RESULTS.append(r)
123
- rows.append([
124
- r["agency"],
125
- r["title"],
126
- r["url"],
127
- r["hash"]
128
- ])
129
 
130
  return rows, render_cards()
131
 
132
  # ======================================================
133
- # CARD / THUMBNAIL GALLERY
134
  # ======================================================
135
 
136
  def render_cards():
137
  cards = []
138
-
139
  for idx, r in enumerate(LAST_RESULTS):
140
  url = r["url"]
141
  is_pdf = url.lower().endswith(".pdf")
142
 
143
  preview = (
144
- f"<iframe src='{url}' width='100%' height='200'></iframe>"
145
  if is_pdf else
146
  f"<a href='{url}' target='_blank'>Open link</a>"
147
  )
148
 
149
  cards.append(f"""
150
- <div style="border:1px solid #ccc;border-radius:10px;padding:12px;margin-bottom:16px">
151
  <b>{r['agency']}</b><br>
152
  {r['title']}<br><br>
153
  {preview}
154
- <div style="margin-top:8px">
155
- <a href="{url}" target="_blank">View</a> |
156
- <a href="{url}" download>Download</a> |
157
- <a href="{url}" target="_blank">Share</a> |
158
- <a href="#" onclick="selectDoc({idx})">Ask AI</a>
159
  </div>
160
  </div>
161
  """)
@@ -163,32 +172,81 @@ def render_cards():
163
  return "".join(cards) if cards else "<i>No results</i>"
164
 
165
  # ======================================================
166
- # AI ASK (PDF ONLY WHEN CLICKED)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  # ======================================================
168
 
169
- def ask_ai(opt_in, question):
170
  if not opt_in:
171
  return "โš  AI disabled. Explicit opt-in required."
172
 
173
- if SELECTED_DOC is None:
174
  return "โš  Select a document first."
175
 
176
- r = SELECTED_DOC
177
- summary = (
 
 
 
 
 
178
  f"AI ANALYSIS\n\n"
179
  f"Agency: {r['agency']}\n"
180
  f"Title: {r['title']}\n"
181
  f"URL: {r['url']}\n\n"
182
  f"Question:\n{question}\n\n"
183
- f"Analysis:\n"
184
- f"This document is publicly available via FOIA. "
185
- f"Key themes, entities, and relevance should be reviewed directly in the source."
186
  )
187
 
188
- return summary + ai_disclosure()
 
189
 
190
  # ======================================================
191
- # ENTITY + TIMELINE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  # ======================================================
193
 
194
  def entity_graph():
@@ -206,7 +264,6 @@ def timeline():
206
  def journalist_zip():
207
  buf = io.BytesIO()
208
  with zipfile.ZipFile(buf, "w") as z:
209
- z.writestr("README.txt", "Public FOIA links only.\nNo documents included.")
210
  z.writestr("citations.txt", "\n".join(bluebook(r) for r in LAST_RESULTS))
211
  z.writestr(
212
  "links.csv",
@@ -217,24 +274,16 @@ def journalist_zip():
217
  return buf
218
 
219
  # ======================================================
220
- # JS HELPERS
221
  # ======================================================
222
 
223
- JS = """
224
- <script>
225
- function selectDoc(idx){
226
- fetch(`/select/${idx}`);
227
- alert("Document selected for AI analysis");
228
- }
229
- </script>
230
  """
231
 
232
- # ======================================================
233
- # UI
234
- # ======================================================
235
-
236
- with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
237
- gr.HTML(JS)
238
 
239
  with gr.Tabs():
240
  with gr.Tab("๐Ÿ” Search"):
@@ -244,19 +293,22 @@ with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
244
  gallery = gr.HTML()
245
  search_btn.click(run_search, query, [table, gallery])
246
 
247
- with gr.Tab("๐Ÿ“„ Documents"):
248
- gallery.render()
249
-
250
- with gr.Tab("๐Ÿง  AI Ask"):
251
  ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
 
252
  question = gr.Textbox(label="Ask about selected document", lines=4)
253
- answer = gr.Textbox(lines=14)
254
- gr.Button("Ask AI").click(ask_ai, [ai_opt, question], answer)
255
 
256
  with gr.Tab("๐Ÿ“Š Analysis"):
257
  gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
258
  gr.Button("Timeline").click(timeline, outputs=gr.Plot())
259
 
 
 
 
 
 
260
  with gr.Tab("๐Ÿ—‚ Exports"):
261
  gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
262
 
 
3
  import hashlib
4
  import zipfile
5
  import io
6
+ import uuid
7
  from datetime import datetime
8
  from urllib.parse import quote_plus, urlparse
9
+ from collections import Counter
10
  import requests
11
 
12
  import plotly.graph_objects as go
13
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
14
  from reportlab.lib.styles import getSampleStyleSheet
15
 
16
+ # ======================================================
17
+ # OPTIONAL PDF TEXT EXTRACTION (SAFE / GUARDED)
18
+ # ======================================================
19
+
20
+ PDF_TEXT_AVAILABLE = False
21
+ try:
22
+ from pdfminer.high_level import extract_text
23
+ PDF_TEXT_AVAILABLE = True
24
+ except Exception:
25
+ PDF_TEXT_AVAILABLE = False
26
+
27
  # ======================================================
28
  # CONFIG / FEATURE GATES
29
  # ======================================================
30
 
31
+ ENABLE_AI = True # explicit opt-in required
32
+ ENABLE_PDF_EXTRACTION = True # user + checkbox gated
33
  ENABLE_ENTITY_GRAPHS = True
34
  ENABLE_TIMELINES = True
35
  ENABLE_JOURNALIST_ZIP = True
36
+ ENABLE_LITIGATION_PDF = True
37
 
38
  # ======================================================
39
+ # BASE ADAPTER (LINK-OUT ONLY)
40
  # ======================================================
41
 
42
  class FOIAAdapter:
43
  agency = "UNKNOWN"
44
  search_url = ""
 
45
 
46
  def search(self, query):
47
  url = self.search_url.format(q=quote_plus(query))
48
  return [{
49
  "agency": self.agency,
50
+ "title": f"{self.agency} FOIA Reading Room Result",
51
  "url": url,
 
52
  "timestamp": datetime.utcnow().isoformat()
53
  }]
54
 
55
  # ======================================================
56
+ # LIVE AGENCIES (SAFE)
57
  # ======================================================
58
 
59
  class CIA(FOIAAdapter):
 
86
 
87
  LIVE_ADAPTERS = [CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()]
88
 
89
+ # ======================================================
90
+ # GLOBAL STATE (IN-MEMORY ONLY)
91
+ # ======================================================
92
+
93
+ LAST_RESULTS = []
94
+ SELECTED_INDEX = None
95
+ SHARE_REGISTRY = {}
96
+
97
  # ======================================================
98
  # UTILITIES
99
  # ======================================================
100
 
101
  def citation_hash(r):
102
+ raw = f"{r['agency']}|{r['url']}|{r['timestamp']}"
103
  return hashlib.sha256(raw.encode()).hexdigest()[:16]
104
 
105
  def bluebook(r):
 
113
  "\n\n---\n"
114
  "AI DISCLOSURE\n"
115
  "โ€ข User-initiated analysis only\n"
116
+ "โ€ข PDF text extracted only with explicit opt-in\n"
117
  "โ€ข Public FOIA documents only\n"
118
+ "โ€ข AI output is not evidence or legal advice\n"
119
+ "โ€ข Verify against the original source\n"
120
  )
121
 
122
def hash_ai_output(text):
    """Return the full SHA-256 hex digest of *text* (encoded with the default codec)."""
    hasher = hashlib.sha256()
    hasher.update(text.encode())
    return hasher.hexdigest()
 
 
 
 
124
 
125
  # ======================================================
126
  # SEARCH
 
135
  for r in adapter.search(query):
136
  r["hash"] = citation_hash(r)
137
  LAST_RESULTS.append(r)
138
+ rows.append([r["agency"], r["title"], r["url"], r["hash"]])
 
 
 
 
 
139
 
140
  return rows, render_cards()
141
 
142
  # ======================================================
143
+ # CARD GALLERY
144
  # ======================================================
145
 
146
  def render_cards():
147
  cards = []
 
148
  for idx, r in enumerate(LAST_RESULTS):
149
  url = r["url"]
150
  is_pdf = url.lower().endswith(".pdf")
151
 
152
  preview = (
153
+ f"<iframe src='{url}' height='220' width='100%'></iframe>"
154
  if is_pdf else
155
  f"<a href='{url}' target='_blank'>Open link</a>"
156
  )
157
 
158
  cards.append(f"""
159
+ <div class="card">
160
  <b>{r['agency']}</b><br>
161
  {r['title']}<br><br>
162
  {preview}
163
+ <div class="actions">
164
+ <a href="{url}" target="_blank">View</a>
165
+ <a href="{url}" download>Download</a>
166
+ <a href="/share/{idx}" target="_blank">Share</a>
167
+ <button onclick="selectDoc({idx})">Ask AI</button>
168
  </div>
169
  </div>
170
  """)
 
172
  return "".join(cards) if cards else "<i>No results</i>"
173
 
174
  # ======================================================
175
+ # PDF TEXT EXTRACTION (OPT-IN)
176
+ # ======================================================
177
+
178
def extract_pdf_text(url):
    """Download the PDF at *url* and return up to 6000 characters of its text.

    Returns ``""`` when extraction is unavailable or disabled
    (``PDF_TEXT_AVAILABLE`` / ``ENABLE_PDF_EXTRACTION``), or when the
    download or parse fails for any reason.
    """
    if not (PDF_TEXT_AVAILABLE and ENABLE_PDF_EXTRACTION):
        return ""

    try:
        resp = requests.get(url, timeout=15)
        # Do not feed an HTTP error page to the PDF parser.
        resp.raise_for_status()
        # Parse from memory rather than a fixed /tmp path: a shared file name
        # lets concurrent requests overwrite each other's download and leaves
        # the file behind afterwards.
        return extract_text(io.BytesIO(resp.content))[:6000]
    except Exception:
        # Deliberate best-effort: extraction is optional context, so any
        # network or parser failure degrades to "no text".
        return ""
189
+
190
+ # ======================================================
191
+ # AI ASK (STRICTLY OPT-IN)
192
  # ======================================================
193
 
194
def ask_ai(opt_in, extract_opt_in, question):
    """Build the analysis text for the currently selected document.

    Args:
        opt_in: Truthy when the user enabled AI.
        extract_opt_in: Truthy when the user allowed PDF text extraction.
        question: The user's question, echoed into the output.

    Returns:
        A warning string when AI is disabled or no valid document is
        selected; otherwise the analysis text followed by the AI disclosure
        and a SHA-256 integrity hash of that text.
    """
    # Honour the module-level feature gate as well as the per-request opt-in.
    if not (ENABLE_AI and opt_in):
        return "⚠ AI disabled. Explicit opt-in required."

    # The selection can go stale when the result list changes, so validate
    # the index instead of letting the lookup raise IndexError.
    idx = SELECTED_INDEX
    if idx is None or not 0 <= idx < len(LAST_RESULTS):
        return "⚠ Select a document first."

    r = LAST_RESULTS[idx]
    context = ""

    # Text is only pulled for PDF links, and only with explicit permission.
    if extract_opt_in and r["url"].lower().endswith(".pdf"):
        context = extract_pdf_text(r["url"])

    analysis = (
        f"AI ANALYSIS\n\n"
        f"Agency: {r['agency']}\n"
        f"Title: {r['title']}\n"
        f"URL: {r['url']}\n\n"
        f"Question:\n{question}\n\n"
        f"Context Extracted:\n{context[:1500]}\n\n"
        f"Analysis:\nThis document is publicly available via FOIA."
    )

    final = analysis + ai_disclosure()
    # The hash covers the text including the disclosure, but not this line.
    return final + f"\n\nIntegrity Hash: {hash_ai_output(final)}"
219
 
220
  # ======================================================
221
+ # SHARE PAGES (LINK-ONLY)
222
+ # ======================================================
223
+
224
def create_share(idx):
    """Register ``LAST_RESULTS[idx]`` under a fresh 8-character token.

    The record is stored in ``SHARE_REGISTRY`` and a message containing the
    token is returned.
    """
    # First eight hex digits of a random UUID.
    share_token = uuid.uuid4().hex[:8]
    record = LAST_RESULTS[idx]
    SHARE_REGISTRY[share_token] = record
    return "Public Share Token: " + share_token
228
+
229
+ # ======================================================
230
+ # LITIGATION APPENDIX (PDF)
231
+ # ======================================================
232
+
233
def litigation_appendix():
    """Build a PDF listing every record in ``LAST_RESULTS`` as a numbered exhibit.

    Each record becomes an "Exhibit A-<n>" heading followed by its
    ``bluebook`` citation. Returns the in-memory buffer, rewound to the start.
    """
    # NOTE(review): the result is wired to a `gr.File` output — confirm that
    # component accepts an in-memory buffer.
    output = io.BytesIO()
    pdf = SimpleDocTemplate(output)
    sheet = getSampleStyleSheet()

    flowables = [
        Paragraph("<b>Litigation Appendix</b>", sheet["Title"]),
        Spacer(1, 12),
    ]
    # Exhibits are numbered from 1 in result order.
    for number, record in enumerate(LAST_RESULTS, start=1):
        flowables.extend([
            Paragraph(f"<b>Exhibit A-{number}</b>", sheet["Heading2"]),
            Paragraph(bluebook(record), sheet["Normal"]),
            Spacer(1, 8),
        ])

    pdf.build(flowables)
    output.seek(0)
    return output
247
+
248
+ # ======================================================
249
+ # ENTITY / TIMELINE
250
  # ======================================================
251
 
252
  def entity_graph():
 
264
  def journalist_zip():
265
  buf = io.BytesIO()
266
  with zipfile.ZipFile(buf, "w") as z:
 
267
  z.writestr("citations.txt", "\n".join(bluebook(r) for r in LAST_RESULTS))
268
  z.writestr(
269
  "links.csv",
 
274
  return buf
275
 
276
  # ======================================================
277
+ # UI
278
  # ======================================================
279
 
280
+ CSS = """
281
+ .card {border:1px solid #ddd;border-radius:10px;padding:12px;margin-bottom:16px}
282
+ .actions a, .actions button {margin-right:8px}
 
 
 
 
283
  """
284
 
285
+ with gr.Blocks(css=CSS, title="Federal FOIA Intelligence Search") as app:
286
+ gr.Markdown("# ๐Ÿ›๏ธ Federal FOIA Intelligence Search\nPublic Reading Rooms Only")
 
 
 
 
287
 
288
  with gr.Tabs():
289
  with gr.Tab("๐Ÿ” Search"):
 
293
  gallery = gr.HTML()
294
  search_btn.click(run_search, query, [table, gallery])
295
 
296
+ with gr.Tab("๐Ÿง  Ask AI"):
 
 
 
297
  ai_opt = gr.Checkbox(label="Enable AI (Explicit Opt-In)")
298
+ pdf_opt = gr.Checkbox(label="Allow PDF text extraction (Explicit Opt-In)")
299
  question = gr.Textbox(label="Ask about selected document", lines=4)
300
+ answer = gr.Textbox(lines=16)
301
+ gr.Button("Ask AI").click(ask_ai, [ai_opt, pdf_opt, question], answer)
302
 
303
  with gr.Tab("๐Ÿ“Š Analysis"):
304
  gr.Button("Entity Graph").click(entity_graph, outputs=gr.Plot())
305
  gr.Button("Timeline").click(timeline, outputs=gr.Plot())
306
 
307
+ with gr.Tab("โš–๏ธ Court Tools"):
308
+ gr.Button("Generate Litigation Appendix PDF").click(
309
+ litigation_appendix, outputs=gr.File()
310
+ )
311
+
312
  with gr.Tab("๐Ÿ—‚ Exports"):
313
  gr.Button("Journalist ZIP").click(journalist_zip, outputs=gr.File())
314