GodsDevProject committed on
Commit
af459fb
·
verified ·
1 Parent(s): 00d0843

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -280
app.py CHANGED
@@ -1,23 +1,12 @@
1
- # ======================================================
2
- # app.py — FastAPI + Gradio Hybrid (Court / Research Modes)
3
- # ======================================================
4
-
5
  import gradio as gr
6
- import time
7
- import hashlib
8
- import io
9
- import base64
10
- from datetime import datetime, timedelta
11
  from urllib.parse import quote_plus
12
- import requests
13
- from fastapi import FastAPI
14
- from fastapi.middleware.cors import CORSMiddleware
15
 
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
17
  from reportlab.lib.styles import getSampleStyleSheet
18
-
19
- from citations import bluebook_exhibit
20
- from foia_requests import generate_foia_request_text
21
 
22
  # ======================================================
23
  # HARD FEATURE FLAGS (GOVERNANCE ENFORCED)
@@ -25,345 +14,255 @@ from foia_requests import generate_foia_request_text
25
 
26
  ENABLE_FAISS_PHASE_4 = False
27
  ENABLE_AI = True
28
- ENABLE_PDF_EXTRACTION = True
29
 
30
  # ======================================================
31
- # OPTIONAL PDF SUPPORT
32
  # ======================================================
33
 
34
- PDF_TEXT_AVAILABLE = False
35
- PDF_THUMBNAIL_AVAILABLE = False
36
 
37
- try:
38
- from pdfminer.high_level import extract_text
39
- PDF_TEXT_AVAILABLE = True
40
- except Exception:
41
- pass
42
 
43
- try:
44
- from pdf2image import convert_from_bytes
45
- PDF_THUMBNAIL_AVAILABLE = True
46
- except Exception:
47
- pass
48
 
49
  # ======================================================
50
- # FASTAPI CORE
51
  # ======================================================
52
 
53
- api = FastAPI(
54
- title="Federal FOIA Intelligence Search",
55
- version="1.0.0",
56
- description="Public FOIA Reading Room Intelligence Tool"
57
- )
58
 
59
- api.add_middleware(
60
- CORSMiddleware,
61
- allow_origins=["*"],
62
- allow_methods=["*"],
63
- allow_headers=["*"],
64
- )
 
 
 
 
 
 
65
 
66
  # ======================================================
67
- # SESSION STATE
68
  # ======================================================
69
 
70
- LAST_RESULTS = []
71
- SELECTED_INDEX = None
72
- APP_MODE = "research" # research | court | newsroom
73
 
74
  # ======================================================
75
- # HELPERS
76
  # ======================================================
77
 
78
- def citation_hash(r):
79
- return hashlib.sha256(
80
- f"{r['agency']}|{r['resolved_url']}|{r['timestamp']}".encode()
81
- ).hexdigest()[:16]
 
 
82
 
83
- def sha256_text(text):
84
- return hashlib.sha256(text.encode()).hexdigest()
 
85
 
86
- def fre_callout():
87
- return (
88
- "Federal Rules of Evidence (Educational Reference):\n"
89
- "• Rule 901 – Authentication\n"
90
- "• Rule 803(8) – Public Records Exception\n"
91
- "• Rule 1005 – Copies of Public Records\n"
92
- "Not legal advice."
93
- )
94
 
95
- def ai_disclosure():
96
- return (
97
- "\n\n---\n"
98
- "AI DISCLOSURE\n"
99
- " User-initiated only\n"
100
- " Public FOIA documents only\n"
101
- " No legal advice\n"
102
- " Verify against cited exhibit\n"
 
 
103
  )
104
 
 
 
 
 
 
 
 
 
 
105
  # ======================================================
106
- # FOIA EXHAUSTION TIMELINE
107
  # ======================================================
108
 
109
- def build_foia_exhaustion_timeline(request_date):
110
- base = datetime.fromisoformat(request_date)
111
- return [
112
- ("FOIA Request Filed", base),
113
- ("20-Day Statutory Response Due", base + timedelta(days=20)),
114
- ("Administrative Appeal Window", base + timedelta(days=20)),
115
- ("Constructive Exhaustion Eligible", base + timedelta(days=30)),
116
- ("Judicial Review Eligible", base + timedelta(days=90)),
117
- ]
118
 
119
  # ======================================================
120
- # PDF RESOLUTION
121
  # ======================================================
122
 
123
- def resolve_pdf_url(url):
124
- try:
125
- r = requests.get(
126
- url,
127
- timeout=15,
128
- allow_redirects=True,
129
- headers={"User-Agent": "FOIA-Research-Tool"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  )
131
- ct = r.headers.get("content-type", "").lower()
132
- is_pdf = r.url.lower().endswith(".pdf") or "application/pdf" in ct
133
- return is_pdf, r.url
134
- except Exception:
135
- return False, url
136
-
137
- def generate_pdf_thumbnails(url, max_pages=3):
138
- if not PDF_THUMBNAIL_AVAILABLE:
139
- return []
140
- try:
141
- r = requests.get(url, timeout=15)
142
- images = convert_from_bytes(r.content, first_page=1, last_page=max_pages)
143
- thumbs = []
144
- for img in images:
145
- buf = io.BytesIO()
146
- img.save(buf, format="PNG")
147
- thumbs.append(base64.b64encode(buf.getvalue()).decode())
148
- return thumbs
149
- except Exception:
150
- return []
151
 
152
  # ======================================================
153
- # FOIA ADAPTERS (LINK-OUT ONLY)
154
  # ======================================================
155
 
156
  class FOIAAdapter:
157
- agency = "UNKNOWN"
158
- search_url = ""
159
-
160
- def search(self, query):
161
- start = time.time()
162
- url = self.search_url.format(q=quote_plus(query))
163
- latency = round((time.time() - start) * 1000, 1)
164
  return [{
165
  "agency": self.agency,
166
- "title": f"{self.agency} FOIA Reading Room Result",
167
- "url": url,
168
- "timestamp": datetime.utcnow().isoformat(),
169
- "latency_ms": latency,
170
- "sealed": False,
171
- "redacted": False
172
  }]
173
 
174
  class CIA(FOIAAdapter):
175
  agency = "CIA"
176
- search_url = "https://www.cia.gov/readingroom/search/site/{q}"
177
 
178
  class FBI(FOIAAdapter):
179
  agency = "FBI"
180
- search_url = "https://vault.fbi.gov/search?SearchableText={q}"
181
-
182
- class DOJ(FOIAAdapter):
183
- agency = "DOJ"
184
- search_url = "https://www.justice.gov/foia/library?search={q}"
185
-
186
- class DHS(FOIAAdapter):
187
- agency = "DHS"
188
- search_url = "https://www.dhs.gov/foia-library/search?search={q}"
189
-
190
- class STATE(FOIAAdapter):
191
- agency = "State Department"
192
- search_url = "https://foia.state.gov/Search/Search.aspx?q={q}"
193
-
194
- class NSA(FOIAAdapter):
195
- agency = "NSA"
196
- search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
197
-
198
- ALL_ADAPTERS = {
199
- "CIA": CIA(),
200
- "FBI": FBI(),
201
- "DOJ": DOJ(),
202
- "DHS": DHS(),
203
- "State": STATE(),
204
- "NSA": NSA()
205
- }
206
 
207
  # ======================================================
208
  # SEARCH
209
  # ======================================================
210
 
211
  def run_search(query, agencies):
212
- global LAST_RESULTS, SELECTED_INDEX
213
- SELECTED_INDEX = None
214
  LAST_RESULTS = []
215
  rows = []
216
-
217
- for name in agencies:
218
- adapter = ALL_ADAPTERS[name]
219
- for r in adapter.search(query):
220
- r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
221
- r["hash"] = citation_hash(r)
222
- r["thumbnails"] = (
223
- generate_pdf_thumbnails(r["resolved_url"])
224
- if r["resolved_pdf"] else []
225
- )
226
  LAST_RESULTS.append(r)
227
- rows.append([
228
- r["agency"],
229
- r["title"],
230
- r["resolved_url"],
231
- r["hash"],
232
- f"{r['latency_ms']} ms"
233
- ])
234
-
235
- return rows, render_cards(), "Ready"
236
 
237
  # ======================================================
238
- # CARD RENDER
239
  # ======================================================
240
 
241
- def render_cards():
242
- cards = []
243
- for idx, r in enumerate(LAST_RESULTS):
244
- thumbs = "".join(
245
- f'<img src="data:image/png;base64,{t}" style="width:32%;margin:4px;border-radius:6px" />'
246
- for t in r["thumbnails"]
247
- )
248
-
249
- preview = thumbs or f'<a href="{r["resolved_url"]}" target="_blank">Open Source</a>'
250
 
251
- cards.append(f"""
252
- <div class="card">
253
- <div class="card-header">
254
- <b>{r['agency']}</b>
255
- <span class="badge">PUBLIC</span>
256
- </div>
257
- <div>{preview}</div>
258
- <small>Hash: {r['hash']}</small>
259
- </div>
260
- """)
261
 
262
- return "".join(cards)
263
-
264
- # ======================================================
265
- # AI ASK
266
- # ======================================================
267
-
268
- def ask_ai(opt_in, pdf_opt_in, question):
269
- if not opt_in:
270
- return "Explicit AI opt-in required."
271
- if SELECTED_INDEX is None:
272
- return "Select a document first."
273
-
274
- r = LAST_RESULTS[SELECTED_INDEX]
275
- if not r["resolved_pdf"]:
276
- return "AI available only for public PDFs."
277
-
278
- context = ""
279
- pin = "n.p."
280
-
281
- if pdf_opt_in and PDF_TEXT_AVAILABLE:
282
- try:
283
- raw = extract_text(io.BytesIO(
284
- requests.get(r["resolved_url"], timeout=15).content
285
- ))
286
- context = raw[:4000]
287
- pin = "p. 1"
288
- except Exception:
289
- pass
290
-
291
- analysis = (
292
- f"{bluebook_exhibit(r, SELECTED_INDEX + 1, pin=pin)}\n\n"
293
- f"{fre_callout()}\n\n"
294
- f"Question:\n{question}\n\nContext:\n{context}"
295
- )
296
 
297
- final = analysis + ai_disclosure()
298
- return final + f"\n\nIntegrity Hash: {sha256_text(final)}"
 
 
 
 
 
 
 
 
 
 
 
299
 
300
- # ======================================================
301
- # MODE SWITCH
302
- # ======================================================
303
 
304
- def set_mode(mode):
305
- global APP_MODE
306
- APP_MODE = mode
307
- return f"Mode: {mode.upper()}"
308
 
309
  # ======================================================
310
  # UI
311
  # ======================================================
312
 
313
- CSS = """
314
- .card {border:1px solid #ddd;border-radius:14px;padding:14px;margin:14px 0;background:#fafafa;}
315
- .badge {background:#eef;padding:4px 10px;border-radius:999px;font-size:0.75em;}
316
- .gradio-container .tab-nav {position:sticky;top:0;z-index:1000;background:white}
317
- @media (max-width: 640px){
318
- .tab-nav button span {display:none}
319
- }
320
- """
321
-
322
- with gr.Blocks(css=CSS, title="FOIA Intelligence Search") as gradio_app:
323
  gr.Markdown("## Federal FOIA Intelligence Search")
324
 
325
- with gr.Row():
326
- mode = gr.Radio(
327
- ["research", "court", "newsroom"],
328
- value="research",
329
- label="Operating Mode"
330
- )
331
- mode_status = gr.Textbox(interactive=False)
332
-
333
- mode.change(set_mode, mode, mode_status)
334
-
335
  with gr.Tab("Search"):
336
- agencies = gr.CheckboxGroup(list(ALL_ADAPTERS.keys()), value=list(ALL_ADAPTERS.keys()))
 
 
 
337
  query = gr.Textbox()
338
- table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
339
- cards = gr.HTML()
340
- gr.Button("Search").click(run_search, [query, agencies], [table, cards, mode_status])
341
-
342
- with gr.Tab("AI"):
343
- ai_opt = gr.Checkbox(label="Enable AI")
344
- pdf_opt = gr.Checkbox(label="Allow PDF Extraction")
345
- q = gr.Textbox(lines=4)
346
- a = gr.Textbox(lines=18)
347
- gr.Button("Ask").click(ask_ai, [ai_opt, pdf_opt, q], a)
348
-
349
- with gr.Tab("Timeline"):
350
- d = gr.Textbox(value=datetime.utcnow().date().isoformat())
351
- t = gr.Textbox(lines=8)
352
- gr.Button("Build").click(
353
- lambda d: "\n".join(f"{k}: {v.date()}" for k,v in build_foia_exhaustion_timeline(d)),
354
- d, t
355
  )
 
356
 
357
- # ======================================================
358
- # MOUNT GRADIO INTO FASTAPI
359
- # ======================================================
360
-
361
- api = gr.mount_gradio_app(api, gradio_app, path="/")
 
 
 
362
 
363
- # ======================================================
364
- # ENTRYPOINT
365
- # ======================================================
366
 
367
- if __name__ == "__main__":
368
- import uvicorn
369
- uvicorn.run(api, host="0.0.0.0", port=7860)
 
 
 
 
 
1
  import gradio as gr
2
+ import time, hashlib, io, zipfile, os, tempfile
3
+ import xml.etree.ElementTree as ET
4
+ from datetime import datetime
 
 
5
  from urllib.parse import quote_plus
 
 
 
6
 
7
  from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
8
  from reportlab.lib.styles import getSampleStyleSheet
9
+ from reportlab.lib.pagesizes import LETTER
 
 
10
 
11
  # ======================================================
12
  # HARD FEATURE FLAGS (GOVERNANCE ENFORCED)
 
14
 
# Hard on/off switches for optional subsystems.
# NOTE(review): neither flag is read by any code visible in this part of the
# file — confirm whether they are consumed elsewhere before relying on them.
ENABLE_FAISS_PHASE_4 = False
ENABLE_AI = True
 
17
 
18
  # ======================================================
19
+ # FIPS MODE
20
  # ======================================================
21
 
# Reported verbatim (lower-cased) in the "FIPS-140-Mode" provenance header.
# NOTE(review): no visible code changes its hashing behaviour based on this
# flag — it appears to be informational only; confirm.
FIPS_140_MODE = False
 
23
 
24
+ # ======================================================
25
+ # SESSION STATE
26
+ # ======================================================
 
 
27
 
# Result dicts from the most recent run_search() call; read by the exhibit
# list and court bundle generators.
# NOTE(review): this is a single module-level list, so it is presumably
# shared by every browser session served by this process — confirm that is
# acceptable.
LAST_RESULTS = []
 
 
 
 
29
 
30
  # ======================================================
31
+ # CRYPTOGRAPHIC CORE
32
  # ======================================================
33
 
def sha256_text(t: str):
    """Return the hexadecimal SHA-256 digest of ``t``.

    The text is encoded with ``str.encode()``'s default codec (UTF-8)
    before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(t.encode())
    return hasher.hexdigest()
 
 
 
36
 
def provenance_headers(payload: str):
    """Build the provenance metadata recorded alongside a generated document.

    Args:
        payload: Text whose SHA-256 digest is stored under ``Content-SHA256``.

    Returns:
        A dict mapping header names to string values, in display order.
    """
    # Imported locally because the module header only imports ``datetime``.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12.  Read an aware
    # UTC clock and drop the tzinfo so the emitted value keeps the same naive
    # ISO-8601 shape (no "+00:00" suffix) that ``utcnow().isoformat()`` gave.
    generated = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        "Tool-Version": "1.7.0",
        "Generated-UTC": generated.isoformat(),
        "Content-SHA256": sha256_text(payload),
        "Public-Source-Only": "true",
        "AI-Assisted": "formatting-only",
        # Echoes the module flag as "true"/"false"; nothing is switched here.
        "FIPS-140-Mode": str(FIPS_140_MODE).lower(),
    }
46
+
def render_provenance_block(text: str):
    """Render the provenance headers for ``text`` as ``Name: value`` lines.

    Returns:
        One line per header, joined with newlines, in the order produced by
        ``provenance_headers``.
    """
    rendered = []
    for name, value in provenance_headers(text).items():
        rendered.append(f"{name}: {value}")
    return "\n".join(rendered)
49
 
50
  # ======================================================
51
+ # ECF NUMBER (LOCAL / PRE-FILING ONLY)
52
  # ======================================================
53
 
def generate_ecf_filing_number():
    """Return a local pre-filing reference of the form
    ``ECF-PREFILE-YYYYMMDD-HHMMSS`` based on the current UTC time.

    The stamp has one-second resolution, so two calls within the same second
    return the same reference.
    """
    # Imported locally because the module header only imports ``datetime``.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware UTC
    # "now" formats to exactly the same string with this pattern.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    return f"ECF-PREFILE-{stamp}"
 
56
 
57
  # ======================================================
58
+ # DISTRICT DEFINITIONS
59
  # ======================================================
60
 
# Districts offered in the Court Bundle dropdown, in display order.  Each maps
# to its own (currently empty) settings dict.
DISTRICT_SCHEMAS = {
    district_name: {}
    for district_name in ("Generic", "D.D.C.", "S.D.N.Y.", "N.D. Cal.")
}
67
 
68
+ # ======================================================
69
+ # COVER SHEET PDF
70
+ # ======================================================
71
 
def generate_cover_sheet_pdf(district, ecf_no):
    """Render the pre-filing cover sheet as an in-memory PDF.

    Page one carries the cover text; after a page break the provenance block
    computed over that same cover markup is printed in the "Code" style.

    Args:
        district: District label shown on the sheet.
        ecf_no: Reference number shown on the sheet.

    Returns:
        An ``io.BytesIO`` positioned at the start of the PDF data.
    """
    sheet_styles = getSampleStyleSheet()

    body = "".join([
        f"<b>CM/ECF PRE-FILING COVER SHEET</b><br/><br/>",
        f"<b>District:</b> {district}<br/>",
        f"<b>Reference No.:</b> {ecf_no}<br/><br/>",
        "This submission is a <b>pre-filing informational bundle</b> generated ",
        "from publicly available FOIA electronic reading rooms.<br/><br/>",
        "No document in this bundle is filed, certified, or authenticated ",
        "by any court, clerk, or agency.",
    ])

    # Paragraph markup has no notion of "\n", so line breaks become <br/>.
    provenance_markup = render_provenance_block(body).replace("\n", "<br/>")

    story = [
        Paragraph(body, sheet_styles["Normal"]),
        PageBreak(),
        Paragraph(provenance_markup, sheet_styles["Code"]),
    ]

    pdf_bytes = io.BytesIO()
    SimpleDocTemplate(pdf_bytes, pagesize=LETTER).build(story)
    pdf_bytes.seek(0)
    return pdf_bytes
96
+
97
  # ======================================================
98
+ # EXHIBIT LIST
99
  # ======================================================
100
 
def generate_proposed_exhibit_list():
    """Return the proposed exhibit list as plain text.

    The text is a heading followed by one numbered line (``Exhibit 001``,
    ``Exhibit 002``, ...) per entry in ``LAST_RESULTS``, each naming the
    agency and its URL.
    """
    heading = "PROPOSED EXHIBIT LIST\n"
    entries = [
        f"Exhibit {number:03d}: {result['agency']} FOIA Reading Room "
        f"({result['url']})"
        for number, result in enumerate(LAST_RESULTS, 1)
    ]
    return "\n".join([heading, *entries])
 
109
 
110
  # ======================================================
111
+ # CLERK VERIFICATION CHECKLIST
112
  # ======================================================
113
 
def clerk_verification_checklist():
    """Return the static clerk verification checklist as plain text.

    The text ends with a trailing newline.
    """
    checklist_lines = [
        "CLERK VERIFICATION CHECKLIST",
        "",
        "☐ Confirm exhibit URLs resolve to issuing agency domains",
        "☐ Confirm SHA-256 hash matches downloaded agency document",
        "☐ Confirm document is publicly released",
        "☐ Note: Tool does NOT certify authenticity",
        "☐ Note: No sealed or restricted material included",
        "",
        "Relevant Rules:",
        "• FRE 902(5)",
        "• FRE 803(8)",
        "• FRE 1005",
        "",  # yields the final trailing newline
    ]
    return "\n".join(checklist_lines)
127
+
128
+ # ======================================================
129
+ # PDF GENERATION (WITH AI / ETHICS FOOTER)
130
+ # ======================================================
131
+
def generate_pdf(title, body, exhibit_no, ecf_no):
    """Render one exhibit as an in-memory PDF with a footer on every page.

    The document holds the title and body, then after a page break the
    provenance block computed over ``body``.

    Args:
        title: Heading text, rendered bold in the "Title" style.
        body: Body text; newlines are converted to ``<br/>`` markup.
        exhibit_no: Exhibit label printed in the footer.
        ecf_no: Reference number printed in the footer.

    Returns:
        An ``io.BytesIO`` positioned at the start of the PDF data.
    """
    buf = io.BytesIO()
    styles = getSampleStyleSheet()

    def footer(canvas, doc):
        # Save/restore so the footer font does not leak into page content.
        canvas.saveState()
        canvas.setFont("Helvetica", 8)
        canvas.drawString(
            40, 20,
            "AI-assisted formatting only; no substantive analysis or factual assertions."
        )
        canvas.drawRightString(
            580, 20,
            f"{ecf_no} — Exhibit {exhibit_no}"
        )
        canvas.restoreState()

    doc = SimpleDocTemplate(buf, pagesize=LETTER)

    # The page callbacks are arguments of build(); the SimpleDocTemplate
    # constructor ignores keywords it does not know, so passing them there
    # means the footer is never drawn.
    doc.build(
        [
            Paragraph(f"<b>{title}</b>", styles["Title"]),
            Paragraph(body.replace("\n", "<br/>"), styles["Normal"]),
            PageBreak(),
            Paragraph(render_provenance_block(body).replace("\n", "<br/>"), styles["Code"]),
        ],
        onFirstPage=footer,
        onLaterPages=footer,
    )

    buf.seek(0)
    return buf
 
 
 
163
 
164
  # ======================================================
165
+ # FOIA ADAPTERS
166
  # ======================================================
167
 
class FOIAAdapter:
    """Base adapter that turns a query into a link to an agency search page.

    Subclasses set ``agency`` (display name) and ``url`` (a template with a
    ``{q}`` placeholder).  No network request is made here.
    """

    agency = ""
    url = ""

    def search(self, q):
        """Return a one-element list describing the search link for ``q``.

        The query is URL-encoded with ``quote_plus`` before being substituted
        into the ``url`` template.
        """
        encoded_query = quote_plus(q)
        link = dict(
            agency=self.agency,
            title=f"{self.agency} FOIA Reading Room",
            url=self.url.format(q=encoded_query),
        )
        return [link]
177
 
class CIA(FOIAAdapter):
    """Adapter whose links point at the cia.gov reading-room site search."""

    agency = "CIA"
    # The encoded query is substituted for {q} as the final path segment.
    url = "https://www.cia.gov/readingroom/search/site/{q}"
181
 
class FBI(FOIAAdapter):
    """Adapter whose links point at the vault.fbi.gov search page."""

    agency = "FBI"
    # The encoded query is substituted for {q} in the SearchableText parameter.
    url = "https://vault.fbi.gov/search?SearchableText={q}"
185
+
# Registry of adapter instances keyed by agency name; the keys double as the
# choices of the agency checkbox group in the UI.
ALL_ADAPTERS = {adapter.agency: adapter for adapter in (CIA(), FBI())}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  # ======================================================
189
  # SEARCH
190
  # ======================================================
191
 
def run_search(query, agencies):
    """Build search links for ``query`` across the selected agencies.

    Rebinds the module-level ``LAST_RESULTS`` to a fresh list, fills it with
    one result dict per adapter hit (each given a ``"hash"`` entry holding the
    SHA-256 of its URL string), and returns the rows for the results table.

    Args:
        query: Search text entered by the user.
        agencies: Iterable of keys into ``ALL_ADAPTERS``.

    Returns:
        A list of ``[agency, title, url, hash]`` rows.
    """
    global LAST_RESULTS

    # Rebind first so the global is already the new list while it fills.
    LAST_RESULTS = collected = []

    for agency_name in agencies:
        adapter = ALL_ADAPTERS[agency_name]
        for hit in adapter.search(query):
            hit["hash"] = sha256_text(hit["url"])
            collected.append(hit)

    return [
        [hit["agency"], hit["title"], hit["url"], hit["hash"]]
        for hit in collected
    ]
 
 
 
 
 
 
 
202
 
203
  # ======================================================
204
+ # COURT BUNDLE
205
  # ======================================================
206
 
def generate_court_bundle(district):
    """Assemble the pre-filing court bundle as a ZIP archive on disk.

    The archive holds the cover sheet PDF, one PDF and one ``.sha256`` text
    file per entry in ``LAST_RESULTS``, the proposed exhibit list and the
    clerk verification checklist.

    Args:
        district: District label printed on the cover sheet.

    Returns:
        A binary file object, opened for reading, on ``court_bundle.zip``.
        Its ``.name`` is the archive's path.  The caller should close it;
        neither the archive nor its directory is removed automatically.
    """
    # Imported locally; only needed to clean up after a failed build.
    import shutil

    ecf_no = generate_ecf_filing_number()

    # A ``tempfile.TemporaryDirectory()`` context would delete the archive as
    # soon as this function returned, leaving the caller a handle whose path
    # no longer exists.  Use a directory that outlives the call instead.
    bundle_dir = tempfile.mkdtemp(prefix="court_bundle_")
    zpath = os.path.join(bundle_dir, "court_bundle.zip")

    try:
        with zipfile.ZipFile(zpath, "w") as z:
            # Cover sheet
            cover = generate_cover_sheet_pdf(district, ecf_no)
            z.writestr("00_Cover_Sheet.pdf", cover.read())

            # Exhibits
            for i, r in enumerate(LAST_RESULTS, 1):
                pdf = generate_pdf(
                    "Judicial Appendix",
                    f"{r['agency']} FOIA Reading Room\n{r['url']}",
                    f"{i:03d}",
                    ecf_no,
                )
                z.writestr(f"Exhibit_{i:03d}.pdf", pdf.read())
                # NOTE: this digest covers the URL string, not the bytes of
                # any downloaded agency document.
                z.writestr(
                    f"Exhibit_{i:03d}.sha256",
                    sha256_text(r["url"])
                )

            # Support docs
            z.writestr("proposed_exhibit_list.txt", generate_proposed_exhibit_list())
            z.writestr("clerk_verification_checklist.txt", clerk_verification_checklist())
    except Exception:
        # Do not leave a half-written bundle directory behind.
        shutil.rmtree(bundle_dir, ignore_errors=True)
        raise

    return open(zpath, "rb")
 
 
 
237
 
238
  # ======================================================
239
  # UI
240
  # ======================================================
241
 
def _bundle_download_path(district_name):
    """Build the court bundle and return a filesystem path for download.

    ``generate_court_bundle`` returns an open file object; the file output
    component is given a path instead.  If the object's own path no longer
    exists, its bytes are copied into a fresh temporary ``.zip`` file.
    """
    bundle = generate_court_bundle(district_name)
    if isinstance(bundle, (str, os.PathLike)):
        return os.fspath(bundle)

    with bundle:
        existing = getattr(bundle, "name", None)
        if isinstance(existing, str) and os.path.exists(existing):
            return existing
        payload = bundle.read()

    fd, path = tempfile.mkstemp(prefix="court_bundle_", suffix=".zip")
    with os.fdopen(fd, "wb") as out:
        out.write(payload)
    return path


with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
    gr.Markdown("## Federal FOIA Intelligence Search")

    with gr.Tab("Search"):
        agencies = gr.CheckboxGroup(
            list(ALL_ADAPTERS.keys()),
            value=list(ALL_ADAPTERS.keys())
        )
        query = gr.Textbox()
        table = gr.Dataframe(
            headers=["Agency", "Title", "URL", "SHA-256"]
        )
        gr.Button("Search").click(run_search, [query, agencies], table)

    with gr.Tab("Court Bundle"):
        district = gr.Dropdown(
            list(DISTRICT_SCHEMAS.keys()),
            value="Generic"
        )
        bundle_file = gr.File(label="Download Court Bundle")
        # A button click passes the dropdown's current *value* to the handler
        # and routes the result into the file component.  Hooking the file
        # component's upload event with no inputs/outputs never produced a
        # download, and a lambda default would capture the Dropdown component
        # object rather than the selected district.
        gr.Button("Generate Court Bundle").click(
            _bundle_download_path, district, bundle_file
        )

    with gr.Tab("Clerk Checklist"):
        gr.Textbox(value=clerk_verification_checklist(), lines=16)

app.launch()