Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,16 +17,14 @@ from fastapi.responses import JSONResponse, FileResponse
|
|
| 17 |
# HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
|
| 18 |
# ======================================================
|
| 19 |
|
| 20 |
-
ENABLE_FAISS_PHASE_4 = False
|
| 21 |
-
ENABLE_AI = True
|
| 22 |
-
ENABLE_PDF_EXTRACTION = True # USER OPT-IN ONLY
|
| 23 |
|
| 24 |
# ======================================================
|
| 25 |
-
# SESSION STATE
|
| 26 |
# ======================================================
|
| 27 |
|
| 28 |
LAST_RESULTS = []
|
| 29 |
-
SELECTED_INDEX = None
|
| 30 |
|
| 31 |
# ======================================================
|
| 32 |
# CRYPTOGRAPHIC CORE
|
|
@@ -42,27 +40,24 @@ def citation_hash(r):
|
|
| 42 |
|
| 43 |
def provenance_headers(payload: str):
|
| 44 |
return {
|
| 45 |
-
"Tool-Version": "1.7.
|
| 46 |
"Generated-UTC": datetime.utcnow().isoformat(),
|
| 47 |
"Content-SHA256": sha256_text(payload),
|
| 48 |
"Public-Source-Only": "true",
|
| 49 |
-
"AI-Assisted": "
|
| 50 |
}
|
| 51 |
|
| 52 |
-
def render_provenance_block(text: str):
|
| 53 |
-
return "\n".join(f"{k}: {v}" for k, v in provenance_headers(text).items())
|
| 54 |
-
|
| 55 |
# ======================================================
|
| 56 |
# FOIA ADAPTERS (LINK-OUT ONLY)
|
| 57 |
# ======================================================
|
| 58 |
|
| 59 |
class FOIAAdapter:
|
| 60 |
-
agency = "
|
| 61 |
search_url = ""
|
| 62 |
|
| 63 |
-
def search(self,
|
| 64 |
start = time.time()
|
| 65 |
-
url = self.search_url.format(q=quote_plus(
|
| 66 |
latency = round((time.time() - start) * 1000, 1)
|
| 67 |
return [{
|
| 68 |
"agency": self.agency,
|
|
@@ -85,37 +80,21 @@ class DOJ(FOIAAdapter):
|
|
| 85 |
agency = "DOJ"
|
| 86 |
search_url = "https://www.justice.gov/foia/library?search={q}"
|
| 87 |
|
| 88 |
-
class DHS(FOIAAdapter):
|
| 89 |
-
agency = "DHS"
|
| 90 |
-
search_url = "https://www.dhs.gov/foia-library/search?search={q}"
|
| 91 |
-
|
| 92 |
-
class STATE(FOIAAdapter):
|
| 93 |
-
agency = "State Department"
|
| 94 |
-
search_url = "https://foia.state.gov/Search/Search.aspx?q={q}"
|
| 95 |
-
|
| 96 |
-
class NSA(FOIAAdapter):
|
| 97 |
-
agency = "NSA"
|
| 98 |
-
search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
|
| 99 |
-
|
| 100 |
ALL_ADAPTERS = {
|
| 101 |
"CIA": CIA(),
|
| 102 |
"FBI": FBI(),
|
| 103 |
"DOJ": DOJ(),
|
| 104 |
-
"DHS": DHS(),
|
| 105 |
-
"State": STATE(),
|
| 106 |
-
"NSA": NSA(),
|
| 107 |
}
|
| 108 |
|
| 109 |
# ======================================================
|
| 110 |
-
# PDF RESOLUTION (SAFE)
|
| 111 |
# ======================================================
|
| 112 |
|
| 113 |
def resolve_pdf_url(url):
|
| 114 |
try:
|
| 115 |
-
r = requests.
|
| 116 |
ct = r.headers.get("content-type", "").lower()
|
| 117 |
-
|
| 118 |
-
return is_pdf, r.url
|
| 119 |
except Exception:
|
| 120 |
return False, url
|
| 121 |
|
|
@@ -124,17 +103,16 @@ def resolve_pdf_url(url):
|
|
| 124 |
# ======================================================
|
| 125 |
|
| 126 |
def run_search(query, agencies):
|
| 127 |
-
global LAST_RESULTS
|
| 128 |
-
SELECTED_INDEX = None
|
| 129 |
LAST_RESULTS = []
|
| 130 |
rows = []
|
| 131 |
|
| 132 |
for name in agencies:
|
| 133 |
-
|
| 134 |
-
for r in adapter.search(query):
|
| 135 |
r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
|
| 136 |
r["hash"] = citation_hash(r)
|
| 137 |
LAST_RESULTS.append(r)
|
|
|
|
| 138 |
rows.append([
|
| 139 |
r["agency"],
|
| 140 |
r["title"],
|
|
@@ -143,7 +121,7 @@ def run_search(query, agencies):
|
|
| 143 |
f"{r['latency_ms']} ms",
|
| 144 |
])
|
| 145 |
|
| 146 |
-
return rows, render_cards()
|
| 147 |
|
| 148 |
# ======================================================
|
| 149 |
# ASK-AI GOVERNANCE GATE
|
|
@@ -152,9 +130,9 @@ def run_search(query, agencies):
|
|
| 152 |
def can_enable_ai(r):
|
| 153 |
return (
|
| 154 |
ENABLE_AI
|
| 155 |
-
and r
|
| 156 |
-
and r
|
| 157 |
-
and not r
|
| 158 |
)
|
| 159 |
|
| 160 |
def ask_ai_for_document(index: int):
|
|
@@ -168,33 +146,31 @@ def ask_ai_for_document(index: int):
|
|
| 168 |
}
|
| 169 |
|
| 170 |
# ======================================================
|
| 171 |
-
#
|
| 172 |
# ======================================================
|
| 173 |
|
| 174 |
def render_cards():
|
| 175 |
-
|
| 176 |
-
for
|
| 177 |
-
|
| 178 |
<div class="card">
|
| 179 |
<div class="card-header">
|
| 180 |
-
<
|
| 181 |
<button class="ask-ai"
|
| 182 |
-
onclick="askAI({
|
| 183 |
{"disabled" if not can_enable_ai(r) else ""}>
|
| 184 |
Ask AI
|
| 185 |
</button>
|
| 186 |
</div>
|
| 187 |
-
<div
|
| 188 |
<div class="actions">
|
| 189 |
-
<a href="{r['resolved_url']}" target="_blank">View
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
<div class="helper">
|
| 193 |
-
Why am I seeing this? This is a publicly released FOIA document.
|
| 194 |
</div>
|
| 195 |
</div>
|
| 196 |
""")
|
| 197 |
-
return "".join(
|
| 198 |
|
| 199 |
# ======================================================
|
| 200 |
# COURT BUNDLE
|
|
@@ -206,7 +182,7 @@ def generate_court_bundle():
|
|
| 206 |
for i, r in enumerate(LAST_RESULTS, 1):
|
| 207 |
z.writestr(
|
| 208 |
f"Exhibit_{i:03d}.txt",
|
| 209 |
-
f"{r['agency']}\n{r['resolved_url']}
|
| 210 |
)
|
| 211 |
z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])
|
| 212 |
return tf.name
|
|
@@ -228,44 +204,41 @@ def court_bundle():
|
|
| 228 |
path = generate_court_bundle()
|
| 229 |
return FileResponse(path, filename="court_bundle.zip")
|
| 230 |
|
| 231 |
-
if os.path.exists("governance-site"):
|
| 232 |
-
fastapi_app.mount(
|
| 233 |
-
"/gov",
|
| 234 |
-
StaticFiles(directory="governance-site", html=True),
|
| 235 |
-
name="governance",
|
| 236 |
-
)
|
| 237 |
-
|
| 238 |
# ======================================================
|
| 239 |
# UI
|
| 240 |
# ======================================================
|
| 241 |
|
| 242 |
CSS = """
|
| 243 |
-
.card {border:1px solid #2a2a2a;border-radius:
|
| 244 |
-
.card-header {display:flex;justify-content:space-between;
|
| 245 |
-
.ask-ai {background
|
| 246 |
-
.
|
| 247 |
-
.helper {font-size:0.75rem;color:#aaa;margin-top:6px;}
|
| 248 |
-
.actions {margin-top:12px;display:flex;gap:16px;}
|
| 249 |
"""
|
| 250 |
|
| 251 |
JS = """
|
| 252 |
function askAI(idx){
|
| 253 |
fetch('/ask_ai?index=' + idx)
|
| 254 |
.then(r=>r.json())
|
| 255 |
-
.then(d=>alert('AI
|
| 256 |
}
|
| 257 |
"""
|
| 258 |
|
| 259 |
with gr.Blocks() as gradio_ui:
|
| 260 |
gr.Markdown("## Federal FOIA Intelligence Search")
|
| 261 |
-
agencies = gr.CheckboxGroup(
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
| 263 |
table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
|
| 264 |
gallery = gr.HTML()
|
| 265 |
-
status = gr.Textbox(lines=4)
|
| 266 |
-
gr.Button("Search").click(run_search,[query,agencies],[table,gallery,status])
|
| 267 |
|
| 268 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
gr.Button("Download Court Bundle").click(
|
| 270 |
lambda: "/court_bundle",
|
| 271 |
None,
|
|
@@ -282,6 +255,5 @@ if __name__ == "__main__":
|
|
| 282 |
gradio_ui.launch(
|
| 283 |
css=CSS,
|
| 284 |
js=JS,
|
| 285 |
-
title="Federal FOIA Intelligence Search",
|
| 286 |
show_error=True,
|
| 287 |
)
|
|
|
|
| 17 |
# HARD GOVERNANCE FLAGS (NON-NEGOTIABLE)
|
| 18 |
# ======================================================
|
| 19 |
|
| 20 |
+
ENABLE_FAISS_PHASE_4 = False
|
| 21 |
+
ENABLE_AI = True
|
|
|
|
| 22 |
|
| 23 |
# ======================================================
|
| 24 |
+
# SESSION STATE
|
| 25 |
# ======================================================
|
| 26 |
|
| 27 |
LAST_RESULTS = []
|
|
|
|
| 28 |
|
| 29 |
# ======================================================
|
| 30 |
# CRYPTOGRAPHIC CORE
|
|
|
|
| 40 |
|
| 41 |
def provenance_headers(payload: str):
    """Return the provenance metadata attached to generated output.

    Args:
        payload: Text whose SHA-256 digest is recorded in the headers.

    Returns:
        A dict of header name to string value: tool version, generation
        timestamp (UTC, ISO 8601 without offset), content digest, and the
        public-source / AI-assistance markers.
    """
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Read an aware UTC
    # clock and drop tzinfo so the emitted string keeps its previous format.
    generated = datetime.now(timezone.utc).replace(tzinfo=None)
    return {
        "Tool-Version": "1.7.2",
        "Generated-UTC": generated.isoformat(),
        "Content-SHA256": sha256_text(payload),
        "Public-Source-Only": "true",
        "AI-Assisted": "user-initiated-only",
    }
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
# ======================================================
|
| 51 |
# FOIA ADAPTERS (LINK-OUT ONLY)
|
| 52 |
# ======================================================
|
| 53 |
|
| 54 |
class FOIAAdapter:
|
| 55 |
+
agency = ""
|
| 56 |
search_url = ""
|
| 57 |
|
| 58 |
+
def search(self, q):
|
| 59 |
start = time.time()
|
| 60 |
+
url = self.search_url.format(q=quote_plus(q))
|
| 61 |
latency = round((time.time() - start) * 1000, 1)
|
| 62 |
return [{
|
| 63 |
"agency": self.agency,
|
|
|
|
| 80 |
agency = "DOJ"
|
| 81 |
search_url = "https://www.justice.gov/foia/library?search={q}"
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
ALL_ADAPTERS = {
|
| 84 |
"CIA": CIA(),
|
| 85 |
"FBI": FBI(),
|
| 86 |
"DOJ": DOJ(),
|
|
|
|
|
|
|
|
|
|
| 87 |
}
|
| 88 |
|
| 89 |
# ======================================================
|
| 90 |
+
# PDF RESOLUTION (SAFE HEAD REQUEST)
|
| 91 |
# ======================================================
|
| 92 |
|
| 93 |
def resolve_pdf_url(url):
    """Follow redirects for *url* and report whether the target looks like a PDF.

    Only a HEAD request is issued, so no document body is downloaded. The
    target counts as a PDF when the ``Content-Type`` header mentions ``pdf``
    or the path of the final (post-redirect) URL ends in ``.pdf``.

    Args:
        url: Address of the candidate document.

    Returns:
        A ``(is_pdf, final_url)`` tuple. If the request fails or the URL
        cannot be parsed, ``(False, url)`` is returned so the caller can keep
        the original link.
    """
    from urllib.parse import urlsplit

    try:
        r = requests.head(url, timeout=8, allow_redirects=True)
        final_url = r.url or url
        # Test the path component only, so "file.pdf?download=1" still
        # counts, and test the URL actually reached rather than the one
        # requested.
        path = urlsplit(final_url).path.lower()
    except (requests.RequestException, ValueError):
        # Best effort: one unreachable or malformed link must not abort
        # the whole search.
        return False, url

    ct = r.headers.get("content-type", "").lower()
    return ("pdf" in ct or path.endswith(".pdf")), final_url
|
| 100 |
|
|
|
|
| 103 |
# ======================================================
|
| 104 |
|
| 105 |
def run_search(query, agencies):
|
| 106 |
+
global LAST_RESULTS
|
|
|
|
| 107 |
LAST_RESULTS = []
|
| 108 |
rows = []
|
| 109 |
|
| 110 |
for name in agencies:
|
| 111 |
+
for r in ALL_ADAPTERS[name].search(query):
|
|
|
|
| 112 |
r["resolved_pdf"], r["resolved_url"] = resolve_pdf_url(r["url"])
|
| 113 |
r["hash"] = citation_hash(r)
|
| 114 |
LAST_RESULTS.append(r)
|
| 115 |
+
|
| 116 |
rows.append([
|
| 117 |
r["agency"],
|
| 118 |
r["title"],
|
|
|
|
| 121 |
f"{r['latency_ms']} ms",
|
| 122 |
])
|
| 123 |
|
| 124 |
+
return rows, render_cards()
|
| 125 |
|
| 126 |
# ======================================================
|
| 127 |
# ASK-AI GOVERNANCE GATE
|
|
|
|
| 130 |
def can_enable_ai(r):
    """Decide whether the "Ask AI" action may be offered for result *r*.

    All of the following must hold: the module-level ``ENABLE_AI`` flag is
    on, the link was resolved as a PDF, the resolved URL ends in ``.pdf``,
    and the record is not marked sealed. Missing or malformed fields fail
    closed (the action stays disabled) instead of raising.

    Args:
        r: Result mapping with ``resolved_pdf``, ``resolved_url`` and
            ``sealed`` entries.

    Returns:
        ``True`` when every condition passes, otherwise ``False``.
    """
    if not ENABLE_AI or not r:
        return False

    resolved_url = r.get("resolved_url")
    if not isinstance(resolved_url, str):
        return False

    return bool(
        r.get("resolved_pdf")
        and resolved_url.lower().endswith(".pdf")
        # An absent "sealed" flag is treated as sealed: the gate fails closed.
        and not r.get("sealed", True)
    )
|
| 137 |
|
| 138 |
def ask_ai_for_document(index: int):
|
|
|
|
| 146 |
}
|
| 147 |
|
| 148 |
# ======================================================
|
| 149 |
+
# RESULT CARDS
|
| 150 |
# ======================================================
|
| 151 |
|
| 152 |
def render_cards():
    """Build the HTML card list for the results held in ``LAST_RESULTS``.

    Each card shows the agency, the title, an "Ask AI" button (disabled when
    ``can_enable_ai`` rejects the record) and View / Download / Share links
    to the resolved URL. The card's position in ``LAST_RESULTS`` is passed
    to the ``askAI`` JavaScript handler.

    Returns:
        The concatenated card markup, or ``"No results."`` when there is
        nothing to show.
    """
    from html import escape

    cards = []
    for i, r in enumerate(LAST_RESULTS):
        # Agency, title and URL originate from remote responses, so they are
        # escaped before being placed in element content or attributes.
        agency = escape(str(r["agency"]))
        title = escape(str(r["title"]))
        link = escape(str(r["resolved_url"]), quote=True)
        disabled = "" if can_enable_ai(r) else "disabled"
        cards.append(f"""
<div class="card">
  <div class="card-header">
    <b>{agency}</b>
    <button class="ask-ai"
            onclick="askAI({i})"
            {disabled}>
      Ask AI
    </button>
  </div>
  <div>{title}</div>
  <div class="actions">
    <a href="{link}" target="_blank">View</a>
    <a href="{link}" download>Download</a>
    <a href="{link}" target="_blank">Share</a>
  </div>
</div>
""")
    return "".join(cards) or "No results."
|
| 174 |
|
| 175 |
# ======================================================
|
| 176 |
# COURT BUNDLE
|
|
|
|
| 182 |
for i, r in enumerate(LAST_RESULTS, 1):
|
| 183 |
z.writestr(
|
| 184 |
f"Exhibit_{i:03d}.txt",
|
| 185 |
+
f"{r['agency']}\n{r['resolved_url']}"
|
| 186 |
)
|
| 187 |
z.writestr(f"Exhibit_{i:03d}.sha256", r["hash"])
|
| 188 |
return tf.name
|
|
|
|
| 204 |
path = generate_court_bundle()
|
| 205 |
return FileResponse(path, filename="court_bundle.zip")
|
| 206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
# ======================================================
|
| 208 |
# UI
|
| 209 |
# ======================================================
|
| 210 |
|
| 211 |
CSS = """
|
| 212 |
+
.card {border:1px solid #2a2a2a;border-radius:16px;padding:16px;margin-bottom:16px;}
|
| 213 |
+
.card-header {display:flex;justify-content:space-between;}
|
| 214 |
+
.ask-ai {background:#1e88e5;color:white;border:none;border-radius:999px;padding:6px 14px;}
|
| 215 |
+
.actions {margin-top:8px;display:flex;gap:16px;}
|
|
|
|
|
|
|
| 216 |
"""
|
| 217 |
|
| 218 |
# Browser-side handler for the "Ask AI" buttons on the result cards.
# It calls /ask_ai with the card index and reports the outcome in an alert.
# HTTP errors and network failures are surfaced to the user rather than
# being left as an unhandled promise rejection.
JS = """
function askAI(idx){
  fetch('/ask_ai?index=' + encodeURIComponent(idx))
    .then(r => {
      if (!r.ok) { throw new Error('HTTP ' + r.status); }
      return r.json();
    })
    .then(d => alert('AI ready for: ' + d.title))
    .catch(e => alert('Ask AI failed: ' + e.message));
}
"""
|
| 225 |
|
| 226 |
with gr.Blocks() as gradio_ui:
|
| 227 |
gr.Markdown("## Federal FOIA Intelligence Search")
|
| 228 |
+
agencies = gr.CheckboxGroup(
|
| 229 |
+
choices=list(ALL_ADAPTERS.keys()),
|
| 230 |
+
value=list(ALL_ADAPTERS.keys())
|
| 231 |
+
)
|
| 232 |
+
query = gr.Textbox(placeholder="Search FOIA reading rooms")
|
| 233 |
table = gr.Dataframe(headers=["Agency","Title","URL","Hash","Latency"])
|
| 234 |
gallery = gr.HTML()
|
|
|
|
|
|
|
| 235 |
|
| 236 |
+
gr.Button("Search").click(
|
| 237 |
+
run_search,
|
| 238 |
+
inputs=[query, agencies],
|
| 239 |
+
outputs=[table, gallery]
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
gr.Button("Download Court Bundle").click(
|
| 243 |
lambda: "/court_bundle",
|
| 244 |
None,
|
|
|
|
| 255 |
gradio_ui.launch(
|
| 256 |
css=CSS,
|
| 257 |
js=JS,
|
|
|
|
| 258 |
show_error=True,
|
| 259 |
)
|