# FOIA_Doc_Search / app.py
# GodsDevProject's picture
# Update app.py
# c7b002a verified
# raw
# history blame
# 5.24 kB
"""
Federal FOIA Intelligence Search
Public Electronic Reading Rooms Only
"""
import asyncio
import logging
from datetime import datetime

import gradio as gr
import plotly.graph_objects as go

from ingest.registry import get_enabled_adapters
from ingest.cluster import build_embeddings, cluster_embeddings
from ingest.coverage import build_coverage_heatmap, build_agency_map
from ingest.health import get_health_snapshot
from ingest.export import (
    export_journalist_zip,
    export_pdf_report,
    export_congressional_briefing,
)
from ingest.utils import (
    highlight_terms,
    classify_foia_exemptions,
    format_bluebook_citation,
)
from ingest.timeline import build_timeline_figure
from ingest.graph import build_cluster_graph
from ingest.trends import build_exemption_trend_chart
# Page heading and sub-heading rendered at the top of the Gradio UI.
# The leading emoji was previously stored as mojibake (UTF-8 bytes decoded
# with the wrong codec); it is restored to the intended "classical building".
TITLE = "🏛️ Federal FOIA Intelligence Search"
SUBTITLE = "Public Electronic Reading Rooms Only"
# ---------------- Async-safe search ----------------
async def _run_search(query, enable_live, hide_stub):
    """Query every enabled adapter concurrently and build the derived views.

    Args:
        query: Search string passed to each adapter's ``search`` coroutine.
        enable_live: Forwarded to ``get_enabled_adapters(enable_live=...)``.
        hide_stub: When truthy, responses from adapters whose ``is_stub``
            attribute is truthy are discarded.

    Returns:
        A 7-tuple ``(results, clusters, coverage_heatmap, agency_map,
        timeline_figure, exemption_trend_chart, cluster_graph)``. ``results``
        is the flat list of result records, each annotated in place with
        ``source``, ``latency``, ``exemptions``, ``citation`` and ``date``.
    """
    logger = logging.getLogger(__name__)

    adapters = get_enabled_adapters(enable_live=enable_live)
    # return_exceptions=True keeps one failing adapter from aborting the rest;
    # failures come back as exception objects in the result list.
    responses = await asyncio.gather(
        *(adapter.search(query) for adapter in adapters),
        return_exceptions=True,
    )

    # One fallback timestamp per search, so undated records from the same
    # request share a value instead of differing by microseconds.
    fallback_date = datetime.utcnow().isoformat()

    results = []
    per_agency_counts = {}
    for adapter, resp in zip(adapters, responses):
        # BaseException (not Exception): a cancelled adapter task is returned
        # as CancelledError, which does not derive from Exception.
        if isinstance(resp, BaseException):
            # Still best-effort, but no longer silent.
            logger.warning(
                "FOIA adapter %s failed: %r", adapter.source_name, resp
            )
            continue
        if hide_stub and adapter.is_stub:
            continue

        # Treat a None response as "no hits" rather than crashing in len().
        hits = resp or []
        per_agency_counts[adapter.source_name] = len(hits)
        for r in hits:
            r["source"] = adapter.source_name
            r["latency"] = adapter.last_latency
            r["exemptions"] = classify_foia_exemptions(r.get("snippet", ""))
            r["citation"] = format_bluebook_citation(r)
            r["date"] = r.get("date", fallback_date)
            results.append(r)

    embeddings = build_embeddings(results)
    clusters = cluster_embeddings(results, embeddings)

    return (
        results,
        clusters,
        build_coverage_heatmap(per_agency_counts),
        build_agency_map(per_agency_counts),
        build_timeline_figure(results),
        build_exemption_trend_chart(results),
        build_cluster_graph(results, clusters),
    )
def search_ui(query, enable_live, hide_stub):
    """Synchronous Gradio callback that runs a search and shapes its outputs.

    Args:
        query: Text from the search box.
        enable_live: Value of the "Enable Live Public Adapters" checkbox.
        hide_stub: Value of the "Hide Stub Sources" checkbox.

    Returns:
        A 6-tuple ``(table, heatmap, agency_map, timeline, exemption_trends,
        cluster_graph)``. ``table`` is a list of rows in the column order
        source, highlighted title, URL, snippet, comma-joined exemptions,
        citation; the remaining items are passed through from ``_run_search``.
    """
    # asyncio.run() creates a fresh event loop, runs the coroutine and always
    # closes the loop afterwards. The earlier new_event_loop() /
    # run_until_complete() pair never closed its loop, leaking one per search.
    (
        results,
        _clusters,  # only needed inside _run_search to build the graph
        heatmap,
        agency_map,
        timeline,
        exemption_trends,
        cluster_graph,
    ) = asyncio.run(_run_search(query, enable_live, hide_stub))

    table = [
        [
            r["source"],
            highlight_terms(r["title"], query),
            r["url"],
            r.get("snippet", ""),
            ",".join(r["exemptions"]),
            r["citation"],
        ]
        for r in results
    ]
    return table, heatmap, agency_map, timeline, exemption_trends, cluster_graph
# ---------------- UI ----------------
def _render_preview(df, evt: gr.SelectData):
    """Build the Markdown preview for the table row the user selected.

    The ``gr.SelectData`` annotation is what makes Gradio inject the event
    payload; an un-annotated lambda never receives it.

    Args:
        df: Current value of the results table. Presumably a pandas DataFrame
            (Gradio's default Dataframe type); a plain list of rows is also
            accepted.
        evt: Selection event; ``evt.index`` identifies the clicked cell.

    Returns:
        Markdown with the row's title, snippet and URL.
    """
    # For a Dataframe the index is (row, column); tolerate a bare row index.
    index = evt.index
    row_number = index[0] if isinstance(index, (list, tuple)) else index
    # Positional row access: df[...] on a DataFrame selects columns, not rows.
    row = list(df.iloc[row_number]) if hasattr(df, "iloc") else list(df[row_number])
    # Column order matches the table headers: Agency, Title, URL, Snippet, ...
    return f"### {row[1]}\n\n{row[3]}\n\n🔗 {row[2]}"


# Layout and event wiring for the Gradio app. Mojibake emoji in the labels
# are restored to the characters their byte sequences encode.
with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(f"### {SUBTITLE}")
    gr.Markdown(
        "**Terms of Use:** Indexes only documents already released via official FOIA Electronic Reading Rooms."
    )

    with gr.Row():
        query = gr.Textbox(label="Search FOIA documents")
        search_btn = gr.Button("Search")

    with gr.Row():
        enable_live = gr.Checkbox(label="Enable Live Public Adapters", value=False)
        hide_stub = gr.Checkbox(label="Hide Stub Sources", value=True)

    results_table = gr.Dataframe(
        headers=["Agency", "Title", "URL", "Snippet", "FOIA Exemptions", "Citation"],
        wrap=True,
    )
    preview_box = gr.Markdown(label="📄 Document Preview")

    with gr.Tabs():
        with gr.Tab("🧠 Cluster Graph"):
            cluster_plot = gr.Plot()
        with gr.Tab("🗺️ Agency Coverage Map"):
            agency_map_plot = gr.Plot()
        with gr.Tab("📊 Coverage Heatmap"):
            heatmap_box = gr.JSON()
        with gr.Tab("🗓️ Timeline"):
            timeline_plot = gr.Plot()
        with gr.Tab("⚖️ FOIA Exemption Trends"):
            exemption_plot = gr.Plot()
        with gr.Tab("🏥 Health"):
            # Snapshot is taken once, when the UI is built.
            health_box = gr.JSON(value=get_health_snapshot())

    with gr.Row():
        export_zip_btn = gr.Button("🧾 Journalist ZIP")
        export_pdf_btn = gr.Button("📄 Transparency PDF")
        export_congress_btn = gr.Button("🏛️ Congressional Briefing PDF")
    export_status = gr.Textbox(label="Export Status")

    # Output order must match the tuple returned by search_ui.
    search_btn.click(
        fn=search_ui,
        inputs=[query, enable_live, hide_stub],
        outputs=[
            results_table,
            heatmap_box,
            agency_map_plot,
            timeline_plot,
            exemption_plot,
            cluster_plot,
        ],
    )

    # The table must be listed as an input so the handler receives its value.
    results_table.select(
        _render_preview,
        inputs=results_table,
        outputs=preview_box,
    )

    export_zip_btn.click(export_journalist_zip, results_table, export_status)
    export_pdf_btn.click(export_pdf_report, results_table, export_status)
    export_congress_btn.click(export_congressional_briefing, results_table, export_status)

demo.queue().launch()