Spaces:
Sleeping
Sleeping
| """ | |
| Federal FOIA Intelligence Search | |
| Public Electronic Reading Rooms Only | |
| """ | |
| import asyncio | |
| import gradio as gr | |
| import plotly.graph_objects as go | |
| from datetime import datetime | |
| from ingest.registry import get_enabled_adapters | |
| from ingest.cluster import build_embeddings, cluster_embeddings | |
| from ingest.coverage import build_coverage_heatmap, build_agency_map | |
| from ingest.health import get_health_snapshot | |
| from ingest.export import ( | |
| export_journalist_zip, | |
| export_pdf_report, | |
| export_congressional_briefing, | |
| ) | |
| from ingest.utils import ( | |
| highlight_terms, | |
| classify_foia_exemptions, | |
| format_bluebook_citation, | |
| ) | |
| from ingest.timeline import build_timeline_figure | |
| from ingest.graph import build_cluster_graph | |
| from ingest.trends import build_exemption_trend_chart | |
# Page heading and sub-heading; both are rendered through gr.Markdown when the
# Blocks layout is built.
# NOTE(review): the characters before "Federal" look like a mis-decoded emoji
# (mojibake) -- confirm the intended glyph and the file's encoding.
TITLE = "ποΈ Federal FOIA Intelligence Search"
SUBTITLE = "Public Electronic Reading Rooms Only"
| # ---------------- Async-safe search ---------------- | |
async def _run_search(query, enable_live, hide_stub):
    """Query every enabled adapter concurrently and build the derived views.

    Args:
        query: Search string forwarded unchanged to each adapter's ``search``.
        enable_live: Passed to ``get_enabled_adapters`` to select adapters.
        hide_stub: When true, adapters whose ``is_stub`` attribute is truthy
            are excluded. They are filtered out *before* searching, so no
            work is spent on results that would be discarded anyway.

    Returns:
        A 7-tuple ``(results, clusters, coverage_heatmap, agency_map,
        timeline_figure, exemption_trend_chart, cluster_graph)``. ``results``
        is a list of enriched result dicts; the remaining items are whatever
        the corresponding ``ingest`` helpers return.

    A failing adapter never aborts the search: its exception is logged and
    that adapter contributes neither results nor a per-agency count.
    """
    import logging
    from datetime import timezone

    logger = logging.getLogger(__name__)

    # ``a.is_stub`` is only consulted when hide_stub is set (short-circuit),
    # matching the original access pattern.
    adapters = [
        a
        for a in get_enabled_adapters(enable_live=enable_live)
        if not (hide_stub and a.is_stub)
    ]

    responses = await asyncio.gather(
        *(a.search(query) for a in adapters),
        return_exceptions=True,
    )

    # One fallback timestamp per search. Kept timezone-naive so it has the
    # same ISO format the deprecated ``datetime.utcnow()`` produced.
    fallback_date = (
        datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    )

    results = []
    per_agency_counts = {}
    for adapter, resp in zip(adapters, responses):
        # gather(return_exceptions=True) can also hand back BaseException
        # subclasses (e.g. CancelledError), so test for the base class.
        if isinstance(resp, BaseException):
            logger.warning(
                "FOIA adapter %s failed: %r", adapter.source_name, resp
            )
            continue

        per_agency_counts[adapter.source_name] = len(resp)
        for raw in resp:
            # Enrich a copy so adapter-owned objects are not mutated.
            item = dict(raw)
            item["source"] = adapter.source_name
            item["latency"] = adapter.last_latency
            item["exemptions"] = classify_foia_exemptions(
                item.get("snippet", "")
            )
            # The citation is formatted after the fields above are set,
            # preserving the original call order.
            item["citation"] = format_bluebook_citation(item)
            # ``or`` also covers a present-but-empty/None date.
            item["date"] = item.get("date") or fallback_date
            results.append(item)

    embeddings = build_embeddings(results)
    clusters = cluster_embeddings(results, embeddings)

    return (
        results,
        clusters,
        build_coverage_heatmap(per_agency_counts),
        build_agency_map(per_agency_counts),
        build_timeline_figure(results),
        build_exemption_trend_chart(results),
        build_cluster_graph(results, clusters),
    )
def search_ui(query, enable_live, hide_stub):
    """Synchronous Gradio callback: run the async search and shape its output.

    Args:
        query: Text from the search box; also used to highlight titles.
        enable_live: Checkbox value forwarded to ``_run_search``.
        hide_stub: Checkbox value forwarded to ``_run_search``.

    Returns:
        ``(table, heatmap, agency_map, timeline, exemption_trends,
        cluster_graph)`` in the order expected by the ``outputs`` list of the
        search button. ``table`` is a list of rows
        ``[source, highlighted title, url, snippet, exemptions, citation]``.
    """
    # asyncio.run creates a fresh event loop and always closes it, so the
    # per-call loop is no longer leaked or left installed on the thread.
    (
        results,
        _clusters,  # not shown directly; already folded into cluster_graph
        heatmap,
        agency_map,
        timeline,
        exemption_trends,
        cluster_graph,
    ) = asyncio.run(_run_search(query, enable_live, hide_stub))

    # Optional fields are read with .get so that one partial adapter result
    # cannot abort the whole search with a KeyError.
    table = [
        [
            r["source"],
            highlight_terms(r.get("title", ""), query),
            r.get("url", ""),
            r.get("snippet", ""),
            ",".join(r["exemptions"]),
            r["citation"],
        ]
        for r in results
    ]

    return table, heatmap, agency_map, timeline, exemption_trends, cluster_graph
| # ---------------- UI ---------------- | |
def _preview_row(df, evt: gr.SelectData):
    """Render a Markdown preview for the table row the user clicked.

    The ``gr.SelectData`` annotation is required: Gradio only injects the
    selection event into parameters annotated with that type.

    Args:
        df: Current value of the results table (a pandas DataFrame with
            Gradio's default ``type``; a list of rows is also accepted).
        evt: Selection event; for a Dataframe ``evt.index`` is ``[row, col]``.

    Returns:
        Markdown with the row's title, snippet and URL.
    """
    index = evt.index
    row = index[0] if isinstance(index, (list, tuple)) else index
    cells = df.iloc[row].tolist() if hasattr(df, "iloc") else list(df[row])
    # Column order follows the table headers:
    # Agency, Title, URL, Snippet, FOIA Exemptions, Citation.
    title, url, snippet = cells[1], cells[2], cells[3]
    return f"### {title}\n\n{snippet}\n\nπ {url}"


# NOTE(review): several labels below start with what looks like mis-decoded
# emoji (mojibake). They are reproduced exactly as they appear in this file;
# confirm the intended glyphs and the file's encoding.
with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(f"### {SUBTITLE}")
    gr.Markdown(
        "**Terms of Use:** Indexes only documents already released via official FOIA Electronic Reading Rooms."
    )

    # --- Search controls ---
    with gr.Row():
        query = gr.Textbox(label="Search FOIA documents")
        search_btn = gr.Button("Search")

    with gr.Row():
        enable_live = gr.Checkbox(label="Enable Live Public Adapters", value=False)
        hide_stub = gr.Checkbox(label="Hide Stub Sources", value=True)

    # --- Results and preview ---
    results_table = gr.Dataframe(
        headers=["Agency", "Title", "URL", "Snippet", "FOIA Exemptions", "Citation"],
        wrap=True,
    )
    preview_box = gr.Markdown(label="π Document Preview")

    # --- Visualisations ---
    with gr.Tabs():
        with gr.Tab("π§ Cluster Graph"):
            cluster_plot = gr.Plot()
        with gr.Tab("πΊοΈ Agency Coverage Map"):
            agency_map_plot = gr.Plot()
        with gr.Tab("π Coverage Heatmap"):
            heatmap_box = gr.JSON()
        with gr.Tab("ποΈ Timeline"):
            timeline_plot = gr.Plot()
        with gr.Tab("βοΈ FOIA Exemption Trends"):
            exemption_plot = gr.Plot()
        with gr.Tab("π₯ Health"):
            # The snapshot is taken once, while the layout is being built.
            health_box = gr.JSON(value=get_health_snapshot())

    # --- Exports ---
    with gr.Row():
        export_zip_btn = gr.Button("π§Ύ Journalist ZIP")
        export_pdf_btn = gr.Button("π Transparency PDF")
        export_congress_btn = gr.Button("ποΈ Congressional Briefing PDF")
    export_status = gr.Textbox(label="Export Status")

    # --- Event wiring ---
    # The outputs order must match the tuple returned by search_ui.
    search_btn.click(
        fn=search_ui,
        inputs=[query, enable_live, hide_stub],
        outputs=[
            results_table,
            heatmap_box,
            agency_map_plot,
            timeline_plot,
            exemption_plot,
            cluster_plot,
        ],
    )

    # The table is passed explicitly as an input so the handler receives its
    # current value alongside the selection event.
    results_table.select(
        _preview_row,
        inputs=results_table,
        outputs=preview_box,
    )

    export_zip_btn.click(export_journalist_zip, results_table, export_status)
    export_pdf_btn.click(export_pdf_report, results_table, export_status)
    export_congress_btn.click(export_congressional_briefing, results_table, export_status)

demo.queue().launch()