FOIA_Doc_Search / app.py
GodsDevProject's picture
Create app.py
bd6ac57 verified
raw
history blame
5 kB
import gradio as gr
from typing import List, Dict
from ingest.registry import get_all_adapters
from ingest.export import export_results
from ingest.health import get_adapter_health
from ingest.coverage import coverage_summary
from ingest.discovery import agency_discovery
from ingest.semantic import semantic_refine, semantic_available
from ingest.timeline import release_timeline
from ingest.latency import latency_badges
from analytics.events import log_event
# Instantiate every registered agency adapter once at import time; reused by
# each search and by the latency-badge table below.
ALL_ADAPTERS = get_all_adapters()
def run_search(
    query: str,
    include_stubs: bool,
    enable_extended: bool,
    acknowledge_extended: bool,
    enable_semantic: bool,
) -> List[Dict]:
    """Fan *query* out to all registered agency adapters and merge results.

    Args:
        query: Free-text search string; blank/whitespace-only returns [].
        include_stubs: When False, adapters that are not live are skipped.
        enable_extended: Gate for extended-coverage adapters.
        acknowledge_extended: User acknowledgement required alongside
            ``enable_extended`` before extended adapters run.
        enable_semantic: Re-rank results with semantic refinement when the
            semantic backend is available.

    Returns:
        A list of result dicts. Adapter failures are surfaced as an
        "Adapter Error" row rather than aborting the whole search.
    """
    # Normalize first so "   " behaves like an empty query instead of being
    # fanned out to every adapter.
    query = (query or "").strip()
    log_event("search", {"len": len(query)})
    if not query:
        return []
    results: List[Dict] = []
    for adapter in ALL_ADAPTERS:
        # Stub adapters only run when the user opted in.
        if not include_stubs and not adapter.is_live:
            continue
        # Extended-coverage adapters need both the toggle AND the
        # acknowledgement checkbox.
        if adapter.is_extended and not (enable_extended and acknowledge_extended):
            continue
        try:
            docs = adapter.search(query)
            for d in docs:
                # Fill in presentation fields the adapter did not set.
                d.setdefault("agency", adapter.name)
                d.setdefault("status", "🟒 Live" if adapter.is_live else "πŸ”’ Stub")
                d.setdefault("exportable", adapter.is_live)
                results.append(d)
        except Exception as e:
            # Best-effort aggregation: one failing adapter must not take
            # down the whole search, so record the failure as a row.
            results.append({
                "agency": adapter.name,
                "title": "Adapter Error",
                "snippet": str(e),
                "url": "",
                "status": "⚠️ Error",
                "exportable": False,
            })
    if enable_semantic and semantic_available():
        results = semantic_refine(query, results)
    return results
def table_from_results(results):
    """Project result dicts into row lists for the results Dataframe.

    Column order matches the Dataframe headers:
    Agency, Status, Title, Snippet, URL. Missing keys become None.
    """
    columns = ("agency", "status", "title", "snippet", "url")
    rows = []
    for record in results:
        rows.append([record.get(column) for column in columns])
    return rows
def export_handler(results):
    """Build a ZIP of exportable results and reveal the download widget.

    Stub and error rows (``exportable`` falsy) are excluded. When nothing
    is exportable the file widget is kept hidden instead of producing an
    empty archive.
    """
    exportable = [r for r in results if r.get("exportable")]
    # gr.update(...) works on Gradio 3.x and 4.x; the per-component
    # classmethod form (gr.File.update) was removed in Gradio 4.x.
    if not exportable:
        return gr.update(visible=False)
    return gr.update(value=export_results(exportable), visible=True)
# ---------------------------------------------------------------------------
# UI layout and event wiring. Components are declared inside the Blocks
# context; event handlers are attached near the bottom, and static tables
# (latency, discovery, health) are populated once at startup.
# ---------------------------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown(
        "# πŸ›οΈ Federal FOIA Intelligence Search\n"
        "Public Electronic Reading Rooms only"
    )
    gr.Markdown(
        "ℹ️ Stub results are informational and cannot be exported.\n\n"
        "Semantic refinement is optional and runs only on returned results."
    )
    query = gr.Textbox(label="Search query")
    with gr.Row():
        include_stubs = gr.Checkbox(label="Include Stub Results", value=True)
        enable_extended = gr.Checkbox(label="Enable Extended Coverage", value=False)
        # Semantic toggle is disabled entirely when the backend is missing.
        enable_semantic = gr.Checkbox(
            label="Enable Semantic Refinement (Experimental)",
            value=False,
            interactive=semantic_available()
        )
    acknowledge_extended = gr.Checkbox(
        label="I understand some agencies block automated access",
        value=False
    )
    search_btn = gr.Button("Search")
    # Raw result dicts live in State; the Dataframe holds the display view.
    results_state = gr.State([])
    results_table = gr.Dataframe(
        headers=["Agency", "Status", "Title", "Snippet", "URL"],
        wrap=True,
        interactive=False
    )
    export_btn = gr.Button("Export Results (ZIP)", interactive=False)
    export_file = gr.File(visible=False)
    gr.Markdown("## πŸ“Š Coverage Heatmap")
    coverage_table = gr.Dataframe(
        headers=["Agency", "Result Count"],
        interactive=False
    )
    gr.Markdown("## πŸ•’ Release Timeline")
    timeline_table = gr.Dataframe(
        headers=["Period", "Documents"],
        interactive=False
    )
    gr.Markdown("## ⚑ Agency Latency Badges")
    latency_table = gr.Dataframe(
        headers=["Agency", "Latency (s)", "Badge"],
        interactive=False
    )
    gr.Markdown("## 🌐 Agency Discovery")
    discovery_table = gr.Dataframe(
        headers=["Agency", "Status", "Reason"],
        interactive=False
    )
    # Step 1: run the search into State; step 2: derive all display views
    # from State so the raw dicts are computed exactly once per search.
    search_btn.click(
        fn=run_search,
        inputs=[
            query,
            include_stubs,
            enable_extended,
            acknowledge_extended,
            enable_semantic,
        ],
        outputs=results_state
    ).then(
        fn=lambda r: (
            table_from_results(r),
            coverage_summary(r),
            release_timeline(r),
            # gr.update(...) is portable across Gradio 3.x/4.x; the
            # gr.Button.update classmethod was removed in Gradio 4.x.
            gr.update(interactive=any(x.get("exportable") for x in r))
        ),
        inputs=results_state,
        outputs=[
            results_table,
            coverage_table,
            timeline_table,
            export_btn
        ]
    )
    export_btn.click(
        fn=export_handler,
        inputs=results_state,
        outputs=export_file
    )
    # Static tables computed once at startup (not per-search).
    latency_table.value = latency_badges(ALL_ADAPTERS)
    discovery_table.value = agency_discovery()
    gr.Markdown("## πŸ” Adapter Health")
    gr.JSON(get_adapter_health())
app.launch()