GodsDevProject committed on
Commit
19e3440
·
verified ·
1 Parent(s): 8f72632

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -104
app.py CHANGED
@@ -1,131 +1,116 @@
1
- # Federal FOIA Intelligence Search
2
- # Public Electronic Reading Rooms Only
 
 
 
3
 
4
- import gradio as gr
5
  import time
6
- from collections import defaultdict
 
7
  from typing import List, Dict
 
 
 
 
 
8
 
9
- from ingest.registry import get_all_adapters
10
- from analytics.semantic import semantic_cluster
11
- from analytics.health import adapter_health
12
- from analytics.foia_bcodes import classify_b_codes
13
- from exports.journalist_zip import build_zip
14
- from ui.heatmap import render_heatmap
15
-
16
- APP_TITLE = "Federal FOIA Intelligence Search"
17
  SUBTITLE = "Public Electronic Reading Rooms Only"
18
 
19
- ALL_ADAPTERS = get_all_adapters()
20
 
 
 
 
 
21
 
22
- def run_search(query: str, include_stubs: bool, extended: bool):
23
- start = time.time()
 
24
 
25
- results = []
26
- per_agency_counts = defaultdict(int)
27
- health = {}
28
 
29
- for name, adapter in ALL_ADAPTERS.items():
30
- if adapter.is_stub and not include_stubs:
31
- continue
32
- if adapter.extended and not extended:
33
  continue
34
- if adapter.killed:
35
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- try:
38
- t0 = time.time()
39
- docs = adapter.search_sync(query)
40
- latency = time.time() - t0
41
- health[name] = adapter_health(latency)
42
- for d in docs:
43
- d["agency"] = name
44
- d["b_codes"] = classify_b_codes(d)
45
- results.append(d)
46
- per_agency_counts[name] += 1
47
- except Exception as e:
48
- health[name] = {"status": "error", "detail": str(e)}
49
-
50
- clusters = semantic_cluster(results)
51
- elapsed = round(time.time() - start, 2)
52
-
53
- return {
54
- "results": results,
55
- "clusters": clusters,
56
- "counts": dict(per_agency_counts),
57
- "health": health,
58
- "elapsed": elapsed,
59
- }
60
-
61
-
62
- def render_results(state):
63
- md = []
64
- for r in state["results"]:
65
- badge = f"⚖️ {','.join(r['b_codes'])}" if r["b_codes"] else ""
66
- md.append(
67
- f"### [{r['title']}]({r['url']})\n"
68
- f"*{r['agency']}* {badge}\n\n"
69
- f"{r['snippet']}\n"
70
- )
71
- return "\n---\n".join(md)
72
-
73
-
74
- with gr.Blocks(title=APP_TITLE) as demo:
75
- gr.Markdown(f"# 🏛️ {APP_TITLE}\n### *{SUBTITLE}*")
76
  gr.Markdown(
77
- "Federated discovery across **lawfully published U.S. Government FOIA Electronic Reading Rooms**.\n\n"
78
- "### Safeguards\n"
79
- "- Public documents only\n"
80
- "- Robots.txt enforced\n"
81
- "- No restricted systems\n"
82
- "- No classified access\n"
83
- "- Per-agency kill switches\n"
84
  )
85
 
86
- query = gr.Textbox(label="Search FOIA Reading Rooms", placeholder="UAP")
87
- include_stubs = gr.Checkbox(True, label="Include Stub Sources (Coverage Only)")
88
- extended = gr.Checkbox(False, label="Enable Extended Features (Live Intelligence-Related Rooms)")
 
 
 
 
89
 
90
- search_btn = gr.Button("Search")
 
 
 
 
91
 
92
- results_md = gr.Markdown()
93
- heatmap = gr.HTML()
94
- latency_md = gr.Markdown()
95
- cluster_md = gr.Markdown()
96
- export_btn = gr.Button("🧾 Export Selected to ZIP")
97
 
98
- state = gr.State()
 
 
 
99
 
100
- def on_search(q, s, e):
101
- state_val = run_search(q, s, e)
102
- return (
103
- state_val,
104
- render_results(state_val),
105
- render_heatmap(state_val["counts"]),
106
- f"⏱️ Completed in {state_val['elapsed']}s",
107
- f"🧠 Clusters: {len(state_val['clusters'])}",
108
- )
109
 
110
  search_btn.click(
111
- on_search,
112
- inputs=[query, include_stubs, extended],
113
- outputs=[state, results_md, heatmap, latency_md, cluster_md],
114
  )
115
 
116
  export_btn.click(
117
- lambda s: build_zip(s["results"]),
118
- inputs=[state],
119
- outputs=[],
120
- )
121
-
122
- gr.Markdown(
123
- "## ⚖️ Legal & Ethical Notice\n"
124
- "This system indexes **only documents already released to the public** under FOIA.\n\n"
125
- "**It does not:**\n"
126
- "- Circumvent access controls\n"
127
- "- Access classified systems\n"
128
- "- Declassify or infer restricted information\n"
129
  )
130
 
131
- demo.launch()
 
1
+ """
2
+ Federal FOIA Intelligence Search
3
+ Public Electronic Reading Rooms Only
4
+ HF Spaces compatible (no asyncio.run)
5
+ """
6
 
 
7
  import time
8
+ import asyncio
9
+ import gradio as gr
10
  from typing import List, Dict
11
+ from ingest.registry import get_enabled_adapters
12
+ from ingest.health import get_health_snapshot
13
+ from ingest.cluster import semantic_cluster
14
+ from ingest.export import export_zip
15
+ from ingest.utils import highlight_terms
16
 
17
+ TITLE = "🏛️ Federal FOIA Intelligence Search"
 
 
 
 
 
 
 
18
  SUBTITLE = "Public Electronic Reading Rooms Only"
19
 
20
# ---------- Async-safe search wrapper ----------

async def run_search(query: str, enable_live: bool, hide_stub: bool):
    """Fan *query* out to every enabled adapter concurrently.

    Args:
        query: Free-text search string forwarded to each adapter.
        enable_live: Forwarded to the registry to include live adapters.
        hide_stub: When True, stub adapters are excluded from the search.

    Returns:
        Tuple ``(results, clustered, per_agency_counts)`` where ``results``
        is the flat list of hit dicts (each annotated with ``"source"`` and
        ``"latency"``), ``clustered`` is the ``semantic_cluster`` output,
        and ``per_agency_counts`` maps source name -> number of hits.
    """
    adapters = get_enabled_adapters(enable_live=enable_live)

    # Fix: filter stubs BEFORE dispatching. The original awaited
    # adapter.search() for stubs too and only discarded the responses
    # after asyncio.gather, wasting one fetch per hidden stub source.
    if hide_stub:
        adapters = [a for a in adapters if not a.is_stub]

    # One task per adapter; return_exceptions=True keeps a single failing
    # agency from aborting the whole federated search.
    tasks = [adapter.search(query) for adapter in adapters]
    responses = await asyncio.gather(*tasks, return_exceptions=True)

    results = []
    per_agency_counts = {}
    for adapter, response in zip(adapters, responses):
        if isinstance(response, Exception):
            # Failed adapter: skip silently; the health panel surfaces
            # adapter status separately via get_health_snapshot().
            continue
        per_agency_counts[adapter.source_name] = len(response)
        for r in response:
            r["source"] = adapter.source_name
            r["latency"] = adapter.last_latency
            results.append(r)

    clustered = semantic_cluster(results)
    return results, clustered, per_agency_counts
47
+
48
def search_ui(query, enable_live, hide_stub):
    """Synchronous Gradio callback wrapping the async federated search.

    Runs ``run_search`` on a private event loop (the module deliberately
    avoids ``asyncio.run`` for HF Spaces compatibility — see the module
    docstring) and flattens the hits into rows for the results Dataframe.

    Returns:
        Tuple ``(table, clusters, counts)`` where ``table`` is a list of
        ``[source, highlighted-title, url, snippet, latency]`` rows.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        results, clusters, counts = loop.run_until_complete(
            run_search(query, enable_live, hide_stub)
        )
    finally:
        # Fix: the original never closed the loop, leaking one event loop
        # (and its selector fd) per search. Close it and detach it from the
        # thread so nothing accidentally reuses a closed loop.
        loop.close()
        asyncio.set_event_loop(None)

    table = [
        [
            r["source"],
            highlight_terms(r["title"], query),
            r["url"],
            r.get("snippet", ""),
            f"{r.get('latency', 0):.2f}s",
        ]
        for r in results
    ]
    return table, clusters, counts
66
+
67
+
68
# ---------- UI ----------

# NOTE: Gradio renders components in creation order, so the statement order
# below is load-bearing. `demo` is the name HF Spaces looks for — keep it.
with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
    # Page header: title and subtitle as two separate markdown widgets.
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(f"### {SUBTITLE}")

    # Legal framing shown above the controls.
    gr.Markdown(
        "**Terms of Use:** This tool indexes only documents already released "
        "to the public via official FOIA Electronic Reading Rooms."
    )

    # Query input and trigger on one row.
    with gr.Row():
        query_box = gr.Textbox(label="Search FOIA documents")
        search_button = gr.Button("Search")

    # Adapter toggles on a second row.
    with gr.Row():
        live_toggle = gr.Checkbox(label="Enable Live Public Adapters", value=False)
        stub_toggle = gr.Checkbox(label="Hide Stub Sources", value=True)

    # Flat result grid: one row per document hit.
    result_grid = gr.Dataframe(
        headers=["Agency", "Title", "URL", "Snippet", "Latency"],
        interactive=False,
        wrap=True,
    )

    cluster_view = gr.JSON(label="Semantic Clusters (Explainable)")
    count_view = gr.JSON(label="Per-Agency Result Counts")

    # NOTE(review): this snapshot is taken once at app start and never
    # refreshed after searches — confirm whether it should be an output
    # of the search callback instead.
    health_view = gr.JSON(
        value=get_health_snapshot(),
        label="Agency Health / Latency",
    )

    zip_button = gr.Button("Export Selected → Journalist ZIP")
    zip_status = gr.Textbox(label="Export Status")

    # Wire the search trigger to the synchronous callback above.
    search_button.click(
        fn=search_ui,
        inputs=[query_box, live_toggle, stub_toggle],
        outputs=[result_grid, cluster_view, count_view],
    )

    # Export hands the rendered Dataframe rows to the ZIP builder.
    zip_button.click(
        fn=export_zip,
        inputs=[result_grid],
        outputs=[zip_status],
    )

demo.queue().launch()