GodsDevProject committed on
Commit
fa93750
·
verified ·
1 Parent(s): 19e3440

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -30
app.py CHANGED
@@ -1,55 +1,62 @@
1
  """
2
  Federal FOIA Intelligence Search
3
  Public Electronic Reading Rooms Only
4
- HF Spaces compatible (no asyncio.run)
5
  """
6
 
7
- import time
8
  import asyncio
 
9
  import gradio as gr
10
  from typing import List, Dict
 
11
  from ingest.registry import get_enabled_adapters
12
  from ingest.health import get_health_snapshot
13
- from ingest.cluster import semantic_cluster
14
- from ingest.export import export_zip
15
- from ingest.utils import highlight_terms
 
16
 
17
  TITLE = "🏛️ Federal FOIA Intelligence Search"
18
  SUBTITLE = "Public Electronic Reading Rooms Only"
19
 
20
- # ---------- Async-safe search wrapper ----------
21
-
22
- async def run_search(query: str, enable_live: bool, hide_stub: bool):
23
- adapters = get_enabled_adapters(enable_live=enable_live)
24
- results = []
25
- per_agency_counts = {}
26
 
27
- tasks = []
28
- for adapter in adapters:
29
- tasks.append(adapter.search(query))
30
 
 
 
 
31
  responses = await asyncio.gather(*tasks, return_exceptions=True)
32
 
33
- for adapter, response in zip(adapters, responses):
34
- if isinstance(response, Exception):
 
 
 
35
  continue
36
  if hide_stub and adapter.is_stub:
37
  continue
38
- per_agency_counts[adapter.source_name] = len(response)
39
- for r in response:
40
  r["source"] = adapter.source_name
41
  r["latency"] = adapter.last_latency
 
 
42
  results.append(r)
43
 
44
- clustered = semantic_cluster(results)
45
- return results, clustered, per_agency_counts
 
 
 
 
 
46
 
47
 
48
  def search_ui(query, enable_live, hide_stub):
49
  loop = asyncio.new_event_loop()
50
  asyncio.set_event_loop(loop)
51
- results, clusters, counts = loop.run_until_complete(
52
- run_search(query, enable_live, hide_stub)
53
  )
54
 
55
  table = []
@@ -59,20 +66,21 @@ def search_ui(query, enable_live, hide_stub):
59
  highlight_terms(r["title"], query),
60
  r["url"],
61
  r.get("snippet", ""),
 
62
  f"{r.get('latency', 0):.2f}s"
63
  ])
64
 
65
- return table, clusters, counts
66
 
67
 
68
- # ---------- UI ----------
69
 
70
  with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
71
  gr.Markdown(f"# {TITLE}")
72
  gr.Markdown(f"### {SUBTITLE}")
73
 
74
  gr.Markdown(
75
- "**Terms of Use:** This tool indexes only documents already released "
76
  "to the public via official FOIA Electronic Reading Rooms."
77
  )
78
 
@@ -85,30 +93,31 @@ with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
85
  hide_stub = gr.Checkbox(label="Hide Stub Sources", value=True)
86
 
87
  results_table = gr.Dataframe(
88
- headers=["Agency", "Title", "URL", "Snippet", "Latency"],
89
  interactive=False,
90
  wrap=True
91
  )
92
 
93
- clusters_box = gr.JSON(label="Semantic Clusters (Explainable)")
94
  counts_box = gr.JSON(label="Per-Agency Result Counts")
 
95
 
96
  health_box = gr.JSON(
97
  value=get_health_snapshot(),
98
  label="Agency Health / Latency"
99
  )
100
 
101
- export_btn = gr.Button("Export Selected → Journalist ZIP")
102
  export_status = gr.Textbox(label="Export Status")
103
 
104
  search_btn.click(
105
  fn=search_ui,
106
  inputs=[query, enable_live, hide_stub],
107
- outputs=[results_table, clusters_box, counts_box]
108
  )
109
 
110
  export_btn.click(
111
- fn=export_zip,
112
  inputs=[results_table],
113
  outputs=[export_status]
114
  )
 
1
  """
2
  Federal FOIA Intelligence Search
3
  Public Electronic Reading Rooms Only
4
+ HF Spaces compatible
5
  """
6
 
 
7
  import asyncio
8
+ import time
9
  import gradio as gr
10
  from typing import List, Dict
11
+
12
  from ingest.registry import get_enabled_adapters
13
  from ingest.health import get_health_snapshot
14
+ from ingest.cluster import build_embeddings, cluster_embeddings
15
+ from ingest.export import export_journalist_zip
16
+ from ingest.utils import highlight_terms, classify_foia_exemptions, format_citation
17
+ from ingest.coverage import build_coverage_heatmap
18
 
19
  TITLE = "🏛️ Federal FOIA Intelligence Search"
20
  SUBTITLE = "Public Electronic Reading Rooms Only"
21
 
 
 
 
 
 
 
22
 
23
# ---------------- Async-safe search ----------------


async def _run_search(query: str, enable_live: bool, hide_stub: bool):
    """Fan the query out to every enabled reading-room adapter concurrently.

    Returns a 4-tuple of (results, clusters, per-agency counts, coverage
    heatmap data). Adapters that raise are skipped silently; stub adapters
    are skipped when ``hide_stub`` is set.
    """
    adapters = get_enabled_adapters(enable_live=enable_live)
    responses = await asyncio.gather(
        *(adapter.search(query) for adapter in adapters),
        return_exceptions=True,
    )

    gathered: List[Dict] = []
    counts_by_agency: Dict[str, int] = {}

    for adapter, outcome in zip(adapters, responses):
        # return_exceptions=True turns adapter failures into values; drop them.
        if isinstance(outcome, Exception):
            continue
        if hide_stub and adapter.is_stub:
            continue
        counts_by_agency[adapter.source_name] = len(outcome)
        for record in outcome:
            # Annotate each hit in place with provenance and derived metadata.
            record["source"] = adapter.source_name
            record["latency"] = adapter.last_latency
            record["exemptions"] = classify_foia_exemptions(record.get("snippet", ""))
            record["citation"] = format_citation(record)
            gathered.append(record)

    # Embeddings + clustering
    embeddings = build_embeddings(gathered)
    clusters = cluster_embeddings(gathered, embeddings)

    coverage = build_coverage_heatmap(counts_by_agency)

    return gathered, clusters, counts_by_agency, coverage
53
 
54
 
55
def search_ui(query, enable_live, hide_stub):
    """Synchronous Gradio callback: run the async search and shape the output.

    Returns (table rows, cluster data, per-agency counts, coverage data) —
    one value per output component wired to the search button.
    """
    # HF Spaces compatible: drive the coroutine on a private loop instead of
    # asyncio.run(). FIX: the original never closed this loop, leaking an
    # event loop (and its selector) on every search click.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        results, clusters, counts, coverage = loop.run_until_complete(
            _run_search(query, enable_live, hide_stub)
        )
    finally:
        loop.close()

    # Column order must match the Dataframe headers:
    # Agency, Title, URL, Snippet, FOIA Exemptions, Latency.
    table = []
    for r in results:
        table.append([
            r["source"],
            highlight_terms(r["title"], query),
            r["url"],
            r.get("snippet", ""),
            ",".join(r["exemptions"]),
            f"{r.get('latency', 0):.2f}s",
        ])

    return table, clusters, counts, coverage
74
 
75
 
76
+ # ---------------- UI ----------------
77
 
78
  with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
79
  gr.Markdown(f"# {TITLE}")
80
  gr.Markdown(f"### {SUBTITLE}")
81
 
82
  gr.Markdown(
83
+ "**Terms of Use:** This application indexes only documents already released "
84
  "to the public via official FOIA Electronic Reading Rooms."
85
  )
86
 
 
93
  hide_stub = gr.Checkbox(label="Hide Stub Sources", value=True)
94
 
95
  results_table = gr.Dataframe(
96
+ headers=["Agency", "Title", "URL", "Snippet", "FOIA Exemptions", "Latency"],
97
  interactive=False,
98
  wrap=True
99
  )
100
 
101
+ clusters_box = gr.JSON(label="Semantic Clusters (Interactive Graph Data)")
102
  counts_box = gr.JSON(label="Per-Agency Result Counts")
103
+ coverage_box = gr.JSON(label="Coverage Heatmap Data")
104
 
105
  health_box = gr.JSON(
106
  value=get_health_snapshot(),
107
  label="Agency Health / Latency"
108
  )
109
 
110
+ export_btn = gr.Button("Export → Journalist ZIP")
111
  export_status = gr.Textbox(label="Export Status")
112
 
113
  search_btn.click(
114
  fn=search_ui,
115
  inputs=[query, enable_live, hide_stub],
116
+ outputs=[results_table, clusters_box, counts_box, coverage_box]
117
  )
118
 
119
  export_btn.click(
120
+ fn=export_journalist_zip,
121
  inputs=[results_table],
122
  outputs=[export_status]
123
  )