GodsDevProject committed on
Commit
7bbac3f
·
verified ·
1 Parent(s): 8b51de5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -25
app.py CHANGED
@@ -1,20 +1,22 @@
1
  """
2
  Federal FOIA Intelligence Search
3
  Public Electronic Reading Rooms Only
4
- HF Spaces compatible
5
  """
6
 
7
  import asyncio
8
- import time
9
  import gradio as gr
10
- from typing import List, Dict
 
11
 
12
  from ingest.registry import get_enabled_adapters
13
- from ingest.health import get_health_snapshot
14
  from ingest.cluster import build_embeddings, cluster_embeddings
15
- from ingest.export import export_journalist_zip
16
- from ingest.utils import highlight_terms, classify_foia_exemptions, format_citation
17
  from ingest.coverage import build_coverage_heatmap
 
 
 
 
 
18
 
19
  TITLE = "๐Ÿ›๏ธ Federal FOIA Intelligence Search"
20
  SUBTITLE = "Public Electronic Reading Rooms Only"
@@ -27,35 +29,47 @@ async def _run_search(query: str, enable_live: bool, hide_stub: bool):
27
  tasks = [a.search(query) for a in adapters]
28
  responses = await asyncio.gather(*tasks, return_exceptions=True)
29
 
30
- results: List[Dict] = []
31
- per_agency_counts: Dict[str, int] = {}
32
 
33
  for adapter, resp in zip(adapters, responses):
34
  if isinstance(resp, Exception):
35
  continue
36
  if hide_stub and adapter.is_stub:
37
  continue
 
38
  per_agency_counts[adapter.source_name] = len(resp)
 
39
  for r in resp:
40
  r["source"] = adapter.source_name
41
  r["latency"] = adapter.last_latency
42
  r["exemptions"] = classify_foia_exemptions(r.get("snippet", ""))
43
  r["citation"] = format_citation(r)
 
44
  results.append(r)
45
 
46
- # Embeddings + clustering
47
  embeddings = build_embeddings(results)
48
  clusters = cluster_embeddings(results, embeddings)
49
 
50
  coverage = build_coverage_heatmap(per_agency_counts)
 
 
51
 
52
- return results, clusters, per_agency_counts, coverage
53
 
54
 
55
  def search_ui(query, enable_live, hide_stub):
56
  loop = asyncio.new_event_loop()
57
  asyncio.set_event_loop(loop)
58
- results, clusters, counts, coverage = loop.run_until_complete(
 
 
 
 
 
 
 
 
59
  _run_search(query, enable_live, hide_stub)
60
  )
61
 
@@ -70,7 +84,7 @@ def search_ui(query, enable_live, hide_stub):
70
  f"{r.get('latency', 0):.2f}s"
71
  ])
72
 
73
- return table, clusters, counts, coverage
74
 
75
 
76
  # ---------------- UI ----------------
@@ -80,8 +94,8 @@ with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
80
  gr.Markdown(f"### {SUBTITLE}")
81
 
82
  gr.Markdown(
83
- "**Terms of Use:** This application indexes only documents already released "
84
- "to the public via official FOIA Electronic Reading Rooms."
85
  )
86
 
87
  with gr.Row():
@@ -94,32 +108,50 @@ with gr.Blocks(title="Federal FOIA Intelligence Search") as demo:
94
 
95
  results_table = gr.Dataframe(
96
  headers=["Agency", "Title", "URL", "Snippet", "FOIA Exemptions", "Latency"],
97
- interactive=False,
98
  wrap=True
99
  )
100
 
101
- clusters_box = gr.JSON(label="Semantic Clusters (Interactive Graph Data)")
102
- counts_box = gr.JSON(label="Per-Agency Result Counts")
103
- coverage_box = gr.JSON(label="Coverage Heatmap Data")
104
 
105
- health_box = gr.JSON(
106
- value=get_health_snapshot(),
107
- label="Agency Health / Latency"
108
- )
 
 
 
 
 
 
 
 
109
 
110
- export_btn = gr.Button("Export โ†’ Journalist ZIP")
111
  export_status = gr.Textbox(label="Export Status")
112
 
113
  search_btn.click(
114
  fn=search_ui,
115
  inputs=[query, enable_live, hide_stub],
116
- outputs=[results_table, clusters_box, counts_box, coverage_box]
 
 
 
 
 
 
117
  )
118
 
119
- export_btn.click(
120
  fn=export_journalist_zip,
121
  inputs=[results_table],
122
  outputs=[export_status]
123
  )
124
 
 
 
 
 
 
 
125
  demo.queue().launch()
 
1
  """
2
  Federal FOIA Intelligence Search
3
  Public Electronic Reading Rooms Only
 
4
  """
5
 
6
  import asyncio
 
7
  import gradio as gr
8
+ import plotly.graph_objects as go
9
+ from datetime import datetime
10
 
11
  from ingest.registry import get_enabled_adapters
 
12
  from ingest.cluster import build_embeddings, cluster_embeddings
13
+ from ingest.health import get_health_snapshot
 
14
  from ingest.coverage import build_coverage_heatmap
15
+ from ingest.export import export_journalist_zip, export_pdf_report
16
+ from ingest.utils import highlight_terms, classify_foia_exemptions, format_citation
17
+ from ingest.timeline import build_timeline_figure
18
+ from ingest.graph import build_cluster_graph
19
+
20
 
21
  TITLE = "๐Ÿ›๏ธ Federal FOIA Intelligence Search"
22
  SUBTITLE = "Public Electronic Reading Rooms Only"
 
29
  tasks = [a.search(query) for a in adapters]
30
  responses = await asyncio.gather(*tasks, return_exceptions=True)
31
 
32
+ results = []
33
+ per_agency_counts = {}
34
 
35
  for adapter, resp in zip(adapters, responses):
36
  if isinstance(resp, Exception):
37
  continue
38
  if hide_stub and adapter.is_stub:
39
  continue
40
+
41
  per_agency_counts[adapter.source_name] = len(resp)
42
+
43
  for r in resp:
44
  r["source"] = adapter.source_name
45
  r["latency"] = adapter.last_latency
46
  r["exemptions"] = classify_foia_exemptions(r.get("snippet", ""))
47
  r["citation"] = format_citation(r)
48
+ r["date"] = r.get("date", datetime.utcnow().isoformat())
49
  results.append(r)
50
 
 
51
  embeddings = build_embeddings(results)
52
  clusters = cluster_embeddings(results, embeddings)
53
 
54
  coverage = build_coverage_heatmap(per_agency_counts)
55
+ cluster_graph = build_cluster_graph(results, clusters)
56
+ timeline_fig = build_timeline_figure(results)
57
 
58
+ return results, clusters, per_agency_counts, coverage, cluster_graph, timeline_fig
59
 
60
 
61
  def search_ui(query, enable_live, hide_stub):
62
  loop = asyncio.new_event_loop()
63
  asyncio.set_event_loop(loop)
64
+
65
+ (
66
+ results,
67
+ clusters,
68
+ counts,
69
+ coverage,
70
+ cluster_graph,
71
+ timeline_fig
72
+ ) = loop.run_until_complete(
73
  _run_search(query, enable_live, hide_stub)
74
  )
75
 
 
84
  f"{r.get('latency', 0):.2f}s"
85
  ])
86
 
87
+ return table, counts, coverage, cluster_graph, timeline_fig
88
 
89
 
90
  # ---------------- UI ----------------
 
94
  gr.Markdown(f"### {SUBTITLE}")
95
 
96
  gr.Markdown(
97
+ "**Terms of Use:** Indexes only documents already released "
98
+ "via official FOIA Electronic Reading Rooms."
99
  )
100
 
101
  with gr.Row():
 
108
 
109
  results_table = gr.Dataframe(
110
  headers=["Agency", "Title", "URL", "Snippet", "FOIA Exemptions", "Latency"],
 
111
  wrap=True
112
  )
113
 
114
+ with gr.Tabs():
115
+ with gr.Tab("๐Ÿ“Š Coverage Heatmap"):
116
+ coverage_box = gr.JSON()
117
 
118
+ with gr.Tab("๐Ÿง  Semantic Cluster Graph"):
119
+ cluster_plot = gr.Plot()
120
+
121
+ with gr.Tab("๐Ÿ—“๏ธ Timeline View"):
122
+ timeline_plot = gr.Plot()
123
+
124
+ with gr.Tab("๐Ÿฅ Agency Health"):
125
+ health_box = gr.JSON(value=get_health_snapshot())
126
+
127
+ with gr.Row():
128
+ export_zip_btn = gr.Button("๐Ÿงพ Journalist ZIP Export")
129
+ export_pdf_btn = gr.Button("๐Ÿ“„ PDF Transparency Report")
130
 
 
131
  export_status = gr.Textbox(label="Export Status")
132
 
133
  search_btn.click(
134
  fn=search_ui,
135
  inputs=[query, enable_live, hide_stub],
136
+ outputs=[
137
+ results_table,
138
+ coverage_box,
139
+ coverage_box,
140
+ cluster_plot,
141
+ timeline_plot
142
+ ]
143
  )
144
 
145
+ export_zip_btn.click(
146
  fn=export_journalist_zip,
147
  inputs=[results_table],
148
  outputs=[export_status]
149
  )
150
 
151
+ export_pdf_btn.click(
152
+ fn=export_pdf_report,
153
+ inputs=[results_table],
154
+ outputs=[export_status]
155
+ )
156
+
157
  demo.queue().launch()