GodsDevProject committed on
Commit
451133e
·
verified ·
1 Parent(s): f10ecc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -186
app.py CHANGED
@@ -1,226 +1,131 @@
1
- # app.py
2
  # Federal FOIA Intelligence Search
3
  # Public Electronic Reading Rooms Only
4
 
5
  import gradio as gr
6
- import asyncio
7
  import time
8
- import csv
9
- import io
10
- import zipfile
11
  from collections import defaultdict
12
- from typing import Dict, List
13
 
14
  from ingest.registry import get_all_adapters
15
- from policy.kill_switch import KillSwitch
16
- from analytics import log_event
17
-
 
 
18
 
19
  APP_TITLE = "Federal FOIA Intelligence Search"
20
- APP_SUBTITLE = "Public Electronic Reading Rooms Only"
21
 
22
- kill_switch = KillSwitch()
23
  ALL_ADAPTERS = get_all_adapters()
24
 
25
 
26
- # ============================================================
27
- # Utility: FOIA Exemption Heuristic
28
- # ============================================================
29
-
30
def classify_foia_exemption(text: str) -> str:
    """Heuristically map snippet text to a FOIA exemption code.

    Checks run in precedence order: b(1) national security first, then
    b(3) statute, b(6) privacy/personnel, b(7) law enforcement. Returns
    an em dash when no keyword matches. Heuristic only — not an official
    agency determination.
    """
    lowered = text.lower()
    exemption_keywords = [
        ("b(1)", ("classified", "national security")),
        ("b(3)", ("statute",)),
        ("b(6)", ("privacy", "personnel")),
        ("b(7)", ("law enforcement", "investigation")),
    ]
    for code, keywords in exemption_keywords:
        if any(kw in lowered for kw in keywords):
            return code
    return "—"
41
-
42
 
43
- # ============================================================
44
- # Adapter Filtering
45
- # ============================================================
46
 
47
def get_active_adapters(include_stubs, enable_extended):
    """Return the subset of registered adapters eligible for this search.

    Excludes adapters that are kill-switched, stub sources (unless
    include_stubs is set), and extended-only sources (unless
    enable_extended is set). Flags are read with getattr defaults so
    adapters that omit them count as plain, enabled sources.
    """
    def _eligible(name, adapter):
        if kill_switch.is_disabled(name):
            return False
        if getattr(adapter, "is_stub", False) and not include_stubs:
            return False
        if getattr(adapter, "extended", False) and not enable_extended:
            return False
        return True

    return {
        name: adapter
        for name, adapter in ALL_ADAPTERS.items()
        if _eligible(name, adapter)
    }
58
-
59
 
60
- # ============================================================
61
- # Async Search + Metrics
62
- # ============================================================
63
-
64
async def timed_search(adapter, query):
    """Run one adapter's search and report (results, latency_s, health).

    On any failure the adapter is reported unhealthy with latency None
    and an empty result list, so a single bad source never aborts the
    federated search.
    """
    started = time.time()
    try:
        docs = await adapter.search(query)
    except Exception:
        return [], None, "🔴 Error"
    return docs, round(time.time() - started, 2), "🟢 Healthy"
72
-
73
-
74
async def federated_search(query, adapters):
    """Fan a query out to every active adapter concurrently.

    Returns (results, metrics): a flat list of documents annotated with
    their agency name and a heuristic FOIA exemption label, plus a
    per-agency dict of {count, latency, health}.
    """
    names = list(adapters)
    responses = await asyncio.gather(
        *(timed_search(adapters[name], query) for name in names)
    )

    results = []
    metrics = {}
    # gather() preserves submission order, so names and responses align.
    for name, (docs, latency, health) in zip(names, responses):
        metrics[name] = {
            "count": len(docs),
            "latency": latency,
            "health": health,
        }
        for doc in docs:
            doc["agency"] = name
            doc["foia_exemption"] = classify_foia_exemption(doc.get("snippet", ""))
            results.append(doc)

    return results, metrics
99
-
100
-
101
- # ============================================================
102
- # Search Handler
103
- # ============================================================
104
-
105
def search_handler(query, include_stubs, enable_extended):
    """Gradio callback: run a federated FOIA search and shape UI payloads.

    Returns (results_table, coverage_heatmap, status_markdown). An empty
    or whitespace query short-circuits with a warning message.
    """
    if not query.strip():
        return [], [], "⚠️ Enter a search term."

    log_event("search", {"query": query})

    adapters = get_active_adapters(include_stubs, enable_extended)

    try:
        results, metrics = asyncio.run(federated_search(query, adapters))
    except RuntimeError:
        # asyncio.run() raises RuntimeError when a loop already exists on
        # this thread. The previous fallback used asyncio.get_event_loop(),
        # which is deprecated and itself raises on Python 3.10+ when no
        # loop is set — create and tear down a dedicated loop instead.
        loop = asyncio.new_event_loop()
        try:
            results, metrics = loop.run_until_complete(
                federated_search(query, adapters)
            )
        finally:
            loop.close()

    table = [
        [
            r["agency"],
            r.get("title"),
            r.get("url"),
            r.get("snippet"),
            r["foia_exemption"],
        ]
        for r in results
    ]

    heatmap = [
        # latency is None on adapter failure; show a placeholder dash.
        [agency, m["count"], m["latency"] or "—", m["health"]]
        for agency, m in metrics.items()
    ]

    status = f"Found {len(results)} documents across {len(metrics)} agencies."
    return table, heatmap, status
146
-
147
-
148
- # ============================================================
149
- # Journalist ZIP Export
150
- # ============================================================
151
-
152
def export_selected(rows):
    """Bundle selected result rows into an in-memory ZIP archive.

    The archive contains index.csv (header row plus the given rows) and a
    short README.txt. Returns a BytesIO rewound to position 0, or None
    when no rows are selected.
    """
    if not rows:
        return None

    csv_text = io.StringIO()
    writer = csv.writer(csv_text)
    writer.writerow(["Agency", "Title", "URL", "Snippet", "FOIA Exemption"])
    for row in rows:
        writer.writerow(row)

    archive = io.BytesIO()
    with zipfile.ZipFile(archive, "w") as bundle:
        bundle.writestr("index.csv", csv_text.getvalue())
        bundle.writestr(
            "README.txt",
            "Federal FOIA Intelligence Search\nPublic Electronic Reading Rooms Only"
        )
    archive.seek(0)
    return archive
171
 
172
 
173
- # ============================================================
174
- # UI
175
- # ============================================================
176
-
177
with gr.Blocks(title=APP_TITLE) as demo:
    # Header / branding.
    gr.Markdown(f"""
    # 🏛️ **{APP_TITLE}**
    ### *{APP_SUBTITLE}*

    Public-interest discovery across U.S. Government FOIA Electronic Reading Rooms.
    """)

    # Query controls.
    query = gr.Textbox(label="Search FOIA Reading Rooms")
    include_stubs = gr.Checkbox(label="Include Stub Sources", value=True)
    enable_extended = gr.Checkbox(label="Enable Extended Features", value=False)

    search_btn = gr.Button("Search")
    status = gr.Markdown()

    # Result and coverage tables.
    results = gr.Dataframe(
        headers=["Agency", "Title", "URL", "Snippet", "FOIA Exemption"],
        interactive=True,
        wrap=True,
    )
    coverage = gr.Dataframe(
        headers=["Agency", "Result Count", "Latency (s)", "Health"],
        interactive=False,
    )

    # Event wiring.
    search_btn.click(
        search_handler,
        inputs=[query, include_stubs, enable_extended],
        outputs=[results, coverage, status],
    )

    export_btn = gr.Button("🧾 Export Selected Results (ZIP)")
    export_file = gr.File()
    export_btn.click(
        export_selected,
        inputs=[results],
        outputs=[export_file],
    )

    gr.Markdown("""
    ---
    ### ⚖️ Legal Notice
    FOIA exemption labels are **heuristic indicators only** and do not replace
    official agency determinations.
    """)


if __name__ == "__main__":
    demo.launch()
 
 
1
  # Federal FOIA Intelligence Search
2
  # Public Electronic Reading Rooms Only
3
 
4
  import gradio as gr
 
5
  import time
 
 
 
6
  from collections import defaultdict
7
+ from typing import List, Dict
8
 
9
  from ingest.registry import get_all_adapters
10
+ from analytics.semantic import semantic_cluster
11
+ from analytics.health import adapter_health
12
+ from analytics.foia_bcodes import classify_b_codes
13
+ from exports.journalist_zip import build_zip
14
+ from ui.heatmap import render_heatmap
15
 
16
  APP_TITLE = "Federal FOIA Intelligence Search"
17
+ SUBTITLE = "Public Electronic Reading Rooms Only"
18
 
 
19
  ALL_ADAPTERS = get_all_adapters()
20
 
21
 
22
def run_search(query: str, include_stubs: bool, extended: bool):
    """Execute a synchronous federated search across all registered adapters.

    Returns a dict with keys:
      results  — documents annotated with agency name and FOIA b-codes
      clusters — semantic clusters over the result set
      counts   — per-agency document counts
      health   — per-agency health status (or {"status": "error", ...})
      elapsed  — total wall-clock seconds, rounded to 2 places
    """
    start = time.time()

    results = []
    per_agency_counts = defaultdict(int)
    health = {}

    for name, adapter in ALL_ADAPTERS.items():
        # Read flags via getattr with defaults so adapters that omit
        # is_stub/extended/killed are treated as plain enabled sources
        # instead of raising AttributeError and killing the whole search.
        if getattr(adapter, "is_stub", False) and not include_stubs:
            continue
        if getattr(adapter, "extended", False) and not extended:
            continue
        if getattr(adapter, "killed", False):
            continue

        try:
            t0 = time.time()
            docs = adapter.search_sync(query)
            latency = time.time() - t0
            health[name] = adapter_health(latency)
            for d in docs:
                d["agency"] = name
                d["b_codes"] = classify_b_codes(d)
                results.append(d)
                per_agency_counts[name] += 1
        except Exception as e:
            # One failing adapter must not abort the federation; surface
            # the failure through the health panel instead.
            health[name] = {"status": "error", "detail": str(e)}

    clusters = semantic_cluster(results)
    elapsed = round(time.time() - start, 2)

    return {
        "results": results,
        "clusters": clusters,
        "counts": dict(per_agency_counts),
        "health": health,
        "elapsed": elapsed,
    }
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
def render_results(state):
    """Render the search result state as a single Markdown string.

    Each document becomes a linked heading, an agency line with an
    optional exemption badge, and its snippet; documents are separated
    by horizontal rules. Missing document keys fall back to safe
    placeholders instead of raising KeyError (adapter payloads are not
    guaranteed to carry every field).
    """
    sections = []
    for r in state["results"]:
        b_codes = r.get("b_codes") or []
        badge = f"⚖️ {','.join(b_codes)}" if b_codes else ""
        sections.append(
            f"### [{r.get('title', 'Untitled')}]({r.get('url', '')})\n"
            f"*{r.get('agency', 'Unknown')}* {badge}\n\n"
            f"{r.get('snippet', '')}\n"
        )
    return "\n---\n".join(sections)
 
 
72
 
73
 
 
 
 
 
74
with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(f"# 🏛️ {APP_TITLE}\n### *{SUBTITLE}*")
    gr.Markdown(
        "Federated discovery across **lawfully published U.S. Government FOIA Electronic Reading Rooms**.\n\n"
        "### Safeguards\n"
        "- Public documents only\n"
        "- Robots.txt enforced\n"
        "- No restricted systems\n"
        "- No classified access\n"
        "- Per-agency kill switches\n"
    )

    query = gr.Textbox(label="Search FOIA Reading Rooms", placeholder="UAP")
    include_stubs = gr.Checkbox(True, label="Include Stub Sources (Coverage Only)")
    extended = gr.Checkbox(False, label="Enable Extended Features (Live Intelligence-Related Rooms)")

    search_btn = gr.Button("Search")

    results_md = gr.Markdown()
    heatmap = gr.HTML()
    latency_md = gr.Markdown()
    cluster_md = gr.Markdown()
    export_btn = gr.Button("🧾 Export Selected to ZIP")
    # File component so the ZIP built on export actually reaches the user;
    # previously outputs=[] silently discarded build_zip's return value.
    export_file = gr.File()

    state = gr.State()

    def on_search(q, s, e):
        """Run the search and fan the result state out to every panel."""
        state_val = run_search(q, s, e)
        return (
            state_val,
            render_results(state_val),
            render_heatmap(state_val["counts"]),
            f"⏱️ Completed in {state_val['elapsed']}s",
            f"🧠 Clusters: {len(state_val['clusters'])}",
        )

    search_btn.click(
        on_search,
        inputs=[query, include_stubs, extended],
        outputs=[state, results_md, heatmap, latency_md, cluster_md],
    )

    export_btn.click(
        # NOTE(review): assumes build_zip returns a filepath or file object
        # acceptable to gr.File — confirm against exports.journalist_zip.
        lambda s: build_zip(s["results"]),
        inputs=[state],
        outputs=[export_file],
    )

    gr.Markdown(
        "## ⚖️ Legal & Ethical Notice\n"
        "This system indexes **only documents already released to the public** under FOIA.\n\n"
        "**It does not:**\n"
        "- Circumvent access controls\n"
        "- Access classified systems\n"
        "- Declassify or infer restricted information\n"
    )

# Guard the launch so importing app.py (tests, hosting shims) does not
# immediately start a server; the prior revision of this file had the guard.
if __name__ == "__main__":
    demo.launch()