GodsDevProject committed on
Commit
f10ecc3
·
verified ·
1 Parent(s): 8d11cfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -137
app.py CHANGED
@@ -4,6 +4,11 @@
4
 
5
  import gradio as gr
6
  import asyncio
 
 
 
 
 
7
  from typing import Dict, List
8
 
9
  from ingest.registry import get_all_adapters
@@ -11,10 +16,6 @@ from policy.kill_switch import KillSwitch
11
  from analytics import log_event
12
 
13
 
14
- # ============================================================
15
- # App Metadata
16
- # ============================================================
17
-
18
  APP_TITLE = "Federal FOIA Intelligence Search"
19
  APP_SUBTITLE = "Public Electronic Reading Rooms Only"
20
 
@@ -23,12 +24,28 @@ ALL_ADAPTERS = get_all_adapters()
23
 
24
 
25
  # ============================================================
26
- # Adapter Utilities
27
  # ============================================================
28
 
29
- def get_active_adapters(include_stubs: bool, enable_extended: bool) -> Dict[str, object]:
30
- active = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
 
 
32
  for name, adapter in ALL_ADAPTERS.items():
33
  if kill_switch.is_disabled(name):
34
  continue
@@ -37,81 +54,120 @@ def get_active_adapters(include_stubs: bool, enable_extended: bool) -> Dict[str,
37
  if getattr(adapter, "extended", False) and not enable_extended:
38
  continue
39
  active[name] = adapter
40
-
41
  return active
42
 
43
 
44
- def build_coverage_heatmap(include_stubs: bool, enable_extended: bool):
45
- """
46
- Returns a table describing agency coverage state.
47
- """
48
- rows = []
49
 
50
- for name, adapter in ALL_ADAPTERS.items():
51
- if kill_switch.is_disabled(name):
52
- status = "🔴 Disabled"
53
- elif getattr(adapter, "is_stub", False):
54
- status = "🟡 Stub"
55
- else:
56
- status = "🟢 Live"
 
57
 
58
- scope = "Extended" if getattr(adapter, "extended", False) else "Default"
59
 
60
- if scope == "Extended" and not enable_extended:
61
- status = "⚪ Hidden (Extended Disabled)"
 
 
 
62
 
63
- if getattr(adapter, "is_stub", False) and not include_stubs:
64
- status = "⚪ Hidden (Stubs Disabled)"
65
 
66
- rows.append([
67
- name,
68
- scope,
69
- status
70
- ])
71
 
72
- return rows
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
 
75
  # ============================================================
76
- # Async Search
77
  # ============================================================
78
 
79
- async def federated_search(query: str, adapters: Dict[str, object]) -> List[dict]:
80
- tasks = [adapter.search(query) for adapter in adapters.values()]
81
- results = await asyncio.gather(*tasks, return_exceptions=True)
82
-
83
- flattened = []
84
- for r in results:
85
- if isinstance(r, Exception):
86
- continue
87
- flattened.extend(r)
88
-
89
- return flattened
90
-
91
-
92
  def search_handler(query, include_stubs, enable_extended):
93
  if not query.strip():
94
- return [], "⚠️ Please enter a search term."
95
 
96
  log_event("search", {"query": query})
97
 
98
  adapters = get_active_adapters(include_stubs, enable_extended)
99
 
100
- if not adapters:
101
- return [], "⚠️ No active sources enabled."
102
-
103
  try:
104
- results = asyncio.run(federated_search(query, adapters))
 
 
105
  except RuntimeError:
106
  loop = asyncio.get_event_loop()
107
- results = loop.run_until_complete(
108
  federated_search(query, adapters)
109
  )
110
 
111
- if not results:
112
- return [], "No public FOIA documents found."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- return results, f"Found {len(results)} public FOIA documents."
 
115
 
116
 
117
  # ============================================================
@@ -119,108 +175,52 @@ def search_handler(query, include_stubs, enable_extended):
119
  # ============================================================
120
 
121
  with gr.Blocks(title=APP_TITLE) as demo:
122
- gr.Markdown(
123
- f"""
124
  # 🏛️ **{APP_TITLE}**
125
  ### *{APP_SUBTITLE}*
126
 
127
- Federated discovery across **lawfully published U.S. Government FOIA
128
- Electronic Reading Rooms**.
129
-
130
- **Safeguards**
131
- - Public documents only
132
- - Robots.txt enforced
133
- - No restricted systems
134
- - No classified access
135
- - Per-agency kill switches
136
- """
137
- )
138
-
139
- query = gr.Textbox(
140
- label="Search FOIA Reading Rooms",
141
- placeholder="e.g. UAP, MKULTRA, satellite reconnaissance"
142
- )
143
 
144
- with gr.Row():
145
- include_stubs = gr.Checkbox(
146
- label="Include Stub Sources (Coverage Only)",
147
- value=True
148
- )
149
- enable_extended = gr.Checkbox(
150
- label="Enable Extended Features (Live Intelligence-Related Rooms)",
151
- value=False
152
- )
153
 
154
  search_btn = gr.Button("Search")
155
-
156
  status = gr.Markdown()
157
 
158
  results = gr.Dataframe(
159
- headers=["source", "title", "url", "snippet"],
160
- datatype=["str", "str", "str", "str"],
161
- interactive=False,
162
  wrap=True
163
  )
164
 
 
 
 
 
 
165
  search_btn.click(
166
- fn=search_handler,
167
  inputs=[query, include_stubs, enable_extended],
168
- outputs=[results, status]
169
  )
170
 
171
- # ========================================================
172
- # Coverage Heatmap Panel
173
- # ========================================================
174
-
175
- with gr.Accordion("🗺️ Agency Coverage Map", open=False):
176
- gr.Markdown(
177
- """
178
- This panel shows **search coverage status** by agency.
179
 
180
- - 🟢 Live = searchable public FOIA reading room
181
- - 🟡 Stub = declared coverage (not yet live)
182
- - 🔴 Disabled = kill switch / unavailable
183
- """
184
- )
185
-
186
- coverage_table = gr.Dataframe(
187
- headers=["Agency", "Scope", "Coverage Status"],
188
- datatype=["str", "str", "str"],
189
- interactive=False
190
- )
191
-
192
- def refresh_coverage(include_stubs, enable_extended):
193
- return build_coverage_heatmap(include_stubs, enable_extended)
194
-
195
- include_stubs.change(
196
- refresh_coverage,
197
- inputs=[include_stubs, enable_extended],
198
- outputs=[coverage_table]
199
- )
200
-
201
- enable_extended.change(
202
- refresh_coverage,
203
- inputs=[include_stubs, enable_extended],
204
- outputs=[coverage_table]
205
- )
206
-
207
- # Initial render
208
- coverage_table.value = build_coverage_heatmap(True, False)
209
 
210
- gr.Markdown(
211
- """
212
  ---
213
- ### ⚖️ Legal & Ethical Notice
214
-
215
- This system indexes **only documents already released to the public**
216
- by U.S. Government agencies under the Freedom of Information Act (FOIA).
217
-
218
- It does **not**:
219
- - Circumvent access controls
220
- - Access classified systems
221
- - Declassify or infer restricted information
222
- """
223
- )
224
 
225
  if __name__ == "__main__":
226
  demo.launch()
 
4
 
5
  import gradio as gr
6
  import asyncio
7
+ import time
8
+ import csv
9
+ import io
10
+ import zipfile
11
+ from collections import defaultdict
12
  from typing import Dict, List
13
 
14
  from ingest.registry import get_all_adapters
 
16
  from analytics import log_event
17
 
18
 
 
 
 
 
19
  APP_TITLE = "Federal FOIA Intelligence Search"
20
  APP_SUBTITLE = "Public Electronic Reading Rooms Only"
21
 
 
24
 
25
 
26
  # ============================================================
27
+ # Utility: FOIA Exemption Heuristic
28
  # ============================================================
29
 
30
def classify_foia_exemption(text: str) -> str:
    """Guess a FOIA exemption label for *text* by keyword matching.

    Matching is a case-insensitive substring search. Rules are evaluated in
    a fixed priority order and the first rule with a matching keyword wins.

    Args:
        text: Free text to inspect. ``None`` or an empty value is treated
            as "no match" instead of raising.

    Returns:
        ``"b(1)"``, ``"b(3)"``, ``"b(6)"`` or ``"b(7)"`` for the first
        matching rule, or ``"—"`` when no keyword is present.
    """
    if not text:
        return "—"

    lowered = str(text).lower()

    # Ordered (label, keywords) pairs; order is significant because the
    # first hit is returned.
    # NOTE(review): these are substring matches, so "unclassified" and
    # "declassified" also hit the "classified" keyword — confirm whether
    # that is intended for this heuristic.
    rules = (
        ("b(1)", ("classified", "national security")),
        ("b(3)", ("statute",)),
        ("b(6)", ("privacy", "personnel")),
        ("b(7)", ("law enforcement", "investigation")),
    )
    for label, keywords in rules:
        if any(keyword in lowered for keyword in keywords):
            return label
    return "—"
41
+
42
+
43
+ # ============================================================
44
+ # Adapter Filtering
45
+ # ============================================================
46
 
47
+ def get_active_adapters(include_stubs, enable_extended):
48
+ active = {}
49
  for name, adapter in ALL_ADAPTERS.items():
50
  if kill_switch.is_disabled(name):
51
  continue
 
54
  if getattr(adapter, "extended", False) and not enable_extended:
55
  continue
56
  active[name] = adapter
 
57
  return active
58
 
59
 
60
+ # ============================================================
61
+ # Async Search + Metrics
62
+ # ============================================================
 
 
63
 
64
async def timed_search(adapter, query):
    """Run one adapter search and report its results, latency and health.

    Args:
        adapter: Object exposing an awaitable ``search(query)`` method.
        query: Search string forwarded unchanged to the adapter.

    Returns:
        A ``(results, latency, health)`` tuple. On success: the adapter's
        results (``None`` is normalised to ``[]``), the elapsed time in
        seconds rounded to two decimals, and ``"🟢 Healthy"``. If the search
        raises any ``Exception``: ``([], None, "🔴 Error")``.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    try:
        results = await adapter.search(query)
    except Exception:
        # Best effort: a failing source is reported as unhealthy rather
        # than propagating the error to the caller.
        return [], None, "🔴 Error"

    latency = round(time.perf_counter() - started, 2)
    if results is None:
        results = []
    return results, latency, "🟢 Healthy"
72
 
 
73
 
74
async def federated_search(query, adapters):
    """Search every adapter concurrently and merge the annotated results.

    Args:
        query: Search string passed to each adapter via ``timed_search``.
        adapters: Mapping of agency name to adapter object.

    Returns:
        A ``(results, metrics)`` tuple. ``results`` is a flat list of result
        dicts, each a copy of the adapter's dict with ``"agency"`` and
        ``"foia_exemption"`` keys added. ``metrics`` maps each agency name
        to ``{"count", "latency", "health"}`` for that adapter's search.
    """
    # Freeze the name order once so responses can be paired with names
    # without relying on a dict of un-awaited coroutine objects.
    names = list(adapters)
    responses = await asyncio.gather(
        *(timed_search(adapters[name], query) for name in names)
    )

    results = []
    metrics = {}
    for name, (docs, latency, health) in zip(names, responses):
        metrics[name] = {
            "count": len(docs),
            "latency": latency,
            "health": health,
        }
        for doc in docs:
            # Annotate a shallow copy so dicts owned by the adapter are not
            # mutated by this function.
            annotated = dict(doc)
            annotated["agency"] = name
            # `or ""` also covers a "snippet" key that is present but None.
            annotated["foia_exemption"] = classify_foia_exemption(
                annotated.get("snippet") or ""
            )
            results.append(annotated)

    return results, metrics
99
 
100
 
101
  # ============================================================
102
+ # Search Handler
103
  # ============================================================
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
def _run_coroutine_blocking(coro):
    """Run *coro* to completion from synchronous code and return its result."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread, so asyncio.run is safe.
        return asyncio.run(coro)

    # A loop is already running in this thread; asyncio.run and
    # run_until_complete would both raise here. Run the coroutine on a
    # fresh loop in a worker thread instead. This blocks the calling
    # thread until the search finishes.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


def search_handler(query, include_stubs, enable_extended):
    """Handle a search request and build the three UI outputs.

    Args:
        query: Raw text from the search box; ``None`` or blank is rejected.
        include_stubs: Forwarded to ``get_active_adapters``.
        enable_extended: Forwarded to ``get_active_adapters``.

    Returns:
        A ``(table, heatmap, status)`` tuple: result rows of
        ``[agency, title, url, snippet, foia_exemption]``, per-agency rows
        of ``[agency, count, latency, health]``, and a status message.
    """
    if not query or not query.strip():
        return [], [], "⚠️ Enter a search term."

    log_event("search", {"query": query})

    adapters = get_active_adapters(include_stubs, enable_extended)
    results, metrics = _run_coroutine_blocking(
        federated_search(query, adapters)
    )

    table = [
        [
            r["agency"],
            r.get("title"),
            r.get("url"),
            r.get("snippet"),
            r["foia_exemption"],
        ]
        for r in results
    ]

    heatmap = [
        [
            agency,
            m["count"],
            # Only a missing latency is shown as a dash; a measured 0.0 is
            # a real value and must not be hidden.
            "—" if m["latency"] is None else m["latency"],
            m["health"],
        ]
        for agency, m in metrics.items()
    ]

    status = f"Found {len(results)} documents across {len(metrics)} agencies."
    return table, heatmap, status
146
+
147
+
148
+ # ============================================================
149
+ # Journalist ZIP Export
150
+ # ============================================================
151
+
152
def export_selected(rows):
    """Write the given result rows to a ZIP archive and return its path.

    The archive contains ``index.csv`` (a header row followed by *rows*)
    and a short ``README.txt``.

    Args:
        rows: Table rows to export. Accepts a list of row sequences, an
            object with ``to_numpy()`` (e.g. a pandas DataFrame), an object
            with ``tolist()`` (e.g. a numpy array), or a dict carrying the
            rows under ``"data"``.

    Returns:
        Path of the ZIP file on disk as a ``str``, or ``None`` when there
        are no rows to export.
    """
    import tempfile  # local import: not among this module's top-level imports

    # Normalise the table payload to a plain list of rows. Truth-testing or
    # iterating a DataFrame directly would raise / yield column names.
    if isinstance(rows, dict):
        rows = rows.get("data")
    if hasattr(rows, "to_numpy"):
        rows = rows.to_numpy()
    if hasattr(rows, "tolist"):
        rows = rows.tolist()
    if rows is None:
        return None
    rows = list(rows)
    if not rows:
        return None

    csv_buf = io.StringIO()
    writer = csv.writer(csv_buf)
    writer.writerow(["Agency", "Title", "URL", "Snippet", "FOIA Exemption"])
    writer.writerows(rows)

    # The archive is written to a real file and its path is returned,
    # because a file-download output needs a path rather than an in-memory
    # buffer. delete=False keeps the file after the handle is closed.
    with tempfile.NamedTemporaryFile(
        prefix="foia_export_", suffix=".zip", delete=False
    ) as tmp:
        with zipfile.ZipFile(tmp, "w") as archive:
            archive.writestr("index.csv", csv_buf.getvalue())
            archive.writestr(
                "README.txt",
                "Federal FOIA Intelligence Search\nPublic Electronic Reading Rooms Only",
            )

    return tmp.name
171
 
172
 
173
  # ============================================================
 
175
  # ============================================================
176
 
177
with gr.Blocks(title=APP_TITLE) as demo:
    # Page header: app title, subtitle and a one-line description.
    gr.Markdown(value=f"""
# 🏛️ **{APP_TITLE}**
### *{APP_SUBTITLE}*

Public-interest discovery across U.S. Government FOIA Electronic Reading Rooms.
""")

    # Search inputs, passed to search_handler in this order.
    query = gr.Textbox(label="Search FOIA Reading Rooms")
    include_stubs = gr.Checkbox(value=True, label="Include Stub Sources")
    enable_extended = gr.Checkbox(value=False, label="Enable Extended Features")

    search_btn = gr.Button(value="Search")
    status = gr.Markdown()

    # Result rows; columns match the rows built by search_handler.
    results = gr.Dataframe(
        wrap=True,
        interactive=True,
        headers=["Agency", "Title", "URL", "Snippet", "FOIA Exemption"],
    )

    # Per-agency metrics table filled by search_handler.
    coverage = gr.Dataframe(
        interactive=False,
        headers=["Agency", "Result Count", "Latency (s)", "Health"],
    )

    search_btn.click(
        fn=search_handler,
        inputs=[query, include_stubs, enable_extended],
        outputs=[results, coverage, status],
    )

    # Export: the current contents of the results table go to export_selected.
    export_btn = gr.Button(value="🧾 Export Selected Results (ZIP)")
    export_file = gr.File()

    export_btn.click(
        fn=export_selected,
        inputs=[results],
        outputs=[export_file],
    )

    gr.Markdown(value="""
---
### ⚖️ Legal Notice
FOIA exemption labels are **heuristic indicators only** and do not replace
official agency determinations.
""")

if __name__ == "__main__":
    demo.launch()