GodsDevProject committed on
Commit
b299465
·
verified ·
1 Parent(s): c932ad0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -20
app.py CHANGED
@@ -1,51 +1,235 @@
1
  import asyncio
 
 
 
2
  import gradio as gr
3
- from typing import Dict, List
4
 
5
- from ingest.registry import get_all_adapters, BASE_AGENCIES, EXTENDED_AGENCIES
 
 
 
 
 
 
6
  import saved_searches
7
 
8
 
 
 
 
 
9
  ALL_ADAPTERS = get_all_adapters()
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  async def federated_search_async(
13
  query: str,
14
  enabled_agencies: List[str],
15
  ):
16
  adapters = [
17
- ALL_ADAPTERS[k]
18
- for k in enabled_agencies
19
- if k in ALL_ADAPTERS
20
  ]
21
 
22
- tasks = [a.search(query) for a in adapters]
23
- results = await asyncio.gather(*tasks, return_exceptions=True)
24
 
25
- flat = []
26
- for r in results:
27
- if isinstance(r, list):
28
- flat.extend(r)
29
- return flat
30
 
 
 
 
 
 
 
 
31
 
32
- def run_search(query, hide_stubs, extended, enabled_agencies):
33
- loop = asyncio.get_event_loop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  agencies = list(BASE_AGENCIES)
36
- if extended:
37
- agencies.extend([a for a in enabled_agencies if a in EXTENDED_AGENCIES])
38
 
39
- results = loop.run_until_complete(
 
 
 
 
 
 
40
  federated_search_async(query, agencies)
41
  )
42
 
43
- saved_searches.save(query, extended)
44
 
45
  if hide_stubs:
46
  results = [r for r in results if r.get("live")]
47
 
48
  if not results:
49
- return "No results found."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- return "\n\n---\n\n".join(render(r) for r in results)
 
 
1
  import asyncio
2
+ import time
3
+ from typing import List, Dict
4
+
5
  import gradio as gr
 
6
 
7
+ from ingest.registry import (
8
+ get_all_adapters,
9
+ BASE_AGENCIES,
10
+ EXTENDED_AGENCIES,
11
+ )
12
+ from citations import cite
13
+ import coverage
14
  import saved_searches
15
 
16
 
17
+ # -----------------------------
18
+ # Adapter Registry
19
+ # -----------------------------
20
+
21
  ALL_ADAPTERS = get_all_adapters()
22
 
23
 
24
+ # -----------------------------
25
+ # Async Search Engine
26
+ # -----------------------------
27
+
async def _search_adapter(adapter, query: str) -> Dict:
    """Run a single adapter's search and wrap the outcome in a status record.

    Args:
        adapter: Object exposing an awaitable ``search(query)`` and, normally,
            a ``source_name`` attribute.
        query: Search term forwarded unchanged to the adapter.

    Returns:
        A dict with keys ``ok``, ``results``, ``latency`` and ``source``.
        On failure ``ok`` is False, ``results`` is an empty list, ``latency``
        is None and an extra ``error`` key carries ``str(exception)``.
    """
    # Resolve the label once, with a fallback, so a missing attribute cannot
    # raise from inside the error handler and abort the whole gather().
    source = getattr(adapter, "source_name", type(adapter).__name__)

    # perf_counter() is monotonic; wall-clock time.time() may jump backwards.
    started = time.perf_counter()
    try:
        results = await adapter.search(query)
    except Exception as exc:
        # Boundary handler: one failing agency must not fail the others.
        return {
            "ok": False,
            "results": [],
            "latency": None,
            "source": source,
            "error": str(exc),
        }

    return {
        "ok": True,
        "results": results,
        "latency": round(time.perf_counter() - started, 2),
        "source": source,
    }
47
+
48
+
async def federated_search_async(
    query: str,
    enabled_agencies: List[str],
):
    """Query every enabled agency adapter concurrently and merge the output.

    Args:
        query: Search term passed to each adapter.
        enabled_agencies: Keys into ``ALL_ADAPTERS``; unknown keys are ignored.

    Returns:
        A ``(flat_results, health)`` tuple. ``flat_results`` is the
        concatenation of every successful adapter's result list, and
        ``health`` maps each adapter's reported source to a dict with its
        ``ok`` flag and ``latency``.
    """
    # dict.fromkeys() drops repeated keys while keeping first-seen order, so
    # an agency listed twice is only queried once.
    adapters = [
        ALL_ADAPTERS[key]
        for key in dict.fromkeys(enabled_agencies)
        if key in ALL_ADAPTERS
    ]

    responses = await asyncio.gather(
        *(_search_adapter(adapter, query) for adapter in adapters)
    )

    flat_results = []
    health = {}

    for response in responses:
        health[response["source"]] = {
            "ok": response["ok"],
            "latency": response["latency"],
        }
        # Only merge real lists: a None payload would make extend() raise and
        # a dict payload would silently contribute its keys as "results".
        if response["ok"] and isinstance(response["results"], list):
            flat_results.extend(response["results"])

    return flat_results, health
74
+
75
+
76
+ # -----------------------------
77
+ # Rendering Helpers
78
+ # -----------------------------
79
+
80
+ def badge(result: Dict) -> str:
81
+ badges = []
82
+ badges.append("🟢 LIVE" if result.get("live") else "🟡 STUB")
83
+ if result.get("extended"):
84
+ badges.append("⚠️ EXTENDED")
85
+ return " · ".join(badges)
86
+
87
+
88
+ def render_result(r: Dict) -> str:
89
+ return f"""
90
+ ### {r['title']}
91
+ **{r['source']} · {badge(r)}**
92
+
93
+ {r['snippet']}
94
+
95
+ 🔗 {r['url']}
96
+
97
+ <details>
98
+ <summary>📑 Citation</summary>
99
+
100
+ {cite(r)}
101
+
102
+ </details>
103
+ """
104
+
105
+
106
+ def render_health(health: Dict) -> str:
107
+ lines = ["### 🩺 Agency Health\n"]
108
+ for agency, h in health.items():
109
+ if h["ok"]:
110
+ lines.append(f"- **{agency}**: 🟢 {h['latency']}s")
111
+ else:
112
+ lines.append(f"- **{agency}**: 🔴 unavailable")
113
+ return "\n".join(lines)
114
+
115
+
116
+ # -----------------------------
117
+ # Sync Wrapper (HF Safe)
118
+ # -----------------------------
119
+
120
+ def run_search(
121
+ query: str,
122
+ hide_stubs: bool,
123
+ enable_extended: bool,
124
+ enabled_extended_agencies: List[str],
125
+ ):
126
+ if not query.strip():
127
+ return "Enter a search term.", "", ""
128
 
129
  agencies = list(BASE_AGENCIES)
 
 
130
 
131
+ if enable_extended:
132
+ agencies.extend(
133
+ [a for a in enabled_extended_agencies if a in EXTENDED_AGENCIES]
134
+ )
135
+
136
+ loop = asyncio.get_event_loop()
137
+ results, health = loop.run_until_complete(
138
  federated_search_async(query, agencies)
139
  )
140
 
141
+ saved_searches.save(query, enable_extended)
142
 
143
  if hide_stubs:
144
  results = [r for r in results if r.get("live")]
145
 
146
  if not results:
147
+ return "No results found.", render_health(health), coverage.render_coverage()
148
+
149
+ rendered = "\n\n---\n\n".join(render_result(r) for r in results)
150
+ return rendered, render_health(health), coverage.render_coverage()
151
+
152
+
153
+ # -----------------------------
154
+ # Gradio UI
155
+ # -----------------------------
156
+
157
+ with gr.Blocks(title="Federated FOIA Search") as demo:
158
+ gr.Markdown(
159
+ """
160
+ # 🏛️ Federated FOIA Document Search
161
+ Search **public government FOIA reading rooms** across multiple agencies.
162
+
163
+ - Default: **Safe, public-only sources**
164
+ - Extended mode: **Additional public releases (opt-in)**
165
+ - No authentication
166
+ - No classified or restricted systems
167
+ """
168
+ )
169
+
170
+ with gr.Row():
171
+ query = gr.Textbox(
172
+ label="Search term",
173
+ placeholder="e.g. UAP radar incident",
174
+ )
175
+
176
+ with gr.Row():
177
+ hide_stubs = gr.Checkbox(
178
+ label="Hide stub (non-live) sources",
179
+ value=False,
180
+ )
181
+ enable_extended = gr.Checkbox(
182
+ label="Enable Extended Features (live but sensitive public sources)",
183
+ value=False,
184
+ )
185
+
186
+ with gr.Accordion("Extended Agency Kill Switches", open=False):
187
+ extended_agencies = gr.CheckboxGroup(
188
+ choices=EXTENDED_AGENCIES,
189
+ label="Enable specific extended agencies",
190
+ )
191
+
192
+ search_btn = gr.Button("🔍 Search")
193
+
194
+ results_md = gr.Markdown()
195
+ health_md = gr.Markdown()
196
+ coverage_md = gr.Markdown()
197
+
198
+ search_btn.click(
199
+ fn=run_search,
200
+ inputs=[
201
+ query,
202
+ hide_stubs,
203
+ enable_extended,
204
+ extended_agencies,
205
+ ],
206
+ outputs=[
207
+ results_md,
208
+ health_md,
209
+ coverage_md,
210
+ ],
211
+ )
212
+
213
+ with gr.Accordion("Saved Searches", open=False):
214
+ gr.Markdown(saved_searches.render())
215
+
216
+ with gr.Accordion("About & Legal", open=False):
217
+ gr.Markdown(
218
+ """
219
+ **Legal Notice**
220
+
221
+ This tool:
222
+ - Accesses **only publicly available FOIA Electronic Reading Rooms**
223
+ - Respects robots.txt and rate limits
224
+ - Does not bypass access controls
225
+ - Is intended for journalism, research, and public-interest use
226
+ """
227
+ )
228
+
229
+
230
+ # -----------------------------
231
+ # HF Entry Point
232
+ # -----------------------------
233
 
234
+ if __name__ == "__main__":
235
+ demo.launch()