GodsDevProject committed on
Commit
37f4615
·
verified ·
1 Parent(s): 6d124ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -180
app.py CHANGED
@@ -1,239 +1,233 @@
 
1
  import time
 
2
  from typing import List, Dict
3
 
4
- import gradio as gr
 
 
5
 
6
- from ingest.registry import (
7
- get_all_adapters,
8
- BASE_AGENCIES,
9
- EXTENDED_AGENCIES,
10
- )
11
- from citations import cite
12
- import coverage
13
- import saved_searches
14
-
15
-
16
- # ======================================================
17
- # Adapter Registry
18
- # ======================================================
19
-
20
# Adapter lookup table, built once at import time. The search code below
# indexes it by agency name (`ALL_ADAPTERS[a]`, `a in ALL_ADAPTERS`), so it is
# presumably a mapping of agency name -> adapter object; confirm against
# ingest.registry.get_all_adapters.
ALL_ADAPTERS = get_all_adapters()
21
-
22
-
23
- # ======================================================
24
- # Async Federated Search Engine
25
- # ======================================================
26
-
27
async def _search_adapter(adapter, query: str) -> Dict:
    """Run one adapter's search and wrap the outcome in a status record.

    Args:
        adapter: Object exposing an awaitable ``search(query)`` method and a
            ``source_name`` attribute.
        query: Search text forwarded unchanged to the adapter.

    Returns:
        On success ``{"ok": True, "results", "latency", "source"}`` where
        ``latency`` is the elapsed time in seconds rounded to two decimals.
        On failure ``{"ok": False, "results": [], "latency": None, "source",
        "error"}`` where ``error`` is ``str(exception)``.
    """
    # perf_counter is monotonic, so the measured latency cannot go negative or
    # jump when the system clock is adjusted mid-request.
    started = time.perf_counter()
    try:
        results = await adapter.search(query)
    except Exception as exc:
        # Deliberate best-effort isolation: one failing source must not abort
        # the whole federated search, so the failure is reported as data.
        return {
            "ok": False,
            "results": [],
            "latency": None,
            "source": adapter.source_name,
            "error": str(exc),
        }
    return {
        "ok": True,
        "results": results,
        "latency": round(time.perf_counter() - started, 2),
        "source": adapter.source_name,
    }
46
-
47
-
48
async def federated_search_async(
    query: str,
    enabled_agencies: List[str],
):
    """Query every enabled, registered adapter concurrently.

    Args:
        query: Search text passed to each adapter.
        enabled_agencies: Agency names to search. Names that are not keys of
            ``ALL_ADAPTERS`` are ignored; repeated names are searched once.

    Returns:
        A ``(flat_results, health)`` tuple. ``flat_results`` concatenates the
        results of every adapter that succeeded. ``health`` maps each
        adapter's ``source`` to ``{"ok": bool, "latency": float | None}``.
    """
    # dict.fromkeys de-duplicates while keeping the caller's order, so an
    # agency listed twice is not queried twice and its hits are not doubled.
    adapters = [
        ALL_ADAPTERS[name]
        for name in dict.fromkeys(enabled_agencies)
        if name in ALL_ADAPTERS
    ]

    responses = await gradio_async_gather(
        [_search_adapter(adapter, query) for adapter in adapters]
    )

    flat_results = []
    health = {}
    for response in responses:
        health[response["source"]] = {
            "ok": response["ok"],
            "latency": response["latency"],
        }
        if response["ok"]:
            flat_results.extend(response["results"])

    return flat_results, health
 
 
 
 
 
 
 
 
 
73
 
74
 
75
async def gradio_async_gather(tasks):
    """
    HF-safe async gather wrapper.

    Awaits all awaitables in ``tasks`` concurrently via ``asyncio.gather`` and
    returns their results as a list in the same order. Because
    ``return_exceptions`` is False, the first exception raised by any
    awaitable propagates to the caller.
    """
    import asyncio

    gathered = await asyncio.gather(*tasks, return_exceptions=False)
    return gathered
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
 
 
 
82
 
83
- # ======================================================
84
- # Rendering Helpers
85
- # ======================================================
 
86
 
87
def badge(result: Dict) -> str:
    """Return the status badges for a result, joined by " · ".

    The first badge is LIVE when ``result["live"]`` is truthy and STUB
    otherwise (including when the key is missing). An EXTENDED badge is
    appended when ``result["extended"]`` is truthy.
    """
    status = "🟢 LIVE" if result.get("live") else "🟡 STUB"
    if result.get("extended"):
        return " · ".join([status, "⚠️ EXTENDED"])
    return status
93
 
 
94
 
95
def render_result(r: Dict) -> str:
    """Render one search result as a Markdown card.

    The card holds a heading with the title, a bold source/badge line, the
    snippet, the URL, and a collapsible ``<details>`` section containing the
    output of ``cite(r)``. Reads the keys ``title``, ``source``, ``snippet``
    and ``url`` from ``r``.
    """
    # Leading and trailing empty entries reproduce the newline that opens and
    # closes the card text.
    card_lines = [
        "",
        f"### {r['title']}",
        f"**{r['source']} · {badge(r)}**",
        "",
        f"{r['snippet']}",
        "",
        f"🔗 {r['url']}",
        "",
        "<details>",
        "<summary>📑 Citation</summary>",
        "",
        f"{cite(r)}",
        "",
        "</details>",
        "",
    ]
    return "\n".join(card_lines)
 
111
 
 
112
 
113
def render_health(health: Dict) -> str:
    """Render the per-agency health mapping as a Markdown bullet list.

    ``health`` maps an agency name to a dict with ``ok`` and ``latency``.
    Agencies whose ``ok`` is truthy show their latency in seconds; the others
    are listed as unavailable.
    """

    def _row(agency, status):
        if status["ok"]:
            return f"- **{agency}**: 🟢 {status['latency']}s"
        return f"- **{agency}**: 🔴 unavailable"

    heading = "### 🩺 Agency Health\n"
    return "\n".join([heading, *(_row(name, info) for name, info in health.items())])
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- # ======================================================
124
- # HF-SAFE ASYNC SEARCH HANDLER
125
- # ======================================================
126
 
127
async def run_search(
    query: str,
    hide_stubs: bool,
    enable_extended: bool,
    enabled_extended_agencies: List[str],
):
    """Handle a click on the Search button.

    Args:
        query: Text from the search box; a blank query short-circuits.
        hide_stubs: When true, only results with a truthy ``live`` are kept.
        enable_extended: When true, extended agencies may be searched.
        enabled_extended_agencies: Extended agencies ticked in the UI; only
            names present in ``EXTENDED_AGENCIES`` are used.

    Returns:
        Three Markdown strings: the rendered results (or a message), the
        agency health summary, and the coverage summary.
    """
    if not query.strip():
        return "Enter a search term.", "", ""

    # Base agencies are always searched; extended ones are strictly opt-in.
    agencies = list(BASE_AGENCIES)
    if enable_extended:
        agencies += [
            name for name in enabled_extended_agencies if name in EXTENDED_AGENCIES
        ]

    results, health = await federated_search_async(query, agencies)

    saved_searches.save(query, enable_extended)

    if hide_stubs:
        results = [hit for hit in results if hit.get("live")]

    if results:
        body = "\n\n---\n\n".join(render_result(hit) for hit in results)
    else:
        body = "No results found."

    return body, render_health(health), coverage.render_coverage()
 
155
 
156
 
157
- # ======================================================
158
- # Gradio UI
159
- # ======================================================
160
 
161
# Layout: intro text, search box, option checkboxes, per-agency switches,
# search button, three Markdown output panes, then two collapsible panels.
with gr.Blocks(title="Federated FOIA Document Search") as demo:
    gr.Markdown(
        """
        # 🏛️ Federated FOIA Document Search

        Search **public government FOIA electronic reading rooms** across multiple agencies.

        - **Default:** Safe, public-only sources
        - **Extended mode:** Additional public releases (opt-in)
        - No authentication
        - No classified or restricted systems
        """
    )

    query = gr.Textbox(
        label="Search term",
        placeholder="e.g. UAP, radar incident, AATIP",
    )

    with gr.Row():
        hide_stubs = gr.Checkbox(
            label="Hide stub (non-live) sources",
            value=False,
        )
        enable_extended = gr.Checkbox(
            label="Enable Extended Features (live but sensitive public sources)",
            value=False,
        )

    with gr.Accordion("Extended Agency Kill Switches", open=False):
        extended_agencies = gr.CheckboxGroup(
            choices=EXTENDED_AGENCIES,
            label="Enable specific extended agencies",
        )

    search_btn = gr.Button("🔍 Search")

    results_md = gr.Markdown()
    health_md = gr.Markdown()
    coverage_md = gr.Markdown()

    with gr.Accordion("Saved Searches", open=False):
        # Keep a handle on the component so it can be refreshed after a search.
        saved_md = gr.Markdown(saved_searches.render())

    with gr.Accordion("About & Legal", open=False):
        gr.Markdown(
            """
            **Legal Notice**

            This tool:
            - Accesses **only publicly available FOIA electronic reading rooms**
            - Respects robots.txt and rate limits
            - Does not bypass access controls
            - Is intended for journalism, research, and public-interest use
            """
        )

    # Event wiring is registered after saved_md exists; registration order has
    # no effect on the page layout.
    search_btn.click(
        fn=run_search,
        inputs=[
            query,
            hide_stubs,
            enable_extended,
            extended_agencies,
        ],
        outputs=[
            results_md,
            health_md,
            coverage_md,
        ],
    ).then(
        # run_search stores the query via saved_searches.save; re-render the
        # panel afterwards, otherwise it only ever shows the list captured
        # when the page was built.
        fn=saved_searches.render,
        inputs=None,
        outputs=saved_md,
    )


# ======================================================
# HF ENTRY POINT
# ======================================================

if __name__ == "__main__":
    demo.launch()
 
1
import html
import re
import time
from collections import Counter
from typing import List, Dict

import gradio as gr
5
 
6
+ ###############################################################################
7
+ # GLOBAL STATE (HF SAFE)
8
+ ###############################################################################
9
 
10
# Results of the most recent search; rebound by run_search and read by
# render_preview / next_doc. Being module-level, it is one object shared by
# everything running in this process rather than per-visitor state.
RESULT_CACHE: List[Dict] = []
# Reset to 0 by run_search. Nothing in this file reads it: Previous/Next
# navigation tracks its position through the gr.State component instead.
SELECTED_INDEX: int = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ ###############################################################################
14
+ # UTILITIES
15
+ ###############################################################################
16
 
17
def highlight(text: str, query: str) -> str:
    """Wrap each occurrence of the query's terms in ``<mark>`` tags.

    The query is split on whitespace. Single-character terms are ignored, and
    matching is case-insensitive and literal (terms are regex-escaped).

    Args:
        text: Text to decorate. Returned unchanged when empty.
        query: Raw search text. When it is empty or has no usable term,
            ``text`` is returned unchanged.

    Returns:
        ``text`` with every match wrapped as ``<mark>match</mark>``.
    """
    if not text or not query:
        return text

    # dict.fromkeys removes repeated terms without disturbing their order.
    terms = list(dict.fromkeys(t for t in query.split() if len(t) > 1))
    if not terms:
        return text

    # Regex alternation takes the first alternative that matches, so a short
    # term listed first ("UAP") would win over a longer one ("UAPTF") and only
    # part of the word would be marked. Trying longer terms first avoids that.
    terms.sort(key=len, reverse=True)
    pattern = re.compile(
        "(" + "|".join(re.escape(t) for t in terms) + ")",
        re.IGNORECASE,
    )
    return pattern.sub(r"<mark>\1</mark>", text)
25
 
 
 
 
 
 
 
 
26
 
27
def redaction_score(text: str) -> float:
    """
    Heuristic confidence score that document may contain redactions.
    0.0 = none detected, 1.0 = heavy redaction likelihood

    Each keyword found anywhere in ``text`` (case-insensitive substring test,
    counted at most once per keyword) adds 0.25; the total is capped at 1.0.
    Empty or missing text scores 0.0.
    """
    if not text:
        return 0.0

    # Lower-case once instead of once per keyword.
    lowered = text.lower()
    keywords = ("redact", "b(1)", "b(3)", "withheld", "classified")
    hits = sum(keyword in lowered for keyword in keywords)
    return round(min(1.0, hits * 0.25), 2)
37
 
38
 
39
def badge(r: Dict) -> str:
    """Return the inline-code status and agency badges for a result.

    A result without a ``live`` key is shown as STUB, and one without an
    ``agency`` key is labelled ``UNKNOWN``, so a partially filled record from
    an adapter does not abort rendering of the whole result list.
    """
    live = "🟢 LIVE" if r.get("live") else "🟡 STUB"
    agency = r.get("agency", "UNKNOWN")
    return f"`{live}` · `{agency}`"
42
+
43
+
44
+ ###############################################################################
45
+ # MOCK ADAPTER OUTPUT (REPLACE WITH REAL ADAPTERS)
46
+ ###############################################################################
47
+
48
def run_federated_query(query: str, delay: float = 0.4) -> List[Dict]:
    """
    HF-safe simulated adapter aggregation.

    Placeholder for the real adapters: ``query`` is currently ignored and the
    same three sample records are returned on every call.

    Args:
        query: Search text (unused by the mock).
        delay: Seconds to sleep before answering, imitating network latency.
            Defaults to the original 0.4 s; pass 0 to skip the pause.

    Returns:
        A fresh list of result dicts with the keys ``title``, ``snippet``,
        ``url``, ``agency``, ``source`` and ``live``.
    """
    if delay > 0:
        time.sleep(delay)

    return [
        {
            "title": "UAP Task Force Report (Preliminary)",
            "snippet": "The UAPTF evaluated a number of UAP incidents...",
            "url": "https://www.dni.gov/files/ODNI/documents/assessments/Prelimary-Assessments-UAP-20210625.pdf",
            "agency": "ODNI",
            "source": "ODNI FOIA Reading Room",
            "live": True,
        },
        {
            "title": "CIA Memorandum on Aerial Phenomena",
            "snippet": "This document was partially redacted under b(3)...",
            "url": "https://www.cia.gov/readingroom/docs/DOC_000001.pdf",
            "agency": "CIA",
            "source": "CIA FOIA Reading Room",
            "live": True,
        },
        {
            "title": "Project BLUE BOOK Summary",
            "snippet": "Historical summary of investigations into unidentified objects...",
            "url": "https://www.archives.gov/research/military/air-force/ufos",
            "agency": "USAF",
            "source": "National Archives",
            "live": False,
        },
    ]
80
+
81
 
82
+ ###############################################################################
83
+ # RESULT RENDERING
84
+ ###############################################################################
85
 
86
def agency_counts(results: List[Dict]) -> str:
    """Summarise how many results each agency contributed, as Markdown.

    Args:
        results: Result dicts; each is tallied under its ``agency`` value, or
            under ``UNKNOWN`` when that key is absent.

    Returns:
        A heading followed by one bullet per agency, sorted by agency name.
        With no results only the heading is returned.
    """
    counts = Counter(r.get("agency", "UNKNOWN") for r in results)

    lines = ["### 🏛️ Per-Agency Coverage"]
    lines.extend(
        f"- **{agency}**: {count}" for agency, count in sorted(counts.items())
    )
    return "\n".join(lines)
96
 
 
 
 
 
97
 
98
def render_results(results: List[Dict], query: str) -> str:
    """Render the result list as Markdown cards separated by horizontal rules.

    Args:
        results: Result dicts providing ``title``, ``snippet`` and ``url``
            (plus whatever ``badge`` reads).
        query: Search text; its terms are highlighted in titles and snippets.

    Returns:
        A heading followed by one numbered card per result, each showing the
        highlighted title, badges, redaction score, highlighted snippet and a
        link to the source.
    """
    sections = ["### 📚 Search Results (click to preview)\n"]

    for position, r in enumerate(results, start=1):
        title = highlight(r["title"], query)
        snippet = highlight(r["snippet"], query)
        score = redaction_score(r["snippet"])

        # The card is assembled line by line so its text never depends on how
        # this source file is indented. The blank lines right after the
        # opening <div> and before the closing </div> are required: Markdown
        # placed directly under an HTML tag is treated as raw HTML, which
        # would leave the ** markers around the title unrendered.
        card = "\n".join(
            [
                "",
                '<div style="cursor:pointer" onclick="">',
                "",
                f"**{position}. {title}**",
                f"{badge(r)}",
                f"🛡️ Redaction Confidence: **{score}**",
                "",
                f"{snippet}",
                "",
                f"🔗 [Open Source]({r['url']})",
                "",
                "</div>",
                "",
            ]
        )
        sections.append(card)

    return "\n\n---\n\n".join(sections)
121
 
 
 
 
 
 
 
 
 
122
 
123
+ ###############################################################################
124
+ # PREVIEW PANEL
125
+ ###############################################################################
126
+
127
def render_preview(index: int) -> str:
    """Render the preview pane for the cached result at ``index``.

    Args:
        index: Zero-based position in ``RESULT_CACHE``; out-of-range values
            are clamped to the first or last entry.

    Returns:
        A placeholder when the cache is empty. Otherwise a Markdown header
        (position, title, source, agency, redaction score) followed by an
        ``<iframe>`` pointing at the document URL, or by a short notice when
        the URL is not http(s).
    """
    # Work on one snapshot so the length and the lookup agree even if
    # run_search rebinds the global while this function is running.
    cache = RESULT_CACHE
    if not cache:
        return "_No document selected._"

    index = max(0, min(index, len(cache) - 1))
    r = cache[index]

    score = redaction_score(r["snippet"])

    header = "\n".join(
        [
            "",
            f"### 📄 Document Preview ({index + 1}/{len(cache)})",
            f"**{r['title']}**",
            f"{r['source']} · `{r['agency']}`",
            f"🛡️ Redaction Confidence: **{score}**",
            "",
        ]
    )

    url = str(r["url"])
    # The URL comes from adapter data, so it is not trusted: only embed web
    # links, and escape quotes so the value cannot break out of src="...".
    if not url.lower().startswith(("http://", "https://")):
        return header + "\n_Preview unavailable: unsupported link type._\n"
    safe_url = html.escape(url, quote=True)

    # NOTE(review): this markup is shown through a gr.Markdown component;
    # confirm the installed Gradio version does not strip <iframe> when it
    # sanitises HTML.
    iframe = "\n".join(
        [
            "",
            f'<iframe src="{safe_url}" width="100%" height="550px"',
            'style="border:1px solid #444;border-radius:8px;"></iframe>',
            "",
        ]
    )

    return header + iframe
149
 
 
 
 
150
 
151
+ ###############################################################################
152
+ # SEARCH HANDLER
153
+ ###############################################################################
154
+
155
def run_search(query: str):
    """Handle a click on the Search button.

    Runs the federated query, stores the results in ``RESULT_CACHE`` for the
    preview navigation, and resets ``SELECTED_INDEX``.

    Args:
        query: Text from the search box. ``None`` or whitespace-only input is
            rejected without touching the cache.

    Returns:
        Three Markdown strings: the result list (or a message), the preview
        of the first result, and the per-agency counts.
    """
    global RESULT_CACHE, SELECTED_INDEX

    # Tolerate None as well as blank text so a cleared input cannot raise.
    query = (query or "").strip()
    if not query:
        return "⚠️ Enter a search term.", "_", "_"

    results = run_federated_query(query)

    # NOTE(review): these are process-wide globals, so concurrent visitors
    # overwrite each other's results; per-session gr.State would avoid that.
    RESULT_CACHE = results
    SELECTED_INDEX = 0

    if not results:
        # Say so explicitly instead of showing a bare results heading.
        return "No results found.", render_preview(0), agency_counts(results)

    return (
        render_results(results, query),
        render_preview(0),
        agency_counts(results),
    )
171
 
 
172
 
173
def next_doc(idx):
    """Advance the preview to the next cached document.

    Args:
        idx: Current zero-based position (``None`` is treated as 0).

    Returns:
        ``(preview_markdown, new_index)``. The index stays within the cached
        results and is 0 when the cache is empty.
    """
    # With an empty cache len(...) - 1 is -1; without the floor at 0 that
    # invalid position would be stored back into the UI state.
    last = max(len(RESULT_CACHE) - 1, 0)
    idx = max(0, min((idx or 0) + 1, last))
    return render_preview(idx), idx
176
 
 
 
177
 
178
def prev_doc(idx):
    """Move the preview back to the previous cached document.

    Args:
        idx: Current zero-based position (``None`` is treated as 0).

    Returns:
        ``(preview_markdown, new_index)``. The index stays within the cached
        results and is 0 when the cache is empty.
    """
    # Also cap at the last entry: a position left over from a longer result
    # list would otherwise be stored back unchanged while the preview shows
    # the clamped document, making the button look stuck.
    last = max(len(RESULT_CACHE) - 1, 0)
    idx = max(0, min((idx or 0) - 1, last))
    return render_preview(idx), idx
181
 
182
 
183
+ ###############################################################################
184
+ # UI
185
+ ###############################################################################
186
 
187
# Layout: intro text, search box and button, then a results column beside a
# preview column with Previous/Next buttons, and a coverage summary below.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown(
        """
        # 🏛️ Federated FOIA Document Search
        Search public FOIA reading rooms across agencies.
        """
    )

    query = gr.Textbox(label="Search term", placeholder="e.g. UAP")

    search_btn = gr.Button("🔍 Search")

    # Position of the document currently shown in the preview pane.
    selected_index = gr.State(0)

    with gr.Row():
        with gr.Column(scale=5):
            results_md = gr.Markdown()

        with gr.Column(scale=7):
            preview_md = gr.Markdown()
            with gr.Row():
                prev_btn = gr.Button("⬅️ Previous")
                next_btn = gr.Button("➡️ Next")

    coverage_md = gr.Markdown()

    search_btn.click(
        fn=run_search,
        inputs=query,
        outputs=[results_md, preview_md, coverage_md],
    ).then(
        # run_search always previews the first result, so bring the stored
        # position back in line; otherwise Previous/Next would continue from
        # wherever the previous search left off.
        fn=lambda: 0,
        inputs=None,
        outputs=selected_index,
    )

    prev_btn.click(
        fn=prev_doc,
        inputs=selected_index,
        outputs=[preview_md, selected_index],
    )

    next_btn.click(
        fn=next_doc,
        inputs=selected_index,
        outputs=[preview_md, selected_index],
    )


if __name__ == "__main__":
    app.launch()