GodsDevProject commited on
Commit
7a6f3d9
·
verified ·
1 Parent(s): 8a325a6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +215 -153
app.py CHANGED
@@ -1,184 +1,246 @@
 
 
 
 
 
1
  import gradio as gr
 
 
2
  from typing import List, Dict
3
 
4
- from ingest.registry import get_all_adapters
5
- from ingest.export import export_results
6
- from ingest.health import get_adapter_health
7
- from ingest.coverage import coverage_summary
8
- from ingest.discovery import agency_discovery
9
- from ingest.semantic import semantic_refine, semantic_available
10
- from ingest.timeline import release_timeline
11
- from ingest.latency import latency_badges
12
- from analytics.events import log_event
13
-
14
- ALL_ADAPTERS = get_all_adapters()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- def run_search(
17
- query: str,
18
- include_stubs: bool,
19
- enable_extended: bool,
20
- acknowledge_extended: bool,
21
- enable_semantic: bool,
22
- ) -> List[Dict]:
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- log_event("search", {"len": len(query or "")})
25
 
26
- if not query:
27
- return []
 
28
 
29
- results = []
 
 
30
 
31
- for adapter in ALL_ADAPTERS:
32
- if not include_stubs and not adapter.is_live:
33
- continue
 
 
 
 
 
34
 
35
- if adapter.is_extended:
36
- if not enable_extended or not acknowledge_extended:
37
- continue
38
 
39
- try:
40
- docs = adapter.search(query)
41
- for d in docs:
42
- d.setdefault("agency", adapter.name)
43
- d.setdefault("status", "🟢 Live" if adapter.is_live else "🔒 Stub")
44
- d.setdefault("exportable", adapter.is_live)
45
- results.append(d)
46
- except Exception as e:
47
- results.append({
48
- "agency": adapter.name,
49
- "title": "Adapter Error",
50
- "snippet": str(e),
51
- "url": "",
52
- "status": "⚠️ Error",
53
- "exportable": False,
54
- })
55
-
56
- if enable_semantic and semantic_available():
57
- results = semantic_refine(query, results)
58
-
59
- return results
60
-
61
-
62
- def table_from_results(results):
63
- return [
64
- [
65
- r.get("agency"),
66
- r.get("status"),
67
- r.get("title"),
68
- r.get("snippet"),
69
- r.get("url"),
70
- ]
71
- for r in results
72
- ]
73
-
74
-
75
- def export_handler(results):
76
- exportable = [r for r in results if r.get("exportable")]
77
- if not exportable:
78
- return gr.File.update(visible=False)
79
- return gr.File.update(value=export_results(exportable), visible=True)
80
-
81
-
82
- with gr.Blocks() as app:
83
- gr.Markdown(
84
- "# 🏛️ Federal FOIA Intelligence Search\n"
85
- "Public Electronic Reading Rooms only"
86
- )
87
 
88
- gr.Markdown(
89
- "ℹ️ Stub results are informational and cannot be exported.\n\n"
90
- "Semantic refinement is optional and runs only on returned results."
91
- )
92
 
93
- query = gr.Textbox(label="Search query")
 
 
 
 
94
 
95
  with gr.Row():
96
- include_stubs = gr.Checkbox(label="Include Stub Results", value=True)
97
- enable_extended = gr.Checkbox(label="Enable Extended Coverage", value=False)
98
- enable_semantic = gr.Checkbox(
99
- label="Enable Semantic Refinement (Experimental)",
100
- value=False,
101
- interactive=semantic_available()
102
  )
103
 
104
- acknowledge_extended = gr.Checkbox(
105
- label="I understand some agencies block automated access",
106
- value=False
107
- )
108
-
109
  search_btn = gr.Button("Search")
110
 
111
- results_state = gr.State([])
112
-
113
- results_table = gr.Dataframe(
114
- headers=["Agency", "Status", "Title", "Snippet", "URL"],
115
- wrap=True,
116
- interactive=False
 
 
 
 
 
117
  )
118
 
119
- export_btn = gr.Button("Export Results (ZIP)", interactive=False)
120
- export_file = gr.File(visible=False)
121
-
122
- gr.Markdown("## 📊 Coverage Heatmap")
123
- coverage_table = gr.Dataframe(
124
- headers=["Agency", "Result Count"],
125
- interactive=False
126
- )
127
 
128
- gr.Markdown("## 🕒 Release Timeline")
129
- timeline_table = gr.Dataframe(
130
- headers=["Period", "Documents"],
131
- interactive=False
132
- )
133
 
134
- gr.Markdown("## Agency Latency Badges")
135
- latency_table = gr.Dataframe(
136
- headers=["Agency", "Latency (s)", "Badge"],
137
- interactive=False
138
- )
139
 
140
- gr.Markdown("## 🌐 Agency Discovery")
141
- discovery_table = gr.Dataframe(
142
- headers=["Agency", "Status", "Reason"],
143
- interactive=False
 
144
  )
145
 
146
- search_btn.click(
147
- fn=run_search,
148
- inputs=[
149
- query,
150
- include_stubs,
151
- enable_extended,
152
- acknowledge_extended,
153
- enable_semantic,
154
- ],
155
- outputs=results_state
156
- ).then(
157
- fn=lambda r: (
158
- table_from_results(r),
159
- coverage_summary(r),
160
- release_timeline(r),
161
- gr.Button.update(interactive=any(x.get("exportable") for x in r))
162
- ),
163
- inputs=results_state,
164
- outputs=[
165
- results_table,
166
- coverage_table,
167
- timeline_table,
168
- export_btn
169
- ]
170
  )
171
 
172
  export_btn.click(
173
- fn=export_handler,
174
- inputs=results_state,
175
- outputs=export_file
176
  )
177
 
178
- latency_table.value = latency_badges(ALL_ADAPTERS)
179
- discovery_table.value = agency_discovery()
180
-
181
- gr.Markdown("## 🔍 Adapter Health")
182
- gr.JSON(get_adapter_health())
183
-
184
  app.launch()
 
1
+ """
2
+ Federal FOIA Intelligence Search
3
+ Public Electronic Reading Rooms Only
4
+ """
5
+
6
import html
import time
from typing import Dict, List
from urllib.parse import quote_plus

import gradio as gr
10
 
11
# =========================================================
# FEATURE FLAGS
# =========================================================

ENABLE_EXTENDED = True   # Allow stub / blocked agencies (opt-in)
ENABLE_EXPORT = True     # Export dynamically gated by live results
# NOTE(review): neither flag is read anywhere in this file — stub inclusion
# is driven by the UI checkbox and export gating by run_search(); confirm
# whether these constants are vestigial or consumed elsewhere.
17
+
18
# =========================================================
# BASE ADAPTER
# =========================================================

class FOIAAdapter:
    """Base adapter that builds a public FOIA reading-room search link.

    Subclasses set ``agency`` and ``search_url`` (with a ``{q}`` placeholder
    for the URL-encoded query).  ``search`` performs no network I/O — it only
    constructs a link-out to the agency's own public search page.
    """

    agency: str = "UNKNOWN"
    search_url: str = ""
    is_live: bool = True           # live adapters produce exportable results
    robots_allowed: bool = True
    rate_limit_sec: float = 1.0    # minimum spacing between search() calls

    def __init__(self):
        # Timestamp of the previous search() call (0.0 = never called).
        self._last_call = 0.0

    def _rate_limit(self):
        """Sleep just long enough to honor ``rate_limit_sec`` between calls."""
        now = time.time()
        elapsed = now - self._last_call
        if elapsed < self.rate_limit_sec:
            # BUG FIX: the original slept the full rate_limit_sec regardless
            # of how much of the interval had already elapsed; sleep only
            # the remainder.
            time.sleep(self.rate_limit_sec - elapsed)
        self._last_call = time.time()

    def search(self, query: str) -> List[Dict]:
        """Return a single result row linking to the agency's search page."""
        self._rate_limit()
        return [{
            "agency": self.agency,
            "title": f"{self.agency} FOIA Search: {query}",
            "snippet": "Public FOIA reading room search link.",
            "url": self.search_url.format(q=quote_plus(query)),
            "is_live": self.is_live,
        }]
47
+
48
# =========================================================
# LIVE PUBLIC AGENCIES (SAFE LINK-OUT ONLY)
# =========================================================
# Each subclass only supplies the agency name and its public reading-room
# search URL; all behavior lives in FOIAAdapter.

class CIAAdapter(FOIAAdapter):
    # CIA Electronic Reading Room full-text search.
    agency = "CIA"
    search_url = "https://www.cia.gov/readingroom/search/site/{q}"

class FBIAdapter(FOIAAdapter):
    # FBI "Vault" FOIA library search.
    agency = "FBI"
    search_url = "https://vault.fbi.gov/search?SearchableText={q}"

class DOJAdapter(FOIAAdapter):
    agency = "DOJ"
    search_url = "https://www.justice.gov/foia/library?search={q}"

class DHSAdapter(FOIAAdapter):
    agency = "DHS"
    search_url = "https://www.dhs.gov/foia-library?search={q}"

class StateDeptAdapter(FOIAAdapter):
    agency = "State Department"
    search_url = "https://foia.state.gov/Search/Search.aspx?searchText={q}"

class GSAAdapter(FOIAAdapter):
    # NOTE(review): this URL has no {q} placeholder, so .format(q=...) leaves
    # it unchanged — the link goes to the library landing page, not a query.
    agency = "GSA"
    search_url = "https://www.gsa.gov/reference/freedom-of-information-act-foia/foia-library"

class NSAAdapter(FOIAAdapter):
    # NOTE(review): no {q} placeholder here either — landing page only.
    agency = "NSA"
    search_url = "https://www.nsa.gov/Helpful-Links/FOIA/FOIA-Reading-Room/"
79
+
80
# =========================================================
# STUB / BLOCKED AGENCIES (INFORMATIONAL ONLY)
# =========================================================

class StubAdapter(FOIAAdapter):
    """Informational placeholder for agencies that block automated search.

    Produces a single non-exportable "coverage indicator" row; it never
    builds a URL and does not rate-limit.
    """

    is_live = False
    robots_allowed = False

    def __init__(self, agency: str):
        # BUG FIX: the original skipped FOIAAdapter.__init__, leaving
        # self._last_call unset; any call to the inherited _rate_limit
        # would raise AttributeError.
        super().__init__()
        self.agency = agency
        self.search_url = ""

    def search(self, query: str) -> List[Dict]:
        """Return a fixed informational row; *query* is intentionally unused."""
        return [{
            "agency": self.agency,
            "title": "Coverage Indicator Only",
            "snippet": "This agency does not permit automated public search.",
            "url": "",
            "is_live": False,
        }]
100
+
101
# =========================================================
# REGISTRY
# =========================================================
# One instance per agency.  Live adapters link out to real public search
# pages; stub adapters only signal coverage and are never exportable.

LIVE_ADAPTERS = [
    adapter_cls()
    for adapter_cls in (
        CIAAdapter,
        FBIAdapter,
        DOJAdapter,
        DHSAdapter,
        StateDeptAdapter,
        GSAAdapter,
        NSAAdapter,
    )
]

STUB_ADAPTERS = [
    StubAdapter(agency_name)
    for agency_name in (
        "DIA",
        "NGA",
        "NRO",
        "TEN-CAP",
        "AATIP",
        "Special Activities",
        "SAP",
    )
]
124
+
125
# =========================================================
# SEARCH ENGINE
# =========================================================

def run_search(query: str, include_stubs: bool):
    """Query every registered adapter and assemble the UI outputs.

    Args:
        query: Free-text search string forwarded to each adapter.
        include_stubs: When True, stub/blocked agencies are appended to the
            live registry so their coverage indicators appear in the table.

    Returns:
        A 3-tuple of (rows, export_button_update, status_note): table rows
        shaped as [agency, source type, title, snippet, url, exportable],
        a ``gr.update`` toggling the export button, and a markdown note.
    """
    rows = []
    live_count = 0

    adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])

    for adapter in adapters:
        try:
            results = adapter.search(query)
        except Exception as exc:
            # BUG FIX: the original silently dropped failing adapters; the
            # prior revision of this app surfaced an error row instead —
            # restore that so users can see which agency failed.  The row
            # is never exportable.
            rows.append([adapter.agency, "ERROR", "Adapter Error",
                         str(exc), "—", False])
            continue

        for r in results:
            if r["is_live"]:
                live_count += 1
            rows.append([
                r["agency"],
                "LIVE" if r["is_live"] else "STUB",
                r["title"],
                r["snippet"],
                r["url"] if r["url"] else "—",
                r["is_live"],
            ])

    # Export is gated on having at least one live (public, exportable) row.
    export_enabled = live_count > 0
    note = (
        "✅ Live public results found. Export enabled."
        if export_enabled
        else "⚠️ No live public results found. Export disabled."
    )

    return rows, gr.update(interactive=export_enabled), note
162
 
163
# =========================================================
# PDF PREVIEW (SAFE EMBED)
# =========================================================

def preview_document(url: str):
    """Return an HTML snippet embedding *url* in an iframe (PDFs only).

    Empty or non-PDF URLs get a "no preview" placeholder.

    SECURITY FIX: the URL is HTML-escaped before interpolation — the results
    table is ``interactive=True`` (user-editable), so the selected value
    cannot be trusted not to contain attribute-breaking characters.
    """
    if not url or not url.lower().endswith(".pdf"):
        return "<i>No preview available</i>"

    safe_url = html.escape(url, quote=True)
    return f"""
    <iframe
        src="{safe_url}"
        width="100%"
        height="500px"
        style="border:1px solid #ccc;"
    ></iframe>
    """
179
 
180
# =========================================================
# EXPORT (LIVE ONLY)
# =========================================================

def export_zip(results):
    """Placeholder export handler; real ZIP assembly is not implemented yet.

    Ignores *results* and returns a fixed status string for the UI.
    """
    status_message = "Export prepared (live public documents only)."
    return status_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
# =========================================================
# UI
# =========================================================

with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
    gr.Markdown("""
    # 🏛️ Federal FOIA Intelligence Search
    ### Public Electronic Reading Rooms Only
    """)

    with gr.Row():
        query = gr.Textbox(label="Search FOIA Libraries")
        include_stubs = gr.Checkbox(
            label="Include Extended Coverage (Stub / Blocked Agencies)",
            value=False
        )

    search_btn = gr.Button("Search")

    # Column 4 is the public URL; column 5 the exportable flag.
    results = gr.Dataframe(
        headers=[
            "Agency",
            "Source Type",
            "Title",
            "Snippet",
            "Public URL",
            "Exportable"
        ],
        interactive=True,
        wrap=True
    )

    status_note = gr.Markdown()

    with gr.Row():
        export_btn = gr.Button("Export ZIP", interactive=False)
        export_output = gr.Markdown()

    gr.Markdown("### 📄 Document Preview (PDFs only)")
    preview_html = gr.HTML()

    # EVENTS
    search_btn.click(
        run_search,
        inputs=[query, include_stubs],
        outputs=[results, export_btn, status_note]
    )

    def _preview_selected(data, evt: gr.SelectData):
        """Preview the document on the clicked row.

        BUG FIX: the original passed ``lambda evt: preview_document(evt.value[4])``.
        Gradio only injects SelectData into a parameter annotated
        ``gr.SelectData``, and ``evt.value`` is the clicked CELL's value
        (a string), so ``[4]`` indexed a character.  Look the URL up from
        the table data by the selected row index instead.
        """
        row = evt.index[0]
        try:
            # data may arrive as a pandas DataFrame or a list of lists.
            url = data.iloc[row, 4] if hasattr(data, "iloc") else data[row][4]
        except (IndexError, KeyError):
            return "<i>No preview available</i>"
        return preview_document(url)

    results.select(
        _preview_selected,
        inputs=[results],
        outputs=preview_html
    )

    export_btn.click(
        export_zip,
        inputs=[results],
        outputs=[export_output]
    )

app.launch()