GodsDevProject committed on
Commit
7005b69
·
verified ·
1 Parent(s): 7a6f3d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -155
app.py CHANGED
@@ -1,246 +1,203 @@
1
- """
2
- Federal FOIA Intelligence Search
3
- Public Electronic Reading Rooms Only
4
- """
5
-
6
  import gradio as gr
7
  import time
 
 
8
  from urllib.parse import quote_plus
9
- from typing import List, Dict
10
-
11
- # =========================================================
12
- # FEATURE FLAGS
13
- # =========================================================
14
 
15
- ENABLE_EXTENDED = True # Allow stub / blocked agencies (opt-in)
16
- ENABLE_EXPORT = True # Export dynamically gated by live results
17
-
18
- # =========================================================
19
  # BASE ADAPTER
20
- # =========================================================
21
 
22
  class FOIAAdapter:
23
- agency: str = "UNKNOWN"
24
- search_url: str = ""
25
- is_live: bool = True
26
- robots_allowed: bool = True
27
- rate_limit_sec: float = 1.0
28
-
29
- def __init__(self):
30
- self._last_call = 0.0
31
-
32
- def _rate_limit(self):
33
- now = time.time()
34
- if now - self._last_call < self.rate_limit_sec:
35
- time.sleep(self.rate_limit_sec)
36
- self._last_call = time.time()
37
-
38
- def search(self, query: str) -> List[Dict]:
39
- self._rate_limit()
40
  return [{
41
  "agency": self.agency,
42
- "title": f"{self.agency} FOIA Search: {query}",
43
- "snippet": "Public FOIA reading room search link.",
44
- "url": self.search_url.format(q=quote_plus(query)),
45
- "is_live": self.is_live
 
46
  }]
47
 
48
- # =========================================================
49
- # LIVE PUBLIC AGENCIES (SAFE LINK-OUT ONLY)
50
- # =========================================================
51
 
52
- class CIAAdapter(FOIAAdapter):
53
  agency = "CIA"
54
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
55
 
56
- class FBIAdapter(FOIAAdapter):
57
  agency = "FBI"
58
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
59
 
60
- class DOJAdapter(FOIAAdapter):
61
  agency = "DOJ"
62
  search_url = "https://www.justice.gov/foia/library?search={q}"
63
 
64
- class DHSAdapter(FOIAAdapter):
65
  agency = "DHS"
66
- search_url = "https://www.dhs.gov/foia-library?search={q}"
67
 
68
- class StateDeptAdapter(FOIAAdapter):
69
  agency = "State Department"
70
- search_url = "https://foia.state.gov/Search/Search.aspx?searchText={q}"
71
 
72
- class GSAAdapter(FOIAAdapter):
73
  agency = "GSA"
74
- search_url = "https://www.gsa.gov/reference/freedom-of-information-act-foia/foia-library"
75
 
76
- class NSAAdapter(FOIAAdapter):
77
  agency = "NSA"
78
- search_url = "https://www.nsa.gov/Helpful-Links/FOIA/FOIA-Reading-Room/"
 
 
 
 
79
 
80
- # =========================================================
81
- # STUB / BLOCKED AGENCIES (INFORMATIONAL ONLY)
82
- # =========================================================
83
 
84
  class StubAdapter(FOIAAdapter):
85
  is_live = False
86
- robots_allowed = False
87
 
88
- def __init__(self, agency: str):
89
  self.agency = agency
90
  self.search_url = ""
91
 
92
- def search(self, query: str):
93
  return [{
94
  "agency": self.agency,
95
- "title": "Coverage Indicator Only",
96
- "snippet": "This agency does not permit automated public search.",
97
  "url": "",
98
- "is_live": False
 
 
99
  }]
100
 
101
- # =========================================================
102
- # REGISTRY
103
- # =========================================================
104
-
105
- LIVE_ADAPTERS = [
106
- CIAAdapter(),
107
- FBIAdapter(),
108
- DOJAdapter(),
109
- DHSAdapter(),
110
- StateDeptAdapter(),
111
- GSAAdapter(),
112
- NSAAdapter(),
113
- ]
114
-
115
  STUB_ADAPTERS = [
116
  StubAdapter("DIA"),
117
  StubAdapter("NGA"),
118
  StubAdapter("NRO"),
119
  StubAdapter("TEN-CAP"),
120
  StubAdapter("AATIP"),
121
- StubAdapter("Special Activities"),
122
  StubAdapter("SAP"),
 
123
  ]
124
 
125
- # =========================================================
126
- # SEARCH ENGINE
127
- # =========================================================
128
 
129
- def run_search(query: str, include_stubs: bool):
130
- rows = []
131
- live_count = 0
132
 
133
- adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])
 
 
 
 
 
134
 
135
- for adapter in adapters:
136
- try:
137
- results = adapter.search(query)
138
- except Exception:
139
- continue
140
 
141
- for r in results:
142
- if r["is_live"]:
143
- live_count += 1
144
 
 
 
145
  rows.append([
146
  r["agency"],
147
  "LIVE" if r["is_live"] else "STUB",
148
  r["title"],
149
- r["snippet"],
150
- r["url"] if r["url"] else "—",
151
- r["is_live"]
 
152
  ])
153
 
154
- export_enabled = live_count > 0
155
- note = (
156
- "✅ Live public results found. Export enabled."
157
- if export_enabled
158
- else "⚠️ No live public results found. Export disabled."
159
- )
160
-
161
- return rows, gr.update(interactive=export_enabled), note
162
 
163
- # =========================================================
164
- # PDF PREVIEW (SAFE EMBED)
165
- # =========================================================
166
 
167
- def preview_document(url: str):
168
- if not url or not url.lower().endswith(".pdf"):
169
- return "<i>No preview available</i>"
170
 
171
- return f"""
172
- <iframe
173
- src="{url}"
174
- width="100%"
175
- height="500px"
176
- style="border:1px solid #ccc;"
177
- ></iframe>
178
- """
179
 
180
- # =========================================================
181
- # EXPORT (LIVE ONLY)
182
- # =========================================================
183
 
184
- def export_zip(results):
185
- return "Export prepared (live public documents only)."
186
 
187
- # =========================================================
188
  # UI
189
- # =========================================================
190
 
191
  with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
192
- gr.Markdown("""
193
- # 🏛️ Federal FOIA Intelligence Search
194
- ### Public Electronic Reading Rooms Only
195
- """)
196
-
197
- with gr.Row():
198
- query = gr.Textbox(label="Search FOIA Libraries")
199
- include_stubs = gr.Checkbox(
200
- label="Include Extended Coverage (Stub / Blocked Agencies)",
201
- value=False
202
- )
 
 
 
 
203
 
204
  search_btn = gr.Button("Search")
205
 
206
  results = gr.Dataframe(
207
  headers=[
208
  "Agency",
209
- "Source Type",
210
  "Title",
211
- "Snippet",
212
- "Public URL",
213
- "Exportable"
 
214
  ],
215
- interactive=True,
216
- wrap=True
217
  )
218
 
219
- status_note = gr.Markdown()
220
-
221
- with gr.Row():
222
- export_btn = gr.Button("Export ZIP", interactive=False)
223
- export_output = gr.Markdown()
224
 
225
- gr.Markdown("### 📄 Document Preview (PDFs only)")
226
- preview_html = gr.HTML()
227
 
228
- # EVENTS
229
  search_btn.click(
230
  run_search,
231
  inputs=[query, include_stubs],
232
- outputs=[results, export_btn, status_note]
233
  )
234
 
235
  results.select(
236
- lambda evt: preview_document(evt.value[4]),
237
- outputs=preview_html
238
- )
239
-
240
- export_btn.click(
241
- export_zip,
242
- inputs=[results],
243
- outputs=[export_output]
244
  )
245
 
246
  app.launch()
 
 
 
 
 
 
1
  import gradio as gr
2
  import time
3
+ import hashlib
4
+ from datetime import datetime
5
  from urllib.parse import quote_plus
 
 
 
 
 
6
 
7
+ # ======================================================
 
 
 
8
  # BASE ADAPTER
9
+ # ======================================================
10
 
11
  class FOIAAdapter:
12
+ agency = "UNKNOWN"
13
+ search_url = ""
14
+ is_live = True
15
+
16
+ def search(self, query):
17
+ start = time.time()
18
+ url = self.search_url.format(q=quote_plus(query))
19
+ latency = round(time.time() - start, 3)
20
+
 
 
 
 
 
 
 
 
21
  return [{
22
  "agency": self.agency,
23
+ "title": f"{self.agency} FOIA Search Results",
24
+ "url": url,
25
+ "latency": latency,
26
+ "is_live": self.is_live,
27
+ "timestamp": datetime.utcnow().isoformat()
28
  }]
29
 
30
+ # ======================================================
31
+ # LIVE AGENCIES (SAFE, PUBLIC FOIA LIBRARIES)
32
+ # ======================================================
33
 
34
+ class CIA(FOIAAdapter):
35
  agency = "CIA"
36
  search_url = "https://www.cia.gov/readingroom/search/site/{q}"
37
 
38
+ class FBI(FOIAAdapter):
39
  agency = "FBI"
40
  search_url = "https://vault.fbi.gov/search?SearchableText={q}"
41
 
42
+ class DOJ(FOIAAdapter):
43
  agency = "DOJ"
44
  search_url = "https://www.justice.gov/foia/library?search={q}"
45
 
46
+ class DHS(FOIAAdapter):
47
  agency = "DHS"
48
+ search_url = "https://www.dhs.gov/foia-library/search?search={q}"
49
 
50
+ class STATE(FOIAAdapter):
51
  agency = "State Department"
52
+ search_url = "https://foia.state.gov/Search/Search.aspx?q={q}"
53
 
54
+ class GSA(FOIAAdapter):
55
  agency = "GSA"
56
+ search_url = "https://www.gsa.gov/reference/freedom-of-information-act-foia/foia-library?search={q}"
57
 
58
+ class NSA(FOIAAdapter):
59
  agency = "NSA"
60
+ search_url = "https://www.nsa.gov/resources/everyone/foia/reading-room/?q={q}"
61
+
62
+ LIVE_ADAPTERS = [
63
+ CIA(), FBI(), DOJ(), DHS(), STATE(), GSA(), NSA()
64
+ ]
65
 
66
+ # ======================================================
67
+ # STUB ADAPTERS (OPT-IN, NON-EXPORTABLE)
68
+ # ======================================================
69
 
70
  class StubAdapter(FOIAAdapter):
71
  is_live = False
 
72
 
73
+ def __init__(self, agency):
74
  self.agency = agency
75
  self.search_url = ""
76
 
77
+ def search(self, query):
78
  return [{
79
  "agency": self.agency,
80
+ "title": "Extended coverage indicator only (STUB)",
 
81
  "url": "",
82
+ "latency": None,
83
+ "is_live": False,
84
+ "timestamp": None
85
  }]
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  STUB_ADAPTERS = [
88
  StubAdapter("DIA"),
89
  StubAdapter("NGA"),
90
  StubAdapter("NRO"),
91
  StubAdapter("TEN-CAP"),
92
  StubAdapter("AATIP"),
 
93
  StubAdapter("SAP"),
94
+ StubAdapter("Special Activities"),
95
  ]
96
 
97
+ # ======================================================
98
+ # UTILITIES
99
+ # ======================================================
100
 
101
+ def citation_hash(record):
102
+ raw = f"{record['agency']}{record['url']}{record['timestamp']}"
103
+ return hashlib.sha256(raw.encode()).hexdigest()[:16]
104
 
105
+ def bluebook(record):
106
+ return (
107
+ f"{record['agency']}, {record['title']}, "
108
+ f"FOIA Electronic Reading Room, {record['url']} "
109
+ f"(retrieved {datetime.utcnow().strftime('%b %d, %Y')})."
110
+ )
111
 
112
+ # ======================================================
113
+ # SEARCH HANDLER
114
+ # ======================================================
 
 
115
 
116
+ def run_search(query, include_stubs):
117
+ adapters = LIVE_ADAPTERS + (STUB_ADAPTERS if include_stubs else [])
118
+ rows = []
119
 
120
+ for adapter in adapters:
121
+ for r in adapter.search(query):
122
  rows.append([
123
  r["agency"],
124
  "LIVE" if r["is_live"] else "STUB",
125
  r["title"],
126
+ r["url"],
127
+ r["latency"],
128
+ citation_hash(r) if r["is_live"] else "",
129
+ bluebook(r) if r["is_live"] else "Not exportable (STUB)"
130
  ])
131
 
132
+ export_enabled = any(row[1] == "LIVE" for row in rows)
 
 
 
 
 
 
 
133
 
134
+ return rows, gr.update(interactive=export_enabled)
 
 
135
 
136
+ # ======================================================
137
+ # PREVIEW HANDLER
138
+ # ======================================================
139
 
140
+ def preview_selected(row):
141
+ if not row:
142
+ return "<i>Select a result to preview</i>"
 
 
 
 
 
143
 
144
+ url = row[3]
145
+ if isinstance(url, str) and url.lower().endswith(".pdf"):
146
+ return f"<iframe src='{url}' width='100%' height='520'></iframe>"
147
 
148
+ return "<i>No inline preview available (non-PDF or external page)</i>"
 
149
 
150
+ # ======================================================
151
  # UI
152
+ # ======================================================
153
 
154
  with gr.Blocks(title="Federal FOIA Intelligence Search") as app:
155
+ gr.Markdown(
156
+ """
157
+ # 🏛️ Federal FOIA Intelligence Search
158
+ **Public Electronic Reading Rooms Only**
159
+
160
+ - LIVE results are exportable and citation-ready
161
+ - STUB results are informational only and cannot be exported
162
+ """
163
+ )
164
+
165
+ query = gr.Textbox(label="Search FOIA Libraries")
166
+ include_stubs = gr.Checkbox(
167
+ label="Include Extended Coverage (STUB — non-exportable)",
168
+ value=False
169
+ )
170
 
171
  search_btn = gr.Button("Search")
172
 
173
  results = gr.Dataframe(
174
  headers=[
175
  "Agency",
176
+ "Type",
177
  "Title",
178
+ "URL",
179
+ "Latency (s)",
180
+ "Citation Hash",
181
+ "Bluebook Citation"
182
  ],
183
+ interactive=True
 
184
  )
185
 
186
+ export_note = gr.Markdown(
187
+ "*Stub results are informational and cannot be exported.*"
188
+ )
 
 
189
 
190
+ preview_panel = gr.HTML()
 
191
 
 
192
  search_btn.click(
193
  run_search,
194
  inputs=[query, include_stubs],
195
+ outputs=[results, export_note]
196
  )
197
 
198
  results.select(
199
+ fn=lambda e: preview_selected(e.value),
200
+ outputs=preview_panel
 
 
 
 
 
 
201
  )
202
 
203
  app.launch()