GodsDevProject committed on
Commit
2826d28
·
verified ·
1 Parent(s): 9ee357c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -86
app.py CHANGED
@@ -5,90 +5,91 @@ from typing import List, Dict
5
  from collections import defaultdict
6
 
7
  ###############################################################################
8
- # GLOBAL STATE
9
  ###############################################################################
10
 
11
  RESULT_CACHE: List[Dict] = []
12
- SELECTED_INDEX = 0
13
 
14
  ###############################################################################
15
- # ROBOTS.TXT POLICY
16
  ###############################################################################
17
 
18
  AGENCY_POLICY = {
19
- "CIA": {"robots_allowed": True, "reason": "Explicit public FOIA reading room"},
20
- "ODNI": {"robots_allowed": True, "reason": "Public PDF hosting"},
21
- "USAF": {"robots_allowed": True, "reason": "National Archives mirror"},
22
- "NSA": {"robots_allowed": False, "reason": "robots.txt disallows crawling"},
 
 
 
23
  }
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ###############################################################################
26
  # FOIA EXEMPTION CLASSIFIER
27
  ###############################################################################
28
 
29
  B_CODES = {
30
  "b(1)": "National Security",
31
- "b(3)": "Statutory Exemption",
32
  "b(5)": "Deliberative Process",
33
  "b(7)": "Law Enforcement",
34
  }
35
 
36
-
37
  def classify_exemptions(text: str) -> Dict:
38
- found = []
39
- for code, desc in B_CODES.items():
40
- if code in text.lower():
41
- found.append(code)
42
 
43
- confidence = round(min(1.0, len(found) * 0.3), 2)
44
-
45
- return {
46
- "codes": found,
47
- "confidence": confidence,
48
- }
49
 
 
 
 
50
 
51
  ###############################################################################
52
- # SEMANTIC CLUSTERING (HF SAFE)
53
  ###############################################################################
54
 
55
- def semantic_cluster(results: List[Dict]) -> Dict[str, List[Dict]]:
56
  clusters = defaultdict(list)
57
-
58
  for r in results:
59
- title = r["title"].lower()
60
- if "uap" in title or "aerial" in title:
61
  clusters["๐Ÿ›ธ UAP / Aerial Phenomena"].append(r)
62
- elif "intelligence" in title:
63
  clusters["๐Ÿง  Intelligence Activities"].append(r)
64
  else:
65
  clusters["๐Ÿ“„ General Records"].append(r)
66
-
67
  return clusters
68
 
69
-
70
  ###############################################################################
71
- # UTILITIES
72
  ###############################################################################
73
 
74
- def highlight(text: str, query: str) -> str:
75
- if not query:
76
- return text
77
- pattern = re.compile(re.escape(query), re.IGNORECASE)
78
- return pattern.sub(r"<mark>\g<0></mark>", text)
79
-
80
-
81
- def redaction_score(text: str) -> float:
82
- hits = sum(k in text.lower() for k in ["redact", "withheld", "b("])
83
- return round(min(1.0, hits * 0.25), 2)
84
-
85
-
86
- ###############################################################################
87
- # MOCK LIVE QUERY (REPLACE ADAPTERS LATER)
88
- ###############################################################################
89
-
90
- def run_query(query: str) -> List[Dict]:
91
- time.sleep(0.4)
92
 
93
  raw = [
94
  {
@@ -114,89 +115,127 @@ def run_query(query: str) -> List[Dict]:
114
  },
115
  ]
116
 
117
- # Robots enforcement
118
  allowed = []
119
  for r in raw:
120
  policy = AGENCY_POLICY.get(r["agency"], {})
121
- if policy.get("robots_allowed", False):
122
- allowed.append(r)
 
 
 
123
 
124
  return allowed
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  ###############################################################################
128
  # RENDERERS
129
  ###############################################################################
130
 
131
- def render_clusters(results: List[Dict], query: str) -> str:
132
- clusters = semantic_cluster(results)
133
  blocks = []
134
 
135
- for cluster, items in clusters.items():
136
- section = [f"## {cluster}"]
137
- for i, r in enumerate(items):
138
- idx = RESULT_CACHE.index(r)
139
- exemptions = classify_exemptions(r["snippet"])
140
 
141
  section.append(
142
  f"""
143
  **{highlight(r['title'], query)}**
144
- ๐Ÿ›๏ธ {r['agency']} ยท ๐Ÿ›ก๏ธ Redaction {redaction_score(r['snippet'])}
145
- โš–๏ธ Exemptions: `{', '.join(exemptions['codes']) or 'None'}`
146
- ๐Ÿ”Ž Confidence: **{exemptions['confidence']}**
147
-
148
- ๐Ÿ”— [Source]({r['url']})
149
 
150
- โžก๏ธ Select index **{idx}**
 
151
  """
152
  )
153
-
154
  blocks.append("\n\n".join(section))
155
 
156
  return "\n\n---\n\n".join(blocks)
157
 
158
-
159
  def render_preview(index: int) -> str:
160
  if not RESULT_CACHE:
161
  return "_No document selected._"
162
 
163
  r = RESULT_CACHE[index]
164
- exemptions = classify_exemptions(r["snippet"])
 
 
 
 
 
165
 
166
  return f"""
167
- ### ๐Ÿ“„ Preview
168
  **{r['title']}**
169
  ๐Ÿ›๏ธ {r['agency']} ยท {r['source']}
170
 
171
  ๐Ÿ›ก๏ธ Redaction Risk: **{redaction_score(r['snippet'])}**
172
- โš–๏ธ FOIA Exemptions: `{', '.join(exemptions['codes']) or 'None'}`
173
- ๐Ÿ”Ž Confidence: **{exemptions['confidence']}**
174
 
175
- <iframe src="{r['url']}" width="100%" height="520px"
176
- style="border-radius:8px;border:1px solid #444;"></iframe>
177
  """
178
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  ###############################################################################
181
  # EVENT HANDLERS
182
  ###############################################################################
183
 
184
- def search(query: str):
185
- global RESULT_CACHE, SELECTED_INDEX
186
- RESULT_CACHE = run_query(query)
187
- SELECTED_INDEX = 0
188
 
189
  return (
190
- render_clusters(RESULT_CACHE, query),
191
  render_preview(0),
 
 
192
  )
193
 
194
-
195
  def select_index(idx: int):
196
- global SELECTED_INDEX
197
- SELECTED_INDEX = idx
 
198
  return render_preview(idx)
199
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  ###############################################################################
202
  # UI
@@ -205,29 +244,43 @@ def select_index(idx: int):
205
  with gr.Blocks(theme=gr.themes.Soft()) as app:
206
  gr.Markdown("# ๐Ÿ›๏ธ Federated FOIA Intelligence Search")
207
 
208
- query = gr.Textbox(label="Search")
209
  search_btn = gr.Button("๐Ÿ” Search")
210
 
211
  with gr.Row():
212
  with gr.Column(scale=5):
213
  results_md = gr.Markdown()
 
 
214
 
215
  with gr.Column(scale=7):
216
  preview_md = gr.Markdown()
217
-
218
- select_box = gr.Number(label="Select result index", precision=0)
 
 
219
 
220
  search_btn.click(
221
- fn=search,
222
  inputs=query,
223
- outputs=[results_md, preview_md],
224
  )
225
 
226
- select_box.change(
227
- fn=select_index,
228
- inputs=select_box,
229
  outputs=preview_md,
230
  )
231
 
 
 
 
 
 
 
 
 
 
 
232
  if __name__ == "__main__":
233
  app.launch()
 
5
  from collections import defaultdict
6
 
7
  ###############################################################################
8
+ # GLOBAL STATE (HF SAFE)
9
  ###############################################################################
10
 
11
  RESULT_CACHE: List[Dict] = []
12
+ CURRENT_INDEX = 0
13
 
14
  ###############################################################################
15
+ # AGENCY POLICY / ROBOTS / LIVE SAFETY
16
  ###############################################################################
17
 
# Per-agency access policy, keyed by agency name.
#   "robots": the search filter skips any agency whose flag is False
#             (agencies missing from this table are skipped as well).
#   "live":   stored next to "robots"; nothing in the visible search path
#             reads it yet.  NOTE(review): confirm where it is consumed.
AGENCY_POLICY = {
    "CIA": {"robots": True, "live": True},
    "FBI": {"robots": True, "live": True},
    "ODNI": {"robots": True, "live": True},
    "USAF": {"robots": True, "live": True},
    "NSA": {"robots": False, "live": False},
    "NRO": {"robots": False, "live": False},
    "SAP": {"robots": False, "live": False},
}
27
 
28
+ ###############################################################################
29
+ # KILL SWITCH (AUTO + MANUAL SAFE)
30
+ ###############################################################################
31
+
class KillSwitch:
    """Registry of agencies that have been switched off.

    An agency counts as enabled unless it has been explicitly disabled.
    The reason supplied at disable time is kept and can be read back.
    """

    def __init__(self):
        # Maps agency name -> reason string given when it was disabled.
        self.disabled = {}

    def disable(self, agency: str, reason: str) -> None:
        """Switch *agency* off, storing (or overwriting) *reason*."""
        self.disabled[agency] = reason

    def enable(self, agency: str) -> None:
        """Switch *agency* back on; does nothing if it was not disabled."""
        self.disabled.pop(agency, None)

    def enabled(self, agency: str) -> bool:
        """Return True when *agency* has not been disabled."""
        return agency not in self.disabled

    def reason(self, agency: str) -> str:
        """Return the stored disable reason, or "" if *agency* is enabled."""
        return self.disabled.get(agency, "")


# Module-wide switch instance shared by the search path.
KILL = KillSwitch()
46
+
47
  ###############################################################################
48
  # FOIA EXEMPTION CLASSIFIER
49
  ###############################################################################
50
 
# FOIA exemption shorthands the classifier looks for, with a short label each.
B_CODES = {
    "b(1)": "National Security",
    "b(3)": "Statutory",
    "b(5)": "Deliberative Process",
    "b(7)": "Law Enforcement",
}


def classify_exemptions(text: str) -> Dict:
    """Detect FOIA exemption codes mentioned in *text*.

    Matching is case-insensitive substring search for each key of
    ``B_CODES``. The fully parenthesised spelling ``(b)(1)`` is folded to
    ``b(1)`` first, so both notations are recognised.

    Args:
        text: Snippet to scan. ``None`` or "" yields no codes.

    Returns:
        ``{"codes": [...], "confidence": float}`` where ``codes`` follows
        ``B_CODES`` order and ``confidence`` is 0.3 per code found, capped
        at 1.0 and rounded to two decimals.
    """
    # Normalise once instead of lower-casing again for every code checked.
    haystack = (text or "").lower().replace("(b)", "b")
    found = [code for code in B_CODES if code in haystack]
    confidence = round(min(1.0, 0.3 * len(found)), 2)
    return {"codes": found, "confidence": confidence}
 
62
 
63
+ ###############################################################################
64
+ # REDACTION SCORING
65
+ ###############################################################################
 
 
 
66
 
def redaction_score(text: str) -> float:
    """Score how heavily *text* appears to be redacted.

    Counts which of the markers "redact", "withheld" and "b(" occur in the
    text (case-insensitive, each counted at most once) and returns 0.25 per
    marker, capped at 1.0 and rounded to two decimals.

    Args:
        text: Snippet to inspect. ``None`` or "" scores 0.0.

    Returns:
        A score between 0.0 and 1.0.
    """
    # Lower-case once; the original repeated this for every marker.
    lowered = (text or "").lower()
    hits = sum(marker in lowered for marker in ("redact", "withheld", "b("))
    return round(min(1.0, hits * 0.25), 2)
70
 
71
  ###############################################################################
72
+ # SEMANTIC CLUSTERING (HF SAFE HEURISTIC)
73
  ###############################################################################
74
 
def semantic_clusters(results: List[Dict]) -> Dict[str, List[Dict]]:
    """Group results into display clusters by keywords in their titles.

    A title containing "uap" or "aerial" (case-insensitive) goes to the UAP
    cluster, one containing "intelligence" to the intelligence cluster, and
    everything else to the general cluster.

    Args:
        results: Records that each carry a "title" string.

    Returns:
        A ``defaultdict(list)`` mapping cluster label to its records, with
        clusters in first-seen order and records in input order.
    """
    clusters = defaultdict(list)
    for r in results:
        title = r["title"].lower()
        if "uap" in title or "aerial" in title:
            label = "🛸 UAP / Aerial Phenomena"
        elif "intelligence" in title:
            label = "🧠 Intelligence Activities"
        else:
            label = "📄 General Records"
        clusters[label].append(r)
    return clusters
86
 
 
87
  ###############################################################################
88
+ # MOCK LIVE SEARCH (REPLACE WITH REAL ADAPTERS SAFELY)
89
  ###############################################################################
90
 
91
+ def run_search(query: str) -> List[Dict]:
92
+ time.sleep(0.3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  raw = [
95
  {
 
115
  },
116
  ]
117
 
 
118
  allowed = []
119
  for r in raw:
120
  policy = AGENCY_POLICY.get(r["agency"], {})
121
+ if not policy.get("robots", False):
122
+ continue
123
+ if not KILL.enabled(r["agency"]):
124
+ continue
125
+ allowed.append(r)
126
 
127
  return allowed
128
 
129
+ ###############################################################################
130
+ # TEXT UTILITIES
131
+ ###############################################################################
132
+
def highlight(text: str, query: str) -> str:
    """Wrap every case-insensitive occurrence of *query* in ``<mark>`` tags.

    The query is matched literally (regex metacharacters are escaped) and
    the matched text keeps its original casing.

    Args:
        text: Text to decorate.
        query: Search phrase. Surrounding whitespace is ignored; an empty
            or whitespace-only query leaves *text* unchanged.

    Returns:
        *text* with each match wrapped as ``<mark>match</mark>``.
    """
    # Strip first: a bare " " would otherwise mark every space, and a padded
    # query such as " uap " would fail to match "UAP" at a word boundary.
    needle = (query or "").strip()
    if not needle:
        return text
    return re.sub(
        re.escape(needle),
        lambda m: f"<mark>{m.group(0)}</mark>",
        text,
        flags=re.IGNORECASE,
    )
142
 
143
  ###############################################################################
144
  # RENDERERS
145
  ###############################################################################
146
 
def render_results(results: List[Dict], query: str) -> str:
    """Render search results as Markdown, grouped by cluster.

    Each record shows its title (with *query* highlighted), agency,
    redaction score, detected exemption codes with confidence, its URL, and
    the index of the record inside ``RESULT_CACHE``.

    Args:
        results: Records to render; each needs "title", "agency",
            "snippet" and "url".
        query: Search phrase used for title highlighting.

    Returns:
        One Markdown string with clusters separated by horizontal rules.

    Raises:
        ValueError: If a record is not present in ``RESULT_CACHE``.
    """
    # Index cached records by identity: lookup is O(1) instead of a list
    # scan per record, and two equal records keep distinct indices.
    positions = {id(rec): pos for pos, rec in enumerate(RESULT_CACHE)}

    blocks = []
    for name, items in semantic_clusters(results).items():
        section = [f"## {name}"]
        for r in items:
            global_index = positions.get(id(r))
            if global_index is None:
                # Not the cached object itself: fall back to an equality
                # search, which raises ValueError when the record is absent.
                global_index = RESULT_CACHE.index(r)
            ex = classify_exemptions(r["snippet"])

            section.append(
                f"""
**{highlight(r['title'], query)}**
🏛️ {r['agency']} · 📊 Redaction {redaction_score(r['snippet'])}
⚖️ Exemptions: `{', '.join(ex['codes']) or 'None'}` (conf {ex['confidence']})

🔗 {r['url']}
➡️ **Select #{global_index}**
"""
            )
        blocks.append("\n\n".join(section))

    return "\n\n---\n\n".join(blocks)
170
 
 
def render_preview(index: int) -> str:
    """Render the preview pane for the cached record at *index*.

    Args:
        index: Position in ``RESULT_CACHE``.

    Returns:
        Markdown with the record's title, agency, source, redaction score
        and exemption summary, followed by an iframe that embeds its URL.
        Returns a placeholder when the cache is empty or *index* is out of
        range.
    """
    # Function-scope import so the file's import block needs no change.
    import html

    # An out-of-range index used to raise IndexError, and a negative one
    # silently wrapped to the end of the list; both now get the placeholder.
    if not RESULT_CACHE or not 0 <= index < len(RESULT_CACHE):
        return "_No document selected._"

    r = RESULT_CACHE[index]
    ex = classify_exemptions(r["snippet"])

    # The URL comes from upstream data and lands inside an HTML attribute,
    # so quote-escape it to keep it from breaking out of src="...".
    safe_url = html.escape(r["url"], quote=True)
    iframe = (
        f'<iframe src="{safe_url}" width="100%" height="520px" '
        f'style="border:1px solid #444;border-radius:8px;"></iframe>'
    )

    return f"""
### 📄 Document Preview
**{r['title']}**
🏛️ {r['agency']} · {r['source']}

🛡️ Redaction Risk: **{redaction_score(r['snippet'])}**
⚖️ FOIA Exemptions: `{', '.join(ex['codes']) or 'None'}`
🔎 Confidence: **{ex['confidence']}**

{iframe}
"""
194
 
def agency_coverage(results: List[Dict]) -> str:
    """Build a Markdown table of how many results each agency contributed.

    Args:
        results: Records that each carry an "agency" string.

    Returns:
        A two-column Markdown table (header plus one row per agency),
        ordered by descending count; agencies with equal counts keep their
        first-seen order. With no results only the header rows are returned.
    """
    counts = defaultdict(int)
    for r in results:
        counts[r["agency"]] += 1

    rows = ["| Agency | Docs |", "|---|---|"]
    rows.extend(
        f"| {agency} | {total} |"
        for agency, total in sorted(counts.items(), key=lambda kv: -kv[1])
    )
    return "\n".join(rows)
205
 
206
  ###############################################################################
207
  # EVENT HANDLERS
208
  ###############################################################################
209
 
def do_search(query: str):
    """Run a search and return everything the UI needs to refresh.

    Replaces ``RESULT_CACHE`` with the new results and resets
    ``CURRENT_INDEX`` to 0.

    Args:
        query: Search phrase typed by the user.

    Returns:
        A 4-tuple: (results Markdown, preview Markdown for the first
        record, agency coverage table, selected index 0).
    """
    global RESULT_CACHE, CURRENT_INDEX
    RESULT_CACHE = run_search(query)
    CURRENT_INDEX = 0

    return (
        render_results(RESULT_CACHE, query),
        render_preview(0),
        agency_coverage(RESULT_CACHE),
        0,
    )
221
 
 
def select_index(idx: int):
    """Select the cached record at *idx* and return its preview.

    The index is clamped into the valid range of ``RESULT_CACHE`` and
    stored in ``CURRENT_INDEX``.

    Args:
        idx: Requested index. ``None`` is treated as 0.

    Returns:
        The preview Markdown for the selected record.
    """
    global CURRENT_INDEX
    # A cleared number box is presumably delivered as None, which would make
    # min() raise TypeError; fall back to the first record instead.
    if idx is None:
        idx = 0
    idx = int(max(0, min(idx, len(RESULT_CACHE) - 1)))
    CURRENT_INDEX = idx
    return render_preview(idx)
227
 
def next_doc():
    """Advance the selection by one record, stopping at the last one.

    Returns:
        A 2-tuple: (new ``CURRENT_INDEX``, preview Markdown for it).
    """
    global CURRENT_INDEX
    # Only move when a later record exists; otherwise stay put.
    if CURRENT_INDEX < len(RESULT_CACHE) - 1:
        CURRENT_INDEX += 1
    return CURRENT_INDEX, render_preview(CURRENT_INDEX)
233
+
def prev_doc():
    """Move the selection back by one record, stopping at the first one.

    Returns:
        A 2-tuple: (new ``CURRENT_INDEX``, preview Markdown for it).
    """
    global CURRENT_INDEX
    # Index 0 is the floor; never step into negative indices.
    if CURRENT_INDEX > 0:
        CURRENT_INDEX -= 1
    return CURRENT_INDEX, render_preview(CURRENT_INDEX)
239
 
240
  ###############################################################################
241
  # UI
 
244
  with gr.Blocks(theme=gr.themes.Soft()) as app:
245
  gr.Markdown("# ๐Ÿ›๏ธ Federated FOIA Intelligence Search")
246
 
247
+ query = gr.Textbox(label="Search public FOIA reading rooms")
248
  search_btn = gr.Button("๐Ÿ” Search")
249
 
250
  with gr.Row():
251
  with gr.Column(scale=5):
252
  results_md = gr.Markdown()
253
+ with gr.Accordion("๐Ÿ—บ๏ธ Agency Coverage", open=False):
254
+ coverage_md = gr.Markdown()
255
 
256
  with gr.Column(scale=7):
257
  preview_md = gr.Markdown()
258
+ with gr.Row():
259
+ prev_btn = gr.Button("โฌ…๏ธ Prev")
260
+ next_btn = gr.Button("โžก๏ธ Next")
261
+ index_box = gr.Number(label="Selected index", precision=0)
262
 
263
  search_btn.click(
264
+ do_search,
265
  inputs=query,
266
+ outputs=[results_md, preview_md, coverage_md, index_box],
267
  )
268
 
269
+ index_box.change(
270
+ select_index,
271
+ inputs=index_box,
272
  outputs=preview_md,
273
  )
274
 
275
+ next_btn.click(
276
+ next_doc,
277
+ outputs=[index_box, preview_md],
278
+ )
279
+
280
+ prev_btn.click(
281
+ prev_doc,
282
+ outputs=[index_box, preview_md],
283
+ )
284
+
285
  if __name__ == "__main__":
286
  app.launch()