Firemedic15 committed on
Commit
079c60f
·
verified ·
1 Parent(s): bbf5058

Upload 5 files

Browse files
Files changed (4) hide show
  1. app.py +61 -243
  2. brief.py +133 -237
  3. requirements.txt +0 -2
  4. tools.py +363 -212
app.py CHANGED
@@ -3,27 +3,24 @@ app.py — Multi-source OSINT Analyst Space
3
  Agentic loop powered by smolagents + HuggingFace Inference API.
4
 
5
  Required Space Secrets:
6
- ACLED_USERNAME — your myACLED account email (from https://developer.acleddata.com)
7
- ACLED_PASSWORD — your myACLED account password
8
  HF_TOKEN — HuggingFace token (for Inference API, set automatically in Spaces)
9
  """
10
 
11
  import os
12
- import time
13
  from datetime import datetime
14
- from typing import Optional
15
 
16
  import gradio as gr
17
  from smolagents import InferenceClientModel, ToolCallingAgent
18
 
19
- from tools import fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory
20
  from brief import (
21
  BRIEF_PROMPT_SCHEMA,
22
  ThreatBrief,
23
  parse_brief_from_llm,
24
  render_brief_html,
25
  )
26
- from export import generate_pdf
27
 
28
  # ---------------------------------------------------------------------------
29
  # Model + Agent setup
@@ -33,14 +30,13 @@ MODEL_ID = "Qwen/Qwen2.5-72B-Instruct" # Strong free model on HF Inference
33
 
34
  def build_agent() -> ToolCallingAgent:
35
  model = InferenceClientModel(
36
- model_id=MODEL_ID,
37
- token=os.environ.get("HF_TOKEN"),
38
- timeout=90, # per-call timeout in seconds
39
- )
40
  agent = ToolCallingAgent(
41
- tools=[fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory],
42
  model=model,
43
- max_steps=12,
44
  verbosity_level=1,
45
  )
46
  return agent
@@ -53,8 +49,8 @@ def build_agent() -> ToolCallingAgent:
53
  SYSTEM_PROMPT = """You are a professional OSINT intelligence analyst specializing
54
  in geopolitical conflict and security threat assessment. Your job is to:
55
 
56
- 1. Call the available tools to gather data from ACLED and RSS news sources.
57
- 2. Collect enough information to assess the security situation.
58
  3. Synthesize your findings into a structured threat brief.
59
 
60
  Always start by checking what sources are available if needed.
@@ -64,82 +60,52 @@ Be thorough — use multiple sources before drawing conclusions.
64
 
65
  def run_analysis(
66
  country: str,
67
- passport_country: str,
68
  rss_sources: list,
69
  days_back: int,
70
  progress=gr.Progress(),
71
- ) -> tuple:
72
  """
73
  Runs the agentic OSINT analysis loop and returns:
74
  - Structured HTML threat brief
75
  - Raw agent trace for transparency
76
- - ThreatBrief object (stored in gr.State for PDF export)
77
  """
78
- if not country or not str(country).strip():
79
- return "<p style='color:red'>Please select a country or region.</p>", "", None
80
- country = str(country).strip()
81
 
82
  progress(0.1, desc="Initializing agent...")
83
 
84
  sources_str = ",".join(rss_sources) if rss_sources else "reuters_world,bbc_world"
85
 
86
- include_embassy = bool(passport_country and passport_country != "Not specified")
87
-
88
- embassy_instruction = (
89
- f"4. REQUIRED β€” Populate the 'embassy' JSON field with the {passport_country} embassy "
90
- f"or nearest consulate in '{country}'. Include: name, street address, main phone number, "
91
- f"after-hours emergency phone, and official website URL.\n"
92
- if include_embassy else ""
93
- )
94
- step_analyse = 5 if include_embassy else 4
95
- step_output = 6 if include_embassy else 5
96
-
97
  task = f"""
98
  Conduct an OSINT threat assessment for: {country}
99
- {f"Traveller passport country: {passport_country}" if include_embassy else ""}
100
 
101
  Instructions:
102
  1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days.
103
  2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}.
104
- 3. REQUIRED β€” Call fetch_travel_advisory for '{country}' and include the result in the travel_advisory fields.
105
- {embassy_instruction}{step_analyse}. Analyse all collected data carefully.
106
- {step_output}. Produce your final output as ONLY a JSON threat brief matching this schema:
107
 
108
  {BRIEF_PROMPT_SCHEMA}
109
 
110
  Today's date: {datetime.utcnow().strftime('%Y-%m-%d')}
111
  """
112
 
 
 
113
  progress(0.2, desc="Agent gathering OSINT data...")
114
 
115
- raw_output = None
116
- last_error = None
117
- for attempt in range(1, 4): # up to 3 attempts
118
- try:
119
- agent = build_agent()
120
- raw_output = agent.run(task, additional_args={"system_prompt": SYSTEM_PROMPT})
121
- break # success — exit retry loop
122
- except Exception as e:
123
- last_error = e
124
- err_str = str(e).lower()
125
- is_timeout = any(k in err_str for k in ("504", "timeout", "gateway", "timed out"))
126
- if is_timeout and attempt < 3:
127
- progress(0.2 + attempt * 0.1, desc=f"HF API timeout β€” retrying (attempt {attempt + 1}/3)...")
128
- time.sleep(5 * attempt) # 5s, then 10s back-off
129
- continue
130
- # Non-retryable error or final attempt — surface it
131
- error_html = f"""
132
- <div style='padding:20px;background:#fff3f3;border:1px solid #cc0000;border-radius:8px'>
133
- <strong>Analysis failed (attempt {attempt}/3):</strong><br>
134
- <code style='font-size:0.85em'>{e}</code><br><br>
135
- <em>If you see a 504 / gateway timeout, the HF Inference API is under heavy load.
136
- Wait a minute and try again, or reduce the number of selected news sources.</em>
137
- </div>
138
- """
139
- return error_html, str(e), None
140
-
141
- if raw_output is None:
142
- return "<p style='color:red'>Analysis failed after 3 attempts. Please try again later.</p>", str(last_error), None
143
 
144
  progress(0.85, desc="Parsing intelligence brief...")
145
 
@@ -153,10 +119,6 @@ Today's date: {datetime.utcnow().strftime('%Y-%m-%d')}
153
  confidence="Low",
154
  )
155
 
156
- # Stamp passport country so PDF / HTML renderer can use it even if LLM omitted it
157
- if passport_country and passport_country != "Not specified":
158
- brief.passport_country = passport_country
159
-
160
  progress(0.95, desc="Rendering brief...")
161
 
162
  html_output = render_brief_html(brief)
@@ -164,14 +126,14 @@ Today's date: {datetime.utcnow().strftime('%Y-%m-%d')}
164
  # Build a plain-text trace for the "Raw Trace" tab
165
  trace_lines = [f"=== OSINT Analysis: {country} ==="]
166
  trace_lines.append(f"Model: {MODEL_ID}")
167
- trace_lines.append(f"Sources: ACLED + {sources_str}")
168
  trace_lines.append(f"Days back: {days_back}")
169
  trace_lines.append(f"Date: {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}")
170
  trace_lines.append("\n--- Raw Agent Output ---")
171
  trace_lines.append(str(raw_output))
172
 
173
  progress(1.0, desc="Done.")
174
- return html_output, "\n".join(trace_lines), brief
175
 
176
 
177
  # ---------------------------------------------------------------------------
@@ -179,110 +141,21 @@ Today's date: {datetime.utcnow().strftime('%Y-%m-%d')}
179
  # ---------------------------------------------------------------------------
180
 
181
  RSS_SOURCE_OPTIONS = [
182
- # General world news
183
- ("BBC World", "bbc_world"),
184
- ("Al Jazeera", "al_jazeera"),
185
- ("France 24", "france24"),
186
- ("Euronews", "euronews"),
187
- ("NPR World", "npr_world"),
188
- ("Sky News", "sky_news"),
189
- ("UN News", "un_news"),
190
- ("Intl Business Times", "ibt"),
191
- # Regional: Middle East
192
- ("Middle East Eye", "middle_east_eye"),
193
- ("Al-Monitor", "al_monitor"),
194
- ("Arab News", "arab_news"),
195
- # Regional: Africa
196
- ("AllAfrica", "allafrica"),
197
- # Regional: Asia-Pacific
198
- ("Radio Free Asia", "radio_free_asia"),
199
- ("S. China Morning Post", "scmp"),
200
- # Regional: South Asia
201
- ("Dawn (Pakistan)", "dawn"),
202
- # Regional: Russia / E. Europe
203
- ("The Moscow Times", "moscow_times"),
204
- # OSINT / investigative
205
- ("Bellingcat", "bellingcat"),
206
- ("The Intercept", "the_intercept"),
207
- ("OCCRP", "occrp"),
208
- # Policy / security analysis
209
- ("Crisis Group", "crisis_group"),
210
- ("War on the Rocks", "war_on_rocks"),
211
- ("Just Security", "just_security"),
212
- ("Defense One", "defense_one"),
213
- ("The Cipher Brief", "cipher_brief"),
214
- ("Stimson Center", "stimson"),
215
- # Human rights
216
- ("Human Rights Watch", "hrw"),
217
- ("Amnesty Intl", "amnesty"),
218
- ]
219
-
220
- DESTINATION_COUNTRIES = [
221
- "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda",
222
- "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan",
223
- "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize",
224
- "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
225
- "Brunei", "Bulgaria", "Burkina Faso", "Burundi",
226
- "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad",
227
- "Chile", "China", "Colombia", "Comoros", "Congo (Republic)", "Congo (DRC)",
228
- "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic",
229
- "Denmark", "Djibouti", "Dominica", "Dominican Republic",
230
- "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia",
231
- "Eswatini", "Ethiopia",
232
- "Fiji", "Finland", "France",
233
- "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada",
234
- "Guatemala", "Guinea", "Guinea-Bissau", "Guyana",
235
- "Haiti", "Honduras", "Hungary",
236
- "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
237
- "Jamaica", "Japan", "Jordan",
238
- "Kazakhstan", "Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan",
239
- "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
240
- "Lithuania", "Luxembourg",
241
- "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands",
242
- "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia",
243
- "Montenegro", "Morocco", "Mozambique", "Myanmar",
244
- "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger",
245
- "Nigeria", "North Korea", "North Macedonia", "Norway",
246
- "Oman",
247
- "Pakistan", "Palau", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru",
248
- "Philippines", "Poland", "Portugal",
249
- "Qatar",
250
- "Romania", "Russia", "Rwanda",
251
- "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines",
252
- "Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia", "Senegal",
253
- "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia",
254
- "Solomon Islands", "Somalia", "South Africa", "South Korea", "South Sudan", "Spain",
255
- "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria",
256
- "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Timor-Leste", "Togo", "Tonga",
257
- "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu",
258
- "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States",
259
- "Uruguay", "Uzbekistan",
260
- "Vanuatu", "Vatican City", "Venezuela", "Vietnam",
261
- "Yemen",
262
- "Zambia", "Zimbabwe",
263
- ]
264
-
265
- PASSPORT_COUNTRIES = [
266
- "Not specified",
267
- "Afghanistan", "Albania", "Algeria", "Argentina", "Australia", "Austria",
268
- "Bangladesh", "Belgium", "Bolivia", "Brazil", "Cambodia", "Canada", "Chile",
269
- "China", "Colombia", "Croatia", "Czech Republic", "Denmark", "Ecuador", "Egypt",
270
- "Ethiopia", "Finland", "France", "Germany", "Ghana", "Greece", "Guatemala",
271
- "Hungary", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
272
- "Japan", "Jordan", "Kazakhstan", "Kenya", "Kuwait", "Malaysia", "Mexico",
273
- "Morocco", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway",
274
- "Pakistan", "Peru", "Philippines", "Poland", "Portugal", "Qatar",
275
- "Romania", "Russia", "Saudi Arabia", "Senegal", "Singapore", "South Africa",
276
- "South Korea", "Spain", "Sri Lanka", "Sudan", "Sweden", "Switzerland",
277
- "Taiwan", "Thailand", "Turkey", "Ukraine", "United Arab Emirates",
278
- "United Kingdom", "United States", "Venezuela", "Vietnam", "Zimbabwe",
279
  ]
280
 
281
  EXAMPLE_QUERIES = [
282
- ["Sudan", ["bbc_world", "al_jazeera", "middle_east_eye", "hrw"], 14],
283
- ["Myanmar", ["bbc_world", "crisis_group", "radio_free_asia", "hrw"], 21],
284
- ["Ukraine", ["bbc_world", "npr_world", "sky_news", "war_on_rocks"], 7],
285
- ["Haiti", ["bbc_world", "al_jazeera", "un_news", "amnesty"], 14],
286
  ]
287
 
288
  CSS = """
@@ -294,48 +167,29 @@ footer { display: none !important; }
294
 
295
  with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo:
296
 
297
- brief_state = gr.State(None)
298
-
299
  gr.HTML("""
300
  <div style="text-align:center;padding:20px 0 10px 0">
301
- <h1 style="font-size:2em;margin:0">OSINT Threat Analyst</h1>
302
  <p style="color:#666;margin:6px 0 0 0">
303
- Agentic multi-source intelligence briefing · ACLED + RSS · State Dept advisories · Powered by HuggingFace
304
  </p>
305
  </div>
306
- <div style="text-align:center;background:#f0f4ff;border:1px solid #c7d4f0;border-radius:8px;padding:12px 24px;max-width:700px;margin:0 auto 16px auto;color:#1a2a5e;font-size:0.95em">
307
- Enter the country or region you are researching or travelling to, select your preferred news sources, and click <strong>Run Analysis</strong> to generate a full intelligence brief.
308
- </div>
309
  """)
310
 
311
  with gr.Row():
312
  with gr.Column(scale=1):
313
  gr.Markdown("### Configure Analysis")
314
 
315
- country_input = gr.Dropdown(
316
- choices=DESTINATION_COUNTRIES,
317
- value=None,
318
- label="Country / Region of Interest",
319
- info="Type to search and select the country you are researching or travelling to.",
320
- filterable=True,
321
- allow_custom_value=True,
322
  )
323
 
324
- passport_input = gr.Dropdown(
325
- choices=PASSPORT_COUNTRIES,
326
- value="Not specified",
327
- label="Passport Country",
328
- info="Select your passport country to include embassy and consulate information in the report.",
329
- filterable=True,
330
- )
331
-
332
- with gr.Row():
333
- select_all_btn = gr.Button("Deselect All", size="sm", scale=1)
334
-
335
  rss_sources = gr.CheckboxGroup(
336
  choices=RSS_SOURCE_OPTIONS,
337
- value=[v for _, v in RSS_SOURCE_OPTIONS], # all selected by default
338
- label="News Sources",
339
  )
340
 
341
  days_back = gr.Slider(
@@ -347,21 +201,26 @@ with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as
347
  )
348
 
349
  analyze_btn = gr.Button(
350
- "Run Analysis",
351
  variant="primary",
352
  elem_id="analyze-btn",
353
  )
354
 
 
 
 
 
 
355
 
356
  with gr.Column(scale=2):
357
  with gr.Tabs():
358
- with gr.Tab("Threat Brief"):
359
  brief_output = gr.HTML(
360
  value="<div style='padding:40px;text-align:center;color:#999'>"
361
  "Configure your query and click Run Analysis.</div>"
362
  )
363
 
364
- with gr.Tab("Raw Agent Trace"):
365
  trace_output = gr.Textbox(
366
  label="Agent trace",
367
  lines=30,
@@ -369,57 +228,16 @@ with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as
369
  placeholder="Agent reasoning and tool calls will appear here...",
370
  )
371
 
372
- with gr.Row():
373
- export_btn = gr.Button(
374
- "Export PDF Report",
375
- variant="secondary",
376
- interactive=False,
377
- scale=1,
378
- )
379
-
380
- pdf_file = gr.File(
381
- label="Download PDF Report",
382
- visible=False,
383
- interactive=False,
384
- )
385
-
386
  gr.Examples(
387
  examples=EXAMPLE_QUERIES,
388
  inputs=[country_input, rss_sources, days_back],
389
  label="Example Queries",
390
  )
391
 
392
- _ALL_SOURCE_KEYS = [v for _, v in RSS_SOURCE_OPTIONS]
393
-
394
- def toggle_sources(current_values):
395
- """Select all if any are unchecked; deselect all if all are checked."""
396
- if len(current_values) == len(_ALL_SOURCE_KEYS):
397
- return gr.update(value=[]), gr.update(value="Select All")
398
- return gr.update(value=_ALL_SOURCE_KEYS), gr.update(value="Deselect All")
399
-
400
- select_all_btn.click(
401
- fn=toggle_sources,
402
- inputs=[rss_sources],
403
- outputs=[rss_sources, select_all_btn],
404
- )
405
-
406
  analyze_btn.click(
407
  fn=run_analysis,
408
- inputs=[country_input, passport_input, rss_sources, days_back],
409
- outputs=[brief_output, trace_output, brief_state],
410
- ).then(
411
- fn=lambda b: gr.update(interactive=b is not None),
412
- inputs=[brief_state],
413
- outputs=[export_btn],
414
- )
415
-
416
- export_btn.click(
417
- fn=lambda b: generate_pdf(b) if b is not None else None,
418
- inputs=[brief_state],
419
- outputs=[pdf_file],
420
- ).then(
421
- fn=lambda: gr.update(visible=True),
422
- outputs=[pdf_file],
423
  )
424
 
425
  gr.HTML("""
 
3
  Agentic loop powered by smolagents + HuggingFace Inference API.
4
 
5
  Required Space Secrets:
6
+ ACLED_API_KEY — from https://developer.acleddata.com
7
+ ACLED_EMAIL — email used to register for ACLED access
8
  HF_TOKEN — HuggingFace token (for Inference API, set automatically in Spaces)
9
  """
10
 
11
  import os
 
12
  from datetime import datetime
 
13
 
14
  import gradio as gr
15
  from smolagents import InferenceClientModel, ToolCallingAgent
16
 
17
+ from tools import fetch_acled_events, fetch_rss_headlines, fetch_airspace_status, list_available_sources
18
  from brief import (
19
  BRIEF_PROMPT_SCHEMA,
20
  ThreatBrief,
21
  parse_brief_from_llm,
22
  render_brief_html,
23
  )
 
24
 
25
  # ---------------------------------------------------------------------------
26
  # Model + Agent setup
 
30
 
31
  def build_agent() -> ToolCallingAgent:
32
  model = InferenceClientModel(
33
+ model_id=MODEL_ID,
34
+ token=os.environ.get("HF_TOKEN"),
35
+ )
 
36
  agent = ToolCallingAgent(
37
+ tools=[fetch_acled_events, fetch_rss_headlines, fetch_airspace_status, list_available_sources],
38
  model=model,
39
+ max_steps=8,
40
  verbosity_level=1,
41
  )
42
  return agent
 
49
  SYSTEM_PROMPT = """You are a professional OSINT intelligence analyst specializing
50
  in geopolitical conflict and security threat assessment. Your job is to:
51
 
52
+ 1. Call the available tools to gather data from ACLED, RSS news sources, and airspace feeds.
53
+ 2. Collect enough information to assess the security situation, including airspace status.
54
  3. Synthesize your findings into a structured threat brief.
55
 
56
  Always start by checking what sources are available if needed.
 
60
 
61
  def run_analysis(
62
  country: str,
 
63
  rss_sources: list,
64
  days_back: int,
65
  progress=gr.Progress(),
66
+ ) -> tuple[str, str]:
67
  """
68
  Runs the agentic OSINT analysis loop and returns:
69
  - Structured HTML threat brief
70
  - Raw agent trace for transparency
 
71
  """
72
+ if not country.strip():
73
+ return "<p style='color:red'>Please enter a country or region.</p>", ""
 
74
 
75
  progress(0.1, desc="Initializing agent...")
76
 
77
  sources_str = ",".join(rss_sources) if rss_sources else "reuters_world,bbc_world"
78
 
 
 
 
 
 
 
 
 
 
 
 
79
  task = f"""
80
  Conduct an OSINT threat assessment for: {country}
 
81
 
82
  Instructions:
83
  1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days.
84
  2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}.
85
+ 3. Fetch airspace status for '{country}' using the airspace tool.
86
+ 4. Analyze all collected data carefully.
87
+ 5. Produce your final output as ONLY a JSON threat brief matching this schema:
88
 
89
  {BRIEF_PROMPT_SCHEMA}
90
 
91
  Today's date: {datetime.utcnow().strftime('%Y-%m-%d')}
92
  """
93
 
94
+ agent = build_agent()
95
+
96
  progress(0.2, desc="Agent gathering OSINT data...")
97
 
98
+ try:
99
+ raw_output = agent.run(task, additional_args={"system_prompt": SYSTEM_PROMPT})
100
+ except Exception as e:
101
+ error_html = f"""
102
+ <div style='padding:20px;background:#fff3f3;border:1px solid #ff0000;border-radius:8px'>
103
+ <strong>Analysis failed:</strong> {e}<br><br>
104
+ <em>Common causes: ACLED credentials missing, HF Inference API overloaded,
105
+ or model timeout. Check Space secrets and try again.</em>
106
+ </div>
107
+ """
108
+ return error_html, str(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  progress(0.85, desc="Parsing intelligence brief...")
111
 
 
119
  confidence="Low",
120
  )
121
 
 
 
 
 
122
  progress(0.95, desc="Rendering brief...")
123
 
124
  html_output = render_brief_html(brief)
 
126
  # Build a plain-text trace for the "Raw Trace" tab
127
  trace_lines = [f"=== OSINT Analysis: {country} ==="]
128
  trace_lines.append(f"Model: {MODEL_ID}")
129
+ trace_lines.append(f"Sources: ACLED + Airspace + {sources_str}")
130
  trace_lines.append(f"Days back: {days_back}")
131
  trace_lines.append(f"Date: {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}")
132
  trace_lines.append("\n--- Raw Agent Output ---")
133
  trace_lines.append(str(raw_output))
134
 
135
  progress(1.0, desc="Done.")
136
+ return html_output, "\n".join(trace_lines)
137
 
138
 
139
  # ---------------------------------------------------------------------------
 
141
  # ---------------------------------------------------------------------------
142
 
143
  RSS_SOURCE_OPTIONS = [
144
+ ("Reuters World", "reuters_world"),
145
+ ("BBC World", "bbc_world"),
146
+ ("Al Jazeera", "al_jazeera"),
147
+ ("Bellingcat", "bellingcat"),
148
+ ("Crisis Group", "crisis_group"),
149
+ ("ACLED Blog", "acled_blog"),
150
+ ("UN News", "un_news"),
151
+ ("Foreign Policy", "foreign_policy"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  ]
153
 
154
  EXAMPLE_QUERIES = [
155
+ ["Sudan", ["reuters_world", "bbc_world", "al_jazeera"], 14],
156
+ ["Myanmar", ["reuters_world", "crisis_group", "bellingcat"], 21],
157
+ ["Ukraine", ["reuters_world", "bbc_world", "foreign_policy"], 7],
158
+ ["Haiti", ["reuters_world", "al_jazeera", "un_news"], 14],
159
  ]
160
 
161
  CSS = """
 
167
 
168
  with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo:
169
 
 
 
170
  gr.HTML("""
171
  <div style="text-align:center;padding:20px 0 10px 0">
172
+ <h1 style="font-size:2em;margin:0">🌐 OSINT Threat Analyst</h1>
173
  <p style="color:#666;margin:6px 0 0 0">
174
+ Agentic multi-source intelligence briefing · ACLED + RSS · Powered by HuggingFace
175
  </p>
176
  </div>
 
 
 
177
  """)
178
 
179
  with gr.Row():
180
  with gr.Column(scale=1):
181
  gr.Markdown("### Configure Analysis")
182
 
183
+ country_input = gr.Textbox(
184
+ label="Country / Region",
185
+ placeholder="e.g. Sudan, Myanmar, Haiti...",
186
+ max_lines=1,
 
 
 
187
  )
188
 
 
 
 
 
 
 
 
 
 
 
 
189
  rss_sources = gr.CheckboxGroup(
190
  choices=RSS_SOURCE_OPTIONS,
191
+ value=["reuters_world", "bbc_world", "al_jazeera"],
192
+ label="RSS News Sources",
193
  )
194
 
195
  days_back = gr.Slider(
 
201
  )
202
 
203
  analyze_btn = gr.Button(
204
+ "πŸ” Run Analysis",
205
  variant="primary",
206
  elem_id="analyze-btn",
207
  )
208
 
209
+ gr.Markdown(
210
+ "**Note:** ACLED requires free API credentials set as Space secrets. "
211
+ "[Register here](https://developer.acleddata.com).",
212
+ elem_classes=["note"],
213
+ )
214
 
215
  with gr.Column(scale=2):
216
  with gr.Tabs():
217
+ with gr.Tab("📋 Threat Brief"):
218
  brief_output = gr.HTML(
219
  value="<div style='padding:40px;text-align:center;color:#999'>"
220
  "Configure your query and click Run Analysis.</div>"
221
  )
222
 
223
+ with gr.Tab("🔎 Raw Agent Trace"):
224
  trace_output = gr.Textbox(
225
  label="Agent trace",
226
  lines=30,
 
228
  placeholder="Agent reasoning and tool calls will appear here...",
229
  )
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  gr.Examples(
232
  examples=EXAMPLE_QUERIES,
233
  inputs=[country_input, rss_sources, days_back],
234
  label="Example Queries",
235
  )
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  analyze_btn.click(
238
  fn=run_analysis,
239
+ inputs=[country_input, rss_sources, days_back],
240
+ outputs=[brief_output, trace_output],
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  )
242
 
243
  gr.HTML("""
brief.py CHANGED
@@ -36,27 +36,17 @@ class ThreatBrief:
36
  event_types: List[str] = field(default_factory=list)
37
  key_locations: List[str] = field(default_factory=list)
38
  fatalities_reported: Optional[int] = None
39
- country_summary: str = ""
40
  narrative_summary: str = ""
41
- risk_analysis: str = ""
42
  key_findings: List[str] = field(default_factory=list)
43
  indicators_of_escalation: List[str] = field(default_factory=list)
44
  recommended_watch_items: List[str] = field(default_factory=list)
45
  source_types_used: List[str] = field(default_factory=list)
46
- recent_events: List[str] = field(default_factory=list)
47
  notable_news: List[NewsItem] = field(default_factory=list)
48
- travel_advisory_level: str = ""
49
- travel_advisory_level_text: str = ""
50
- travel_advisory_indicators: List[str] = field(default_factory=list)
51
- travel_advisory_date: str = ""
52
- travel_advisory_url: str = ""
53
- passport_country: str = ""
54
- embassy_name: str = ""
55
- embassy_address: str = ""
56
- embassy_phone: str = ""
57
- embassy_emergency_phone: str = ""
58
- embassy_website: str = ""
59
- embassy_notes: str = ""
60
 
61
  def to_dict(self) -> dict:
62
  d = asdict(self)
@@ -101,58 +91,36 @@ produce a threat brief as a JSON object with EXACTLY these fields:
101
  "event_types": ["<type1>", "<type2>"],
102
  "key_locations": ["<location1>", "<location2>"],
103
  "fatalities_reported": <integer or null>,
104
- "country_summary": "<3-5 sentence factual background on the country: geography, population, political system, recent history, and why it is geopolitically significant>",
105
  "narrative_summary": "<2-4 sentence analytical narrative synthesizing all sources>",
106
- "risk_analysis": "<3-5 sentence focused risk assessment covering: (1) primary threat vectors and actors posing the greatest danger, (2) likelihood of near-term escalation, (3) risk to civilians, travelers, and humanitarian operations, (4) any compounding factors such as economic collapse, displacement, or regional spillover>",
107
  "key_findings": ["<finding1>", "<finding2>", "<finding3>"],
108
  "indicators_of_escalation": ["<indicator1>", "<indicator2>"],
109
  "recommended_watch_items": ["<watch_item1>", "<watch_item2>"],
110
- "source_types_used": ["ACLED", "RSS", "State Dept Travel Advisory"],
111
- "recent_events": [
112
- "<one-line summary of a specific ACLED event, e.g. '2026-05-10 | Rakhine State β€” Armed clash between Arakan Army and military forces, 4 fatalities reported'>",
113
- "<repeat for each significant event, up to 10>"
114
- ],
115
- "travel_advisory": {
116
- "level": "<1|2|3|4 β€” just the number, or 'Unknown'>",
117
- "level_text": "<full level string, e.g. 'Level 3: Reconsider Travel'>",
118
- "indicators": ["<e.g. Crime>", "<e.g. Terrorism>"],
119
- "date_updated": "<date string from advisory>",
120
- "url": "<full URL to the advisory page, or empty string>"
121
- },
122
- "embassy": {
123
- "name": "<full official name of the embassy or consulate, e.g. 'U.S. Embassy Nairobi'>",
124
- "address": "<street address of the embassy in the destination country>",
125
- "phone": "<main switchboard phone number including country code>",
126
- "emergency_phone": "<after-hours emergency line for citizens, including country code>",
127
- "website": "<full URL to the embassy website>",
128
- "notes": "<1-2 sentences on anything a traveller should know: nearest consulate if no embassy, appointment requirements, or special emergency procedures>"
129
- },
130
  "notable_news": [
131
  {
132
  "title": "<article headline>",
133
  "source": "<news source name>",
134
  "published": "<publication date>",
135
- "summary": "<1-2 sentence analytical summary explaining why this article matters>",
136
  "url": "<article URL>",
137
  "notable": true
138
  }
139
- ]
 
 
 
 
 
140
  }
141
 
142
- For embassy: provide the PASSPORT_COUNTRY's embassy or nearest consulate in the destination country.
143
- If no embassy exists there, name the nearest one in a neighbouring country and explain in notes.
144
- Use your best knowledge of official embassy details; always include the website so the traveller can verify.
145
-
146
- For recent_events: list each significant ACLED event as a single line in the format
147
- "DATE | LOCATION β€” description, N fatalities". Include up to 10 events ordered
148
- most-recent first. If ACLED returned no data, use an empty array [].
149
 
150
- For notable_news: include ALL relevant articles from the RSS results, up to 10.
151
- Do NOT filter only by the NOTABLE flag — include any article that relates to the
152
- country's security, politics, humanitarian situation, or conflict dynamics.
153
- Write the summary field in your own analytical words — explain WHY the article
154
- matters to the threat picture, not just what it says.
155
- If no RSS articles were retrieved, use an empty array [].
156
 
157
  Return ONLY the JSON object. No preamble, no markdown fences.
158
  """
@@ -182,9 +150,6 @@ def parse_brief_from_llm(raw_text: str) -> ThreatBrief:
182
  notable=n.get("notable", True),
183
  ))
184
 
185
- ta = data.get("travel_advisory", {})
186
- em = data.get("embassy", {})
187
-
188
  return ThreatBrief(
189
  region=data.get("region", ""),
190
  country=data.get("country", ""),
@@ -195,26 +160,17 @@ def parse_brief_from_llm(raw_text: str) -> ThreatBrief:
195
  event_types=data.get("event_types", []),
196
  key_locations=data.get("key_locations", []),
197
  fatalities_reported=data.get("fatalities_reported"),
198
- country_summary=data.get("country_summary", ""),
199
  narrative_summary=data.get("narrative_summary", raw_text),
200
- risk_analysis=data.get("risk_analysis", ""),
201
  key_findings=data.get("key_findings", []),
202
  indicators_of_escalation=data.get("indicators_of_escalation", []),
203
  recommended_watch_items=data.get("recommended_watch_items", []),
204
  source_types_used=data.get("source_types_used", []),
205
- recent_events=data.get("recent_events", []),
206
  notable_news=news_items,
207
- travel_advisory_level=str(ta.get("level", "")),
208
- travel_advisory_level_text=ta.get("level_text", ""),
209
- travel_advisory_indicators=ta.get("indicators", []),
210
- travel_advisory_date=ta.get("date_updated", ""),
211
- travel_advisory_url=ta.get("url", ""),
212
- embassy_name=em.get("name", ""),
213
- embassy_address=em.get("address", ""),
214
- embassy_phone=em.get("phone", ""),
215
- embassy_emergency_phone=em.get("emergency_phone", ""),
216
- embassy_website=em.get("website", ""),
217
- embassy_notes=em.get("notes", ""),
218
  )
219
  except json.JSONDecodeError:
220
  return _fallback_brief(raw_text)
@@ -233,95 +189,6 @@ def _fallback_brief(raw_text: str) -> ThreatBrief:
233
  # HTML Renderer β€” system dark mode aware via CSS custom properties
234
  # ---------------------------------------------------------------------------
235
 
236
- def _render_travel_advisory(brief: ThreatBrief) -> str:
237
- level = brief.travel_advisory_level.strip()
238
- if not level or level == "Unknown":
239
- return """
240
- <div class="section muted-section" style="border-top:none">
241
- <p class="muted-text">US State Department travel advisory not available for this country.</p>
242
- </div>"""
243
-
244
- color = ADVISORY_LEVEL_COLORS.get(level, "#999")
245
- label = ADVISORY_LEVEL_LABELS.get(level, brief.travel_advisory_level_text or f"Level {level}")
246
- indicators_html = (
247
- " &nbsp;·&nbsp; ".join(f'<span class="risk-tag">{i}</span>' for i in brief.travel_advisory_indicators)
248
- if brief.travel_advisory_indicators
249
- else "<em>None listed</em>"
250
- )
251
- link_html = (
252
- f' &nbsp;<a href="{brief.travel_advisory_url}" target="_blank" style="font-size:0.82em;color:var(--text-link)">Full advisory →</a>'
253
- if brief.travel_advisory_url else ""
254
- )
255
- date_html = f'<span style="color:var(--text-muted);font-size:0.82em">Updated: {brief.travel_advisory_date}</span>' if brief.travel_advisory_date else ""
256
-
257
- return f"""
258
- <div class="section" style="border-top:none;border-left:4px solid {color}">
259
- <h3 style="margin-top:0;margin-bottom:10px">
260
- πŸ—ΊοΈ US State Dept Travel Advisory
261
- <span style="background:{color};color:white;padding:2px 12px;border-radius:12px;font-size:0.82em;font-weight:bold;margin-left:8px">
262
- Level {level}: {label}
263
- </span>
264
- {link_html}
265
- </h3>
266
- <div style="margin-bottom:6px"><strong>Risk categories:</strong> &nbsp;{indicators_html}</div>
267
- {date_html}
268
- </div>"""
269
-
270
-
271
- ADVISORY_LEVEL_COLORS = {
272
- "1": "#27AE60",
273
- "2": "#F39C12",
274
- "3": "#E67E22",
275
- "4": "#C0392B",
276
- }
277
-
278
- ADVISORY_LEVEL_LABELS = {
279
- "1": "Exercise Normal Precautions",
280
- "2": "Exercise Increased Caution",
281
- "3": "Reconsider Travel",
282
- "4": "Do Not Travel",
283
- }
284
-
285
-
286
- def _render_embassy(brief: ThreatBrief) -> str:
287
- if not brief.passport_country:
288
- return ""
289
-
290
- if not brief.embassy_name:
291
- return f"""
292
- <div class="section muted-section" style="border-top:none">
293
- <p class="muted-text">Embassy information for {brief.passport_country} passport holders not available.</p>
294
- </div>"""
295
-
296
- website_html = (
297
- f'<a href="{brief.embassy_website}" target="_blank" style="color:var(--text-link);font-size:0.85em">{brief.embassy_website}</a>'
298
- if brief.embassy_website else "<em>Not available</em>"
299
- )
300
-
301
- rows = []
302
- if brief.embassy_address:
303
- rows.append(f"<tr><td style='width:160px;color:var(--text-muted);padding:4px 12px 4px 0;vertical-align:top'>Address</td><td style='color:var(--text-body)'>{brief.embassy_address}</td></tr>")
304
- if brief.embassy_phone:
305
- rows.append(f"<tr><td style='color:var(--text-muted);padding:4px 12px 4px 0'>Main phone</td><td style='color:var(--text-body)'>{brief.embassy_phone}</td></tr>")
306
- if brief.embassy_emergency_phone:
307
- rows.append(f"<tr><td style='color:var(--text-muted);padding:4px 12px 4px 0'>Emergency line</td><td style='color:var(--text-body);font-weight:600'>{brief.embassy_emergency_phone}</td></tr>")
308
- rows.append(f"<tr><td style='color:var(--text-muted);padding:4px 12px 4px 0'>Website</td><td>{website_html}</td></tr>")
309
-
310
- notes_html = f'<p style="margin:10px 0 0 0;font-size:0.88em;color:var(--text-muted);font-style:italic">{brief.embassy_notes}</p>' if brief.embassy_notes else ""
311
-
312
- return f"""
313
- <div class="section" style="border-top:none;border-left:4px solid #1a56db">
314
- <h3 style="margin-top:0;margin-bottom:10px">
315
- {brief.passport_country} Embassy / Consulate
316
- <span style="font-size:0.75em;font-weight:normal;color:var(--text-muted);margin-left:8px">in {brief.country}</span>
317
- </h3>
318
- <div style="font-weight:600;color:var(--text-primary);margin-bottom:8px">{brief.embassy_name}</div>
319
- <table style="border-collapse:collapse;font-size:0.9em">{" ".join(rows)}</table>
320
- {notes_html}
321
- <p style="margin:10px 0 0 0;font-size:0.78em;color:var(--text-muted)">Verify contact details before travel — embassy information can change.</p>
322
- </div>"""
323
-
324
-
325
  def render_brief_html(brief: ThreatBrief) -> str:
326
  sev_color = SEVERITY_COLORS.get(brief.severity, "#999")
327
  conf_color = CONFIDENCE_COLORS.get(brief.confidence, "#999")
@@ -344,65 +211,80 @@ def render_brief_html(brief: ThreatBrief) -> str:
344
  )
345
  sources_str = ", ".join(brief.source_types_used) if brief.source_types_used else "N/A"
346
 
347
- # Build recent events section
348
- if brief.recent_events:
349
- event_rows = "".join(
350
- f"<tr><td style='padding:4px 8px;border-bottom:1px solid var(--border);font-family:monospace;font-size:0.82em;color:var(--text-body)'>{e}</td></tr>"
351
- for e in brief.recent_events
352
- )
353
- events_section = f"""
 
 
 
 
 
 
 
 
 
 
 
 
354
  <div class="section">
355
- <h3 class="section-title">Recent Conflict Events
356
- <span class="section-count">{len(brief.recent_events)} events · ACLED</span>
357
  </h3>
358
- <table style="width:100%;border-collapse:collapse">
359
- {event_rows}
360
- </table>
361
  </div>"""
362
  else:
363
- events_section = """
364
  <div class="section muted-section">
365
- <h3 class="section-title">Recent Conflict Events</h3>
366
- <p class="muted-text">No ACLED conflict events retrieved. Check ACLED credentials or try a different date range.</p>
367
  </div>"""
368
 
369
- # Build news cards — always render the section
370
- cards = []
371
- for item in brief.notable_news:
372
- link_html = (
373
- f'<a href="{item.url}" target="_blank" class="news-link">Read full article →</a>'
374
- if item.url else ""
375
- )
376
- notable_badge = (
377
- '<span style="background:#fef3c7;color:#92400e;font-size:0.72em;'
378
- 'padding:1px 6px;border-radius:8px;margin-left:6px;font-weight:600">NOTABLE</span>'
379
- if item.notable else ""
380
- )
381
- cards.append(f"""
382
- <div class="news-card">
383
- <div class="news-meta">
384
- <span class="news-source-badge">{item.source}</span>
385
- <span class="news-date">{item.published[:25] if item.published else ""}</span>
386
- {notable_badge}
387
- </div>
388
- <div class="news-title">{item.title}</div>
389
- <div class="news-summary">{item.summary}</div>
390
- {link_html}
391
- </div>""")
392
-
393
- if cards:
394
- news_section = f"""
395
- <div class="section">
396
- <h3 class="section-title">Recent News
397
- <span class="section-count">{len(brief.notable_news)} articles</span>
398
  </h3>
399
- {"".join(cards)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  </div>"""
401
  else:
402
- news_section = """
403
  <div class="section muted-section">
404
- <h3 class="section-title">Recent News</h3>
405
- <p class="muted-text">No news articles were retrieved for this query. Try adding more RSS sources or broadening the search.</p>
406
  </div>"""
407
 
408
  html = f"""
@@ -552,6 +434,43 @@ def render_brief_html(brief: ThreatBrief) -> str:
552
  }}
553
  .news-link:hover {{ text-decoration: underline; }}
554
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  .warn-section {{
556
  background: var(--bg-warn);
557
  border: 1px solid var(--border);
@@ -562,15 +481,6 @@ def render_brief_html(brief: ThreatBrief) -> str:
562
  .warn-section h4 {{ color: var(--text-primary); margin-top: 0; }}
563
  .warn-section ul {{ padding-left: 18px; line-height: 1.9; margin: 0; }}
564
 
565
- .risk-tag {{
566
- background: var(--bg-accent);
567
- border: 1px solid var(--border);
568
- color: var(--text-body);
569
- padding: 1px 8px;
570
- border-radius: 6px;
571
- font-size: 0.82em;
572
- }}
573
-
574
  .brief-footer {{
575
  background: var(--bg-secondary);
576
  border: 1px solid var(--border);
@@ -585,7 +495,7 @@ def render_brief_html(brief: ThreatBrief) -> str:
585
  <div class="brief-wrap">
586
 
587
  <div class="brief-header">
588
- <h2>OSINT Threat Brief</h2>
589
  <p>{brief.country or 'Unknown'} &nbsp;|&nbsp; {brief.region} &nbsp;|&nbsp; {brief.assessment_date}</p>
590
  </div>
591
 
@@ -596,53 +506,39 @@ def render_brief_html(brief: ThreatBrief) -> str:
596
  <span><strong>Fatalities:</strong> {fatalities_str}</span>
597
  </div>
598
 
599
- <div class="section">
600
- <h3>Country Background</h3>
601
- <p>{brief.country_summary or '<em>Not available</em>'}</p>
602
- </div>
603
-
604
- {_render_travel_advisory(brief)}
605
-
606
- {_render_embassy(brief)}
607
-
608
  <div class="section">
609
  <h3>Analytical Summary</h3>
610
  <p>{brief.narrative_summary or '<em>Not available</em>'}</p>
611
  </div>
612
 
613
- <div class="section" style="border-left:4px solid #C0392B">
614
- <h3 style="margin-top:0;color:#C0392B">Risk Assessment</h3>
615
- <p>{brief.risk_analysis or '<em>Risk assessment not available.</em>'}</p>
616
- </div>
617
-
618
  <div class="grid-2">
619
  <div class="grid-cell grid-cell-border">
620
- <h4>Key Findings</h4>
621
  <ul>{bullet_list(brief.key_findings)}</ul>
622
  </div>
623
  <div class="grid-cell">
624
- <h4>Escalation Indicators</h4>
625
  <ul>{bullet_list(brief.indicators_of_escalation)}</ul>
626
  </div>
627
  </div>
628
 
629
  <div class="grid-2">
630
  <div class="grid-cell grid-cell-border" style="border-top:1px solid var(--border)">
631
- <h4>Primary Actors</h4>
632
  <ul>{bullet_list(brief.primary_actors)}</ul>
633
  </div>
634
  <div class="grid-cell" style="border-top:1px solid var(--border)">
635
- <h4>Key Locations</h4>
636
  <ul>{bullet_list(brief.key_locations)}</ul>
637
  </div>
638
  </div>
639
 
640
- {events_section}
641
-
642
  {news_section}
643
 
 
 
644
  <div class="warn-section">
645
- <h4>Recommended Watch Items</h4>
646
  <ul>{bullet_list(brief.recommended_watch_items)}</ul>
647
  </div>
648
 
 
36
  event_types: List[str] = field(default_factory=list)
37
  key_locations: List[str] = field(default_factory=list)
38
  fatalities_reported: Optional[int] = None
 
39
  narrative_summary: str = ""
 
40
  key_findings: List[str] = field(default_factory=list)
41
  indicators_of_escalation: List[str] = field(default_factory=list)
42
  recommended_watch_items: List[str] = field(default_factory=list)
43
  source_types_used: List[str] = field(default_factory=list)
 
44
  notable_news: List[NewsItem] = field(default_factory=list)
45
+ airspace_status: str = ""
46
+ airspace_restrictions: List[str] = field(default_factory=list)
47
+ no_fly_zones: List[str] = field(default_factory=list)
48
+ air_defense_activity: List[str] = field(default_factory=list)
49
+ aviation_notes: str = ""
 
 
 
 
 
 
 
50
 
51
  def to_dict(self) -> dict:
52
  d = asdict(self)
 
91
  "event_types": ["<type1>", "<type2>"],
92
  "key_locations": ["<location1>", "<location2>"],
93
  "fatalities_reported": <integer or null>,
 
94
  "narrative_summary": "<2-4 sentence analytical narrative synthesizing all sources>",
 
95
  "key_findings": ["<finding1>", "<finding2>", "<finding3>"],
96
  "indicators_of_escalation": ["<indicator1>", "<indicator2>"],
97
  "recommended_watch_items": ["<watch_item1>", "<watch_item2>"],
98
+ "source_types_used": ["ACLED", "RSS", "AIRSPACE"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  "notable_news": [
100
  {
101
  "title": "<article headline>",
102
  "source": "<news source name>",
103
  "published": "<publication date>",
104
+ "summary": "<1-2 sentence summary of why this article is significant>",
105
  "url": "<article URL>",
106
  "notable": true
107
  }
108
+ ],
109
+ "airspace_status": "<one of: Open | Restricted | Partially Restricted | Closed | Unknown>",
110
+ "airspace_restrictions": ["<restriction or NOTAM description1>", "<restriction2>"],
111
+ "no_fly_zones": ["<zone name or description1>", "<zone2>"],
112
+ "air_defense_activity": ["<activity description1>", "<activity2>"],
113
+ "aviation_notes": "<1-2 sentence summary of overall airspace picture and commercial aviation impact>"
114
  }
115
 
116
+ For notable_news: include the 3-6 most significant articles from the RSS results.
117
+ Prioritize articles marked as NOTABLE in the RSS output. Write the summary field
118
+ in your own analytical words — explain WHY the article matters to the threat picture,
119
+ not just what it says. If no notable articles were found, use an empty array [].
 
 
 
120
 
121
+ For airspace fields: use data from the AIRSPACE tool output. If no airspace data
122
+ was retrieved, set airspace_status to "Unknown" and use empty arrays. Be concise
123
+ and factual β€” cite specific restrictions, zones, or incidents where available.
 
 
 
124
 
125
  Return ONLY the JSON object. No preamble, no markdown fences.
126
  """
 
150
  notable=n.get("notable", True),
151
  ))
152
 
 
 
 
153
  return ThreatBrief(
154
  region=data.get("region", ""),
155
  country=data.get("country", ""),
 
160
  event_types=data.get("event_types", []),
161
  key_locations=data.get("key_locations", []),
162
  fatalities_reported=data.get("fatalities_reported"),
 
163
  narrative_summary=data.get("narrative_summary", raw_text),
 
164
  key_findings=data.get("key_findings", []),
165
  indicators_of_escalation=data.get("indicators_of_escalation", []),
166
  recommended_watch_items=data.get("recommended_watch_items", []),
167
  source_types_used=data.get("source_types_used", []),
 
168
  notable_news=news_items,
169
+ airspace_status=data.get("airspace_status", "Unknown"),
170
+ airspace_restrictions=data.get("airspace_restrictions", []),
171
+ no_fly_zones=data.get("no_fly_zones", []),
172
+ air_defense_activity=data.get("air_defense_activity", []),
173
+ aviation_notes=data.get("aviation_notes", ""),
 
 
 
 
 
 
174
  )
175
  except json.JSONDecodeError:
176
  return _fallback_brief(raw_text)
 
189
  # HTML Renderer β€” system dark mode aware via CSS custom properties
190
  # ---------------------------------------------------------------------------
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  def render_brief_html(brief: ThreatBrief) -> str:
193
  sev_color = SEVERITY_COLORS.get(brief.severity, "#999")
194
  conf_color = CONFIDENCE_COLORS.get(brief.confidence, "#999")
 
211
  )
212
  sources_str = ", ".join(brief.source_types_used) if brief.source_types_used else "N/A"
213
 
214
+ # Build notable news cards
215
+ if brief.notable_news:
216
+ cards = []
217
+ for item in brief.notable_news:
218
+ link_html = (
219
+ f'<a href="{item.url}" target="_blank" class="news-link">Read full article →</a>'
220
+ if item.url else ""
221
+ )
222
+ cards.append(f"""
223
+ <div class="news-card">
224
+ <div class="news-meta">
225
+ <span class="news-source-badge">{item.source}</span>
226
+ <span class="news-date">{item.published[:25] if item.published else ""}</span>
227
+ </div>
228
+ <div class="news-title">{item.title}</div>
229
+ <div class="news-summary">{item.summary}</div>
230
+ {link_html}
231
+ </div>""")
232
+ news_section = f"""
233
  <div class="section">
234
+ <h3 class="section-title">📰 Notable News
235
+ <span class="section-count">{len(brief.notable_news)} articles</span>
236
  </h3>
237
+ {"".join(cards)}
 
 
238
  </div>"""
239
  else:
240
+ news_section = """
241
  <div class="section muted-section">
242
+ <p class="muted-text">No notable news articles retrieved.</p>
 
243
  </div>"""
244
 
245
+ AIRSPACE_STATUS_COLORS = {
246
+ "Open": "#27AE60",
247
+ "Restricted": "#E67E22",
248
+ "Partially Restricted": "#F39C12",
249
+ "Closed": "#C0392B",
250
+ "Unknown": "#7F8C8D",
251
+ }
252
+ as_color = AIRSPACE_STATUS_COLORS.get(brief.airspace_status, "#7F8C8D")
253
+
254
+ has_airspace_data = (
255
+ brief.airspace_restrictions
256
+ or brief.no_fly_zones
257
+ or brief.air_defense_activity
258
+ or brief.aviation_notes
259
+ or (brief.airspace_status and brief.airspace_status != "Unknown")
260
+ )
261
+
262
+ if has_airspace_data:
263
+ airspace_section = f"""
264
+ <div class="section airspace-section">
265
+ <h3 class="section-title">✈️ Airspace &amp; Aviation Status
266
+ <span style="margin-left:10px">{badge(brief.airspace_status or 'Unknown', as_color)}</span>
 
 
 
 
 
 
 
267
  </h3>
268
+ {f'<p class="airspace-notes">{brief.aviation_notes}</p>' if brief.aviation_notes else ""}
269
+ <div class="airspace-grid">
270
+ <div class="airspace-cell">
271
+ <h4>🚫 No-Fly Zones</h4>
272
+ <ul>{bullet_list(brief.no_fly_zones)}</ul>
273
+ </div>
274
+ <div class="airspace-cell">
275
+ <h4>⚠️ Active Restrictions / NOTAMs</h4>
276
+ <ul>{bullet_list(brief.airspace_restrictions)}</ul>
277
+ </div>
278
+ <div class="airspace-cell airspace-cell-full">
279
+ <h4>🛡️ Air Defense Activity</h4>
280
+ <ul>{bullet_list(brief.air_defense_activity)}</ul>
281
+ </div>
282
+ </div>
283
  </div>"""
284
  else:
285
+ airspace_section = """
286
  <div class="section muted-section">
287
+ <p class="muted-text">✈️ No airspace restriction data retrieved for this country.</p>
 
288
  </div>"""
289
 
290
  html = f"""
 
434
  }}
435
  .news-link:hover {{ text-decoration: underline; }}
436
 
437
+ .airspace-section {{
438
+ background: var(--bg-primary);
439
+ border: 1px solid var(--border);
440
+ border-top: none;
441
+ padding: 18px 24px;
442
+ color: var(--text-body);
443
+ }}
444
+ .airspace-notes {{
445
+ line-height: 1.7;
446
+ margin: 0 0 14px 0;
447
+ color: var(--text-body);
448
+ }}
449
+ .airspace-grid {{
450
+ display: grid;
451
+ grid-template-columns: 1fr 1fr;
452
+ gap: 0;
453
+ }}
454
+ .airspace-cell {{
455
+ padding: 12px 16px 12px 0;
456
+ border-right: 1px solid var(--border);
457
+ }}
458
+ .airspace-cell:last-child {{ border-right: none; padding-right: 0; }}
459
+ .airspace-cell-full {{
460
+ grid-column: 1 / -1;
461
+ border-right: none;
462
+ border-top: 1px solid var(--border);
463
+ padding-top: 12px;
464
+ margin-top: 4px;
465
+ }}
466
+ .airspace-cell h4 {{
467
+ color: var(--text-primary);
468
+ margin-top: 0;
469
+ margin-bottom: 8px;
470
+ font-size: 0.95em;
471
+ }}
472
+ .airspace-cell ul {{ padding-left: 18px; line-height: 1.9; margin: 0; }}
473
+
474
  .warn-section {{
475
  background: var(--bg-warn);
476
  border: 1px solid var(--border);
 
481
  .warn-section h4 {{ color: var(--text-primary); margin-top: 0; }}
482
  .warn-section ul {{ padding-left: 18px; line-height: 1.9; margin: 0; }}
483
 
 
 
 
 
 
 
 
 
 
484
  .brief-footer {{
485
  background: var(--bg-secondary);
486
  border: 1px solid var(--border);
 
495
  <div class="brief-wrap">
496
 
497
  <div class="brief-header">
498
+ <h2>🛡️ OSINT Threat Brief</h2>
499
  <p>{brief.country or 'Unknown'} &nbsp;|&nbsp; {brief.region} &nbsp;|&nbsp; {brief.assessment_date}</p>
500
  </div>
501
 
 
506
  <span><strong>Fatalities:</strong> {fatalities_str}</span>
507
  </div>
508
 
 
 
 
 
 
 
 
 
 
509
  <div class="section">
510
  <h3>Analytical Summary</h3>
511
  <p>{brief.narrative_summary or '<em>Not available</em>'}</p>
512
  </div>
513
 
 
 
 
 
 
514
  <div class="grid-2">
515
  <div class="grid-cell grid-cell-border">
516
+ <h4>🔍 Key Findings</h4>
517
  <ul>{bullet_list(brief.key_findings)}</ul>
518
  </div>
519
  <div class="grid-cell">
520
+ <h4>⚠️ Escalation Indicators</h4>
521
  <ul>{bullet_list(brief.indicators_of_escalation)}</ul>
522
  </div>
523
  </div>
524
 
525
  <div class="grid-2">
526
  <div class="grid-cell grid-cell-border" style="border-top:1px solid var(--border)">
527
+ <h4>🎭 Primary Actors</h4>
528
  <ul>{bullet_list(brief.primary_actors)}</ul>
529
  </div>
530
  <div class="grid-cell" style="border-top:1px solid var(--border)">
531
+ <h4>📍 Key Locations</h4>
532
  <ul>{bullet_list(brief.key_locations)}</ul>
533
  </div>
534
  </div>
535
 
 
 
536
  {news_section}
537
 
538
+ {airspace_section}
539
+
540
  <div class="warn-section">
541
+ <h4>📡 Recommended Watch Items</h4>
542
  <ul>{bullet_list(brief.recommended_watch_items)}</ul>
543
  </div>
544
 
requirements.txt CHANGED
@@ -2,5 +2,3 @@ gradio>=5.23.0
2
  smolagents>=1.10.0
3
  feedparser>=6.0.10
4
  requests>=2.31.0
5
- fpdf2>=2.7.0
6
- Pillow>=10.0.0
 
2
  smolagents>=1.10.0
3
  feedparser>=6.0.10
4
  requests>=2.31.0
 
 
tools.py CHANGED
@@ -2,8 +2,11 @@
2
  tools.py β€” OSINT data source tools for the agentic analyst loop.
3
 
4
  Required Space Secrets:
5
- ACLED_USERNAME — your myACLED account email (from https://developer.acleddata.com)
6
- ACLED_PASSWORD — your myACLED account password
 
 
 
7
  """
8
 
9
  import os
@@ -16,26 +19,20 @@ from datetime import datetime, timedelta
16
  from smolagents import tool
17
 
18
  # ---------------------------------------------------------------------------
19
- # ACLED OAuth 2.0 token cache
20
- # API docs: https://acleddata.com/api-documentation/acled-endpoint
21
- # Auth: password grant flow → Bearer token in Authorization header
22
  # ---------------------------------------------------------------------------
23
 
24
- ACLED_TOKEN_URL = "https://acleddata.com/oauth/token"
25
- ACLED_BASE = "https://acleddata.com/api/acled/read"
26
-
27
- _token_cache: dict = {
28
  "access_token": None,
29
- "expires_at": 0.0,
30
- "lock": threading.Lock(),
31
  }
32
 
 
 
 
33
 
34
  def _get_acled_token() -> str:
35
- """
36
- Return a valid Bearer token, refreshing via OAuth password grant if needed.
37
- Caches the token so we don't re-auth on every tool call.
38
- """
39
  with _token_cache["lock"]:
40
  now = time.time()
41
  if _token_cache["access_token"] and now < _token_cache["expires_at"]:
@@ -46,47 +43,31 @@ def _get_acled_token() -> str:
46
 
47
  if not username or not password:
48
  raise EnvironmentError(
49
- "ACLED credentials missing. "
50
- "Add ACLED_USERNAME and ACLED_PASSWORD as Space secrets "
51
- "(Settings → Variables and Secrets). "
52
- "Register free at https://developer.acleddata.com"
53
  )
54
 
55
- try:
56
- resp = requests.post(
57
- ACLED_TOKEN_URL,
58
- data={
59
- "grant_type": "password",
60
- "client_id": "acled",
61
- "username": username,
62
- "password": password,
63
- },
64
- headers={"Content-Type": "application/x-www-form-urlencoded"},
65
- timeout=20,
66
- )
67
- except requests.RequestException as e:
68
- raise EnvironmentError(f"ACLED token request failed (network): {e}") from e
69
 
70
  if resp.status_code != 200:
71
  raise EnvironmentError(
72
- f"ACLED token request failed (HTTP {resp.status_code}): {resp.text[:300]}"
73
  )
74
 
75
- try:
76
- token_data = resp.json()
77
- except ValueError:
78
- raise EnvironmentError(f"ACLED token response not JSON: {resp.text[:300]}")
79
-
80
- access_token = token_data.get("access_token")
81
- if not access_token:
82
- raise EnvironmentError(
83
- f"ACLED token response missing 'access_token'. Got: {token_data}"
84
- )
85
-
86
- expires_in = token_data.get("expires_in", 86400)
87
- _token_cache["access_token"] = access_token
88
- _token_cache["expires_at"] = now + int(expires_in) - 300 # refresh 5 min early
89
- return access_token
90
 
91
 
92
  def _strip_html(text: str) -> str:
@@ -101,7 +82,7 @@ def _strip_html(text: str) -> str:
101
  # ---------------------------------------------------------------------------
102
 
103
  @tool
104
- def fetch_acled_events(country: str, days_back: int = 14, limit: int = 15) -> str:
105
  """
106
  Fetches recent armed conflict events from ACLED for a given country.
107
  Returns dates, locations, actor names, event types, and fatality counts.
@@ -115,38 +96,34 @@ def fetch_acled_events(country: str, days_back: int = 14, limit: int = 15) -> st
115
  token = _get_acled_token()
116
  except EnvironmentError as e:
117
  return f"[ACLED] Auth error: {e}"
 
 
118
 
119
  since = (datetime.utcnow() - timedelta(days=days_back)).strftime("%Y-%m-%d")
120
 
121
  params = {
122
- "country": country,
123
- "event_date": since,
124
  "event_date_where": ">=",
125
- "limit": min(limit, 50),
126
- "fields": "event_date|event_type|sub_event_type|actor1|actor2|location|admin1|fatalities|notes",
127
- "_format": "json",
128
  }
129
 
130
- headers = {"Authorization": f"Bearer {token}"}
131
-
132
- try:
133
- resp = requests.get(ACLED_BASE, params=params, headers=headers, timeout=20)
134
- except requests.RequestException as e:
135
- return f"[ACLED] Request failed (network): {e}"
136
-
137
- # Surface any non-200 HTTP status before trying to parse JSON
138
- if resp.status_code != 200:
139
- return f"[ACLED] HTTP {resp.status_code}: {resp.text[:300]}"
140
 
141
  try:
 
 
142
  data = resp.json()
143
- except ValueError:
144
- return f"[ACLED] Could not parse response as JSON. Raw: {resp.text[:300]}"
145
 
146
- api_status = data.get("status")
147
- if api_status != 200:
148
- msg = data.get("error") or data.get("message") or str(data)[:300]
149
- return f"[ACLED] API status {api_status}: {msg}"
150
 
151
  events = data.get("data", [])
152
  if not events:
@@ -156,15 +133,15 @@ def fetch_acled_events(country: str, days_back: int = 14, limit: int = 15) -> st
156
  total_fatalities = 0
157
 
158
  for ev in events:
159
- fatalities = int(ev.get("fatalities") or 0)
160
  total_fatalities += fatalities
161
  actor2_str = f" vs {ev['actor2']}" if ev.get("actor2") else ""
162
  lines.append(
163
- f"* {ev.get('event_date', '?')} | {ev.get('event_type', '?')} / {ev.get('sub_event_type', '')} | "
164
  f"{ev.get('location', '?')}, {ev.get('admin1', '?')} | "
165
  f"{ev.get('actor1', '?')}{actor2_str} | "
166
  f"Fatalities: {fatalities} | "
167
- f"Notes: {str(ev.get('notes', ''))[:80]}"
168
  )
169
 
170
  lines.append(f"\nTotal reported fatalities: {total_fatalities}")
@@ -176,55 +153,33 @@ def fetch_acled_events(country: str, days_back: int = 14, limit: int = 15) -> st
176
  # ---------------------------------------------------------------------------
177
 
178
  RSS_FEED_REGISTRY = {
179
- # Verified working as of 2026-05
180
-
181
- # --- General world news ---
182
- "bbc_world": "https://feeds.bbci.co.uk/news/world/rss.xml",
183
- "al_jazeera": "https://www.aljazeera.com/xml/rss/all.xml",
184
- "france24": "https://www.france24.com/en/rss",
185
- "euronews": "https://feeds.feedburner.com/euronews/en/news/",
186
- "npr_world": "https://feeds.npr.org/1004/rss.xml",
187
- "sky_news": "https://feeds.skynews.com/feeds/rss/world.xml",
188
- "un_news": "https://news.un.org/feed/subscribe/en/news/all/rss.xml",
189
- "ibt": "https://www.ibtimes.com/rss",
190
-
191
- # --- Regional: Middle East ---
192
- "middle_east_eye": "https://www.middleeasteye.net/rss",
193
- "al_monitor": "https://www.almonitor.com/rss",
194
- "arab_news": "https://www.arabnews.com/rss.xml",
195
-
196
- # --- Regional: Africa ---
197
- "allafrica": "https://allafrica.com/tools/headlines/rdf/latest/headlines.rdf",
198
-
199
- # --- Regional: Asia-Pacific ---
200
- "radio_free_asia": "https://www.rfa.org/english/rss2.xml",
201
- "scmp": "https://www.scmp.com/rss/91/feed",
202
-
203
- # --- Regional: South Asia ---
204
- "dawn": "https://www.dawn.com/feeds/home",
205
-
206
- # --- Regional: Russia / Eastern Europe ---
207
- "moscow_times": "https://www.themoscowtimes.com/rss/news",
208
-
209
- # --- OSINT / investigative ---
210
- "bellingcat": "https://www.bellingcat.com/feed/",
211
- "the_intercept": "https://theintercept.com/feed/?rss",
212
- "occrp": "https://www.occrp.org/en/component/rssfeed/index.xml",
213
-
214
- # --- Policy / security analysis ---
215
- "crisis_group": "https://www.crisisgroup.org/rss.xml",
216
- "war_on_rocks": "https://warontherocks.com/feed/",
217
- "just_security": "https://www.justsecurity.org/feed/",
218
- "defense_one": "https://www.defenseone.com/rss/all/",
219
- "cipher_brief": "https://www.thecipherbrief.com/feed",
220
- "stimson": "https://www.stimson.org/feed/",
221
-
222
- # --- Human rights ---
223
- "hrw": "https://www.hrw.org/rss.xml",
224
- "amnesty": "https://www.amnesty.org/en/feed/",
225
  }
226
 
227
- SCAN_LIMIT = 20 # Max entries scanned per feed — keeps LLM context manageable
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  # Signal words that bump an article to "notable"
230
  NOTABLE_SIGNALS = [
@@ -246,8 +201,8 @@ def _is_notable(title: str, summary: str) -> bool:
246
  @tool
247
  def fetch_rss_headlines(
248
  topic: str,
249
- sources: str = "bbc_world,al_jazeera,france24",
250
- max_articles: int = 10,
251
  ) -> str:
252
  """
253
  Fetches recent RSS news headlines related to a topic or region.
@@ -263,21 +218,8 @@ def fetch_rss_headlines(
263
  al_jazeera, bellingcat, crisis_group, acled_blog, un_news, foreign_policy.
264
  max_articles: Maximum total articles to return across all sources (default 20).
265
  """
266
- # Common country aliases so searches don't miss alternate names in articles
267
- _ALIASES = {
268
- "myanmar": ["myanmar", "burma"],
269
- "burma": ["myanmar", "burma"],
270
- "ivory coast": ["ivory coast", "côte d'ivoire"],
271
- "drc": ["drc", "congo", "democratic republic"],
272
- "car": ["central african republic", "car"],
273
- "uae": ["uae", "united arab emirates"],
274
- }
275
-
276
  source_keys = [s.strip() for s in sources.split(",") if s.strip()]
277
- base_keywords = [w.lower() for w in topic.lower().split() if len(w) > 2]
278
- topic_lower = topic.lower().strip()
279
- extra = _ALIASES.get(topic_lower, [])
280
- keywords = list(dict.fromkeys(base_keywords + extra)) # deduplicate, preserve order
281
  articles = []
282
  feed_errors = []
283
 
@@ -307,7 +249,7 @@ def fetch_rss_headlines(
307
 
308
  title = entry.get("title", "").strip()
309
  raw_summary = entry.get("summary", entry.get("description", ""))
310
- summary = _strip_html(raw_summary)[:150]
311
  published = entry.get("published", entry.get("updated", ""))
312
  link = entry.get("link", "")
313
 
@@ -359,94 +301,296 @@ def fetch_rss_headlines(
359
 
360
 
361
  # ---------------------------------------------------------------------------
362
- # US State Department Travel Advisory Tool
363
  # ---------------------------------------------------------------------------
364
 
365
- _ADVISORY_API = "https://cadataapi.state.gov/api/TravelAdvisories"
366
-
367
- _RISK_KEYWORDS = {
368
- "crime": "Crime",
369
- "terrorism": "Terrorism",
370
- "civil unrest": "Civil Unrest",
371
- "health": "Health",
372
- "natural disaster": "Natural Disaster",
373
- "kidnapping": "Kidnapping",
374
- "wrongful detention": "Wrongful Detention",
375
- "piracy": "Piracy",
376
- "maritime": "Maritime",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  }
378
 
379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  @tool
381
- def fetch_travel_advisory(country: str) -> str:
382
  """
383
- Fetches the current US State Department travel advisory for a country
384
- using the official State Department data API.
385
- Returns the advisory level (1–4), risk categories, publication date,
386
- a plain-text summary, and a link to the full advisory.
 
 
 
 
 
 
 
387
 
388
- Advisory levels:
389
- 1 = Exercise Normal Precautions
390
- 2 = Exercise Increased Caution
391
- 3 = Reconsider Travel
392
- 4 = Do Not Travel
393
 
394
  Args:
395
- country: Country name to look up (e.g. 'Sudan', 'Ukraine', 'Haiti').
 
396
  """
397
- try:
398
- resp = requests.get(_ADVISORY_API, timeout=20)
399
- resp.raise_for_status()
400
- advisories = resp.json()
401
- except requests.RequestException as e:
402
- return f"[Travel Advisory] Request failed: {e}"
403
- except ValueError:
404
- return "[Travel Advisory] Could not parse API response as JSON."
405
-
406
- country_lower = country.lower().strip()
407
-
408
- match = None
409
- for entry in advisories:
410
- title = entry.get("Title", "")
411
- # Title format: "Country Name - Level N: Description"
412
- dest = title.split(" - Level ")[0].strip()
413
- if country_lower in dest.lower():
414
- match = entry
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  break
 
 
 
 
 
 
 
 
416
 
417
- if not match:
418
- return (
419
- f"[Travel Advisory] No advisory found for '{country}'. "
420
- "Check spelling or try the country's common English name."
421
- )
422
 
423
- title = match.get("Title", "")
424
- link = match.get("Link", "")
425
- published = match.get("Published", match.get("Updated", ""))
426
- raw_summary = match.get("Summary", "")
427
- summary = _strip_html(raw_summary)[:250]
428
-
429
- level_match = re.search(r"Level\s+(\d)", title, re.IGNORECASE)
430
- level_num = level_match.group(1) if level_match else "Unknown"
431
-
432
- summary_lower = summary.lower()
433
- indicators = [
434
- label for keyword, label in _RISK_KEYWORDS.items()
435
- if keyword in summary_lower
436
- ]
437
-
438
- # Parse ISO timestamp to a readable date
439
- date_str = published[:10] if published else ""
440
-
441
- lines = [
442
- f"[Travel Advisory] {title}",
443
- f"Risk Categories: {', '.join(indicators) if indicators else 'See summary'}",
444
- f"Published: {date_str}",
445
- f"Summary: {summary}",
446
- ]
447
- if link:
448
- lines.append(f"Full Advisory: {link}")
449
- return "\n".join(lines)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
 
452
  # ---------------------------------------------------------------------------
@@ -463,5 +607,12 @@ def list_available_sources() -> str:
463
  lines = ["Available RSS sources:"]
464
  for key, url in RSS_FEED_REGISTRY.items():
465
  lines.append(f" * {key}: {url}")
 
 
 
 
 
 
 
466
  lines.append("\nACLED is also available for structured armed conflict event data.")
467
  return "\n".join(lines)
 
2
  tools.py — OSINT data source tools for the agentic analyst loop.
3
 
4
  Required Space Secrets:
5
+ ACLED_USERNAME — your myACLED email address
6
+ ACLED_PASSWORD — your myACLED password
7
+
8
+ Optional Space Secrets (enhance airspace data):
9
+ AVWX_TOKEN — free API token from https://avwx.rest (enables per-airport NOTAM lookups)
10
  """
11
 
12
  import os
 
19
  from smolagents import tool
20
 
21
  # ---------------------------------------------------------------------------
22
+ # ACLED OAuth token cache
 
 
23
  # ---------------------------------------------------------------------------
24
 
25
+ _token_cache = {
 
 
 
26
  "access_token": None,
27
+ "expires_at": 0,
28
+ "lock": threading.Lock(),
29
  }
30
 
31
+ ACLED_TOKEN_URL = "https://acleddata.com/oauth/token"
32
+ ACLED_BASE = "https://acleddata.com/api/acled/read"
33
+
34
 
35
  def _get_acled_token() -> str:
 
 
 
 
36
  with _token_cache["lock"]:
37
  now = time.time()
38
  if _token_cache["access_token"] and now < _token_cache["expires_at"]:
 
43
 
44
  if not username or not password:
45
  raise EnvironmentError(
46
+ "ACLED credentials missing. Add ACLED_USERNAME and ACLED_PASSWORD "
47
+ "as Space secrets under Settings -> Variables and Secrets."
 
 
48
  )
49
 
50
+ resp = requests.post(
51
+ ACLED_TOKEN_URL,
52
+ headers={"Content-Type": "application/x-www-form-urlencoded"},
53
+ data={
54
+ "username": username,
55
+ "password": password,
56
+ "grant_type": "password",
57
+ "client_id": "acled",
58
+ },
59
+ timeout=15,
60
+ )
 
 
 
61
 
62
  if resp.status_code != 200:
63
  raise EnvironmentError(
64
+ f"ACLED token request failed ({resp.status_code}): {resp.text[:200]}"
65
  )
66
 
67
+ token_data = resp.json()
68
+ _token_cache["access_token"] = token_data["access_token"]
69
+ _token_cache["expires_at"] = now + token_data.get("expires_in", 86400) - 300
70
+ return _token_cache["access_token"]
 
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  def _strip_html(text: str) -> str:
 
82
  # ---------------------------------------------------------------------------
83
 
84
  @tool
85
+ def fetch_acled_events(country: str, days_back: int = 14, limit: int = 25) -> str:
86
  """
87
  Fetches recent armed conflict events from ACLED for a given country.
88
  Returns dates, locations, actor names, event types, and fatality counts.
 
96
  token = _get_acled_token()
97
  except EnvironmentError as e:
98
  return f"[ACLED] Auth error: {e}"
99
+ except requests.RequestException as e:
100
+ return f"[ACLED] Failed to obtain token: {e}"
101
 
102
  since = (datetime.utcnow() - timedelta(days=days_back)).strftime("%Y-%m-%d")
103
 
104
  params = {
105
+ "country": country,
106
+ "event_date": since,
107
  "event_date_where": ">=",
108
+ "limit": min(limit, 50),
109
+ "fields": "event_date|event_type|sub_event_type|actor1|actor2|location|admin1|fatalities|notes",
110
+ "_format": "json",
111
  }
112
 
113
+ headers = {
114
+ "Authorization": f"Bearer {token}",
115
+ "Content-Type": "application/json",
116
+ }
 
 
 
 
 
 
117
 
118
  try:
119
+ resp = requests.get(ACLED_BASE, params=params, headers=headers, timeout=15)
120
+ resp.raise_for_status()
121
  data = resp.json()
122
+ except requests.RequestException as e:
123
+ return f"[ACLED] Request failed: {e}"
124
 
125
+ if data.get("status") != 200:
126
+ return f"[ACLED] API error: {data.get('error', data)}"
 
 
127
 
128
  events = data.get("data", [])
129
  if not events:
 
133
  total_fatalities = 0
134
 
135
  for ev in events:
136
+ fatalities = int(ev.get("fatalities", 0))
137
  total_fatalities += fatalities
138
  actor2_str = f" vs {ev['actor2']}" if ev.get("actor2") else ""
139
  lines.append(
140
+ f"* {ev['event_date']} | {ev['event_type']} / {ev.get('sub_event_type', '')} | "
141
  f"{ev.get('location', '?')}, {ev.get('admin1', '?')} | "
142
  f"{ev.get('actor1', '?')}{actor2_str} | "
143
  f"Fatalities: {fatalities} | "
144
+ f"Notes: {ev.get('notes', '')[:120]}"
145
  )
146
 
147
  lines.append(f"\nTotal reported fatalities: {total_fatalities}")
 
153
  # ---------------------------------------------------------------------------
154
 
155
  RSS_FEED_REGISTRY = {
156
+ "reuters_world": "https://feeds.reuters.com/reuters/worldNews",
157
+ "bbc_world": "https://feeds.bbci.co.uk/news/world/rss.xml",
158
+ "al_jazeera": "https://www.aljazeera.com/xml/rss/all.xml",
159
+ "bellingcat": "https://www.bellingcat.com/feed/",
160
+ "crisis_group": "https://www.crisisgroup.org/rss.xml",
161
+ "acled_blog": "https://acleddata.com/feed/",
162
+ "un_news": "https://news.un.org/feed/subscribe/en/news/feed/rss.xml",
163
+ "foreign_policy": "https://foreignpolicy.com/feed/",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  }
165
 
166
+ AVIATION_RSS_SOURCES = {
167
+ "aviation_herald": "https://avherald.com/h?subscribe=rss",
168
+ "the_aviationist": "https://theaviationist.com/feed/",
169
+ "flight_global": "https://www.flightglobal.com/rss/",
170
+ "aviation_week": "https://aviationweek.com/rss/all",
171
+ "alert5": "https://alert5.com/feed/",
172
+ }
173
+
174
+ AIRSPACE_SIGNALS = [
175
+ "notam", "no-fly", "no fly", "airspace", "flight ban", "flight restriction",
176
+ "fir", "air defense", "air defence", "missile", "anti-aircraft",
177
+ "flight advisory", "aviation", "closed airspace", "restricted airspace",
178
+ "drone", "uav", "uas", "aircraft", "airline", "airport", "runway",
179
+ "air traffic", "eurocontrol", "icao", "overflight", "air corridor",
180
+ ]
181
+
182
+ SCAN_LIMIT = 50
183
 
184
  # Signal words that bump an article to "notable"
185
  NOTABLE_SIGNALS = [
 
201
  @tool
202
  def fetch_rss_headlines(
203
  topic: str,
204
+ sources: str = "reuters_world,bbc_world,al_jazeera",
205
+ max_articles: int = 20,
206
  ) -> str:
207
  """
208
  Fetches recent RSS news headlines related to a topic or region.
 
218
  al_jazeera, bellingcat, crisis_group, acled_blog, un_news, foreign_policy.
219
  max_articles: Maximum total articles to return across all sources (default 20).
220
  """
 
 
 
 
 
 
 
 
 
 
221
  source_keys = [s.strip() for s in sources.split(",") if s.strip()]
222
+ keywords = [w.lower() for w in topic.lower().split() if len(w) > 2]
 
 
 
223
  articles = []
224
  feed_errors = []
225
 
 
249
 
250
  title = entry.get("title", "").strip()
251
  raw_summary = entry.get("summary", entry.get("description", ""))
252
+ summary = _strip_html(raw_summary)[:300]
253
  published = entry.get("published", entry.get("updated", ""))
254
  link = entry.get("link", "")
255
 
 
301
 
302
 
303
  # ---------------------------------------------------------------------------
304
+ # Airspace helpers — EASA CZIBs, AviationWeather SIGMETs, AVWX NOTAMs
305
  # ---------------------------------------------------------------------------
306
 
307
+ # Country name → list of primary ICAO airport codes (for AVWX NOTAM lookups)
308
+ COUNTRY_ICAO_MAP: dict[str, list[str]] = {
309
+ "ukraine": ["UKBB", "UKKK", "UKLL"],
310
+ "russia": ["UUEE", "UUDD", "ULLI"],
311
+ "sudan": ["HSSS"],
312
+ "myanmar": ["VYYY", "VYBR"],
313
+ "haiti": ["MTPP"],
314
+ "syria": ["OSDI", "OSLK"],
315
+ "iraq": ["ORBI", "ORMM", "ORKK"],
316
+ "libya": ["HLLT", "HLLB"],
317
+ "somalia": ["HCMM"],
318
+ "yemen": ["OYAA", "OYAB"],
319
+ "afghanistan": ["OAKB"],
320
+ "ethiopia": ["HAAB"],
321
+ "nigeria": ["DNMM", "DNKN", "DNAA"],
322
+ "mali": ["GABS", "GAMB"],
323
+ "burkina faso": ["DFFD"],
324
+ "niger": ["DRRN"],
325
+ "mozambique": ["FQMA"],
326
+ "central african republic": ["FEFF"],
327
+ "democratic republic of congo": ["FZAA", "FZNA"],
328
+ "drc": ["FZAA"],
329
+ "israel": ["LLBG", "LLHA"],
330
+ "palestine": ["LVGZ"],
331
+ "iran": ["OIIE", "OIII", "OIMM"],
332
+ "pakistan": ["OPKC", "OPLA", "OPPS"],
333
+ "north korea": ["ZKPY"],
334
+ "venezuela": ["SVMI", "SVMC"],
335
+ "mexico": ["MMMX", "MMGL", "MMMY"],
336
+ "colombia": ["SKBO", "SKCL"],
337
+ "lebanon": ["OLBA"],
338
+ "georgia": ["UGTB"],
339
+ "armenia": ["UDYZ"],
340
+ "azerbaijan": ["UBBB"],
341
  }
342
 
343
 
344
+ def _keywords(country: str) -> list[str]:
345
+ return [w.lower() for w in country.lower().split() if len(w) > 2]
346
+
347
+
348
def _fetch_easa_czibs(country: str) -> list[str]:
    """Return rows of EASA's Conflict Zone Information Bulletin page that mention *country*.

    The CZIB listing page is downloaded and every ``<tr>`` element is reduced
    to plain text. A row is kept when any keyword from ``_keywords(country)``
    occurs in its lowercased text.

    Args:
        country: Country name used to derive the search keywords.

    Returns:
        One ``"EASA CZIB | <row text>[ | <detail URL>]"`` line per matching
        row. If the request fails or nothing matches, a single-element list
        holding a bracketed ``[EASA CZIB]`` status message.
    """
    url = "https://www.easa.europa.eu/en/domains/air-operations/czibs"
    try:
        resp = requests.get(
            url,
            timeout=15,
            headers={"User-Agent": "Mozilla/5.0 (compatible; OSINTBot/1.0)"},
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        return [f"[EASA CZIB] Request failed: {e}"]

    terms = _keywords(country)
    table_rows = re.findall(r"<tr[^>]*>(.*?)</tr>", resp.text, re.DOTALL | re.IGNORECASE)

    matches = []
    for row_html in table_rows:
        # Replace tags with spaces, then collapse whitespace runs.
        plain = re.sub(r"<[^>]+>", " ", row_html)
        plain = re.sub(r"\s+", " ", plain).strip()
        if not plain:
            continue
        lowered = plain.lower()
        if not any(term in lowered for term in terms):
            continue

        line = f"EASA CZIB | {plain}"
        # Append the bulletin's detail page when the row links to one.
        href = re.search(r'href="(/en/domains/air-operations/czibs/[^"]+)"', row_html)
        if href:
            line += f" | https://www.easa.europa.eu{href.group(1)}"
        matches.append(line)

    if matches:
        return matches
    return [f"[EASA CZIB] No active conflict zone bulletins found for '{country}'."]
382
+
383
+
384
def _fetch_sigmets(country: str) -> list[str]:
    """Return active international SIGMETs whose FIR name or raw text mentions *country*.

    Queries the AviationWeather.gov ``isigmet`` JSON endpoint and keeps the
    entries in which any keyword from ``_keywords(country)`` occurs
    (case-insensitive substring match) in the FIR name or the raw SIGMET text.

    Args:
        country: Country name used to derive the search keywords.

    Returns:
        One formatted ``"SIGMET | ..."`` line per matching entry. On a request
        or JSON-decoding failure, an unexpected payload shape, or no match, a
        single-element list holding a bracketed ``[SIGMET]`` status message.
    """
    # Imported locally so this function does not rely on changes to the
    # module-level import block.
    from datetime import timezone

    url = "https://aviationweather.gov/api/data/isigmet?format=json"
    try:
        resp = requests.get(url, timeout=15)
        resp.raise_for_status()
        sigmets = resp.json()
    except (requests.RequestException, ValueError) as e:
        return [f"[SIGMET] Request failed: {e}"]

    if not isinstance(sigmets, list):
        return ["[SIGMET] Unexpected response format."]

    def _as_utc(epoch) -> str:
        # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp().
        return datetime.fromtimestamp(epoch, tz=timezone.utc).strftime("%Y-%m-%d %H:%MZ")

    kws = _keywords(country)
    results = []
    for s in sigmets:
        if not isinstance(s, dict):
            continue  # skip malformed entries rather than raising AttributeError

        # JSON null arrives as None; normalise so joining and slicing are safe.
        fir_name = s.get("firName") or ""
        raw = str(s.get("rawSigmet") or "")
        searchable = f"{fir_name} {raw}".lower()

        if not any(kw in searchable for kw in kws):
            continue

        try:
            valid_from = _as_utc(s["validTimeFrom"])
            valid_to = _as_utc(s["validTimeTo"])
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            # Missing, non-numeric or out-of-range timestamps: show both as unknown.
            valid_from = valid_to = "?"

        base = s.get("base") or "SFC"
        top = s.get("top")
        if top is None:
            top = "?"
        hazard = s.get("hazard") or "?"
        qualifier = s.get("qualifier") or ""

        results.append(
            f"SIGMET | {fir_name} | Hazard: {hazard} {qualifier}"
            f" | Valid: {valid_from} → {valid_to}"
            f" | FL: {base}–{top}"
            f" | {raw[:150]}"
        )

    if not results:
        return [f"[SIGMET] No active SIGMETs found for '{country}'."]
    return results
426
+
427
+
428
+ def _fetch_avwx_notams(country: str, token: str) -> list[str]:
429
+ """Fetch NOTAMs via AVWX for the country's main airports (requires AVWX_TOKEN)."""
430
+ country_lower = country.lower()
431
+
432
+ # Direct or partial match in the ICAO map
433
+ icao_codes = COUNTRY_ICAO_MAP.get(country_lower)
434
+ if not icao_codes:
435
+ for k, v in COUNTRY_ICAO_MAP.items():
436
+ if country_lower in k or k in country_lower:
437
+ icao_codes = v
438
+ break
439
+
440
+ if not icao_codes:
441
+ return [f"[NOTAM] No ICAO airport codes mapped for '{country}'. Skipping AVWX lookup."]
442
+
443
+ headers = {"Authorization": f"BEARER {token}"}
444
+ results = []
445
+
446
+ for icao in icao_codes[:2]: # max 2 airports to stay within rate limits
447
+ try:
448
+ resp = requests.get(
449
+ f"https://avwx.rest/api/notam/{icao}",
450
+ headers=headers,
451
+ timeout=12,
452
+ )
453
+ if resp.status_code == 401:
454
+ return ["[NOTAM] AVWX token invalid or expired β€” check AVWX_TOKEN secret."]
455
+ resp.raise_for_status()
456
+ data = resp.json()
457
+ except (requests.RequestException, ValueError) as e:
458
+ results.append(f"[NOTAM] {icao}: {e}")
459
+ continue
460
+
461
+ notam_list = data if isinstance(data, list) else data.get("data", [])
462
+ if not notam_list:
463
+ results.append(f"[NOTAM] {icao}: No active NOTAMs.")
464
+ continue
465
+
466
+ for n in notam_list[:6]:
467
+ raw = (
468
+ n.get("raw")
469
+ or n.get("text", {}).get("repr", "")
470
+ or str(n)
471
+ )[:200]
472
+ results.append(f"NOTAM | {icao} | {raw}")
473
+
474
+ time.sleep(0.2)
475
+
476
+ return results if results else [f"[NOTAM] No NOTAMs returned for '{country}'."]
477
+
478
+
479
+ # ---------------------------------------------------------------------------
480
+ # Airspace tool — integrates EASA CZIBs + SIGMETs + AVWX NOTAMs + aviation RSS
481
+ # ---------------------------------------------------------------------------
482
+
483
@tool
def fetch_airspace_status(country: str, max_articles: int = 12) -> str:
    """
    Fetches airspace disruption intelligence for a given country from three
    structured sources plus aviation news RSS feeds:

    1. EASA Conflict Zone Information Bulletins (CZIBs) — official EU conflict
       zone airspace warnings (no auth required).
    2. AviationWeather.gov international SIGMETs — active hazards (thunderstorms,
       volcanic ash, turbulence, tropical cyclones) within the country's FIRs
       (no auth required).
    3. AVWX NOTAMs — per-airport notices to airmen for the country's main airports
       (requires optional AVWX_TOKEN Space secret).
    4. Aviation news RSS feeds — filtered for country + airspace keywords.

    Returns a combined report suitable for inclusion in a threat brief's airspace
    section, covering no-fly zones, active restrictions, and aviation disruptions.

    Args:
        country: Country name to query (e.g. 'Ukraine', 'Sudan', 'Libya').
        max_articles: Maximum RSS articles to include (default 12).
    """
    kws = _keywords(country)
    # Match signal terms on word boundaries (allowing a plural "s") so that
    # short entries such as "fir" or "uas" do not fire inside unrelated words
    # like "first", "confirmed" or "persuasion".
    signal_re = re.compile(
        r"\b(?:" + "|".join(re.escape(sig) for sig in AIRSPACE_SIGNALS) + r")s?\b"
    )
    sections: list[str] = []

    # -- 1. EASA CZIBs --------------------------------------------------------
    sections.append("=== EASA Conflict Zone Bulletins (CZIBs) ===")
    sections.extend(_fetch_easa_czibs(country))

    # -- 2. AviationWeather SIGMETs -------------------------------------------
    sections.append("\n=== Active SIGMETs (AviationWeather.gov) ===")
    sections.extend(_fetch_sigmets(country))

    # -- 3. AVWX NOTAMs (optional) --------------------------------------------
    sections.append("\n=== NOTAMs (AVWX) ===")
    avwx_token = os.environ.get("AVWX_TOKEN", "").strip()
    if avwx_token:
        sections.extend(_fetch_avwx_notams(country, avwx_token))
    else:
        sections.append("[NOTAM] AVWX_TOKEN not set — skipping NOTAM lookup.")

    # -- 4. Aviation RSS news -------------------------------------------------
    articles = []
    feed_errors = []

    # Aviation feeds first, followed by three general world-news feeds.
    all_sources = {**AVIATION_RSS_SOURCES, **{
        k: v for k, v in RSS_FEED_REGISTRY.items()
        if k in ("reuters_world", "bbc_world", "al_jazeera")
    }}

    for key, url in all_sources.items():
        if len(articles) >= max_articles:
            break
        try:
            feed = feedparser.parse(url)
            if feed.bozo and not feed.entries:
                feed_errors.append(f"[{key}] Feed error")
                continue
        except Exception as e:
            # Best effort: one failing feed must not abort the whole report.
            feed_errors.append(f"[{key}] Exception: {e}")
            continue

        source_name = feed.feed.get("title", key)

        for entry in feed.entries[:SCAN_LIMIT]:
            if len(articles) >= max_articles:
                break

            title = entry.get("title", "").strip()
            raw_summary = entry.get("summary", entry.get("description", ""))
            summary = _strip_html(raw_summary)[:300]
            published = entry.get("published", entry.get("updated", ""))
            link = entry.get("link", "")
            searchable = (title + " " + summary).lower()

            # Keep only articles that mention the country AND an airspace term.
            if not any(kw in searchable for kw in kws):
                continue
            if not signal_re.search(searchable):
                continue

            articles.append({
                "source_name": source_name,
                "published": published,
                "title": title,
                "summary": summary,
                "url": link,
            })

        time.sleep(0.3)  # brief pause between feeds

    sections.append(f"\n=== Aviation News ({len(articles)} articles) ===")
    if articles:
        for i, a in enumerate(articles, 1):
            sections.append(
                f"[{i}] {a['source_name']} | {a['published']}\n"
                f"    Title:   {a['title']}\n"
                f"    Summary: {a['summary']}\n"
                f"    URL:     {a['url']}"
            )
    else:
        sections.append(f"No aviation news matched for '{country}'.")

    if feed_errors:
        sections.append(f"\n[Feed warnings: {'; '.join(feed_errors)}]")

    return "\n".join(sections)
594
 
595
 
596
  # ---------------------------------------------------------------------------
 
607
  lines = ["Available RSS sources:"]
608
  for key, url in RSS_FEED_REGISTRY.items():
609
  lines.append(f" * {key}: {url}")
610
+ lines.append("\nAviation RSS sources (used by fetch_airspace_status):")
611
+ for key, url in AVIATION_RSS_SOURCES.items():
612
+ lines.append(f" * {key}: {url}")
613
+ lines.append("\nStructured airspace sources (used by fetch_airspace_status):")
614
+ lines.append(" * EASA CZIBs (no auth): https://www.easa.europa.eu/en/domains/air-operations/czibs")
615
+ lines.append(" * AviationWeather SIGMETs (no auth): https://aviationweather.gov/api/data/isigmet")
616
+ lines.append(" * AVWX NOTAMs (requires AVWX_TOKEN secret): https://avwx.rest")
617
  lines.append("\nACLED is also available for structured armed conflict event data.")
618
  return "\n".join(lines)