File size: 18,086 Bytes
dcb2915
 
 
 
 
b5299d1
 
dcb2915
 
 
 
f2dc9cb
dcb2915
26739ca
dcb2915
 
 
 
26739ca
dcb2915
 
 
 
 
 
26739ca
dcb2915
 
 
 
 
 
 
 
 
f2dc9cb
 
 
 
dcb2915
26739ca
dcb2915
f2dc9cb
dcb2915
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cda21f
dcb2915
 
 
26739ca
dcb2915
 
 
 
26739ca
dcb2915
bbf5058
 
 
dcb2915
 
 
 
 
5cda21f
 
 
 
 
 
 
 
 
 
 
dcb2915
 
5cda21f
dcb2915
 
 
 
5cda21f
 
 
dcb2915
 
 
 
 
 
 
 
f2dc9cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dcb2915
 
 
 
 
 
 
 
 
 
 
 
 
5cda21f
 
 
 
dcb2915
 
 
 
 
 
 
 
 
 
 
 
 
 
26739ca
dcb2915
 
 
 
 
 
 
667fb71
bbf5058
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667fb71
bbf5058
 
 
667fb71
bbf5058
 
 
 
 
 
667fb71
bbf5058
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dcb2915
 
5cda21f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dcb2915
667fb71
 
 
 
dcb2915
 
 
 
 
 
 
 
 
 
 
26739ca
 
dcb2915
 
562157b
dcb2915
26739ca
dcb2915
 
5cda21f
 
 
dcb2915
 
 
 
 
 
bbf5058
 
 
5cda21f
bbf5058
 
 
dcb2915
 
5cda21f
 
 
 
 
 
 
 
667fb71
 
 
dcb2915
 
667fb71
5cda21f
dcb2915
 
 
 
 
 
 
 
 
 
 
562157b
dcb2915
 
 
 
 
 
 
562157b
dcb2915
 
 
 
 
562157b
dcb2915
 
 
 
 
 
 
26739ca
 
562157b
26739ca
 
 
 
 
 
562157b
26739ca
 
 
 
dcb2915
 
 
 
 
 
667fb71
 
 
 
 
 
 
 
 
 
 
 
 
 
dcb2915
 
5cda21f
26739ca
 
 
 
 
 
 
 
 
 
 
 
 
 
dcb2915
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
"""
app.py — Multi-source OSINT Analyst Space

Agentic loop powered by smolagents + HuggingFace Inference API.

Required Space Secrets:
  ACLED_USERNAME  — your myACLED account email (from https://developer.acleddata.com)
  ACLED_PASSWORD  — your myACLED account password
  HF_TOKEN        — HuggingFace token (for Inference API, set automatically in Spaces)
"""

import html
import os
import time
from datetime import datetime, timezone
from typing import Optional

import gradio as gr
from smolagents import InferenceClientModel, ToolCallingAgent

from tools import fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory
from brief import (
    BRIEF_PROMPT_SCHEMA,
    ThreatBrief,
    parse_brief_from_llm,
    render_brief_html,
)
from export import generate_pdf

# ---------------------------------------------------------------------------
# Model + Agent setup
# ---------------------------------------------------------------------------

# Hub model id handed to InferenceClientModel in build_agent() and echoed in
# the raw-trace header produced by run_analysis().
MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"   # Strong free model on HF Inference

def build_agent() -> ToolCallingAgent:
    """Create a new tool-calling agent wired to the HF Inference model.

    The model is addressed by ``MODEL_ID`` and authenticated with the
    ``HF_TOKEN`` environment variable (``None`` when the variable is unset).

    Returns:
        A ``ToolCallingAgent`` equipped with the ACLED, RSS, source-listing
        and travel-advisory tools, limited to 12 steps.
    """
    toolbox = [
        fetch_acled_events,
        fetch_rss_headlines,
        list_available_sources,
        fetch_travel_advisory,
    ]
    llm = InferenceClientModel(
        model_id=MODEL_ID,
        token=os.environ.get("HF_TOKEN"),
        timeout=90,  # seconds allowed for each individual model call
    )
    return ToolCallingAgent(
        tools=toolbox,
        model=llm,
        max_steps=12,
        verbosity_level=1,
    )


# ---------------------------------------------------------------------------
# Core analysis function
# ---------------------------------------------------------------------------

# Analyst persona and working instructions. run_analysis() forwards this text
# to the agent on every run via the ``additional_args`` of ``agent.run``.
SYSTEM_PROMPT = """You are a professional OSINT intelligence analyst specializing

in geopolitical conflict and security threat assessment. Your job is to:



1. Call the available tools to gather data from ACLED and RSS news sources.

2. Collect enough information to assess the security situation.

3. Synthesize your findings into a structured threat brief.



Always start by checking what sources are available if needed.

Be thorough — use multiple sources before drawing conclusions.

"""


# Total number of agent runs attempted before a gateway timeout is surfaced.
_MAX_ATTEMPTS = 3

# Substrings (matched case-insensitively) that mark an error as a retryable
# Inference API timeout.
_TIMEOUT_MARKERS = ("504", "timeout", "gateway", "timed out")


def _build_task(country, passport_country, sources_str, days_back, include_embassy):
    """Compose the agent task: numbered collection steps plus the JSON output schema."""
    embassy_instruction = (
        f"4. REQUIRED — Populate the 'embassy' JSON field with the {passport_country} embassy "
        f"or nearest consulate in '{country}'. Include: name, street address, main phone number, "
        f"after-hours emergency phone, and official website URL.\n"
        if include_embassy else ""
    )
    # The embassy step is optional, so the two closing steps shift by one
    # whenever it is present.
    step_analyse = 5 if include_embassy else 4
    step_output = 6 if include_embassy else 5
    passport_line = (
        f"Traveller passport country: {passport_country}" if include_embassy else ""
    )
    today = datetime.now(timezone.utc).strftime('%Y-%m-%d')

    return f"""

Conduct an OSINT threat assessment for: {country}

{passport_line}



Instructions:

1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days.

2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}.

3. REQUIRED — Call fetch_travel_advisory for '{country}' and include the result in the travel_advisory fields.

{embassy_instruction}{step_analyse}. Analyse all collected data carefully.

{step_output}. Produce your final output as ONLY a JSON threat brief matching this schema:



{BRIEF_PROMPT_SCHEMA}



Today's date: {today}

"""


def _render_error_html(error, attempt):
    """Render the red failure panel shown in the brief tab when the agent run fails."""
    # Exception text can contain arbitrary characters (including echoed user
    # input or upstream HTML), so it must be escaped before being embedded.
    safe_message = html.escape(str(error))
    return f"""

            <div style='padding:20px;background:#fff3f3;border:1px solid #cc0000;border-radius:8px'>

                <strong>Analysis failed (attempt {attempt}/{_MAX_ATTEMPTS}):</strong><br>

                <code style='font-size:0.85em'>{safe_message}</code><br><br>

                <em>If you see a 504 / gateway timeout, the HF Inference API is under heavy load.

                Wait a minute and try again, or reduce the number of selected news sources.</em>

            </div>

            """


def run_analysis(
    country: str,
    passport_country: str,
    rss_sources: list,
    days_back: int,
    progress=gr.Progress(),
) -> tuple:
    """Run the agentic OSINT analysis loop for one country.

    Args:
        country: Country or region to assess; blank input is rejected.
        passport_country: Traveller's passport country. ``"Not specified"`` or
            an empty value disables the embassy step of the task.
        rss_sources: Selected RSS source keys; when empty, the default
            ``"reuters_world,bbc_world"`` is used instead.
        days_back: ACLED lookback window in days.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A 3-tuple ``(html, trace, brief)``:
        - ``html``: the rendered threat brief, or an error message;
        - ``trace``: plain-text run header plus the raw agent output (or the
          error text on failure);
        - ``brief``: the ``ThreatBrief`` object kept in ``gr.State`` for PDF
          export, or ``None`` when the analysis failed.
    """
    if not country or not str(country).strip():
        return "<p style='color:red'>Please select a country or region.</p>", "", None
    country = str(country).strip()

    progress(0.1, desc="Initializing agent...")

    # NOTE(review): "reuters_world" is not among RSS_SOURCE_OPTIONS in this
    # file — confirm the RSS tool recognises that key.
    sources_str = ",".join(rss_sources) if rss_sources else "reuters_world,bbc_world"
    include_embassy = bool(passport_country and passport_country != "Not specified")
    task = _build_task(country, passport_country, sources_str, days_back, include_embassy)

    progress(0.2, desc="Agent gathering OSINT data...")

    raw_output = None
    last_error = None
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            # A fresh agent per attempt, so no state leaks from a failed run.
            agent = build_agent()
            # NOTE(review): the prompt is passed through ``additional_args``;
            # confirm this is how smolagents expects a system prompt to be supplied.
            raw_output = agent.run(task, additional_args={"system_prompt": SYSTEM_PROMPT})
            break  # success — exit retry loop
        except Exception as e:  # UI boundary: every failure is reported to the user
            last_error = e
            err_str = str(e).lower()
            is_timeout = any(marker in err_str for marker in _TIMEOUT_MARKERS)
            if is_timeout and attempt < _MAX_ATTEMPTS:
                progress(
                    0.2 + attempt * 0.1,
                    desc=f"HF API timeout — retrying (attempt {attempt + 1}/{_MAX_ATTEMPTS})...",
                )
                time.sleep(5 * attempt)  # 5s, then 10s back-off
                continue
            # Non-retryable error or final attempt — surface it
            return _render_error_html(e, attempt), str(e), None

    if raw_output is None:
        return (
            f"<p style='color:red'>Analysis failed after {_MAX_ATTEMPTS} attempts. "
            "Please try again later.</p>",
            str(last_error),
            None,
        )

    progress(0.85, desc="Parsing intelligence brief...")

    # Parse the agent's final output into a structured brief; anything that is
    # not a string is kept verbatim as a low-confidence narrative.
    if isinstance(raw_output, str):
        brief = parse_brief_from_llm(raw_output)
    else:
        brief = ThreatBrief(
            narrative_summary=str(raw_output),
            severity="Unknown",
            confidence="Low",
        )

    # Stamp passport country so PDF / HTML renderer can use it even if LLM omitted it
    if include_embassy:
        brief.passport_country = passport_country

    progress(0.95, desc="Rendering brief...")

    html_output = render_brief_html(brief)

    # Build a plain-text trace for the "Raw Trace" tab
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    trace_lines = [
        f"=== OSINT Analysis: {country} ===",
        f"Model: {MODEL_ID}",
        f"Sources: ACLED + {sources_str}",
        f"Days back: {days_back}",
        f"Date: {timestamp}",
        "\n--- Raw Agent Output ---",
        str(raw_output),
    ]

    progress(1.0, desc="Done.")
    return html_output, "\n".join(trace_lines), brief


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

# (display label, source key) pairs offered in the "News Sources" checkbox
# group. The second element of each pair is the value the checkbox group
# yields; run_analysis() joins the selected keys with commas and names them in
# the agent task.
RSS_SOURCE_OPTIONS = [
    # General world news
    ("BBC World",              "bbc_world"),
    ("Al Jazeera",             "al_jazeera"),
    ("France 24",              "france24"),
    ("Euronews",               "euronews"),
    ("NPR World",              "npr_world"),
    ("Sky News",               "sky_news"),
    ("UN News",                "un_news"),
    ("Intl Business Times",    "ibt"),
    # Regional: Middle East
    ("Middle East Eye",        "middle_east_eye"),
    ("Al-Monitor",             "al_monitor"),
    ("Arab News",              "arab_news"),
    # Regional: Africa
    ("AllAfrica",              "allafrica"),
    # Regional: Asia-Pacific
    ("Radio Free Asia",        "radio_free_asia"),
    ("S. China Morning Post",  "scmp"),
    # Regional: South Asia
    ("Dawn (Pakistan)",        "dawn"),
    # Regional: Russia / E. Europe
    ("The Moscow Times",       "moscow_times"),
    # OSINT / investigative
    ("Bellingcat",             "bellingcat"),
    ("The Intercept",          "the_intercept"),
    ("OCCRP",                  "occrp"),
    # Policy / security analysis
    ("Crisis Group",           "crisis_group"),
    ("War on the Rocks",       "war_on_rocks"),
    ("Just Security",          "just_security"),
    ("Defense One",            "defense_one"),
    ("The Cipher Brief",       "cipher_brief"),
    ("Stimson Center",         "stimson"),
    # Human rights
    ("Human Rights Watch",     "hrw"),
    ("Amnesty Intl",           "amnesty"),
]

# Choices for the "Country / Region of Interest" dropdown, grouped by initial
# letter. The dropdown also accepts custom values, so a destination missing
# from this list can still be typed in by the user.
DESTINATION_COUNTRIES = [
    "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda",
    "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan",
    "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize",
    "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
    "Brunei", "Bulgaria", "Burkina Faso", "Burundi",
    "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad",
    "Chile", "China", "Colombia", "Comoros", "Congo (Republic)", "Congo (DRC)",
    "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic",
    "Denmark", "Djibouti", "Dominica", "Dominican Republic",
    "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia",
    "Eswatini", "Ethiopia",
    "Fiji", "Finland", "France",
    "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada",
    "Guatemala", "Guinea", "Guinea-Bissau", "Guyana",
    "Haiti", "Honduras", "Hungary",
    "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
    "Jamaica", "Japan", "Jordan",
    "Kazakhstan", "Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan",
    "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
    "Lithuania", "Luxembourg",
    "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands",
    "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia",
    "Montenegro", "Morocco", "Mozambique", "Myanmar",
    "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger",
    "Nigeria", "North Korea", "North Macedonia", "Norway",
    "Oman",
    "Pakistan", "Palau", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru",
    "Philippines", "Poland", "Portugal",
    "Qatar",
    "Romania", "Russia", "Rwanda",
    "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines",
    "Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia", "Senegal",
    "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia",
    "Solomon Islands", "Somalia", "South Africa", "South Korea", "South Sudan", "Spain",
    "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria",
    "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Timor-Leste", "Togo", "Tonga",
    "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu",
    "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States",
    "Uruguay", "Uzbekistan",
    "Vanuatu", "Vatican City", "Venezuela", "Vietnam",
    "Yemen",
    "Zambia", "Zimbabwe",
]

# Choices for the "Passport Country" dropdown. The first entry,
# "Not specified", is the dropdown default and the sentinel run_analysis()
# checks to decide whether the embassy step is added to the agent task.
PASSPORT_COUNTRIES = [
    "Not specified",
    "Afghanistan", "Albania", "Algeria", "Argentina", "Australia", "Austria",
    "Bangladesh", "Belgium", "Bolivia", "Brazil", "Cambodia", "Canada", "Chile",
    "China", "Colombia", "Croatia", "Czech Republic", "Denmark", "Ecuador", "Egypt",
    "Ethiopia", "Finland", "France", "Germany", "Ghana", "Greece", "Guatemala",
    "Hungary", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
    "Japan", "Jordan", "Kazakhstan", "Kenya", "Kuwait", "Malaysia", "Mexico",
    "Morocco", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway",
    "Pakistan", "Peru", "Philippines", "Poland", "Portugal", "Qatar",
    "Romania", "Russia", "Saudi Arabia", "Senegal", "Singapore", "South Africa",
    "South Korea", "Spain", "Sri Lanka", "Sudan", "Sweden", "Switzerland",
    "Taiwan", "Thailand", "Turkey", "Ukraine", "United Arab Emirates",
    "United Kingdom", "United States", "Venezuela", "Vietnam", "Zimbabwe",
]

# Rows for the "Example Queries" widget. Each row is
# [country, selected RSS source keys, ACLED lookback in days], in the same
# order as the inputs given to gr.Examples (country, news sources, days back).
EXAMPLE_QUERIES = [
    ["Sudan",   ["bbc_world", "al_jazeera", "middle_east_eye", "hrw"], 14],
    ["Myanmar", ["bbc_world", "crisis_group", "radio_free_asia", "hrw"], 21],
    ["Ukraine", ["bbc_world", "npr_world", "sky_news", "war_on_rocks"], 7],
    ["Haiti",   ["bbc_world", "al_jazeera", "un_news", "amnesty"], 14],
]

# Custom stylesheet passed to gr.Blocks(css=...): caps the page width, styles
# the button whose elem_id is "analyze-btn", and hides the page footer.
CSS = """

.gradio-container { max-width: 1100px !important; margin: auto; }

#analyze-btn { background: #1a1a2e; color: white; }

#analyze-btn:hover { background: #16213e; }

footer { display: none !important; }

"""

with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo:

    # ThreatBrief from the latest run (None until a run succeeds); read by the
    # PDF export handler and used to enable/disable the export button.
    brief_state = gr.State(None)

    # Every selectable RSS source key: the default selection and the
    # "select all" target of the toggle button.
    _ALL_SOURCE_KEYS = [v for _, v in RSS_SOURCE_OPTIONS]

    gr.HTML("""

    <div style="text-align:center;padding:20px 0 10px 0">

      <h1 style="font-size:2em;margin:0">OSINT Threat Analyst</h1>

      <p style="color:#666;margin:6px 0 0 0">

        Agentic multi-source intelligence briefing · ACLED + RSS · State Dept advisories · Powered by HuggingFace

      </p>

    </div>

    <div style="text-align:center;background:#f0f4ff;border:1px solid #c7d4f0;border-radius:8px;padding:12px 24px;max-width:700px;margin:0 auto 16px auto;color:#1a2a5e;font-size:0.95em">

      Enter the country or region you are researching or travelling to, select your preferred news sources, and click <strong>Run Analysis</strong> to generate a full intelligence brief.

    </div>

    """)

    with gr.Row():
        # Left column: query configuration.
        with gr.Column(scale=1):
            gr.Markdown("### Configure Analysis")

            country_input = gr.Dropdown(
                choices=DESTINATION_COUNTRIES,
                value=None,
                label="Country / Region of Interest",
                info="Type to search and select the country you are researching or travelling to.",
                filterable=True,
                allow_custom_value=True,
            )

            passport_input = gr.Dropdown(
                choices=PASSPORT_COUNTRIES,
                value="Not specified",
                label="Passport Country",
                info="Select your passport country to include embassy and consulate information in the report.",
                filterable=True,
            )

            with gr.Row():
                # Starts as "Deselect All" because every source is ticked by default.
                select_all_btn = gr.Button("Deselect All", size="sm", scale=1)

            rss_sources = gr.CheckboxGroup(
                choices=RSS_SOURCE_OPTIONS,
                value=list(_ALL_SOURCE_KEYS),   # all selected by default
                label="News Sources",
            )

            days_back = gr.Slider(
                minimum=7,
                maximum=90,
                value=14,
                step=7,
                label="ACLED lookback window (days)",
            )

            analyze_btn = gr.Button(
                "Run Analysis",
                variant="primary",
                elem_id="analyze-btn",
            )


        # Right column: results and export.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Threat Brief"):
                    brief_output = gr.HTML(
                        value="<div style='padding:40px;text-align:center;color:#999'>"
                              "Configure your query and click Run Analysis.</div>"
                    )

                with gr.Tab("Raw Agent Trace"):
                    trace_output = gr.Textbox(
                        label="Agent trace",
                        lines=30,
                        interactive=False,
                        placeholder="Agent reasoning and tool calls will appear here...",
                    )

            with gr.Row():
                # Disabled until a run has produced a brief to export.
                export_btn = gr.Button(
                    "Export PDF Report",
                    variant="secondary",
                    interactive=False,
                    scale=1,
                )

            pdf_file = gr.File(
                label="Download PDF Report",
                visible=False,
                interactive=False,
            )

    gr.Examples(
        examples=EXAMPLE_QUERIES,
        inputs=[country_input, rss_sources, days_back],
        label="Example Queries",
    )

    def _all_sources_selected(current_values):
        """Return True when every known source key is ticked (None counts as no selection)."""
        return set(current_values or []).issuperset(_ALL_SOURCE_KEYS)

    def toggle_sources(current_values):
        """Deselect everything if all sources are ticked; otherwise select all.

        Returns updates for the checkbox group and for the toggle button label.
        """
        if _all_sources_selected(current_values):
            return gr.update(value=[]), gr.update(value="Select All")
        return gr.update(value=list(_ALL_SOURCE_KEYS)), gr.update(value="Deselect All")

    select_all_btn.click(
        fn=toggle_sources,
        inputs=[rss_sources],
        outputs=[rss_sources, select_all_btn],
    )

    # Keep the button label truthful when the selection changes without the
    # button being pressed (manual ticks, or an example row being loaded).
    rss_sources.change(
        fn=lambda values: gr.update(
            value="Deselect All" if _all_sources_selected(values) else "Select All"
        ),
        inputs=[rss_sources],
        outputs=[select_all_btn],
    )

    # Run the analysis, then enable PDF export only if a brief was produced.
    analyze_btn.click(
        fn=run_analysis,
        inputs=[country_input, passport_input, rss_sources, days_back],
        outputs=[brief_output, trace_output, brief_state],
    ).then(
        fn=lambda b: gr.update(interactive=b is not None),
        inputs=[brief_state],
        outputs=[export_btn],
    )

    # Generate the PDF from the stored brief, then reveal the download widget.
    export_btn.click(
        fn=lambda b: generate_pdf(b) if b is not None else None,
        inputs=[brief_state],
        outputs=[pdf_file],
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[pdf_file],
    )

    gr.HTML("""

    <div style="text-align:center;padding:16px;color:#999;font-size:0.8em;margin-top:20px">

      ⚠️ AI-generated from open sources. Not for operational use without verification.

      Built with <a href="https://github.com/huggingface/smolagents">smolagents</a> ·

      Data: <a href="https://acleddata.com">ACLED</a> + public RSS feeds

    </div>

    """)


# Start the Gradio server only when this file is executed as a script;
# importing the module builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()