""" app.py — Multi-source OSINT Analyst Space Agentic loop powered by smolagents + HuggingFace Inference API. Required Space Secrets: ACLED_USERNAME — your myACLED account email (from https://developer.acleddata.com) ACLED_PASSWORD — your myACLED account password HF_TOKEN — HuggingFace token (for Inference API, set automatically in Spaces) """ import os import time from datetime import datetime from typing import Optional import gradio as gr from smolagents import InferenceClientModel, ToolCallingAgent from tools import fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory from brief import ( BRIEF_PROMPT_SCHEMA, ThreatBrief, parse_brief_from_llm, render_brief_html, ) from export import generate_pdf # --------------------------------------------------------------------------- # Model + Agent setup # --------------------------------------------------------------------------- MODEL_ID = "Qwen/Qwen2.5-72B-Instruct" # Strong free model on HF Inference def build_agent() -> ToolCallingAgent: model = InferenceClientModel( model_id=MODEL_ID, token=os.environ.get("HF_TOKEN"), timeout=90, # per-call timeout in seconds ) agent = ToolCallingAgent( tools=[fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory], model=model, max_steps=12, verbosity_level=1, ) return agent # --------------------------------------------------------------------------- # Core analysis function # --------------------------------------------------------------------------- SYSTEM_PROMPT = """You are a professional OSINT intelligence analyst specializing in geopolitical conflict and security threat assessment. Your job is to: 1. Call the available tools to gather data from ACLED and RSS news sources. 2. Collect enough information to assess the security situation. 3. Synthesize your findings into a structured threat brief. Always start by checking what sources are available if needed. Be thorough — use multiple sources before drawing conclusions. """ def run_analysis( country: str, passport_country: str, rss_sources: list, days_back: int, progress=gr.Progress(), ) -> tuple: """ Runs the agentic OSINT analysis loop and returns: - Structured HTML threat brief - Raw agent trace for transparency - ThreatBrief object (stored in gr.State for PDF export) """ if not country or not str(country).strip(): return "
Please select a country or region.
", "", None country = str(country).strip() progress(0.1, desc="Initializing agent...") sources_str = ",".join(rss_sources) if rss_sources else "reuters_world,bbc_world" include_embassy = bool(passport_country and passport_country != "Not specified") embassy_instruction = ( f"4. REQUIRED — Populate the 'embassy' JSON field with the {passport_country} embassy " f"or nearest consulate in '{country}'. Include: name, street address, main phone number, " f"after-hours emergency phone, and official website URL.\n" if include_embassy else "" ) step_analyse = 5 if include_embassy else 4 step_output = 6 if include_embassy else 5 task = f""" Conduct an OSINT threat assessment for: {country} {f"Traveller passport country: {passport_country}" if include_embassy else ""} Instructions: 1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days. 2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}. 3. REQUIRED — Call fetch_travel_advisory for '{country}' and include the result in the travel_advisory fields. {embassy_instruction}{step_analyse}. Analyse all collected data carefully. {step_output}. Produce your final output as ONLY a JSON threat brief matching this schema: {BRIEF_PROMPT_SCHEMA} Today's date: {datetime.utcnow().strftime('%Y-%m-%d')} """ progress(0.2, desc="Agent gathering OSINT data...") raw_output = None last_error = None for attempt in range(1, 4): # up to 3 attempts try: agent = build_agent() raw_output = agent.run(task, additional_args={"system_prompt": SYSTEM_PROMPT}) break # success — exit retry loop except Exception as e: last_error = e err_str = str(e).lower() is_timeout = any(k in err_str for k in ("504", "timeout", "gateway", "timed out")) if is_timeout and attempt < 3: progress(0.2 + attempt * 0.1, desc=f"HF API timeout — retrying (attempt {attempt + 1}/3)...") time.sleep(5 * attempt) # 5s, then 10s back-off continue # Non-retryable error or final attempt — surface it error_html = f"""{e}Analysis failed after 3 attempts. Please try again later.
", str(last_error), None progress(0.85, desc="Parsing intelligence brief...") # Parse the agent's final output into a structured brief if isinstance(raw_output, str): brief = parse_brief_from_llm(raw_output) else: brief = ThreatBrief( narrative_summary=str(raw_output), severity="Unknown", confidence="Low", ) # Stamp passport country so PDF / HTML renderer can use it even if LLM omitted it if passport_country and passport_country != "Not specified": brief.passport_country = passport_country progress(0.95, desc="Rendering brief...") html_output = render_brief_html(brief) # Build a plain-text trace for the "Raw Trace" tab trace_lines = [f"=== OSINT Analysis: {country} ==="] trace_lines.append(f"Model: {MODEL_ID}") trace_lines.append(f"Sources: ACLED + {sources_str}") trace_lines.append(f"Days back: {days_back}") trace_lines.append(f"Date: {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}") trace_lines.append("\n--- Raw Agent Output ---") trace_lines.append(str(raw_output)) progress(1.0, desc="Done.") return html_output, "\n".join(trace_lines), brief # --------------------------------------------------------------------------- # Gradio UI # --------------------------------------------------------------------------- RSS_SOURCE_OPTIONS = [ # General world news ("BBC World", "bbc_world"), ("Al Jazeera", "al_jazeera"), ("France 24", "france24"), ("Euronews", "euronews"), ("NPR World", "npr_world"), ("Sky News", "sky_news"), ("UN News", "un_news"), ("Intl Business Times", "ibt"), # Regional: Middle East ("Middle East Eye", "middle_east_eye"), ("Al-Monitor", "al_monitor"), ("Arab News", "arab_news"), # Regional: Africa ("AllAfrica", "allafrica"), # Regional: Asia-Pacific ("Radio Free Asia", "radio_free_asia"), ("S. China Morning Post", "scmp"), # Regional: South Asia ("Dawn (Pakistan)", "dawn"), # Regional: Russia / E. Europe ("The Moscow Times", "moscow_times"), # OSINT / investigative ("Bellingcat", "bellingcat"), ("The Intercept", "the_intercept"), ("OCCRP", "occrp"), # Policy / security analysis ("Crisis Group", "crisis_group"), ("War on the Rocks", "war_on_rocks"), ("Just Security", "just_security"), ("Defense One", "defense_one"), ("The Cipher Brief", "cipher_brief"), ("Stimson Center", "stimson"), # Human rights ("Human Rights Watch", "hrw"), ("Amnesty Intl", "amnesty"), ] DESTINATION_COUNTRIES = [ "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria", "Burkina Faso", "Burundi", "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad", "Chile", "China", "Colombia", "Comoros", "Congo (Republic)", "Congo (DRC)", "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Eswatini", "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands", "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Nigeria", "North Korea", "North Macedonia", "Norway", "Oman", "Pakistan", "Palau", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Qatar", "Romania", "Russia", "Rwanda", "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", "Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "South Korea", "South Sudan", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria", "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay", "Uzbekistan", "Vanuatu", "Vatican City", "Venezuela", "Vietnam", "Yemen", "Zambia", "Zimbabwe", ] PASSPORT_COUNTRIES = [ "Not specified", "Afghanistan", "Albania", "Algeria", "Argentina", "Australia", "Austria", "Bangladesh", "Belgium", "Bolivia", "Brazil", "Cambodia", "Canada", "Chile", "China", "Colombia", "Croatia", "Czech Republic", "Denmark", "Ecuador", "Egypt", "Ethiopia", "Finland", "France", "Germany", "Ghana", "Greece", "Guatemala", "Hungary", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy", "Japan", "Jordan", "Kazakhstan", "Kenya", "Kuwait", "Malaysia", "Mexico", "Morocco", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway", "Pakistan", "Peru", "Philippines", "Poland", "Portugal", "Qatar", "Romania", "Russia", "Saudi Arabia", "Senegal", "Singapore", "South Africa", "South Korea", "Spain", "Sri Lanka", "Sudan", "Sweden", "Switzerland", "Taiwan", "Thailand", "Turkey", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Venezuela", "Vietnam", "Zimbabwe", ] EXAMPLE_QUERIES = [ ["Sudan", ["bbc_world", "al_jazeera", "middle_east_eye", "hrw"], 14], ["Myanmar", ["bbc_world", "crisis_group", "radio_free_asia", "hrw"], 21], ["Ukraine", ["bbc_world", "npr_world", "sky_news", "war_on_rocks"], 7], ["Haiti", ["bbc_world", "al_jazeera", "un_news", "amnesty"], 14], ] CSS = """ .gradio-container { max-width: 1100px !important; margin: auto; } #analyze-btn { background: #1a1a2e; color: white; } #analyze-btn:hover { background: #16213e; } footer { display: none !important; } """ with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo: brief_state = gr.State(None) gr.HTML("""Agentic multi-source intelligence briefing · ACLED + RSS · State Dept advisories · Powered by HuggingFace