Spaces:
Running
Running
| """ | |
| app.py — Multi-source OSINT Analyst Space | |
| Agentic loop powered by smolagents + HuggingFace Inference API. | |
| Required Space Secrets: | |
| ACLED_USERNAME — your myACLED account email (from https://developer.acleddata.com) | |
| ACLED_PASSWORD — your myACLED account password | |
| HF_TOKEN — HuggingFace token (for Inference API, set automatically in Spaces) | |
| """ | |
| import os | |
| import time | |
| from datetime import datetime | |
| from typing import Optional | |
| import gradio as gr | |
| from smolagents import InferenceClientModel, ToolCallingAgent | |
| from tools import fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory | |
| from brief import ( | |
| BRIEF_PROMPT_SCHEMA, | |
| ThreatBrief, | |
| parse_brief_from_llm, | |
| render_brief_html, | |
| ) | |
| from export import generate_pdf | |
# ---------------------------------------------------------------------------
# Model + Agent setup
# ---------------------------------------------------------------------------

# Model identifier handed to the HF Inference client.
MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"  # Strong free model on HF Inference


def build_agent() -> ToolCallingAgent:
    """Create a new tool-calling agent wired to the four OSINT tools.

    The model client reads its token from the ``HF_TOKEN`` environment
    variable (``None`` when unset) and uses a 90-second per-call timeout.
    The agent is capped at 12 steps.

    Returns:
        A freshly constructed ``ToolCallingAgent``.
    """
    osint_tools = [
        fetch_acled_events,
        fetch_rss_headlines,
        list_available_sources,
        fetch_travel_advisory,
    ]
    llm = InferenceClientModel(
        model_id=MODEL_ID,
        token=os.environ.get("HF_TOKEN"),
        timeout=90,  # per-call timeout in seconds
    )
    return ToolCallingAgent(
        tools=osint_tools,
        model=llm,
        max_steps=12,
        verbosity_level=1,
    )
# ---------------------------------------------------------------------------
# Core analysis function
# ---------------------------------------------------------------------------

# Analyst persona and working instructions for the agent. run_analysis passes
# this text to agent.run() via additional_args under the "system_prompt" key.
SYSTEM_PROMPT = """You are a professional OSINT intelligence analyst specializing
in geopolitical conflict and security threat assessment. Your job is to:
1. Call the available tools to gather data from ACLED and RSS news sources.
2. Collect enough information to assess the security situation.
3. Synthesize your findings into a structured threat brief.
Always start by checking what sources are available if needed.
Be thorough — use multiple sources before drawing conclusions.
"""
def _build_task(
    country: str,
    passport_country: str,
    sources_str: str,
    days_back: int,
    include_embassy: bool,
    today: str,
) -> str:
    """Compose the numbered instruction prompt that is handed to the agent."""
    embassy_instruction = (
        f"4. REQUIRED — Populate the 'embassy' JSON field with the {passport_country} embassy "
        f"or nearest consulate in '{country}'. Include: name, street address, main phone number, "
        f"after-hours emergency phone, and official website URL.\n"
        if include_embassy else ""
    )
    traveller_line = (
        f"Traveller passport country: {passport_country}" if include_embassy else ""
    )
    # The embassy step is optional, so the two closing steps shift by one.
    step_analyse = 5 if include_embassy else 4
    step_output = 6 if include_embassy else 5
    return f"""
Conduct an OSINT threat assessment for: {country}
{traveller_line}
Instructions:
1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days.
2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}.
3. REQUIRED — Call fetch_travel_advisory for '{country}' and include the result in the travel_advisory fields.
{embassy_instruction}{step_analyse}. Analyse all collected data carefully.
{step_output}. Produce your final output as ONLY a JSON threat brief matching this schema:
{BRIEF_PROMPT_SCHEMA}
Today's date: {today}
"""


def _render_error_html(error: Exception, attempt: int, max_attempts: int) -> str:
    """Build the red error panel shown in the brief tab when the agent run fails."""
    from html import escape  # local import keeps the file's import block untouched

    # Exception text may itself contain markup (e.g. an HTML error page returned
    # by a gateway), so it is escaped before being embedded in the panel.
    detail = escape(str(error))
    return f"""
<div style='padding:20px;background:#fff3f3;border:1px solid #cc0000;border-radius:8px'>
<strong>Analysis failed (attempt {attempt}/{max_attempts}):</strong><br>
<code style='font-size:0.85em'>{detail}</code><br><br>
<em>If you see a 504 / gateway timeout, the HF Inference API is under heavy load.
Wait a minute and try again, or reduce the number of selected news sources.</em>
</div>
"""


def run_analysis(
    country: str,
    passport_country: str,
    rss_sources: list,
    days_back: int,
    progress=gr.Progress(),
) -> tuple:
    """Run the agentic OSINT analysis loop for one country.

    Args:
        country: Destination country or region; blank input is rejected.
        passport_country: Traveller's passport country, or "Not specified"
            to skip the embassy step.
        rss_sources: Selected RSS source keys; a default pair is used when empty.
        days_back: ACLED lookback window in days.
        progress: Gradio progress tracker, called with a fraction and a label.

    Returns:
        A 3-tuple of:
        - Structured HTML threat brief (or an HTML error message)
        - Raw agent trace for transparency (or the error text)
        - ThreatBrief object (stored in gr.State for PDF export), or None on failure
    """
    from datetime import timezone  # local import: only `datetime` is imported at file level

    if not country or not str(country).strip():
        return "<p style='color:red'>Please select a country or region.</p>", "", None
    country = str(country).strip()

    progress(0.1, desc="Initializing agent...")
    # NOTE(review): "reuters_world" is not among the UI's RSS_SOURCE_OPTIONS keys;
    # confirm the tools module recognises it before relying on this fallback.
    sources_str = ",".join(rss_sources) if rss_sources else "reuters_world,bbc_world"
    include_embassy = bool(passport_country and passport_country != "Not specified")

    # Aware UTC timestamp; datetime.utcnow() is deprecated and returns a naive value.
    now_utc = datetime.now(timezone.utc)
    task = _build_task(
        country,
        passport_country,
        sources_str,
        days_back,
        include_embassy,
        now_utc.strftime('%Y-%m-%d'),
    )

    progress(0.2, desc="Agent gathering OSINT data...")
    max_attempts = 3
    raw_output = None
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            agent = build_agent()
            raw_output = agent.run(task, additional_args={"system_prompt": SYSTEM_PROMPT})
            break  # success — exit retry loop
        except Exception as exc:  # UI boundary: every failure is reported to the user
            last_error = exc
            err_str = str(exc).lower()
            is_timeout = any(k in err_str for k in ("504", "timeout", "gateway", "timed out"))
            if is_timeout and attempt < max_attempts:
                progress(
                    0.2 + attempt * 0.1,
                    desc=f"HF API timeout — retrying (attempt {attempt + 1}/{max_attempts})...",
                )
                time.sleep(5 * attempt)  # 5s, then 10s back-off
                continue
            # Non-retryable error or final attempt — surface it
            return _render_error_html(exc, attempt, max_attempts), str(exc), None

    if raw_output is None:
        # Only reachable when agent.run() returned None: every exception path
        # above either retries or returns, so this is not an "out of attempts" case.
        detail = "The agent returned no output."
        if last_error is not None:
            detail += f" Last error before that: {last_error}"
        return (
            "<p style='color:red'>The agent returned no output. Please try again later.</p>",
            detail,
            None,
        )

    progress(0.85, desc="Parsing intelligence brief...")
    # Parse the agent's final output into a structured brief
    if isinstance(raw_output, str):
        brief = parse_brief_from_llm(raw_output)
    else:
        brief = ThreatBrief(
            narrative_summary=str(raw_output),
            severity="Unknown",
            confidence="Low",
        )
    # Stamp passport country so PDF / HTML renderer can use it even if LLM omitted it
    if include_embassy:
        brief.passport_country = passport_country

    progress(0.95, desc="Rendering brief...")
    html_output = render_brief_html(brief)

    # Build a plain-text trace for the "Raw Trace" tab
    trace_lines = [
        f"=== OSINT Analysis: {country} ===",
        f"Model: {MODEL_ID}",
        f"Sources: ACLED + {sources_str}",
        f"Days back: {days_back}",
        f"Date: {now_utc.strftime('%Y-%m-%d %H:%M UTC')}",
        "\n--- Raw Agent Output ---",
        str(raw_output),
    ]
    progress(1.0, desc="Done.")
    return html_output, "\n".join(trace_lines), brief
| # --------------------------------------------------------------------------- | |
| # Gradio UI | |
| # --------------------------------------------------------------------------- | |
# RSS feeds offered in the UI, organised by editorial category. Each entry is a
# (display label, source key) pair.
_RSS_SOURCE_GROUPS = (
    ("General world news", (
        ("BBC World", "bbc_world"),
        ("Al Jazeera", "al_jazeera"),
        ("France 24", "france24"),
        ("Euronews", "euronews"),
        ("NPR World", "npr_world"),
        ("Sky News", "sky_news"),
        ("UN News", "un_news"),
        ("Intl Business Times", "ibt"),
    )),
    ("Regional: Middle East", (
        ("Middle East Eye", "middle_east_eye"),
        ("Al-Monitor", "al_monitor"),
        ("Arab News", "arab_news"),
    )),
    ("Regional: Africa", (
        ("AllAfrica", "allafrica"),
    )),
    ("Regional: Asia-Pacific", (
        ("Radio Free Asia", "radio_free_asia"),
        ("S. China Morning Post", "scmp"),
    )),
    ("Regional: South Asia", (
        ("Dawn (Pakistan)", "dawn"),
    )),
    ("Regional: Russia / E. Europe", (
        ("The Moscow Times", "moscow_times"),
    )),
    ("OSINT / investigative", (
        ("Bellingcat", "bellingcat"),
        ("The Intercept", "the_intercept"),
        ("OCCRP", "occrp"),
    )),
    ("Policy / security analysis", (
        ("Crisis Group", "crisis_group"),
        ("War on the Rocks", "war_on_rocks"),
        ("Just Security", "just_security"),
        ("Defense One", "defense_one"),
        ("The Cipher Brief", "cipher_brief"),
        ("Stimson Center", "stimson"),
    )),
    ("Human rights", (
        ("Human Rights Watch", "hrw"),
        ("Amnesty Intl", "amnesty"),
    )),
)

# Flat (label, key) list in category order, as consumed by the checkbox group.
RSS_SOURCE_OPTIONS = [
    option
    for _category, options in _RSS_SOURCE_GROUPS
    for option in options
]
# Choices for the "Country / Region of Interest" dropdown, grouped by initial
# letter. The dropdown also accepts custom values, so this list is a
# convenience, not a whitelist.
DESTINATION_COUNTRIES = [
    "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda",
    "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan",
    "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize",
    "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
    "Brunei", "Bulgaria", "Burkina Faso", "Burundi",
    "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad",
    "Chile", "China", "Colombia", "Comoros", "Congo (Republic)", "Congo (DRC)",
    "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic",
    "Denmark", "Djibouti", "Dominica", "Dominican Republic",
    "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia",
    "Eswatini", "Ethiopia",
    "Fiji", "Finland", "France",
    "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada",
    "Guatemala", "Guinea", "Guinea-Bissau", "Guyana",
    "Haiti", "Honduras", "Hungary",
    "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
    # Previously missing from the list.
    # NOTE(review): confirm the spelling the ACLED / advisory tools expect
    # ("Ivory Coast" vs "Côte d'Ivoire").
    "Ivory Coast",
    "Jamaica", "Japan", "Jordan",
    "Kazakhstan", "Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan",
    "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
    "Lithuania", "Luxembourg",
    "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands",
    "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia",
    "Montenegro", "Morocco", "Mozambique", "Myanmar",
    "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger",
    "Nigeria", "North Korea", "North Macedonia", "Norway",
    "Oman",
    "Pakistan", "Palau", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru",
    "Philippines", "Poland", "Portugal",
    "Qatar",
    "Romania", "Russia", "Rwanda",
    "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines",
    "Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia", "Senegal",
    "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia",
    "Solomon Islands", "Somalia", "South Africa", "South Korea", "South Sudan", "Spain",
    "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria",
    "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Timor-Leste", "Togo", "Tonga",
    "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu",
    "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States",
    "Uruguay", "Uzbekistan",
    "Vanuatu", "Vatican City", "Venezuela", "Vietnam",
    "Yemen",
    "Zambia", "Zimbabwe",
]
# Choices for the "Passport Country" dropdown. The leading "Not specified"
# entry is the dropdown's default value and disables the embassy step in
# run_analysis.
PASSPORT_COUNTRIES = ["Not specified"] + [
    "Afghanistan", "Albania", "Algeria", "Argentina", "Australia", "Austria",
    "Bangladesh", "Belgium", "Bolivia", "Brazil",
    "Cambodia", "Canada", "Chile", "China", "Colombia", "Croatia", "Czech Republic",
    "Denmark",
    "Ecuador", "Egypt", "Ethiopia",
    "Finland", "France",
    "Germany", "Ghana", "Greece", "Guatemala",
    "Hungary",
    "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
    "Japan", "Jordan",
    "Kazakhstan", "Kenya", "Kuwait",
    "Malaysia", "Mexico", "Morocco",
    "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway",
    "Pakistan", "Peru", "Philippines", "Poland", "Portugal",
    "Qatar",
    "Romania", "Russia",
    "Saudi Arabia", "Senegal", "Singapore", "South Africa", "South Korea", "Spain",
    "Sri Lanka", "Sudan", "Sweden", "Switzerland",
    "Taiwan", "Thailand", "Turkey",
    "Ukraine", "United Arab Emirates", "United Kingdom", "United States",
    "Venezuela", "Vietnam",
    "Zimbabwe",
]
# Example rows for gr.Examples: [country, RSS source keys, ACLED lookback days].
EXAMPLE_QUERIES = [
    [
        "Sudan",
        ["bbc_world", "al_jazeera", "middle_east_eye", "hrw"],
        14,
    ],
    [
        "Myanmar",
        ["bbc_world", "crisis_group", "radio_free_asia", "hrw"],
        21,
    ],
    [
        "Ukraine",
        ["bbc_world", "npr_world", "sky_news", "war_on_rocks"],
        7,
    ],
    [
        "Haiti",
        ["bbc_world", "al_jazeera", "un_news", "amnesty"],
        14,
    ],
]
# Custom stylesheet passed to gr.Blocks(css=...): caps and centres the page
# width, colours the element with id "analyze-btn" (normal and hover states),
# and hides the <footer> element.
CSS = """
.gradio-container { max-width: 1100px !important; margin: auto; }
#analyze-btn { background: #1a1a2e; color: white; }
#analyze-btn:hover { background: #16213e; }
footer { display: none !important; }
"""
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a copy of this file that had lost its indentation — confirm the
# Row/Column placement against the deployed Space.
with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo:
    # Holds the ThreatBrief from the latest successful run for PDF export.
    brief_state = gr.State(None)

    gr.HTML("""
<div style="text-align:center;padding:20px 0 10px 0">
<h1 style="font-size:2em;margin:0">OSINT Threat Analyst</h1>
<p style="color:#666;margin:6px 0 0 0">
Agentic multi-source intelligence briefing · ACLED + RSS · State Dept advisories · Powered by HuggingFace
</p>
</div>
<div style="text-align:center;background:#f0f4ff;border:1px solid #c7d4f0;border-radius:8px;padding:12px 24px;max-width:700px;margin:0 auto 16px auto;color:#1a2a5e;font-size:0.95em">
Enter the country or region you are researching or travelling to, select your preferred news sources, and click <strong>Run Analysis</strong> to generate a full intelligence brief.
</div>
""")

    with gr.Row():
        # Left column: query configuration.
        with gr.Column(scale=1):
            gr.Markdown("### Configure Analysis")
            country_input = gr.Dropdown(
                choices=DESTINATION_COUNTRIES,
                value=None,
                label="Country / Region of Interest",
                info="Type to search and select the country you are researching or travelling to.",
                filterable=True,
                allow_custom_value=True,
            )
            passport_input = gr.Dropdown(
                choices=PASSPORT_COUNTRIES,
                value="Not specified",
                label="Passport Country",
                info="Select your passport country to include embassy and consulate information in the report.",
                filterable=True,
            )
            with gr.Row():
                # Starts as "Deselect All" because every source is ticked by default.
                select_all_btn = gr.Button("Deselect All", size="sm", scale=1)
            rss_sources = gr.CheckboxGroup(
                choices=RSS_SOURCE_OPTIONS,
                value=[v for _, v in RSS_SOURCE_OPTIONS],  # all selected by default
                label="News Sources",
            )
            days_back = gr.Slider(
                minimum=7,
                maximum=90,
                value=14,
                step=7,
                label="ACLED lookback window (days)",
            )
            analyze_btn = gr.Button(
                "Run Analysis",
                variant="primary",
                elem_id="analyze-btn",
            )

        # Right column: results and export.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Threat Brief"):
                    brief_output = gr.HTML(
                        value="<div style='padding:40px;text-align:center;color:#999'>"
                        "Configure your query and click Run Analysis.</div>"
                    )
                with gr.Tab("Raw Agent Trace"):
                    trace_output = gr.Textbox(
                        label="Agent trace",
                        lines=30,
                        interactive=False,
                        placeholder="Agent reasoning and tool calls will appear here...",
                    )
            with gr.Row():
                # Disabled until an analysis has produced a brief.
                export_btn = gr.Button(
                    "Export PDF Report",
                    variant="secondary",
                    interactive=False,
                    scale=1,
                )
            pdf_file = gr.File(
                label="Download PDF Report",
                visible=False,
                interactive=False,
            )

    gr.Examples(
        examples=EXAMPLE_QUERIES,
        inputs=[country_input, rss_sources, days_back],
        label="Example Queries",
    )

    _ALL_SOURCE_KEYS = [v for _, v in RSS_SOURCE_OPTIONS]

    def _all_sources_selected(values):
        """Return True when every known source key is ticked (None counts as none)."""
        return set(values or ()) >= set(_ALL_SOURCE_KEYS)

    def toggle_sources(current_values):
        """Select all if any are unchecked; deselect all if all are checked."""
        if _all_sources_selected(current_values):
            return gr.update(value=[]), gr.update(value="Select All")
        return gr.update(value=_ALL_SOURCE_KEYS), gr.update(value="Deselect All")

    select_all_btn.click(
        fn=toggle_sources,
        inputs=[rss_sources],
        outputs=[rss_sources, select_all_btn],
    )

    # Keep the button label truthful when the selection changes by any other
    # route (manual ticks, or an example row being loaded); without this the
    # button could read "Deselect All" while a click would select everything.
    rss_sources.change(
        fn=lambda values: gr.update(
            value="Deselect All" if _all_sources_selected(values) else "Select All"
        ),
        inputs=[rss_sources],
        outputs=[select_all_btn],
    )

    # Run the analysis, then enable the export button only if a brief exists.
    analyze_btn.click(
        fn=run_analysis,
        inputs=[country_input, passport_input, rss_sources, days_back],
        outputs=[brief_output, trace_output, brief_state],
    ).then(
        fn=lambda b: gr.update(interactive=b is not None),
        inputs=[brief_state],
        outputs=[export_btn],
    )

    # Generate the PDF from the stored brief, then reveal the download widget.
    export_btn.click(
        fn=lambda b: generate_pdf(b) if b is not None else None,
        inputs=[brief_state],
        outputs=[pdf_file],
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[pdf_file],
    )

    gr.HTML("""
<div style="text-align:center;padding:16px;color:#999;font-size:0.8em;margin-top:20px">
⚠️ AI-generated from open sources. Not for operational use without verification.
Built with <a href="https://github.com/huggingface/smolagents">smolagents</a> ·
Data: <a href="https://acleddata.com">ACLED</a> + public RSS feeds
</div>
""")


if __name__ == "__main__":
    demo.launch()