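# Page-header residue from the hosting site's file viewer (not part of the
# program); kept as comments so the module still parses.
# OSINTTool / app.py
# Firemedic15's picture
# Upload 7 files
# bbf5058 verified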
"""
app.py — Multi-source OSINT Analyst Space
Agentic loop powered by smolagents + HuggingFace Inference API.
Required Space Secrets:
ACLED_USERNAME — your myACLED account email (from https://developer.acleddata.com)
ACLED_PASSWORD — your myACLED account password
HF_TOKEN — HuggingFace token (for Inference API, set automatically in Spaces)
"""
import os
import time
from datetime import datetime
from typing import Optional
import gradio as gr
from smolagents import InferenceClientModel, ToolCallingAgent
from tools import fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory
from brief import (
BRIEF_PROMPT_SCHEMA,
ThreatBrief,
parse_brief_from_llm,
render_brief_html,
)
from export import generate_pdf
# ---------------------------------------------------------------------------
# Model + Agent setup
# ---------------------------------------------------------------------------
# Hub id of the model handed to InferenceClientModel in build_agent().
# Chosen as a strong free model on HF Inference.
MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"
def build_agent() -> ToolCallingAgent:
    """Create a new tool-calling agent wired to the HF Inference API.

    The agent is given the four data-gathering tools imported from ``tools``
    and a model client for ``MODEL_ID``. The API token is read from the
    ``HF_TOKEN`` environment variable (``None`` when it is not set).

    Returns:
        A freshly constructed ``ToolCallingAgent`` limited to 12 steps.
    """
    toolbox = [
        fetch_acled_events,
        fetch_rss_headlines,
        list_available_sources,
        fetch_travel_advisory,
    ]
    llm = InferenceClientModel(
        model_id=MODEL_ID,
        token=os.environ.get("HF_TOKEN"),
        timeout=90,  # seconds allowed per inference call
    )
    return ToolCallingAgent(
        tools=toolbox,
        model=llm,
        max_steps=12,
        verbosity_level=1,
    )
# ---------------------------------------------------------------------------
# Core analysis function
# ---------------------------------------------------------------------------
# Analyst persona and working instructions; run_analysis() passes this text
# to the agent alongside the per-request task.
SYSTEM_PROMPT = (
    "You are a professional OSINT intelligence analyst specializing\n"
    "in geopolitical conflict and security threat assessment. Your job is to:\n"
    "1. Call the available tools to gather data from ACLED and RSS news sources.\n"
    "2. Collect enough information to assess the security situation.\n"
    "3. Synthesize your findings into a structured threat brief.\n"
    "Always start by checking what sources are available if needed.\n"
    "Be thorough — use multiple sources before drawing conclusions.\n"
)
def _is_timeout_error(exc: Exception) -> bool:
    """Return True when the exception text looks like a gateway/timeout failure."""
    text = str(exc).lower()
    return any(marker in text for marker in ("504", "timeout", "gateway", "timed out"))


def _failure_html(exc: Exception, attempt: int, max_attempts: int) -> str:
    """Render the error panel shown in the brief tab when an attempt fails."""
    from html import escape  # local import keeps this block self-contained

    # The exception text can contain markup-significant characters (and may
    # echo user-typed input), so it is escaped before being placed in HTML.
    detail = escape(str(exc))
    return f"""
<div style='padding:20px;background:#fff3f3;border:1px solid #cc0000;border-radius:8px'>
<strong>Analysis failed (attempt {attempt}/{max_attempts}):</strong><br>
<code style='font-size:0.85em'>{detail}</code><br><br>
<em>If you see a 504 / gateway timeout, the HF Inference API is under heavy load.
Wait a minute and try again, or reduce the number of selected news sources.</em>
</div>
"""


def run_analysis(
    country: str,
    passport_country: str,
    rss_sources: list,
    days_back: int,
    progress=gr.Progress(),
) -> tuple:
    """Run the agentic OSINT analysis loop for one country.

    Args:
        country: Destination country/region; blank or whitespace-only input
            short-circuits with a validation message.
        passport_country: Traveller's passport country. Any value other than
            an empty one or "Not specified" adds an embassy step to the task
            and is stamped onto the resulting brief.
        rss_sources: RSS source keys to query; when empty a built-in default
            pair is used instead.
        days_back: ACLED look-back window in days.
        progress: Gradio progress tracker (supplied via the default argument).

    Returns:
        A 3-tuple ``(html, trace, brief)``:
        - ``html``: rendered threat brief, or an error/validation message.
        - ``trace``: plain-text run summary plus the raw agent output
          (the error text on failure, "" on validation failure).
        - ``brief``: the ``ThreatBrief`` object, or ``None`` on any failure.
    """
    from datetime import timezone  # local import: only needed in this function

    if not country or not str(country).strip():
        return "<p style='color:red'>Please select a country or region.</p>", "", None
    country = str(country).strip()

    progress(0.1, desc="Initializing agent...")

    # NOTE(review): "reuters_world" is not among RSS_SOURCE_OPTIONS in this
    # file — confirm the tools module recognises that key.
    sources_str = ",".join(rss_sources) if rss_sources else "reuters_world,bbc_world"
    include_embassy = bool(passport_country and passport_country != "Not specified")

    # The embassy step is optional, so the later steps are renumbered to keep
    # the instruction list contiguous.
    embassy_instruction = (
        f"4. REQUIRED — Populate the 'embassy' JSON field with the {passport_country} embassy "
        f"or nearest consulate in '{country}'. Include: name, street address, main phone number, "
        f"after-hours emergency phone, and official website URL.\n"
        if include_embassy else ""
    )
    step_analyse = 5 if include_embassy else 4
    step_output = 6 if include_embassy else 5
    passport_line = f"Traveller passport country: {passport_country}" if include_embassy else ""
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    today = datetime.now(timezone.utc).strftime('%Y-%m-%d')

    task = f"""
Conduct an OSINT threat assessment for: {country}
{passport_line}
Instructions:
1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days.
2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}.
3. REQUIRED — Call fetch_travel_advisory for '{country}' and include the result in the travel_advisory fields.
{embassy_instruction}{step_analyse}. Analyse all collected data carefully.
{step_output}. Produce your final output as ONLY a JSON threat brief matching this schema:
{BRIEF_PROMPT_SCHEMA}
Today's date: {today}
"""

    progress(0.2, desc="Agent gathering OSINT data...")

    max_attempts = 3
    raw_output = None
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            agent = build_agent()  # fresh agent per attempt
            # NOTE(review): additional_args may be treated by smolagents as
            # extra run variables rather than a system-prompt override —
            # confirm SYSTEM_PROMPT reaches the model as intended.
            raw_output = agent.run(task, additional_args={"system_prompt": SYSTEM_PROMPT})
            break  # success — exit retry loop
        except Exception as e:  # UI boundary: every failure is shown to the user
            last_error = e
            if _is_timeout_error(e) and attempt < max_attempts:
                progress(
                    0.2 + attempt * 0.1,
                    desc=f"HF API timeout — retrying (attempt {attempt + 1}/{max_attempts})...",
                )
                time.sleep(5 * attempt)  # 5s, then 10s back-off
                continue
            # Non-retryable error or final attempt — surface it
            return _failure_html(e, attempt, max_attempts), str(e), None

    # Reached only when the agent returned None without raising.
    if raw_output is None:
        return "<p style='color:red'>Analysis failed after 3 attempts. Please try again later.</p>", str(last_error), None

    progress(0.85, desc="Parsing intelligence brief...")
    # Parse the agent's final output into a structured brief; non-string
    # output is wrapped in a low-confidence placeholder brief.
    if isinstance(raw_output, str):
        brief = parse_brief_from_llm(raw_output)
    else:
        brief = ThreatBrief(
            narrative_summary=str(raw_output),
            severity="Unknown",
            confidence="Low",
        )
    # Stamp passport country so PDF / HTML renderer can use it even if LLM omitted it
    if include_embassy:
        brief.passport_country = passport_country

    progress(0.95, desc="Rendering brief...")
    html_output = render_brief_html(brief)

    # Build a plain-text trace for the "Raw Trace" tab
    finished_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    trace_lines = [
        f"=== OSINT Analysis: {country} ===",
        f"Model: {MODEL_ID}",
        f"Sources: ACLED + {sources_str}",
        f"Days back: {days_back}",
        f"Date: {finished_at}",
        "\n--- Raw Agent Output ---",
        str(raw_output),
    ]
    progress(1.0, desc="Done.")
    return html_output, "\n".join(trace_lines), brief
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
# (display label, source key) pairs offered in the "News Sources" checkbox
# group; the key is the value passed on when a box is ticked.
RSS_SOURCE_OPTIONS = [
    # --- General world news ---
    ("BBC World", "bbc_world"),
    ("Al Jazeera", "al_jazeera"),
    ("France 24", "france24"),
    ("Euronews", "euronews"),
    ("NPR World", "npr_world"),
    ("Sky News", "sky_news"),
    ("UN News", "un_news"),
    ("Intl Business Times", "ibt"),
    # --- Regional: Middle East ---
    ("Middle East Eye", "middle_east_eye"),
    ("Al-Monitor", "al_monitor"),
    ("Arab News", "arab_news"),
    # --- Regional: Africa ---
    ("AllAfrica", "allafrica"),
    # --- Regional: Asia-Pacific ---
    ("Radio Free Asia", "radio_free_asia"),
    ("S. China Morning Post", "scmp"),
    # --- Regional: South Asia ---
    ("Dawn (Pakistan)", "dawn"),
    # --- Regional: Russia / E. Europe ---
    ("The Moscow Times", "moscow_times"),
    # --- OSINT / investigative ---
    ("Bellingcat", "bellingcat"),
    ("The Intercept", "the_intercept"),
    ("OCCRP", "occrp"),
    # --- Policy / security analysis ---
    ("Crisis Group", "crisis_group"),
    ("War on the Rocks", "war_on_rocks"),
    ("Just Security", "just_security"),
    ("Defense One", "defense_one"),
    ("The Cipher Brief", "cipher_brief"),
    ("Stimson Center", "stimson"),
    # --- Human rights ---
    ("Human Rights Watch", "hrw"),
    ("Amnesty Intl", "amnesty"),
]
# Choices for the "Country / Region of Interest" dropdown, grouped by
# initial letter. Order is preserved exactly as displayed.
DESTINATION_COUNTRIES = [
    # A
    "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda",
    "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan",
    # B
    "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize",
    "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
    "Brunei", "Bulgaria", "Burkina Faso", "Burundi",
    # C
    "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad",
    "Chile", "China", "Colombia", "Comoros", "Congo (Republic)", "Congo (DRC)",
    "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic",
    # D
    "Denmark", "Djibouti", "Dominica", "Dominican Republic",
    # E
    "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia",
    "Eswatini", "Ethiopia",
    # F
    "Fiji", "Finland", "France",
    # G
    "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada",
    "Guatemala", "Guinea", "Guinea-Bissau", "Guyana",
    # H
    "Haiti", "Honduras", "Hungary",
    # I
    "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
    # J
    "Jamaica", "Japan", "Jordan",
    # K
    "Kazakhstan", "Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan",
    # L
    "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
    "Lithuania", "Luxembourg",
    # M
    "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands",
    "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia",
    "Montenegro", "Morocco", "Mozambique", "Myanmar",
    # N
    "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger",
    "Nigeria", "North Korea", "North Macedonia", "Norway",
    # O
    "Oman",
    # P
    "Pakistan", "Palau", "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru",
    "Philippines", "Poland", "Portugal",
    # Q
    "Qatar",
    # R
    "Romania", "Russia", "Rwanda",
    # S
    "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines",
    "Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia", "Senegal",
    "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia",
    "Solomon Islands", "Somalia", "South Africa", "South Korea", "South Sudan", "Spain",
    "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria",
    # T
    "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Timor-Leste", "Togo", "Tonga",
    "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu",
    # U
    "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States",
    "Uruguay", "Uzbekistan",
    # V
    "Vanuatu", "Vatican City", "Venezuela", "Vietnam",
    # Y
    "Yemen",
    # Z
    "Zambia", "Zimbabwe",
]
# Choices for the "Passport Country" dropdown. The leading "Not specified"
# entry is the sentinel run_analysis() treats as "no embassy lookup".
PASSPORT_COUNTRIES = [
    "Not specified",
    # A
    "Afghanistan", "Albania", "Algeria", "Argentina", "Australia", "Austria",
    # B - C
    "Bangladesh", "Belgium", "Bolivia", "Brazil", "Cambodia", "Canada", "Chile",
    "China", "Colombia", "Croatia", "Czech Republic",
    # D - G
    "Denmark", "Ecuador", "Egypt",
    "Ethiopia", "Finland", "France", "Germany", "Ghana", "Greece", "Guatemala",
    # H - K
    "Hungary", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
    "Japan", "Jordan", "Kazakhstan", "Kenya", "Kuwait",
    # M - N
    "Malaysia", "Mexico",
    "Morocco", "Nepal", "Netherlands", "New Zealand", "Nigeria", "Norway",
    # P - R
    "Pakistan", "Peru", "Philippines", "Poland", "Portugal", "Qatar",
    "Romania", "Russia",
    # S
    "Saudi Arabia", "Senegal", "Singapore", "South Africa",
    "South Korea", "Spain", "Sri Lanka", "Sudan", "Sweden", "Switzerland",
    # T - Z
    "Taiwan", "Thailand", "Turkey", "Ukraine", "United Arab Emirates",
    "United Kingdom", "United States", "Venezuela", "Vietnam", "Zimbabwe",
]
# Rows for the "Example Queries" table; each row is
# [country, RSS source keys, ACLED look-back days], matching the
# (country_input, rss_sources, days_back) inputs given to gr.Examples.
EXAMPLE_QUERIES = [
    [
        "Sudan",
        ["bbc_world", "al_jazeera", "middle_east_eye", "hrw"],
        14,
    ],
    [
        "Myanmar",
        ["bbc_world", "crisis_group", "radio_free_asia", "hrw"],
        21,
    ],
    [
        "Ukraine",
        ["bbc_world", "npr_world", "sky_news", "war_on_rocks"],
        7,
    ],
    [
        "Haiti",
        ["bbc_world", "al_jazeera", "un_news", "amnesty"],
        14,
    ],
]
# Stylesheet passed to gr.Blocks: caps the page width, colours the
# "Run Analysis" button (elem_id "analyze-btn") and hides the footer element.
CSS = (
    "\n"
    ".gradio-container { max-width: 1100px !important; margin: auto; }\n"
    "#analyze-btn { background: #1a1a2e; color: white; }\n"
    "#analyze-btn:hover { background: #16213e; }\n"
    "footer { display: none !important; }\n"
)
# Gradio UI: a configuration column on the left, results (brief + raw trace)
# and PDF export on the right, followed by example queries and a disclaimer.
# NOTE(review): the source had lost its indentation, so container nesting
# below is reconstructed from statement order — in particular, confirm whether
# pdf_file belongs inside the export button's Row or beneath it.
with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo:
    # Holds the ThreatBrief from the latest successful run for PDF export.
    brief_state = gr.State(None)

    # Every selectable source key; used for the default selection and by the
    # select/deselect-all toggle.
    _ALL_SOURCE_KEYS = [v for _, v in RSS_SOURCE_OPTIONS]

    gr.HTML("""
<div style="text-align:center;padding:20px 0 10px 0">
<h1 style="font-size:2em;margin:0">OSINT Threat Analyst</h1>
<p style="color:#666;margin:6px 0 0 0">
Agentic multi-source intelligence briefing · ACLED + RSS · State Dept advisories · Powered by HuggingFace
</p>
</div>
<div style="text-align:center;background:#f0f4ff;border:1px solid #c7d4f0;border-radius:8px;padding:12px 24px;max-width:700px;margin:0 auto 16px auto;color:#1a2a5e;font-size:0.95em">
Enter the country or region you are researching or travelling to, select your preferred news sources, and click <strong>Run Analysis</strong> to generate a full intelligence brief.
</div>
""")

    with gr.Row():
        # ---- Left column: query configuration ----
        with gr.Column(scale=1):
            gr.Markdown("### Configure Analysis")
            country_input = gr.Dropdown(
                choices=DESTINATION_COUNTRIES,
                value=None,
                label="Country / Region of Interest",
                info="Type to search and select the country you are researching or travelling to.",
                filterable=True,
                allow_custom_value=True,
            )
            passport_input = gr.Dropdown(
                choices=PASSPORT_COUNTRIES,
                value="Not specified",
                label="Passport Country",
                info="Select your passport country to include embassy and consulate information in the report.",
                filterable=True,
            )
            with gr.Row():
                # Starts as "Deselect All" because every source is ticked by default.
                select_all_btn = gr.Button("Deselect All", size="sm", scale=1)
            rss_sources = gr.CheckboxGroup(
                choices=RSS_SOURCE_OPTIONS,
                value=list(_ALL_SOURCE_KEYS),  # all selected by default
                label="News Sources",
            )
            days_back = gr.Slider(
                minimum=7,
                maximum=90,
                value=14,
                step=7,
                label="ACLED lookback window (days)",
            )
            analyze_btn = gr.Button(
                "Run Analysis",
                variant="primary",
                elem_id="analyze-btn",
            )

        # ---- Right column: results and export ----
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Threat Brief"):
                    brief_output = gr.HTML(
                        value="<div style='padding:40px;text-align:center;color:#999'>"
                        "Configure your query and click Run Analysis.</div>"
                    )
                with gr.Tab("Raw Agent Trace"):
                    trace_output = gr.Textbox(
                        label="Agent trace",
                        lines=30,
                        interactive=False,
                        placeholder="Agent reasoning and tool calls will appear here...",
                    )
            with gr.Row():
                # Disabled until an analysis has produced a brief.
                export_btn = gr.Button(
                    "Export PDF Report",
                    variant="secondary",
                    interactive=False,
                    scale=1,
                )
            pdf_file = gr.File(
                label="Download PDF Report",
                visible=False,
                interactive=False,
            )

    gr.Examples(
        examples=EXAMPLE_QUERIES,
        inputs=[country_input, rss_sources, days_back],
        label="Example Queries",
    )

    def _toggle_label(current_values):
        """Return the toggle-button label matching the current selection."""
        all_selected = len(current_values or []) == len(_ALL_SOURCE_KEYS)
        return gr.update(value="Deselect All" if all_selected else "Select All")

    def toggle_sources(current_values):
        """Select all if any are unchecked; deselect all if all are checked."""
        if len(current_values or []) == len(_ALL_SOURCE_KEYS):
            return gr.update(value=[]), gr.update(value="Select All")
        return gr.update(value=_ALL_SOURCE_KEYS), gr.update(value="Deselect All")

    def _after_analysis(brief):
        """Enable export only when a brief exists and clear any earlier PDF."""
        return (
            gr.update(interactive=brief is not None),
            # A PDF exported for a previous run no longer matches the brief
            # on screen, so hide it until the user exports again.
            gr.update(value=None, visible=False),
        )

    select_all_btn.click(
        fn=toggle_sources,
        inputs=[rss_sources],
        outputs=[rss_sources, select_all_btn],
    )
    # Keep the button label in step with the checkboxes when they change by
    # any other route (manual ticks, loading an example row).
    rss_sources.change(
        fn=_toggle_label,
        inputs=[rss_sources],
        outputs=[select_all_btn],
    )

    analyze_btn.click(
        fn=run_analysis,
        inputs=[country_input, passport_input, rss_sources, days_back],
        outputs=[brief_output, trace_output, brief_state],
    ).then(
        fn=_after_analysis,
        inputs=[brief_state],
        outputs=[export_btn, pdf_file],
    )

    # Generate the PDF from the stored brief, then reveal the download widget.
    export_btn.click(
        fn=lambda b: generate_pdf(b) if b is not None else None,
        inputs=[brief_state],
        outputs=[pdf_file],
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[pdf_file],
    )

    gr.HTML("""
<div style="text-align:center;padding:16px;color:#999;font-size:0.8em;margin-top:20px">
⚠️ AI-generated from open sources. Not for operational use without verification.
Built with <a href="https://github.com/huggingface/smolagents">smolagents</a> ·
Data: <a href="https://acleddata.com">ACLED</a> + public RSS feeds
</div>
""")
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()