Spaces:

Firemedic15
/

OSINTTool

Sleeping

App Files Files Community

Firemedic15 committed 21 days ago

Commit

26739ca

verified ·

1 Parent(s): 26ac00e

Upload 6 files

Browse files

Files changed (5) hide show

app.py +46 -15
brief.py +81 -1
export.py +28 -20
requirements.txt +1 -0
tools.py +105 -1

app.py CHANGED Viewed

@@ -3,26 +3,26 @@ app.py — Multi-source OSINT Analyst Space
 Agentic loop powered by smolagents + HuggingFace Inference API.
 Required Space Secrets:
-  ACLED_USERNAME
-  ACLED_PASSWORD
-  ACLED_API_KEY   — from https://developer.acleddata.com
-  ACLED_EMAIL     — email used to register for ACLED access
   HF_TOKEN        — HuggingFace token (for Inference API, set automatically in Spaces)
 """
 import os
 from datetime import datetime
 import gradio as gr
 from smolagents import InferenceClientModel, ToolCallingAgent
-from tools import fetch_acled_events, fetch_rss_headlines, list_available_sources
 from brief import (
     BRIEF_PROMPT_SCHEMA,
     ThreatBrief,
     parse_brief_from_llm,
     render_brief_html,
 )
 # ---------------------------------------------------------------------------
 # Model + Agent setup
@@ -36,9 +36,9 @@ def build_agent() -> ToolCallingAgent:
     token=os.environ.get("HF_TOKEN"),
 )
     agent = ToolCallingAgent(
-        tools=[fetch_acled_events, fetch_rss_headlines, list_available_sources],
         model=model,
-        max_steps=8,
         verbosity_level=1,
     )
     return agent
@@ -65,14 +65,15 @@ def run_analysis(
     rss_sources: list,
     days_back: int,
     progress=gr.Progress(),
-) -> tuple[str, str]:
     """
     Runs the agentic OSINT analysis loop and returns:
     - Structured HTML threat brief
     - Raw agent trace for transparency
     """
     if not country.strip():
-        return "<p style='color:red'>Please enter a country or region.</p>", ""
     progress(0.1, desc="Initializing agent...")
@@ -84,8 +85,9 @@ Conduct an OSINT threat assessment for: {country}
 Instructions:
 1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days.
 2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}.
-3. Analyze all collected data carefully.
-4. Produce your final output as ONLY a JSON threat brief matching this schema:
 {BRIEF_PROMPT_SCHEMA}
@@ -106,7 +108,7 @@ Today's date: {datetime.utcnow().strftime('%Y-%m-%d')}
             or model timeout. Check Space secrets and try again.</em>
         </div>
         """
-        return error_html, str(e)
     progress(0.85, desc="Parsing intelligence brief...")
@@ -134,7 +136,7 @@ Today's date: {datetime.utcnow().strftime('%Y-%m-%d')}
     trace_lines.append(str(raw_output))
     progress(1.0, desc="Done.")
-    return html_output, "\n".join(trace_lines)
 # ---------------------------------------------------------------------------
@@ -168,11 +170,13 @@ footer { display: none !important; }
 with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo:
     gr.HTML("""
     <div style="text-align:center;padding:20px 0 10px 0">
       <h1 style="font-size:2em;margin:0">🌐 OSINT Threat Analyst</h1>
       <p style="color:#666;margin:6px 0 0 0">
-        Agentic multi-source intelligence briefing · ACLED + RSS · Powered by HuggingFace
       </p>
     </div>
     """)
@@ -229,6 +233,20 @@ with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as
                         placeholder="Agent reasoning and tool calls will appear here...",
                     )
     gr.Examples(
         examples=EXAMPLE_QUERIES,
         inputs=[country_input, rss_sources, days_back],
@@ -238,7 +256,20 @@ with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as
     analyze_btn.click(
         fn=run_analysis,
         inputs=[country_input, rss_sources, days_back],
-        outputs=[brief_output, trace_output],
     )
     gr.HTML("""

 Agentic loop powered by smolagents + HuggingFace Inference API.
 Required Space Secrets:
+  ACLED_USERNAME  — your myACLED email address (from https://developer.acleddata.com)
+  ACLED_PASSWORD  — your myACLED password
   HF_TOKEN        — HuggingFace token (for Inference API, set automatically in Spaces)
 """
 import os
 from datetime import datetime
+from typing import Optional
 import gradio as gr
 from smolagents import InferenceClientModel, ToolCallingAgent
+from tools import fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory
 from brief import (
     BRIEF_PROMPT_SCHEMA,
     ThreatBrief,
     parse_brief_from_llm,
     render_brief_html,
 )
+from export import generate_pdf
 # ---------------------------------------------------------------------------
 # Model + Agent setup
     token=os.environ.get("HF_TOKEN"),
 )
     agent = ToolCallingAgent(
+        tools=[fetch_acled_events, fetch_rss_headlines, list_available_sources, fetch_travel_advisory],
         model=model,
+        max_steps=15,
         verbosity_level=1,
     )
     return agent
     rss_sources: list,
     days_back: int,
     progress=gr.Progress(),
+) -> tuple:
     """
     Runs the agentic OSINT analysis loop and returns:
     - Structured HTML threat brief
     - Raw agent trace for transparency
+    - ThreatBrief object (stored in gr.State for PDF export)
     """
     if not country.strip():
+        return "<p style='color:red'>Please enter a country or region.</p>", "", None
     progress(0.1, desc="Initializing agent...")
 Instructions:
 1. Fetch ACLED armed conflict events for '{country}' over the last {days_back} days.
 2. Fetch recent RSS news headlines related to '{country}' from these sources: {sources_str}.
+3. REQUIRED — You MUST call fetch_travel_advisory for '{country}' before writing your final answer. Include the result in the travel_advisory fields even if the level is Unknown.
+4. Analyze all collected data carefully.
+5. Produce your final output as ONLY a JSON threat brief matching this schema:
 {BRIEF_PROMPT_SCHEMA}
             or model timeout. Check Space secrets and try again.</em>
         </div>
         """
+        return error_html, str(e), None
     progress(0.85, desc="Parsing intelligence brief...")
     trace_lines.append(str(raw_output))
     progress(1.0, desc="Done.")
+    return html_output, "\n".join(trace_lines), brief
 # ---------------------------------------------------------------------------
 with gr.Blocks(title="OSINT Threat Analyst", css=CSS, theme=gr.themes.Soft()) as demo:
+    brief_state = gr.State(None)
     gr.HTML("""
     <div style="text-align:center;padding:20px 0 10px 0">
       <h1 style="font-size:2em;margin:0">🌐 OSINT Threat Analyst</h1>
       <p style="color:#666;margin:6px 0 0 0">
+        Agentic multi-source intelligence briefing · ACLED + RSS · State Dept advisories · Powered by HuggingFace
       </p>
     </div>
     """)
                         placeholder="Agent reasoning and tool calls will appear here...",
                     )
+            with gr.Row():
+                export_btn = gr.Button(
+                    "📄 Export PDF Report",
+                    variant="secondary",
+                    interactive=False,
+                    scale=1,
+                )
+            pdf_file = gr.File(
+                label="📥 Download PDF Report",
+                visible=False,
+                interactive=False,
+            )
     gr.Examples(
         examples=EXAMPLE_QUERIES,
         inputs=[country_input, rss_sources, days_back],
     analyze_btn.click(
         fn=run_analysis,
         inputs=[country_input, rss_sources, days_back],
+        outputs=[brief_output, trace_output, brief_state],
+    ).then(
+        fn=lambda b: gr.update(interactive=b is not None),
+        inputs=[brief_state],
+        outputs=[export_btn],
+    )
+    export_btn.click(
+        fn=lambda b: generate_pdf(b) if b is not None else None,
+        inputs=[brief_state],
+        outputs=[pdf_file],
+    ).then(
+        fn=lambda: gr.update(visible=True),
+        outputs=[pdf_file],
     )
     gr.HTML("""

brief.py CHANGED Viewed

@@ -42,6 +42,11 @@ class ThreatBrief:
     recommended_watch_items: List[str] = field(default_factory=list)
     source_types_used: List[str] = field(default_factory=list)
     notable_news: List[NewsItem] = field(default_factory=list)
     def to_dict(self) -> dict:
         d = asdict(self)
@@ -90,7 +95,14 @@ produce a threat brief as a JSON object with EXACTLY these fields:
   "key_findings": ["<finding1>", "<finding2>", "<finding3>"],
   "indicators_of_escalation": ["<indicator1>", "<indicator2>"],
   "recommended_watch_items": ["<watch_item1>", "<watch_item2>"],
-  "source_types_used": ["ACLED", "RSS"],
   "notable_news": [
     {
       "title": "<article headline>",
@@ -136,6 +148,8 @@ def parse_brief_from_llm(raw_text: str) -> ThreatBrief:
                 notable=n.get("notable", True),
             ))
         return ThreatBrief(
             region=data.get("region", ""),
             country=data.get("country", ""),
@@ -152,6 +166,11 @@ def parse_brief_from_llm(raw_text: str) -> ThreatBrief:
             recommended_watch_items=data.get("recommended_watch_items", []),
             source_types_used=data.get("source_types_used", []),
             notable_news=news_items,
         )
     except json.JSONDecodeError:
         return _fallback_brief(raw_text)
@@ -170,6 +189,56 @@ def _fallback_brief(raw_text: str) -> ThreatBrief:
 # HTML Renderer — system dark mode aware via CSS custom properties
 # ---------------------------------------------------------------------------
 def render_brief_html(brief: ThreatBrief) -> str:
     sev_color  = SEVERITY_COLORS.get(brief.severity, "#999")
     conf_color = CONFIDENCE_COLORS.get(brief.confidence, "#999")
@@ -380,6 +449,15 @@ def render_brief_html(brief: ThreatBrief) -> str:
   .warn-section h4 {{ color: var(--text-primary); margin-top: 0; }}
   .warn-section ul {{ padding-left: 18px; line-height: 1.9; margin: 0; }}
   .brief-footer {{
     background: var(--bg-secondary);
     border: 1px solid var(--border);
@@ -405,6 +483,8 @@ def render_brief_html(brief: ThreatBrief) -> str:
     <span><strong>Fatalities:</strong> {fatalities_str}</span>
   </div>
   <div class="section">
     <h3>Analytical Summary</h3>
     <p>{brief.narrative_summary or '<em>Not available</em>'}</p>

     recommended_watch_items: List[str] = field(default_factory=list)
     source_types_used: List[str] = field(default_factory=list)
     notable_news: List[NewsItem] = field(default_factory=list)
+    travel_advisory_level: str = ""
+    travel_advisory_level_text: str = ""
+    travel_advisory_indicators: List[str] = field(default_factory=list)
+    travel_advisory_date: str = ""
+    travel_advisory_url: str = ""
     def to_dict(self) -> dict:
         d = asdict(self)
   "key_findings": ["<finding1>", "<finding2>", "<finding3>"],
   "indicators_of_escalation": ["<indicator1>", "<indicator2>"],
   "recommended_watch_items": ["<watch_item1>", "<watch_item2>"],
+  "source_types_used": ["ACLED", "RSS", "State Dept Travel Advisory"],
+  "travel_advisory": {
+    "level": "<1|2|3|4 — just the number, or 'Unknown'>",
+    "level_text": "<full level string, e.g. 'Level 3: Reconsider Travel'>",
+    "indicators": ["<e.g. Crime>", "<e.g. Terrorism>"],
+    "date_updated": "<date string from advisory>",
+    "url": "<full URL to the advisory page, or empty string>"
+  },
   "notable_news": [
     {
       "title": "<article headline>",
                 notable=n.get("notable", True),
             ))
+        ta = data.get("travel_advisory", {})
         return ThreatBrief(
             region=data.get("region", ""),
             country=data.get("country", ""),
             recommended_watch_items=data.get("recommended_watch_items", []),
             source_types_used=data.get("source_types_used", []),
             notable_news=news_items,
+            travel_advisory_level=str(ta.get("level", "")),
+            travel_advisory_level_text=ta.get("level_text", ""),
+            travel_advisory_indicators=ta.get("indicators", []),
+            travel_advisory_date=ta.get("date_updated", ""),
+            travel_advisory_url=ta.get("url", ""),
         )
     except json.JSONDecodeError:
         return _fallback_brief(raw_text)
 # HTML Renderer — system dark mode aware via CSS custom properties
 # ---------------------------------------------------------------------------
+def _render_travel_advisory(brief: ThreatBrief) -> str:
+    level = brief.travel_advisory_level.strip()
+    if not level or level == "Unknown":
+        return """
+  <div class="section muted-section" style="border-top:none">
+    <p class="muted-text">🗺️ US State Department travel advisory not available for this country.</p>
+  </div>"""
+    color = ADVISORY_LEVEL_COLORS.get(level, "#999")
+    label = ADVISORY_LEVEL_LABELS.get(level, brief.travel_advisory_level_text or f"Level {level}")
+    indicators_html = (
+        " &nbsp;·&nbsp; ".join(f'<span class="risk-tag">{i}</span>' for i in brief.travel_advisory_indicators)
+        if brief.travel_advisory_indicators
+        else "<em>None listed</em>"
+    )
+    link_html = (
+        f' &nbsp;<a href="{brief.travel_advisory_url}" target="_blank" style="font-size:0.82em;color:var(--text-link)">Full advisory →</a>'
+        if brief.travel_advisory_url else ""
+    )
+    date_html = f'<span style="color:var(--text-muted);font-size:0.82em">Updated: {brief.travel_advisory_date}</span>' if brief.travel_advisory_date else ""
+    return f"""
+  <div class="section" style="border-top:none;border-left:4px solid {color}">
+    <h3 style="margin-top:0;margin-bottom:10px">
+      🗺️ US State Dept Travel Advisory
+      <span style="background:{color};color:white;padding:2px 12px;border-radius:12px;font-size:0.82em;font-weight:bold;margin-left:8px">
+        Level {level}: {label}
+      </span>
+      {link_html}
+    </h3>
+    <div style="margin-bottom:6px"><strong>Risk categories:</strong> &nbsp;{indicators_html}</div>
+    {date_html}
+  </div>"""
+ADVISORY_LEVEL_COLORS = {  # CSS hex colour per advisory level, keyed by the level number as a string
+    "1": "#27AE60",
+    "2": "#F39C12",
+    "3": "#E67E22",
+    "4": "#C0392B",
+}
+ADVISORY_LEVEL_LABELS = {  # short label per advisory level, keyed by the level number as a string
+    "1": "Exercise Normal Precautions",
+    "2": "Exercise Increased Caution",
+    "3": "Reconsider Travel",
+    "4": "Do Not Travel",
+}
 def render_brief_html(brief: ThreatBrief) -> str:
     sev_color  = SEVERITY_COLORS.get(brief.severity, "#999")
     conf_color = CONFIDENCE_COLORS.get(brief.confidence, "#999")
   .warn-section h4 {{ color: var(--text-primary); margin-top: 0; }}
   .warn-section ul {{ padding-left: 18px; line-height: 1.9; margin: 0; }}
+  .risk-tag {{
+    background: var(--bg-accent);
+    border: 1px solid var(--border);
+    color: var(--text-body);
+    padding: 1px 8px;
+    border-radius: 6px;
+    font-size: 0.82em;
+  }}
   .brief-footer {{
     background: var(--bg-secondary);
     border: 1px solid var(--border);
     <span><strong>Fatalities:</strong> {fatalities_str}</span>
   </div>
+  {_render_travel_advisory(brief)}
   <div class="section">
     <h3>Analytical Summary</h3>
     <p>{brief.narrative_summary or '<em>Not available</em>'}</p>

export.py CHANGED Viewed

@@ -134,33 +134,41 @@ def generate_pdf(brief: ThreatBrief) -> str:
     pdf.cell(0, 7, _safe(f"Reported fatalities: {fat}"))
     pdf.ln(12)
-    # ---- State Dept Travel Advisory ----
-    if brief.travel_advisory_level and brief.travel_advisory_level not in ("", "Unknown"):
-        _section_heading(pdf, "U.S. State Department Travel Advisory")
-        adv_rgb = _ADVISORY_COLORS.get(brief.travel_advisory_level, (150, 150, 150))
-        level_label = brief.travel_advisory_level_text or f"Level {brief.travel_advisory_level}"
-        pdf.set_fill_color(*adv_rgb)
-        pdf.set_text_color(255, 255, 255)
-        pdf.set_font("Helvetica", "B", 10)
-        pdf.cell(0, 8, _safe(f"  {level_label}"), fill=True, ln=True)
-        pdf.set_text_color(44, 52, 68)
-        pdf.set_font("Helvetica", "", 9)
-        if brief.travel_advisory_indicators:
-            pdf.multi_cell(0, 5, _safe(f"Risk categories:  {',  '.join(brief.travel_advisory_indicators)}"))
-        if brief.travel_advisory_date:
-            pdf.cell(0, 5, _safe(f"Last updated: {brief.travel_advisory_date}"), ln=True)
-        if brief.travel_advisory_url:
-            pdf.set_text_color(37, 99, 235)
-            pdf.multi_cell(0, 5, _safe(f"Source: {brief.travel_advisory_url}"))
-            pdf.set_text_color(44, 52, 68)
-        pdf.ln(3)
     # ---- Analytical Summary ----
     _section_heading(pdf, "Analytical Summary")

     pdf.cell(0, 7, _safe(f"Reported fatalities: {fat}"))
     pdf.ln(12)
+    # ---- State Dept Travel Advisory (always rendered) ----
+    _section_heading(pdf, "U.S. State Department Travel Advisory")
+    level = brief.travel_advisory_level or ""
+    level_label = brief.travel_advisory_level_text or (
+        f"Level {level}" if level and level not in ("", "Unknown") else "Advisory level not retrieved"
+    )
+    adv_rgb = _ADVISORY_COLORS.get(level, (120, 120, 120))
+    pdf.set_fill_color(*adv_rgb)
+    pdf.set_text_color(255, 255, 255)
+    pdf.set_font("Helvetica", "B", 10)
+    pdf.cell(0, 8, _safe(f"  {level_label}"), fill=True, ln=True)
+    pdf.set_text_color(44, 52, 68)
+    pdf.set_font("Helvetica", "", 9)
+    if brief.travel_advisory_indicators:
+        pdf.multi_cell(0, 5, _safe(f"Risk categories:  {',  '.join(brief.travel_advisory_indicators)}"))
+    else:
+        pdf.cell(0, 5, "Risk categories:  See full advisory for details", ln=True)
+    if brief.travel_advisory_date:
+        pdf.cell(0, 5, _safe(f"Last updated: {brief.travel_advisory_date}"), ln=True)
+    if brief.travel_advisory_url:
+        pdf.set_text_color(37, 99, 235)
+        pdf.multi_cell(0, 5, _safe(f"Source: {brief.travel_advisory_url}"))
+        pdf.set_text_color(44, 52, 68)
+    else:
+        pdf.set_text_color(37, 99, 235)
+        pdf.cell(0, 5, "Source: https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories.html", ln=True)
+        pdf.set_text_color(44, 52, 68)
+    pdf.ln(3)
     # ---- Analytical Summary ----
     _section_heading(pdf, "Analytical Summary")

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ gradio>=5.23.0
 smolagents>=1.10.0
 feedparser>=6.0.10
 requests>=2.31.0

 smolagents>=1.10.0
 feedparser>=6.0.10
 requests>=2.31.0
+fpdf2>=2.7.0

tools.py CHANGED Viewed

@@ -199,8 +199,21 @@ def fetch_rss_headlines(
                  al_jazeera, bellingcat, crisis_group, acled_blog, un_news, foreign_policy.
         max_articles: Maximum total articles to return across all sources (default 20).
     """
     source_keys = [s.strip() for s in sources.split(",") if s.strip()]
-    keywords = [w.lower() for w in topic.lower().split() if len(w) > 2]
     articles = []
     feed_errors = []
@@ -281,6 +294,97 @@ def fetch_rss_headlines(
     return "\n\n".join(lines)
 # ---------------------------------------------------------------------------
 # Helper tool
 # ---------------------------------------------------------------------------

                  al_jazeera, bellingcat, crisis_group, acled_blog, un_news, foreign_policy.
         max_articles: Maximum total articles to return across all sources (default 20).
     """
+    # Common country aliases so searches don't miss alternate names in articles
+    _ALIASES = {
+        "myanmar": ["myanmar", "burma"],
+        "burma": ["myanmar", "burma"],
+        "ivory coast": ["ivory coast", "côte d'ivoire"],
+        "drc": ["drc", "congo", "democratic republic"],
+        "car": ["central african republic", "car"],
+        "uae": ["uae", "united arab emirates"],
+    }
     source_keys = [s.strip() for s in sources.split(",") if s.strip()]
+    base_keywords = [w.lower() for w in topic.lower().split() if len(w) > 2]
+    topic_lower = topic.lower().strip()
+    extra = _ALIASES.get(topic_lower, [])
+    keywords = list(dict.fromkeys(base_keywords + extra))  # deduplicate, preserve order
     articles = []
     feed_errors = []
     return "\n\n".join(lines)
+# ---------------------------------------------------------------------------
+# US State Department Travel Advisory Tool
+# ---------------------------------------------------------------------------
+_ADVISORY_API = "https://cadataapi.state.gov/api/TravelAdvisories"  # endpoint requested by fetch_travel_advisory
+_RISK_KEYWORDS = {  # lowercase phrase looked for in the advisory summary -> label reported as a risk category
+    "crime": "Crime",
+    "terrorism": "Terrorism",
+    "civil unrest": "Civil Unrest",
+    "health": "Health",
+    "natural disaster": "Natural Disaster",
+    "kidnapping": "Kidnapping",
+    "wrongful detention": "Wrongful Detention",
+    "piracy": "Piracy",
+    "maritime": "Maritime",
+}
+@tool
+def fetch_travel_advisory(country: str) -> str:
+    """
+    Fetches the current US State Department travel advisory for a country
+    using the official State Department data API.
+    Returns the advisory level (1–4), risk categories, publication date,
+    a plain-text summary, and a link to the full advisory.
+    Advisory levels:
+      1 = Exercise Normal Precautions
+      2 = Exercise Increased Caution
+      3 = Reconsider Travel
+      4 = Do Not Travel
+    Args:
+        country: Country name to look up (e.g. 'Sudan', 'Ukraine', 'Haiti').
+    """
+    try:
+        resp = requests.get(_ADVISORY_API, timeout=20)
+        resp.raise_for_status()
+        advisories = resp.json()
+    except requests.RequestException as e:
+        return f"[Travel Advisory] Request failed: {e}"
+    except ValueError:
+        return "[Travel Advisory] Could not parse API response as JSON."
+    country_lower = country.lower().strip()
+    match = None
+    for entry in advisories:
+        title = entry.get("Title", "")
+        # Title format: "Country Name - Level N: Description"
+        dest = title.split(" - Level ")[0].strip()
+        if country_lower in dest.lower():
+            match = entry
+            break
+    if not match:
+        return (
+            f"[Travel Advisory] No advisory found for '{country}'. "
+            "Check spelling or try the country's common English name."
+        )
+    title = match.get("Title", "")
+    link = match.get("Link", "")
+    published = match.get("Published", match.get("Updated", ""))
+    raw_summary = match.get("Summary", "")
+    summary = _strip_html(raw_summary)[:500]
+    level_match = re.search(r"Level\s+(\d)", title, re.IGNORECASE)
+    level_num = level_match.group(1) if level_match else "Unknown"
+    summary_lower = summary.lower()
+    indicators = [
+        label for keyword, label in _RISK_KEYWORDS.items()
+        if keyword in summary_lower
+    ]
+    # Parse ISO timestamp to a readable date
+    date_str = published[:10] if published else ""
+    lines = [
+        f"[Travel Advisory] {title}",
+        f"Risk Categories: {', '.join(indicators) if indicators else 'See summary'}",
+        f"Published: {date_str}",
+        f"Summary: {summary}",
+    ]
+    if link:
+        lines.append(f"Full Advisory: {link}")
+    return "\n".join(lines)
 # ---------------------------------------------------------------------------
 # Helper tool
 # ---------------------------------------------------------------------------