feat: initial SHADOW deployment
Browse files- README.md +117 -13
- agents/__init__.py +6 -0
- agents/__pycache__/__init__.cpython-314.pyc +0 -0
- agents/__pycache__/pipeline.cpython-314.pyc +0 -0
- agents/pipeline.py +168 -0
- app.py +620 -0
- core/__init__.py +1 -0
- core/__pycache__/__init__.cpython-314.pyc +0 -0
- core/__pycache__/execution_trace.cpython-314.pyc +0 -0
- core/__pycache__/kenyan_context.cpython-314.pyc +0 -0
- core/__pycache__/llm_client.cpython-314.pyc +0 -0
- core/__pycache__/osint_dataset.cpython-314.pyc +0 -0
- core/__pycache__/prompts.cpython-314.pyc +0 -0
- core/execution_trace.py +45 -0
- core/kenyan_context.py +453 -0
- core/llm_client.py +483 -0
- core/osint_dataset.py +249 -0
- core/prompts.py +344 -0
- core/synthetic_threat_intel.py +108 -0
- requirements.txt +3 -3
README.md
CHANGED
|
@@ -1,20 +1,124 @@
|
|
| 1 |
---
|
| 2 |
-
title: SHADOW
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: red
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
- streamlit
|
| 10 |
pinned: false
|
| 11 |
-
short_description: Silent AI fraud detection for Kenyan mobile users.
|
| 12 |
-
license: mit
|
| 13 |
---
|
| 14 |
|
| 15 |
-
#
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: SHADOW Kenyan Fraud Intelligence
|
| 3 |
+
emoji: 🛡️
|
| 4 |
colorFrom: red
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.35.0
|
| 8 |
+
app_file: app.py
|
|
|
|
| 9 |
pinned: false
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# SHADOW — Kenyan Fraud Intelligence System
|
| 13 |
|
| 14 |
+
> AMD Developer Hackathon 2026 · Agentic AI Track
|
| 15 |
|
| 16 |
+
## Project Overview
|
| 17 |
+
|
| 18 |
+
Shadow is an advanced OSINT + LLM Hybrid Agentic Pipeline designed specifically to detect, analyze, and neutralize Kenyan-specific mobile fraud vectors. The system mitigates the impact of localized scams such as M-Pesa reversal fraud, Fuliza exploitation, KRA impersonation, and betting-related phishing.
|
| 19 |
+
|
| 20 |
+
Shadow solves the "Data Cold Start" problem by employing a hybrid architecture: it merges deterministic Open Source Intelligence (OSINT) with an explainable, multi-agent Large Language Model (LLM) pipeline. This ensures highly accurate classification, context-aware reasoning, and actionable mitigation strategies tailored to the Kenyan demographic, including support for English, Swahili, and Sheng dialects.
|
| 21 |
+
|
| 22 |
+
## Architecture Diagram
|
| 23 |
+
|
| 24 |
+
```text
|
| 25 |
+
[ Incoming SMS / Message ]
|
| 26 |
+
│
|
| 27 |
+
▼
|
| 28 |
+
┌──────────────────────────┐
|
| 29 |
+
│ OSINT Intelligence Layer│
|
| 30 |
+
│ (core/osint_dataset.py) │
|
| 31 |
+
│ - Deterministic Check │
|
| 32 |
+
│ - Keyword Matching │
|
| 33 |
+
│ - Scam Taxonomy Mapping │
|
| 34 |
+
└──────────┬───────────────┘
|
| 35 |
+
│
|
| 36 |
+
▼
|
| 37 |
+
┌──────────────────────────┐
|
| 38 |
+
│ Agent Pipeline Engine │
|
| 39 |
+
│ (agents/pipeline.py) │
|
| 40 |
+
│ │
|
| 41 |
+
│ 1. Language Agent │
|
| 42 |
+
│ 2. Threat Agent │
|
| 43 |
+
│ 3. Risk Agent │
|
| 44 |
+
│ 4. Action Agent │
|
| 45 |
+
└──────────┬───────────────┘
|
| 46 |
+
│
|
| 47 |
+
▼
|
| 48 |
+
┌──────────────────────────┐
|
| 49 |
+
│ AMD vLLM / Qwen Bridge │
|
| 50 |
+
│ (core/llm_client.py) │
|
| 51 |
+
│ - Context Injection │
|
| 52 |
+
│ - Reasoning Engine │
|
| 53 |
+
└──────────┬───────────────┘
|
| 54 |
+
│
|
| 55 |
+
▼
|
| 56 |
+
[ Explainable JSON Output & Execution Log ]
|
| 57 |
+
│
|
| 58 |
+
▼
|
| 59 |
+
┌──────────────────────────┐
|
| 60 |
+
│ Streamlit Live Dashboard│
|
| 61 |
+
│ (app.py) │
|
| 62 |
+
│ - Real-time Analysis UI │
|
| 63 |
+
│ - Execution Timeline │
|
| 64 |
+
│ - Risk Scoring Display │
|
| 65 |
+
└──────────────────────────┘
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
## Agent Pipeline Flow
|
| 69 |
+
|
| 70 |
+
1. **OSINT Pre-Analysis (Hybrid Intelligence Mode)**: Messages are instantly matched against known Kenyan scam typologies to provide a deterministic baseline.
|
| 71 |
+
2. **Language Agent**: Detects the dialect (English, Swahili, Sheng) and standardizes the context for subsequent analysis.
|
| 72 |
+
3. **Threat Agent**: Analyzes the intent of the message based on localized threat vectors.
|
| 73 |
+
4. **Risk Agent**: Computes a continuous risk score (0-100) and categorizes severity.
|
| 74 |
+
5. **Action Agent**: Determines the recommended user action (e.g., Block, Report to Safaricom, Ignore).
|
| 75 |
+
|
| 76 |
+
## Features
|
| 77 |
+
|
| 78 |
+
- **Kenyan Fraud Detection**: Specialized in detecting hyper-local scams (e.g., M-Pesa, Fuliza, KRA, Hustler Fund).
|
| 79 |
+
- **Sheng + Swahili Language Detection**: Seamlessly processes colloquialisms and mixed-language SMS typical in East Africa.
|
| 80 |
+
- **OSINT-Driven Classification**: Fuses known deterministic scam indicators with probabilistic AI reasoning.
|
| 81 |
+
- **Explainable AI Logs (`execution_log`)**: Glass-box observability that documents the exact reasoning step-by-step for full transparency.
|
| 82 |
+
- **Streamlit Live Dashboard**: Interactive real-time web UI for threat analysis and execution timeline visualization.
|
| 83 |
+
- **AMD Hardware Optimized**: Built to run on the AMD Developer Cloud utilizing vLLM and Qwen models, with a robust fallback mock mode for deterministic demos.
|
| 84 |
+
|
| 85 |
+
## Quick Start
|
| 86 |
+
|
| 87 |
+
```bash
|
| 88 |
+
pip install -r requirements.txt
|
| 89 |
+
streamlit run app.py
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
## How to Run
|
| 93 |
+
|
| 94 |
+
### 1. Install Dependencies
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
pip install -r requirements.txt
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
### 2. Configure Environment
|
| 101 |
+
|
| 102 |
+
```bash
|
| 103 |
+
# Copy the example environment file and add your AMD Cloud API key (optional — mock mode works without it)
|
| 104 |
+
cp .env.example .env
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### 3. Launch the Streamlit Dashboard (Primary Interface)
|
| 108 |
+
|
| 109 |
+
```bash
|
| 110 |
+
streamlit run app.py
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
The dashboard runs at `http://localhost:8501` and provides a full interactive UI for submitting messages, viewing risk scores, agent reasoning, and the step-by-step execution timeline.
|
| 114 |
+
|
| 115 |
+
### 4. Run Pipeline Smoke Tests (CLI)
|
| 116 |
+
|
| 117 |
+
```bash
|
| 118 |
+
python scripts/test_pipeline.py
|
| 119 |
+
```
|
| 120 |
+
|
| 121 |
+
## Future Work
|
| 122 |
+
|
| 123 |
+
- **AMD MI300X Deployment**: Fully scale the vLLM integration on AMD MI300X infrastructure for enterprise-grade throughput.
|
| 124 |
+
- **WhatsApp Bot Integration**: Directly parse user-forwarded messages for instant fraud scoring.
|
agents/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Shadow MVP Pipeline Package
|
| 3 |
+
"""
|
| 4 |
+
from .pipeline import ShadowPipeline, ShadowState
|
| 5 |
+
|
| 6 |
+
__all__ = ["ShadowPipeline", "ShadowState"]
|
agents/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (301 Bytes). View file
|
|
|
agents/__pycache__/pipeline.cpython-314.pyc
ADDED
|
Binary file (9.43 kB). View file
|
|
|
agents/pipeline.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import json
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import Dict, Any, List
|
| 5 |
+
|
| 6 |
+
from core.llm_client import ShadowLLMClient
|
| 7 |
+
from core.osint_dataset import classify_synthetic_message
|
| 8 |
+
from core.execution_trace import ExecutionTrace, format_execution_trace
|
| 9 |
+
from core.prompts import (
|
| 10 |
+
get_system_prompt,
|
| 11 |
+
build_language_agent_input,
|
| 12 |
+
build_threat_pattern_agent_input,
|
| 13 |
+
build_risk_scoring_agent_input,
|
| 14 |
+
build_action_agent_input
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
@dataclass
class ShadowState:
    """Mutable record that carries one message through the Shadow pipeline.

    Only ``raw_message`` is required. Every other field starts out empty and
    is filled in by ``ShadowPipeline.run`` as each stage completes.
    """

    # Text of the message being analysed.
    raw_message: str
    # Output of the OSINT pre-check (``classify_synthetic_message``).
    precheck_data: Dict[str, Any] = field(default_factory=dict)
    # Output of the Language agent, or its fallback payload.
    language_data: Dict[str, Any] = field(default_factory=dict)
    # Output of the Threat Pattern agent, or its fallback payload.
    threat_data: Dict[str, Any] = field(default_factory=dict)
    # Output of the Risk Scoring agent, or its fallback payload.
    risk_data: Dict[str, Any] = field(default_factory=dict)
    # Output of the Action agent, or its fallback payload.
    action_data: Dict[str, Any] = field(default_factory=dict)
    # Human-readable status lines appended while the pipeline runs.
    execution_log: List[str] = field(default_factory=list)
    # Structured per-step trace as returned by ``ExecutionTrace.get_trace``.
    execution_trace: List[Dict[str, Any]] = field(default_factory=list)
    # Rendering of ``execution_trace`` produced by ``format_execution_trace``.
    formatted_trace: str = ""
|
| 29 |
+
|
| 30 |
+
class ShadowPipeline:
    """Sequential orchestration engine for the Shadow fraud-analysis agents.

    A message first goes through the OSINT pre-check and is then passed, in
    order, through the Language, Threat Pattern, Risk Scoring and Action
    agents. Each stage's output is stored on a ``ShadowState`` and recorded
    as a step in an ``ExecutionTrace``.
    """

    def __init__(self):
        # A single LLM client is reused for every agent call of this pipeline.
        self.client = ShadowLLMClient()

    def _safe_agent_run(self, agent_name: str, system_prompt: str, user_input: str,
                        state: "ShadowState", fallback_data: Dict[str, Any]) -> Dict[str, Any]:
        """Run one agent call, log its outcome, and never raise.

        Args:
            agent_name: Label used in the execution log line.
            system_prompt: System prompt passed to the LLM client.
            user_input: User prompt passed to the LLM client.
            state: Pipeline state whose ``execution_log`` receives one line.
            fallback_data: Payload returned (as-is, not copied) on failure.

        Returns:
            The client's response on success, otherwise ``fallback_data``.
        """
        # perf_counter is monotonic, so durations are immune to clock changes.
        started = time.perf_counter()
        try:
            result = self.client.generate_response(system_prompt, user_input)
            duration = round(time.perf_counter() - started, 2)

            reasoning = result.get("reasoning_summary")
            if not reasoning and agent_name == "ActionAgent":
                # The action agent may only provide a dashboard summary.
                reasoning = result.get("dashboard_summary", "Action agent completed.")
            if not reasoning:
                reasoning = "Analysis completed."

            state.execution_log.append(f"{agent_name} ({duration}s): SUCCESS - {reasoning}")
            return result
        except Exception as exc:
            # Deliberate best-effort boundary: any agent failure (including a
            # response object without ``.get``) degrades to the fallback.
            duration = round(time.perf_counter() - started, 2)
            state.execution_log.append(f"{agent_name} ({duration}s): ERROR - {exc}")
            return fallback_data

    @staticmethod
    def _summarize_actions(verdict: str, actions: Any) -> str:
        """Build the one-line trace summary for the Action Agent step.

        ``actions`` comes from LLM output, so it is treated as untrusted:
        a non-list value or non-dict entries are ignored instead of raising.
        Returns ``verdict`` alone when no named action is available.
        """
        if not isinstance(actions, (list, tuple)):
            return verdict
        names = [
            str(entry["action"])
            for entry in actions
            if isinstance(entry, dict) and entry.get("action")
        ]
        joined = " + ".join(names)
        if not joined or joined == verdict:
            return verdict
        return f"{verdict} -> {joined}"

    def run(self, message: str) -> "ShadowState":
        """Execute the full pipeline for ``message`` and return its state."""
        state = ShadowState(raw_message=message)
        trace = ExecutionTrace()

        # Fallback payloads used when an agent call fails.
        lang_fb = {"primary_language": "unknown", "confidence": 0.0}
        threat_fb = {"scam_categories_detected": [], "primary_category": "none", "threat_signals": {}}
        risk_fb = {"raw_score": 3, "risk_level": "MEDIUM"}
        action_fb = {
            "verdict": "INCONCLUSIVE",
            "risk_level": "MEDIUM",
            "scam_type": "Unknown",
            "dashboard_summary": "Analysis failed, manual review required.",
            "recommended_actions": [{"priority": 1, "action": "Manual Review", "reason": "Pipeline failure"}],
        }

        # Step 0: OSINT Pre-Analysis Stage
        state.precheck_data = classify_synthetic_message(message)
        precheck_risk = state.precheck_data.get("risk_level", "UNKNOWN")
        precheck_category = state.precheck_data.get("probable_category", "unknown")

        # Only a HIGH/CRITICAL pre-check hit is forwarded to the threat agent.
        threat_context = None
        if precheck_risk in ("HIGH", "CRITICAL"):
            osint_summary = f"Matched {precheck_category} pattern from deterministic dataset"
            state.execution_log.append("OSINT PreCheck: Known Kenyan threat pattern detected")
            threat_context = precheck_category
        elif precheck_risk == "LOW" or precheck_category == "legitimate_transaction":
            # NOTE(review): the log line says "legitimate" while the trace
            # summary says "no known match" - confirm which wording is intended.
            osint_summary = "No known OSINT match - escalating to LLM reasoning layer"
            state.execution_log.append("OSINT PreCheck: Legitimate transaction pattern")
        else:
            osint_summary = "No known OSINT match - escalating to LLM reasoning layer"

        trace.add_step(
            agent="OSINT PRECHECK",
            input_str=message,
            output=state.precheck_data,
            summary=osint_summary,
            risk_hint=precheck_risk,
        )

        # Step 1: Language Agent
        sys_lang = get_system_prompt("language_agent")
        user_lang = build_language_agent_input(message)
        state.language_data = self._safe_agent_run("LanguageAgent", sys_lang, user_lang, state, lang_fb)

        primary_lang = state.language_data.get("primary_language", "Unknown")
        trace.add_step(
            agent="LANGUAGE AGENT",
            input_str=user_lang,
            output=state.language_data,
            summary=f"{primary_lang} detected",
        )

        # Step 2: Threat Pattern Agent
        sys_threat = get_system_prompt("threat_pattern_agent")
        user_threat = build_threat_pattern_agent_input(message, state.language_data, threat_context)
        state.threat_data = self._safe_agent_run("ThreatPatternAgent", sys_threat, user_threat, state, threat_fb)

        threat_summary = state.threat_data.get("reasoning_summary", "Threat analysis completed")
        primary_category = state.threat_data.get("primary_category")
        if primary_category and primary_category != "none":
            # A concrete category takes precedence over the free-text summary.
            threat_summary = f"{primary_category} intent confirmed"

        trace.add_step(
            agent="THREAT AGENT",
            input_str=user_threat,
            output=state.threat_data,
            summary=threat_summary,
        )

        # Step 3: Risk Scoring Agent
        sys_risk = get_system_prompt("risk_scoring_agent")
        user_risk = build_risk_scoring_agent_input(message, state.language_data, state.threat_data)
        state.risk_data = self._safe_agent_run("RiskScoringAgent", sys_risk, user_risk, state, risk_fb)

        risk_level = state.risk_data.get("risk_level", "UNKNOWN")
        raw_score = state.risk_data.get("raw_score", 0)
        trace.add_step(
            agent="RISK AGENT",
            input_str=user_risk,
            output=state.risk_data,
            summary=f"{risk_level} ({raw_score})",
            risk_hint=risk_level,
        )

        # Step 4: Action Agent
        sys_action = get_system_prompt("action_agent")
        user_action = build_action_agent_input(message, state.language_data, state.threat_data, state.risk_data)
        state.action_data = self._safe_agent_run("ActionAgent", sys_action, user_action, state, action_fb)

        verdict = state.action_data.get("verdict", "INCONCLUSIVE")
        actions = state.action_data.get("recommended_actions", [])
        action_summary = self._summarize_actions(verdict, actions)

        trace.add_step(
            agent="ACTION AGENT",
            input_str=user_action,
            output=state.action_data,
            summary=action_summary,
        )

        state.execution_trace = trace.get_trace()
        state.formatted_trace = format_execution_trace(state.execution_trace)

        return state
|
| 167 |
+
|
| 168 |
+
# Hybrid Flow: OSINT -> LLM Fallback Evaluated
|
app.py
ADDED
|
@@ -0,0 +1,620 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
# ── Page Config ───────────────────────────────────────────────────
|
| 7 |
+
st.set_page_config(
|
| 8 |
+
page_title="SHADOW — Kenyan Fraud Intelligence",
|
| 9 |
+
page_icon="🛡️",
|
| 10 |
+
layout="wide",
|
| 11 |
+
initial_sidebar_state="collapsed"
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
# ── Styling ───────────────────────────────────────────────────────
|
| 15 |
+
st.markdown("""
|
| 16 |
+
<style>
|
| 17 |
+
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Inter:wght@400;600;700&display=swap');
|
| 18 |
+
|
| 19 |
+
html, body, [class*="css"] {
|
| 20 |
+
font-family: 'Inter', sans-serif;
|
| 21 |
+
background-color: #0a0a0f;
|
| 22 |
+
color: #e2e8f0;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
.stApp {
|
| 26 |
+
background-color: #0a0a0f;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
/* Header */
|
| 30 |
+
.shadow-header {
|
| 31 |
+
text-align: center;
|
| 32 |
+
padding: 2rem 0 1rem 0;
|
| 33 |
+
border-bottom: 1px solid #1e293b;
|
| 34 |
+
margin-bottom: 2rem;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.shadow-title {
|
| 38 |
+
font-size: 3rem;
|
| 39 |
+
font-weight: 700;
|
| 40 |
+
letter-spacing: 0.3em;
|
| 41 |
+
color: #f8fafc;
|
| 42 |
+
font-family: 'JetBrains Mono', monospace;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.shadow-subtitle {
|
| 46 |
+
color: #64748b;
|
| 47 |
+
font-size: 0.9rem;
|
| 48 |
+
letter-spacing: 0.15em;
|
| 49 |
+
margin-top: 0.3rem;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.amd-badge {
|
| 53 |
+
display: inline-block;
|
| 54 |
+
background: linear-gradient(135deg, #ED1C24, #FF6B35);
|
| 55 |
+
color: white;
|
| 56 |
+
font-size: 0.7rem;
|
| 57 |
+
font-weight: 700;
|
| 58 |
+
letter-spacing: 0.1em;
|
| 59 |
+
padding: 3px 10px;
|
| 60 |
+
border-radius: 3px;
|
| 61 |
+
margin-top: 0.5rem;
|
| 62 |
+
font-family: 'JetBrains Mono', monospace;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
/* Verdict Cards */
|
| 66 |
+
.verdict-scam {
|
| 67 |
+
background: linear-gradient(135deg, #1a0505, #2d0808);
|
| 68 |
+
border: 2px solid #ef4444;
|
| 69 |
+
border-radius: 12px;
|
| 70 |
+
padding: 1.5rem;
|
| 71 |
+
text-align: center;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.verdict-suspicious {
|
| 75 |
+
background: linear-gradient(135deg, #1a1205, #2d1f08);
|
| 76 |
+
border: 2px solid #f59e0b;
|
| 77 |
+
border-radius: 12px;
|
| 78 |
+
padding: 1.5rem;
|
| 79 |
+
text-align: center;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
.verdict-safe {
|
| 83 |
+
background: linear-gradient(135deg, #051a0a, #082d12);
|
| 84 |
+
border: 2px solid #22c55e;
|
| 85 |
+
border-radius: 12px;
|
| 86 |
+
padding: 1.5rem;
|
| 87 |
+
text-align: center;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
.verdict-label {
|
| 91 |
+
font-size: 2rem;
|
| 92 |
+
font-weight: 700;
|
| 93 |
+
font-family: 'JetBrains Mono', monospace;
|
| 94 |
+
letter-spacing: 0.2em;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
.verdict-scam .verdict-label { color: #ef4444; }
|
| 98 |
+
.verdict-suspicious .verdict-label { color: #f59e0b; }
|
| 99 |
+
.verdict-safe .verdict-label { color: #22c55e; }
|
| 100 |
+
|
| 101 |
+
.verdict-summary {
|
| 102 |
+
font-size: 0.85rem;
|
| 103 |
+
color: #94a3b8;
|
| 104 |
+
margin-top: 0.5rem;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
/* Risk Score */
|
| 108 |
+
.risk-bar-container {
|
| 109 |
+
background: #1e293b;
|
| 110 |
+
border-radius: 6px;
|
| 111 |
+
height: 10px;
|
| 112 |
+
width: 100%;
|
| 113 |
+
margin: 0.5rem 0;
|
| 114 |
+
overflow: hidden;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.risk-bar-fill {
|
| 118 |
+
height: 10px;
|
| 119 |
+
border-radius: 6px;
|
| 120 |
+
transition: width 0.5s ease;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/* Trace Timeline */
|
| 124 |
+
.trace-container {
|
| 125 |
+
background: #0f172a;
|
| 126 |
+
border: 1px solid #1e293b;
|
| 127 |
+
border-radius: 10px;
|
| 128 |
+
padding: 1.2rem;
|
| 129 |
+
margin-top: 1rem;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.trace-step {
|
| 133 |
+
display: flex;
|
| 134 |
+
align-items: flex-start;
|
| 135 |
+
margin-bottom: 0.8rem;
|
| 136 |
+
padding-bottom: 0.8rem;
|
| 137 |
+
border-bottom: 1px solid #1e293b;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.trace-step:last-child {
|
| 141 |
+
border-bottom: none;
|
| 142 |
+
margin-bottom: 0;
|
| 143 |
+
padding-bottom: 0;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.trace-dot {
|
| 147 |
+
width: 10px;
|
| 148 |
+
height: 10px;
|
| 149 |
+
border-radius: 50%;
|
| 150 |
+
margin-top: 4px;
|
| 151 |
+
margin-right: 12px;
|
| 152 |
+
flex-shrink: 0;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
.trace-agent {
|
| 156 |
+
font-family: 'JetBrains Mono', monospace;
|
| 157 |
+
font-size: 0.72rem;
|
| 158 |
+
font-weight: 700;
|
| 159 |
+
letter-spacing: 0.1em;
|
| 160 |
+
color: #64748b;
|
| 161 |
+
min-width: 160px;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.trace-summary {
|
| 165 |
+
font-size: 0.82rem;
|
| 166 |
+
color: #cbd5e1;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
/* Info panels */
|
| 170 |
+
.info-panel {
|
| 171 |
+
background: #0f172a;
|
| 172 |
+
border: 1px solid #1e293b;
|
| 173 |
+
border-radius: 10px;
|
| 174 |
+
padding: 1.2rem;
|
| 175 |
+
margin-bottom: 1rem;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
.info-panel h4 {
|
| 179 |
+
color: #64748b;
|
| 180 |
+
font-size: 0.75rem;
|
| 181 |
+
font-weight: 600;
|
| 182 |
+
letter-spacing: 0.12em;
|
| 183 |
+
text-transform: uppercase;
|
| 184 |
+
margin-bottom: 0.8rem;
|
| 185 |
+
font-family: 'JetBrains Mono', monospace;
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
.red-flag {
|
| 189 |
+
background: #1a0505;
|
| 190 |
+
border-left: 3px solid #ef4444;
|
| 191 |
+
padding: 0.4rem 0.8rem;
|
| 192 |
+
border-radius: 0 4px 4px 0;
|
| 193 |
+
font-size: 0.82rem;
|
| 194 |
+
color: #fca5a5;
|
| 195 |
+
margin-bottom: 0.4rem;
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
.action-item {
|
| 199 |
+
background: #0a1628;
|
| 200 |
+
border-left: 3px solid #3b82f6;
|
| 201 |
+
padding: 0.4rem 0.8rem;
|
| 202 |
+
border-radius: 0 4px 4px 0;
|
| 203 |
+
font-size: 0.82rem;
|
| 204 |
+
color: #93c5fd;
|
| 205 |
+
margin-bottom: 0.4rem;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.safety-tip {
|
| 209 |
+
background: #0a1628;
|
| 210 |
+
border: 1px solid #1e3a5f;
|
| 211 |
+
border-radius: 8px;
|
| 212 |
+
padding: 1rem;
|
| 213 |
+
margin-top: 0.5rem;
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
.safety-tip-lang {
|
| 217 |
+
font-size: 0.7rem;
|
| 218 |
+
font-weight: 700;
|
| 219 |
+
color: #3b82f6;
|
| 220 |
+
letter-spacing: 0.1em;
|
| 221 |
+
font-family: 'JetBrains Mono', monospace;
|
| 222 |
+
margin-bottom: 0.2rem;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.safety-tip-text {
|
| 226 |
+
font-size: 0.82rem;
|
| 227 |
+
color: #cbd5e1;
|
| 228 |
+
margin-bottom: 0.6rem;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
/* Preset pills */
|
| 232 |
+
.preset-label {
|
| 233 |
+
font-size: 0.72rem;
|
| 234 |
+
color: #64748b;
|
| 235 |
+
font-family: 'JetBrains Mono', monospace;
|
| 236 |
+
letter-spacing: 0.1em;
|
| 237 |
+
margin-bottom: 0.5rem;
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
/* Input area */
|
| 241 |
+
.stTextArea textarea {
|
| 242 |
+
background-color: #0f172a !important;
|
| 243 |
+
border: 1px solid #1e293b !important;
|
| 244 |
+
color: #e2e8f0 !important;
|
| 245 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 246 |
+
font-size: 0.85rem !important;
|
| 247 |
+
border-radius: 8px !important;
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
.stTextArea textarea:focus {
|
| 251 |
+
border-color: #3b82f6 !important;
|
| 252 |
+
box-shadow: 0 0 0 2px rgba(59,130,246,0.2) !important;
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
.stButton button {
|
| 256 |
+
background: linear-gradient(135deg, #1d4ed8, #2563eb) !important;
|
| 257 |
+
color: white !important;
|
| 258 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 259 |
+
font-weight: 700 !important;
|
| 260 |
+
letter-spacing: 0.1em !important;
|
| 261 |
+
border: none !important;
|
| 262 |
+
border-radius: 8px !important;
|
| 263 |
+
padding: 0.6rem 2rem !important;
|
| 264 |
+
width: 100% !important;
|
| 265 |
+
font-size: 0.9rem !important;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.stButton button:hover {
|
| 269 |
+
background: linear-gradient(135deg, #1e40af, #1d4ed8) !important;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
/* Divider */
|
| 273 |
+
hr { border-color: #1e293b !important; }
|
| 274 |
+
|
| 275 |
+
/* Spinner */
|
| 276 |
+
.stSpinner > div { border-top-color: #3b82f6 !important; }
|
| 277 |
+
</style>
|
| 278 |
+
""", unsafe_allow_html=True)
|
| 279 |
+
|
| 280 |
+
# ── Pipeline Import ───────────────────────────────────────────────
|
| 281 |
+
# Works whether run from project root (HF Spaces) or from app/ dir
|
| 282 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 283 |
+
|
| 284 |
+
try:
|
| 285 |
+
from agents.pipeline import ShadowPipeline
|
| 286 |
+
PIPELINE_AVAILABLE = True
|
| 287 |
+
except ImportError:
|
| 288 |
+
PIPELINE_AVAILABLE = False
|
| 289 |
+
|
| 290 |
+
# ── Preset Messages ───────────────────────────────────────────────
|
| 291 |
+
PRESETS = {
|
| 292 |
+
"— Select a demo scenario —": "",
|
| 293 |
+
"🔴 Safaricom Impersonation": "Habari kutoka Safaricom. Laini yako inatumika na mtu mwingine (double registration). Piga *33*0000* kuzuia hii haraka au akaunti yako itafungwa ndani ya masaa 2.",
|
| 294 |
+
"🔴 KRA Penalty Threat": "KRA ALERT: Uko na tax arrears ya KES 23,450 kwa iTax system yako. Lipa ndani ya masaa 48 au utashtakiwa. Piga simu 0756XXXXXX sasa.",
|
| 295 |
+
"🟠 M-Pesa Reversal Scam": "Aki naomba urudishe ile pesa nimekutumia by mistake saa hii. Ni ya fees ya mtoto tafadhali. Tuma haraka 0712XXXXXX.",
|
| 296 |
+
"🟠 Fuliza Boost Scam": "KAMA ULIPATA FULIZA SEMA THANKS. Inbox nikuboostie fuliza from 0 to 100k in 2 minutes hii January hakuna stress.",
|
| 297 |
+
"🟡 Betting Jackpot Scam": "Hongera! Wewe ndio mshindi wa 500k SportPesa Weekly Jackpot. Tuma 2,500 ya registration fee kupokea pesa kwa MPESA yako leo.",
|
| 298 |
+
"🟡 WhatsApp OTP Theft": "Boss nisamehe, nilituma code ya WhatsApp kwa namba yako by mistake. Naomba unitumie hiyo code 6-digits haraka niingie kwa group ya kazi.",
|
| 299 |
+
"✅ Legitimate M-Pesa": "MPESA Confirmed. You have received Ksh 3,500.00 from JOHN KAMAU 0722XXXXXX on 8/5/26 at 10:23 AM. New M-PESA balance is Ksh 4,120.00.",
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
# ── Risk Color Helper ─────────────────────────────────────────────
|
| 303 |
+
def get_risk_color(level: str) -> str:
    """Return the hex colour used to display a risk level.

    The lookup ignores case and surrounding whitespace, so labels such as
    ``"high"`` or ``" Critical "`` resolve like their upper-case forms.
    Any unrecognised value (including ``None``) maps to neutral grey.
    """
    palette = {
        "CRITICAL": "#ef4444",
        "HIGH": "#f97316",
        "MEDIUM": "#f59e0b",
        "LOW": "#22c55e",
    }
    return palette.get(str(level).strip().upper(), "#64748b")
|
| 310 |
+
|
| 311 |
+
def get_verdict_class(verdict: str) -> str:
    """Return the CSS class of the verdict card for ``verdict``.

    Only an explicit ``SAFE`` verdict gets the green "safe" styling. Any
    other non-scam value - including ``SUSPICIOUS``, the pipeline's
    ``INCONCLUSIVE`` failure fallback, and unknown labels - is shown with the
    amber "suspicious" styling so a failed analysis is never rendered as safe.
    Matching ignores case and surrounding whitespace.
    """
    label = str(verdict).strip().upper()
    if label == "SCAM":
        return "verdict-scam"
    if label == "SAFE":
        return "verdict-safe"
    return "verdict-suspicious"
|
| 317 |
+
|
| 318 |
+
def get_verdict_emoji(verdict: str) -> str:
    """Return the emoji shown next to ``verdict``.

    Matching ignores case and surrounding whitespace. Verdicts other than
    SCAM, SUSPICIOUS and SAFE yield a question mark.
    """
    icons = {"SCAM": "🚨", "SUSPICIOUS": "⚠️", "SAFE": "✅"}
    return icons.get(str(verdict).strip().upper(), "❓")
|
| 320 |
+
|
| 321 |
+
def get_trace_dot_color(agent: str, risk_hint: str) -> str:
    """Return the colour of a timeline dot for one trace step.

    The risk hint takes priority: ``CRITICAL``/``HIGH`` is red and ``MEDIUM``
    is amber. Otherwise the colour identifies the agent that produced the step.

    Agent names are compared after replacing underscores with spaces and
    upper-casing, so ``"OSINT_PRECHECK"`` and ``"OSINT PRECHECK"`` are treated
    as the same agent.

    Args:
        agent: Name of the agent that produced the step.
        risk_hint: Optional risk label attached to the step; may be empty or
            ``None``.

    Returns:
        A hex colour string; neutral gray ``#64748b`` when nothing matches.
    """
    hint = risk_hint.strip().upper() if isinstance(risk_hint, str) else ""
    if hint in ("CRITICAL", "HIGH"):
        return "#ef4444"
    if hint == "MEDIUM":
        return "#f59e0b"

    agent_colors = {
        "OSINT PRECHECK": "#8b5cf6",
        "LANGUAGE AGENT": "#3b82f6",
        "THREAT AGENT": "#f97316",
        "RISK AGENT": "#ef4444",
        "ACTION AGENT": "#22c55e",
    }
    name = agent.replace("_", " ").strip().upper() if isinstance(agent, str) else ""
    return agent_colors.get(name, "#64748b")
|
| 337 |
+
|
| 338 |
+
# ── Header ────────────────────────────────────────────────────────
|
| 339 |
+
st.markdown("""
|
| 340 |
+
<div class="shadow-header">
|
| 341 |
+
<div class="shadow-title">◈ SHADOW</div>
|
| 342 |
+
<div class="shadow-subtitle">KENYAN FRAUD INTELLIGENCE SYSTEM</div>
|
| 343 |
+
<div class="amd-badge">⚡ POWERED BY AMD INSTINCT MI300X + ROCm</div>
|
| 344 |
+
</div>
|
| 345 |
+
""", unsafe_allow_html=True)
|
| 346 |
+
|
| 347 |
+
# ── Layout ────────────────────────────────────────────────────────
|
| 348 |
+
# Two-column page: input controls on the left, results on the right.
# The right column is given 1.3x the relative width of the left one.
left_col, right_col = st.columns([1, 1.3], gap="large")

with left_col:
    st.markdown("#### 📥 Analyze a Message")

    # Preset selector: the option labels are the keys of PRESETS.
    preset_choice = st.selectbox(
        "Load a demo scenario",
        options=list(PRESETS.keys()),
        label_visibility="collapsed"
    )

    # Pre-fill text area from preset; an unknown key yields an empty box.
    default_text = PRESETS.get(preset_choice, "")

    # The message to analyze; `message` is read by the results column.
    message = st.text_area(
        "Message",
        value=default_text,
        height=160,
        placeholder="Paste a suspicious SMS, WhatsApp message, or notification here...",
        label_visibility="collapsed"
    )

    # True only on the script run triggered by this button press.
    analyze_clicked = st.button("🔍 ANALYZE WITH SHADOW", use_container_width=True)

    # Stats strip: the three values below are hard-coded display strings and
    # are not computed from the pipeline or the knowledge base.
    st.markdown("<br>", unsafe_allow_html=True)
    s1, s2, s3 = st.columns(3)
    s1.metric("Scam Categories", "11")
    s2.metric("Languages", "EN / SW / Sheng")
    s3.metric("Pipeline Agents", "4")

    st.markdown("---")
    # Static description blurb (no interpolated values).
    st.markdown("""
<div style='font-size:0.75rem; color:#475569; font-family: JetBrains Mono, monospace;'>
SHADOW uses a hybrid OSINT + 4-agent LLM pipeline to detect<br>
Kenyan mobile fraud in real time. Qwen3 inference runs on<br>
AMD Instinct MI300X via vLLM + ROCm.
</div>
""", unsafe_allow_html=True)
|
| 388 |
+
|
| 389 |
+
# ── Analysis Logic ─────────────────────────────────────────────────
|
| 390 |
+
import html  # needed only by this section: escapes pipeline output before it is rendered as raw HTML


def _esc(value) -> str:
    """Return ``value`` as HTML-safe text.

    ``None`` becomes an empty string. Newlines become ``<br>`` so multi-line
    model output cannot introduce the blank lines that terminate a Markdown
    HTML block.
    """
    text = "" if value is None else str(value)
    return html.escape(text).replace("\r\n", "\n").replace("\n", "<br>")


def _as_float(value, default: float = 0.0) -> float:
    """Coerce ``value`` to a finite float, or return ``default`` if that fails."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    # NaN (number != number) and +/-inf cannot be passed to int() later on.
    if number != number or abs(number) == float("inf"):
        return default
    return number


def _as_list(value) -> list:
    """Return ``value`` as a list if it is a list or tuple, else an empty list."""
    return list(value) if isinstance(value, (list, tuple)) else []


def _as_dict(value) -> dict:
    """Return ``value`` if it is a dict, else an empty dict."""
    return value if isinstance(value, dict) else {}


# Results column. On an "analyze" click the pipeline (or a fallback) produces
# `action`, `risk` and `trace`; those are then rendered as HTML panels.
# Everything that originates from the pipeline is treated as untrusted text:
# it is type-checked, and escaped with _esc() before being placed in markup
# rendered with unsafe_allow_html=True.
with right_col:
    if analyze_clicked:
        if not message.strip():
            st.warning("Please paste a message to analyze.")
        else:
            with st.spinner("Shadow is analyzing..."):
                start = time.time()

                if PIPELINE_AVAILABLE:
                    try:
                        pipeline = ShadowPipeline()
                        state = pipeline.run(message)
                        action = state.action_data or {}
                        risk = state.risk_data or {}
                        trace = state.execution_trace or []
                        elapsed = round(time.time() - start, 2)
                    except Exception as e:
                        # Deliberate catch-all at the UI boundary: report the
                        # failure and fall back to an explicit error result.
                        st.error(f"Pipeline error: {str(e)}")
                        # Safe fallback
                        action = {
                            "verdict": "INCONCLUSIVE",
                            "risk_level": "UNKNOWN",
                            "scam_type": "Error",
                            "dashboard_summary": "An error occurred during analysis.",
                            "confidence": 0.0,
                            "explanation": {"red_flags_found": ["System error"]},
                            "recommended_actions": [],
                            "do_not_do": [],
                            "safety_tip": {},
                            "reporting": {}
                        }
                        risk = {"raw_score": 0}
                        trace = [{"agent": "SYSTEM", "step": 1, "summary": "Error running pipeline", "risk_hint": "UNKNOWN"}]
                        elapsed = round(time.time() - start, 2)
                else:
                    # Fallback demo state if imports fail
                    action = {
                        "verdict": "SUSPICIOUS",
                        "risk_level": "MEDIUM",
                        "scam_type": "Pipeline Offline (Mock)",
                        "dashboard_summary": "This is a fallback response because the pipeline failed to load.",
                        "confidence": 0.50,
                        "explanation": {"red_flags_found": ["Mock execution"]},
                        "recommended_actions": [{"action": "Check system paths and imports"}],
                        "do_not_do": ["Trust this mock verdict"],
                        "safety_tip": {"english": "System is offline.", "swahili": "Mfumo haupatikani.", "sheng": "System iko chini."},
                        "reporting": {"should_report": False, "contacts": []}
                    }
                    risk = {"raw_score": 5}
                    trace = [{"agent": "MOCK AGENT", "step": 1, "summary": "Pipeline import failed", "risk_hint": "MEDIUM"}]
                    elapsed = 0.0

            # Safe gets with empty defaults to prevent NoneType crashes.
            # Containers of the wrong type are replaced by empty ones so the
            # .get() calls and loops below cannot raise.
            action = _as_dict(action)
            risk = _as_dict(risk)
            trace = _as_list(trace)

            verdict = str(action.get("verdict") or "INCONCLUSIVE")
            risk_level = str(action.get("risk_level") or "UNKNOWN")
            scam_type = action.get("scam_type") or "Unknown"
            summary = action.get("dashboard_summary") or ""

            # Confidence may arrive as a string or be missing; show 0-100 %.
            confidence_pct = max(0, min(int(_as_float(action.get("confidence")) * 100), 100))

            raw_score = risk.get("raw_score")
            if raw_score is None:
                raw_score = 0

            explanation = _as_dict(action.get("explanation"))
            red_flags = _as_list(explanation.get("red_flags_found"))

            recommended = _as_list(action.get("recommended_actions"))
            do_not = _as_list(action.get("do_not_do"))

            safety_tip = _as_dict(action.get("safety_tip"))
            reporting = _as_dict(action.get("reporting"))

            # ── Verdict Card ──────────────────────────────────────
            verdict_class = get_verdict_class(verdict)
            verdict_emoji = get_verdict_emoji(verdict)
            risk_color = get_risk_color(risk_level)
            # Bar width in percent on a 0-10 scale, clamped to the bar.
            score_pct = max(0, min(int((_as_float(raw_score) / 10) * 100), 100))

            st.markdown(f"""
            <div class="{verdict_class}">
            <div class="verdict-label">{verdict_emoji} {_esc(verdict)}</div>
            <div class="verdict-summary">{_esc(summary)}</div>
            <div style="margin-top:0.8rem; font-size:0.78rem; color:#64748b;">
            {_esc(scam_type)} | Confidence: {confidence_pct}% | {_esc(elapsed)}s
            </div>
            </div>
            """, unsafe_allow_html=True)

            st.markdown("<br>", unsafe_allow_html=True)

            # ── Risk Score Bar ────────────────────────────────────
            st.markdown(f"""
            <div class="info-panel">
            <h4>⚡ Risk Score</h4>
            <div style="display:flex; justify-content:space-between; margin-bottom:4px;">
            <span style="font-size:0.8rem; color:#94a3b8;">Score: {_esc(raw_score)}/10</span>
            <span style="font-size:0.8rem; font-weight:700; color:{risk_color};">{_esc(risk_level)}</span>
            </div>
            <div class="risk-bar-container">
            <div class="risk-bar-fill" style="width:{score_pct}%; background:{risk_color};"></div>
            </div>
            </div>
            """, unsafe_allow_html=True)

            # ── Two columns: Red Flags + Actions ──────────────────
            c1, c2 = st.columns(2)

            with c1:
                flags_html = "".join(
                    f'<div class="red-flag">⚠ {_esc(f)}</div>' for f in red_flags
                ) or '<div style="color:#64748b; font-size:0.8rem;">None detected</div>'
                st.markdown(f"""
                <div class="info-panel">
                <h4>🚩 Red Flags</h4>
                {flags_html}
                </div>
                """, unsafe_allow_html=True)

            with c2:
                # Recommended actions may be dicts with an "action" key or
                # plain strings; anything else is ignored.
                action_items = []
                for a in recommended:
                    if isinstance(a, dict):
                        action_text = a.get("action", "")
                        if action_text:
                            action_items.append(f'<div class="action-item">→ {_esc(action_text)}</div>')
                    elif isinstance(a, str):
                        action_items.append(f'<div class="action-item">→ {_esc(a)}</div>')
                actions_html = "".join(action_items)

                donot_html = "".join(
                    f'<div class="red-flag">✗ {_esc(d)}</div>' for d in do_not if isinstance(d, str)
                )

                st.markdown(f"""
                <div class="info-panel">
                <h4>✅ What To Do</h4>
                {actions_html}
                {donot_html}
                </div>
                """, unsafe_allow_html=True)

            # ── Execution Trace ───────────────────────────────────
            # Built from single-line fragments joined without separators so
            # the markup contains no blank or indented lines, which Markdown
            # could otherwise interpret as the end of the HTML block.
            trace_parts = [
                '<div class="info-panel" style="margin-top:0;">',
                '<h4>🧠 Agent Reasoning Timeline</h4>',
                '<div class="trace-container">',
            ]

            if not trace:
                trace_parts.append('<div style="color:#64748b; font-size:0.8rem;">No trace available.</div>')
            for step in trace:
                if not isinstance(step, dict):
                    continue
                agent = str(step.get("agent") or "SYSTEM")
                summary_text = step.get("summary") or ""
                risk_hint = str(step.get("risk_hint") or "")
                step_number = step.get("step", 0)
                dot_color = get_trace_dot_color(agent, risk_hint)
                trace_parts.append(
                    '<div class="trace-step">'
                    f'<div class="trace-dot" style="background:{dot_color};"></div>'
                    '<div>'
                    f'<div class="trace-agent">[{_esc(step_number)}] {_esc(agent)}</div>'
                    f'<div class="trace-summary">{_esc(summary_text)}</div>'
                    '</div>'
                    '</div>'
                )
            trace_parts.append("</div></div>")
            st.markdown("".join(trace_parts), unsafe_allow_html=True)

            # ── Safety Tip ────────────────────────────────────────
            if safety_tip:
                tip_en = _esc(safety_tip.get('english', 'Not available'))
                tip_sw = _esc(safety_tip.get('swahili', 'Haipatikani'))
                tip_sheng = _esc(safety_tip.get('sheng', 'Haiwezekani'))
                st.markdown(f"""
                <div class="safety-tip">
                <div class="safety-tip-lang">EN</div>
                <div class="safety-tip-text">{tip_en}</div>
                <div class="safety-tip-lang">SW</div>
                <div class="safety-tip-text">{tip_sw}</div>
                <div class="safety-tip-lang">SHENG</div>
                <div class="safety-tip-text">{tip_sheng}</div>
                </div>
                """, unsafe_allow_html=True)

            # ── Reporting ─────────────────────────────────────────
            if reporting.get("should_report") and reporting.get("contacts"):
                contacts = _as_list(reporting.get("contacts"))
                # Only contacts that carry both a name and a value are shown.
                contact_parts = [
                    f"{_esc(c['name'])}: <strong>{_esc(c['value'])}</strong>"
                    for c in contacts
                    if isinstance(c, dict) and 'name' in c and 'value' in c
                ]

                if contact_parts:
                    contact_str = " | ".join(contact_parts)
                    st.markdown(f"""
                    <div style="margin-top:0.8rem; padding:0.8rem; background:#0a1628;
                    border:1px solid #1e3a5f; border-radius:8px;
                    font-size:0.8rem; color:#93c5fd;">
                    📢 Report this: {contact_str}
                    </div>
                    """, unsafe_allow_html=True)

    else:
        # Empty state
        st.markdown("""
        <div style="
        height: 400px;
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
        color: #334155;
        border: 1px dashed #1e293b;
        border-radius: 12px;
        font-family: 'JetBrains Mono', monospace;
        font-size: 0.85rem;
        text-align: center;
        padding: 2rem;
        ">
        <div style="font-size: 3rem; margin-bottom: 1rem;">◈</div>
        <div style="font-size: 1rem; color: #475569; margin-bottom: 0.5rem;">SHADOW IS WATCHING</div>
        <div style="color: #334155;">Paste a message or select a demo scenario<br>to begin fraud analysis.</div>
        </div>
        """, unsafe_allow_html=True)
|
| 609 |
+
|
| 610 |
+
# ── Footer ─────────────────────────────────────────────────────────
|
| 611 |
+
st.markdown("""
|
| 612 |
+
<div style="text-align:center; padding: 2rem 0 1rem 0; color:#334155;
|
| 613 |
+
font-size:0.72rem; font-family:'JetBrains Mono', monospace;
|
| 614 |
+
border-top: 1px solid #1e293b; margin-top: 2rem;">
|
| 615 |
+
SHADOW — AMD Developer Hackathon 2026 |
|
| 616 |
+
Qwen3 on MI300X via vLLM + ROCm |
|
| 617 |
+
Built for Kenya's 54M mobile users |
|
| 618 |
+
<a href="https://github.com/kwisdomk/SHADOW" style="color:#3b82f6;">GitHub</a>
|
| 619 |
+
</div>
|
| 620 |
+
""", unsafe_allow_html=True)
|
core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Shadow — core package
|
core/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (144 Bytes). View file
|
|
|
core/__pycache__/execution_trace.cpython-314.pyc
ADDED
|
Binary file (3.1 kB). View file
|
|
|
core/__pycache__/kenyan_context.cpython-314.pyc
ADDED
|
Binary file (15.3 kB). View file
|
|
|
core/__pycache__/llm_client.cpython-314.pyc
ADDED
|
Binary file (20.2 kB). View file
|
|
|
core/__pycache__/osint_dataset.cpython-314.pyc
ADDED
|
Binary file (14 kB). View file
|
|
|
core/__pycache__/prompts.cpython-314.pyc
ADDED
|
Binary file (13.4 kB). View file
|
|
|
core/execution_trace.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, List, Optional
|
| 2 |
+
|
| 3 |
+
class ExecutionTrace:
    """
    Ordered record of pipeline steps backing the Live Execution Timeline.

    Every step is kept as a plain dict so the trace can be handed to a UI or
    formatter without conversion.
    """

    def __init__(self):
        # Steps in the order they were recorded.
        self.steps: List[Dict[str, Any]] = []

    def add_step(self, agent: str, input_str: str, output: Dict[str, Any], summary: str, risk_hint: Optional[str] = None):
        """Record one step.

        The step number is the count of steps already stored, so the first
        recorded step is numbered 0.
        """
        self.steps.append(
            dict(
                step=len(self.steps),
                agent=agent,
                input=input_str,
                output=output,
                summary=summary,
                risk_hint=risk_hint,
            )
        )

    def get_trace(self) -> List[Dict[str, Any]]:
        """Return the stored step list itself (not a copy)."""
        return self.steps

    def clear(self):
        """Drop all recorded steps by binding a new empty list."""
        self.steps = list()
|
| 28 |
+
|
| 29 |
+
def format_execution_trace(trace: List[Dict[str, Any]]) -> str:
    """Return a human-readable timeline of the execution trace.

    Each step becomes one line of the form
    ``[STEP 0] OSINT PRECHECK -> mpesa_reversal detected``. The step number is
    taken from the step's own ``"step"`` field, so numbering follows whatever
    the recorder used. Underscores in agent names are shown as spaces and the
    name is upper-cased.

    Args:
        trace: Sequence of step dicts with ``step``, ``agent`` and ``summary``
            keys. ``None`` or an empty sequence yields an empty string.
            Entries that are not dicts are skipped.

    Returns:
        The formatted lines joined by newlines.
    """
    lines = []
    for index, step in enumerate(trace or []):
        if not isinstance(step, dict):
            continue
        # Fall back to the position when a step carries no explicit number.
        step_num = step.get("step", index)
        agent = str(step.get("agent") or "SYSTEM").replace("_", " ").upper()
        summary = step.get("summary") or ""
        lines.append(f"[STEP {step_num}] {agent} -> {summary}")

    return "\n".join(lines)
|
core/kenyan_context.py
ADDED
|
@@ -0,0 +1,453 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/kenyan_context.py
|
| 3 |
+
Shadow — AI Fraud Detection System
|
| 4 |
+
AMD Hackathon 2026
|
| 5 |
+
|
| 6 |
+
Localized Kenyan fraud intelligence knowledge base.
|
| 7 |
+
Provides structured constants for scam classification, language detection,
|
| 8 |
+
fraud scoring, and pattern matching tuned to the Kenyan threat landscape.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 12 |
+
# SCAM CATEGORIES
|
| 13 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 14 |
+
|
| 15 |
+
# Catalogue of scam types, keyed by a snake_case category id.
# Every entry has the same five fields:
#   label            - display name of the category
#   description      - one-sentence summary of how the scam works
#   risk_level       - "MEDIUM", "HIGH" or "CRITICAL"
#   keywords         - lower-case terms associated with the category
#   example_patterns - sample messages (English, Swahili and Sheng mixes)
SCAM_CATEGORIES = {
    "safaricom_impersonation": {
        "label": "Safaricom Impersonation",
        "description": "Fraudsters posing as Safaricom customer care, promotions, or network teams.",
        "risk_level": "HIGH",
        "keywords": [
            "safaricom", "customer care", "network upgrade", "sim registration",
            "promotion", "safaricom winner", "security update", "m-pesa pin",
            "deactivated", "update your details", "twaweza", "shinda"
        ],
        "example_patterns": [
            "Your Safaricom line will be suspended. Call 0700XXXXXX to verify.",
            "Niaje boss, laini yako ya Safaricom itafungwa leo. Tuma details zako nishughulikie haraka.",
        ],
    },

    "mpesa_reversal": {
        "label": "M-Pesa Reversal / Float Scam",
        "description": "Scammer claims mistaken send and asks for a refund, or fakes a transaction to an agent.",
        "risk_level": "HIGH",
        "keywords": [
            "sent by mistake", "refund", "reversal", "wrong number", "float",
            "agent", "rudisha", "nimekosea", "transaction failed", "pending"
        ],
        "example_patterns": [
            "Maze si ulifungiwa na MPESA yako? Boss nisamehe nilituma by mistake. Rudisha tu hiyo 500 haraka acha nishughulikie.",
            "I sent you KES 2,000 by mistake. Tafadhali rudisha niko na emergency ya hospitali."
        ],
    },

    "fuliza_scam": {
        "label": "Fuliza Abuse / Fake Alerts",
        "description": "Fake Fuliza overdraft notices demanding top-up fees or claiming fake debt.",
        "risk_level": "HIGH",
        "keywords": [
            "fuliza", "overdraft", "limit increased", "outstanding balance",
            "top-up fee", "crb", "clear your fuliza", "fuliza m-pesa"
        ],
        "example_patterns": [
            "Dear Customer, your Fuliza limit has been increased to KES 50,000. Send KES 500 to activate.",
            "Fuliza balance yako iko na arrears. Lipa sasa hivi au uwekwe CRB leo."
        ],
    },

    "betting_scam": {
        "label": "Betting / Jackpot Scam",
        "description": "Fake betting promotions, fixed matches, or 'jackpot won' messages.",
        "risk_level": "HIGH",
        "keywords": [
            "sportpesa", "betika", "odibets", "jackpot", "fixed odds", "multibet",
            "won", "congratulations", "registration fee", "sure bet", "odds"
        ],
        "example_patterns": [
            "Wewe umeshinda jackpot ya SportPesa ya 50K! Confirm details yako hapa: bit.ly/xxxxx Leo tu!",
            "100% Sure Fixed Matches available. Send KES 1,000 VIP registration fee to get today's odds."
        ],
    },

    "bonga_points_scam": {
        "label": "Bonga Points Scam",
        "description": "Fake notices to redeem Bonga points before expiry.",
        "risk_level": "MEDIUM",
        "keywords": [
            "bonga", "bonga points", "redeem", "expiry", "expire", "claim phones",
            "convert to cash", "dial *126#"
        ],
        "example_patterns": [
            "Your 15,000 Bonga points will expire today. Click here to redeem for KES 4,500 cash immediately.",
            "Redeem your Bonga points for a free smartphone. Tuma 500 ya delivery."
        ],
    },

    "kra_scam": {
        "label": "KRA Tax Scam",
        "description": "Fake Kenya Revenue Authority penalties, court summons, or tax arrears alerts.",
        "risk_level": "CRITICAL",
        "keywords": [
            "kra", "itax", "tax arrears", "overdue", "penalty", "court summons",
            "arrest", "warrant", "compliance", "pin certificate", "paye"
        ],
        "example_patterns": [
            "Mzee, hii ni KRA. Uko na arrears ya KES 23,450. Lipa ndani ya masaa 48 au utashtakiwa. Call 0756XXXXXX sasa.",
            "KRA Notice: Warrant of arrest issued for tax evasion. Call Inspector Kamau on 0722XXXXXX to clear."
        ],
    },

    "chama_scam": {
        "label": "Chama / SACCO Scam",
        "description": "Impersonation of SACCO officials or Chama treasurers requesting emergency transfers.",
        "risk_level": "MEDIUM",
        "keywords": [
            "chama", "sacco", "treasurer", "emergency fund", "contribution",
            "loan approval", "disbursement", "shares"
        ],
        "example_patterns": [
            "Niaje, member wa chama amepata ajali. Tuma contribution yako kwa hii namba mpya ya treasurer.",
            "Your SACCO loan of KES 100,000 is approved. Send KES 2,500 insurance fee to disburse."
        ],
    },

    "whatsapp_scam": {
        "label": "WhatsApp Deregistration Scam",
        "description": "Threatens WhatsApp account deletion and requests OTPs.",
        "risk_level": "CRITICAL",
        "keywords": [
            "whatsapp", "deregistered", "verification code", "blocked",
            "update whatsapp", "six digit code"
        ],
        "example_patterns": [
            "Your WhatsApp account is being registered on another device. Send the 6-digit code to cancel.",
            "Akaunti yako ya WhatsApp itafungwa. Confirm namba yako sasa."
        ],
    },

    "fake_job": {
        "label": "Fake Job Offer",
        "description": "Employment offers requiring upfront payments.",
        "risk_level": "MEDIUM",
        "keywords": [
            "job offer", "hiring now", "daily earnings", "registration fee",
            "training fee", "shortlisted", "send cv", "online job"
        ],
        "example_patterns": [
            "Urgent vacancy! Earn KSH 1,500/day. No experience needed. Send KSH 500 registration fee.",
            "Kazi iko. Pay KES 1,000 training fee to start immediately."
        ],
    },

    "sim_swap": {
        "label": "SIM Swap Attack",
        "description": "Social engineering to gain control of a phone number.",
        "risk_level": "CRITICAL",
        "keywords": [
            "sim swap", "sim replacement", "port number", "national id",
            "id card", "date of birth", "confirm identity", "verify account"
        ],
        "example_patterns": [
            "To complete your SIM replacement, provide your National ID and date of birth.",
            "Laini yako inabadilishwa. Call back immediately to cancel."
        ],
    },

    "otp_theft": {
        "label": "OTP / Code Theft",
        "description": "Phishing for one-time passwords via USSD push or fake app upgrades.",
        "risk_level": "CRITICAL",
        "keywords": [
            "otp", "verification code", "share code", "6 digit", "4 digit",
            "do not share", "stk push", "mobile banking update"
        ],
        "example_patterns": [
            "Safaricom is upgrading your account. The code we sent will confirm your new package. Nipatie hiyo code.",
            "Tumekutumia code ya M-banking. Soma hiyo code nikuwekee account sawa."
        ],
    },
}
|
| 171 |
+
|
| 172 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 173 |
+
# SHENG SCAM GLOSSARY
|
| 174 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 175 |
+
|
| 176 |
+
# Glossary mapping lower-case Sheng / Swahili terms and short phrases to an
# English gloss. Entries are grouped by the role the term plays in a scam
# message (money, scam actions, urgency, trust manipulation, hooks).
SHENG_SCAM_GLOSSARY = {
    # Financial / M-Pesa terms
    "pesa": "money",
    "mkwanja": "cash / money",
    "chapaa": "money",
    "hela": "money (Swahili)",
    "doh": "money (Sheng)",
    "send fare": "send money for transport (common scam pretext)",
    "nitumie": "send me (Swahili — often 'send me money')",
    "izo pesa": "that money",

    # Scam action terms
    "ronga": "con / trick",
    "thifte": "steal",
    "nganya": "con / overcharge",
    "mchoro": "scheme / plan",
    "mchezaji": "player / hustler / scammer",

    # Urgency / pressure terms
    "haraka": "hurry / urgency",
    "sasa hivi": "right now",
    "leo tu": "today only",
    "shida": "problem / trouble",
    "wacha mchezo": "stop playing around (pressure)",
    "acha story": "stop the stories / get to it",
    "funga deal": "close the deal",

    # Identity / trust manipulation
    "boss": "term used to create false familiarity",
    "chief": "term used to create false authority/trust",
    "mzee": "elder/sir — used to sound respectful/legitimate",
    "buda": "dad / old man",
    "budako": "your dad",

    # Hooks
    "nipigie": "call me",
    "sema": "say / tell me",
    "click hapa": "click here",
    "code yako": "your code",
    "confirm": "confirm",
}
|
| 217 |
+
|
| 218 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 219 |
+
# SWAHILI URGENCY PHRASES
|
| 220 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 221 |
+
|
| 222 |
+
# Lower-case Swahili (and mixed Swahili/English) phrases that signal pressure,
# grouped by tactic: time pressure, threats, authority claims and rewards.
# NOTE(review): the phrases are all lower-case, so callers presumably
# lower-case the message before matching — confirm against the matcher.
SWAHILI_URGENCY_PHRASES = [
    # Time pressure
    "haraka sana", "saa moja tu", "leo tu", "kesho itakuwa imechelewa",
    "muda mfupi", "ndani ya dakika", "usikawilie", "jibu sasa hivi", "fanya sasa",

    # Threat / consequence language
    "akaunti yako itafungwa", "nambari yako itakatwa", "utashtakiwa",
    "hatua za kisheria", "kupoteza pesa zako", "akaunti imezuiwa", "laini yako itazimwa",
    "uwekwe crb", "warrant ya kushikwa",

    # Authority impersonation
    "ofisi ya kra", "safaricom rasmi", "serikali ya kenya", "polisi wa kenya", "benki kuu",

    # Promise / reward urgency
    "umeshinda", "zawadi yako inakungoja", "pata pesa zako sasa", "nafasi ya mwisho",
]
|
| 238 |
+
|
| 239 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 240 |
+
# FRAUD SCORING INDICATORS
|
| 241 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 242 |
+
|
| 243 |
+
# Scoring table: indicator id -> weight, category and description.
# Weights are 3 (critical), 2 (high) or 1 (moderate).
FRAUD_SCORING_INDICATORS = {
    # Critical signals (weight 3)
    "requests_otp_or_pin": {"weight": 3, "category": "credential_theft", "description": "Asks for OTP, PIN, or password"},
    "requests_national_id": {"weight": 3, "category": "identity_theft", "description": "Requests National ID number"},
    "sim_swap_language": {"weight": 3, "category": "sim_swap", "description": "Contains SIM swap request patterns"},
    "external_link_present": {"weight": 3, "category": "phishing", "description": "Contains URL to external site"},
    "impersonates_authority": {"weight": 3, "category": "impersonation", "description": "Poses as KRA, Safaricom, bank, or gov agency"},
    "whatsapp_deregistration": {"weight": 3, "category": "account_takeover", "description": "Threatens WhatsApp deregistration"},

    # High signals (weight 2)
    "requests_upfront_payment": {"weight": 2, "category": "advance_fee", "description": "Asks for fee/deposit to claim prize or job"},
    "unrealistic_returns": {"weight": 2, "category": "investment_fraud", "description": "Promises guaranteed or extreme profits"},
    "urgency_language_detected": {"weight": 2, "category": "social_engineering", "description": "Uses high-pressure urgency phrases"},
    "threat_of_suspension": {"weight": 2, "category": "intimidation", "description": "Threatens account/line suspension"},
    "prize_win_claim": {"weight": 2, "category": "lottery_scam", "description": "Claims recipient has won a prize"},
    "wrong_number_reversal": {"weight": 2, "category": "mpesa_fraud", "description": "Claims wrong M-Pesa send, requests refund"},
    "fuliza_threat": {"weight": 2, "category": "intimidation", "description": "Threatens Fuliza CRB listing or demands fee"},

    # Moderate signals (weight 1)
    "sheng_scam_vocabulary": {"weight": 1, "category": "language", "description": "Contains known Sheng fraud vocabulary"},
    "swahili_urgency_phrase": {"weight": 1, "category": "language", "description": "Contains Swahili urgency/pressure phrases"},
    "unknown_sender_number": {"weight": 1, "category": "identity", "description": "Sender number not recognized or suspicious format"},
    "excessive_capitalization": {"weight": 1, "category": "formatting", "description": "Excessive use of CAPS for urgency"},
    "multiple_exclamation_marks": {"weight": 1, "category": "formatting", "description": "Three or more consecutive exclamation marks"},
    "calls_to_unknown_number": {"weight": 1, "category": "redirection", "description": "Directs user to call an unfamiliar number"},
}

# Sum of all indicator weights (the combo bonus is not included).
MAX_FRAUD_SCORE = sum(v["weight"] for v in FRAUD_SCORING_INDICATORS.values())


def calculate_fraud_score(triggered_indicators: list[str]) -> dict:
    """
    Calculate fraud score and risk level using absolute raw score thresholds.

    Each distinct known indicator contributes its weight once; repeated keys
    and keys missing from ``FRAUD_SCORING_INDICATORS`` are ignored. A bonus of
    2 is added when both a ``credential_theft`` and an ``impersonation``
    indicator are present.

    Args:
        triggered_indicators: Indicator ids that fired for a message. ``None``
            or an empty list gives a score of 0.

    Returns:
        A dict with:
            raw_score: summed weights plus any combo bonus.
            max_score: ``MAX_FRAUD_SCORE``.
            normalised_score: ``raw_score`` as a percentage of ``max_score``,
                rounded to one decimal and capped at 100.0.
            risk_level: ``CRITICAL`` (>= 6), ``HIGH`` (>= 4), ``MEDIUM``
                (>= 2) or ``LOW``.
            breakdown: indicator id -> copy of its table entry, plus
                ``combo_credential_impersonation`` when the bonus applied.
    """
    raw_score = 0
    breakdown = {}

    # dict.fromkeys de-duplicates while keeping first-seen order, so a key
    # reported twice cannot be counted twice.
    for key in dict.fromkeys(triggered_indicators or []):
        indicator = FRAUD_SCORING_INDICATORS.get(key)
        if indicator is None:
            continue
        raw_score += indicator["weight"]
        # Copy so callers editing the breakdown cannot alter the shared table.
        breakdown[key] = dict(indicator)

    # Category combo bonus
    categories_hit = {ind["category"] for ind in breakdown.values()}
    if "credential_theft" in categories_hit and "impersonation" in categories_hit:
        raw_score += 2
        breakdown["combo_credential_impersonation"] = {"weight": 2, "category": "combo", "description": "High-risk combo: Impersonation + Credential theft"}

    # The combo bonus is not part of MAX_FRAUD_SCORE, so cap the percentage.
    normalised = min(round((raw_score / MAX_FRAUD_SCORE) * 100, 1), 100.0)

    # Use absolute raw score thresholds for real-world accuracy
    if raw_score >= 6:
        risk_level = "CRITICAL"
    elif raw_score >= 4:
        risk_level = "HIGH"
    elif raw_score >= 2:
        risk_level = "MEDIUM"
    else:
        risk_level = "LOW"

    return {
        "raw_score": raw_score,
        "max_score": MAX_FRAUD_SCORE,
        "normalised_score": normalised,
        "risk_level": risk_level,
        "breakdown": breakdown,
    }
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 313 |
+
# LEGITIMATE VS SUSPICIOUS PATTERNS
|
| 314 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 315 |
+
|
| 316 |
+
# Reference profiles of genuine messages, keyed by message type.
# Every entry has a "description" and a list of human-readable
# "characteristics"; only "mpesa_confirmation" additionally carries
# "sender_patterns" (sender names) and "message_patterns" (regex strings).
LEGITIMATE_PATTERNS = {
    "mpesa_confirmation": {
        "description": "Genuine M-Pesa transaction confirmation from Safaricom shortcode",
        "sender_patterns": ["MPESA", "M-PESA", "Safaricom"],
        "message_patterns": [
            r"[A-Z0-9]{10} Confirmed\.",      # Transaction code format
            r"Ksh[\d,]+\.00 sent to",          # Send confirmation
            r"You have received Ksh",          # Receive confirmation
            r"New M-PESA balance",             # Balance notification
        ],
        "characteristics": [
            "Comes from official Safaricom shortcodes (e.g., MPESA)",
            "Contains valid 10-character transaction reference",
            "Never asks for PIN or personal info",
            "Balance shown matches expected transaction",
        ],
    },

    "bank_notification": {
        "description": "Legitimate bank alert from registered shortcode",
        "characteristics": [
            "Comes from registered bank shortcode",
            "Contains partial account number (masked)",
            "Does not ask for credentials",
        ],
    },

    "kra_itax": {
        "description": "Authentic KRA notification",
        "characteristics": [
            "Directs to itax.kra.go.ke (official domain only)",
            "Never asks for PIN via SMS",
            "References your specific KRA PIN number",
        ],
    },
}
|
| 352 |
+
|
| 353 |
+
# Reference profiles of fraudulent messages, keyed by tactic.
# Each entry pairs a "description" with a list of human-readable "signals";
# the signals are descriptive text, not machine-matchable patterns.
SUSPICIOUS_PATTERNS = {
    "spoofed_sender": {
        "description": "Sender name mimics a legitimate entity but uses a different number",
        "signals": [
            "Displays 'Safaricom' or 'KRA' as sender but from a mobile number (07xx)",
            "Sender ID slightly misspelled: 'Saf4ricom', 'M-Pes4'",
        ],
    },

    "credential_extraction": {
        "description": "Message designed to harvest security credentials",
        "signals": [
            "Asks for M-Pesa PIN",
            "Requests OTP or verification code via STK push or call",
            "Asks user to 'confirm' by sending a code",
        ],
    },

    "fake_mpesa_send": {
        "description": "Fabricated M-Pesa confirmation to trick agent or seller",
        "signals": [
            "Screenshot of M-Pesa confirmation (cannot be verified via SMS)",
            "Claims transaction reference that doesn't follow Safaricom format",
            "Transaction reference contains lowercase letters",
        ],
    },
}
|
| 380 |
+
|
| 381 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 382 |
+
# KENYAN PHONE NUMBER PATTERNS
|
| 383 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 384 |
+
|
| 385 |
+
# Phone-number and sender-ID reference data.
# NOTE: the values are not uniform. The four carrier keys hold lists of
# anchored regex strings, "shortcodes" is a dict of sender ID -> description,
# and "suspicious_prefixes" is a list of literal dialling prefixes.
# Each carrier regex accepts a "+254", "254" or "0" lead-in followed by the
# carrier prefix and the remaining digits (nine digits after the lead-in).
KENYAN_PHONE_PATTERNS = {
    # Updated 2025 prefixes
    "safaricom": [r"^(\+?254|0)(7(0[0-9]|1[0-9]|2[0-9]|4[0-3]|4[5-6]|48|5[7-9]|6[8-9]|9[0-9])|1(1[0-5]))\d{6}$"],
    "airtel": [r"^(\+?254|0)(7(3[0-9]|5[0-6]|6[2]|8[0-9])|1(0[0-6]))\d{6}$"],
    "telkom": [r"^(\+?254|0)77\d{7}$"],
    "equitel": [r"^(\+?254|0)76[3-6]\d{6}$"],
    # Alphanumeric sender IDs treated as official, mapped to who they belong to.
    "shortcodes": {
        "MPESA": "Safaricom M-Pesa official sender",
        "Safaricom": "Safaricom official communications",
        "KRA": "Kenya Revenue Authority",
        "Equity": "Equity Bank",
        "KCB": "KCB Bank",
        "Co-opBank": "Co-operative Bank",
    },
    # International dialling prefixes flagged as suspicious for a Kenyan recipient.
    "suspicious_prefixes": [
        "+1",    # US numbers used in some scams
        "+44",   # UK numbers
        "+234",  # Nigerian numbers (419 scams)
        "+27",   # South African numbers
    ],
}
|
| 406 |
+
|
| 407 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 408 |
+
# RISK LEVEL METADATA
|
| 409 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 410 |
+
|
| 411 |
+
# Display metadata for each risk level returned by calculate_fraud_score().
# "score_range" is an inclusive (low, high) pair of RAW scores; the lower
# bounds (6 / 4 / 2 / 0) mirror the thresholds used in calculate_fraud_score().
# NOTE(review): the CRITICAL upper bound of 100 looks like an open-ended cap
# rather than a reachable raw score — confirm before using it for validation.
RISK_LEVELS = {
    "CRITICAL": {
        "score_range": (6, 100),
        "color": "#FF1744",
        "emoji": "🚨",
        "action": "Do NOT comply. Block sender. Report to Safaricom/DCI.",
        "description": "Almost certainly a scam. Immediate danger.",
    },
    "HIGH": {
        "score_range": (4, 5),
        "color": "#FF6D00",
        "emoji": "⚠️",
        "action": "Do not share any information. Verify independently.",
        "description": "Strong fraud indicators present.",
    },
    "MEDIUM": {
        "score_range": (2, 3),
        "color": "#FFD600",
        "emoji": "🔶",
        "action": "Proceed with caution. Verify sender identity.",
        "description": "Some suspicious elements detected.",
    },
    "LOW": {
        "score_range": (0, 1),
        "color": "#00C853",
        "emoji": "✅",
        "action": "Appears safe, but always stay alert.",
        "description": "No significant fraud signals detected.",
    },
}
|
| 441 |
+
|
| 442 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 443 |
+
# REPORTING CONTACTS
|
| 444 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 445 |
+
|
| 446 |
+
# Where to report fraud: display label -> contact details as free-form text
# (phone numbers, shortcodes and e-mail addresses are not normalised).
REPORTING_CONTACTS = {
    "Safaricom Fraud SMS": "Forward SMS to 333 (Free)",
    "Safaricom Care": "100 or 0722 000 000",
    "DCI Cybercrime Unit": "+254 20 4343000 / cybercrime@dci.go.ke",
    "CA Kenya": "complaints@ca.go.ke",
    "KRA Fraud Tip": "fraudtipoffs@kra.go.ke",
    "Banking Fraud (CBK)": "cps@centralbank.go.ke",
}
|
core/llm_client.py
ADDED
|
@@ -0,0 +1,483 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import time
|
| 5 |
+
from typing import Dict, Any, Generator
|
| 6 |
+
|
| 7 |
+
from core.osint_dataset import classify_synthetic_message
|
| 8 |
+
|
| 9 |
+
# The OpenAI SDK is optional. When it cannot be imported, OpenAI is set to
# None, which ShadowLLMClient.__init__ checks in order to force mock mode.
try:
    from openai import OpenAI, APIConnectionError, APITimeoutError, RateLimitError
except ImportError:
    OpenAI = None
    # Bind the SDK exception names to Exception so that `except` clauses
    # referencing them remain valid even without the SDK installed.
    APIConnectionError = Exception
    APITimeoutError = Exception
    RateLimitError = Exception

# Configure lightweight structured logging
# NOTE: basicConfig() runs at import time and configures the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("ShadowLLM")
|
| 20 |
+
|
| 21 |
+
class ShadowLLMClient:
    """
    Lightweight execution bridge between Shadow agents and AMD Developer Cloud (vLLM / Qwen).
    Built for resilience in hackathon/demo environments.

    Configuration is read from environment variables at construction time:
    SHADOW_API_BASE, SHADOW_MODEL, SHADOW_API_KEY, SHADOW_TIMEOUT (seconds)
    and SHADOW_MOCK_MODE ("true" enables mock mode). Whenever the live
    endpoint cannot be used, the client answers from deterministic mock data
    instead of raising.
    """

    def __init__(self):
        """Read configuration from the environment and build the API client (or enter mock mode)."""
        self.api_base = os.getenv("SHADOW_API_BASE", "https://api.openai.com/v1")
        self.model = os.getenv("SHADOW_MODEL", "qwen-2.5-7b")
        self.api_key = os.getenv("SHADOW_API_KEY", "dummy-key-for-mock")
        self.timeout = float(os.getenv("SHADOW_TIMEOUT", "30.0"))
        self.mock_mode = os.getenv("SHADOW_MOCK_MODE", "false").lower() == "true"

        if OpenAI is None:
            logger.warning("openai package not found. Forcing MOCK MODE.")
            self.mock_mode = True

        if not self.mock_mode:
            self.client = OpenAI(
                api_key=self.api_key,
                base_url=self.api_base,
                timeout=self.timeout
            )
        else:
            self.client = None
            logger.info("ShadowLLMClient initialized in MOCK MODE.")

    def _clean_json(self, response_text: str) -> str:
        """
        Strip markdown code fences and clean output to raw JSON.

        Besides a leading/trailing fence, this also handles replies where the
        model wraps the JSON in prose or uses another fence tag: if the
        cleaned text does not already start like a JSON value, the outermost
        ``{...}`` / ``[...]`` span is extracted. Text that already starts
        with ``{``, ``[`` or ``"`` is returned untouched.
        """
        text = response_text.strip()
        if text.startswith("```json"):
            text = text[7:]
        elif text.startswith("```"):
            text = text[3:]

        if text.endswith("```"):
            text = text[:-3]

        text = text.strip()

        # Fallback for "Here is the result: ```json {...} ``` Hope it helps".
        # A leading quote means a JSON string scalar, which must stay intact.
        if text and text[0] not in '{["':
            openers = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
            if openers:
                start = min(openers)
                closer = "}" if text[start] == "{" else "]"
                end = text.rfind(closer)
                if end > start:
                    text = text[start:end + 1]

        return text

    def generate_response(self, system_prompt: str, user_input: str) -> Dict[str, Any]:
        """
        Generate a response with retry logic and JSON parsing.
        Returns a parsed dictionary, automatically falling back to mock mode on persistent failure.

        Connection, timeout and rate-limit errors are retried with exponential
        backoff; replies that are not a JSON object are retried immediately.
        Any other exception falls back to the mock response at once. The
        method never raises and always returns a dict.
        """
        if self.mock_mode:
            return self._get_mock_response(system_prompt, user_input)

        request_kwargs = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_input}
            ],
            "temperature": 0.0,
        }
        # JSON mode is requested for every model except Qwen variants; the
        # parameter is omitted (rather than sent as null) when not wanted.
        if "qwen" not in self.model.lower():
            request_kwargs["response_format"] = {"type": "json_object"}

        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = self.client.chat.completions.create(**request_kwargs)

                raw_content = response.choices[0].message.content
                parsed = json.loads(self._clean_json(raw_content))
                if isinstance(parsed, dict):
                    return parsed
                # Valid JSON but not an object: callers expect a dict, so
                # treat it like a parse failure and try again.
                logger.warning(
                    f"Non-object JSON on attempt {attempt + 1}/{max_retries}: got {type(parsed).__name__}"
                )

            except (APIConnectionError, APITimeoutError, RateLimitError) as e:
                logger.warning(f"API Error on attempt {attempt + 1}/{max_retries}: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # Exponential backoff

            except json.JSONDecodeError as e:
                logger.warning(f"JSON Parse Error on attempt {attempt + 1}/{max_retries}: {e}")

            except Exception as e:
                logger.error(f"Unexpected error: {e}")
                logger.error("Falling back to mock response instantly.")
                return self._get_mock_response(system_prompt, user_input)

        logger.error("Max retries reached. Falling back to mock response to prevent demo freeze.")
        return self._get_mock_response(system_prompt, user_input)

    def stream_response(self, system_prompt: str, user_input: str) -> Generator[str, None, None]:
        """
        Stream the LLM response (useful for UI feedback).

        In mock mode the mock JSON is emitted piece by piece with a short
        delay. If the live stream fails, an error notice followed by the mock
        JSON is yielded instead of raising.
        """
        if self.mock_mode:
            mock_data = json.dumps(self._get_mock_response(system_prompt, user_input), indent=2)
            for chunk in mock_data.split(" "):
                yield chunk + " "
                time.sleep(0.02)
            return

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_input}
                ],
                temperature=0.0,
                stream=True
            )
            for chunk in response:
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            logger.error(f"Streaming failed: {e}")
            yield f"\n[Connection Error: {e}. Falling back to mock data...]\n\n"
            mock_data = json.dumps(self._get_mock_response(system_prompt, user_input), indent=2)
            yield mock_data

    def _get_mock_response(self, system_prompt: str, user_input: str) -> Dict[str, Any]:
        """
        Return deterministic mock responses based on input.
        Provides robust fallback for SAFE, SUSPICIOUS, HIGH RISK, and CRITICAL scenarios.

        The message is classified with classify_synthetic_message(); the
        resulting category selects one of the four risk labels, and the agent
        named in ``system_prompt`` selects which mock payload is returned.
        """
        # If user_input is JSON from a pipeline step, extract just the original message
        message_text = user_input
        try:
            parsed_input = json.loads(user_input)
        except (json.JSONDecodeError, TypeError):
            # Plain text (or a non-string input): classify it as given.
            parsed_input = None
        if isinstance(parsed_input, dict) and isinstance(parsed_input.get("message"), str):
            message_text = parsed_input["message"]

        # Determine simulated risk level using OSINT precheck
        precheck = classify_synthetic_message(message_text)
        category = precheck.get("probable_category", "unknown")

        category_risk = {
            "legitimate_transaction": "SAFE",
            "betting_scam": "SUSPICIOUS",
            "mpesa_reversal": "HIGH RISK",
            "safaricom_impersonation": "CRITICAL",
            "fuliza_scam": "CRITICAL",
            "kra_penalty": "CRITICAL",
            "otp_sim_swap": "CRITICAL",
        }
        risk = category_risk.get(category)
        if risk is None:
            # Fallback mapping from OSINT risk level if unhandled
            osint_risk = precheck.get("risk_level", "HIGH")
            risk_mapping = {"LOW": "SAFE", "MEDIUM": "SUSPICIOUS", "HIGH": "HIGH RISK", "CRITICAL": "CRITICAL"}
            risk = risk_mapping.get(osint_risk, "HIGH RISK")

        # Route to appropriate mock based on the agent's system prompt signature
        if "Language Intelligence Agent" in system_prompt:
            return self._mock_language_agent(risk)
        elif "Threat Pattern Agent" in system_prompt:
            return self._mock_threat_pattern_agent(risk)
        elif "Risk Scoring Agent" in system_prompt:
            return self._mock_risk_scoring_agent(risk)
        elif "Action Agent" in system_prompt:
            return self._mock_action_agent(risk)
        else:
            # Generic fallback
            return {"status": "success", "mock": True, "risk": risk}

    def _mock_language_agent(self, risk: str) -> Dict[str, Any]:
        """Return the canned Language Intelligence Agent payload for ``risk`` (unknown labels get the CRITICAL one)."""
        if risk == "SAFE":
            return {
                "primary_language": "english",
                "secondary_languages": [],
                "is_code_switched": False,
                "sheng_terms_detected": [],
                "swahili_urgency_phrases": [],
                "formality_level": "formal",
                "language_anomalies": [],
                "linguistic_fraud_signals": [],
                "confidence": 0.99,
                "reasoning_summary": "Standard formal English, no anomalies detected."
            }
        elif risk == "SUSPICIOUS":
            return {
                "primary_language": "swahili",
                "secondary_languages": ["english", "sheng"],
                "is_code_switched": True,
                "sheng_terms_detected": ["bet", "shinda"],
                "swahili_urgency_phrases": ["cheza sasa"],
                "formality_level": "informal",
                "language_anomalies": ["Overly enthusiastic tone"],
                "linguistic_fraud_signals": ["Enticing language for gambling"],
                "confidence": 0.88,
                "reasoning_summary": "Informal language mixing Swahili and Sheng, typical of betting promos."
            }
        elif risk == "HIGH RISK":
            return {
                "primary_language": "swahili",
                "secondary_languages": ["sheng"],
                "is_code_switched": True,
                "sheng_terms_detected": ["tuma", "rudisha", "haraka"],
                "swahili_urgency_phrases": ["rudisha pesa tafadhali", "tuma haraka"],
                "formality_level": "informal",
                "language_anomalies": ["Pleading tone mixed with demands"],
                "linguistic_fraud_signals": ["High urgency", "Emotional manipulation"],
                "confidence": 0.92,
                "reasoning_summary": "Urgent Swahili/Sheng mix requesting money reversal."
            }
        else:  # CRITICAL
            return {
                "primary_language": "english",
                "secondary_languages": ["swahili"],
                "is_code_switched": True,
                "sheng_terms_detected": [],
                "swahili_urgency_phrases": ["akaunti yako itafungwa"],
                "formality_level": "impersonating-formal",
                "language_anomalies": ["Poor grammar for an official entity", "Inconsistent casing"],
                "linguistic_fraud_signals": ["Threatening tone", "Authority impersonation"],
                "confidence": 0.95,
                "reasoning_summary": "Highly anomalous language attempting to sound like an official entity."
            }

    def _mock_threat_pattern_agent(self, risk: str) -> Dict[str, Any]:
        """Return the canned Threat Pattern Agent payload for ``risk`` (unknown labels get the CRITICAL one)."""
        if risk == "SAFE":
            return {
                "scam_categories_detected": [],
                "primary_category": "none",
                "threat_signals": {},
                "impersonated_entity": "None",
                "manipulation_hook": "none",
                "extracted_demands": [],
                "legitimacy_evidence_for": ["Standard transaction format"],
                "legitimacy_evidence_against": [],
                "is_likely_legitimate": True,
                "reasoning_summary": "No threat patterns detected."
            }
        elif risk == "SUSPICIOUS":
            return {
                "scam_categories_detected": [
                    {
                        "category_id": "betting_scam",
                        "category_label": "Fake Betting / Prize",
                        "confidence": 0.85,
                        "evidence": ["Mentions betting/prize companies"]
                    }
                ],
                "primary_category": "betting_scam",
                "threat_signals": {
                    "unrealistic_promises": True,
                    "requests_small_fee": False
                },
                "impersonated_entity": "SportPesa/Betika",
                "manipulation_hook": "greed",
                "extracted_demands": ["Click link", "Place bet"],
                "legitimacy_evidence_for": [],
                "legitimacy_evidence_against": ["Unsolicited betting promo"],
                "is_likely_legitimate": False,
                "reasoning_summary": "Suspicious betting or prize claim detected."
            }
        elif risk == "HIGH RISK":
            return {
                "scam_categories_detected": [
                    {
                        "category_id": "mpesa_reversal",
                        "category_label": "M-Pesa Reversal",
                        "confidence": 0.95,
                        "evidence": ["Asks for refund of falsely sent money"]
                    }
                ],
                "primary_category": "mpesa_reversal",
                "threat_signals": {
                    "urgency_language_detected": True,
                    "wrong_number_reversal": True,
                    "unknown_sender_number": True
                },
                "impersonated_entity": "None",
                "manipulation_hook": "urgency",
                "extracted_demands": ["Send money back"],
                "legitimacy_evidence_for": [],
                "legitimacy_evidence_against": ["Sent from personal number, not Safaricom shortcode"],
                "is_likely_legitimate": False,
                "reasoning_summary": "Classic M-Pesa reversal scam pattern matched."
            }
        else:  # CRITICAL
            return {
                "scam_categories_detected": [
                    {
                        "category_id": "authority_impersonation",
                        "category_label": "Authority Impersonation",
                        "confidence": 0.98,
                        "evidence": ["Claims to be Safaricom/Fuliza/KRA", "Requests OTP"]
                    }
                ],
                "primary_category": "authority_impersonation",
                "threat_signals": {
                    "requests_otp_or_pin": True,
                    "impersonates_authority": True,
                    "account_suspension_threat": True
                },
                "impersonated_entity": "Safaricom/Fuliza/KRA",
                "manipulation_hook": "fear",
                "extracted_demands": ["Share OTP", "Click verification link"],
                "legitimacy_evidence_for": [],
                "legitimacy_evidence_against": ["Sent from personal number", "Official entities don't ask for OTP"],
                "is_likely_legitimate": False,
                "reasoning_summary": "Critical authority impersonation scam attempting account takeover."
            }

    def _mock_risk_scoring_agent(self, risk: str) -> Dict[str, Any]:
        """
        Return the canned Risk Scoring Agent payload for ``risk``.

        ``raw_score`` is derived from the listed indicator weights so the two
        can never disagree, and each score sits inside the raw-score band of
        its level (LOW 0-1, MEDIUM 2-3, HIGH 4-5, CRITICAL 6+). Unknown
        labels get the CRITICAL payload, like the other mock agents.
        """
        profiles = {
            "SAFE": {
                "level": "LOW",
                "indicators": [],
                "confidence": 0.95,
                "summary": "Score 0. Safe.",
            },
            "SUSPICIOUS": {
                "level": "MEDIUM",
                "indicators": [
                    {"indicator": "suspicious_keywords", "weight": 3, "evidence": "Betting/prize keywords"}
                ],
                "confidence": 0.85,
                "summary": "Risk scored as MEDIUM due to suspicious betting patterns.",
            },
            "HIGH RISK": {
                "level": "HIGH",
                "indicators": [
                    {"indicator": "reversal_request", "weight": 5, "evidence": "Asking to return funds"}
                ],
                "confidence": 0.90,
                "summary": "Risk scored as HIGH based on M-Pesa reversal indicators.",
            },
            "CRITICAL": {
                "level": "CRITICAL",
                "indicators": [
                    {"indicator": "impersonates_authority", "weight": 5, "evidence": "Claims to be official entity"},
                    {"indicator": "requests_otp_or_pin", "weight": 4, "evidence": "Mentions OTP or verification"}
                ],
                "confidence": 0.98,
                "summary": "Risk scored as CRITICAL due to critical impersonation and credential theft attempts.",
            },
        }
        profile = profiles.get(risk, profiles["CRITICAL"])
        indicators = profile["indicators"]

        return {
            "raw_score": sum(item["weight"] for item in indicators),
            "risk_level": profile["level"],
            "score_override_applied": False,
            "override_reason": None,
            "triggered_indicators": indicators,
            "top_risk_drivers": [item["indicator"] for item in indicators],
            "confidence": profile["confidence"],
            "reasoning_summary": profile["summary"],
        }

    def _mock_action_agent(self, risk: str) -> Dict[str, Any]:
        """Return the canned Action Agent payload for ``risk`` (unknown labels get the CRITICAL one)."""
        if risk == "SAFE":
            return {
                "verdict": "SAFE",
                "risk_level": "LOW",
                "scam_type": "None detected",
                "dashboard_summary": "Message appears legitimate.",
                "explanation": {
                    "what_is_happening": "This looks like a standard communication.",
                    "how_the_scam_works": "N/A",
                    "red_flags_found": []
                },
                "recommended_actions": [
                    {"priority": 1, "action": "No action needed", "reason": "Message is safe"}
                ],
                "do_not_do": [],
                "reporting": {"should_report": False, "contacts": []},
                "safety_tip": {
                    "english": "Always verify unexpected messages.",
                    "swahili": "Daima thibitisha ujumbe usiotarajiwa.",
                    "sheng": "Kuwa mjanja na ma text za ufala."
                },
                "confidence": 0.99
            }
        elif risk == "SUSPICIOUS":
            return {
                "verdict": "SUSPICIOUS",
                "risk_level": "MEDIUM",
                "scam_type": "Possible Betting Scam",
                "dashboard_summary": "Suspicious betting or prize claim.",
                "explanation": {
                    "what_is_happening": "You received a message about a potential prize or bet.",
                    "how_the_scam_works": "Scammers promise large returns to steal small upfront fees.",
                    "red_flags_found": ["Unrealistic returns promised", "Unknown sender"]
                },
                "recommended_actions": [
                    {"priority": 1, "action": "Do not send any money", "reason": "High chance of loss"}
                ],
                "do_not_do": ["Do not click any links", "Do not reply"],
                "reporting": {
                    "should_report": True,
                    "contacts": [{"name": "Safaricom SMS", "value": "333", "reason": "Spam reporting"}]
                },
                "safety_tip": {
                    "english": "If it's too good to be true, it probably is.",
                    "swahili": "Kama ni nzuri sana kuwa kweli, labda ni uongo.",
                    "sheng": "Cheza chini, hizi form za quick money ni scam."
                },
                "confidence": 0.85
            }
        elif risk == "HIGH RISK":
            return {
                "verdict": "SCAM",
                "risk_level": "HIGH",
                "scam_type": "M-Pesa Reversal Fraud",
                "dashboard_summary": "High Risk: M-Pesa Reversal Scam Detected",
                "explanation": {
                    "what_is_happening": "Someone is pretending to have sent you money by mistake.",
                    "how_the_scam_works": "They send a fake SMS looking like M-Pesa, then call you urgently asking for a refund.",
                    "red_flags_found": ["Fake M-Pesa format", "High urgency", "Sent from personal number"]
                },
                "recommended_actions": [
                    {"priority": 1, "action": "Ignore the message completely", "reason": "It is a known scam tactic"},
                    {"priority": 2, "action": "Check your actual M-Pesa balance via USSD *334#", "reason": "To confirm no money actually arrived"}
                ],
                "do_not_do": ["Do NOT send money back", "Do NOT share your M-Pesa PIN"],
                "reporting": {
                    "should_report": True,
                    "contacts": [{"name": "Safaricom Fraud SMS", "value": "333", "reason": "Free official reporting line"}]
                },
                "safety_tip": {
                    "english": "Never refund money directly. Tell them to contact Safaricom to reverse it.",
                    "swahili": "Usirudishe pesa moja kwa moja. Waambie wapigie Safaricom kuirejesha.",
                    "sheng": "Zima huyo msee, mwambie apigie customer care. Usitume doo."
                },
                "confidence": 0.98
            }
        else:  # CRITICAL
            return {
                "verdict": "SCAM",
                "risk_level": "CRITICAL",
                "scam_type": "Authority Impersonation",
                "dashboard_summary": "Critical: Account Takeover Attempt",
                "explanation": {
                    "what_is_happening": "A scammer is impersonating Safaricom, Fuliza, or KRA to steal your account.",
                    "how_the_scam_works": "They threaten you with account suspension or fake loans to trick you into sharing your OTP or PIN.",
                    "red_flags_found": ["Requests OTP", "Impersonates official entity", "Threatens account suspension"]
                },
                "recommended_actions": [
                    {"priority": 1, "action": "Do not share any OTP or PIN", "reason": "Official entities never ask for this."}
                ],
                "do_not_do": ["Do NOT share your OTP", "Do NOT click any links"],
                "reporting": {
                    "should_report": True,
                    "contacts": [{"name": "Safaricom Fraud SMS", "value": "333", "reason": "Free official reporting line"}]
                },
                "safety_tip": {
                    "english": "Never share your OTP or PIN with anyone, even if they claim to be from Safaricom.",
                    "swahili": "Usishiriki OTP au PIN yako na mtu yeyote, hata kama anadai kutoka Safaricom.",
                    "sheng": "Chunga sana, usiwahi peana OTP yako kwa mtu, hata kama anajiita Safaricom."
                },
                "confidence": 0.99
            }
|
| 482 |
+
|
| 483 |
+
# Hybrid Mode: OSINT Precheck Integrated
|
core/osint_dataset.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/osint_dataset.py
|
| 3 |
+
Kenyan fraud OSINT synthetic dataset and intelligence layer.
|
| 4 |
+
Provides a deterministic threat simulation, prompt grounding, and testing layer.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
# 1. Load Structure (Metadata & SCAM_CATEGORIES)
|
| 10 |
+
# Descriptive provenance record for this synthetic dataset (where the patterns
# were drawn from, which region they cover, and when the set was last revised).
# None of the functions defined in this module read it.
METADATA = {
    "source": "OSINT & Public Cyber Threat Advisories (Safaricom, DCI, KRA, Africa Check)",
    "region": "Kenya",
    "target_audience": "Hackathon MVP - Defensive AI Training",
    "last_updated": "2026-05-06"
}
|
| 16 |
+
|
| 17 |
+
SCAM_CATEGORIES = {
|
| 18 |
+
"mpesa_reversal": {
|
| 19 |
+
"name": "M-Pesa Fake Reversal Scam",
|
| 20 |
+
"common_structure": "Fake system generated M-Pesa SMS + Follow-up frantic text/call begging for a refund.",
|
| 21 |
+
"linguistic_markers": ["by mistake", "rudisha", "mtoto yuko hosi", "tafadhali", "balance is *LOCKED*"],
|
| 22 |
+
"red_flags": ["Sender is a regular phone number (07xx) not 'MPESA'", "Grammar errors in system text", "High emotional pressure"],
|
| 23 |
+
"synthetic_examples": [
|
| 24 |
+
"MPESA ODG1LIPNX1 Confirmed.You have received Ksh 8,500 from JOHN MWANGI 06/05/26 New M-PESA balance is *(LOCKED)* Pay bills via M-PESA.",
|
| 25 |
+
"Maze si ulifungiwa na MPESA yako? Boss nisamehe nilituma by mistake. Rudisha tu hiyo 5k haraka acha nishughulikie mgonjwa.",
|
| 26 |
+
"Aki naomba urudishe ile pesa nimekutumia by mistake saa hii. Ni ya fees ya mtoto tafadhali."
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
"safaricom_impersonation": {
|
| 30 |
+
"name": "Safaricom Impersonation / USSD Hijack",
|
| 31 |
+
"common_structure": "Authority figure warning about account block + instruction to dial a USSD code (usually call forwarding or M-Pesa pin reset).",
|
| 32 |
+
"linguistic_markers": ["line yako imefungwa", "double registration", "customer care", "piga *33*"],
|
| 33 |
+
"red_flags": ["Sender is not 0722000000", "Use of fear/threat of disconnection", "Instructions to dial obscure MMI/USSD codes"],
|
| 34 |
+
"synthetic_examples": [
|
| 35 |
+
"Habari kutoka Safaricom. Laini yako inatumika na mtu mwingine (double registration). Piga *33*0000* kuzuia hii haraka.",
|
| 36 |
+
"Dear Customer, your M-Pesa account will be suspended in 2 hours due to lack of update. Click https://safaricom-update.cc to verify.",
|
| 37 |
+
"Customer care: We have detected unusual activity on your line. Reply with your ID number and M-Pesa PIN to secure your account."
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
"fuliza_scam": {
|
| 41 |
+
"name": "Fuliza Limit Boost Scam",
|
| 42 |
+
"common_structure": "Social media style text offering an impossible upgrade to Safaricom's overdraft limit, demanding an upfront 'activation' fee.",
|
| 43 |
+
"linguistic_markers": ["sema thanks", "nikuboostie fuliza", "hakuna stress", "limit up to 100k", "fuliza limit yako"],
|
| 44 |
+
"red_flags": ["Promises to bypass official Safaricom algorithms", "Requires upfront payment to unlock credit", "Uses excessive Sheng/slang for a financial product"],
|
| 45 |
+
"synthetic_examples": [
|
| 46 |
+
"KAMA ULIPATA FULIZA SEMA THANKS. Inbox nikuboostie fuliza from 0 to 100k in 2 minutes hii January hakuna stress.",
|
| 47 |
+
"Safaricom promotion: Kuongeza Fuliza limit yako hadi 50,000, tuma KES 300 kwa Till 889XXX for system activation.",
|
| 48 |
+
"Niaje buda, niko na mchoro wa ku-hack Fuliza. Tuma 500 nikuwekee limit ya 80k sai sai."
|
| 49 |
+
]
|
| 50 |
+
},
|
| 51 |
+
"kra_penalty": {
|
| 52 |
+
"name": "Fake KRA Penalty / Arrest Threat",
|
| 53 |
+
"common_structure": "Impersonation of Kenya Revenue Authority (KRA) citing unpaid taxes, threatening arrest, and providing a rogue payment link or number.",
|
| 54 |
+
"linguistic_markers": ["tax arrears", "utashtakiwa", "masaa 48", "warrant of arrest", "KRA ALERT"],
|
| 55 |
+
"red_flags": ["KRA does not issue arrest warrants via SMS", "Payment directed to a mobile number instead of official PayBill 220220", "Extreme urgency"],
|
| 56 |
+
"synthetic_examples": [
|
| 57 |
+
"KRA ALERT: Uko na tax arrears ya KES 23,450 kwa iTax system yako. Lipa ndani ya masaa 48 au utashtakiwa. Call 0756XXXXXX sasa.",
|
| 58 |
+
"FINAL NOTICE: A warrant of arrest has been issued against your ID for tax evasion. Pay KES 5,000 clearance fee via link: kra-clearance.info",
|
| 59 |
+
"Mzee, hii ni KRA. Uko na penalty ya 15k. Wacha mchezo, lipa sai ndio tusitume mapolisi kwa ofisi yako."
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
"betting_scam": {
|
| 63 |
+
"name": "Betting / Jackpot Scams",
|
| 64 |
+
"common_structure": "False notification of a massive jackpot win from popular Kenyan betting sites (SportPesa, Betika), requesting a 'withdrawal fee'.",
|
| 65 |
+
"linguistic_markers": ["umeshinda jackpot", "SportPesa ya 50K", "registration fee", "withdrawal code"],
|
| 66 |
+
"red_flags": ["Winning a contest you never entered", "Requirement to pay money to receive money", "Sender uses a standard phone number"],
|
| 67 |
+
"synthetic_examples": [
|
| 68 |
+
"Hongera! Wewe ndio mshindi wa 500k SportPesa Weekly Jackpot. Tuma 2,500 ya registration fee kupokea pesa kwa MPESA yako leo.",
|
| 69 |
+
"Betika: Namba yako imechaguliwa kushinda KES 75,000. Tuma 1,050 processing fee kwa Till namba 554XXX kupata withdrawal code.",
|
| 70 |
+
"Boss, niko na fixed matches za leo uhakika 100%. Tuma 1k nikutumie odds za 50, usikose hii form."
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
"bonga_points": {
|
| 74 |
+
"name": "Bonga Points Theft",
|
| 75 |
+
"common_structure": "Fake expiry warning designed to panic the user into clicking a phishing link, or an agent tricking the user into transferring points.",
|
| 76 |
+
"linguistic_markers": ["zina-expire leo", "redeem for cash", "Bonga points zako"],
|
| 77 |
+
"red_flags": ["Links leading to non-Safaricom domains", "Unsolicited requests for Bonga PINs"],
|
| 78 |
+
"synthetic_examples": [
|
| 79 |
+
"Safaricom: Bonga points zako (10,500) zina-expire leo saa sita usiku. Click hapa kuredeem kwa cash haraka: bit.ly/bonga-redeem",
|
| 80 |
+
"Dear customer, convert your 5,000 Bonga points to KES 1,500 cash. Reply with your M-Pesa PIN to authorize transfer.",
|
| 81 |
+
"Your expenditure of Ksh50 worth 250 points to Otenyo Momanyi Aruya Till 5307214 was successful."
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
"whatsapp_deregistration": {
|
| 85 |
+
"name": "WhatsApp Deregistration / OTP Theft",
|
| 86 |
+
"common_structure": "Scammer triggers a WhatsApp login code to the victim's phone, then messages claiming they sent it by mistake to steal the account.",
|
| 87 |
+
"linguistic_markers": ["nilituma code", "by mistake", "naomba unitumie", "WhatsApp verification"],
|
| 88 |
+
"red_flags": ["Anyone asking for a 6-digit SMS code", "Sudden WhatsApp registration SMS when you aren't logging in"],
|
| 89 |
+
"synthetic_examples": [
|
| 90 |
+
"Boss nisamehe, nilituma code ya WhatsApp kwa namba yako by mistake. Naomba unitumie hiyo code 6-digits haraka niingie kwa group ya kazi.",
|
| 91 |
+
"WARNING: Your WhatsApp is being deregistered on this device. Share the SMS code sent to you to cancel the deregistration.",
|
| 92 |
+
"Niaje buda, simu yangu imeharibika, na-login kwa simu mpya. Nimekutumia code, isomee ndio ni-activate WhatsApp."
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
"fake_jobs": {
|
| 96 |
+
"name": "Fake Job / Recruitment Scams",
|
| 97 |
+
"common_structure": "Offer for a lucrative, often international or NGO job (UN, TSC) that requires an upfront 'facilitation' or 'medical' fee.",
|
| 98 |
+
"linguistic_markers": ["shortlisted", "NGO jobs", "medical fee", "facilitation fee", "interview tomorrow"],
|
| 99 |
+
"red_flags": ["Paying to get a job", "Guaranteed employment", "Interviews scheduled via informal SMS"],
|
| 100 |
+
"synthetic_examples": [
|
| 101 |
+
"Dear applicant, you have been shortlisted for the UN NGO Data Clerk position. Pay KES 1,500 medical fee to Till 8392XX before interview tomorrow.",
|
| 102 |
+
"TSC Recruitment 2026: Umechaguliwa. Tuma 3,000 ya processing fee kwa HR Manager 0712XXXXXX kureserve position yako.",
|
| 103 |
+
"Niko na mchoro ya job huku Qatar. Tuma 5k ya kuanzisha process ya visa, mshahara ni 150k per month. Wacha mchezo."
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
"chama_sacco": {
|
| 107 |
+
"name": "Chama / SACCO / Family Emergency",
|
| 108 |
+
"common_structure": "Targeted social engineering. Scammer hacks a WhatsApp group or spoofs a number to impersonate a treasurer or relative in distress.",
|
| 109 |
+
"linguistic_markers": ["nimepata accident", "tuma haraka", "chama contribution", "ntakurudishia"],
|
| 110 |
+
"red_flags": ["Sudden change in payment numbers for a Chama", "Refusal to take a voice call during an 'emergency'"],
|
| 111 |
+
"synthetic_examples": [
|
| 112 |
+
"Buda, nimepata accident hapa Naivasha. Tuma 3k haraka nilipe doctor, ntakurudishia jioni niki-settle.",
|
| 113 |
+
"Members, our Chama account is undergoing maintenance. Please send this month's 5k contribution to the new Treasurer Till: 8821XX.",
|
| 114 |
+
"Mum, simu yangu imeanguka kwa maji na niko shule. Tuma fare 1,500 kwa hii namba ya mwalimu ndio nirudi home."
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
"otp_sim_swap": {
|
| 118 |
+
"name": "SIM Swap / Banking OTP Theft",
|
| 119 |
+
"common_structure": "Sophisticated phishing attempting to get the user's National ID and Banking OTPs to initiate a SIM Swap and drain accounts.",
|
| 120 |
+
"linguistic_markers": ["system upgrade", "confirm your details", "National ID", "Equity mobile"],
|
| 121 |
+
"red_flags": ["Bank/Telco calling from a personal line", "Requests for National ID over SMS"],
|
| 122 |
+
"synthetic_examples": [
|
| 123 |
+
"Dear Equity Bank customer, your mobile banking is due for an upgrade. Reply with your National ID and the OTP sent to you to avoid account suspension.",
|
| 124 |
+
"Safaricom: We are upgrading the network in your area. Please confirm your ID number to prevent your line from being switched off.",
|
| 125 |
+
"Mzee, mimi ni agent wa bank yako. Tuko na system error, hebu nisomee ile code imeingia kwa simu yako ndio turudishe pesa yako."
|
| 126 |
+
]
|
| 127 |
+
}
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
# 3. Risk Mapping
|
| 131 |
+
# Severity label for each scam category id; the keys mirror the ids used in
# SCAM_CATEGORIES. classify_synthetic_message() ranks these labels
# CRITICAL > HIGH > MEDIUM > LOW and falls back to "UNKNOWN" for any id that
# is missing from this table.
RISK_MAPPING = {
    "mpesa_reversal": "HIGH",
    "safaricom_impersonation": "CRITICAL",
    "fuliza_scam": "CRITICAL",
    "kra_penalty": "CRITICAL",
    "otp_sim_swap": "CRITICAL",
    "betting_scam": "MEDIUM",
    "fake_jobs": "MEDIUM",
    "bonga_points": "MEDIUM",
    "chama_sacco": "HIGH",
    "whatsapp_deregistration": "HIGH"
}
|
| 143 |
+
|
| 144 |
+
# Core functions: category lookup, marker search, example selection, classification
|
| 145 |
+
def get_category(category_id: str) -> dict:
    """Look up a single scam category record by its identifier.

    Args:
        category_id: Key into ``SCAM_CATEGORIES`` (e.g. ``"mpesa_reversal"``).

    Returns:
        The stored category dict, or a new empty dict when the id is unknown.
    """
    if category_id in SCAM_CATEGORIES:
        return SCAM_CATEGORIES[category_id]
    return {}
|
| 148 |
+
|
| 149 |
+
def search_by_keyword(text: str) -> list:
    """Find every scam category with at least one marker present in ``text``.

    Matching is a case-insensitive substring test of each category's
    ``linguistic_markers`` against the message. A category is reported at most
    once, in ``SCAM_CATEGORIES`` iteration order.

    Args:
        text: The raw message to scan.

    Returns:
        A list of ``{"category_id": ..., "data": ...}`` dicts, one per
        matching category; empty when nothing matches.
    """
    haystack = text.lower()
    return [
        {"category_id": cat_id, "data": cat_data}
        for cat_id, cat_data in SCAM_CATEGORIES.items()
        # any() stops at the first hit, like the original early break.
        if any(
            marker.lower() in haystack
            for marker in cat_data.get("linguistic_markers", [])
        )
    ]
|
| 162 |
+
|
| 163 |
+
def get_random_example(category_id: str, deterministic: bool = True) -> str:
    """Return one synthetic example message for a scam category.

    Args:
        category_id: Key into ``SCAM_CATEGORIES``.
        deterministic: When true (the default) the same example is returned
            on every call for a given category; when false one is drawn with
            ``random.choice``.

    Returns:
        An example string, or ``""`` when the category is unknown or has no
        examples.
    """
    category = get_category(category_id)
    examples = category.get("synthetic_examples", []) if category else []
    if not examples:
        return ""

    if not deterministic:
        return random.choice(examples)

    # Fixed pick: the length of the category's display name, wrapped to the
    # number of available examples. No randomness is involved.
    position = len(category.get("name", "")) % len(examples)
    return examples[position]
|
| 182 |
+
|
| 183 |
+
def classify_synthetic_message(text: str) -> dict:
    """Classify a message against the OSINT scam categories.

    The message is scanned for category markers first. Only when no marker
    matches is it considered for the SAFE verdict, which requires the shape of
    a genuine M-Pesa receipt ("confirmed", "received", "ksh") and none of the
    known reversal-bait terms. Checking markers first prevents a scam that
    merely imitates a receipt (and then asks for a PIN, an ID number, a
    call-back, ...) from being reported as a legitimate transaction.

    Args:
        text: The raw message. ``None`` or an empty string is treated as a
            message with no content.

    Returns:
        A dict with:
            ``probable_category``: a ``SCAM_CATEGORIES`` id,
                ``"legitimate_transaction"`` or ``"unknown"``.
            ``risk_level``: the ``RISK_MAPPING`` label of that category,
                ``"LOW"`` for a legitimate transaction, else ``"UNKNOWN"``.
            ``matched_markers``: markers of the chosen category found in the
                message (empty for the safe/unknown outcomes).
    """
    text = text or ""
    text_lower = text.lower()

    matches = search_by_keyword(text)

    if not matches:
        looks_like_receipt = all(
            token in text_lower for token in ("confirmed", "received", "ksh")
        )
        # Kept as an explicit guard: e.g. "*(LOCKED)*" in a fake receipt does
        # not match any marker verbatim but still must not be called safe.
        has_reversal_bait = any(
            token in text_lower for token in ("by mistake", "rudisha", "locked")
        )
        if looks_like_receipt and not has_reversal_bait:
            return {
                "probable_category": "legitimate_transaction",
                "risk_level": "LOW",
                "matched_markers": []
            }
        return {
            "probable_category": "unknown",
            "risk_level": "UNKNOWN",
            "matched_markers": []
        }

    risk_weights = {"CRITICAL": 3, "HIGH": 2, "MEDIUM": 1, "LOW": 0, "UNKNOWN": -1}

    def _severity(match: dict) -> int:
        label = RISK_MAPPING.get(match["category_id"], "UNKNOWN")
        return risk_weights.get(label, -1)

    # max() returns the first maximal item, so on equal severity the earliest
    # matching category wins.
    best_match = max(matches, key=_severity)
    best_risk = RISK_MAPPING.get(best_match["category_id"], "UNKNOWN")

    # Report only the markers of the winning category that actually occur.
    matched_markers = [
        marker
        for marker in best_match["data"]["linguistic_markers"]
        if marker.lower() in text_lower
    ]

    return {
        "probable_category": best_match["category_id"],
        "risk_level": best_risk,
        "matched_markers": matched_markers
    }
|
| 229 |
+
|
| 230 |
+
# 6. Output Smoke Test
|
| 231 |
+
if __name__ == "__main__":
    # Manual smoke test: classify a few sample messages and print the verdicts.
    banner = "=" * 60
    print(banner)
    print(" Shadow OSINT Dataset - Smoke Test")
    print(banner)

    sample_messages = (
        "Maze nilikosea nikatuma thao, rudisha haraka",
        "KRA ALERT: Uko na tax arrears ya KES 23,450 kwa iTax system yako.",
        "Confirmed. You have received KSh 500 from John.",
    )

    for sample in sample_messages:
        print(f"\n[Test] Message: '{sample}'")
        outcome = classify_synthetic_message(sample)
        print(f"Probable Category : {outcome.get('probable_category')}")
        print(f"Risk Level : {outcome.get('risk_level')}")
        markers = outcome.get("matched_markers")
        # Safe/unknown outcomes carry no markers, so the line is omitted.
        if markers:
            print(f"Matched Markers : {markers}")
        print("-" * 60)
|
core/prompts.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/prompts.py
|
| 3 |
+
Shadow — AI Fraud Detection System
|
| 4 |
+
AMD Hackathon 2026
|
| 5 |
+
|
| 6 |
+
LangGraph agent system prompts.
|
| 7 |
+
Each prompt is:
|
| 8 |
+
- Kenyan-context aware (Swahili / Sheng / English)
|
| 9 |
+
- Chain-of-thought guided for reliable reasoning
|
| 10 |
+
- Constrained to a strict JSON output contract
|
| 11 |
+
- Optimised for fast inference (no unnecessary verbosity)
|
| 12 |
+
|
| 13 |
+
Agents in the pipeline:
|
| 14 |
+
1. LanguageAgent → detect language mix and classify script
|
| 15 |
+
2. ThreatPatternAgent → identify scam type and extract threat signals
|
| 16 |
+
3. RiskScoringAgent → compute a structured fraud risk score
|
| 17 |
+
4. ActionAgent → produce user-facing verdict and recommended actions
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 21 |
+
# SHARED CONTEXT BLOCK
|
| 22 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 23 |
+
|
| 24 |
+
_KENYA_CONTEXT_PRIMER = """
|
| 25 |
+
## Kenyan Fraud Landscape Context
|
| 26 |
+
You operate in the Kenyan digital environment. Key facts:
|
| 27 |
+
- M-Pesa (Safaricom) is the dominant platform. Legitimate M-Pesa SMS come from "MPESA" only.
|
| 28 |
+
- KRA communicates via itax.kra.go.ke and NEVER asks for fees via M-Pesa.
|
| 29 |
+
- High-volume threats: Safaricom impersonation, Fuliza abuse, M-Pesa reversal tricks, betting scams, Bonga points fraud, Chama/SACCO impersonation, WhatsApp deregistration threats.
|
| 30 |
+
- Scammers exploit urgency ("haraka sana"), authority ("Safaricom rasmi"), and distress.
|
| 31 |
+
- A legitimate institution in Kenya NEVER asks for M-Pesa PIN, OTP, or National ID via SMS/Call.
|
| 32 |
+
- FALSE POSITIVES: Authentic alerts (e.g., M-Pesa sends, Bank alerts) use strict alphanumeric references, do not contain grammatical errors, and do NOT request action.
|
| 33 |
+
- Language is highly mixed: English, Swahili, and Sheng (Nairobi slang).
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 37 |
+
# AGENT 1: LANGUAGE AGENT
|
| 38 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 39 |
+
|
| 40 |
+
LANGUAGE_AGENT_SYSTEM_PROMPT = """
|
| 41 |
+
You are the Language Intelligence Agent in Shadow, Kenya's AI fraud detection system.
|
| 42 |
+
|
| 43 |
+
## Your Role
|
| 44 |
+
Analyse the language composition of a message. Kenyan fraud frequently blends English, Swahili, and Sheng.
|
| 45 |
+
|
| 46 |
+
{kenya_context}
|
| 47 |
+
|
| 48 |
+
## Reasoning Protocol (follow silently, step by step)
|
| 49 |
+
1. Read the full message carefully.
|
| 50 |
+
2. Identify primary/secondary languages.
|
| 51 |
+
3. Flag code-switching (e.g., English -> Swahili -> Sheng).
|
| 52 |
+
4. Identify authentic Sheng terms (e.g., "ronga", "thifte", "nganya", "mchoro", "buda").
|
| 53 |
+
5. Note anomalies (e.g., KRA alert written in Sheng, or "Safaricom" with broken English).
|
| 54 |
+
6. Assess formality vs. impersonated formality.
|
| 55 |
+
7. Extract urgency phrases.
|
| 56 |
+
|
| 57 |
+
## Output Contract
|
| 58 |
+
Return ONLY a valid JSON object matching this schema. NO MARKDOWN FENCES (` ```json `), no preamble.
|
| 59 |
+
|
| 60 |
+
{{
|
| 61 |
+
"primary_language": "<english|swahili|sheng|mixed>",
|
| 62 |
+
"secondary_languages": ["<string>", "..."],
|
| 63 |
+
"is_code_switched": <true|false>,
|
| 64 |
+
"sheng_terms_detected": ["<authentic sheng term>", "..."],
|
| 65 |
+
"swahili_urgency_phrases": ["<phrase>", "..."],
|
| 66 |
+
"formality_level": "<formal|semi-formal|informal|impersonating-formal>",
|
| 67 |
+
"language_anomalies": ["<description>", "..."],
|
| 68 |
+
"linguistic_fraud_signals": ["<specific observation>", "..."],
|
| 69 |
+
"confidence": <0.0-1.0>,
|
| 70 |
+
"reasoning_summary": "<1 sentence internal summary>"
|
| 71 |
+
}}
|
| 72 |
+
""".format(kenya_context=_KENYA_CONTEXT_PRIMER)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 76 |
+
# AGENT 2: THREAT PATTERN AGENT
|
| 77 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 78 |
+
|
| 79 |
+
THREAT_PATTERN_AGENT_SYSTEM_PROMPT = """
|
| 80 |
+
You are the Threat Pattern Agent in Shadow, Kenya's AI fraud detection system.
|
| 81 |
+
|
| 82 |
+
## Your Role
|
| 83 |
+
Identify scam categories and threat signals using the message and Language Agent's output.
|
| 84 |
+
|
| 85 |
+
{kenya_context}
|
| 86 |
+
|
| 87 |
+
## Kenyan Scam Category Reference
|
| 88 |
+
| ID | Category | Typical Mechanism |
|
| 89 |
+
|----|----------|------------------|
|
| 90 |
+
| safaricom_impersonation | Safaricom Impersonation | Harvests PIN/SIM data posing as customer care |
|
| 91 |
+
| mpesa_reversal | M-Pesa Reversal / Float Scam | Claims wrong send, asks refund; fakes agent transaction |
|
| 92 |
+
| fuliza_scam | Fuliza Abuse / Fake Alerts | Fake overdraft notices demanding top-up fees or claiming CRB debt |
|
| 93 |
+
| betting_scam | Betting / Jackpot Scam | Fake "jackpot won" or fixed odds requiring VIP registration fees |
|
| 94 |
+
| bonga_points_scam | Bonga Points Scam | Urgent notices to redeem points before expiry |
|
| 95 |
+
| kra_scam | KRA Tax Scam | Fake penalties, court summons, or tax arrears alerts |
|
| 96 |
+
| chama_scam | Chama / SACCO Scam | Impersonates officials requesting emergency transfers |
|
| 97 |
+
| whatsapp_scam | WhatsApp Deregistration | Threatens account deletion and requests OTP |
|
| 98 |
+
| fake_job | Fake Job Offer | Employment offers requiring upfront payments |
|
| 99 |
+
| sim_swap | SIM Swap Attack | Requests National ID/DOB to "port" or "verify" |
|
| 100 |
+
| otp_theft | OTP / Code Theft | Phishing for passwords via USSD push or fake app upgrades |
|
| 101 |
+
|
| 102 |
+
## Output Contract
|
| 103 |
+
Return ONLY a valid JSON object. No markdown fences, no preamble.
|
| 104 |
+
|
| 105 |
+
{{
|
| 106 |
+
"scam_categories_detected": [
|
| 107 |
+
{{
|
| 108 |
+
"category_id": "<from table above>",
|
| 109 |
+
"category_label": "<human readable>",
|
| 110 |
+
"confidence": <0.0-1.0>,
|
| 111 |
+
"evidence": ["<specific quote or signal>"]
|
| 112 |
+
}}
|
| 113 |
+
],
|
| 114 |
+
"primary_category": "<category_id of highest confidence match, or 'none'>",
|
| 115 |
+
"threat_signals": {{
|
| 116 |
+
"requests_otp_or_pin": <true|false>,
|
| 117 |
+
"requests_national_id": <true|false>,
|
| 118 |
+
"sim_swap_language": <true|false>,
|
| 119 |
+
"external_link_present": <true|false>,
|
| 120 |
+
"impersonates_authority": <true|false>,
|
| 121 |
+
"whatsapp_deregistration": <true|false>,
|
| 122 |
+
"requests_upfront_payment": <true|false>,
|
| 123 |
+
"unrealistic_returns": <true|false>,
|
| 124 |
+
"urgency_language_detected": <true|false>,
|
| 125 |
+
"threat_of_suspension": <true|false>,
|
| 126 |
+
"prize_win_claim": <true|false>,
|
| 127 |
+
"wrong_number_reversal": <true|false>,
|
| 128 |
+
"fuliza_threat": <true|false>,
|
| 129 |
+
"unknown_sender_number": <true|false>,
|
| 130 |
+
"excessive_capitalization": <true|false>,
|
| 131 |
+
"multiple_exclamation_marks": <true|false>,
|
| 132 |
+
"calls_to_unknown_number": <true|false>
|
| 133 |
+
}},
|
| 134 |
+
"impersonated_entity": "<Safaricom|KRA|Equity Bank|Police|None|Other>",
|
| 135 |
+
"manipulation_hook": "<fear|greed|urgency|authority|distress|none>",
|
| 136 |
+
"extracted_demands": ["<what user is asked to do>", "..."],
|
| 137 |
+
"legitimacy_evidence_for": ["<e.g. valid M-Pesa format>", "..."],
|
| 138 |
+
"legitimacy_evidence_against": ["<e.g. personal number claiming to be Safaricom>", "..."],
|
| 139 |
+
"is_likely_legitimate": <true|false>,
|
| 140 |
+
"reasoning_summary": "<1 sentence internal summary>"
|
| 141 |
+
}}
|
| 142 |
+
""".format(kenya_context=_KENYA_CONTEXT_PRIMER)
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 146 |
+
# AGENT 3: RISK SCORING AGENT
|
| 147 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 148 |
+
|
| 149 |
+
RISK_SCORING_AGENT_SYSTEM_PROMPT = """
|
| 150 |
+
You are the Risk Scoring Agent in Shadow, Kenya's AI fraud detection system.
|
| 151 |
+
|
| 152 |
+
## Your Role
|
| 153 |
+
Compute a structured, explainable fraud risk score using strict raw thresholds.
|
| 154 |
+
You receive outputs from Language and Threat Pattern Agents.
|
| 155 |
+
|
| 156 |
+
{kenya_context}
|
| 157 |
+
|
| 158 |
+
## Scoring Framework
|
| 159 |
+
|
| 160 |
+
### Indicator Weights
|
| 161 |
+
CRITICAL (weight = 3): requests_otp_or_pin, requests_national_id, sim_swap_language, external_link_present, impersonates_authority, whatsapp_deregistration
|
| 162 |
+
HIGH (weight = 2): requests_upfront_payment, unrealistic_returns, urgency_language_detected, threat_of_suspension, prize_win_claim, wrong_number_reversal, fuliza_threat
|
| 163 |
+
MODERATE (weight = 1): sheng_scam_vocabulary, swahili_urgency_phrase, unknown_sender_number, excessive_capitalization, multiple_exclamation_marks, calls_to_unknown_number
|
| 164 |
+
|
| 165 |
+
Combo Bonus: If BOTH credential theft AND impersonation are present, ADD 2 to the raw score.
|
| 166 |
+
|
| 167 |
+
### Absolute Risk Thresholds
|
| 168 |
+
Sum the weights to find the `raw_score`. Apply these thresholds:
|
| 169 |
+
- CRITICAL (6+) : Almost certainly a scam. Immediate danger.
|
| 170 |
+
- HIGH (4-5) : Strong fraud indicators. Do not comply.
|
| 171 |
+
- MEDIUM (2-3) : Suspicious. Verify independently.
|
| 172 |
+
- LOW (0-1) : Appears safe.
|
| 173 |
+
|
| 174 |
+
## Output Contract
|
| 175 |
+
Return ONLY a valid JSON object. No markdown fences, no preamble.
|
| 176 |
+
|
| 177 |
+
{{
|
| 178 |
+
"raw_score": <integer>,
|
| 179 |
+
"risk_level": "<CRITICAL|HIGH|MEDIUM|LOW>",
|
| 180 |
+
"score_override_applied": <true|false>,
|
| 181 |
+
"override_reason": "<null or explanation>",
|
| 182 |
+
"triggered_indicators": [
|
| 183 |
+
{{
|
| 184 |
+
"indicator": "<key>",
|
| 185 |
+
"weight": <int>,
|
| 186 |
+
"evidence": "<reason>"
|
| 187 |
+
}}
|
| 188 |
+
],
|
| 189 |
+
"top_risk_drivers": ["<top 3 keys>"],
|
| 190 |
+
"confidence": <0.0-1.0>,
|
| 191 |
+
"reasoning_summary": "<1 sentence summary>"
|
| 192 |
+
}}
|
| 193 |
+
""".format(kenya_context=_KENYA_CONTEXT_PRIMER)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 197 |
+
# AGENT 4: ACTION AGENT
|
| 198 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 199 |
+
|
| 200 |
+
ACTION_AGENT_SYSTEM_PROMPT = """
|
| 201 |
+
You are the Action Agent in Shadow, Kenya's AI fraud detection system.
|
| 202 |
+
|
| 203 |
+
## Your Role
|
| 204 |
+
Synthesise upstream outputs into a clear, empathetic, actionable verdict.
|
| 205 |
+
Do NOT be alarmist for low-risk messages. Speak to a Kenyan user.
|
| 206 |
+
|
| 207 |
+
{kenya_context}
|
| 208 |
+
|
| 209 |
+
## Reporting Contacts
|
| 210 |
+
- Safaricom Fraud SMS : Forward SMS to 333 (Free)
|
| 211 |
+
- DCI Cybercrime Unit : +254 20 4343000 / cybercrime@dci.go.ke
|
| 212 |
+
- KRA Fraud Tip : fraudtipoffs@kra.go.ke
|
| 213 |
+
|
| 214 |
+
## Output Contract
|
| 215 |
+
Return ONLY a valid JSON object. No markdown fences, no preamble.
|
| 216 |
+
|
| 217 |
+
{{
|
| 218 |
+
"verdict": "<SCAM|SUSPICIOUS|SAFE>",
|
| 219 |
+
"risk_level": "<CRITICAL|HIGH|MEDIUM|LOW>",
|
| 220 |
+
"scam_type": "<human-readable label or 'None detected'>",
|
| 221 |
+
"dashboard_summary": "<≤12 word UI summary>",
|
| 222 |
+
"explanation": {{
|
| 223 |
+
"what_is_happening": "<2 sentences plain language>",
|
| 224 |
+
"how_the_scam_works": "<2 sentences specific mechanics>",
|
| 225 |
+
"red_flags_found": ["<red flag>", "..."]
|
| 226 |
+
}},
|
| 227 |
+
"recommended_actions": [
|
| 228 |
+
{{
|
| 229 |
+
"priority": <1-5, 1=highest>,
|
| 230 |
+
"action": "<imperative>",
|
| 231 |
+
"reason": "<why>"
|
| 232 |
+
}}
|
| 233 |
+
],
|
| 234 |
+
"do_not_do": ["<thing NOT to do>", "..."],
|
| 235 |
+
"reporting": {{
|
| 236 |
+
"should_report": <true|false>,
|
| 237 |
+
"contacts": [
|
| 238 |
+
{{
|
| 239 |
+
"name": "<name>",
|
| 240 |
+
"value": "<contact info>",
|
| 241 |
+
"reason": "<why>"
|
| 242 |
+
}}
|
| 243 |
+
]
|
| 244 |
+
}},
|
| 245 |
+
"safety_tip": {{
|
| 246 |
+
"english": "<tip>",
|
| 247 |
+
"swahili": "<Swahili tip>",
|
| 248 |
+
"sheng": "<Sheng tip>"
|
| 249 |
+
}},
|
| 250 |
+
"confidence": <0.0-1.0>
|
| 251 |
+
}}
|
| 252 |
+
""".format(kenya_context=_KENYA_CONTEXT_PRIMER)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 256 |
+
# PROMPT BUILDER UTILITIES
|
| 257 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 258 |
+
|
| 259 |
+
def build_language_agent_input(message: str) -> str:
    """Build the user-turn prompt sent to the language agent.

    Args:
        message: Raw message text to analyse. It is inserted verbatim between
            two triple-double-quote fence lines.

    Returns:
        The prompt text: an instruction line, the fenced message, and a
        closing reminder to answer with JSON only.
    """
    fence = '"""'
    prompt_lines = [
        "Analyse this message for language composition and linguistic fraud signals.",
        "",
        "MESSAGE TO ANALYSE:",
        fence,
        f"{message}",
        fence,
        "",
        "Return JSON ONLY per your schema.",
    ]
    return "\n".join(prompt_lines)
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def build_threat_pattern_agent_input(message: str, language_result: dict, precheck_category: str = None) -> str:
    """Build the user-turn prompt sent to the threat pattern agent.

    Args:
        message: Raw message text under analysis, inserted verbatim between
            two triple-double-quote fence lines.
        language_result: Output of the language agent. It is rendered as
            indented JSON, so it must be JSON-serialisable.
        precheck_category: Optional category from the OSINT precheck. When
            truthy, an "OSINT PRECHECK MATCH" section is inserted directly
            after the fenced message; otherwise nothing is added.

    Returns:
        The prompt text: instruction line, fenced message, optional precheck
        section, the language agent JSON, and a JSON-only reminder.

    Raises:
        TypeError: If ``language_result`` holds a value ``json.dumps`` cannot
            serialise.
    """
    import json

    # ensure_ascii=False keeps non-ASCII characters readable in the prompt
    # instead of expanding each one into a \uXXXX escape sequence.
    language_json = json.dumps(language_result, indent=2, ensure_ascii=False)

    precheck_str = ""
    if precheck_category:
        precheck_str = f"\nOSINT PRECHECK MATCH:\nCategory: {precheck_category}\n(Use this as a strong prior for classification)\n"

    # NOTE(review): message is interpolated verbatim; text containing a
    # triple double quote would close the fence early. Consider sanitising
    # upstream if messages come from untrusted senders.
    return f"""Identify fraud patterns and threat signals in this message.

ORIGINAL MESSAGE:
\"\"\"
{message}
\"\"\"{precheck_str}

LANGUAGE AGENT OUTPUT:
{language_json}

Return JSON ONLY per your schema."""
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def build_risk_scoring_agent_input(message: str, language_result: dict, threat_result: dict) -> str:
    """Build the user-turn prompt sent to the risk scoring agent.

    Args:
        message: Raw message text under analysis, inserted verbatim between
            two triple-double-quote fence lines.
        language_result: Output of the language agent; must be
            JSON-serialisable.
        threat_result: Output of the threat pattern agent; must be
            JSON-serialisable.

    Returns:
        The prompt text: instruction line, fenced message, both upstream
        agent outputs as indented JSON, and a JSON-only reminder.

    Raises:
        TypeError: If either result holds a value ``json.dumps`` cannot
            serialise.
    """
    import json

    # ensure_ascii=False keeps non-ASCII characters readable in the prompt
    # instead of expanding each one into a \uXXXX escape sequence.
    language_json = json.dumps(language_result, indent=2, ensure_ascii=False)
    threat_json = json.dumps(threat_result, indent=2, ensure_ascii=False)

    # NOTE(review): message is interpolated verbatim; text containing a
    # triple double quote would close the fence early.
    return f"""Compute the fraud risk score for this message.

ORIGINAL MESSAGE:
\"\"\"
{message}
\"\"\"

LANGUAGE AGENT OUTPUT:
{language_json}

THREAT PATTERN AGENT OUTPUT:
{threat_json}

Return JSON ONLY per your schema."""
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def build_action_agent_input(message: str, language_result: dict, threat_result: dict, scoring_result: dict) -> str:
    """Build the user-turn prompt sent to the action agent.

    Args:
        message: Raw message text under analysis, inserted verbatim between
            two triple-double-quote fence lines.
        language_result: Output of the language agent; must be
            JSON-serialisable.
        threat_result: Output of the threat pattern agent; must be
            JSON-serialisable.
        scoring_result: Output of the risk scoring agent; must be
            JSON-serialisable.

    Returns:
        The prompt text: instruction line, fenced message, the three upstream
        agent outputs as indented JSON, and a JSON-only reminder.

    Raises:
        TypeError: If any result holds a value ``json.dumps`` cannot
            serialise.
    """
    import json

    # ensure_ascii=False keeps non-ASCII characters readable in the prompt
    # instead of expanding each one into a \uXXXX escape sequence.
    language_json = json.dumps(language_result, indent=2, ensure_ascii=False)
    threat_json = json.dumps(threat_result, indent=2, ensure_ascii=False)
    scoring_json = json.dumps(scoring_result, indent=2, ensure_ascii=False)

    # NOTE(review): message is interpolated verbatim; text containing a
    # triple double quote would close the fence early.
    return f"""Generate the final user-facing verdict and actions.

ORIGINAL MESSAGE:
\"\"\"
{message}
\"\"\"

LANGUAGE AGENT OUTPUT:
{language_json}

THREAT PATTERN AGENT OUTPUT:
{threat_json}

RISK SCORING AGENT OUTPUT:
{scoring_json}

Return JSON ONLY per your schema."""
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 329 |
+
# AGENT REGISTRY
|
| 330 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 331 |
+
|
| 332 |
+
# Registry mapping each agent identifier to the system prompt it runs with.
AGENT_PROMPTS: dict[str, str] = {
    "language_agent": LANGUAGE_AGENT_SYSTEM_PROMPT,
    "threat_pattern_agent": THREAT_PATTERN_AGENT_SYSTEM_PROMPT,
    "risk_scoring_agent": RISK_SCORING_AGENT_SYSTEM_PROMPT,
    "action_agent": ACTION_AGENT_SYSTEM_PROMPT,
}


def get_system_prompt(agent_id: str) -> str:
    """Return the system prompt registered for ``agent_id``.

    Args:
        agent_id: One of the keys of ``AGENT_PROMPTS``.

    Returns:
        The system prompt string stored under ``agent_id``.

    Raises:
        KeyError: If ``agent_id`` is not registered; the error message lists
            the valid identifiers.
    """
    if agent_id in AGENT_PROMPTS:
        return AGENT_PROMPTS[agent_id]

    valid = list(AGENT_PROMPTS)
    raise KeyError(f"Unknown agent_id '{agent_id}'. Valid options: {valid}")
|
core/synthetic_threat_intel.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
core/synthetic_threat_intel.py
|
| 3 |
+
Shadow — AI Fraud Detection System
|
| 4 |
+
AMD Hackathon 2026
|
| 5 |
+
|
| 6 |
+
Generates synthetic Kenyan fraud datasets (Sheng/Swahili/English) to overcome
|
| 7 |
+
the "Data Cold Start" problem. Uses the Shadow LLM Client to generate high-quality,
|
| 8 |
+
localized scam variations for training and evaluation.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import json
|
| 12 |
+
import time
|
| 13 |
+
import os
|
| 14 |
+
import sys
|
| 15 |
+
from typing import List, Dict, Any
|
| 16 |
+
|
| 17 |
+
# Ensure the core module is discoverable
|
| 18 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 19 |
+
|
| 20 |
+
from core.llm_client import ShadowLLMClient
|
| 21 |
+
from core.kenyan_context import SCAM_CATEGORIES, SHENG_SCAM_GLOSSARY
|
| 22 |
+
|
| 23 |
+
SYNTHETIC_GENERATOR_PROMPT = """
|
| 24 |
+
You are a Kenyan cybersecurity data engineer. Your task is to generate realistic,
|
| 25 |
+
synthetic fraud SMS messages to train our AI models.
|
| 26 |
+
We are focusing on the Kenyan context, specifically using code-switching (English, Swahili, Sheng).
|
| 27 |
+
|
| 28 |
+
Target Scam Category: {category_label}
|
| 29 |
+
Description: {category_description}
|
| 30 |
+
Keywords: {keywords}
|
| 31 |
+
Example Patterns: {example_patterns}
|
| 32 |
+
|
| 33 |
+
Glossary of Sheng terms to optionally incorporate:
|
| 34 |
+
{sheng_glossary}
|
| 35 |
+
|
| 36 |
+
Generate {count} unique, realistic SMS variations of this scam.
|
| 37 |
+
Ensure they vary in tone (urgent, threatening, pleading, formal-impersonation).
|
| 38 |
+
Include authentic Kenyan names (e.g., Kamau, Omondi, Wanjiku), typical amounts (e.g., KES 500, Ksh 30,000),
|
| 39 |
+
and standard shortcodes/numbers where applicable.
|
| 40 |
+
|
| 41 |
+
Return ONLY a valid JSON object matching this schema. NO MARKDOWN FENCES, no preamble.
|
| 42 |
+
{{
|
| 43 |
+
"synthetic_messages": [
|
| 44 |
+
{{
|
| 45 |
+
"message": "<the raw sms text>",
|
| 46 |
+
"language_mix": "<english|swahili|sheng|mixed>",
|
| 47 |
+
"tone": "<urgent|threatening|pleading|impersonation>",
|
| 48 |
+
"key_signals": ["<signal 1>", "<signal 2>"]
|
| 49 |
+
}}
|
| 50 |
+
]
|
| 51 |
+
}}
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
class SyntheticDataGenerator:
    """Generates synthetic threat intelligence data using the AMD Cloud / Qwen model.

    All generation goes through a ``ShadowLLMClient`` instance created in the
    constructor. Failures are reported on stdout and yield an empty dataset
    instead of propagating.
    """

    def __init__(self):
        self.llm_client = ShadowLLMClient()

    def generate_category_dataset(self, category_id: str, count: int = 5) -> Dict[str, Any]:
        """Generate synthetic examples for a specific scam category.

        Args:
            category_id: Key into ``SCAM_CATEGORIES``.
            count: Number of message variations requested from the model.

        Returns:
            The dict returned by the LLM client (expected to hold a
            ``"synthetic_messages"`` list), or ``{"synthetic_messages": []}``
            when the request fails or the client returns a non-dict value.

        Raises:
            ValueError: If ``category_id`` is not in ``SCAM_CATEGORIES``.
        """
        if category_id not in SCAM_CATEGORIES:
            raise ValueError(f"Unknown category_id: {category_id}")

        category = SCAM_CATEGORIES[category_id]

        system_prompt = SYNTHETIC_GENERATOR_PROMPT.format(
            category_label=category["label"],
            category_description=category["description"],
            keywords=", ".join(category.get("keywords", [])),
            example_patterns=" | ".join(category.get("example_patterns", [])),
            sheng_glossary=json.dumps(SHENG_SCAM_GLOSSARY, indent=2),
            count=count,
        )
        user_input = f"Generate {count} synthetic examples for the {category['label']} category."

        print(f"Generating {count} synthetic examples for '{category_id}'...")
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments during the request.
        start_time = time.perf_counter()

        try:
            result = self.llm_client.generate_response(system_prompt, user_input)
        except Exception as e:
            # Deliberate best-effort boundary: one failed category must not
            # abort a whole benchmark run.
            print(f"Error generating synthetic data: {e}")
            return {"synthetic_messages": []}

        duration = round(time.perf_counter() - start_time, 2)
        print(f"Generation complete in {duration}s.")

        # NOTE(review): generate_response is assumed to return the parsed JSON
        # object; confirm against ShadowLLMClient. Anything else is normalised
        # so callers can rely on the declared dict return type.
        if not isinstance(result, dict):
            print(
                "Error generating synthetic data: expected a JSON object, "
                f"got {type(result).__name__}"
            )
            return {"synthetic_messages": []}
        return result

    def generate_full_benchmark(self, count_per_category: int = 3) -> Dict[str, List[Dict[str, Any]]]:
        """Generates a full benchmark dataset across all known scam categories.

        Args:
            count_per_category: Number of examples requested per category.

        Returns:
            A dict mapping every key of ``SCAM_CATEGORIES`` to the list found
            under ``"synthetic_messages"`` in that category's result (an empty
            list when the entry is missing or empty).
        """
        benchmark_dataset: Dict[str, List[Dict[str, Any]]] = {}
        for index, cat_id in enumerate(SCAM_CATEGORIES):
            if index:
                # Brief pause between requests to avoid rate limits; pausing
                # before every request except the first avoids a wasted
                # sleep after the final category.
                time.sleep(1)
            result = self.generate_category_dataset(cat_id, count=count_per_category)
            benchmark_dataset[cat_id] = result.get("synthetic_messages") or []

        return benchmark_dataset
|
| 100 |
+
|
| 101 |
+
if __name__ == "__main__":
    # Manual smoke test for a single category. `os` is already imported at
    # the top of this module, so no local re-import is needed.
    # NOTE(review): SHADOW_MOCK_MODE is set after core.llm_client has been
    # imported; confirm the client reads it at construction time, not at
    # import time.
    os.environ["SHADOW_MOCK_MODE"] = "true"
    generator = SyntheticDataGenerator()

    print("Testing Synthetic Data Generation (M-Pesa Reversal)")
    data = generator.generate_category_dataset("mpesa_reversal", count=2)
    print(json.dumps(data, indent=2))
|
requirements.txt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
openai
|
| 3 |
+
python-dotenv
|