# Source: zenith-backend/app/services/intelligence/forensic_intelligence.py
# Uploaded via huggingface_hub by teoat (commit 4ae946d, verified).
"""
Forensic Intelligence Service - Real Implementation
Provides advanced forensic analysis capabilities for fraud investigation.
"""
import hashlib
import logging
from collections import defaultdict
from datetime import datetime
from typing import Any
logger = logging.getLogger(__name__)
class TriangulationEngine:
"""
Probabilistic unmasking of redacted transaction names using multi-source triangulation.
Ref: VISION_10_10 Section 6
"""
def __init__(self, db_session):
self.db = db_session
self._vendor_cache: dict[str, dict] = {}
async def unmask_redaction(
self, transaction_id: str, masked_name: str
) -> dict[str, Any]:
"""
Attempts to resolve '*' or partial names by looking at:
1. Similar amounts in other transactions.
2. Merchant frequency for the specific account.
3. External metadata linkages.
Args:
transaction_id: Transaction ID to unmask
masked_name: The masked/redacted merchant name
Returns:
Unmasking results with confidence scores
"""
logger.info(
f"Triangulating redacted merchant for txn {transaction_id}: {masked_name}"
)
try:
# Get transaction details
transaction = await self._get_transaction(transaction_id)
if not transaction:
return {
"transaction_id": transaction_id,
"original_masked": masked_name,
"resolved_name": None,
"confidence_score": 0.0,
"error": "Transaction not found",
}
suggestions = []
# Strategy 1: Amount-Pattern Match
amount_matches = await self._find_by_amount(transaction.get("amount", 0))
for match in amount_matches:
if match.get("merchant") and "*" not in match["merchant"]:
suggestions.append(
{
"candidate": match["merchant"],
"confidence": min(
0.7 + (0.1 * match.get("frequency", 1)), 0.95
),
"source": "Amount-Pattern Match",
}
)
# Strategy 2: Account History Sync
history_matches = await self._find_by_account_history(
transaction.get("account_id"), masked_name
)
for match in history_matches:
suggestions.append(
{
"candidate": match["merchant"],
"confidence": min(0.8 + (0.05 * match.get("count", 1)), 0.95),
"source": "Account-History Sync",
}
)
# Strategy 3: Pattern Recognition
if masked_name and "*" in masked_name:
pattern_matches = self._match_partial_pattern(masked_name)
suggestions.extend(pattern_matches)
# Deduplicate and sort by confidence
seen = set()
unique_suggestions = []
for s in sorted(suggestions, key=lambda x: x["confidence"], reverse=True):
if s["candidate"] not in seen:
seen.add(s["candidate"])
unique_suggestions.append(s)
best_match = unique_suggestions[0] if unique_suggestions else None
return {
"transaction_id": transaction_id,
"original_masked": masked_name,
"resolved_name": best_match["candidate"] if best_match else None,
"confidence_score": best_match["confidence"] if best_match else 0.0,
"all_candidates": unique_suggestions[:5],
"triangulation_logic": list({s["source"] for s in unique_suggestions}),
"timestamp": datetime.now().isoformat(),
}
except Exception as e:
logger.error(f"Triangulation failed for {transaction_id}: {e}")
return {
"transaction_id": transaction_id,
"original_masked": masked_name,
"resolved_name": None,
"confidence_score": 0.0,
"error": str(e),
}
async def _get_transaction(self, transaction_id: str) -> dict | None:
"""Get transaction from database."""
try:
from core.database import Transaction
txn = self.db.query(Transaction).filter_by(id=transaction_id).first()
if txn:
return {
"id": str(txn.id),
"amount": txn.amount,
"account_id": getattr(txn, "account_id", None),
"merchant": getattr(txn, "merchant", None),
}
except Exception:
pass
return None
async def _find_by_amount(self, amount: float) -> list[dict]:
"""Find transactions with similar amounts."""
try:
from core.database import Transaction
tolerance = amount * 0.05 # 5% tolerance
matches = (
self.db.query(Transaction)
.filter(
Transaction.amount.between(amount - tolerance, amount + tolerance)
)
.limit(50)
.all()
)
# Group by merchant
merchant_counts = defaultdict(int)
for m in matches:
if hasattr(m, "merchant") and m.merchant:
merchant_counts[m.merchant] += 1
return [
{"merchant": k, "frequency": v}
for k, v in sorted(
merchant_counts.items(), key=lambda x: x[1], reverse=True
)
]
except Exception:
return []
async def _find_by_account_history(
self, account_id: str, masked: str
) -> list[dict]:
"""Find matches from account transaction history."""
if not account_id:
return []
try:
from core.database import Transaction
history = (
self.db.query(Transaction)
.filter_by(account_id=account_id)
.limit(100)
.all()
)
# Find unmasked merchants
merchant_counts = defaultdict(int)
for t in history:
if hasattr(t, "merchant") and t.merchant and "*" not in t.merchant:
merchant_counts[t.merchant] += 1
return [
{"merchant": k, "count": v}
for k, v in sorted(
merchant_counts.items(), key=lambda x: x[1], reverse=True
)[:5]
]
except Exception:
return []
def _match_partial_pattern(self, masked: str) -> list[dict]:
"""Match partial patterns against known vendors."""
# Common vendor patterns
known_patterns = {
"AMZN*": "Amazon",
"AMAZON*": "Amazon",
"APPLE*": "Apple",
"GOOGLE*": "Google",
"PAYPAL*": "PayPal",
"SQ *": "Square",
"UBER*": "Uber",
"LYFT*": "Lyft",
"DOORDASH*": "DoorDash",
}
results = []
masked_upper = masked.upper()
for pattern, vendor in known_patterns.items():
if masked_upper.startswith(pattern.replace("*", "")):
results.append(
{
"candidate": vendor,
"confidence": 0.85,
"source": "Pattern Recognition",
}
)
return results
class LIBRAlgorithm:
"""
Lowest Intermediate Balance Rule (LIBR) for tracking mixed funds.
Used to detect illicit float in personal/business accounts.
Ref: VISION_10_10 Section 6
"""
def __init__(self, db_session):
self.db = db_session
def analyze_mixed_funds(
self,
account_id: str,
start_date: datetime,
end_date: datetime,
suspected_illicit_deposits: list[str] | None = None,
) -> dict[str, Any]:
"""
Applies LIBR to distinguish between legitimate funds and illicit injections.
The LIBR principle: The lowest balance reached between an illicit deposit
and a subsequent withdrawal represents the maximum illicit funds in that withdrawal.
Args:
account_id: Account to analyze
start_date: Analysis start date
end_date: Analysis end date
suspected_illicit_deposits: Optional list of suspected illicit deposit IDs
Returns:
LIBR analysis results
"""
logger.info(f"Applying LIBR Algorithm to account {account_id}")
try:
# Get chronological transactions
transactions = self._get_transactions(account_id, start_date, end_date)
if not transactions:
return {
"account_id": account_id,
"status": "NO_DATA",
"message": "No transactions found",
}
# Calculate running balance
running_balance = 0.0
balance_history = []
for txn in transactions:
running_balance += txn["amount"]
balance_history.append(
{
"date": txn["date"],
"amount": txn["amount"],
"balance": running_balance,
"is_suspected": txn["id"] in (suspected_illicit_deposits or []),
}
)
# Find minimum intermediate balances after suspected deposits
libr_violations = []
illicit_float = 0.0
for i, entry in enumerate(balance_history):
if entry["is_suspected"] and entry["amount"] > 0:
# Find lowest balance after this deposit
min_balance = entry["balance"]
for j in range(i + 1, len(balance_history)):
if balance_history[j]["balance"] < min_balance:
min_balance = balance_history[j]["balance"]
# LIBR violation if withdrawal occurred
max_illicit = min(
entry["amount"], max(0, entry["balance"] - min_balance)
)
if max_illicit > 0:
libr_violations.append(
{
"deposit_date": str(entry["date"]),
"deposit_amount": entry["amount"],
"min_subsequent_balance": min_balance,
"max_illicit_withdrawn": max_illicit,
}
)
illicit_float += max_illicit
# Calculate commingling ratio
total_deposits = sum(t["amount"] for t in transactions if t["amount"] > 0)
commingling_ratio = (
illicit_float / total_deposits if total_deposits > 0 else 0
)
# Determine risk status
if commingling_ratio > 0.5:
status = "HIGH_RISK"
elif commingling_ratio > 0.2:
status = "MEDIUM_RISK"
elif libr_violations:
status = "LOW_RISK"
else:
status = "CLEAN"
return {
"account_id": account_id,
"period": f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}",
"commingling_ratio": round(commingling_ratio, 2),
"illicit_float_detected": round(illicit_float, 2),
"libr_violation_count": len(libr_violations),
"status": status,
"violations": libr_violations[:10],
"findings": self._generate_findings(commingling_ratio, libr_violations),
"analyzed_at": datetime.now().isoformat(),
}
except Exception as e:
logger.error(f"LIBR analysis failed: {e}")
return {"account_id": account_id, "status": "ERROR", "error": str(e)}
def _get_transactions(
self, account_id: str, start_date: datetime, end_date: datetime
) -> list[dict]:
"""Get transactions from database."""
try:
from core.database import Transaction
txns = (
self.db.query(Transaction)
.filter(
Transaction.account_id == account_id,
Transaction.created_at.between(start_date, end_date),
)
.order_by(Transaction.created_at)
.all()
)
return [
{"id": str(t.id), "amount": t.amount, "date": t.created_at}
for t in txns
]
except Exception:
return []
def _generate_findings(self, ratio: float, violations: list[dict]) -> str:
"""Generate human-readable findings."""
if not violations:
return "No LIBR violations detected. Funds appear segregated."
if ratio > 0.5:
return f"High commingling detected ({ratio * 100:.0f}%). Multiple illicit deposits followed by withdrawals before balance separation."
elif ratio > 0.2:
return f"Moderate commingling ({ratio * 100:.0f}%). Some potential structuring activity detected."
else:
return f"Low commingling ({ratio * 100:.0f}%). Minor LIBR violations detected but funds largely traceable."
class MensReaEngine:
    """
    Theory of Intent (Mens Rea) Engine.
    AI classifiers that map evidence to Knowledge, Intent, or Willful Blindness.
    Ref: VISION_10_10 Section 5
    """

    def __init__(self, ai_service=None):
        # Optional external AI service hook (not used by the keyword scorer).
        self.ai_service = ai_service
        # Marker lexicon: category -> keywords searched as substrings of the
        # lower-cased evidence text.
        self.legal_lexicon = {
            "avoidance": [
                "bypass",
                "limit",
                "threshold",
                "split",
                "smurf",
                "avoid",
                "circumvent",
                "evade",
                "dodge",
                "structure",
            ],
            "obfuscation": [
                "hide",
                "mask",
                "proxy",
                "nominee",
                "shell",
                "offshore",
                "anonymous",
                "conceal",
                "disguise",
                "launder",
            ],
            "knowledge": [
                "aware",
                "understand",
                "policy",
                "regulation",
                "illegal",
                "know",
                "recognize",
                "acknowledge",
                "realize",
                "compliance",
            ],
            "planning": [
                "plan",
                "schedule",
                "arrange",
                "organize",
                "coordinate",
                "timing",
                "sequence",
                "prepare",
                "strategy",
            ],
        }
        # Published per-category weights. NOTE(review): the scorer in
        # `attribute_intent` uses its own hand-tuned coefficients; these are
        # exposed for external consumers — confirm intended usage.
        self.intent_weights = {
            "knowledge": 0.25,
            "avoidance": 0.35,
            "obfuscation": 0.30,
            "planning": 0.10,
        }

    async def attribute_intent(self, evidence_id: str, content: str) -> dict[str, Any]:
        """
        Analyzes text/metadata to classify legal intent with detailed justification.
        Args:
            evidence_id: Evidence ID being analyzed
            content: Text content to analyze
        Returns:
            Intent analysis with legal theory mapping, per-category score
            matrix, and a litigation-readiness label.
        """
        logger.info(f"Running Advanced Mens Rea analysis on evidence {evidence_id}")
        if not content:
            return {"evidence_id": evidence_id, "error": "No content provided"}
        content_lower = content.lower()
        # Find all lexicon markers present in the text.
        found_markers = []
        category_scores = {}
        for category, keywords in self.legal_lexicon.items():
            matches = [kw for kw in keywords if kw in content_lower]
            if matches:
                found_markers.append(
                    {"category": category, "keywords": matches, "count": len(matches)}
                )
                # Score based on match density (3+ matches saturates at 1.0).
                category_scores[category] = min(len(matches) / 3, 1.0)
        # Calculate weighted intent scores
        intent_scores = {
            "KNOWLEDGE": 0.0,
            "INTENT": 0.0,
            "WILLFUL_BLINDNESS": 0.0,
            "NEGLIGENCE": 0.0,
        }
        # Knowledge requires explicit awareness markers
        knowledge_markers = category_scores.get("knowledge", 0)
        intent_scores["KNOWLEDGE"] = min(0.3 + (knowledge_markers * 0.5), 0.95)
        # Intent requires avoidance + planning
        avoidance = category_scores.get("avoidance", 0)
        planning = category_scores.get("planning", 0)
        intent_scores["INTENT"] = min(0.2 + (avoidance * 0.4) + (planning * 0.3), 0.95)
        # Willful blindness = knowledge without avoidance action
        if knowledge_markers > 0.3 and avoidance < 0.2:
            intent_scores["WILLFUL_BLINDNESS"] = min(knowledge_markers * 0.7, 0.8)
        else:
            intent_scores["WILLFUL_BLINDNESS"] = 0.15
        # Negligence is the low-floor residual of the strongest signal.
        intent_scores["NEGLIGENCE"] = max(0.1, 0.5 - max(intent_scores.values()))
        # Determine primary intent (ties resolve to dict insertion order).
        primary_intent = max(intent_scores, key=intent_scores.get)
        confidence = intent_scores[primary_intent]
        # Generate legal theory
        legal_theory = self._generate_legal_theory(primary_intent, found_markers)
        return {
            "evidence_id": evidence_id,
            "primary_intent": primary_intent,
            "confidence": round(confidence, 2),
            "justification": {
                "summary": self._generate_summary(primary_intent, found_markers),
                "evidence_markers": found_markers,
                "legal_theory": legal_theory,
            },
            "mens_rea_matrix": {k: round(v, 2) for k, v in intent_scores.items()},
            "litigation_readiness": (
                "HIGH"
                if confidence > 0.7
                else "MODERATE" if confidence > 0.5 else "LOW"
            ),
            "admissibility_context": "Ref: Rule 403 (Probative value vs. Prejudice), Rule 404(b) (Prior acts)",
            "analyzed_at": datetime.now().isoformat(),
        }

    def _generate_summary(self, intent: str, markers: list[dict]) -> str:
        """Generate an analysis summary for the classified intent."""
        if not markers:
            return "Insufficient evidence markers for conclusive intent determination."
        # FIX: removed a dead list comprehension whose result was discarded.
        if intent == "KNOWLEDGE":
            return "Evidence shows explicit awareness of regulatory requirements and potential violations."
        elif intent == "INTENT":
            return "Pattern of avoidance keywords combined with planning language suggests deliberate circumvention."
        elif intent == "WILLFUL_BLINDNESS":
            return "Subject demonstrated awareness without taking corrective action - potential willful blindness."
        else:
            return "No clear intentional misconduct detected. May represent negligence or oversight."

    def _generate_legal_theory(self, intent: str, markers: list[dict]) -> str:
        """Map the classified intent to a supporting legal theory citation."""
        if intent == "INTENT":
            return "The proximity of 'avoidance' keywords to 'planning' language suggests a calculated attempt to circumvent AML controls. See United States v. MacPherson (intent inferred from pattern of conduct)."
        elif intent == "KNOWLEDGE":
            return "Explicit references to regulations and compliance requirements demonstrate actual knowledge. See Model Penal Code § 2.02(2)(b)."
        elif intent == "WILLFUL_BLINDNESS":
            return "Subject's awareness combined with failure to inquire further may constitute willful blindness. See Global-Tech Appliances v. SEB S.A."
        else:
            return "Standard of reasonable care analysis applies. See negligence elements under common law."
class TemporalPairMatcher:
    """
    Detects Mirror/Wash transactions (Equal & Opposite movements).
    Ref: VISION_10_10 Section 6 (Mirror Detection)
    """

    def __init__(self, db_session):
        # SQLAlchemy-style session used to load the account's transactions.
        self.db = db_session

    async def find_mirror_pairs(
        self,
        account_id: str,
        threshold_seconds: int = 3600,
        amount_tolerance: float = 0.01,
    ) -> list[dict[str, Any]]:
        """
        Identifies pairs of transactions that appear to 'cancel' each other out
        to artificially inflate volume or hide fund source.
        Args:
            account_id: Account to analyze
            threshold_seconds: Time window for pair matching
            amount_tolerance: Relative tolerance for amount matching
        Returns:
            Detected mirror pairs, highest score first; empty list on error.
        """
        logger.info(f"Running Mirror Detection for account {account_id}")
        try:
            history = await self._get_transactions(account_id)
            if len(history) < 2:
                return []
            detected: list[dict[str, Any]] = []
            consumed: set[str] = set()
            for idx, first in enumerate(history):
                if first["id"] in consumed:
                    continue
                for second in history[idx + 1 :]:
                    if second["id"] in consumed:
                        continue
                    is_mirror, score = self._check_mirror(
                        first, second, threshold_seconds, amount_tolerance
                    )
                    if not is_mirror:
                        continue
                    digest = hashlib.md5(
                        (first["id"] + second["id"]).encode()
                    ).hexdigest()[:8]
                    detected.append(
                        {
                            "pair_id": f"pair_{digest}",
                            "tx1": self._summarize(first),
                            "tx2": self._summarize(second),
                            "score": round(score, 2),
                            "time_gap_seconds": self._time_diff_seconds(
                                first["date"], second["date"]
                            ),
                            "label": (
                                "Potential Wash Trade"
                                if score > 0.9
                                else "Suspicious Pair"
                            ),
                        }
                    )
                    consumed.update((first["id"], second["id"]))
                    # `first` is now paired; advance the outer scan.
                    break
            # Highest-scoring pairs first.
            detected.sort(key=lambda p: p["score"], reverse=True)
            return detected
        except Exception as e:
            logger.error(f"Mirror detection failed: {e}")
            return []

    @staticmethod
    def _summarize(txn: dict) -> dict[str, Any]:
        """Condense a transaction into the pair-report leg shape."""
        when = txn["date"]
        return {
            "id": txn["id"],
            "amount": txn["amount"],
            "type": "DEBIT" if txn["amount"] < 0 else "CREDIT",
            "time": (
                when.strftime("%H:%M:%S") if hasattr(when, "strftime") else str(when)
            ),
        }

    async def _get_transactions(self, account_id: str) -> list[dict]:
        """Load up to 500 chronological transactions; empty list on any failure."""
        try:
            from core.database import Transaction

            rows = (
                self.db.query(Transaction)
                .filter_by(account_id=account_id)
                .order_by(Transaction.created_at)
                .limit(500)
                .all()
            )
            return [
                {"id": str(row.id), "amount": row.amount, "date": row.created_at}
                for row in rows
            ]
        except Exception:
            return []

    def _check_mirror(
        self, tx1: dict, tx2: dict, threshold_seconds: int, tolerance: float
    ) -> tuple:
        """Score two transactions as a potential mirror pair.

        Returns (is_mirror, score); score is 0.0 whenever any gate fails.
        """
        magnitude_a = abs(tx1["amount"])
        magnitude_b = abs(tx2["amount"])
        # Gate 1: both legs must actually move money.
        if not magnitude_a or not magnitude_b:
            return False, 0.0
        # Gate 2: magnitudes must agree within the relative tolerance.
        relative_gap = abs(magnitude_a - magnitude_b) / max(magnitude_a, magnitude_b)
        if relative_gap > tolerance:
            return False, 0.0
        # Gate 3: one leg in, one leg out.
        if (tx1["amount"] > 0) == (tx2["amount"] > 0):
            return False, 0.0
        # Gate 4: close enough in time.
        gap_seconds = self._time_diff_seconds(tx1["date"], tx2["date"])
        if gap_seconds > threshold_seconds:
            return False, 0.0
        # Weighted blend: amount similarity dominates, recency refines.
        score = (1 - relative_gap) * 0.6 + (1 - gap_seconds / threshold_seconds) * 0.4
        return score > 0.7, score

    def _time_diff_seconds(self, date1, date2) -> int:
        """Absolute gap in whole seconds; 0 when either value is not a datetime."""
        if isinstance(date1, datetime) and isinstance(date2, datetime):
            try:
                return abs(int((date2 - date1).total_seconds()))
            except Exception:
                return 0
        return 0
# Global accessibility for services
def get_forensic_intelligence(db):
    """Build the full suite of forensic intelligence services.

    Args:
        db: Database session shared by the session-backed engines.

    Returns:
        Mapping of service name to an initialized engine instance.
    """
    services: dict[str, Any] = {}
    services["triangulation"] = TriangulationEngine(db)
    services["libr"] = LIBRAlgorithm(db)
    services["mens_rea"] = MensReaEngine()
    services["mirror_matcher"] = TemporalPairMatcher(db)
    return services