Spaces:

Gankit12
/

scam

Sleeping

App Files Files Community

scam / app /models /detector.py

Gankit12

Evaluation fixes: camelCase response, engagementMetrics, 15 scam types, intel extraction, callback triggers

66baff0 about 1 month ago

raw

history blame contribute delete

21.7 kB

	"""
	Scam Detection Module using IndicBERT.

	Provides hybrid scam detection combining:
	- IndicBERT transformer model for semantic classification
	- Keyword matching for known scam patterns
	- Multi-language support (English, Hindi, Hinglish)

	Acceptance Criteria:
	- AC-1.2.1: Achieves >90% accuracy on test dataset
	- AC-1.2.2: False positive rate <5%
	- AC-1.2.3: Inference time <500ms per message
	- AC-1.2.4: Handles messages up to 5000 characters
	- AC-1.2.5: Returns calibrated confidence scores (not just 0/1)
	"""

	import os
	import re
	import time
	from typing import Dict, List, Optional, Tuple

	import torch

	from app.config import settings
	from app.utils.logger import get_logger
	from app.utils.preprocessing import clean_text, convert_devanagari_digits

	logger = get_logger(__name__)

	# Score combination weights
	# When BERT is fine-tuned, use higher BERT weight
	# When using base BERT (not fine-tuned), rely more on keywords
	BERT_WEIGHT_FINETUNED = 0.6
	BERT_WEIGHT_BASE = 0.2 # Lower weight for non-fine-tuned BERT
	KEYWORD_WEIGHT_FINETUNED = 0.4
	KEYWORD_WEIGHT_BASE = 0.8 # Higher weight when BERT is not fine-tuned

	# Scam detection threshold
	SCAM_THRESHOLD = 0.6 # Lowered from 0.7 for better recall

	# Maximum message length for processing
	MAX_MESSAGE_LENGTH = 5000


	class ScamDetector:
	"""
	Hybrid scam detection using IndicBERT and keyword matching.

	Combines transformer-based semantic analysis with rule-based
	keyword matching for robust scam detection across English,
	Hindi, and Hinglish messages.

	Attributes:
	model: IndicBERT model for sequence classification
	tokenizer: Tokenizer for IndicBERT
	en_keywords: English scam keyword list
	hi_keywords: Hindi scam keyword list
	_model_loaded: Flag indicating if BERT model is available
	"""

	# Class-level model cache for singleton pattern
	_cached_model = None
	_cached_tokenizer = None
	_model_load_attempted = False

	def __init__(self, load_model: bool = True) -> None:
	"""
	Initialize the ScamDetector with IndicBERT model and keywords.

	Args:
	load_model: Whether to load the BERT model (can be False for testing)
	"""
	self._model_loaded = False
	self._model_finetuned = False # Track if model is fine-tuned for scam detection
	self.model = None
	self.tokenizer = None

	# English scam keywords (comprehensive list)
	self.en_keywords: List[str] = [
	# Prize/Lottery scams
	"won", "winner", "prize", "lottery", "congratulations", "claim",
	"selected", "lucky", "reward", "jackpot", "lakh", "crore",
	# Financial scams
	"otp", "bank", "account", "transfer", "payment", "upi",
	"verify", "blocked", "suspended", "deactivated", "kyc",
	"credit card", "debit card", "cvv", "pin",
	# Authority impersonation
	"police", "arrest", "court", "legal", "investigation",
	"warrant", "fine", "penalty", "department",
	# Utility/Bill scams
	"electricity", "electric bill", "power bill", "power cut",
	"disconnection", "utility", "gas bill", "water bill",
	"pending bill", "overdue", "outstanding dues",
	# Job/Employment scams
	"job offer", "work from home", "earn from home", "hiring",
	"salary", "employment opportunity",
	# Tax scams
	"income tax", "tax notice", "tax department", "it department",
	# Tech support scams
	"tech support", "computer virus", "microsoft support",
	# Government scheme scams
	"government scheme", "subsidy", "pm scheme", "govt scheme",
	# Urgency triggers
	"urgent", "immediately", "now", "today", "expire", "last chance",
	"limited time", "hurry", "before", "deadline",
	# Action requests
	"click", "call", "send", "share", "confirm", "update",
	"reactivate", "unblock", "incomplete",
	# Product scams
	"iphone", "samsung", "free", "gift",
	]

	# Hindi scam keywords (Devanagari)
	self.hi_keywords: List[str] = [
	# Prize/Lottery
	"जीत", "जीता", "जीते", "विजेता", "इनाम", "लॉटरी", "बधाई", "पुरस्कार",
	# Financial
	"ओटीपी", "बैंक", "खाता", "ट्रांसफर", "भुगतान", "यूपीआई",
	"वेरिफाई", "ब्लॉक", "सस्पेंड", "बंद",
	# Authority
	"पुलिस", "गिरफ्तार", "गिरफ्तारी", "कोर्ट", "कानूनी", "जांच",
	"वारंट", "जुर्माना",
	# Urgency
	"तुरंत", "अभी", "आज", "जल्दी", "फौरन",
	# Action
	"भेजें", "शेयर", "कॉल", "क्लिक",
	]

	# Romanized Hindi keywords (Hinglish)
	self.hinglish_keywords: List[str] = [
	"jeeta", "jeete", "jeet", "inaam", "lottery",
	"otp", "bank", "account", "paisa", "paise", "rupees", "rupaye",
	"police", "giraftar", "arrest", "court",
	"turant", "abhi", "jaldi", "foran",
	"bhejo", "share", "call", "click",
	]

	# Scam patterns (regex)
	self.scam_patterns = [
	r"₹\s\d+\s(lakh\|crore\|lac\|cr)", # Money amounts
	r"\d+\s(lakh\|crore\|lac\|cr)\s(rupees?)?", # Money amounts
	r"won\s+.*?(prize\|lottery\|reward)", # Prize winning
	r"(send\|share)\s+.*?otp", # OTP requests
	r"account\s+.*?(block\|suspend\|deactivat)", # Account threats
	r"(arrest\|गिरफ्तार)", # Arrest threats
	r"call\s+.*?\+?91[\s-]?\d{10}", # Call with phone number
	]

	# Load BERT model if requested
	if load_model:
	self._load_model()

	# Class-level flag for fine-tuned model
	_cached_model_finetuned = False

	def _load_model(self) -> None:
	"""
	Load IndicBERT model and tokenizer.

	Prioritizes loading fine-tuned model from local directory.
	Falls back to base IndicBERT model from HuggingFace.
	Falls back to keyword-only detection if model unavailable.
	"""
	# Use cached model if available
	if ScamDetector._cached_model is not None:
	self.model = ScamDetector._cached_model
	self.tokenizer = ScamDetector._cached_tokenizer
	self._model_loaded = True
	self._model_finetuned = ScamDetector._cached_model_finetuned
	logger.debug(f"Using cached model (fine-tuned: {self._model_finetuned})")
	return

	# Skip if already attempted and failed
	if ScamDetector._model_load_attempted:
	logger.debug("Skipping model load (previous attempt failed)")
	return

	ScamDetector._model_load_attempted = True

	try:
	from transformers import AutoModel, AutoModelForSequenceClassification, AutoTokenizer

	# First, try to load fine-tuned model from local directory
	finetuned_path = os.path.join(
	os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
	"models",
	"scam_detector",
	"latest"
	)

	if os.path.exists(finetuned_path):
	logger.info(f"Loading fine-tuned model from: {finetuned_path}")
	start_time = time.time()

	self.tokenizer = AutoTokenizer.from_pretrained(finetuned_path)
	self.model = AutoModelForSequenceClassification.from_pretrained(finetuned_path)
	self.model.eval()
	self._model_finetuned = True

	# Cache for future instances
	ScamDetector._cached_model = self.model
	ScamDetector._cached_tokenizer = self.tokenizer
	ScamDetector._cached_model_finetuned = True

	load_time = time.time() - start_time
	logger.info(f"Fine-tuned model loaded in {load_time:.2f}s")
	self._model_loaded = True
	return

	# Fall back to base IndicBERT model
	model_name = settings.INDICBERT_MODEL
	token = settings.HUGGINGFACE_TOKEN
	token_kwargs = {"token": token} if token else {}

	logger.info(f"Loading base IndicBERT model: {model_name}")
	start_time = time.time()

	self.tokenizer = AutoTokenizer.from_pretrained(model_name, **token_kwargs)
	self.model = AutoModel.from_pretrained(model_name, **token_kwargs)
	self.model.eval()
	self._model_finetuned = False

	# Cache for future instances
	ScamDetector._cached_model = self.model
	ScamDetector._cached_tokenizer = self.tokenizer
	ScamDetector._cached_model_finetuned = False

	load_time = time.time() - start_time
	logger.info(f"Base IndicBERT loaded in {load_time:.2f}s")
	self._model_loaded = True

	except ImportError as e:
	logger.warning(f"transformers not installed: {e}")
	logger.warning("Falling back to keyword-only detection")
	except Exception as e:
	error_msg = str(e).lower()
	if "gated" in error_msg or "access" in error_msg:
	logger.warning("IndicBERT requires HuggingFace authentication")
	logger.warning("Set HUGGINGFACE_TOKEN environment variable")
	else:
	logger.warning(f"Failed to load IndicBERT: {e}")
	logger.warning("Falling back to keyword-only detection")

	def detect(self, message: str, language: str = "auto") -> Dict:
	"""
	Detect if a message is a scam.

	Uses hybrid approach combining:
	1. IndicBERT semantic classification (60% weight)
	2. Keyword matching (40% weight)

	Args:
	message: Input text to analyze (max 5000 chars)
	language: Language code ('auto', 'en', 'hi', 'hinglish')

	Returns:
	Dict containing:
	- scam_detected: bool (True if confidence > 0.7)
	- confidence: float (0.0-1.0)
	- language: str (detected or provided language)
	- indicators: List[str] (matched keywords/patterns)
	"""
	start_time = time.time()

	# Handle empty message
	if not message or not message.strip():
	logger.debug("Empty message, returning not scam")
	return {
	"scam_detected": False,
	"confidence": 0.0,
	"language": language if language != "auto" else "en",
	"indicators": [],
	}

	# Clean and truncate message
	message = clean_text(message)
	if len(message) > MAX_MESSAGE_LENGTH:
	message = message[:MAX_MESSAGE_LENGTH]
	logger.debug(f"Message truncated to {MAX_MESSAGE_LENGTH} chars")

	# Detect language if auto
	detected_language = language
	if language == "auto":
	from app.models.language import detect_language
	detected_language, _ = detect_language(message)

	# Calculate keyword score
	keyword_score, indicators = self._keyword_match(message, detected_language)

	# Calculate BERT score (if model available)
	if self._model_loaded:
	bert_score = self._bert_classify(message)
	# Use appropriate weights based on whether BERT is fine-tuned
	if self._model_finetuned:
	final_confidence = BERT_WEIGHT_FINETUNED * bert_score + KEYWORD_WEIGHT_FINETUNED * keyword_score
	else:
	# Non-fine-tuned BERT: rely more on keywords
	final_confidence = BERT_WEIGHT_BASE * bert_score + KEYWORD_WEIGHT_BASE * keyword_score
	else:
	# Keyword-only fallback
	final_confidence = keyword_score

	# Check pattern matches for additional indicators
	pattern_indicators = self._pattern_match(message)
	indicators.extend(pattern_indicators)

	# Boost confidence if strong pattern matches found
	if pattern_indicators:
	pattern_boost = min(len(pattern_indicators) * 0.1, 0.2)
	final_confidence = min(1.0, final_confidence + pattern_boost)

	# Determine if scam
	scam_detected = final_confidence >= SCAM_THRESHOLD

	# Log detection
	elapsed_ms = (time.time() - start_time) * 1000
	logger.debug(
	f"Detection: scam={scam_detected}, conf={final_confidence:.2f}, "
	f"lang={detected_language}, time={elapsed_ms:.0f}ms"
	)

	return {
	"scam_detected": scam_detected,
	"confidence": float(round(final_confidence, 4)),
	"language": detected_language,
	"indicators": list(set(indicators)), # Remove duplicates
	}

	def _keyword_match(self, message: str, language: str) -> Tuple[float, List[str]]:
	"""
	Calculate keyword-based scam score.

	Args:
	message: Input text
	language: Language code ('en', 'hi', 'hinglish')

	Returns:
	Tuple of (score, matched_keywords)
	Score is normalized to 0.0-1.0
	"""
	# Convert message to lowercase and normalize Devanagari digits
	message_lower = message.lower()
	message_normalized = convert_devanagari_digits(message_lower)

	matched_keywords = []

	# Check English keywords (always check for code-mixing)
	for kw in self.en_keywords:
	if kw.lower() in message_lower:
	matched_keywords.append(kw)

	# Check Hindi keywords if language suggests Hindi content
	if language in ["hi", "hinglish"] or self._has_devanagari(message):
	for kw in self.hi_keywords:
	if kw in message:
	matched_keywords.append(kw)

	# Check Hinglish/romanized keywords
	for kw in self.hinglish_keywords:
	if kw in message_lower:
	matched_keywords.append(kw)

	# Calculate score based on number of matches
	# More keywords = higher confidence, with diminishing returns
	match_count = len(set(matched_keywords))

	if match_count == 0:
	score = 0.0
	elif match_count == 1:
	score = 0.3
	elif match_count == 2:
	score = 0.5
	elif match_count == 3:
	score = 0.7
	elif match_count == 4:
	score = 0.85
	else:
	score = min(0.95, 0.85 + (match_count - 4) * 0.02)

	return score, matched_keywords

	def _bert_classify(self, message: str) -> float:
	"""
	Classify message using BERT model.

	If model is fine-tuned for sequence classification, uses direct prediction.
	Otherwise, uses embedding-based heuristic approach.

	Args:
	message: Input text

	Returns:
	Scam probability between 0.0 and 1.0
	"""
	if not self._model_loaded:
	return 0.0

	try:
	# Tokenize with truncation
	inputs = self.tokenizer(
	message,
	return_tensors="pt",
	truncation=True,
	max_length=512,
	padding=True,
	)

	with torch.no_grad():
	outputs = self.model(**inputs)

	# Fine-tuned model: use logits directly
	if self._model_finetuned and hasattr(outputs, 'logits'):
	logits = outputs.logits
	probs = torch.softmax(logits, dim=-1)
	# Return probability of class 1 (scam)
	scam_prob = probs[0, 1].item()
	return scam_prob

	# Base model: use embedding-based heuristic
	# Get mean pooled embedding
	# Shape: [batch_size, seq_len, hidden_size]
	last_hidden = outputs.last_hidden_state

	# Mean pooling over sequence length
	attention_mask = inputs["attention_mask"]
	mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden.size()).float()
	sum_embeddings = torch.sum(last_hidden * mask_expanded, dim=1)
	sum_mask = torch.clamp(mask_expanded.sum(dim=1), min=1e-9)
	embeddings = sum_embeddings / sum_mask

	# Calculate embedding magnitude as a proxy for unusual content
	# Scam messages often have unusual patterns
	embedding_norm = torch.norm(embeddings, dim=-1).item()

	# Normalize to 0-1 range (empirically calibrated)
	# Higher norm often indicates more unusual/emphatic content
	normalized_score = min(1.0, max(0.0, (embedding_norm - 5.0) / 15.0))

	return normalized_score

	except Exception as e:
	logger.warning(f"BERT classification error: {e}")
	return 0.0

	def _pattern_match(self, message: str) -> List[str]:
	"""
	Match scam patterns using regex.

	Args:
	message: Input text

	Returns:
	List of matched pattern descriptions
	"""
	matched_patterns = []
	message_lower = message.lower()

	for pattern in self.scam_patterns:
	try:
	if re.search(pattern, message_lower, re.IGNORECASE):
	# Add a descriptive indicator based on pattern
	if "lakh" in pattern or "crore" in pattern:
	matched_patterns.append("money_amount")
	elif "prize" in pattern or "lottery" in pattern:
	matched_patterns.append("prize_winning")
	elif "otp" in pattern:
	matched_patterns.append("otp_request")
	elif "block" in pattern or "suspend" in pattern:
	matched_patterns.append("account_threat")
	elif "arrest" in pattern or "गिरफ्तार" in pattern:
	matched_patterns.append("arrest_threat")
	elif "call" in pattern:
	matched_patterns.append("phone_number")
	except re.error as e:
	logger.warning(f"Regex error for pattern {pattern}: {e}")

	return matched_patterns

	def _extract_indicators(self, message: str, language: str) -> List[str]:
	"""
	Extract scam indicators found in message.

	Args:
	message: Input text
	language: Language code

	Returns:
	List of matched keywords/indicators
	"""
	_, indicators = self._keyword_match(message, language)
	pattern_indicators = self._pattern_match(message)
	indicators.extend(pattern_indicators)
	return list(set(indicators))

	def _has_devanagari(self, text: str) -> bool:
	"""Check if text contains Devanagari characters."""
	return any("\u0900" <= char <= "\u097F" for char in text)


	def detect_scam(message: str, language: str = "auto") -> Tuple[bool, float, List[str]]:
	"""
	Convenience function for scam detection.

	Args:
	message: Input text
	language: Language code ('auto', 'en', 'hi', 'hinglish')

	Returns:
	Tuple of (scam_detected, confidence, indicators)
	"""
	# Use singleton pattern for efficiency
	if not hasattr(detect_scam, "_detector"):
	detect_scam._detector = ScamDetector()

	result = detect_scam._detector.detect(message, language)
	return result["scam_detected"], result["confidence"], result["indicators"]


	def reset_detector_cache() -> None:
	"""
	Reset the detector model cache.

	Useful for testing or when model needs to be reloaded.
	"""
	global _singleton_detector
	ScamDetector._cached_model = None
	ScamDetector._cached_tokenizer = None
	ScamDetector._model_load_attempted = False
	if hasattr(detect_scam, "_detector"):
	delattr(detect_scam, "_detector")
	_singleton_detector = None
	logger.info("Detector cache reset")


	# Singleton detector instance
	_singleton_detector: Optional[ScamDetector] = None


	def get_detector() -> ScamDetector:
	"""
	Get singleton ScamDetector instance.

	Returns:
	ScamDetector instance
	"""
	global _singleton_detector
	if _singleton_detector is None:
	_singleton_detector = ScamDetector()
	return _singleton_detector