# email-phish-api / categorization.py
# Rasel Santillan
# Squashed clean history
# c68555f
"""
Risk categorization module for phishing detection results.
This module provides functions to categorize phishing probability scores
into risk categories and binary classifications.
"""
from enum import Enum
from typing import Tuple
class RiskCategory(str, Enum):
    """Risk category based on phishing probability score.

    The ``str`` mixin makes every member a ``str`` instance that compares
    equal to its value (e.g. ``RiskCategory.SAFE == "Safe"``).

    Members are listed from lowest to highest risk. The score bands noted
    below are the ones applied by ``get_risk_category`` on the 0-100 scale.
    """

    SAFE = "Safe"  # score <= 30
    LOW = "Low"  # 30 < score <= 50
    MODERATE = "Moderate"  # 50 < score <= 70
    HIGH = "High"  # 70 < score <= 85
    CRITICAL = "Critical"  # score > 85
class BinaryClassification(str, Enum):
    """Binary classification of phishing detection result.

    The ``str`` mixin makes every member a ``str`` instance that compares
    equal to its value (e.g. ``BinaryClassification.PHISHING == "Phishing"``).
    """

    LEGITIMATE = "Legitimate"  # score at or below PHISHING_THRESHOLD
    PHISHING = "Phishing"  # score above PHISHING_THRESHOLD
# Risk category thresholds (score is 0-100 scale), listed in ascending order.
# Each value is a (lower, upper) pair; per the inline notes a band covers
# lower < score <= upper, with two irregular endpoints: Safe also includes its
# lower bound of 0, and Critical's upper bound of 101 lies beyond the scale.
RISK_THRESHOLDS = {
    RiskCategory.SAFE: (0, 30),  # 0-30%: score <= 30
    RiskCategory.LOW: (30, 50),  # 31-50%: 30 < score <= 50
    RiskCategory.MODERATE: (50, 70),  # 51-70%: 50 < score <= 70
    RiskCategory.HIGH: (70, 85),  # 71-85%: 70 < score <= 85
    RiskCategory.CRITICAL: (85, 101),  # 86-100%: score > 85
}

# Binary classification threshold (0-100 scale). The cut-off is exclusive: a
# score of exactly 70 is still Legitimate.
PHISHING_THRESHOLD = 70  # score > 70 is classified as Phishing
def get_risk_category(phish_probability_score: float) -> RiskCategory:
    """
    Determine the risk category based on phishing probability score.

    The bands are read from ``RISK_THRESHOLDS`` rather than repeated here as
    literals, so the table is the single source of truth. A score is assigned
    to the first band whose upper bound it does not exceed. With the table's
    current values that means:

    - Safe: score <= 30
    - Low: 30 < score <= 50
    - Moderate: 50 < score <= 70
    - High: 70 < score <= 85
    - Critical: score > 85

    Args:
        phish_probability_score: Phishing probability score (0-100 scale).
            Out-of-range input is not rejected: negative scores fall into
            Safe, and anything that fits no band (a score above every upper
            bound, or NaN, which fails every comparison) falls into Critical.

    Returns:
        RiskCategory: The corresponding risk category
    """
    # RISK_THRESHOLDS is declared in ascending order and dicts preserve
    # insertion order, so the first upper bound that covers the score
    # identifies its band. Only the upper bound is needed for the lookup.
    for category, (_lower, upper) in RISK_THRESHOLDS.items():
        if phish_probability_score <= upper:
            return category

    # No band matched: treat the score as the highest risk level.
    return RiskCategory.CRITICAL
def get_binary_classification(phish_probability_score: float) -> BinaryClassification:
    """
    Map a phishing probability score to a Legitimate/Phishing verdict.

    Args:
        phish_probability_score: Phishing probability score (0-100 scale)

    Returns:
        BinaryClassification: Legitimate when the score is at or below
        PHISHING_THRESHOLD, Phishing in every other case.
    """
    # The test is phrased as "<= threshold" on purpose: a value that fails
    # the comparison (such as NaN) ends up on the Phishing side.
    return (
        BinaryClassification.LEGITIMATE
        if phish_probability_score <= PHISHING_THRESHOLD
        else BinaryClassification.PHISHING
    )
def categorize_phishing_result(phish_probability: float) -> Tuple[RiskCategory, BinaryClassification, float]:
    """
    Categorize a phishing detection result.

    This function takes a phishing probability (0-1 scale) and returns:
    - Risk category (Safe, Low, Moderate, High, Critical)
    - Binary classification (Legitimate or Phishing)
    - The probability score on a 0-100 scale

    Args:
        phish_probability: Phishing probability from the model (0-1 scale).
            The value is not range-checked before it is scaled.

    Returns:
        Tuple containing:
        - RiskCategory: The risk category
        - BinaryClassification: The binary classification
        - float: The probability score on 0-100 scale. It is the plain
          product ``phish_probability * 100`` with no rounding, so it can
          carry floating-point noise (e.g. 0.57 -> 56.99999999999999).
    """
    # Both classifiers work on the 0-100 scale, so convert once and reuse
    # the same value for the two verdicts and for the returned score.
    score_100 = phish_probability * 100
    return (
        get_risk_category(score_100),
        get_binary_classification(score_100),
        score_100,
    )