File size: 3,932 Bytes
519679e
 
 
 
 
 
 
 
 
 
64d98c0
519679e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4430286
 
519679e
 
 
 
 
 
 
 
 
 
 
 
 
 
c77b701
 
 
 
 
 
4e44cb6
c77b701
 
 
519679e
c77b701
519679e
c77b701
519679e
 
 
 
 
 
 
6507e25
519679e
6507e25
519679e
086b1fc
d4a0860
519679e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import gradio as gr
import pandas as pd
import numpy as np
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.ensemble import RandomForestClassifier

# ======================
# Load dataset
# ======================
# Names of the numeric feature columns. The same list (and order) is used to
# select training columns below and to order the single-row frame built at
# prediction time.
feature_cols = [
    "urgency_count",
    "suspicious_count",
    "link_count",
    "has_money",
    "msg_length",
    "caps_ratio",
    "exclamation_count",
    "sentiment_score",
]

# Pre-computed feature table; "risk_level" is the target column.
df = pd.read_csv("scam_features.csv")
X, y = df[feature_cols], df["risk_level"]

# ======================
# Train model
# ======================
# The forest is fitted once at import time on the full table (no hold-out
# split); random_state pins the result so restarts give the same model.
model = RandomForestClassifier(
    random_state=42,
    class_weight="balanced",
    max_depth=10,
    n_estimators=100,
).fit(X, y)

# ======================
# Feature logic
# ======================
# Shared VADER analyzer; extract_features() reads the "compound" value of its
# polarity_scores() result as the sentiment feature.
analyzer = SentimentIntensityAnalyzer()

# Phrases tested with a plain substring check against the lower-cased message.
# Each entry contributes at most 1 to "urgency_count", however many times it
# occurs. Because the check is a substring test, short entries such as "now"
# also match inside longer words (e.g. "know").
URGENCY_WORDS = ["urgent", "act now", "hurry", "limited time", "expires", "immediately",
                 "today only", "last chance", "asap", "quick", "now"]

# Keywords counted the same way (substring, once per entry) to produce
# "suspicious_count".
SUSPICIOUS_KEYWORDS = ["bank", "password", "verify", "account", "login", "click",
                       "winner", "prize", "congratulations", "claim", "free",
                       "cash", "loan", "refund"]

# Regular expressions searched in the lower-cased message; "has_money" is 1 as
# soon as any one of them matches. "free", "prize" and "cash" also appear in
# SUSPICIOUS_KEYWORDS, so those words feed both features.
MONEY_PATTERNS = [r"\$", r"€", r"£", r"\bfree\b", r"\bprize\b", r"\bcash\b"]

def extract_features(text):
    """Turn a raw message into the feature dict consumed by the classifier.

    The input is coerced with ``str()``. Keyword, link and money checks run
    on a lower-cased copy; length, capital-letter ratio, exclamation count
    and sentiment use the text as given.

    Returns a dict with these keys, in this order:
        urgency_count     -- how many URGENCY_WORDS entries occur as substrings
        suspicious_count  -- how many SUSPICIOUS_KEYWORDS entries occur as
                             substrings
        link_count        -- number of matches of the http/www/.com/.net/.org
                             pattern (one URL can therefore count several times)
        has_money         -- 1 if any MONEY_PATTERNS regex matches, else 0
        msg_length        -- number of characters in the message
        caps_ratio        -- upper-case letters / all letters (0 if no letters)
        exclamation_count -- number of "!" characters
        sentiment_score   -- VADER "compound" score
    """
    raw = str(text)
    lowered = raw.lower()

    # Single pass over the characters to tally letters and capitals.
    alpha_total = 0
    upper_total = 0
    for ch in raw:
        if ch.isalpha():
            alpha_total += 1
            if ch.isupper():
                upper_total += 1
    if alpha_total:
        caps_share = upper_total / alpha_total
    else:
        caps_share = 0

    urgency_hits = [phrase for phrase in URGENCY_WORDS if phrase in lowered]
    suspicious_hits = [word for word in SUSPICIOUS_KEYWORDS if word in lowered]
    link_hits = re.findall(r"(http|www|\.com|\.net|\.org)", lowered)

    money_flag = 0
    for pattern in MONEY_PATTERNS:
        if re.search(pattern, lowered):
            money_flag = 1
            break

    sentiment = analyzer.polarity_scores(raw)

    features = {}
    features["urgency_count"] = len(urgency_hits)
    features["suspicious_count"] = len(suspicious_hits)
    features["link_count"] = len(link_hits)
    features["has_money"] = money_flag
    features["msg_length"] = len(raw)
    features["caps_ratio"] = caps_share
    features["exclamation_count"] = raw.count("!")
    features["sentiment_score"] = sentiment["compound"]
    return features

# ======================
# Main function
# ======================
# Wording that forces a "high" verdict regardless of the classifier output:
# the standalone word "won" (not "wonderful", and not the contraction
# "won't") or the substring "click".
_FORCE_HIGH_PATTERN = re.compile(r"\bwon\b(?!['’]t)|click")


def analyze_message(text):
    """Classify a message and explain which scam indicators were found.

    Args:
        text: The message to assess. ``None`` and blank/whitespace-only
            input are treated as "nothing entered".

    Returns:
        A 3-tuple of strings ``(risk, confidence, flags)``:
        ``risk`` is the risk banner, ``confidence`` is the classifier's top
        class probability formatted as a percentage, and ``flags`` is one
        indicator per line. For empty input the tuple is
        ``("Please enter a message.", "", "")``.
    """
    # Guard against None as well as blank text so the UI callback never
    # raises AttributeError on a missing value.
    if text is None or not str(text).strip():
        return "Please enter a message.", "", ""
    text = str(text)

    feats = extract_features(text)
    X_new = pd.DataFrame([feats])[feature_cols]

    pred = model.predict(X_new)[0]
    # NOTE: confidence is always the classifier's own top-class probability;
    # it is not adjusted when the keyword rule below overrides the verdict.
    confidence = max(model.predict_proba(X_new)[0])

    red_flags = []
    if feats["suspicious_count"] >= 2:
        red_flags.append("suspicious keywords detected")
    if _FORCE_HIGH_PATTERN.search(text.lower()):
        # Rule-based override. Record it as an indicator so a HIGH verdict
        # is never shown together with "no obvious red flags".
        pred = "high"
        red_flags.append("prize or click-bait wording detected")
    if feats["urgency_count"] >= 1:
        red_flags.append("urgency language detected")
    if feats["link_count"] >= 1:
        red_flags.append("link detected")
    if feats["has_money"]:
        red_flags.append("money-related content")
    if feats["caps_ratio"] > 0.3:
        red_flags.append("excessive capital letters")
    if feats["exclamation_count"] >= 3:
        red_flags.append("too many exclamation marks")

    if not red_flags:
        red_flags = ["no obvious red flags"]

    # Any label other than "high"/"medium" is shown as low risk.
    if pred == "high":
        risk_display = "🔴 HIGH RISK"
    elif pred == "medium":
        risk_display = "🟠 MEDIUM RISK"
    else:
        risk_display = "🟢 LOW RISK"

    confidence_text = f"Confidence: {confidence:.0%}"
    flags_text = "\n".join(f"⚠️ {flag}" for flag in red_flags)

    return risk_display, confidence_text, flags_text

      
# ======================
# Interface
# ======================
# Input: one multi-line box for the message under review.
_message_box = gr.Textbox(
    lines=6,
    placeholder="Paste a suspicious SMS or email here...",
)

# Outputs: one box per element of the tuple returned by analyze_message(),
# in the same order.
_result_boxes = [
    gr.Textbox(label=caption)
    for caption in ("Risk Assessment", "Confidence", "Scam Indicators")
]

demo = gr.Interface(
    fn=analyze_message,
    inputs=_message_box,
    outputs=_result_boxes,
    title="AI Scam Detection Tool",
    description="Assess scam messages using machine learning and risk indicators to support fraud prevention.",
)

# Start the web app as soon as the module is executed.
demo.launch()