Spaces:
Runtime error
Runtime error
| import re | |
| import tldextract | |
| from rapidfuzz import fuzz | |
| import gradio as gr | |
# --- Labels & Regex ---
# Candidate labels handed to the zero-shot classifier.
LABELS = ["urgent", "fear", "authority", "financial scam", "safe"]

# Regex cues grouped by manipulation category; every pattern is matched
# case-insensitively against the message text.
CUES = {
    "urgency": [r"\burgent\b", r"\bimmediately\b", r"\bverify now\b", r"\blimited time\b"],
    "fear": [r"\bsuspended\b", r"\block(ed)?\b", r"\blegal action\b", r"\bunauthorized\b"],
    "authority": [r"\bCEO\b", r"\badmin\b", r"\bIT support\b", r"\bgovernment\b"],
    "financial": [r"\bwin\b", r"\bprize\b", r"\blottery\b", r"\binvestment\b"],
}

# Registered domains that look-alike (typosquatting) checks compare against.
TRUSTED_DOMAINS = ["google.com", "paypal.com", "microsoft.com", "amazon.com", "facebook.com", "apple.com"]

# Public suffixes treated as a risk signal on their own.
SUSPICIOUS_TLDS = ["xyz", "top", "tk", "gq", "cf", "ml"]

# Matches, in priority order: a URL with an http(s) scheme, a "www." host,
# or a bare host name. The bare-host branch accepts any number of
# dot-separated labels so that "mail.google.com" or "paypa1.com.xyz" is
# captured whole instead of being cut off after the first dot.
URL_PATTERN = re.compile(
    r"(https?://[^\s]+|www\.[^\s]+|\b(?:[a-zA-Z0-9-]+\.)+[a-z]{2,}\b)"
)
# --- Lazy-load Hugging Face model ---
# Holds the pipeline once it has been built; None until the first request.
classifier = None


def get_classifier():
    """Return the shared zero-shot classification pipeline.

    The ``transformers`` import and the pipeline construction are deferred
    until the first call. The created pipeline is stored in the module-level
    ``classifier`` variable and returned as-is on every later call.
    """
    global classifier

    # Fast path: the pipeline already exists.
    if classifier is not None:
        return classifier

    from transformers import pipeline

    pipeline_options = {
        "model": "valhalla/distilbart-mnli-12-1",
        "device": -1,  # CPU
    }
    classifier = pipeline("zero-shot-classification", **pipeline_options)
    return classifier
# --- Analysis functions ---
def _strip_word_boundaries(pattern):
    """Return *pattern* without a leading and/or trailing ``\\b`` marker."""
    marker = r"\b"
    if pattern.startswith(marker):
        pattern = pattern[len(marker):]
    if pattern.endswith(marker):
        pattern = pattern[:-len(marker)]
    return pattern


def regex_analysis(text, cues=None):
    """Score *text* against keyword cues.

    Args:
        text: Message text to scan. Empty or ``None`` yields no findings.
        cues: Optional mapping of category name to a list of regex patterns.
            Defaults to the module-level ``CUES``.

    Returns:
        A ``(score, findings)`` tuple. ``score`` grows by 20 for every
        pattern that matches (case-insensitively); ``findings`` holds one
        human-readable line per matching pattern, in iteration order.
    """
    if cues is None:
        cues = CUES
    if not text:
        return 0, []

    findings = []
    score = 0
    for category, patterns in cues.items():
        for pat in patterns:
            if not re.search(pat, text, re.IGNORECASE):
                continue
            # Computed outside the f-string: a backslash inside an f-string
            # expression is a SyntaxError before Python 3.12. The marker is
            # removed as a prefix/suffix rather than with str.strip(), which
            # would also eat cue words that begin or end with "b".
            cue = _strip_word_boundaries(pat)
            findings.append(f"{category.capitalize()} cue detected: '{cue}'")
            score += 20
    return score, findings
def huggingface_analysis(text):
    """Score *text* with the zero-shot classifier.

    The text is classified against ``LABELS``; the two highest-scoring labels
    are kept, and any of them other than ``"safe"`` contributes
    ``int(confidence * 30)`` to the score plus one finding line.

    Args:
        text: Message text to classify.

    Returns:
        A ``(score, findings)`` tuple. Blank or missing text returns
        ``(0, [])`` without loading or calling the model.
    """
    # Blank input carries no signal, and the zero-shot pipeline is not meant
    # for empty sequences (an empty string appears to raise ValueError in
    # transformers -- re-verify when upgrading), so short-circuit here.
    if not text or not text.strip():
        return 0, []

    result = get_classifier()(text, LABELS)

    # Rank labels by confidence, highest first, and keep the top two.
    ranked = sorted(
        zip(result["labels"], result["scores"]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    risky = [(label, conf) for label, conf in ranked[:2] if label != "safe"]

    findings = [f"HuggingFace: {label} (confidence {conf:.2f})" for label, conf in risky]
    hf_score = sum(int(conf * 30) for _, conf in risky)
    return hf_score, findings
def url_analysis(url):
    """Score a single URL for common phishing indicators.

    Checks performed, each adding to the score when it fires:
      * URL does not start with ``https://`` (+25)
      * public suffix is listed in ``SUSPICIOUS_TLDS`` (+20)
      * host is a dotted-quad IP address after an http(s) scheme (+30)
      * registered domain is more than 80% similar to, but not equal to,
        one of ``TRUSTED_DOMAINS`` (+30, first hit only)

    Args:
        url: The URL (or bare host) string to inspect.

    Returns:
        A ``(score, findings)`` tuple; ``findings`` is a list of
        human-readable reasons in the order the checks ran.
    """
    findings = []
    score = 0

    ext = tldextract.extract(url)
    # Normalise case so the comparisons against the lowercase reference
    # lists below do not depend on how the URL was typed.
    suffix = ext.suffix.lower()
    domain = f"{ext.domain}.{suffix}".lower()

    if not url.lower().startswith("https://"):
        findings.append("Non-HTTPS connection detected")
        score += 25

    if suffix in SUSPICIOUS_TLDS:
        findings.append(f"Suspicious TLD detected: .{suffix}")
        score += 20

    # Case-insensitive to stay consistent with the scheme check above
    # ("HTTP://1.2.3.4" is as suspicious as "http://1.2.3.4").
    if re.match(r"^https?://\d+\.\d+\.\d+\.\d+", url, re.IGNORECASE):
        findings.append("IP address used instead of domain")
        score += 30

    for trusted in TRUSTED_DOMAINS:
        similarity = fuzz.ratio(domain, trusted)
        if similarity > 80 and domain != trusted:
            # The similarity may be a non-integer value; round it for display.
            findings.append(
                f"Possible typosquatting: {domain} vs {trusted} (similarity {similarity:.0f}%)"
            )
            score += 30
            break  # one look-alike finding per URL is enough

    return score, findings
def extract_url_from_text(text, pattern=None):
    """Return the first URL-like token found in *text*, or ``None``.

    Args:
        text: Free-form message text. Empty or ``None`` yields ``None``.
        pattern: Optional compiled regex used to locate the URL. Defaults to
            the module-level ``URL_PATTERN``.

    Returns:
        The matched URL with trailing sentence punctuation removed, or
        ``None`` when nothing matches.
    """
    if not text:
        return None
    if pattern is None:
        pattern = URL_PATTERN

    match = pattern.search(text)
    if match is None:
        return None

    # A greedy "non-whitespace" match swallows punctuation that merely ends
    # the sentence ("see http://x.com, then ..."); drop it so later domain
    # parsing sees a clean host.
    cleaned = match.group(0).rstrip(".,;:!?)]}>\"'")
    return cleaned or None
# --- Main analyze function for Gradio ---
def analyze(text):
    """Combine text and URL analysis into a single risk report.

    The text score (regex cues + classifier) is capped at 100 and weighted
    0.4; the score of the first URL found in the text is capped at 100 and
    weighted 0.6. The sum, truncated to an int and capped at 100, maps to
    "Low" (< 30), "Medium" (< 70) or "High".

    NOTE(review): when no URL is present the URL share is 0, so the total
    cannot exceed 40 and the result is never "High" -- confirm that this is
    the intended behaviour for text-only messages.

    Args:
        text: The message to analyse. Blank or ``None`` returns a neutral
            low-risk report without running any analysis.

    Returns:
        A dict with the keys ``"Score"``, ``"Risk Level"``, ``"Reasons"``
        (newline-joined findings) and ``"Extracted URL"``.
    """
    # Nothing to analyse: avoid feeding empty input to the regex and model.
    if not text or not text.strip():
        return {
            "Score": 0,
            "Risk Level": "Low",
            "Reasons": "",
            "Extracted URL": "None detected",
        }

    regex_score, regex_findings = regex_analysis(text)
    hf_score, hf_findings = huggingface_analysis(text)
    text_score = regex_score + hf_score

    url = extract_url_from_text(text)
    url_score, url_findings = 0, []
    if url:
        url_score, url_findings = url_analysis(url)

    weighted_text = min(text_score, 100) * 0.4
    weighted_url = min(url_score, 100) * 0.6
    total_score = min(100, int(weighted_text + weighted_url))

    if total_score < 30:
        risk_level = "Low"
    elif total_score < 70:
        risk_level = "Medium"
    else:
        risk_level = "High"

    reasons = regex_findings + hf_findings + url_findings
    return {
        "Score": total_score,
        "Risk Level": risk_level,
        "Reasons": "\n".join(reasons),
        "Extracted URL": url if url else "None detected",
    }
# --- Gradio Interface ---
def _analyze_for_ui(text):
    """Adapt ``analyze``'s dict result to the four ordered output boxes.

    With several output components, Gradio maps the returned values to the
    components by position, so the dict is unpacked in the same order the
    outputs are declared below.
    """
    report = analyze(text)
    return (
        report["Score"],
        report["Risk Level"],
        report["Reasons"],
        report["Extracted URL"],
    )


iface = gr.Interface(
    fn=_analyze_for_ui,
    inputs=gr.Textbox(lines=5, placeholder="Paste text here..."),
    outputs=[
        gr.Textbox(label="Score"),
        gr.Textbox(label="Risk Level"),
        gr.Textbox(label="Reasons"),
        gr.Textbox(label="Extracted URL"),
    ],
    title="Phishing / Scam Detector",
    description="Analyzes text for urgency, fear, authority, and financial scam cues, plus suspicious URLs.",
)

if __name__ == "__main__":
    iface.launch()