# CySecGuardians / body_analyzer.py
# (Hugging Face Hub metadata: updated by princemaxp, commit 4fb4f18, 2.84 kB)
import requests
import os
import re
# Hugging Face Inference API credentials. Optional: when HF_API_KEY is unset,
# model-based checks are skipped (query_hf returns None) and only the keyword
# heuristics run.
HF_API_KEY = os.getenv("HF_API_KEY")
HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

# Hosted models used for each analysis stage in analyze_body.
MODELS = {
    "ai_detector": "roberta-base-openai-detector",
    "sentiment": "finiteautomata/bertweet-base-sentiment-analysis",
    "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
}

# Phishing/scam phrases matched case-insensitively against the message body.
# All groups are NON-capturing ((?:...)) on purpose: with capturing groups,
# re.findall returns only the group contents (e.g. "your " or ""), not the
# full matched phrase, which corrupted findings and highlighting.
SUSPICIOUS_PATTERNS = [
    r"verify your account",
    r"urgent action",
    r"click here",
    r"reset (?:your )?password",
    r"confirm (?:your )?identity",
    r"bank account",
    r"invoice",
    r"payment (?:required|overdue|failed|method expired)",
    r"unauthorized login",
    r"compromised",
    r"final reminder",
    r"account (?:suspended|deactivated|locked)",
    r"update your (?:information|details|billing)",
    r"legal action",
]
def query_hf(model, text):
    """Query the Hugging Face Inference API for ``model`` on ``text``.

    Best-effort helper: returns the decoded JSON response on success, or
    ``None`` when no API key is configured or the request/decoding fails.
    Callers treat ``None`` as "no signal from this model".
    """
    if not HF_API_KEY:
        return None
    try:
        res = requests.post(
            f"https://api-inference.huggingface.co/models/{model}",
            headers=HF_HEADERS,
            # Truncate input — hosted inference endpoints reject very long payloads.
            json={"inputs": text[:1000]},
            # Without a timeout a stalled endpoint would hang the whole analysis.
            timeout=30,
        )
        return res.json()
    except Exception:
        # Deliberate best-effort: any network/HTTP/JSON failure degrades to None.
        return None
def _top_prediction(result):
    """Extract (label, score) from an HF classification response, else None.

    Handles the common shapes ``[{"label": ..., "score": ...}]`` and the
    nested ``[[{...}, ...]]`` variant, and rejects error payloads (e.g.
    ``{"error": ...}``) that would otherwise raise KeyError/TypeError.
    """
    if not (result and isinstance(result, list)):
        return None
    first = result[0]
    if isinstance(first, list) and first:  # some endpoints nest one level deeper
        first = first[0]
    if isinstance(first, dict) and "label" in first and "score" in first:
        return first["label"], first["score"]
    return None


def analyze_body(text):
    """Scan a message body for phishing signals.

    Combines keyword heuristics with three optional Hugging Face models
    (AI-text detector, sentiment, spam). Returns a tuple
    ``(findings, score, highlighted_body)`` where ``highlighted_body`` is
    ``text`` with each suspicious phrase wrapped in ``<mark>`` tags.
    """
    findings = []
    score = 0
    body_lower = text.lower()
    highlighted_body = text

    # --- 1. Suspicious keyword detection ---
    for pattern in SUSPICIOUS_PATTERNS:
        # finditer + group(0) reports the full matched phrase even when the
        # pattern contains groups (re.findall would return group text only).
        for m in re.finditer(pattern, body_lower):
            findings.append(f"Suspicious phrase detected: \"{m.group(0)}\"")
            score += 20
        # Highlight in the original-cased text. Substituting once per PATTERN
        # (with a callable replacement that keeps the original casing) avoids
        # the old bug of re-running sub per match, which nested <mark> tags on
        # repeated phrases and treated matched text as a regex.
        highlighted_body = re.sub(
            pattern,
            lambda m: f"<mark>{m.group(0)}</mark>",
            highlighted_body,
            flags=re.IGNORECASE,
        )

    # --- 2. AI-generated text detection (informational only, no score) ---
    pred = _top_prediction(query_hf(MODELS["ai_detector"], text))
    if pred:
        label, confidence = pred
        findings.append(f"Body: AI Detector β†’ {label} (confidence {confidence:.2f})")

    # --- 3. Sentiment analysis (negative tone is a weak phishing signal) ---
    pred = _top_prediction(query_hf(MODELS["sentiment"], text))
    if pred:
        label, confidence = pred
        findings.append(f"Body: Sentiment β†’ {label} (confidence {confidence:.2f})")
        if label.lower() == "negative":
            score += 10

    # --- 4. Spam vs Ham detection ---
    pred = _top_prediction(query_hf(MODELS["spam"], text))
    if pred:
        label, confidence = pred
        findings.append(f"Body: Spam Detector β†’ {label} (confidence {confidence:.2f})")
        if label.lower() == "spam":
            score += 20

    if not findings:
        return ["No suspicious content detected in body."], 0, text
    return findings, score, highlighted_body