# PhisHunter/utils/heuristics.py
# NOTE(review): the original lines here were Hugging Face web-page residue
# ("moraeslucas's picture / First Commit with 25 files / fdff15a verified"),
# which is not valid Python; preserved as a comment so the module parses.
import re

import spacy
import yaml
import yake
from langdetect import LangDetectException, detect
# Load weighted heuristic rules from a YAML file
def load_rules(filepath="rules_weighted.yaml"):
    """Load the weighted rule set used by apply_heuristics().

    Args:
        filepath: Path to the YAML rules file (UTF-8 encoded).

    Returns:
        The parsed rule mapping. An empty dict is returned when the file
        is empty: yaml.safe_load() yields None for an empty document,
        which would break `rules.get("keywords", {})` in apply_heuristics.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
# Apply language-aware heuristic rules and compute a phishing score
def apply_heuristics(email_text, rules):
    """Score an email body against weighted keyword rules.

    Args:
        email_text: Raw email text.
        rules: Rule mapping as produced by load_rules(); expected shape is
            {"keywords": {category: {"global" | <lang-code>: [{"term", "weight"}]}}}.

    Returns:
        (reasons, total_score, lang): human-readable match explanations,
        the accumulated score (negations subtract, matches and links add),
        and the detected language code.
    """
    reasons = []
    total_score = 0.0
    lower = email_text.lower()
    # langdetect raises LangDetectException on text it cannot profile
    # (empty string, URLs/digits only); fall back to English so the
    # scorer still runs instead of crashing the whole pipeline.
    try:
        lang = detect(lower)
    except LangDetectException:
        lang = "en"
    # Negation phrases that reduce the score
    negations = [
        "não é urgente",
        "sem urgência",
        "não necessita ação",
        "não requer ação imediata",
        "sem necessidade imediata"
    ]
    for neg in negations:
        if neg in lower:
            reasons.append(f"Found negation: '{neg}' (reduces score)")
            total_score -= 0.5
    for category, keywords in rules.get("keywords", {}).items():
        # Global keywords apply regardless of the detected language
        for entry in keywords.get("global", []):
            pattern = entry["term"]
            weight = entry.get("weight", 1.0)
            # NOTE: terms are treated as regex patterns; IGNORECASE is
            # redundant on lowered text but kept for uppercase patterns.
            if re.search(pattern, lower, re.IGNORECASE):
                reasons.append(f"[{category}] Matched '{pattern}' (global, weight={weight})")
                total_score += weight
        # Keywords specific to the detected language
        for entry in keywords.get(lang, []):
            pattern = entry["term"]
            weight = entry.get("weight", 1.0)
            if re.search(pattern, lower, re.IGNORECASE):
                reasons.append(f"[{category}] Matched '{pattern}' ({lang}, weight={weight})")
                total_score += weight
    # Link heuristic: any URL raises the score by a flat 1.0
    urls = re.findall(r"http[s]?://\S+", email_text)
    if urls:
        reasons.append(f"Contains suspicious link(s): {', '.join(urls)}")
        total_score += 1.0
    return reasons, total_score, lang
# Keyword extraction via YAKE
def extract_keywords(email_text, lang="en"):
    """Return the top five YAKE keywords found in *email_text*.

    Args:
        email_text: Text to extract keywords from.
        lang: Language code passed to the YAKE extractor (default "en").

    Returns:
        A list of keyword strings, relevance scores discarded.
    """
    scored_terms = yake.KeywordExtractor(lan=lang, top=5).extract_keywords(email_text)
    return [term for term, _score in scored_terms]
# Combined explanation
def explain_email(email_text, rules):
    """Run the heuristics and return (reasons, score).

    Thin wrapper around apply_heuristics() that discards the detected
    language and exposes only the explanation list and the final score.
    """
    reasons, score, _lang = apply_heuristics(email_text, rules)
    return reasons, score