"""Heuristic scoring of e-mail text using weighted keyword rules.

The rules are read from a YAML file and applied per detected language;
negation phrases lower the score and hyperlinks raise it.
"""

import re
from typing import Any, Dict, Iterator, List, Optional, Tuple

import spacy
import yake
import yaml
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException

# Language code reported when langdetect cannot classify the text.
_UNKNOWN_LANGUAGE = "unknown"

# Scope key in the rules file whose entries apply regardless of language.
_GLOBAL_SCOPE = "global"

# Portuguese phrases that explicitly deny urgency; each one found in the
# text lowers the score by _NEGATION_PENALTY.
_NEGATION_PHRASES = (
    "não é urgente",
    "sem urgência",
    "não necessita ação",
    "não requer ação imediata",
    "sem necessidade imediata",
)
_NEGATION_PENALTY = 0.5

# Flat score added once when the text contains at least one link.
_LINK_SCORE = 1.0

# URI schemes are case-insensitive, so "HTTP://..." must match as well.
_URL_PATTERN = re.compile(r"https?://\S+", re.IGNORECASE)


def load_rules(filepath: str = "rules_weighted.yaml") -> Dict[str, Any]:
    """Load the weighted heuristic rules from a YAML file.

    Args:
        filepath: Path of the UTF-8 encoded YAML rules file.

    Returns:
        The parsed rules. An empty file yields an empty dict instead of
        ``None`` so the result can be passed to ``apply_heuristics`` as is.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document.
        return yaml.safe_load(f) or {}


def _detect_language(text: str) -> str:
    """Return the langdetect code for *text*, or ``"unknown"`` on failure.

    langdetect raises ``LangDetectException`` when the text has no usable
    features (empty, digits only, ...). Language-independent rules should
    still run in that case, so the failure is mapped to a sentinel.
    """
    # NOTE(review): langdetect documents its results as non-deterministic
    # unless DetectorFactory.seed is set; consider seeding at start-up.
    try:
        return detect(text)
    except LangDetectException:
        return _UNKNOWN_LANGUAGE


def _iter_matches(
    entries: Optional[List[Dict[str, Any]]],
    text: str,
    category: str,
    scope: str,
) -> Iterator[Tuple[str, float]]:
    """Yield ``(reason, weight)`` for every rule entry whose regex matches.

    ``entries`` may be ``None`` (scope missing or left empty in the YAML).
    Each entry must provide ``term`` (a regex); ``weight`` defaults to 1.0.
    """
    for entry in entries or []:
        pattern = entry["term"]
        weight = entry.get("weight", 1.0)
        if re.search(pattern, text, re.IGNORECASE):
            yield (
                f"[{category}] Matched '{pattern}' ({scope}, weight={weight})",
                weight,
            )


def apply_heuristics(
    email_text: str, rules: Optional[Dict[str, Any]]
) -> Tuple[List[str], float, str]:
    """Score an e-mail against the weighted rules.

    The score is built from three sources, in this order:

    1. each known negation phrase found in the text subtracts 0.5;
    2. for every category under ``rules["keywords"]``, each matching
       ``global`` entry and then each matching entry for the detected
       language adds its weight;
    3. the presence of at least one ``http(s)://`` link adds 1.0.

    Args:
        email_text: Raw e-mail text.
        rules: Parsed rules as returned by ``load_rules``. Expected layout:
            ``{"keywords": {category: {scope: [{"term": regex,
            "weight": number}, ...]}}}`` where ``scope`` is ``"global"``
            or a language code. ``None`` or missing keys mean "no rules".

    Returns:
        ``(reasons, total_score, lang)``: human-readable explanations, the
        accumulated score, and the detected language code (``"unknown"``
        when detection fails).

    Raises:
        re.error: If a rule's ``term`` is not a valid regular expression.
        KeyError: If a rule entry has no ``term``.
    """
    reasons: List[str] = []
    total_score = 0.0
    lower = email_text.lower()
    lang = _detect_language(lower)

    for neg in _NEGATION_PHRASES:
        if neg in lower:
            reasons.append(f"Found negation: '{neg}' (reduces score)")
            total_score -= _NEGATION_PENALTY

    # Language-specific rules are only consulted when detection succeeded.
    scopes = [_GLOBAL_SCOPE]
    if lang != _UNKNOWN_LANGUAGE:
        scopes.append(lang)

    keyword_rules = (rules or {}).get("keywords") or {}
    for category, keywords in keyword_rules.items():
        scoped_entries = keywords or {}
        for scope in scopes:
            matches = _iter_matches(
                scoped_entries.get(scope), lower, category, scope
            )
            for reason, weight in matches:
                reasons.append(reason)
                total_score += weight

    # Links are searched in the original text so they are reported with
    # their original casing.
    urls = _URL_PATTERN.findall(email_text)
    if urls:
        reasons.append(f"Contains suspicious link(s): {', '.join(urls)}")
        total_score += _LINK_SCORE

    return reasons, total_score, lang


def extract_keywords(email_text: str, lang: str = "en") -> List[str]:
    """Extract up to five keywords from the text with YAKE.

    Args:
        email_text: Text to analyse.
        lang: Language code passed to YAKE.

    Returns:
        The extracted keywords without their scores, in the order YAKE
        returns them. Blank text yields an empty list.
    """
    if not email_text.strip():
        return []
    extractor = yake.KeywordExtractor(lan=lang, top=5)
    return [keyword for keyword, _ in extractor.extract_keywords(email_text)]


def explain_email(
    email_text: str, rules: Optional[Dict[str, Any]]
) -> Tuple[List[str], float]:
    """Return the heuristic explanation and score for an e-mail.

    Thin wrapper over ``apply_heuristics`` that drops the detected
    language and returns only ``(reasons, score)``.
    """
    reasons, score, _lang = apply_heuristics(email_text, rules)
    return reasons, score