Spaces:
Sleeping
Sleeping
| import re | |
| import yaml | |
| import yake | |
| import spacy | |
| from langdetect import detect | |
| # Carregar regras heurísticas com pesos | |
def load_rules(filepath="rules_weighted.yaml"):
    """Read the weighted heuristic rules from a YAML file.

    Args:
        filepath: Path of the YAML file to read; it is opened as UTF-8 text.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.
    """
    with open(filepath, "r", encoding="utf-8") as rules_file:
        parsed_rules = yaml.safe_load(rules_file)
    return parsed_rules
| # Aplicar regras com base no idioma e calcular score | |
def apply_heuristics(email_text, rules):
    """Score an email against weighted keyword rules and simple heuristics.

    The score starts at 0.0 and is adjusted as follows:

    * each hard-coded negation phrase found in the lower-cased text
      subtracts 0.5;
    * each matching rule entry (``global`` entries first, then the entries
      listed under the detected language code) adds the entry's ``weight``
      (1.0 when the entry has no ``weight`` key);
    * the presence of at least one ``http://`` / ``https://`` URL adds 1.0.

    Args:
        email_text: Raw text of the email.
        rules: Mapping read as ``rules["keywords"][category][scope]``, where
            ``scope`` is ``"global"`` or a language code and each value is a
            list of dicts with a ``"term"`` regex and an optional ``"weight"``.
            Missing or empty (``None``) levels are treated as empty.

    Returns:
        A ``(reasons, total_score, lang)`` tuple: the list of human-readable
        explanations, the accumulated score, and the detected language code
        (``"unknown"`` when language detection fails).

    Raises:
        KeyError: If a rule entry has no ``"term"`` key.
        re.error: If a rule's ``"term"`` is not a valid regular expression.
    """
    # Imported locally because the module header only brings in ``detect``.
    from langdetect import LangDetectException

    reasons = []
    total_score = 0.0
    lower = email_text.lower()

    try:
        lang = detect(lower)
    except LangDetectException:
        # langdetect raises when the text has no usable features (for example
        # an empty string). Fall back to a placeholder so the global rules
        # and the link heuristic are still evaluated instead of crashing.
        lang = "unknown"

    # Negation phrases that lower the score.
    negations = (
        "não é urgente",
        "sem urgência",
        "não necessita ação",
        "não requer ação imediata",
        "sem necessidade imediata",
    )
    for neg in negations:
        if neg in lower:
            reasons.append(f"Found negation: '{neg}' (reduces score)")
            total_score -= 0.5

    # ``or {}`` / ``or []`` guard against YAML keys that are present but
    # empty, which load as None.
    for category, keywords in (rules.get("keywords") or {}).items():
        keywords = keywords or {}
        # Global keywords are evaluated before the language-specific ones.
        for scope in ("global", lang):
            for entry in keywords.get(scope) or []:
                pattern = entry["term"]
                weight = entry.get("weight", 1.0)
                if re.search(pattern, lower, re.IGNORECASE):
                    reasons.append(
                        f"[{category}] Matched '{pattern}' ({scope}, weight={weight})"
                    )
                    total_score += weight

    # Link heuristic: any URL adds a flat 1.0, regardless of how many appear.
    urls = re.findall(r"http[s]?://\S+", email_text)
    if urls:
        reasons.append(f"Contains suspicious link(s): {', '.join(urls)}")
        total_score += 1.0

    return reasons, total_score, lang
| # Extração de palavras-chave com YAKE | |
def extract_keywords(email_text, lang="en"):
    """Return the keywords YAKE extracts from the email text.

    Args:
        email_text: Text to extract keywords from.
        lang: Language code passed to ``yake.KeywordExtractor`` as ``lan``.

    Returns:
        A list with the keyword of each ``(keyword, score)`` pair returned by
        the extractor, which is configured with ``top=5``; the scores are
        discarded.
    """
    scored_keywords = yake.KeywordExtractor(lan=lang, top=5).extract_keywords(email_text)
    terms = []
    for term, _score in scored_keywords:
        terms.append(term)
    return terms
| # Explicação combinada | |
def explain_email(email_text, rules):
    """Return the heuristic reasons and score for an email.

    Delegates to ``apply_heuristics`` and drops the detected language from
    its result.

    Args:
        email_text: Raw text of the email.
        rules: Rules object forwarded unchanged to ``apply_heuristics``.

    Returns:
        A ``(reasons, score)`` tuple.
    """
    found_reasons, total, _lang = apply_heuristics(email_text, rules)
    return found_reasons, total