Spaces:
Sleeping
Sleeping
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from deep_translator import GoogleTranslator | |
# Lazy-loaded registry of tokenizers/models, keyed by model alias ("fn1", "fn2").
_fn_cache = {"tokenizers": {}, "models": {}}

# Device preference: Apple-Silicon MPS, then CUDA, then CPU fallback.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
def get_or_load_fn_model(model_key):
    """Return the (tokenizer, model) pair for *model_key*, loading it on first use.

    Known keys are "fn1" and "fn2". On a cache miss the HuggingFace repo is
    downloaded, the model is moved to the module-level ``device`` and put in
    eval mode, then both objects are memoized in ``_fn_cache``.
    """
    repo_by_key = {
        "fn1": "vikram71198/distilroberta-base-finetuned-fake-news-detection",
        "fn2": "jy46604790/Fake-News-Bert-Detect",
    }
    if model_key not in _fn_cache["models"]:
        print(f"📥 Chargement du modèle Fake News {model_key}...")
        repo_id = repo_by_key[model_key]
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        model = AutoModelForSequenceClassification.from_pretrained(repo_id).to(device)
        model.eval()  # inference only — disable dropout etc.
        _fn_cache["tokenizers"][model_key] = tokenizer
        _fn_cache["models"][model_key] = model
    return _fn_cache["tokenizers"][model_key], _fn_cache["models"][model_key]
def apply_local_context_guardrails(text: str, fake_prob: float) -> float:
    """Discount the fake-news probability when trusted Senegalese/African
    entities are mentioned, countering the out-of-distribution bias of
    English-trained detectors.

    Args:
        text: Original (untranslated) input text.
        fake_prob: Raw ensemble fake probability, expected in [0, 1].

    Returns:
        Adjusted probability: at most 0.45 is subtracted, floored at 0.01.
    """
    import re  # local import keeps this function self-contained

    text_lower = text.lower()

    # Credibility keywords: local press agencies and institutions.
    credible_keywords = [
        "aps", "agence de presse sénégalaise", "rts", "radiodiffusion télévision sénégalaise",
        "le soleil", "seneweb", "dakaractu", "igfm", "tfm", "walfadjri", "sud quotidien"
    ]
    # Proper nouns an American-trained model tends to treat as noise.
    local_entities = [
        "dakar", "sénégal", "senegal", "macky sall", "ousmane sonko", "diomaye faye",
        "bassirou diomaye", "pastef", "apr", "assemblée nationale", "ucad"
    ]

    def _mentions(phrase: str) -> bool:
        # Whole-word match. The previous plain-substring test produced false
        # positives: "rts" fired inside "sports", "apr" inside "april",
        # "aps" inside "perhaps", etc.
        return re.search(r"\b" + re.escape(phrase) + r"\b", text_lower) is not None

    credible_matches = sum(1 for kw in credible_keywords if _mentions(kw))
    entity_matches = sum(1 for kw in local_entities if _mentions(kw))

    # Progressive discount: 25% per credible source, 15% per local entity,
    # capped at 45% total so a genuine fake can never be fully whitewashed.
    discount = 0.25 * credible_matches + 0.15 * entity_matches
    discount = min(discount, 0.45)

    return float(max(0.01, fake_prob - discount))
def analyze_fakenews_text(text: str) -> dict:
    """Run the fake-news ensemble on *text* and return a verdict dict.

    Pipeline:
      1. Auto-translate the input to English (best effort) so the
         English-only detectors also apply to French, Wolof, etc.
      2. Score the translated text with two models and blend the
         probabilities (fn1 weighted 0.60, fn2 weighted 0.40).
      3. Apply Senegal-specific guardrails on the ORIGINAL, untranslated
         text to produce the final adjusted probability.
    """
    # 1. Multilingual translation — fall back to the raw text on any failure.
    try:
        english_text = GoogleTranslator(source='auto', target='en').translate(text)
        print("📝 Traduction en cours pour analyse FakeNews...")
    except Exception as exc:
        print(f"⚠️ Erreur de traduction : {exc}. Utilisation du texte original.")
        english_text = text

    def _predict(model_key, txt):
        # NOTE(review): assumes logit index 1 is the "fake" class for both
        # model repos — confirm against each model card.
        tokenizer, model = get_or_load_fn_model(model_key)
        encoded = tokenizer(txt, return_tensors="pt", truncation=True, max_length=512).to(device)
        with torch.inference_mode():
            logits = model(**encoded).logits
        return float(torch.nn.functional.softmax(logits, dim=-1)[0][1])

    # 2. Weighted ensemble over the translated text.
    weighted_fake_prob = 0.60 * _predict("fn1", english_text) + 0.40 * _predict("fn2", english_text)

    # 3. Local-context guardrails, applied to the original (not translated) text.
    adjusted_prob = apply_local_context_guardrails(text, weighted_fake_prob)

    is_fake = adjusted_prob > 0.50
    return {
        "verdict": "FAKE NEWS" if is_fake else "INFO VRAIE",
        "fake_prob": adjusted_prob,
        "real_prob": 1.0 - adjusted_prob,
        "is_fake": is_fake,
        "raw_fake_prob": weighted_fake_prob,  # kept for debugging
        "was_translated": english_text != text,
    }