Spaces:

oyabun-dev
/

kamyvision-api

Sleeping

App Files Files Community

kamyvision-api / app /pipelines /fakenews.py

oyabun-dev

deploy: 2026-04-02T00:05:48Z

55bcd2b about 1 month ago

raw

history blame contribute delete

4.08 kB

	import re

	import torch
	from deep_translator import GoogleTranslator
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# Process-wide cache of loaded tokenizers and models, keyed by model key
	# (filled on first use by get_or_load_fn_model).
	_fn_cache = dict(tokenizers={}, models={})

	# Pick the inference device once at import time: Apple MPS is checked first,
	# then CUDA, and CPU is the fallback.
	if torch.backends.mps.is_available():
	    device = torch.device("mps")
	elif torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	def get_or_load_fn_model(model_key):
	    """Return the ``(tokenizer, model)`` pair for ``model_key``, loading it on first use.

	    On a cache miss the tokenizer and the sequence-classification model are
	    created with ``from_pretrained``; the model is moved to the module-level
	    ``device`` and switched to eval mode, then both are stored in ``_fn_cache``.

	    Args:
	        model_key: Either ``"fn1"`` or ``"fn2"``.

	    Returns:
	        A ``(tokenizer, model)`` tuple taken from the cache.

	    Raises:
	        KeyError: If ``model_key`` is not a known key (raised after the
	            loading message has been printed).
	    """
	    tokenizer_cache = _fn_cache["tokenizers"]
	    model_cache = _fn_cache["models"]

	    # Fast path: this model was already loaded during the process lifetime.
	    if model_key in model_cache:
	        return tokenizer_cache[model_key], model_cache[model_key]

	    print(f"📥 Chargement du modèle Fake News {model_key}...")

	    checkpoints = {
	        "fn1": "vikram71198/distilroberta-base-finetuned-fake-news-detection",
	        "fn2": "jy46604790/Fake-News-Bert-Detect",
	    }
	    checkpoint = checkpoints[model_key]

	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
	    classifier = classifier.to(device)
	    classifier.eval()

	    # Store only after both loads succeeded so a failed load caches nothing.
	    tokenizer_cache[model_key] = tokenizer
	    model_cache[model_key] = classifier

	    return tokenizer_cache[model_key], model_cache[model_key]

	def _term_in_text(term: str, text_lower: str) -> bool:
	    """Tell whether ``term`` occurs in the already lower-cased ``text_lower``.

	    Terms of at most four characters are treated as acronyms and must appear
	    as standalone words; otherwise "apr" would fire on "après" and "rts" on
	    "efforts". Longer terms keep plain substring matching so inflected forms
	    such as "sénégalaise" (for "sénégal") still count.
	    """
	    if len(term) <= 4:
	        # For str patterns \b and \w are Unicode-aware, so the "è" in "après"
	        # counts as a word character and blocks the match.
	        return re.search(rf"\b{re.escape(term)}\b", text_lower) is not None
	    return term in text_lower


	def apply_local_context_guardrails(text: str, fake_prob: float) -> float:
	    """Lower the fake-news probability when trusted Senegalese/African context is present.

	    This is meant to counter the out-of-distribution bias of the upstream
	    classifiers on local content.

	    Each matched credible-source keyword subtracts 0.25 and each matched local
	    entity subtracts 0.15 from ``fake_prob``. The total subtraction is capped
	    at 0.45 (absolute probability points) and the result is floored at 0.01.

	    Args:
	        text: Original (untranslated) text to scan; matching is
	            case-insensitive.
	        fake_prob: Fake-news probability before adjustment.

	    Returns:
	        The adjusted probability, never below 0.01.
	    """
	    text_lower = text.lower()

	    # Local credibility keywords: press outlets and institutions.
	    credible_keywords = (
	        "aps", "agence de presse sénégalaise", "rts", "radiodiffusion télévision sénégalaise",
	        "le soleil", "seneweb", "dakaractu", "igfm", "tfm", "walfadjri", "sud quotidien",
	    )

	    # Proper nouns that are often treated as noise by a US-trained model.
	    local_entities = (
	        "dakar", "sénégal", "senegal", "macky sall", "ousmane sonko", "diomaye faye",
	        "bassirou diomaye", "pastef", "apr", "assemblée nationale", "ucad",
	    )

	    credible_matches = sum(_term_in_text(kw, text_lower) for kw in credible_keywords)
	    entity_matches = sum(_term_in_text(kw, text_lower) for kw in local_entities)

	    # Progressive reduction of the fake probability, capped at 0.45 in total.
	    discount = min(0.25 * credible_matches + 0.15 * entity_matches, 0.45)

	    # The 0.01 floor keeps the score strictly positive.
	    return float(max(0.01, fake_prob - discount))


	def analyze_fakenews_text(text: str) -> dict:
	    """Score ``text`` for fake news with two English classifiers plus local guardrails.

	    Steps:
	        1. Translate the text to English (source language auto-detected); on
	           any translation failure the original text is used instead.
	        2. Run the "fn1" and "fn2" models on the translated text and blend
	           their scores with weights 0.60 and 0.40.
	        3. Apply ``apply_local_context_guardrails`` to the blended score,
	           using the original (untranslated) text.

	    Args:
	        text: Text to analyse, in any language.

	    Returns:
	        A dict with keys ``verdict`` ("FAKE NEWS" or "INFO VRAIE"),
	        ``fake_prob``, ``real_prob``, ``is_fake`` (``fake_prob > 0.50``),
	        ``raw_fake_prob`` (blended score before guardrails, for debugging)
	        and ``was_translated``.
	    """
	    # 1. MULTILINGUAL TRANSLATION
	    # Translate to English whatever the input language, so the English-only
	    # fake-news models can be used on French, Wolof (if supported), etc.
	    #
	    # NOTE(review): deep-translator appears to reject payloads of 5000+
	    # characters; only a prefix is translated so long articles are not
	    # silently scored untranslated. The classifiers truncate at 512 tokens
	    # anyway. Confirm the limit against the installed version.
	    max_translate_chars = 4999

	    print("📝 Traduction en cours pour analyse FakeNews...")
	    try:
	        translated_text = GoogleTranslator(source='auto', target='en').translate(
	            text[:max_translate_chars]
	        )
	    except Exception as e:
	        # Deliberate best-effort: any translator failure falls back to the input.
	        print(f"⚠️ Erreur de traduction : {e}. Utilisation du texte original.")
	        translated_text = text

	    # The translator may hand back None or an empty string without raising;
	    # the tokenizer cannot consume that, so fall back to the original text.
	    if not isinstance(translated_text, str) or not translated_text.strip():
	        translated_text = text

	    def _predict(model_key, txt):
	        """Return the softmax probability at class index 1 for ``txt``."""
	        tokenizer, model = get_or_load_fn_model(model_key)
	        inputs = tokenizer(txt, return_tensors="pt", truncation=True, max_length=512).to(device)
	        with torch.inference_mode():
	            outputs = model(**inputs)
	            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
	            # NOTE(review): assumes class index 1 means "fake" for BOTH
	            # checkpoints — confirm against each model card / config.id2label.
	            fake_prob = float(probs[0][1])
	        return fake_prob

	    # 2. INFERENCE ON THE TRANSLATED (ENGLISH) TEXT
	    prob1 = _predict("fn1", translated_text)
	    prob2 = _predict("fn2", translated_text)
	    weighted_fake_prob = (prob1 * 0.60) + (prob2 * 0.40)

	    # 3. LOCAL CONTEXT GUARDRAILS (SENEGAL)
	    # Applied to the original text, not the translation.
	    adjusted_prob = apply_local_context_guardrails(text, weighted_fake_prob)
	    is_fake = adjusted_prob > 0.50

	    return {
	        "verdict": "FAKE NEWS" if is_fake else "INFO VRAIE",
	        "fake_prob": adjusted_prob,
	        "real_prob": 1.0 - adjusted_prob,
	        "is_fake": is_fake,
	        "raw_fake_prob": weighted_fake_prob,  # kept for debugging
	        "was_translated": translated_text != text,
	    }