# Uploaded by Quivara — "Fresh upload with LFS" (commit bdb271a)
import time
import os
import pickle
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from ingest_reddit import is_news_or_irrelevant, get_disaster_type, check_for_philippine_location
from ner_extractor import extract_entities
# ---------------------------------------------------------
# CONFIG & SETUP
# ---------------------------------------------------------
# All paths are anchored to this file's directory so the script works
# regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, 'models')
# Fine-tuned XLM-R checkpoint directory (tokenizer + weights).
ROBERTA_DIR = os.path.join(MODEL_DIR, 'roberta_model')
# Pickled TF-IDF ensemble used as a cheap first-pass gatekeeper.
TFIDF_PATH = os.path.join(MODEL_DIR, 'tfidf_ensemble.pkl')
# ---------------------------------------------------------
# LOAD BRAINS
# ---------------------------------------------------------
print("--- ALISTO: Loading Simulator ---")
# Module-level model handles consumed by predict_urgency(). They stay
# None when loading fails so prediction can degrade gracefully.
tokenizer = None
roberta_model = None
tfidf_model = None
# 1. Load XLM-R (Context Expert)
try:
    if os.path.exists(ROBERTA_DIR):
        tokenizer = AutoTokenizer.from_pretrained(ROBERTA_DIR)
        roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_DIR)
        roberta_model.eval()  # inference mode: disables dropout etc.
        print("βœ… XLM-R Loaded")
    else:
        print("❌ Failed to load XLM-R (Folder missing)")
except Exception as e:
    # Best-effort load: report and continue with the model disabled.
    print(f"❌ Error loading XLM-R: {e}")
# 2. Load TF-IDF (Gatekeeper)
try:
    if os.path.exists(TFIDF_PATH):
        # NOTE(review): pickle.load is only safe here because the model
        # artifact is a local, trusted file — never use on untrusted input.
        with open(TFIDF_PATH, 'rb') as f:
            tfidf_model = pickle.load(f)
        print("βœ… TF-IDF Loaded")
    else:
        print("❌ Failed to load TF-IDF (File missing)")
except Exception as e:
    print(f"❌ Error loading TF-IDF: {e}")
# ---------------------------------------------------------
# PREDICTION LOGIC (Must match ingest_reddit.py)
# ---------------------------------------------------------
def predict_urgency(text):
    """Two-stage urgency check on a raw post.

    Stage 1 is a cheap TF-IDF gate that rejects obvious non-urgent text;
    stage 2 is the fine-tuned XLM-R classifier. Either model may be
    unavailable (load failure), in which case its stage is skipped.

    Returns a ``(is_urgent, confidence, source)`` tuple where ``source``
    names the model that produced the verdict.
    """
    # Stage 1: Gatekeeper (TF-IDF) — early-reject clearly irrelevant posts.
    if tfidf_model:
        gate_conf = tfidf_model.predict_proba([text])[0][1]
        if gate_conf < 0.20:
            return False, gate_conf, "TF-IDF Reject"
    # Stage 2: Context Expert (RoBERTa). If neither piece is loaded,
    # fall through to the no-model verdict.
    if not (roberta_model and tokenizer):
        return False, 0.0, "No Model"
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():  # inference only — no gradients needed
        logits = roberta_model(**encoded).logits
    urgent_prob = F.softmax(logits, dim=-1)[0][1].item()
    return (urgent_prob > 0.5), urgent_prob, "RoBERTa"
# ---------------------------------------------------------
# TEST DATA
# ---------------------------------------------------------
# Canned Reddit-style posts covering both classes: genuine rescue
# requests (mixed Tagalog/English) that should pass the pipeline, and
# news/donation/discussion noise that the filters should reject.
TEST_POSTS = [
    # --- SHOULD BE ACCEPTED ---
    "Tulong po, stuck kami sa bubong ng bahay, tumataas tubig sa Marikina!",
    "Rescue needed at Provident Village, 3 kids trapped inside ceiling.",
    "Wala na kaming matatakbuhan, lampas tao na ang baha sa Cainta.",
    "Emergency! Landslide blocked the road in Baguio, need extraction.",
    "Please help us, flood entering 2nd floor in San Mateo Rizal.",
    # --- SHOULD BE REJECTED ---
    "Breaking News: Typhoon Signal No 4 raised in Bicol.",
    "Open for donations via GCash for typhoon victims.",
    "Looking for volunteers to repack relief goods at Ateneo.",
    "Stay safe everyone, praying for all affected.",
    "Discussion: Why is the government so slow?",
    "My heart breaks seeing the flood photos."
]
def run_simulation():
    """Feed every canned post through the full pipeline and print the
    verdict: rule-based logic filter, then AI urgency prediction, then
    entity/disaster-type extraction for accepted posts."""
    print("\n--- 🟒 STARTING SIMULATION ---\n")
    separator = "-" * 50
    for post in TEST_POSTS:
        print(f"πŸ“ Post: {post[:60]}...")
        # Stage A: rule-based filter shared with the ingest pipeline.
        blocked, reason = is_news_or_irrelevant(post)
        if blocked:
            print(f" ❌ BLOCKED by Logic: {reason}")
            print(separator)
            time.sleep(0.5)
            continue
        # Stage B: ML urgency prediction (TF-IDF gate + RoBERTa).
        urgent, score, source = predict_urgency(post)
        if not urgent:
            print(f" ❌ REJECTED by AI (Conf: {score:.2%})")
        else:
            # Stage C: extract locations and disaster type from accepted posts.
            entities = extract_entities(post)
            locs = entities.get('locations', [])
            disaster = get_disaster_type(post)
            print(f" βœ… ACCEPTED ({source} Conf: {score:.2%})")
            print(f" πŸ“ Location: {locs}")
            print(f" 🌊 Type: {disaster}")
        print(separator)
        time.sleep(1)  # pacing so the console output is readable
# Entry point: run the simulation only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    run_simulation()