# Uploaded by Quivara — "Fresh upload with LFS" (commit bdb271a)
import time
import os
import pickle
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from ingest_reddit import is_news_or_irrelevant, get_disaster_type, check_for_philippine_location
from ner_extractor import extract_entities
# ---------------------------------------------------------
# CONFIG & SETUP
# ---------------------------------------------------------
# All paths are anchored to this file's directory so the script works
# regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, 'models')
# Fine-tuned XLM-R checkpoint directory (tokenizer + weights).
ROBERTA_DIR = os.path.join(MODEL_DIR, 'roberta_model')
# Pickled TF-IDF ensemble used as a cheap first-pass gatekeeper.
TFIDF_PATH = os.path.join(MODEL_DIR, 'tfidf_ensemble.pkl')
# ---------------------------------------------------------
# LOAD BRAINS
# ---------------------------------------------------------
print("--- ALISTO: Loading Simulator ---")
# Module-level model handles consumed by predict_urgency(). They stay
# None when loading fails so prediction can degrade gracefully.
tokenizer = None
roberta_model = None
tfidf_model = None
# 1. Load XLM-R (Context Expert)
try:
    if os.path.exists(ROBERTA_DIR):
        tokenizer = AutoTokenizer.from_pretrained(ROBERTA_DIR)
        roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_DIR)
        roberta_model.eval()  # inference mode: disables dropout etc.
        print("βœ… XLM-R Loaded")
    else:
        print("❌ Failed to load XLM-R (Folder missing)")
except Exception as e:
    # Best-effort load: report and continue with the model disabled.
    print(f"❌ Error loading XLM-R: {e}")
# 2. Load TF-IDF (Gatekeeper)
try:
    if os.path.exists(TFIDF_PATH):
        # NOTE(review): pickle.load is only safe here because the model
        # artifact is a local, trusted file — never use on untrusted input.
        with open(TFIDF_PATH, 'rb') as f:
            tfidf_model = pickle.load(f)
        print("βœ… TF-IDF Loaded")
    else:
        print("❌ Failed to load TF-IDF (File missing)")
except Exception as e:
    print(f"❌ Error loading TF-IDF: {e}")
# ---------------------------------------------------------
# PREDICTION LOGIC (Must match ingest_reddit.py)
# ---------------------------------------------------------
def predict_urgency(text):
    """Two-stage urgency check on a raw post.

    Stage 1 is a cheap TF-IDF gate that rejects obvious non-urgent text;
    stage 2 is the fine-tuned XLM-R classifier. Either model may be
    unavailable (load failure), in which case its stage is skipped.

    Returns a ``(is_urgent, confidence, source)`` tuple where ``source``
    names the model that produced the verdict.
    """
    # Stage 1: Gatekeeper (TF-IDF) — early-reject clearly irrelevant posts.
    if tfidf_model:
        gate_conf = tfidf_model.predict_proba([text])[0][1]
        if gate_conf < 0.20:
            return False, gate_conf, "TF-IDF Reject"
    # Stage 2: Context Expert (RoBERTa). If neither piece is loaded,
    # fall through to the no-model verdict.
    if not (roberta_model and tokenizer):
        return False, 0.0, "No Model"
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():  # inference only — no gradients needed
        logits = roberta_model(**encoded).logits
    urgent_prob = F.softmax(logits, dim=-1)[0][1].item()
    return (urgent_prob > 0.5), urgent_prob, "RoBERTa"
# ---------------------------------------------------------
# TEST DATA
# ---------------------------------------------------------
# Canned Reddit-style posts covering both classes: genuine rescue
# requests (mixed Tagalog/English) that should pass the pipeline, and
# news/donation/discussion noise that the filters should reject.
TEST_POSTS = [
    # --- SHOULD BE ACCEPTED ---
    "Tulong po, stuck kami sa bubong ng bahay, tumataas tubig sa Marikina!",
    "Rescue needed at Provident Village, 3 kids trapped inside ceiling.",
    "Wala na kaming matatakbuhan, lampas tao na ang baha sa Cainta.",
    "Emergency! Landslide blocked the road in Baguio, need extraction.",
    "Please help us, flood entering 2nd floor in San Mateo Rizal.",
    # --- SHOULD BE REJECTED ---
    "Breaking News: Typhoon Signal No 4 raised in Bicol.",
    "Open for donations via GCash for typhoon victims.",
    "Looking for volunteers to repack relief goods at Ateneo.",
    "Stay safe everyone, praying for all affected.",
    "Discussion: Why is the government so slow?",
    "My heart breaks seeing the flood photos."
]
def run_simulation():
    """Feed every canned post through the full pipeline and print the
    verdict: rule-based logic filter, then AI urgency prediction, then
    entity/disaster-type extraction for accepted posts."""
    print("\n--- 🟒 STARTING SIMULATION ---\n")
    separator = "-" * 50
    for post in TEST_POSTS:
        print(f"πŸ“ Post: {post[:60]}...")
        # Stage A: rule-based filter shared with the ingest pipeline.
        blocked, reason = is_news_or_irrelevant(post)
        if blocked:
            print(f" ❌ BLOCKED by Logic: {reason}")
            print(separator)
            time.sleep(0.5)
            continue
        # Stage B: ML urgency prediction (TF-IDF gate + RoBERTa).
        urgent, score, source = predict_urgency(post)
        if not urgent:
            print(f" ❌ REJECTED by AI (Conf: {score:.2%})")
        else:
            # Stage C: extract locations and disaster type from accepted posts.
            entities = extract_entities(post)
            locs = entities.get('locations', [])
            disaster = get_disaster_type(post)
            print(f" βœ… ACCEPTED ({source} Conf: {score:.2%})")
            print(f" πŸ“ Location: {locs}")
            print(f" 🌊 Type: {disaster}")
        print(separator)
        time.sleep(1)  # pacing so the console output is readable
# Entry point: run the simulation only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    run_simulation()