"""ALISTO simulator.

Runs a fixed set of Reddit-style disaster posts through the same filtering
pipeline used by the live ingester (logic filter -> TF-IDF gatekeeper ->
XLM-R context expert -> NER extraction) so model behaviour can be
sanity-checked offline.
"""

import os
import pickle
import time

import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from ingest_reddit import (
    check_for_philippine_location,
    get_disaster_type,
    is_news_or_irrelevant,
)
from ner_extractor import extract_entities

# ---------------------------------------------------------
# CONFIG & SETUP
# ---------------------------------------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, 'models')
ROBERTA_DIR = os.path.join(MODEL_DIR, 'roberta_model')
TFIDF_PATH = os.path.join(MODEL_DIR, 'tfidf_ensemble.pkl')

# ---------------------------------------------------------
# LOAD BRAINS
# ---------------------------------------------------------
print("--- ALISTO: Loading Simulator ---")

# Models are loaded best-effort at import time; any that fail to load stay
# None and predict_urgency() degrades gracefully.
tokenizer = None
roberta_model = None
tfidf_model = None

# 1. Load XLM-R (Context Expert)
try:
    if os.path.exists(ROBERTA_DIR):
        tokenizer = AutoTokenizer.from_pretrained(ROBERTA_DIR)
        roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_DIR)
        roberta_model.eval()  # inference mode: disables dropout/batch-norm updates
        print("✅ XLM-R Loaded")
    else:
        print("❌ Failed to load XLM-R (Folder missing)")
except Exception as e:
    print(f"❌ Error loading XLM-R: {e}")

# 2. Load TF-IDF (Gatekeeper)
try:
    if os.path.exists(TFIDF_PATH):
        # NOTE(review): pickle.load is acceptable only because this is a
        # locally-produced model artifact. Never unpickle untrusted files.
        with open(TFIDF_PATH, 'rb') as f:
            tfidf_model = pickle.load(f)
        print("✅ TF-IDF Loaded")
    else:
        print("❌ Failed to load TF-IDF (File missing)")
except Exception as e:
    print(f"❌ Error loading TF-IDF: {e}")


# ---------------------------------------------------------
# PREDICTION LOGIC (Must match ingest_reddit.py)
# ---------------------------------------------------------
def predict_urgency(text):
    """Classify *text* as urgent or not using the two-stage model pipeline.

    Stage 1 (gatekeeper): the TF-IDF ensemble cheaply rejects posts whose
    urgent-class probability is below 0.20.
    Stage 2 (context expert): XLM-R makes the final call at a 0.5 threshold.

    Args:
        text: raw post text.

    Returns:
        Tuple of (is_urgent: bool, confidence: float, source: str), where
        *source* names which stage produced the verdict ("TF-IDF Reject",
        "RoBERTa", or "No Model" when no model is available).
    """
    # 1. Gatekeeper (TF-IDF) — explicit None check: truthiness of an
    # unpickled model object is not a reliable "is loaded" signal.
    if tfidf_model is not None:
        probs = tfidf_model.predict_proba([text])[0]
        tfidf_conf = probs[1]  # probability of the positive (urgent) class
        if tfidf_conf < 0.20:
            return False, tfidf_conf, "TF-IDF Reject"

    # 2. Context Expert (RoBERTa)
    if roberta_model is not None and tokenizer is not None:
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128,
        )
        with torch.no_grad():  # inference only — no autograd bookkeeping
            outputs = roberta_model(**inputs)
            r_probs = F.softmax(outputs.logits, dim=-1)
            roberta_conf = r_probs[0][1].item()
        return (roberta_conf > 0.5), roberta_conf, "RoBERTa"

    # Neither model is available: fail closed (not urgent).
    return False, 0.0, "No Model"


# ---------------------------------------------------------
# TEST DATA
# ---------------------------------------------------------
TEST_POSTS = [
    # --- SHOULD BE ACCEPTED ---
    "Tulong po, stuck kami sa bubong ng bahay, tumataas tubig sa Marikina!",
    "Rescue needed at Provident Village, 3 kids trapped inside ceiling.",
    "Wala na kaming matatakbuhan, lampas tao na ang baha sa Cainta.",
    "Emergency! Landslide blocked the road in Baguio, need extraction.",
    "Please help us, flood entering 2nd floor in San Mateo Rizal.",
    # --- SHOULD BE REJECTED ---
    "Breaking News: Typhoon Signal No 4 raised in Bicol.",
    "Open for donations via GCash for typhoon victims.",
    "Looking for volunteers to repack relief goods at Ateneo.",
    "Stay safe everyone, praying for all affected.",
    "Discussion: Why is the government so slow?",
    "My heart breaks seeing the flood photos.",
]


def run_simulation():
    """Feed each test post through the full pipeline and print the verdict.

    Pipeline per post: rule-based logic filter, then AI urgency prediction,
    then (for accepted posts) NER location extraction and disaster typing.
    Sleeps briefly between posts so the console output is readable.
    """
    print("\n--- 🟢 STARTING SIMULATION ---\n")

    for text in TEST_POSTS:
        print(f"📝 Post: {text[:60]}...")

        # A. Logic Filter
        is_bad, reason = is_news_or_irrelevant(text)
        if is_bad:
            print(f"   ❌ BLOCKED by Logic: {reason}")
            print("-" * 50)
            time.sleep(0.5)
            continue

        # B. AI Prediction
        is_urgent, score, source = predict_urgency(text)

        if is_urgent:
            # C. Entity Extraction (only worth the cost for accepted posts)
            ner = extract_entities(text)
            locs = ner.get('locations', [])
            disaster = get_disaster_type(text)

            print(f"   ✅ ACCEPTED ({source} Conf: {score:.2%})")
            print(f"   📍 Location: {locs}")
            print(f"   🌊 Type: {disaster}")
        else:
            print(f"   ❌ REJECTED by AI (Conf: {score:.2%})")

        print("-" * 50)
        time.sleep(1)


if __name__ == "__main__":
    run_simulation()