# Page metadata captured alongside this file (not part of the program):
#   Spaces: Running
#   File size: 4,491 Bytes
#   bdb271a
# The viewer's line-number gutter (1-127) that followed has been dropped.
import time
import os
import pickle
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from ingest_reddit import is_news_or_irrelevant, get_disaster_type, check_for_philippine_location
from ner_extractor import extract_entities
# ---------------------------------------------------------
# CONFIG & SETUP
# ---------------------------------------------------------
# All model artefacts are located relative to the directory that contains
# this script, so the paths do not depend on the current working directory.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
# Folder holding every saved model.
MODEL_DIR = os.path.join(BASE_DIR, "models")
# Directory passed to from_pretrained() for the XLM-R tokenizer and classifier.
ROBERTA_DIR = os.path.join(BASE_DIR, "models", "roberta_model")
# Pickled TF-IDF model used as the first-stage gatekeeper.
TFIDF_PATH = os.path.join(BASE_DIR, "models", "tfidf_ensemble.pkl")
# ---------------------------------------------------------
# LOAD BRAINS
# ---------------------------------------------------------
print("--- ALISTO: Loading Simulator ---")

# Module-level model handles. Each one stays None when its artefact is missing
# or fails to load, so the rest of the script can test for availability.
tokenizer = None
roberta_model = None
tfidf_model = None

# NOTE(review): the status glyphs in the messages below were restored from
# mis-encoded text (the "Loaded" literals were split across two lines, which
# is a syntax error). Confirm that the check mark / cross mark match the
# intended originals.

# 1. Load XLM-R (Context Expert)
try:
    if os.path.exists(ROBERTA_DIR):
        tokenizer = AutoTokenizer.from_pretrained(ROBERTA_DIR)
        roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_DIR)
        # Inference only: switch the module to evaluation mode.
        roberta_model.eval()
        print("✅ XLM-R Loaded")
    else:
        print("❌ Failed to load XLM-R (Folder missing)")
except Exception as e:
    # Best-effort load: report the problem and continue without this model.
    print(f"❌ Error loading XLM-R: {e}")

# 2. Load TF-IDF (Gatekeeper)
try:
    if os.path.exists(TFIDF_PATH):
        with open(TFIDF_PATH, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code embedded in the
            # file. Only load a tfidf_ensemble.pkl that you produced or trust.
            tfidf_model = pickle.load(f)
        print("✅ TF-IDF Loaded")
    else:
        print("❌ Failed to load TF-IDF (File missing)")
except Exception as e:
    # Best-effort load: report the problem and continue without this model.
    print(f"❌ Error loading TF-IDF: {e}")
# ---------------------------------------------------------
# PREDICTION LOGIC (Must match ingest_reddit.py)
# ---------------------------------------------------------
def predict_urgency(text):
    """Classify a post as urgent or not with the two-stage model pipeline.

    Stage 1 (TF-IDF gatekeeper): when the TF-IDF model is loaded, the post is
    rejected immediately if the probability at index 1 of ``predict_proba``
    is below 0.20.
    Stage 2 (RoBERTa): when both the classifier and its tokenizer are loaded,
    the post is urgent if the softmax probability at index 1 exceeds 0.5.

    Args:
        text: Raw post text.

    Returns:
        A ``(is_urgent, confidence, source)`` tuple, where ``source`` is
        ``"TF-IDF Reject"``, ``"RoBERTa"`` or ``"No Model"`` depending on
        which stage produced the decision. ``confidence`` is a plain float
        (``0.0`` when no model could score the post).
    """
    # Availability is tested with ``is not None`` rather than truthiness: a
    # loaded object that defines __len__/__bool__ could evaluate as false and
    # would then be skipped silently.

    # 1. Gatekeeper (TF-IDF)
    if tfidf_model is not None:
        probs = tfidf_model.predict_proba([text])[0]
        # Index 1 is presumably the "urgent" class -- confirm against training.
        tfidf_conf = float(probs[1])
        if tfidf_conf < 0.20:
            return False, tfidf_conf, "TF-IDF Reject"

    # 2. Context Expert (RoBERTa)
    if roberta_model is not None and tokenizer is not None:
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128,
        )
        # No gradients are needed for inference.
        with torch.no_grad():
            outputs = roberta_model(**inputs)
        r_probs = F.softmax(outputs.logits, dim=-1)
        roberta_conf = r_probs[0][1].item()
        return (roberta_conf > 0.5), roberta_conf, "RoBERTa"

    # The gatekeeper did not reject and no classifier is available.
    return False, 0.0, "No Model"
# ---------------------------------------------------------
# TEST DATA
# ---------------------------------------------------------
# Sample posts that the pipeline SHOULD ACCEPT.
_EXPECTED_ACCEPTED = (
    "Tulong po, stuck kami sa bubong ng bahay, tumataas tubig sa Marikina!",
    "Rescue needed at Provident Village, 3 kids trapped inside ceiling.",
    "Wala na kaming matatakbuhan, lampas tao na ang baha sa Cainta.",
    "Emergency! Landslide blocked the road in Baguio, need extraction.",
    "Please help us, flood entering 2nd floor in San Mateo Rizal.",
)

# Sample posts that the pipeline SHOULD REJECT.
_EXPECTED_REJECTED = (
    "Breaking News: Typhoon Signal No 4 raised in Bicol.",
    "Open for donations via GCash for typhoon victims.",
    "Looking for volunteers to repack relief goods at Ateneo.",
    "Stay safe everyone, praying for all affected.",
    "Discussion: Why is the government so slow?",
    "My heart breaks seeing the flood photos.",
)

# Full fixture list, accepted examples first, in the original order.
TEST_POSTS = list(_EXPECTED_ACCEPTED + _EXPECTED_REJECTED)
def run_simulation():
    """Run every post in TEST_POSTS through the pipeline and print the outcome.

    For each post: apply the rule-based filter ``is_news_or_irrelevant``; if
    the post survives, score it with ``predict_urgency``; if it is urgent,
    print the extracted locations and the disaster type. A short pause follows
    each post (0.5 s after a rule-based block, 1 s otherwise).

    Returns:
        None. All results are written to standard output.
    """
    # NOTE(review): the check mark / cross mark glyphs were restored from
    # mis-encoded text (the "ACCEPTED" literal was split across two lines, a
    # syntax error). The glyphs rendered as "π..." could not be recovered and
    # are left exactly as found -- confirm the intended emoji.
    print("\n--- π’ STARTING SIMULATION ---\n")
    for text in TEST_POSTS:
        print(f"π Post: {text[:60]}...")

        # A. Logic Filter
        is_bad, reason = is_news_or_irrelevant(text)
        if is_bad:
            print(f" ❌ BLOCKED by Logic: {reason}")
            print("-" * 50)
            time.sleep(0.5)
            continue

        # B. AI Prediction
        is_urgent, score, source = predict_urgency(text)
        if is_urgent:
            # C. Entity Extraction
            ner = extract_entities(text)
            locs = ner.get('locations', [])
            disaster = get_disaster_type(text)
            print(f" ✅ ACCEPTED ({source} Conf: {score:.2%})")
            print(f" π Location: {locs}")
            print(f" π Type: {disaster}")
        else:
            print(f" ❌ REJECTED by AI (Conf: {score:.2%})")

        print("-" * 50)
        time.sleep(1)
# Run the walkthrough only when this file is executed as a script.
if __name__ == "__main__":
    run_simulation()