Spaces:
Running
Running
| import time | |
| import os | |
| import pickle | |
| import torch | |
| import torch.nn.functional as F | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from ingest_reddit import is_news_or_irrelevant, get_disaster_type, check_for_philippine_location | |
| from ner_extractor import extract_entities | |
# ---------------------------------------------------------
# CONFIG & SETUP
# ---------------------------------------------------------
# All paths are resolved relative to this file so the script works
# no matter what the current working directory is.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, 'models')  # root folder for saved models
ROBERTA_DIR = os.path.join(MODEL_DIR, 'roberta_model')  # fine-tuned XLM-R checkpoint directory
TFIDF_PATH = os.path.join(MODEL_DIR, 'tfidf_ensemble.pkl')  # pickled TF-IDF gatekeeper ensemble
# ---------------------------------------------------------
# LOAD BRAINS
# ---------------------------------------------------------
# Load the two classifiers used by predict_urgency(). Failures are
# reported but never raised: the simulator degrades gracefully, and
# predict_urgency() falls back whenever a model is still None.
print("--- ALISTO: Loading Simulator ---")
tokenizer = None
roberta_model = None
tfidf_model = None

# 1. Load XLM-R (Context Expert)
try:
    if os.path.exists(ROBERTA_DIR):
        tokenizer = AutoTokenizer.from_pretrained(ROBERTA_DIR)
        roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_DIR)
        roberta_model.eval()  # inference mode: disable dropout etc.
        print("[OK] XLM-R Loaded")
    else:
        print("[FAIL] Failed to load XLM-R (Folder missing)")
except Exception as e:  # top-level boundary: keep the simulator usable without this model
    print(f"[FAIL] Error loading XLM-R: {e}")

# 2. Load TF-IDF (Gatekeeper)
try:
    if os.path.exists(TFIDF_PATH):
        with open(TFIDF_PATH, 'rb') as f:
            # NOTE(review): pickle.load is only acceptable because this file
            # is produced locally by our own training pipeline — never load
            # a pickle from an untrusted source.
            tfidf_model = pickle.load(f)
        print("[OK] TF-IDF Loaded")
    else:
        print("[FAIL] Failed to load TF-IDF (File missing)")
except Exception as e:  # same best-effort policy as the XLM-R load above
    print(f"[FAIL] Error loading TF-IDF: {e}")
| # --------------------------------------------------------- | |
| # PREDICTION LOGIC (Must match ingest_reddit.py) | |
| # --------------------------------------------------------- | |
def predict_urgency(text):
    """Two-stage urgency classifier (must match ingest_reddit.py).

    Stage 1: the TF-IDF ensemble acts as a cheap gatekeeper and rejects
    any post whose positive-class probability is below 0.20.
    Stage 2: the XLM-R model makes the final decision at a 0.5 threshold.

    Args:
        text: Raw post text to classify.

    Returns:
        Tuple ``(is_urgent, confidence, source)`` where ``is_urgent`` is a
        bool, ``confidence`` is a float probability, and ``source`` names
        which stage produced the decision ("TF-IDF Reject", "RoBERTa",
        or "No Model" when neither classifier is loaded).
    """
    # 1. Gatekeeper (TF-IDF). Explicit None check: sklearn estimators do
    # not define reliable truthiness (Pipeline delegates __len__ to its
    # steps), so `if tfidf_model:` could misbehave.
    if tfidf_model is not None:
        probs = tfidf_model.predict_proba([text])[0]
        tfidf_conf = probs[1]  # probability of the "urgent" class
        if tfidf_conf < 0.20:
            return False, tfidf_conf, "TF-IDF Reject"
    # 2. Context Expert (RoBERTa)
    if roberta_model is not None and tokenizer is not None:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
        with torch.no_grad():  # inference only; skip gradient bookkeeping
            outputs = roberta_model(**inputs)
        r_probs = F.softmax(outputs.logits, dim=-1)
        roberta_conf = r_probs[0][1].item()
        return (roberta_conf > 0.5), roberta_conf, "RoBERTa"
    # Neither model is available: fail closed (treat as not urgent).
    return False, 0.0, "No Model"
| # --------------------------------------------------------- | |
| # TEST DATA | |
| # --------------------------------------------------------- | |
# ---------------------------------------------------------
# TEST DATA
# ---------------------------------------------------------
# Mixed Filipino/English posts that mirror real Reddit traffic.
# The first five are genuine rescue requests the pipeline should
# accept; the rest are news/donation/discussion posts it should reject.
TEST_POSTS = [
    # --- SHOULD BE ACCEPTED ---
    "Tulong po, stuck kami sa bubong ng bahay, tumataas tubig sa Marikina!",
    "Rescue needed at Provident Village, 3 kids trapped inside ceiling.",
    "Wala na kaming matatakbuhan, lampas tao na ang baha sa Cainta.",
    "Emergency! Landslide blocked the road in Baguio, need extraction.",
    "Please help us, flood entering 2nd floor in San Mateo Rizal.",
    # --- SHOULD BE REJECTED ---
    "Breaking News: Typhoon Signal No 4 raised in Bicol.",
    "Open for donations via GCash for typhoon victims.",
    "Looking for volunteers to repack relief goods at Ateneo.",
    "Stay safe everyone, praying for all affected.",
    "Discussion: Why is the government so slow?",
    "My heart breaks seeing the flood photos."
]
def run_simulation():
    """Run every TEST_POSTS entry through the full triage pipeline.

    Per post: (A) rule-based news/irrelevance filter, (B) two-stage AI
    urgency prediction, (C) NER + disaster-type extraction for accepted
    posts. Results are printed to the console; nothing is returned.
    Short sleeps only pace the output so it is readable as it scrolls.
    """
    print("\n--- [SIM] STARTING SIMULATION ---\n")
    for text in TEST_POSTS:
        print(f"Post: {text[:60]}...")
        # A. Logic Filter — cheap rules run before any model inference.
        is_bad, reason = is_news_or_irrelevant(text)
        if is_bad:
            print(f"   [BLOCKED] by Logic: {reason}")
            print("-" * 50)
            time.sleep(0.5)
            continue
        # B. AI Prediction (TF-IDF gatekeeper, then XLM-R)
        is_urgent, score, source = predict_urgency(text)
        if is_urgent:
            # C. Entity Extraction — only pay the NER cost for accepted posts.
            ner = extract_entities(text)
            locs = ner.get('locations', [])
            disaster = get_disaster_type(text)
            print(f"   [ACCEPTED] ({source} Conf: {score:.2%})")
            print(f"   Location: {locs}")
            print(f"   Type: {disaster}")
        else:
            print(f"   [REJECTED] by AI (Conf: {score:.2%})")
        print("-" * 50)
        time.sleep(1)
# Entry point: run the simulation only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    run_simulation()