File size: 4,491 Bytes
bdb271a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import time
import os
import pickle
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from ingest_reddit import is_news_or_irrelevant, get_disaster_type, check_for_philippine_location
from ner_extractor import extract_entities

# ---------------------------------------------------------
# CONFIG & SETUP
# ---------------------------------------------------------
# All model artifacts are resolved relative to this file, so the script
# behaves the same regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_DIR = os.path.join(BASE_DIR, 'models')
ROBERTA_DIR = os.path.join(MODEL_DIR, 'roberta_model')  # fine-tuned XLM-R checkpoint directory
TFIDF_PATH = os.path.join(MODEL_DIR, 'tfidf_ensemble.pkl')  # pickled TF-IDF gatekeeper model

# ---------------------------------------------------------
# LOAD BRAINS
# ---------------------------------------------------------
print("--- ALISTO: Loading Simulator ---")

# Module-level globals consumed by predict_urgency(). Each stays None when
# its artifact is missing or fails to load; the predictor then degrades
# gracefully instead of crashing.
tokenizer = None
roberta_model = None
tfidf_model = None

# 1. Load XLM-R (Context Expert)
try:
    if os.path.exists(ROBERTA_DIR):
        tokenizer = AutoTokenizer.from_pretrained(ROBERTA_DIR)
        roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_DIR)
        roberta_model.eval()  # inference mode: disables dropout etc.
        print("βœ… XLM-R Loaded")
    else:
        print("❌ Failed to load XLM-R (Folder missing)")
except Exception as e:
    # Broad catch is deliberate: the simulator should still start (TF-IDF only)
    # even if this model cannot be loaded.
    print(f"❌ Error loading XLM-R: {e}")

# 2. Load TF-IDF (Gatekeeper)
try:
    if os.path.exists(TFIDF_PATH):
        # NOTE(review): pickle.load executes arbitrary code from the file —
        # only ever point TFIDF_PATH at a trusted, locally produced artifact.
        with open(TFIDF_PATH, 'rb') as f:
            tfidf_model = pickle.load(f)
        print("βœ… TF-IDF Loaded")
    else:
        print("❌ Failed to load TF-IDF (File missing)")
except Exception as e:
    print(f"❌ Error loading TF-IDF: {e}")

# ---------------------------------------------------------
# PREDICTION LOGIC (Must match ingest_reddit.py)
# ---------------------------------------------------------
def predict_urgency(text, reject_threshold=0.20, accept_threshold=0.5):
    """Classify a post's urgency with the two-stage ensemble.

    Stage 1 (TF-IDF gatekeeper) cheaply vetoes clearly non-urgent text;
    stage 2 (fine-tuned XLM-R) makes the final call. Mirrors the logic in
    ingest_reddit.py — keep the two in sync.

    Args:
        text: Raw post text to score.
        reject_threshold: Posts whose TF-IDF urgent-class probability falls
            below this are rejected without running the transformer.
        accept_threshold: Minimum RoBERTa urgent-class probability to accept.

    Returns:
        Tuple of (is_urgent: bool, confidence: float, source: str), where
        source names which stage produced the verdict.
    """
    # 1. Gatekeeper (TF-IDF). Explicit None check: model objects (e.g. an
    # sklearn Pipeline) can define __len__, making bare truthiness fragile.
    if tfidf_model is not None:
        probs = tfidf_model.predict_proba([text])[0]
        # Index 1 is taken as the "urgent" class — assumes classes_ == [0, 1];
        # this matches how ingest_reddit.py reads the same model.
        tfidf_conf = probs[1]

        if tfidf_conf < reject_threshold:
            return False, tfidf_conf, "TF-IDF Reject"

    # 2. Context Expert (RoBERTa) — only runs when both artifacts loaded.
    if roberta_model is not None and tokenizer is not None:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            outputs = roberta_model(**inputs)
            r_probs = F.softmax(outputs.logits, dim=-1)
            roberta_conf = r_probs[0][1].item()

        return (roberta_conf > accept_threshold), roberta_conf, "RoBERTa"

    # Neither model available: fail closed (not urgent).
    return False, 0.0, "No Model"

# ---------------------------------------------------------
# TEST DATA
# ---------------------------------------------------------
# Fixed evaluation set: the first group should pass the full pipeline,
# the second should be filtered out (news, donations, discussion, etc.).
TEST_POSTS = [
    # --- SHOULD BE ACCEPTED ---
    "Tulong po, stuck kami sa bubong ng bahay, tumataas tubig sa Marikina!", 
    "Rescue needed at Provident Village, 3 kids trapped inside ceiling.",
    "Wala na kaming matatakbuhan, lampas tao na ang baha sa Cainta.",
    "Emergency! Landslide blocked the road in Baguio, need extraction.",
    "Please help us, flood entering 2nd floor in San Mateo Rizal.",
    
    # --- SHOULD BE REJECTED ---
    "Breaking News: Typhoon Signal No 4 raised in Bicol.",
    "Open for donations via GCash for typhoon victims.",
    "Looking for volunteers to repack relief goods at Ateneo.",
    "Stay safe everyone, praying for all affected.",
    "Discussion: Why is the government so slow?",
    "My heart breaks seeing the flood photos."
]

def run_simulation():
    """Replay every post in TEST_POSTS through the pipeline, printing verdicts.

    Pipeline per post: rule-based filter -> AI urgency model -> (on accept)
    entity extraction and disaster-type tagging. Output goes to stdout only.
    """
    print("\n--- 🟒 STARTING SIMULATION ---\n")

    divider = "-" * 50
    for post in TEST_POSTS:
        print(f"πŸ“ Post: {post[:60]}...")

        # A. Hard rule filter runs first — cheapest rejection path.
        blocked, why = is_news_or_irrelevant(post)
        if blocked:
            print(f"   ❌ BLOCKED by Logic: {why}")
            print(divider)
            time.sleep(0.5)
            continue

        # B. Two-stage AI prediction.
        urgent, confidence, model_name = predict_urgency(post)

        if not urgent:
            print(f"   ❌ REJECTED by AI (Conf: {confidence:.2%})")
        else:
            # C. Entity extraction only pays off for accepted posts.
            entities = extract_entities(post)
            places = entities.get('locations', [])
            hazard = get_disaster_type(post)

            print(f"   βœ… ACCEPTED ({model_name} Conf: {confidence:.2%})")
            print(f"      πŸ“ Location: {places}")
            print(f"      🌊 Type: {hazard}")

        print(divider)
        time.sleep(1)  # pacing so the console output is readable live

if __name__ == "__main__":
    run_simulation()