Spaces:
Running
Running
Update alisto_project/backend/ingest_reddit.py
Browse files
alisto_project/backend/ingest_reddit.py
CHANGED
|
@@ -11,6 +11,7 @@ from flask import Flask
|
|
| 11 |
from models import db, DisasterPost
|
| 12 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 13 |
from ner_extractor import extract_entities
|
|
|
|
| 14 |
|
| 15 |
# 1. Config & Setup
|
| 16 |
# defines the subreddits to be monitored by the scraper
|
|
@@ -31,28 +32,33 @@ app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {'connect_args': {'timeout': 15}}
|
|
| 31 |
db.init_app(app)
|
| 32 |
|
| 33 |
# 2. Load Models
|
| 34 |
-
print("Loading ALISTO Brains...")
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
TFIDF_PATH = os.path.join(MODEL_DIR, 'tfidf_ensemble.pkl')
|
| 38 |
|
| 39 |
-
# A. RoBERTa (XLM-R Multilingual)
|
| 40 |
-
# loads the RoBERTa tokenizer and sequence classification model (Context Expert)
|
| 41 |
try:
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
roberta_model.to(device)
|
| 46 |
roberta_model.eval()
|
| 47 |
-
print("✅ Context Expert
|
|
|
|
| 48 |
except Exception as e:
|
| 49 |
-
print(f"❌ Error loading
|
| 50 |
exit()
|
| 51 |
|
| 52 |
# B. TF-IDF (The Gatekeeper)
|
| 53 |
-
# loads the pre-trained TF-IDF vectorizer and ensemble model (Gatekeeper)
|
| 54 |
try:
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
tfidf_model = pickle.load(f)
|
| 57 |
print("✅ Gatekeeper (TF-IDF) loaded")
|
| 58 |
except Exception as e:
|
|
|
|
| 11 |
from models import db, DisasterPost
|
| 12 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 13 |
from ner_extractor import extract_entities
|
| 14 |
+
from huggingface_hub import hf_hub_download
|
| 15 |
|
| 16 |
# 1. Config & Setup
|
| 17 |
# defines the subreddits to be monitored by the scraper
|
|
|
|
| 32 |
db.init_app(app)
|
| 33 |
|
| 34 |
# 2. Load Models
|
| 35 |
+
print("Loading ALISTO Brains from Cloud...")
|
| 36 |
+
# Point to your new Model Repository
|
| 37 |
+
MODEL_ID = "Quivara/alisto-brain"
|
|
|
|
| 38 |
|
|
|
|
|
|
|
| 39 |
try:
|
| 40 |
+
# Load Tokenizer
|
| 41 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 42 |
+
|
| 43 |
+
# Load Model (Num labels must match your training, usually 2 for urgent/not urgent)
|
| 44 |
+
roberta_model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, num_labels=2)
|
| 45 |
+
|
| 46 |
+
device = torch.device("cpu")
|
| 47 |
roberta_model.to(device)
|
| 48 |
roberta_model.eval()
|
| 49 |
+
print(f"✅ Context Expert loaded from {MODEL_ID}")
|
| 50 |
+
|
| 51 |
except Exception as e:
|
| 52 |
+
print(f"❌ Error loading Model: {e}")
|
| 53 |
exit()
|
| 54 |
|
| 55 |
# B. TF-IDF (The Gatekeeper)
|
|
|
|
| 56 |
try:
|
| 57 |
+
print("Downloading Gatekeeper (TF-IDF)...")
|
| 58 |
+
# Downloads the file from your alisto-brain repo to a cache folder
|
| 59 |
+
tfidf_path = hf_hub_download(repo_id=MODEL_ID, filename="tfidf_ensemble.pkl")
|
| 60 |
+
|
| 61 |
+
with open(tfidf_path, 'rb') as f:
|
| 62 |
tfidf_model = pickle.load(f)
|
| 63 |
print("✅ Gatekeeper (TF-IDF) loaded")
|
| 64 |
except Exception as e:
|