the-algorithm / scripts /download_model.py
github-actions[bot]
deploy: HF sync (Run 194)
1ac9f32
raw
history blame contribute delete
884 Bytes
"""
Pre-download the sentiment model so it's baked into the Docker image.
Run this during `docker build` to avoid cold-start downloads.
"""
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
# Hugging Face Hub repository to fetch, and the directory the tokenizer and
# model files are written to (overridable via the MODEL_DIR env variable).
MODEL_NAME = "pascalrai/hinglish-twitter-roberta-base-sentiment"
MODEL_DIR = os.environ.get("MODEL_DIR", "/app/models/sentiment")

# 🛡️ Sentinel: revision passed to both from_pretrained() calls below.
# NOTE: "main" is a branch name, not a pin -- it follows whatever is pushed
# upstream, so it gives no supply-chain guarantee on its own. To really pin
# the artifacts, set MODEL_REVISION to a full commit hash at build time.
# An empty MODEL_REVISION (e.g. an unset Docker build arg) falls back to
# "main" so existing builds keep working unchanged.
REVISION = os.environ.get("MODEL_REVISION") or "main"

print(f"Downloading {MODEL_NAME} to {MODEL_DIR}...", flush=True)

# A full git commit id is 40 hexadecimal characters; anything else (branch
# or tag name, abbreviated hash) can be re-pointed after this image is built.
_is_commit_hash = len(REVISION) == 40 and all(
    ch in "0123456789abcdef" for ch in REVISION.lower()
)
if not _is_commit_hash:
    print(
        f"WARNING: revision {REVISION!r} is not a full commit hash; "
        "the downloaded model is not pinned. "
        "Set MODEL_REVISION to a commit hash to pin it.",
        flush=True,
    )

print("Fetching tokenizer...", flush=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, revision=REVISION)

print("Fetching model...", flush=True)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, revision=REVISION)

# Write both artifacts into MODEL_DIR so the image can load them from disk
# instead of downloading at start-up.
tokenizer.save_pretrained(MODEL_DIR)
model.save_pretrained(MODEL_DIR)

print("Model downloaded and saved successfully.", flush=True)