File size: 884 Bytes
1ac9f32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""
Pre-download the sentiment model so it's baked into the Docker image.
Run this during `docker build` to avoid cold-start downloads.
"""
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os

# Hub repository to fetch and the directory (inside the image) to save it into.
MODEL_NAME = "pascalrai/hinglish-twitter-roberta-base-sentiment"
MODEL_DIR = os.environ.get("MODEL_DIR", "/app/models/sentiment")

# 🛡️ Sentinel: the revision selects which Hub snapshot gets baked into the image.
# A branch name such as "main" is a moving ref, so it does NOT pin anything:
# two builds may end up with different weights. Set MODEL_REVISION to a full
# commit SHA at build time to get a reproducible, tamper-resistant download.
# The default stays "main" so existing builds keep working; an empty value
# (e.g. an unset Docker build arg) also falls back to it.
REVISION = os.environ.get("MODEL_REVISION") or "main"

# A full git commit SHA is exactly 40 hexadecimal characters.
_revision_is_commit_sha = len(REVISION) == 40 and all(
    ch in "0123456789abcdef" for ch in REVISION.lower()
)

print(f"Downloading {MODEL_NAME} to {MODEL_DIR}...", flush=True)
print(f"Using revision: {REVISION}", flush=True)
if not _revision_is_commit_sha:
    # Warn instead of failing: an unpinned build is still a deliberate option.
    print(
        f"WARNING: revision {REVISION!r} is not a full 40-character commit hash; "
        "the download is not pinned. Set MODEL_REVISION to a commit SHA.",
        flush=True,
    )

# Tokenizer and model are fetched at the same revision so they stay in sync.
print("Fetching tokenizer...", flush=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, revision=REVISION)
print("Fetching model...", flush=True)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, revision=REVISION)

# Write both artifacts into MODEL_DIR so they can be loaded from that path later.
tokenizer.save_pretrained(MODEL_DIR)
model.save_pretrained(MODEL_DIR)
print("Model downloaded and saved successfully.", flush=True)