MLOps-Platforms / src /scripts /download_model.py
songhieng's picture
Upload 72 files
7e825f9 verified
#!/usr/bin/env python3
"""
Script to download the RoBERTa phishing content detector model from Hugging Face
"""
import os
import logging
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import snapshot_download
# Set up logging: configure the root logger at INFO level so the progress
# messages emitted below are shown. This runs at import time as well as when
# the file is executed as a script.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def download_model(
    model_name: str = "songhieng/roberta-phishing-content-detector-5.0",
    local_model_path: str = "models/roberta-phishing-detector",
) -> bool:
    """Download a sequence-classification model snapshot and verify it loads.

    With the default arguments this fetches the RoBERTa phishing content
    detector into ``models/roberta-phishing-detector``.

    Args:
        model_name: Hugging Face Hub repository id to download.
        local_model_path: Directory the snapshot is written to. It is created
            (including parents) when missing. A relative path is resolved
            against the current working directory.

    Returns:
        True when the snapshot was downloaded and both the tokenizer and the
        model could be loaded from the local copy. False when any step
        failed; the failure is logged together with its traceback.
    """
    # Stage 1: fetch the files. Directory creation lives inside the try so a
    # filesystem error is reported through the boolean result like any other
    # failure instead of escaping as an exception.
    try:
        Path(local_model_path).mkdir(parents=True, exist_ok=True)
        logger.info("Downloading model: %s", model_name)
        snapshot_download(
            repo_id=model_name,
            local_dir=local_model_path,
            # NOTE(review): newer huggingface_hub releases appear to deprecate
            # this flag -- confirm against the pinned version before removing.
            local_dir_use_symlinks=False,
        )
    except Exception as e:
        # Top-level boundary of the script: log with traceback, report failure.
        logger.exception("Failed to download model: %s", e)
        return False
    logger.info("Model downloaded successfully using snapshot_download")

    # Stage 2: confirm the local copy is loadable without touching the
    # network (local_files_only=True). Reported separately so a bad snapshot
    # is not mistaken for a failed download.
    try:
        logger.info("Verifying model loading...")
        tokenizer = AutoTokenizer.from_pretrained(
            local_model_path, local_files_only=True
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            local_model_path, local_files_only=True
        )
        logger.info("Model verification successful!")
        logger.info("Model saved to: %s", os.path.abspath(local_model_path))
        # Summarise what was loaded.
        logger.info("Model config: %s", model.config)
        logger.info("Tokenizer vocab size: %s", len(tokenizer))
    except Exception as e:
        logger.exception("Failed to verify downloaded model: %s", e)
        return False
    return True
if __name__ == "__main__":
    # Script entry point: run the download and signal failure to the shell
    # through a non-zero exit status.
    if not download_model():
        # SystemExit is a builtin exception, so this works even where the
        # site-provided exit() helper is not installed (e.g. `python -S`).
        raise SystemExit(1)
    logger.info("Model download completed successfully!")