#!/usr/bin/env python3
"""
Script to download the RoBERTa phishing content detector model from Hugging Face
"""

import os
import logging
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import snapshot_download

# Configure the root logger at import time so that INFO-level progress
# messages are shown, then create the logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def download_model(
    model_name: str = "songhieng/roberta-phishing-content-detector-5.0",
    local_model_path: str = "models/roberta-phishing-detector",
) -> bool:
    """Download a Hugging Face model snapshot and verify that it loads.

    The repository is fetched into ``local_model_path`` and then the
    tokenizer and sequence-classification model are loaded back from that
    directory to confirm the download is usable.

    Args:
        model_name: Hugging Face repository id to download. Defaults to the
            RoBERTa phishing content detector.
        local_model_path: Directory the snapshot is written to. It is created
            (including parents) if it does not exist.

    Returns:
        True if the download and the verification load both succeeded,
        False if any step raised an exception (the error is logged with its
        traceback).
    """
    try:
        # Created inside the try block so that a filesystem error (e.g. a
        # permission problem) is reported through the same True/False
        # contract as every other failure.
        Path(local_model_path).mkdir(parents=True, exist_ok=True)

        logger.info(f"Downloading model: {model_name}")

        # Download the model files using snapshot_download.
        # NOTE(review): local_dir_use_symlinks is reported as deprecated in
        # newer huggingface_hub releases - confirm against the pinned version.
        snapshot_download(
            repo_id=model_name,
            local_dir=local_model_path,
            local_dir_use_symlinks=False,
        )

        logger.info("Model downloaded successfully using snapshot_download")

        # Verify the model can be loaded. local_files_only=True restricts the
        # load to what is on disk, so this checks the files just downloaded.
        logger.info("Verifying model loading...")
        tokenizer = AutoTokenizer.from_pretrained(
            local_model_path, local_files_only=True
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            local_model_path, local_files_only=True
        )

        logger.info("Model verification successful!")
        logger.info(f"Model saved to: {os.path.abspath(local_model_path)}")

        # Print model info
        logger.info(f"Model config: {model.config}")
        logger.info(f"Tokenizer vocab size: {len(tokenizer)}")

    except Exception as e:
        # Top-level boundary for this script: log the message together with
        # the traceback so the failing step can be identified, then report
        # failure to the caller instead of raising.
        logger.exception("Failed to download model: %s", e)
        return False

    return True

if __name__ == "__main__":
    # Report failure to the calling shell/CI through a non-zero exit status.
    # SystemExit is raised directly because the bare `exit` helper is added
    # by the `site` module and is unavailable under `python -S` or in some
    # frozen/embedded interpreters.
    if not download_model():
        raise SystemExit(1)
    logger.info("Model download completed successfully!")