Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Script to download the RoBERTa phishing content detector model from Hugging Face | |
| """ | |
| import os | |
| import logging | |
| from pathlib import Path | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from huggingface_hub import snapshot_download | |
# Module-wide logger for this script; INFO-level root logging is requested
# at import time so progress messages are shown when run from the CLI.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
def download_model(
    model_name: str = "songhieng/roberta-phishing-content-detector-5.0",
    local_model_path: str = "models/roberta-phishing-detector",
) -> bool:
    """Download a sequence-classification model snapshot and check it loads.

    The repository is fetched with ``snapshot_download`` into
    ``local_model_path`` and then loaded back from disk (tokenizer and
    model, ``local_files_only=True``) to confirm the files are usable.

    Args:
        model_name: Hugging Face Hub repository id to download. Defaults to
            the RoBERTa phishing content detector.
        local_model_path: Directory the snapshot is written to; created
            (including parents) if it does not exist.

    Returns:
        True when both the download and the load check succeed, False when
        either stage raises. Failures are logged with their traceback.

    Raises:
        OSError: If the target directory cannot be created; directory
            creation happens before any error handling is in effect.
    """
    # Create the target directory up front.
    Path(local_model_path).mkdir(parents=True, exist_ok=True)

    # Stage 1: fetch the repository files.
    try:
        logger.info(f"Downloading model: {model_name}")
        snapshot_download(
            repo_id=model_name,
            local_dir=local_model_path,
            # NOTE(review): recent huggingface_hub releases appear to
            # deprecate this argument -- confirm against the pinned version
            # before removing it.
            local_dir_use_symlinks=False,
        )
        logger.info("Model downloaded successfully using snapshot_download")
    except Exception as e:
        # Top-level boundary of a CLI script: report and signal failure.
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"Failed to download model: {e}")
        return False

    # Stage 2: verify the snapshot loads from disk. It is handled separately
    # so a load failure is not reported as a download failure.
    try:
        logger.info("Verifying model loading...")
        tokenizer = AutoTokenizer.from_pretrained(
            local_model_path, local_files_only=True
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            local_model_path, local_files_only=True
        )
        logger.info("Model verification successful!")
        logger.info(f"Model saved to: {os.path.abspath(local_model_path)}")

        # Summarise what was loaded.
        logger.info(f"Model config: {model.config}")
        logger.info(f"Tokenizer vocab size: {len(tokenizer)}")
    except Exception as e:
        logger.exception(f"Failed to verify downloaded model: {e}")
        return False

    return True
if __name__ == "__main__":
    # Script entry point: run the download and report failure to the caller
    # through a non-zero exit status.
    if not download_model():
        # Raise SystemExit directly instead of calling the bare `exit`
        # helper, which is only installed by the `site` module and is
        # unavailable under `python -S` or in frozen builds.
        raise SystemExit(1)
    logger.info("Model download completed successfully!")