Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| CITADEL_OMEGA - Model Downloader | |
| Download ML models from HuggingFace for trading operations | |
| Author: Citadel Architect v25.0.OMNI+ | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| import json | |
# huggingface_hub is the only third-party dependency of this script; when it
# is missing, print an install hint and exit with status 1.
try:
    from huggingface_hub import snapshot_download, hf_hub_download
except ImportError:
    print("β Error: huggingface_hub not installed")
    print(" Install with: pip install huggingface-hub")
    raise SystemExit(1)

# Startup banner (rule, title, rule, blank line), emitted at import time.
print(
    "=" * 60,
    "π€ CITADEL_OMEGA - ML Model Downloader",
    "=" * 60,
    "",
    sep="\n",
)

# Setup paths: models live under <parent of this file's directory>/CITADEL_OMEGA.
BASE_DIR = Path(__file__).parent.parent.joinpath("CITADEL_OMEGA")
MODELS_DIR = BASE_DIR.joinpath("models", "pretrained")
# Created eagerly so later code can write into it without further checks.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
| # Model registry | |
# Model registry.
#
# Maps a category name to the list of models in that category. Each entry has:
#   name        - human-readable label used in console output and the registry
#   repo_id     - repository identifier passed to snapshot_download
#   local_dir   - directory name created under MODELS_DIR
#   description - short free-text summary
#   skip        - optional; when truthy the entry is counted but not downloaded
MODELS = {
    # --- sentiment classifiers -------------------------------------------
    "sentiment_analysis": [
        {
            "name": "FinBERT",
            "repo_id": "ProsusAI/finbert",
            "local_dir": "finbert",
            "description": "Financial sentiment analysis (positive/negative/neutral)",
        },
        {
            "name": "CryptoBERT",
            "repo_id": "ElKulako/cryptobert",
            "local_dir": "cryptobert",
            "description": "Cryptocurrency-specific sentiment analysis",
        },
        {
            "name": "Twitter RoBERTa",
            "repo_id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
            "local_dir": "twitter-roberta-sentiment",
            "description": "Twitter sentiment analysis",
        },
    ],
    # --- sentence embedding models ---------------------------------------
    "embeddings": [
        {
            "name": "Sentence Transformers (MiniLM)",
            "repo_id": "sentence-transformers/all-MiniLM-L6-v2",
            "local_dir": "sentence-transformers-minilm",
            "description": "Fast sentence embeddings for RAG",
        },
        {
            "name": "Sentence Transformers (MPNet)",
            "repo_id": "sentence-transformers/all-mpnet-base-v2",
            "local_dir": "sentence-transformers-mpnet",
            "description": "High-quality sentence embeddings",
        },
    ],
    # --- small generative / instruction-tuned models ---------------------
    "language_models": [
        {
            "name": "DistilGPT2",
            "repo_id": "distilgpt2",
            "local_dir": "distilgpt2",
            "description": "Lightweight GPT-2 for text generation",
        },
        {
            "name": "FLAN-T5 Small",
            "repo_id": "google/flan-t5-small",
            "local_dir": "flan-t5-small",
            "description": "Instruction-tuned T5 for Q&A",
        },
    ],
    # --- time series -----------------------------------------------------
    "timeseries": [
        {
            "name": "TimeGPT",
            "repo_id": "nixtla/timegpt-1",
            "local_dir": "timegpt",
            "description": "Time series forecasting",
            "skip": True,  # Requires authentication
        },
    ],
}
def download_model(repo_id: str, local_dir: str, description: str) -> bool:
    """Download one model snapshot from HuggingFace into ``MODELS_DIR``.

    The snapshot is fetched into a sibling ``<local_dir>.partial`` directory
    and renamed to its final name only after ``snapshot_download`` returns.
    A final directory therefore only appears once a download has finished,
    and an interrupted run cannot make a later run skip the model.

    Args:
        repo_id: Repository identifier passed to ``snapshot_download``.
        local_dir: Directory name to create under ``MODELS_DIR``.
        description: Free-text description, only echoed to the console.

    Returns:
        True if the model is already present (non-empty directory) or was
        downloaded successfully; False if any step raised. Errors are printed
        and never propagated.
    """
    target_path = MODELS_DIR / local_dir

    # A bare existence check would also accept an empty directory left behind
    # by an aborted run; only a directory with content counts as "present".
    if target_path.is_dir() and any(target_path.iterdir()):
        print(f"βοΈ {local_dir} already exists, skipping...")
        return True

    # Staging directory; it is kept on failure so a later run can reuse it.
    staging_path = target_path.with_name(target_path.name + ".partial")

    try:
        print(f"π₯ Downloading {local_dir}...")
        print(f" Repo: {repo_id}")
        print(f" Description: {description}")
        snapshot_download(
            repo_id=repo_id,
            local_dir=str(staging_path),
            # NOTE(review): believed to be deprecated in recent huggingface_hub
            # releases - confirm against the pinned version before removing.
            local_dir_use_symlinks=False
        )
        if target_path.exists():
            # Only reachable for an empty directory (see the check above);
            # remove it so the rename below also works on Windows.
            target_path.rmdir()
        staging_path.rename(target_path)
        print(f"β {local_dir} downloaded successfully!")
        print()
        return True
    except Exception as e:
        # Deliberate best-effort: one failing model must not abort the batch.
        print(f"β Error downloading {local_dir}: {e}")
        print()
        return False
def main():
    """Download every registered model, write the registry file, print a summary.

    Steps:
      1. Report whether the ``HF_TOKEN`` environment variable is set.
      2. Walk ``MODELS`` category by category, skipping entries flagged
         ``skip`` and calling ``download_model`` for the rest.
      3. Write ``model_registry.json`` next to ``MODELS_DIR`` listing every
         model whose directory exists and whose download did not fail in
         this run.
      4. Print totals and follow-up hints.

    Returns:
        0 if no download failed, 1 otherwise, so the value can be used as a
        process exit status.
    """
    # Check for HF token (optional); it is only reported here, not passed on.
    if os.getenv("HF_TOKEN"):
        print("π HuggingFace token detected")
    else:
        print("β οΈ No HF_TOKEN found - some models may require authentication")
    print()

    print(f"π Models directory: {MODELS_DIR}")
    print()

    # Track results
    total_models = 0
    downloaded = 0
    failed = 0
    skipped = 0
    # local_dir names whose download failed in this run. A failed download
    # may have left a partial directory behind, so directory existence alone
    # must not be taken as "downloaded" when building the registry.
    failed_dirs = set()

    # Download each category
    for category, models_list in MODELS.items():
        print("-" * 60)
        print(f"π¦ Category: {category.upper()}")
        print("-" * 60)
        print()
        for model in models_list:
            total_models += 1
            if model.get("skip", False):
                print(f"βοΈ Skipping {model['name']} (requires special auth)")
                skipped += 1
                print()
                continue
            success = download_model(
                repo_id=model["repo_id"],
                local_dir=model["local_dir"],
                description=model["description"],
            )
            if success:
                downloaded += 1
            else:
                failed += 1
                failed_dirs.add(model["local_dir"])

    # Create model registry: the full catalogue plus the models present on disk.
    registry = {
        "version": "1.0.0",
        "downloaded_models": [
            {
                "name": model["name"],
                "category": category,
                "repo_id": model["repo_id"],
                "local_path": str(MODELS_DIR / model["local_dir"]),
                "description": model["description"],
            }
            for category, models_list in MODELS.items()
            for model in models_list
            if model["local_dir"] not in failed_dirs
            and (MODELS_DIR / model["local_dir"]).exists()
        ],
        "categories": MODELS,
    }

    # Save registry; explicit encoding keeps the file independent of the locale.
    registry_path = MODELS_DIR.parent / "model_registry.json"
    with open(registry_path, "w", encoding="utf-8") as f:
        json.dump(registry, f, indent=2)

    print("=" * 60)
    print("β Model Download Complete!")
    print("=" * 60)
    print()
    print("π Summary:")
    print(f" Total models: {total_models}")
    print(f" Downloaded: {downloaded}")
    print(f" Failed: {failed}")
    print(f" Skipped: {skipped}")
    print()
    print(f"π Models location: {MODELS_DIR}")
    print(f"π Registry saved: {registry_path}")
    print()
    print("π― Next Steps:")
    print(" 1. Test models: python scripts/test_models.py")
    print(" 2. Setup RAG: python omega_archive/rag_system/rag_engine.py")
    print(" 3. Train custom models: python tools/model_trainers/lstm_trainer.py")
    print()

    # Non-zero when at least one download failed.
    return 1 if failed else 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status; a None return
    # maps to exit status 0, so a plain successful run still exits cleanly.
    sys.exit(main())