#!/usr/bin/env python3
"""
Download cutting-edge CPU-optimized models for production.
"""
import json
import os
from datetime import date
from pathlib import Path

import requests
from huggingface_hub import snapshot_download, HfApi
# Directory that holds every downloaded model, one sub-directory per model.
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)

# CPU-optimized models (small, fast, quantized)
MODELS_TO_DOWNLOAD = {
    # Ultra-fast CPU models (GGUF entries: "filename" is the preferred file;
    # download_model falls back to the first .gguf in the repo if absent)
    "phi-2-gguf": {
        # NOTE(review): microsoft/phi-2 hosts safetensors, not GGUF files —
        # verify this repo id (a TheBloke/phi-2-GGUF-style repo is likely intended).
        "repo_id": "microsoft/phi-2",
        "filename": "phi-2.Q4_K_M.gguf",  # 4-bit quantization
        "size_gb": 1.6,
        "tokens_per_sec": "~30-50",
        "description": "Microsoft Phi-2 GGUF (4-bit)"
    },
    "tinyllama-gguf": {
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        "size_gb": 0.8,
        "tokens_per_sec": "~50-80",
        "description": "TinyLlama 1.1B GGUF (4-bit)"
    },
    "qwen2-0.5b-gguf": {
        "repo_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        "filename": "qwen2.5-0.5b-instruct-q4_0.gguf",
        "size_gb": 0.3,
        "tokens_per_sec": "~100-150",
        "description": "Qwen 2.5 0.5B GGUF (4-bit)"
    },
    # ONNX Runtime optimized models ("files" lists substrings matched against
    # every path in the repo; all matches are downloaded)
    "bert-tiny-onnx": {
        # NOTE(review): "microsoft/bert-tiny" may not exist on the Hub —
        # the commonly used repo is prajjwal1/bert-tiny; confirm.
        "repo_id": "microsoft/bert-tiny",
        "files": ["model.onnx", "vocab.txt"],
        "type": "onnx",
        "description": "BERT-Tiny ONNX for ultra-fast embeddings"
    }
}
def _stream_download(url, filepath, show_progress=False):
    """Stream *url* to *filepath* in 8 KiB chunks.

    Args:
        url: Direct download URL.
        filepath: Destination Path; parent directory must already exist.
        show_progress: When True, print a carriage-return progress percentage
            (only possible when the server sends a Content-Length header).

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    # timeout prevents an unresponsive server from hanging the script forever;
    # the with-block guarantees the connection is released.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0
        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if show_progress and total_size > 0:
                    percent = (downloaded / total_size) * 100
                    print(f" Progress: {percent:.1f}%", end='\r')


def _download_onnx_files(model_info, target_dir):
    """Download every repo file whose path contains one of model_info['files']."""
    api = HfApi()
    files = api.list_repo_files(model_info["repo_id"])
    wanted = model_info.get("files", [])
    for file in files:
        # Substring match, as in the original: also picks up nested paths
        # like "onnx/model.onnx".
        if any(f in file for f in wanted):
            print(f" Downloading {file}...")
            url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{file}"
            filepath = target_dir / file
            # Repo paths may contain sub-directories; create them or open() fails.
            filepath.parent.mkdir(parents=True, exist_ok=True)
            _stream_download(url, filepath)
            print(f" ✓ Downloaded {file} ({filepath.stat().st_size / 1024 / 1024:.1f}MB)")


def _download_gguf(model_info, target_dir):
    """Download the requested GGUF file, or the first .gguf found in the repo."""
    print(f" Looking for {model_info['filename']}...")
    api = HfApi()
    files = api.list_repo_files(model_info["repo_id"])
    gguf_files = [f for f in files if f.endswith('.gguf')]
    if not gguf_files:
        print(f" ⚠ No GGUF files found in repo")
        return
    target_file = model_info.get('filename')
    # Prefer the configured filename; otherwise fall back to the first file
    # listed (NOT necessarily the smallest — the repo listing is unordered
    # by size; the original "# Get smallest" comment was misleading).
    if target_file and target_file in gguf_files:
        file_to_download = target_file
    else:
        file_to_download = gguf_files[0]
    print(f" Found: {file_to_download}")
    url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{file_to_download}"
    filepath = target_dir / file_to_download
    _stream_download(url, filepath, show_progress=True)
    print(f"\n ✓ Downloaded {file_to_download} ({filepath.stat().st_size / 1024 / 1024:.1f}MB)")


def download_model(model_name, model_info):
    """Download one model into MODELS_DIR/<model_name>.

    Args:
        model_name: Registry key; also used as the target sub-directory name.
        model_info: Entry from MODELS_TO_DOWNLOAD. ``type == "onnx"`` selects
            the multi-file ONNX path; anything else is treated as a GGUF repo.

    Errors are reported and swallowed deliberately so one failed model does
    not abort the rest of the batch (best-effort script behavior).
    """
    print(f"\n📥 Downloading {model_name}...")
    print(f" Description: {model_info['description']}")
    target_dir = MODELS_DIR / model_name
    target_dir.mkdir(exist_ok=True)
    try:
        if model_info.get("type") == "onnx":
            _download_onnx_files(model_info, target_dir)
        else:
            _download_gguf(model_info, target_dir)
    except Exception as e:  # top-level boundary: report and move on
        print(f" ❌ Error downloading {model_name}: {e}")
def main():
    """Download the essential models, then write a JSON registry of everything on disk."""
    print("=" * 60)
    print("🚀 DOWNLOADING CUTTING-EDGE CPU-OPTIMIZED MODELS")
    print("=" * 60)
    # Download selected models — start with the essentials; extend as needed.
    models_to_get = ["qwen2-0.5b-gguf", "bert-tiny-onnx"]
    for model_name in models_to_get:
        if model_name in MODELS_TO_DOWNLOAD:
            download_model(model_name, MODELS_TO_DOWNLOAD[model_name])
    # Build the registry from what is actually on disk, so previously
    # downloaded models are included too.
    registry = {
        "models": {},
        # Record the real run date instead of the stale hard-coded "2026-01-22".
        "download_timestamp": date.today().isoformat(),
        "total_size_gb": 0,
    }
    for model_dir in MODELS_DIR.iterdir():
        if model_dir.is_dir():
            total_size = sum(f.stat().st_size for f in model_dir.rglob('*') if f.is_file())
            registry["models"][model_dir.name] = {
                "path": str(model_dir.relative_to(MODELS_DIR)),
                "size_mb": total_size / 1024 / 1024,
                # Top-level files only; nested files still count toward size_mb above.
                "files": [f.name for f in model_dir.iterdir() if f.is_file()],
            }
            registry["total_size_gb"] += total_size / 1024 / 1024 / 1024
    # Save registry
    registry_file = MODELS_DIR / "model_registry.json"
    with open(registry_file, 'w') as f:
        json.dump(registry, f, indent=2)
    print(f"\n📋 Model registry saved to: {registry_file}")
    print(f"📦 Total models size: {registry['total_size_gb']:.2f} GB")
    print("\n✅ Model download complete!")
    print("\nNext steps:")
    print("1. Update config.py to use downloaded models")
    print("2. Run: python -c \"from app.llm_integration import CPUOptimizedLLM; llm = CPUOptimizedLLM(); llm.initialize()\"")
    print("3. Test with: python test_real_llm.py")


if __name__ == "__main__":
    main()