#!/usr/bin/env python3
"""
Download cutting-edge CPU-optimized models for production.
"""
import json
import os
from datetime import date
from pathlib import Path

import requests
from huggingface_hub import snapshot_download, HfApi

MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)

# Seconds to wait on connect/read before aborting a request (prevents hangs).
REQUEST_TIMEOUT = 60

# CPU-optimized models (small, fast, quantized)
MODELS_TO_DOWNLOAD = {
    # Ultra-fast CPU models
    "phi-2-gguf": {
        "repo_id": "microsoft/phi-2",
        "filename": "phi-2.Q4_K_M.gguf",  # 4-bit quantization
        "size_gb": 1.6,
        "tokens_per_sec": "~30-50",
        "description": "Microsoft Phi-2 GGUF (4-bit)"
    },
    "tinyllama-gguf": {
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        "size_gb": 0.8,
        "tokens_per_sec": "~50-80",
        "description": "TinyLlama 1.1B GGUF (4-bit)"
    },
    "qwen2-0.5b-gguf": {
        "repo_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        "filename": "qwen2.5-0.5b-instruct-q4_0.gguf",
        "size_gb": 0.3,
        "tokens_per_sec": "~100-150",
        "description": "Qwen 2.5 0.5B GGUF (4-bit)"
    },
    # ONNX Runtime optimized models
    "bert-tiny-onnx": {
        "repo_id": "microsoft/bert-tiny",
        "files": ["model.onnx", "vocab.txt"],
        "type": "onnx",
        "description": "BERT-Tiny ONNX for ultra-fast embeddings"
    }
}


def _stream_download(url, filepath, show_progress=False):
    """Stream *url* to *filepath* in 8 KiB chunks.

    Creates parent directories as needed (repo files may live in subdirs,
    e.g. ``onnx/model.onnx``). When *show_progress* is true and the server
    reports a Content-Length, prints an in-place percentage indicator.
    Raises requests.HTTPError on a non-2xx response.
    """
    filepath.parent.mkdir(parents=True, exist_ok=True)
    # Context manager ensures the streamed connection is released; a timeout
    # prevents the script from hanging forever on a stalled server.
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0
        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if show_progress and total_size > 0:
                    percent = (downloaded / total_size) * 100
                    print(f"   Progress: {percent:.1f}%", end='\r')


def _download_onnx(model_info, target_dir):
    """Download every repo file whose path contains a name from model_info['files']."""
    api = HfApi()
    repo_files = api.list_repo_files(model_info["repo_id"])
    wanted = model_info.get("files", [])
    for repo_file in repo_files:
        # Substring match so nested paths like "onnx/model.onnx" are caught too.
        if any(name in repo_file for name in wanted):
            print(f"   Downloading {repo_file}...")
            url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{repo_file}"
            filepath = target_dir / repo_file
            _stream_download(url, filepath)
            size_mb = filepath.stat().st_size / 1024 / 1024
            print(f"   ✓ Downloaded {repo_file} ({size_mb:.1f}MB)")


def _download_gguf(model_info, target_dir):
    """Download the GGUF file named in model_info['filename'] (or the first one found)."""
    print(f"   Looking for {model_info['filename']}...")
    api = HfApi()
    repo_files = api.list_repo_files(model_info["repo_id"])
    gguf_files = [f for f in repo_files if f.endswith('.gguf')]
    if not gguf_files:
        print(f"   ⚠ No GGUF files found in repo")
        return
    target_file = model_info.get('filename')
    if target_file and target_file in gguf_files:
        file_to_download = target_file
    else:
        # Fall back to the first listed GGUF; listing order is arbitrary,
        # NOT by size — the requested quantization may simply be missing.
        file_to_download = gguf_files[0]
    print(f"   Found: {file_to_download}")
    url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{file_to_download}"
    filepath = target_dir / file_to_download
    _stream_download(url, filepath, show_progress=True)
    size_mb = filepath.stat().st_size / 1024 / 1024
    print(f"\n   ✓ Downloaded {file_to_download} ({size_mb:.1f}MB)")


def download_model(model_name, model_info):
    """Download a single model into MODELS_DIR/<model_name>.

    *model_info* must carry 'repo_id' and 'description', plus either
    ``type == "onnx"`` with a 'files' list, or a GGUF 'filename'.
    Errors are reported but swallowed so one failed model does not
    abort the rest of the batch.
    """
    print(f"\n📥 Downloading {model_name}...")
    print(f"   Description: {model_info['description']}")
    target_dir = MODELS_DIR / model_name
    target_dir.mkdir(exist_ok=True)
    try:
        if model_info.get("type") == "onnx":
            _download_onnx(model_info, target_dir)
        else:
            _download_gguf(model_info, target_dir)
    except Exception as e:
        # Best-effort batch: log and move on to the next model.
        print(f"   ❌ Error downloading {model_name}: {e}")


def _build_registry():
    """Scan MODELS_DIR and return a registry dict of every downloaded model."""
    registry = {
        "models": {},
        # Record the actual run date instead of a hard-coded constant.
        "download_timestamp": date.today().isoformat(),
        "total_size_gb": 0
    }
    for model_dir in MODELS_DIR.iterdir():
        if model_dir.is_dir():
            total_size = sum(
                f.stat().st_size for f in model_dir.rglob('*') if f.is_file()
            )
            registry["models"][model_dir.name] = {
                "path": str(model_dir.relative_to(MODELS_DIR)),
                "size_mb": total_size / 1024 / 1024,
                "files": [f.name for f in model_dir.iterdir() if f.is_file()]
            }
            registry["total_size_gb"] += total_size / 1024 / 1024 / 1024
    return registry


def main():
    """Download the essential model set, then write a JSON registry of results."""
    print("=" * 60)
    print("🚀 DOWNLOADING CUTTING-EDGE CPU-OPTIMIZED MODELS")
    print("=" * 60)

    # Download selected models
    models_to_get = ["qwen2-0.5b-gguf", "bert-tiny-onnx"]  # Start with essentials
    for model_name in models_to_get:
        if model_name in MODELS_TO_DOWNLOAD:
            download_model(model_name, MODELS_TO_DOWNLOAD[model_name])

    # Create and save the model registry
    registry = _build_registry()
    registry_file = MODELS_DIR / "model_registry.json"
    with open(registry_file, 'w') as f:
        json.dump(registry, f, indent=2)

    print(f"\n📋 Model registry saved to: {registry_file}")
    print(f"📦 Total models size: {registry['total_size_gb']:.2f} GB")
    print("\n✅ Model download complete!")
    print("\nNext steps:")
    print("1. Update config.py to use downloaded models")
    print("2. Run: python -c \"from app.llm_integration import CPUOptimizedLLM; llm = CPUOptimizedLLM(); llm.initialize()\"")
    print("3. Test with: python test_real_llm.py")


if __name__ == "__main__":
    main()