#!/usr/bin/env python3
"""
Download cutting-edge CPU-optimized models for production.
"""
import json
import os
from datetime import date
from pathlib import Path

import requests
from huggingface_hub import snapshot_download, HfApi
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)
# CPU-optimized models (small, fast, quantized)
# CPU-optimized models (small, fast, quantized).
# Each entry is either a GGUF checkpoint (single quantized "filename") or an
# ONNX bundle ("files" list with type == "onnx"); download_model() branches
# on the "type" key.
MODELS_TO_DOWNLOAD = {
    # Ultra-fast CPU models
    "phi-2-gguf": {
        # microsoft/phi-2 hosts only full-precision safetensors — no .gguf
        # files — so the original repo_id could never yield this filename.
        # The quantized GGUF conversions live in TheBloke's repo.
        "repo_id": "TheBloke/phi-2-GGUF",
        "filename": "phi-2.Q4_K_M.gguf",  # 4-bit quantization
        "size_gb": 1.6,
        "tokens_per_sec": "~30-50",
        "description": "Microsoft Phi-2 GGUF (4-bit)"
    },
    "tinyllama-gguf": {
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        "size_gb": 0.8,
        "tokens_per_sec": "~50-80",
        "description": "TinyLlama 1.1B GGUF (4-bit)"
    },
    "qwen2-0.5b-gguf": {
        "repo_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        "filename": "qwen2.5-0.5b-instruct-q4_0.gguf",
        "size_gb": 0.3,
        "tokens_per_sec": "~100-150",
        "description": "Qwen 2.5 0.5B GGUF (4-bit)"
    },
    # ONNX Runtime optimized models
    "bert-tiny-onnx": {
        # NOTE(review): "microsoft/bert-tiny" may not exist on the Hub — the
        # commonly used tiny BERT is "prajjwal1/bert-tiny"; verify this repo
        # id before relying on the entry.
        "repo_id": "microsoft/bert-tiny",
        "files": ["model.onnx", "vocab.txt"],
        "type": "onnx",
        "description": "BERT-Tiny ONNX for ultra-fast embeddings"
    }
}
def _stream_download(url, filepath, show_progress=False):
    """Stream ``url`` to ``filepath`` in 8 KiB chunks.

    Returns the number of bytes written. Raises ``requests.HTTPError`` on a
    bad status code and ``requests.Timeout`` when the connection or server
    stalls (the original code had no timeout and never closed the response).
    """
    # The context manager releases the HTTP connection even on error.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0
        # Repo files may live in subdirectories (e.g. "onnx/model.onnx");
        # ensure the local parent directory exists before opening.
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if show_progress and total_size > 0:
                    percent = (downloaded / total_size) * 100
                    print(f"   Progress: {percent:.1f}%", end='\r')
    return downloaded


def download_model(model_name, model_info):
    """Download one model described by *model_info* into MODELS_DIR/<model_name>.

    ONNX entries (``type == "onnx"``) fetch every repo file whose path
    contains one of the names in ``model_info["files"]``; GGUF entries fetch
    the named ``.gguf`` file, falling back to the first ``.gguf`` in the repo.
    Errors are reported and swallowed so one failed model does not abort a
    batch download.
    """
    print(f"\n📥 Downloading {model_name}...")
    print(f"   Description: {model_info['description']}")

    target_dir = MODELS_DIR / model_name
    target_dir.mkdir(exist_ok=True)

    try:
        # Both branches need the repo listing; fetch it once.
        api = HfApi()
        repo_files = api.list_repo_files(model_info["repo_id"])

        if model_info.get("type") == "onnx":
            wanted = model_info.get("files", [])
            for file in repo_files:
                # Substring match so nested paths like "onnx/model.onnx" hit.
                if any(name in file for name in wanted):
                    print(f"   Downloading {file}...")
                    url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{file}"
                    filepath = target_dir / file
                    _stream_download(url, filepath)
                    print(f"   ✓ Downloaded {file} ({filepath.stat().st_size / 1024 / 1024:.1f}MB)")
        else:
            # GGUF branch: prefer the configured filename, else first .gguf.
            print(f"   Looking for {model_info['filename']}...")
            gguf_files = [f for f in repo_files if f.endswith('.gguf')]
            if not gguf_files:
                print(f"   ⚠ No GGUF files found in repo")
                return
            target_file = model_info.get('filename')
            if target_file and target_file in gguf_files:
                file_to_download = target_file
            else:
                file_to_download = gguf_files[0]
            print(f"   Found: {file_to_download}")
            url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{file_to_download}"
            filepath = target_dir / file_to_download
            _stream_download(url, filepath, show_progress=True)
            print(f"\n   ✓ Downloaded {file_to_download} ({filepath.stat().st_size / 1024 / 1024:.1f}MB)")
    except Exception as e:
        # Best-effort by design: report and continue with the next model.
        print(f"   ❌ Error downloading {model_name}: {e}")
def main():
    """Download the essential models and write a JSON registry of results."""
    print("=" * 60)
    print("🚀 DOWNLOADING CUTTING-EDGE CPU-OPTIMIZED MODELS")
    print("=" * 60)

    # Start with the essentials; extend this list to pull the larger models.
    models_to_get = ["qwen2-0.5b-gguf", "bert-tiny-onnx"]
    for model_name in models_to_get:
        if model_name in MODELS_TO_DOWNLOAD:
            download_model(model_name, MODELS_TO_DOWNLOAD[model_name])

    # Build the registry from what is actually on disk, so models fetched on
    # earlier runs are included too.
    registry = {
        "models": {},
        # Record the real run date; the original hard-coded "2026-01-22",
        # which would be wrong on every other day.
        "download_timestamp": date.today().isoformat(),
        "total_size_gb": 0,
    }
    for model_dir in MODELS_DIR.iterdir():
        if model_dir.is_dir():
            total_size = sum(f.stat().st_size for f in model_dir.rglob('*') if f.is_file())
            registry["models"][model_dir.name] = {
                "path": str(model_dir.relative_to(MODELS_DIR)),
                "size_mb": total_size / 1024 / 1024,
                "files": [f.name for f in model_dir.iterdir() if f.is_file()],
            }
            registry["total_size_gb"] += total_size / 1024 / 1024 / 1024

    # Persist the registry next to the models themselves.
    registry_file = MODELS_DIR / "model_registry.json"
    with open(registry_file, 'w') as f:
        json.dump(registry, f, indent=2)

    print(f"\n📋 Model registry saved to: {registry_file}")
    print(f"📦 Total models size: {registry['total_size_gb']:.2f} GB")
    print("\n✅ Model download complete!")
    print("\nNext steps:")
    print("1. Update config.py to use downloaded models")
    print("2. Run: python -c \"from app.llm_integration import CPUOptimizedLLM; llm = CPUOptimizedLLM(); llm.initialize()\"")
    print("3. Test with: python test_real_llm.py")


if __name__ == "__main__":
    main()