File size: 6,162 Bytes
04ab625
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python3
"""
Download cutting-edge CPU-optimized models for production.
"""
import json
import os
from datetime import datetime, timezone
from pathlib import Path

import requests
from huggingface_hub import snapshot_download, HfApi

# All models land under ./models, relative to the current working directory.
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)  # NOTE: module-level side effect — dir is created on import

# CPU-optimized models (small, fast, quantized).
# Each entry is either a GGUF download ("repo_id" + "filename") or an ONNX
# download ("repo_id" + "files" + "type": "onnx"); download_model() branches
# on the "type" key. "size_gb"/"tokens_per_sec" are informational only —
# nothing in this script reads them.
MODELS_TO_DOWNLOAD = {
    # Ultra-fast CPU models
    "phi-2-gguf": {
        # NOTE(review): microsoft/phi-2 is the original fp16 repo and likely
        # hosts no .gguf files — download_model() would then fall back to
        # "No GGUF files found". Verify, or point at a TheBloke GGUF mirror.
        "repo_id": "microsoft/phi-2",
        "filename": "phi-2.Q4_K_M.gguf",  # 4-bit quantization
        "size_gb": 1.6,
        "tokens_per_sec": "~30-50",
        "description": "Microsoft Phi-2 GGUF (4-bit)"
    },
    "tinyllama-gguf": {
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        "size_gb": 0.8,
        "tokens_per_sec": "~50-80",
        "description": "TinyLlama 1.1B GGUF (4-bit)"
    },
    "qwen2-0.5b-gguf": {
        "repo_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
        "filename": "qwen2.5-0.5b-instruct-q4_0.gguf",
        "size_gb": 0.3,
        "tokens_per_sec": "~100-150",
        "description": "Qwen 2.5 0.5B GGUF (4-bit)"
    },
    # ONNX Runtime optimized models
    "bert-tiny-onnx": {
        "repo_id": "microsoft/bert-tiny",
        "files": ["model.onnx", "vocab.txt"],  # substring-matched against repo file paths
        "type": "onnx",
        "description": "BERT-Tiny ONNX for ultra-fast embeddings"
    }
}

def download_model(model_name, model_info):
    """Download one model described by *model_info* into MODELS_DIR/<model_name>.

    Args:
        model_name: Key from MODELS_TO_DOWNLOAD; used as the target
            sub-directory name.
        model_info: Dict with "repo_id" plus either "filename" (GGUF repo)
            or "files" and "type": "onnx" (ONNX artifacts).

    Never raises: any error is reported and swallowed so main() can continue
    with the remaining models (deliberate best-effort behavior).
    """
    print(f"\n📥 Downloading {model_name}...")
    print(f"   Description: {model_info['description']}")

    target_dir = MODELS_DIR / model_name
    target_dir.mkdir(parents=True, exist_ok=True)

    try:
        if model_info.get("type") == "onnx":
            _download_onnx_files(model_info, target_dir)
        else:
            _download_gguf_file(model_info, target_dir)
    except Exception as e:
        # Best-effort: report and let the caller move on to the next model.
        print(f"   ❌ Error downloading {model_name}: {e}")


def _stream_to_file(url, filepath, show_progress=False):
    """Stream *url* to *filepath* in 8 KiB chunks, optionally printing progress."""
    # Repo files can live in sub-folders (e.g. "onnx/model.onnx"); the
    # original open() crashed when the parent directory didn't exist.
    filepath.parent.mkdir(parents=True, exist_ok=True)
    # timeout= prevents hanging forever on a stalled connection; the context
    # manager guarantees the streamed connection is released.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0
        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if show_progress and total_size > 0:
                    percent = (downloaded / total_size) * 100
                    print(f"   Progress: {percent:.1f}%", end='\r')


def _download_onnx_files(model_info, target_dir):
    """Download every repo file whose path contains one of model_info["files"]."""
    api = HfApi()
    files = api.list_repo_files(model_info["repo_id"])

    for file in files:
        # Substring match: "model.onnx" also matches e.g. "onnx/model.onnx".
        if any(f in file for f in model_info.get("files", [])):
            print(f"   Downloading {file}...")
            url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{file}"
            filepath = target_dir / file
            _stream_to_file(url, filepath)
            print(f"   ✓ Downloaded {file} ({filepath.stat().st_size / 1024 / 1024:.1f}MB)")


def _download_gguf_file(model_info, target_dir):
    """Download the requested .gguf file, falling back to the first one listed."""
    print(f"   Looking for {model_info['filename']}...")

    api = HfApi()
    files = api.list_repo_files(model_info["repo_id"])

    gguf_files = [f for f in files if f.endswith('.gguf')]
    if not gguf_files:
        print(f"   ⚠ No GGUF files found in repo")
        return

    # Prefer the exact filename; otherwise take the first GGUF listed.
    # NOTE: listing order is arbitrary — this is NOT necessarily the smallest
    # file, despite what the original comment claimed.
    target_file = model_info.get('filename')
    file_to_download = target_file if target_file in gguf_files else gguf_files[0]

    print(f"   Found: {file_to_download}")
    url = f"https://huggingface.co/{model_info['repo_id']}/resolve/main/{file_to_download}"
    filepath = target_dir / file_to_download
    _stream_to_file(url, filepath, show_progress=True)
    print(f"\n   ✓ Downloaded {file_to_download} ({filepath.stat().st_size / 1024 / 1024:.1f}MB)")

def main():
    """Download the essential models, then write a JSON registry of every
    model directory found under MODELS_DIR (not just this run's downloads)."""
    print("=" * 60)
    print("🚀 DOWNLOADING CUTTING-EDGE CPU-OPTIMIZED MODELS")
    print("=" * 60)

    # Download selected models — start with the essentials only.
    models_to_get = ["qwen2-0.5b-gguf", "bert-tiny-onnx"]

    for model_name in models_to_get:
        if model_name in MODELS_TO_DOWNLOAD:
            download_model(model_name, MODELS_TO_DOWNLOAD[model_name])

    # Create model registry.
    registry = {
        "models": {},
        # Record the actual run date (UTC) instead of a hard-coded string.
        "download_timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
        "total_size_gb": 0,
    }

    for model_dir in MODELS_DIR.iterdir():
        if model_dir.is_dir():
            model_files = [f for f in model_dir.rglob('*') if f.is_file()]
            total_size = sum(f.stat().st_size for f in model_files)
            registry["models"][model_dir.name] = {
                "path": str(model_dir.relative_to(MODELS_DIR)),
                "size_mb": total_size / 1024 / 1024,
                # Recursive relative paths so files in sub-folders (e.g. ONNX
                # repos) appear; the original iterdir() listing missed them
                # even though size_mb counted them.
                "files": [str(f.relative_to(model_dir)) for f in model_files],
            }
            registry["total_size_gb"] += total_size / 1024 / 1024 / 1024

    # Save registry.
    registry_file = MODELS_DIR / "model_registry.json"
    with open(registry_file, 'w') as f:
        json.dump(registry, f, indent=2)

    print(f"\n📋 Model registry saved to: {registry_file}")
    print(f"📦 Total models size: {registry['total_size_gb']:.2f} GB")
    print("\n✅ Model download complete!")
    print("\nNext steps:")
    print("1. Update config.py to use downloaded models")
    print("2. Run: python -c \"from app.llm_integration import CPUOptimizedLLM; llm = CPUOptimizedLLM(); llm.initialize()\"")
    print("3. Test with: python test_real_llm.py")

# Run downloads only when executed as a script, not when imported.
if __name__ == "__main__":
    main()