File size: 4,485 Bytes
072e013
 
 
 
 
 
 
 
 
 
3fbe92b
 
 
072e013
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
045ac3e
 
072e013
 
 
045ac3e
072e013
045ac3e
072e013
 
 
 
 
045ac3e
 
 
 
 
 
 
 
 
 
 
 
 
 
072e013
045ac3e
 
072e013
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import sys
import zipfile
import urllib.request
import requests

# --- Configuration ---------------------------------------------------------
# Output folders, relative to the current working directory.
MODEL_DIR = "models"
BIN_DIR = "bin"

# StarCoder2 3B Instruct, Q4_K_M GGUF quantization, hosted on Hugging Face
# (approx 1.8 GB).
MODEL_URL = "https://huggingface.co/QuantFactory/starcoder2-3b-instruct-GGUF/resolve/main/starcoder2-3b-instruct.Q4_K_M.gguf"
# The local file keeps the name used in the URL (its last path segment).
MODEL_NAME = MODEL_URL.rsplit("/", 1)[-1]
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)

# Pre-built llama.cpp release b4834 for Windows x64 with AVX2.
LLAMA_ZIP_URL = "https://github.com/ggerganov/llama.cpp/releases/download/b4834/llama-b4834-bin-win-avx2-x64.zip"
# Temporary name the release archive is saved under before extraction.
LLAMA_ZIP_NAME = "llama_bin.zip"

def download_file(url, destination, *, timeout=(10, 60)):
    """Stream ``url`` to ``destination`` on disk, printing progress to stdout.

    The body is first written to ``<destination>.part`` and renamed onto
    ``destination`` only after the whole response has been consumed, so an
    interrupted or failed transfer never leaves a truncated file under the
    final name (callers that test ``os.path.exists(destination)`` would
    otherwise mistake it for a finished download).

    Args:
        url: HTTP(S) address to fetch.
        destination: Path of the file to create or overwrite.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.get``. ``None`` waits indefinitely.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
        OSError: If the temporary or final file cannot be written.
    """
    print(f"Downloading from: {url}")
    print(f"Saving to: {destination}")

    block_size = 1024 * 1024  # 1 MB
    partial_path = f"{destination}.part"

    try:
        # The context manager releases the connection even if the loop fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # Servers may omit Content-Length; progress is then not shown.
            total_size = int(response.headers.get('content-length', 0))
            downloaded = 0

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=block_size):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded += len(chunk)
                    if total_size > 0:
                        percent = (downloaded / total_size) * 100
                        sys.stdout.write(f"\rProgress: {percent:.2f}% ({downloaded / (1024*1024):.1f} MB / {total_size / (1024*1024):.1f} MB)")
                        sys.stdout.flush()

        # Publish the file under its final name only once it is complete.
        os.replace(partial_path, destination)
    except BaseException:
        # Also covers Ctrl-C: discard the incomplete file, then re-raise.
        try:
            os.remove(partial_path)
        except OSError:
            # Best-effort cleanup; the partial file may never have been created.
            pass
        raise

    print("\nDownload complete!\n")

def _ensure_model():
    """Download the GGUF model to MODEL_PATH unless a file already exists there."""
    if os.path.exists(MODEL_PATH):
        print(f"--- Model '{MODEL_NAME}' already exists. Skipping download. ---")
        return

    print(f"--- Model not found. Initiating download for '{MODEL_NAME}' ---")
    try:
        download_file(MODEL_URL, MODEL_PATH)
    except Exception as e:
        print(f"Error downloading model: {e}")
        sys.exit(1)


def _flatten_llama_files(exe_name):
    """Copy the server executable and shared libraries from nested folders into BIN_DIR."""
    import shutil

    lib_suffix = ".dll" if os.name == 'nt' else ".so"
    for root, _dirs, files in os.walk(BIN_DIR):
        for file in files:
            if file == exe_name or file.endswith(lib_suffix):
                src = os.path.join(root, file)
                dest = os.path.join(BIN_DIR, file)
                # Files already sitting directly in BIN_DIR need no copy.
                if os.path.abspath(src) != os.path.abspath(dest):
                    shutil.copy2(src, dest)


def _ensure_llama_server(exe_name, llama_server_path):
    """Download and unpack the llama.cpp release unless the server binary exists."""
    if os.path.exists(llama_server_path):
        print("--- llama.cpp binaries already exist. Skipping setup. ---")
        return

    print("--- Downloading llama.cpp pre-compiled AVX2 binaries ---")
    zip_path = os.path.join(BIN_DIR, LLAMA_ZIP_NAME)
    # NOTE: every non-Windows platform receives the Ubuntu x64 build.
    linux_zip_url = "https://github.com/ggerganov/llama.cpp/releases/download/b4834/llama-b4834-bin-ubuntu-x64.zip"
    url = LLAMA_ZIP_URL if os.name == 'nt' else linux_zip_url
    try:
        download_file(url, zip_path)

        print("Extracting binaries...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(BIN_DIR)

        # Move extracted files to bin/ if the archive nested them in subfolders.
        _flatten_llama_files(exe_name)

        # Fail loudly instead of reporting success when the archive did not
        # contain the expected executable.
        if not os.path.exists(llama_server_path):
            raise FileNotFoundError(
                f"'{exe_name}' was not found in the downloaded archive"
            )

        # Apply executable permissions
        if os.name != 'nt':
            os.chmod(llama_server_path, 0o755)

        # Clean up the downloaded zip file
        if os.path.exists(zip_path):
            os.remove(zip_path)
        print("Binaries extracted successfully. Zip file cleaned up.")
    except Exception as e:
        print(f"Error setting up llama.cpp: {e}")
        if os.path.exists(zip_path):
            os.remove(zip_path)
        sys.exit(1)


def setup():
    """Prepare the local environment: folders, model file and llama.cpp server.

    Creates MODEL_DIR and BIN_DIR, downloads the model named by MODEL_NAME if
    MODEL_PATH does not exist, and downloads and extracts the llama.cpp
    release if the ``llama-server`` executable is not in BIN_DIR. A summary
    with both absolute paths is printed at the end.

    Raises:
        SystemExit: With status 1 when a download or the extraction fails, or
            when the extracted archive does not contain the server executable.
    """
    # 1. Create necessary directories
    os.makedirs(MODEL_DIR, exist_ok=True)
    os.makedirs(BIN_DIR, exist_ok=True)
    print("Created project directories ('models/', 'bin/').")

    # 2. Download the model (StarCoder2 3B Instruct, see MODEL_URL)
    _ensure_model()

    # 3. Download llama.cpp binary zip
    exe_name = "llama-server.exe" if os.name == 'nt' else "llama-server"
    llama_server_path = os.path.join(BIN_DIR, exe_name)
    _ensure_llama_server(exe_name, llama_server_path)

    print("\n=======================================================")
    print("Setup Successful!")
    print(f"Model Path: {os.path.abspath(MODEL_PATH)}")
    print(f"llama-server Path: {os.path.abspath(llama_server_path)}")
    print("=======================================================\n")
    print("You can now run 'python run.py' to start the system.")

# Run the setup only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    setup()