# File: StarCoder2-3B / setup.py
# (page-header residue) AjinkyaPagare's picture
# Commit 3fbe92b: feat: configure StarCoder2 3B model deployment configurations and ports
import os
import sys
import zipfile
import urllib.request
import requests
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Directories (relative paths) that hold the model weights and the binaries.
MODEL_DIR = "models"
BIN_DIR = "bin"

# StarCoder2 3B Instruct, Q4_K_M GGUF quantization (approx 1.8 GB), hosted on
# Hugging Face. The file name is declared once and reused for URL and path.
MODEL_NAME = "starcoder2-3b-instruct.Q4_K_M.gguf"
MODEL_URL = (
    "https://huggingface.co/QuantFactory/starcoder2-3b-instruct-GGUF"
    f"/resolve/main/{MODEL_NAME}"
)
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)

# llama.cpp release b4834, Windows x64 AVX2 build (supports almost all modern
# CPUs). The archive is saved under LLAMA_ZIP_NAME before being extracted.
LLAMA_ZIP_URL = (
    "https://github.com/ggerganov/llama.cpp/releases/download/"
    "b4834/llama-b4834-bin-win-avx2-x64.zip"
)
LLAMA_ZIP_NAME = "llama_bin.zip"
def download_file(url, destination, timeout=(10, 60)):
    """Download ``url`` to ``destination``, reporting progress on stdout.

    The body is streamed in 1 MB chunks into ``<destination>.part`` and only
    renamed to ``destination`` once the transfer has finished. An interrupted
    or failed download therefore never leaves a truncated file at
    ``destination`` that a later existence check could mistake for a
    complete one.

    Args:
        url: HTTP(S) URL to fetch.
        destination: Path of the file to create (or overwrite).
        timeout: Forwarded to ``requests.get``; a ``(connect, read)`` pair in
            seconds so a stalled connection cannot block forever.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status (via ``raise_for_status``).
        OSError: If the file cannot be written, or fewer/more bytes arrived
            than the server announced in ``Content-Length``.
    """
    print(f"Downloading from: {url}")
    print(f"Saving to: {destination}")

    partial_path = f"{destination}.part"
    block_size = 1024 * 1024  # 1 MB
    downloaded = 0

    try:
        # The context manager releases the connection even on failure.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            # With a Content-Encoding the decoded byte count legitimately
            # differs from Content-Length, so the size check is skipped then.
            size_is_exact = (
                total_size > 0 and 'content-encoding' not in response.headers
            )

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=block_size):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded += len(chunk)
                    if total_size > 0:
                        percent = (downloaded / total_size) * 100
                        sys.stdout.write(
                            f"\rProgress: {percent:.2f}% "
                            f"({downloaded / (1024*1024):.1f} MB / "
                            f"{total_size / (1024*1024):.1f} MB)"
                        )
                        sys.stdout.flush()

        if size_is_exact and downloaded != total_size:
            raise OSError(
                f"Incomplete download: received {downloaded} of "
                f"{total_size} bytes"
            )

        # Publish the finished file in a single step.
        os.replace(partial_path, destination)
    except BaseException:
        # Also covers Ctrl-C: discard the partial file, then re-raise.
        try:
            os.remove(partial_path)
        except OSError:
            pass  # nothing was written yet, or cleanup itself failed
        raise

    print("\nDownload complete!\n")
def _collect_posix_binaries(bin_dir, exe_name):
    """Copy ``exe_name`` and ``*.so`` files from sub-folders of ``bin_dir`` into ``bin_dir``."""
    import shutil

    for root, _dirs, files in os.walk(bin_dir):
        for file in files:
            if file == exe_name or file.endswith(".so"):
                src = os.path.join(root, file)
                dest = os.path.join(bin_dir, file)
                # Files already sitting at the top level need no copy.
                if os.path.abspath(src) != os.path.abspath(dest):
                    shutil.copy2(src, dest)


def setup():
    """Prepare the local runtime: directories, model weights, llama.cpp binaries.

    Steps:
      1. Create ``MODEL_DIR`` and ``BIN_DIR`` if they do not exist.
      2. Download the model to ``MODEL_PATH`` unless that file already exists.
      3. Download and extract the llama.cpp release archive into ``BIN_DIR``
         unless ``llama-server`` (``llama-server.exe`` on Windows) is already
         there, then verify that the server binary is actually present.

    Any failure in step 2 or 3 is reported on stdout and the process exits
    with status 1 (``SystemExit``).
    """
    # 1. Create necessary directories
    os.makedirs(MODEL_DIR, exist_ok=True)
    os.makedirs(BIN_DIR, exist_ok=True)
    print("Created project directories ('models/', 'bin/').")

    # 2. Download the StarCoder2 3B Instruct model
    if not os.path.exists(MODEL_PATH):
        print(f"--- Model not found. Initiating download for '{MODEL_NAME}' ---")
        try:
            download_file(MODEL_URL, MODEL_PATH)
        except Exception as e:
            print(f"Error downloading model: {e}")
            # A truncated file would be mistaken for a complete model on the
            # next run (the check above is existence-only), so discard it.
            if os.path.exists(MODEL_PATH):
                os.remove(MODEL_PATH)
            sys.exit(1)
    else:
        print(f"--- Model '{MODEL_NAME}' already exists. Skipping download. ---")

    # 3. Download llama.cpp binary zip
    is_windows = os.name == 'nt'
    exe_name = "llama-server.exe" if is_windows else "llama-server"
    llama_server_path = os.path.join(BIN_DIR, exe_name)

    if not os.path.exists(llama_server_path):
        print("--- Downloading llama.cpp pre-compiled AVX2 binaries ---")
        zip_path = os.path.join(BIN_DIR, LLAMA_ZIP_NAME)
        # NOTE(review): every non-Windows host gets the Ubuntu x64 archive;
        # macOS or ARM machines would presumably need a different asset.
        url = LLAMA_ZIP_URL if is_windows else "https://github.com/ggerganov/llama.cpp/releases/download/b4834/llama-b4834-bin-ubuntu-x64.zip"
        try:
            download_file(url, zip_path)
            print("Extracting binaries...")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(BIN_DIR)

            if not is_windows:
                # Move extracted files to bin/ if nested
                _collect_posix_binaries(BIN_DIR, exe_name)
                # Apply executable permissions
                if os.path.exists(llama_server_path):
                    os.chmod(llama_server_path, 0o755)

            # Do not report success unless the server binary really exists.
            if not os.path.isfile(llama_server_path):
                raise FileNotFoundError(
                    f"'{exe_name}' was not found in '{BIN_DIR}' after extraction"
                )
        except Exception as e:
            print(f"Error setting up llama.cpp: {e}")
            sys.exit(1)
        finally:
            # Clean up the downloaded zip file on success and on failure alike
            if os.path.exists(zip_path):
                os.remove(zip_path)
        print("Binaries extracted successfully. Zip file cleaned up.")
    else:
        print("--- llama.cpp binaries already exist. Skipping setup. ---")

    print("\n=======================================================")
    print("Setup Successful!")
    print(f"Model Path: {os.path.abspath(MODEL_PATH)}")
    print(f"llama-server Path: {os.path.abspath(llama_server_path)}")
    print("=======================================================\n")
    print("You can now run 'python run.py' to start the system.")
# Script entry point: run the setup only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    setup()