#!/usr/bin/env python3
"""Convert the StockEx CH Trader LoRA adapter to GGUF for Ollama.

Prerequisites:
    pip install torch transformers peft huggingface_hub
    git clone https://github.com/ggerganov/llama.cpp
    cd llama.cpp && pip install -r requirements/requirements-convert_hf_to_gguf.txt

Usage:
    python scripts/convert_to_ollama.py [--llama-cpp PATH] [--skip-merge] [--skip-convert]

    --llama-cpp PATH   Path to the llama.cpp checkout (default: ~/llama.cpp)
    --skip-merge       Reuse an existing merged model instead of merging again
    --skip-convert     Reuse an existing GGUF file instead of converting again

This script will:
    1. Download the base model (Qwen2.5-7B-Instruct)
    2. Download the LoRA adapter (RayMelius/stockex-ch-trader)
    3. Merge adapter into base model (CPU, ~16GB RAM needed)
    4. Convert merged model to GGUF (Q4_K_M quantization; if llama-quantize
       cannot be found, the unquantized f16 GGUF is used instead)
    5. Create and register an Ollama model

After running, use in StockEx with:
    OLLAMA_HOST=http://localhost:11434 OLLAMA_MODEL=stockex-ch-trader
"""
import os
import sys
import shutil
import subprocess
import argparse
# Hugging Face repositories: the base model and the LoRA adapter merged into it.
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
ADAPTER_REPO = "RayMelius/stockex-ch-trader"

# Name the model is registered under in Ollama, and the quantization type
# passed to the llama.cpp quantize tool.
OLLAMA_MODEL_NAME = "stockex-ch-trader"
QUANT = "Q4_K_M"

# Every artifact lives in a "models" directory that is a sibling of the
# directory containing this script.
WORK_DIR = os.path.join(os.path.dirname(__file__), "..", "models")
MERGED_DIR = os.path.join(WORK_DIR, "merged")
GGUF_PATH = os.path.join(WORK_DIR, f"stockex-ch-trader-{QUANT}.gguf")
MODELFILE_PATH = os.path.join(WORK_DIR, "Modelfile")

# System prompt embedded in the generated Ollama Modelfile (SYSTEM directive).
SYSTEM_PROMPT = (
    "You are a StockEx clearing house trading agent. "
    "Given a member's financial state and live market data, "
    "you output a single valid JSON trading decision that respects all capital and holdings constraints. "
    "Never output anything other than the JSON object."
)
def step(n, msg):
    """Print a banner announcing pipeline step *n*, described by *msg*."""
    rule = "=" * 60
    # Blank line, rule, title, rule, blank line -- emitted with a single print.
    print(f"\n{rule}\n Step {n}: {msg}\n{rule}\n")
def merge_adapter():
    """Merge the LoRA adapter into the base model and save it to MERGED_DIR.

    Loads BASE_MODEL on the CPU in float16, applies the ADAPTER_REPO weights
    through PEFT, folds them into the base weights, and writes the merged
    model plus the base model's tokenizer to MERGED_DIR.
    """
    step(1, f"Merging {ADAPTER_REPO} into {BASE_MODEL}")

    # Imported here rather than at module level, so these packages are only
    # required when the merge step actually runs.
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from peft import PeftModel

    print("Loading base model (CPU, float16)...")
    load_options = {
        "torch_dtype": torch.float16,
        "device_map": "cpu",
        "trust_remote_code": True,
    }
    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **load_options)

    print(f"Loading adapter from {ADAPTER_REPO}...")
    model = PeftModel.from_pretrained(base, ADAPTER_REPO)

    print("Merging adapter weights...")
    model = model.merge_and_unload()

    os.makedirs(MERGED_DIR, exist_ok=True)
    print(f"Saving merged model to {MERGED_DIR}...")
    model.save_pretrained(MERGED_DIR)

    # The tokenizer is loaded from the base model repo, not the adapter repo.
    base_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    base_tokenizer.save_pretrained(MERGED_DIR)

    print("Merge complete.")
def _find_quantize_binary(llama_cpp_dir):
    """Return a path to llama.cpp's quantize tool, or None if it is not found.

    For each known binary name ("llama-quantize", then "quantize") the
    checkout's build/bin directory is checked first, then the system PATH.
    """
    for name in ("llama-quantize", "quantize"):
        candidate = os.path.join(llama_cpp_dir, "build", "bin", name)
        if os.path.isfile(candidate):
            return candidate
        on_path = shutil.which(name)
        if on_path:
            return on_path
    return None


def convert_to_gguf(llama_cpp_dir):
    """Convert the merged Hugging Face model in MERGED_DIR to a GGUF file.

    The model is first exported as an f16 GGUF with llama.cpp's
    ``convert_hf_to_gguf.py`` and then quantized to QUANT. When no quantize
    binary can be found, the f16 file is moved to GGUF_PATH unquantized.

    Args:
        llama_cpp_dir: Path to a llama.cpp checkout.

    Raises:
        SystemExit: If the conversion script or the merged model directory
            is missing.
        subprocess.CalledProcessError: If the conversion or quantization
            command exits with a non-zero status.
    """
    step(2, f"Converting to GGUF ({QUANT})")

    convert_script = os.path.join(llama_cpp_dir, "convert_hf_to_gguf.py")
    if not os.path.exists(convert_script):
        print(f"ERROR: {convert_script} not found.")
        print("Clone llama.cpp first: git clone https://github.com/ggerganov/llama.cpp")
        sys.exit(1)

    # Fail early with a clear message instead of a subprocess traceback when
    # there is no merged model to convert.
    if not os.path.isdir(MERGED_DIR):
        print(f"ERROR: merged model directory {MERGED_DIR} not found.")
        print("Run the merge step first (do not pass --skip-merge).")
        sys.exit(1)

    # First convert to f16 GGUF
    f16_path = os.path.join(WORK_DIR, "stockex-ch-trader-f16.gguf")
    cmd = [sys.executable, convert_script, MERGED_DIR, "--outfile", f16_path, "--outtype", "f16"]
    print(f"Running: {' '.join(cmd)}")
    subprocess.run(cmd, check=True)

    # Then quantize, if a quantize binary is available
    quantize_bin = _find_quantize_binary(llama_cpp_dir)
    if quantize_bin is None:
        # No quantize binary -- keep f16. os.replace overwrites an existing
        # target on every platform, whereas os.rename raises on Windows when
        # GGUF_PATH is left over from a previous run.
        os.replace(f16_path, GGUF_PATH)
        print(f"llama-quantize not found, using f16 GGUF: {GGUF_PATH}")
        print(f"To quantize manually: llama-quantize {GGUF_PATH} output.gguf {QUANT}")
        return

    cmd = [quantize_bin, f16_path, GGUF_PATH, QUANT]
    print(f"Quantizing: {' '.join(cmd)}")
    subprocess.run(cmd, check=True)
    # The intermediate f16 file is only removed after quantization succeeded.
    os.remove(f16_path)
    print(f"Quantized GGUF saved to {GGUF_PATH}")
def create_ollama_model():
    """Write the Ollama Modelfile and register the model via ``ollama create``.

    The Modelfile is always written to MODELFILE_PATH. Registration is only
    attempted when the ``ollama`` executable is found on PATH; otherwise the
    command to run manually is printed.

    Returns:
        bool: True if ``ollama create`` succeeded; False if ``ollama`` is not
        on PATH or ``ollama create`` exited with a non-zero status.
    """
    step(3, "Creating Ollama model")

    gguf_abs = os.path.abspath(GGUF_PATH)
    modelfile_lines = [
        f"FROM {gguf_abs}",
        f'SYSTEM """{SYSTEM_PROMPT}"""',
        "PARAMETER temperature 0.4",
        "PARAMETER num_predict 100",
        'PARAMETER stop "<|im_end|>"',
        'PARAMETER stop "<|endoftext|>"',
    ]
    modelfile_content = "\n".join(modelfile_lines) + "\n"

    # Explicit UTF-8 so the written file does not depend on the platform's
    # default text encoding.
    with open(MODELFILE_PATH, "w", encoding="utf-8") as f:
        f.write(modelfile_content)
    print(f"Modelfile written to {MODELFILE_PATH}")

    # Check if Ollama is available
    if not shutil.which("ollama"):
        print("\nOllama not found in PATH. Install from https://ollama.com")
        print("Then run manually:")
        print(f" ollama create {OLLAMA_MODEL_NAME} -f {os.path.abspath(MODELFILE_PATH)}")
        return False

    cmd = ["ollama", "create", OLLAMA_MODEL_NAME, "-f", MODELFILE_PATH]
    print(f"Running: {' '.join(cmd)}")
    # Output is captured so stderr can be reported if the command fails.
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        print(f"Ollama create failed: {result.stderr}")
        print(f"Try manually: ollama create {OLLAMA_MODEL_NAME} -f {os.path.abspath(MODELFILE_PATH)}")
        return False

    print(f"Ollama model '{OLLAMA_MODEL_NAME}' created successfully!")
    print("\nTest it:")
    print(f" ollama run {OLLAMA_MODEL_NAME}")
    print("\nUse in StockEx docker-compose.yml:")
    print(" OLLAMA_HOST=http://host.docker.internal:11434")
    print(f" OLLAMA_MODEL={OLLAMA_MODEL_NAME}")
    return True
def main():
    """Parse command-line options and run the merge, convert and Ollama steps.

    The merge and conversion steps can each be skipped with a flag, in which
    case the existing artifact path is printed instead. A summary of the
    artifact locations is printed at the end.
    """
    cli = argparse.ArgumentParser(description="Convert StockEx CH Trader to Ollama GGUF")
    cli.add_argument(
        "--llama-cpp",
        default=os.path.expanduser("~/llama.cpp"),
        help="Path to llama.cpp repo (default: ~/llama.cpp)",
    )
    cli.add_argument(
        "--skip-merge",
        action="store_true",
        help="Skip merge step (use existing merged model)",
    )
    cli.add_argument(
        "--skip-convert",
        action="store_true",
        help="Skip GGUF conversion (use existing GGUF)",
    )
    opts = cli.parse_args()

    os.makedirs(WORK_DIR, exist_ok=True)

    if opts.skip_merge:
        print(f"Skipping merge (using {MERGED_DIR})")
    else:
        merge_adapter()

    if opts.skip_convert:
        print(f"Skipping conversion (using {GGUF_PATH})")
    else:
        convert_to_gguf(opts.llama_cpp)

    create_ollama_model()

    # Final summary banner, assembled line by line and printed in one call.
    rule = "=" * 60
    summary = [
        f"\n{rule}",
        " DONE!",
        rule,
        f" Merged model : {MERGED_DIR}",
        f" GGUF file : {GGUF_PATH}",
        f" Ollama model : {OLLAMA_MODEL_NAME}",
        f"{rule}\n",
    ]
    print("\n".join(summary))


if __name__ == "__main__":
    main()
|