from huggingface_hub import hf_hub_download
import os
import sys
print("β¬ Downloading Roy model (IQ4_XS β optimized)...")

# Which quantized weights file to fetch from the Hub repo.
MODEL_FILE = "Roy-v1.IQ4_XS.gguf"

# Pull the GGUF file straight into /app so the server can load it by path.
download_args = {
    "repo_id": "mradermacher/Roy-v1-GGUF",
    "filename": MODEL_FILE,
    "local_dir": "/app",
}
hf_hub_download(**download_args)
print("π Files in /app:")
# Dump every file under /app so the container logs show what actually landed.
for dirpath, _dirnames, filenames in os.walk("/app"):
    for name in filenames:
        print(os.path.join(dirpath, name))
# Auto-detect server binary. llama.cpp builds have named it "server" or
# "llama-server" depending on version, so accept either.
server_path = None
for root, dirs, files in os.walk("/app"):
    for f in files:
        if f in ("server", "llama-server"):
            server_path = os.path.join(root, f)
            # Stop at the first match: the original kept walking the whole
            # tree and silently took the LAST match, which wasted time and
            # made the choice depend on filesystem walk order.
            break
    if server_path:
        break
if not server_path:
    print("β Server binary not found")
    sys.exit(1)
print("π Launching:", server_path)

# ===== PERFORMANCE TUNING ADDED =====
# Build the argv up front, then replace this Python process with the server
# (execv never returns on success).
launch_argv = [
    server_path,
    "-m", f"/app/{MODEL_FILE}",
    "--host", "0.0.0.0",
    "--port", "7860",
    # Speed settings
    "--ctx-size", "256",    # smaller context = faster
    "--n-predict", "120",   # limit response length
    "--threads", "4",       # match HF CPU
]
os.execv(server_path, launch_argv)