# roy-v1 / start.py
# souvik18's picture
# Update start.py
# 72cb8c0 verified
from huggingface_hub import hf_hub_download
import os
import sys
# ===== MODEL SELECTION =====
# Name of the GGUF file to fetch. The launch step further down passes
# "/app/<MODEL_FILE>" to the server, so the download must land in /app.
MODEL_FILE = "Roy-v1.IQ4_XS.gguf"

print("⬇ Downloading Roy model (IQ4_XS – optimized)...")

# Fetch MODEL_FILE from the Hub repository into /app. The returned path is
# not used; later code rebuilds the path from MODEL_FILE.
_download_options = {
    "repo_id": "mradermacher/Roy-v1-GGUF",
    "filename": MODEL_FILE,
    "local_dir": "/app",
}
hf_hub_download(**_download_options)
# Diagnostic listing: print the full path of every file found anywhere
# under /app so the start-up log shows what is actually on disk.
# The heading's emoji was stored as double-encoded mojibake; it is restored
# here so the log line is readable.
print("📁 Files in /app:")
for dirpath, _dirnames, filenames in os.walk("/app"):
    for filename in filenames:
        print(os.path.join(dirpath, filename))
# Auto-detect server binary
# File names accepted as the server executable, in order of preference.
# The more specific name comes first so that an unrelated file that merely
# happens to be called "server" does not shadow it.
_SERVER_NAMES = ("llama-server", "server")


def _find_server_binary(search_root):
    """Return the path of the server executable under *search_root*.

    Walks *search_root* recursively in sorted (deterministic) order and
    collects every file whose name is in ``_SERVER_NAMES`` and which the
    current user is allowed to execute. Among the matches, the one with the
    most preferred name wins; ties go to the first one encountered.

    Returns ``None`` when no executable candidate exists.
    """
    matches = []
    for dirpath, dirnames, filenames in os.walk(search_root):
        # Sorting in place fixes the traversal order, so the result does not
        # depend on the order in which the filesystem lists entries.
        dirnames.sort()
        for filename in sorted(filenames):
            if filename not in _SERVER_NAMES:
                continue
            candidate = os.path.join(dirpath, filename)
            # Skip files without the execute bit: handing one to os.execv
            # would only fail later with a less helpful error.
            if os.access(candidate, os.X_OK):
                matches.append(candidate)
    if not matches:
        return None
    # min() returns the first minimal element, which preserves walk order
    # among candidates that share the same name.
    return min(matches, key=lambda p: _SERVER_NAMES.index(os.path.basename(p)))


server_path = _find_server_binary("/app")
if not server_path:
    print("❌ Server binary not found")
    sys.exit(1)
# The emoji in this message was stored as double-encoded mojibake; it is
# restored here so the log line is readable.
print("🚀 Launching:", server_path)

# ===== PERFORMANCE TUNING =====
# Full argv for the server process; by convention argv[0] is the program path.
server_args = [
    server_path,
    "-m", f"/app/{MODEL_FILE}",
    "--host", "0.0.0.0",
    "--port", "7860",
    # Speed settings
    "--ctx-size", "256",   # smaller context = faster
    "--n-predict", "120",  # limit response length
    "--threads", "4",      # match HF CPU
]

# os.execv replaces this process image immediately and does NOT flush
# Python's buffered streams. When stdout is a pipe or file (not a TTY),
# everything printed so far would be lost unless it is flushed first.
sys.stdout.flush()
sys.stderr.flush()

# Hand the process over to the server. On success this call never returns;
# on failure it raises OSError.
os.execv(server_path, server_args)