Spaces:

souvik18
/

roy-v1

Sleeping

roy-v1 / start.py

Update start.py

72cb8c0 verified 27 days ago

1.09 kB

	from huggingface_hub import hf_hub_download
	import os
	import sys

	# ===== MODEL CHANGED HERE =====
	# Name of the GGUF file to fetch; the launch step at the end of this script
	# builds the server's "-m" path from the same name.
	MODEL_FILE = "Roy-v1.IQ4_XS.gguf"

	print("⬇ Downloading Roy model (IQ4_XS – optimized)...")

	# Fetch the model file from the Hub repository into /app.
	_download_kwargs = {
	    "repo_id": "mradermacher/Roy-v1-GGUF",
	    "filename": MODEL_FILE,
	    "local_dir": "/app",
	}
	hf_hub_download(**_download_kwargs)

	# Diagnostic listing: print the full path of every file found under /app,
	# one per line, so the startup log shows what is available.
	print("📁 Files in /app:")
	for dirpath, _subdirs, filenames in os.walk("/app"):
	    for full_path in (os.path.join(dirpath, name) for name in filenames):
	        print(full_path)

	# Auto-detect server binary.
	# Walk /app looking for a file named "server" or "llama-server". Only
	# candidates the current user may execute are accepted: a same-named
	# non-executable file (source file, asset, ...) would otherwise be handed to
	# os.execv and fail with PermissionError. As before, when several candidates
	# exist the last one encountered by os.walk wins.
	server_path = None
	for dirpath, _subdirs, filenames in os.walk("/app"):
	    for name in filenames:
	        if name not in ("server", "llama-server"):
	            continue
	        candidate = os.path.join(dirpath, name)
	        if os.access(candidate, os.X_OK):
	            server_path = candidate
	        else:
	            # Make the rejection visible so a missing +x bit is easy to spot.
	            print("⚠ Skipping non-executable candidate:", candidate)

	# Abort with a non-zero exit status when nothing usable was found.
	if not server_path:
	    print("❌ Server binary not found")
	    sys.exit(1)

	print("🚀 Launching:", server_path)

	# os.execv replaces this process image immediately and does NOT flush
	# Python's buffered streams. When stdout is not a TTY (the usual case in a
	# container) it is block-buffered, so without an explicit flush any output
	# still sitting in the buffer would be silently lost.
	sys.stdout.flush()
	sys.stderr.flush()

	# ===== PERFORMANCE TUNING ADDED =====
	# Hand control over to the server binary; on success this call never returns.
	os.execv(server_path, [
	    server_path,  # argv[0]
	    "-m", f"/app/{MODEL_FILE}",

	    "--host", "0.0.0.0",
	    "--port", "7860",

	    # Speed settings
	    "--ctx-size", "256",   # smaller context = faster
	    "--n-predict", "120",  # limit response length
	    "--threads", "4",      # per the original note: match HF CPU — TODO confirm core count
	])