# hue-portal-backend / set_hf_space_env_qwen.py
# Author: Davidtran99 — chore: sync with main repo (commit 3718c84)
#!/usr/bin/env python3
"""
Script to set Qwen2.5-7B-Instruct Q4_K_M on Hugging Face Space.
Upgrade from Gemma 2-2B-it for better Vietnamese legal understanding.
Usage:
python3 set_hf_space_env_qwen.py
"""
import os
import sys
from pathlib import Path
# huggingface_hub is the only third-party dependency; exit early with an
# actionable install hint instead of a raw ImportError traceback.
try:
    from huggingface_hub import HfApi
except ImportError:
    print("❌ huggingface_hub not installed. Install with: pip install huggingface_hub")
    sys.exit(1)
# Target Space to configure (owner/name on Hugging Face).
SPACE_ID = "davidtran999/hue-portal-backend"

# Space variables selecting Qwen2.5-7B-Instruct Q4_K_M served via llama.cpp.
# All values are strings because the HF Space variable API stores text only.
# Tuned for the free tier: 2 vCPU / 16 GB RAM.
ENV_VARS = {
    "DEFAULT_LLM_PROVIDER": "llama_cpp",
    "LLM_PROVIDER": "llama_cpp",
    # Q4_K_M quant (~4 GB on disk) — best quality/size trade-off for free tier.
    "LLAMA_CPP_MODEL_REPO": "bartowski/Qwen2.5-7B-Instruct-GGUF",
    "LLAMA_CPP_MODEL_FILE": "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
    # 2048-token context (model supports up to 8192): smaller context keeps
    # CPU prompt evaluation fast enough on 2 vCPUs.
    "LLAMA_CPP_CONTEXT": "2048",
    "LLAMA_CPP_THREADS": "2",
    # Larger batch speeds up prompt eval (previously 256).
    "LLAMA_CPP_BATCH": "512",
    "LLAMA_CPP_MAX_TOKENS": "512",
    "LLAMA_CPP_TEMPERATURE": "0.35",
    "LLAMA_CPP_TOP_P": "0.85",
    "LLAMA_CPP_REPEAT_PENALTY": "1.1",
    "LLAMA_CPP_USE_MMAP": "true",
    "LLAMA_CPP_USE_MLOCK": "true",
    "RUN_HEAVY_STARTUP_TASKS": "0",
}
def _resolve_hf_token():
    """Return a Hugging Face token, or a falsy value when none is available.

    Checks, in order: the HF_TOKEN and HUGGINGFACE_HUB_TOKEN environment
    variables, then the token file written by `huggingface-cli login`.
    May return an empty string if the cached token file is empty; callers
    should treat any falsy result as "no token".
    """
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
    if token:
        return token
    token_file = Path.home() / ".cache" / "huggingface" / "token"
    if token_file.exists():
        return token_file.read_text(encoding="utf-8").strip()
    return None


def main():
    """Apply the Qwen ENV_VARS to the Space and trigger a restart.

    Exits with status 1 when no HF token can be found. Per-variable API
    failures are reported but do not abort the run; a failed restart is
    reported with a manual-restart hint.
    """
    hf_token = _resolve_hf_token()
    if not hf_token:
        print("❌ HF_TOKEN not found.")
        print("\n💡 Option 1: Set token as environment variable")
        print(" export HF_TOKEN=your_token_here")
        print(" python3 set_hf_space_env_qwen.py")
        print("\n💡 Option 2: Login with Hugging Face CLI")
        print(" huggingface-cli login")
        print(" python3 set_hf_space_env_qwen.py")
        sys.exit(1)

    api = HfApi(token=hf_token)
    print(f"🚀 Upgrading to Qwen2.5-7B-Instruct Q4_K_M on Space: {SPACE_ID}")
    print("=" * 60)
    print("📊 Model specs:")
    print(" - Size: ~4GB (downloads from HF, no storage limit)")
    print(" - RAM: ~6-8GB (fits 16GB free tier)")
    print(" - Expected latency: 7-9s on 2 vCPU")
    print(" - Vietnamese legal: Excellent")
    print("=" * 60)

    for key, value in ENV_VARS.items():
        # Status print hoisted out of the try: the broad except below is only
        # meant to swallow "variable doesn't exist yet" delete failures, not
        # hide problems with the status line itself.
        print(f"Setting {key}={value}...", end=" ")
        try:
            # Best-effort delete so re-adding the variable cannot conflict.
            api.delete_space_variable(repo_id=SPACE_ID, key=key)
        except Exception:
            pass  # Ignore if variable doesn't exist yet
        try:
            api.add_space_variable(repo_id=SPACE_ID, key=key, value=str(value))
            print("✅")
        except Exception as exc:
            print(f"❌ {exc}")

    print("=" * 60)
    print("✅ Config updated! Restarting Space...")
    try:
        api.restart_space(repo_id=SPACE_ID)
        print("✅ Space restarted. Wait 2-3 minutes for model download & load.")
        print("\n💡 Monitor logs at:")
        print(f" https://huggingface.co/spaces/{SPACE_ID}/logs")
    except Exception as exc:
        # Variables were already saved; only the restart step failed.
        print(f"⚠️ Config saved but restart failed: {exc}")
        print(" Please restart Space manually from HF dashboard.")


if __name__ == "__main__":
    main()