|
|
|
|
|
""" |
|
|
Script to configure a Hugging Face Space to run Qwen2.5-7B-Instruct Q4_K_M.
|
|
Upgrade from Gemma 2-2B-it for better Vietnamese legal understanding. |
|
|
|
|
|
Usage: |
|
|
python3 set_hf_space_env_qwen.py |
|
|
""" |
|
|
import os |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
# huggingface_hub is the only third-party requirement; fail fast with an
# install hint instead of a raw ImportError traceback.
try:
    from huggingface_hub import HfApi
except ImportError:
    print("❌ huggingface_hub not installed. Install with: pip install huggingface_hub")
    sys.exit(1)
|
|
|
|
|
|
|
|
# Target Space ("owner/name") whose variables are updated and which is restarted.
SPACE_ID = "davidtran999/hue-portal-backend"
|
|
|
|
|
|
|
|
|
|
|
# Space variables (non-secret) pushed to the Space. All values are strings
# because the HF Space variables API stores plain text.
ENV_VARS = {
    # Route all LLM calls through the local llama.cpp runtime.
    "DEFAULT_LLM_PROVIDER": "llama_cpp",
    "LLM_PROVIDER": "llama_cpp",

    # GGUF artifact downloaded from the Hugging Face Hub at startup.
    "LLAMA_CPP_MODEL_REPO": "bartowski/Qwen2.5-7B-Instruct-GGUF",
    "LLAMA_CPP_MODEL_FILE": "Qwen2.5-7B-Instruct-Q4_K_M.gguf",

    # llama.cpp runtime tuning — presumably sized for the free CPU tier
    # (2 vCPU / 16GB per the banner in main); confirm if the tier changes.
    "LLAMA_CPP_CONTEXT": "2048",
    "LLAMA_CPP_THREADS": "2",
    "LLAMA_CPP_BATCH": "512",
    "LLAMA_CPP_MAX_TOKENS": "512",
    "LLAMA_CPP_TEMPERATURE": "0.35",
    "LLAMA_CPP_TOP_P": "0.85",
    "LLAMA_CPP_REPEAT_PENALTY": "1.1",
    "LLAMA_CPP_USE_MMAP": "true",
    "LLAMA_CPP_USE_MLOCK": "true",
    # Skip heavy startup work on the Space side.
    "RUN_HEAVY_STARTUP_TASKS": "0",
}
|
|
|
|
|
def main():
    """Apply the Qwen2.5-7B env configuration to the Space and restart it.

    Exits with status 1 when no Hugging Face token can be found in the
    environment or in the local HF CLI token cache.
    """
    hf_token = _resolve_hf_token()
    if not hf_token:
        _print_token_instructions()
        sys.exit(1)

    api = HfApi(token=hf_token)

    _print_header()
    _apply_env_vars(api)

    print("=" * 60)
    print("✅ Config updated! Restarting Space...")

    _restart_space(api)


def _resolve_hf_token():
    """Return an HF token from the environment or the HF CLI cache, else None."""
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
    if not token:
        # Fall back to the token written by `huggingface-cli login`.
        token_file = Path.home() / ".cache" / "huggingface" / "token"
        if token_file.exists():
            token = token_file.read_text(encoding="utf-8").strip()
    return token


def _print_token_instructions():
    """Explain the two ways to supply a token when none was found."""
    print("❌ HF_TOKEN not found.")
    print("\n💡 Option 1: Set token as environment variable")
    print(" export HF_TOKEN=your_token_here")
    print(" python3 set_hf_space_env_qwen.py")
    print("\n💡 Option 2: Login with Hugging Face CLI")
    print(" huggingface-cli login")
    print(" python3 set_hf_space_env_qwen.py")


def _print_header():
    """Describe the upgrade before any variables are touched."""
    print(f"🚀 Upgrading to Qwen2.5-7B-Instruct Q4_K_M on Space: {SPACE_ID}")
    print("=" * 60)
    print("📊 Model specs:")
    print(" - Size: ~4GB (downloads from HF, no storage limit)")
    print(" - RAM: ~6-8GB (fits 16GB free tier)")
    print(" - Expected latency: 7-9s on 2 vCPU")
    print(" - Vietnamese legal: Excellent")
    print("=" * 60)


def _apply_env_vars(api):
    """Set every entry of ENV_VARS on the Space, deleting stale values first.

    The delete is best-effort: the variable may not exist yet, so failures
    there are intentionally ignored. Failures on add are reported per key.
    """
    for key, value in ENV_VARS.items():
        # Status print moved out of the try block: it cannot raise and is
        # unrelated to the best-effort delete it was previously coupled to.
        print(f"Setting {key}={value}...", end=" ")
        try:
            api.delete_space_variable(repo_id=SPACE_ID, key=key)
        except Exception:
            pass
        try:
            api.add_space_variable(repo_id=SPACE_ID, key=key, value=str(value))
            print("✅")
        except Exception as exc:
            print(f"❌ {exc}")


def _restart_space(api):
    """Restart the Space; on failure, tell the user to restart manually."""
    try:
        api.restart_space(repo_id=SPACE_ID)
        print("✅ Space restarted. Wait 2-3 minutes for model download & load.")
        print("\n💡 Monitor logs at:")
        print(f" https://huggingface.co/spaces/{SPACE_ID}/logs")
    except Exception as exc:
        print(f"⚠️ Config saved but restart failed: {exc}")
        print(" Please restart Space manually from HF dashboard.")
|
|
|
|
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
|
|
|
|
|