davidtran999 committed on
Commit
30313fe
·
verified ·
1 Parent(s): 56c4c1c

Upload backend/hue_portal/hue-portal-backendDocker/set_14b_model.py with huggingface_hub

Browse files
backend/hue_portal/hue-portal-backendDocker/set_14b_model.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to set up Qwen 2.5 14B with 4-bit quantization on Hugging Face Spaces.
4
+ 14B model is smaller and faster than 32B, good balance between quality and speed.
5
+ """
6
+
7
+ import os
8
+ from huggingface_hub import HfApi
9
+ from huggingface_hub.utils import HfFolder
10
+
11
def get_hf_token():
    """Return the cached Hugging Face access token, or None when absent.

    Reads the token written by ``huggingface-cli login``; prints guidance
    for the user when no cached credential exists.
    """
    cached = HfFolder.get_token()
    if cached:
        return cached
    # No cached credential — tell the user how to create one.
    print("❌ No Hugging Face token found!")
    print("💡 Run: huggingface-cli login")
    return None
19
+
20
def set_secrets(api, repo_id, secrets):
    """Create or overwrite secrets on a Hugging Face Space.

    Args:
        api: An authenticated ``HfApi`` client (``api.token`` is reused).
        repo_id: Target Space repository, e.g. ``"user/space"``.
        secrets: Mapping of secret name -> secret value to set.

    Returns:
        bool: True when every secret was set, False if any call failed.
    """
    try:
        # NOTE(fix): the previous version listed and deleted Space *variables*
        # (get_space_variables / delete_space_variable) to avoid collisions,
        # but variables and secrets are distinct Space resources — that pass
        # could remove unrelated variables while never clearing an existing
        # secret.  add_space_secret upserts (adds or overwrites), so setting
        # each key directly is both sufficient and safe.
        for key, value in secrets.items():
            api.add_space_secret(repo_id=repo_id, key=key, value=value, token=api.token)
            print(f"✅ Set secret: {key}")
        return True
    except Exception as e:
        # Best-effort reporting: the caller only needs the boolean outcome.
        print(f"❌ Error setting secrets: {e}")
        return False
44
+
45
def main():
    """Configure the hue-portal-backend Space to run Qwen 2.5 14B in 4-bit mode."""
    repo_id = "davidtran999/hue-portal-backend"

    print("🚀 Setting up Qwen 2.5 14B with 4-bit quantization on HF Space")
    print(f"📦 Repository: {repo_id}\n")

    # Bail out early when no cached credential is available.
    hf_token = get_hf_token()
    if hf_token is None:
        return

    client = HfApi(token=hf_token)

    # Environment the backend reads: local provider, 14B checkpoint,
    # CUDA device, 4-bit quantization enabled.
    config = {
        "LLM_PROVIDER": "local",
        "LOCAL_MODEL_PATH": "Qwen/Qwen2.5-14B-Instruct",
        "LOCAL_MODEL_DEVICE": "cuda",
        "LOCAL_MODEL_4BIT": "true",
    }

    print("📋 Configuration:")
    for name in config:
        print(f"   {name}={config[name]}")
    print()

    # Push the configuration; report failure and stop on error.
    if not set_secrets(client, repo_id, config):
        print("\n❌ Failed to set secrets")
        return

    print("\n✅ Successfully configured 4-bit Qwen 2.5 14B!")
    print("\n📊 Expected Memory Usage:")
    print("   - VRAM: ~6-8GB (với 4-bit)")
    print("   - RAM: ~12GB+")
    print("   - Quality: ⭐⭐⭐⭐ (very good)")
    print("   - Speed: ⚡⚡⚡ (faster than 32B)")
    print("\n⚠️  Note: Model will be downloaded on first run (~30GB)")
    print("   First load may take 10-20 minutes.")
    print("\n🔄 Rebuild your HF Space to apply changes!")
    print("   Or wait for auto-rebuild if enabled.")
85
+
86
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
88
+
89
+
90
+