Forol committed
Commit 81abd3e · verified · 1 Parent(s): 4d38d55

Create app.py

Files changed (1)
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
+ import os
+ import subprocess
+ from huggingface_hub import hf_hub_download
+
+ # =========================================================================
+ # CONFIGURATION: targets the exact repository and the 4-bit quantized file
+ # =========================================================================
+ REPO_ID = "bartowski/google_gemma-3-4b-it-GGUF"
+ FILENAME = "google_gemma-3-4b-it-Q4_K_M.gguf"
+
+ print("Step 1: Downloading model weights from the Hugging Face Hub...")
+ # Fetch the GGUF file and cache it on the Space's local filesystem
+ model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+ print(f"Model cached at: {model_path}")
+
+ print("Step 2: Starting the OpenAI-compatible llama.cpp server...")
+ # Server parameters sized for the free Space hardware (2 vCPUs, 16 GB RAM)
+ cmd = [
+     "python3", "-m", "llama_cpp.server",
+     "--model", model_path,
+     "--host", "0.0.0.0",
+     "--port", "7860",    # Port Hugging Face Spaces routes external traffic to
+     "--n_ctx", "2048",   # Modest context window to keep RAM usage in check
+     "--n_threads", "2",  # Matches the 2 vCPUs allocated to the Space
+ ]
+
+ # Launch the server in the foreground; this call blocks and keeps the Space alive
+ subprocess.run(cmd)
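
For this script to start on a Space, the imported packages and the server module must be installed at build time. A minimal sketch of a requirements.txt, assuming the Space installs dependencies from that file (the file itself is not part of this commit):

huggingface_hub
llama-cpp-python[server]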
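
Once the server is up, it exposes the standard OpenAI-style REST endpoints on port 7860, including /v1/chat/completions. A minimal client sketch, assuming the requests package and a placeholder BASE_URL (substitute your own Space URL, e.g. https://<user>-<space>.hf.space):

import requests

# Placeholder: point this at your deployed Space, or localhost when testing
BASE_URL = "http://localhost:7860"

resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "max_tokens": 64,
    },
    timeout=120,
)
print(resp.json()["choices"][0]["message"]["content"])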