Forol committed on
Commit
4de1dae
·
verified ·
1 Parent(s): c492b9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -1,5 +1,15 @@
1
  import os
2
  import subprocess
 
 
 
 
 
 
 
 
 
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  # =========================================================================
@@ -8,18 +18,16 @@ from huggingface_hub import hf_hub_download
8
  REPO_ID = "bartowski/google_gemma-3-4b-it-GGUF"
9
  FILENAME = "google_gemma-3-4b-it-Q4_K_M.gguf"
10
 
11
- print("Step 1: Downloading model weights from Hugging Face hub...")
12
- # This fetches the file and caches it inside the space architecture
13
  model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
14
  print(f"Model successfully saved to cache area: {model_path}")
15
 
16
- print("Step 2: Initializing OpenAI-Compatible Mock Engine server...")
17
- # Setup execution parameters optimized to run fast inside 2 vCPUs and 16GB RAM
18
  cmd = [
19
  "python3", "-m", "llama_cpp.server",
20
  "--model", model_path,
21
  "--host", "0.0.0.0",
22
- "--port", "7860", # Mandatory port required by Hugging Face to route traffic
23
  "--n_ctx", "2048", # Context limit optimized for RAM protection
24
  "--n_threads", "2" # Uses exactly the 2 free vCPUs allocated
25
  ]
 
1
  import os
2
  import subprocess
3
+ import sys
4
+
5
print("--- STEP 1: Installing Pre-Compiled llama-cpp-python Wheel (Fast Track) ---")
# Offer pip the project's CPU wheel index so it can pick up a prebuilt binary
# instead of compiling llama-cpp-python from C++ source. (An extra index makes
# the wheel available; it does not by itself forbid a source build.)
# check=True aborts startup with CalledProcessError when the install fails,
# instead of continuing to a server launch that cannot import llama_cpp.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "llama-cpp-python[server]",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
    ],
    check=True,
)
11
+
12
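+ # Import Hugging Face tooling after the install step. NOTE: the step above installs llama-cpp-python, not huggingface_hub — presumably huggingface_hub is already present in the environment (TODO confirm)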
13
  from huggingface_hub import hf_hub_download
14
 
15
  # =========================================================================
 
18
# Hub coordinates of the quantized GGUF weights to serve.
REPO_ID = "bartowski/google_gemma-3-4b-it-GGUF"
FILENAME = "google_gemma-3-4b-it-Q4_K_M.gguf"

print("--- STEP 2: Downloading Gemma 3 4B Model Weights ---")

# The value returned by hf_hub_download is used below as the model file path
# (presumably a location inside the local Hugging Face cache — verify).
model_path = hf_hub_download(
    filename=FILENAME,
    repo_id=REPO_ID,
)
print(f"Model successfully saved to cache area: {model_path}")
24
 
25
print("--- STEP 3: Initializing OpenAI-Compatible Server ---")

# Launch with sys.executable so the server runs under the same interpreter
# that the `sys.executable -m pip install` step put llama-cpp-python into;
# a bare "python3" from PATH may resolve to a different environment where
# the llama_cpp package is missing.
cmd = [
    sys.executable, "-m", "llama_cpp.server",
    "--model", model_path,
    "--host", "0.0.0.0",
    "--port", "7860",  # Mandatory port required by Hugging Face
    "--n_ctx", "2048",  # Context limit optimized for RAM protection
    "--n_threads", "2",  # Uses exactly the 2 free vCPUs allocated
]