Update app.py
app.py CHANGED

@@ -1,5 +1,15 @@
 import os
 import subprocess
+import sys
+
+print("--- STEP 1: Installing Pre-Compiled llama-cpp-python Wheel (Fast Track) ---")
+# This forces pip to pull a ready-made binary instead of compiling it from C++ source
+subprocess.run([
+    sys.executable, "-m", "pip", "install", "llama-cpp-python[server]",
+    "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
+])
+
+# Now that it's installed, we can safely import huggingface tools
 from huggingface_hub import hf_hub_download
 
 # =========================================================================
@@ -8,18 +18,16 @@ from huggingface_hub import hf_hub_download
 REPO_ID = "bartowski/google_gemma-3-4b-it-GGUF"
 FILENAME = "google_gemma-3-4b-it-Q4_K_M.gguf"
 
-print("
-# This fetches the file and caches it inside the space architecture
+print("--- STEP 2: Downloading Gemma 3 4B Model Weights ---")
 model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
 print(f"Model successfully saved to cache area: {model_path}")
 
-print("
-# Setup execution parameters optimized to run fast inside 2 vCPUs and 16GB RAM
+print("--- STEP 3: Initializing OpenAI-Compatible Server ---")
 cmd = [
     "python3", "-m", "llama_cpp.server",
     "--model", model_path,
     "--host", "0.0.0.0",
-    "--port", "7860", # Mandatory port required by Hugging Face
+    "--port", "7860", # Mandatory port required by Hugging Face
     "--n_ctx", "2048", # Context limit optimized for RAM protection
     "--n_threads", "2" # Uses exactly the 2 free vCPUs allocated
 ]
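A note on STEP 1: subprocess.run as written discards pip's exit status, so a failed wheel install would only surface later as an ImportError on from huggingface_hub import. A minimal hardening sketch, same command but with check=True; this is an editorial suggestion, not part of the commit:

import subprocess
import sys

# check=True raises CalledProcessError if pip exits non-zero, so a broken
# install stops the Space at startup instead of failing at import time.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "llama-cpp-python[server]",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
    ],
    check=True,
)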
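The visible diff ends at the cmd list, so the call that actually starts the server sits below the hunk. Assuming the rest of app.py simply hands cmd to subprocess, the launch step would look like this sketch:

# Assumption: the lines below the visible hunk start the server with cmd.
# Blocking on the child process keeps the Space's main process, and with
# it the container, alive for as long as the server runs.
subprocess.run(cmd, check=True)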
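Once running, llama_cpp.server exposes an OpenAI-compatible REST API on port 7860, which Hugging Face maps to the Space's public URL. A minimal client sketch using only the standard library; the base URL and model alias below are placeholders, not values from the commit:

import json
import urllib.request

BASE_URL = "http://localhost:7860"  # placeholder; on a Space, use its public *.hf.space URL

payload = {
    "model": "gemma-3-4b-it",  # alias only; the server serves the GGUF passed via --model
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 64,
}
request = urllib.request.Request(
    BASE_URL + "/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    reply = json.load(response)
print(reply["choices"][0]["message"]["content"])

The conservative --n_ctx of 2048 fits the budget described in the comments: the Q4_K_M build of Gemma 3 4B is roughly 2.5 GB on disk, and a small context window keeps the KV cache comfortably inside the 16 GB of RAM available on the free CPU tier.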