Spaces:

Forol
/

gemma3-api-backend

Paused

Forol committed about 19 hours ago

Commit

81abd3e

verified ·

1 Parent(s): 4d38d55

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import os
+import subprocess
+from huggingface_hub import hf_hub_download
+# =========================================================================
+# CONFIGURATION: Targets the exact repository and 4-bit model file
+# =========================================================================
+REPO_ID = "bartowski/google_gemma-3-4b-it-GGUF"
+FILENAME = "google_gemma-3-4b-it-Q4_K_M.gguf"
+print("Step 1: Downloading model weights from Hugging Face hub...")
+# This fetches the file and caches it inside the space architecture
+model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+print(f"Model successfully saved to cache area: {model_path}")
+print("Step 2: Initializing OpenAI-Compatible Mock Engine server...")
+# Setup execution parameters optimized to run fast inside 2 vCPUs and 16GB RAM
+cmd = [
+    "python3", "-m", "llama_cpp.server",
+    "--model", model_path,
+    "--host", "0.0.0.0",
+    "--port", "7860",       # Mandatory port required by Hugging Face to route traffic
+    "--n_ctx", "2048",      # Context limit optimized for RAM protection
+    "--n_threads", "2"      # Uses exactly the 2 free vCPUs allocated
+]
+# Run server engine
+subprocess.run(cmd)