Spaces:

Forol
/

gemma3-api-backend

Paused

Forol committed 18 days ago

Commit

6535955

verified ·

1 Parent(s): 00ca394

Delete app.py

Files changed (1) hide show

app.py DELETED Viewed

@@ -1,37 +0,0 @@
-"""Bootstrap a Hugging Face Space serving Gemma 3 4B through llama.cpp.
-
-Runs three steps in order: install llama-cpp-python with its server extras,
-download the 4-bit GGUF weights from the Hub, then launch ``llama_cpp.server``
-on port 7860 and block until the server process exits.
-"""
-import os
-import subprocess
-import sys
-print("--- STEP 1: Installing Pre-Compiled llama-cpp-python Wheel (Fast Track) ---")
-# The extra index hosts prebuilt CPU wheels. --prefer-binary makes pip pick a
-# wheel even when another index offers a newer source-only release, so the C++
-# build is skipped whenever a compatible wheel exists. check=True aborts here
-# on a failed install instead of failing later with an unrelated import error.
-subprocess.run(
-    [
-        sys.executable, "-m", "pip", "install", "llama-cpp-python[server]",
-        "--prefer-binary",
-        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
-    ],
-    check=True,
-)
-# Imported only after the install step has finished.
-# NOTE(review): the pip command above does not name huggingface_hub explicitly;
-# confirm the environment provides it (e.g. via requirements.txt).
-from huggingface_hub import hf_hub_download
-# =========================================================================
-# CONFIGURATION: Targets the exact repository and 4-bit model file
-# =========================================================================
-REPO_ID = "bartowski/google_gemma-3-4b-it-GGUF"
-FILENAME = "google_gemma-3-4b-it-Q4_K_M.gguf"
-print("--- STEP 2: Downloading Gemma 3 4B Model Weights ---")
-# hf_hub_download returns the local path of the downloaded (or cached) file.
-model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
-print(f"Model successfully saved to cache area: {model_path}")
-print("--- STEP 3: Initializing OpenAI-Compatible Server ---")
-cmd = [
-    # Use the interpreter pip just installed into; a bare "python3" on PATH may
-    # be a different environment that lacks llama_cpp.
-    sys.executable, "-m", "llama_cpp.server",
-    "--model", model_path,
-    "--model_alias", "gemma-3",
-    "--host", "0.0.0.0",
-    "--port", "7860",       # Mandatory port required by Hugging Face
-    "--n_ctx", "2048",      # Context limit optimized for RAM protection
-    "--n_threads", "2"      # Uses exactly the 2 free vCPUs allocated
-]
-# Run server engine; blocks until it exits, then propagate its exit status so a
-# server crash is not reported as a clean exit of this script.
-sys.exit(subprocess.run(cmd).returncode)