Forol committed on
Commit
6535955
·
verified ·
1 Parent(s): 00ca394

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -37
app.py DELETED
@@ -1,37 +0,0 @@
1
- import os
2
- import subprocess
3
- import sys
4
-
5
- print("--- STEP 1: Installing Pre-Compiled llama-cpp-python Wheel (Fast Track) ---")
6
- # This forces pip to pull a ready-made binary instead of compiling it from C++ source
7
- subprocess.run([
8
- sys.executable, "-m", "pip", "install", "llama-cpp-python[server]",
9
- "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
10
- ])
11
-
12
- # Now that it's installed, we can safely import huggingface tools
13
- from huggingface_hub import hf_hub_download
14
-
15
- # =========================================================================
16
- # CONFIGURATION: Targets the exact repository and 4-bit model file
17
- # =========================================================================
18
- REPO_ID = "bartowski/google_gemma-3-4b-it-GGUF"
19
- FILENAME = "google_gemma-3-4b-it-Q4_K_M.gguf"
20
-
21
- print("--- STEP 2: Downloading Gemma 3 4B Model Weights ---")
22
- model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
23
- print(f"Model successfully saved to cache area: {model_path}")
24
-
25
- print("--- STEP 3: Initializing OpenAI-Compatible Server ---")
26
- cmd = [
27
- "python3", "-m", "llama_cpp.server",
28
- "--model", model_path,
29
- "--model_alias", "gemma-3",
30
- "--host", "0.0.0.0",
31
- "--port", "7860", # Mandatory port required by Hugging Face
32
- "--n_ctx", "2048", # Context limit optimized for RAM protection
33
- "--n_threads", "2" # Uses exactly the 2 free vCPUs allocated
34
- ]
35
-
36
- # Run server engine
37
- subprocess.run(cmd)