CrazyMonkey0 committed on
Commit
f7ec4f4
·
1 Parent(s): f45e402

fix(nlp): update Llama loading to use from_pretrained()

Browse files
Files changed (2) hide show
  1. Dockerfile +3 -3
  2. app/routes/nlp.py +8 -8
Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
  # Use full Python image for compatibility with prebuilt wheels
2
- FROM python:3.11-bullseye
3
 
4
  # Set workdir
5
  WORKDIR /app
@@ -14,8 +14,8 @@ RUN apt-get update && apt-get install -y \
14
  # Upgrade pip
15
  RUN pip install --upgrade pip
16
 
17
- # Install llama-cpp-python prebuilt wheel (CPU)
18
- RUN pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
19
 
20
  # Copy project requirements and install
21
  COPY ./requirements.txt /app/requirements.txt
 
1
  # Use full Python image for compatibility with prebuilt wheels
2
+ FROM python:3.12
3
 
4
  # Set workdir
5
  WORKDIR /app
 
14
  # Upgrade pip
15
  RUN pip install --upgrade pip
16
 
17
+ # # Install llama-cpp-python prebuilt wheel (CPU)
18
+ # RUN pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
19
 
20
  # Copy project requirements and install
21
  COPY ./requirements.txt /app/requirements.txt
app/routes/nlp.py CHANGED
@@ -11,14 +11,14 @@ class ChatRequest(BaseModel):
11
 
12
  # Load model function
13
  def load_model_lama():
14
- MODEL_PATH = "/app/models/Qwen3-8B-Q5_K_M.gguf"
15
- url = "https://huggingface.co/Qwen/Qwen3-8B-GGUF/resolve/main/Qwen3-8B-Q5_K_M.gguf?download=true"
16
-
17
- if not os.path.exists(MODEL_PATH):
18
- os.makedirs("/app/models/", exist_ok=True)
19
- os.system(f"wget -c {url} -O {MODEL_PATH}")
20
-
21
- return Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=8, temperature=0.7, top_p=0.9)
22
 
23
  # FastAPI startup event (w main.py)
24
  # app.state.model_lama = load_model_lama()
 
11
 
12
  # Load model function
13
  def load_model_lama():
14
+ return Llama.from_pretrained(
15
+ repo_id="Qwen/Qwen3-8B-GGUF",
16
+ filename="Qwen3-8B-Q4_K_M.gguf",
17
+ n_ctx=2048,
18
+ n_threads=8,
19
+ temperature=0.7,
20
+ top_p=0.9,
21
+ )
22
 
23
  # FastAPI startup event (w main.py)
24
  # app.state.model_lama = load_model_lama()