CrazyMonkey0 committed on
Commit
f7ec4f4
·
1 Parent(s): f45e402

fix(nlp): update Llama loading to use from_pretrained()

Browse files
Files changed (2) hide show
  1. Dockerfile +3 -3
  2. app/routes/nlp.py +8 -8
Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
  # Use full Python image for compatibility with prebuilt wheels
2
- FROM python:3.11-bullseye
3
 
4
  # Set workdir
5
  WORKDIR /app
@@ -14,8 +14,8 @@ RUN apt-get update && apt-get install -y \
14
  # Upgrade pip
15
  RUN pip install --upgrade pip
16
 
17
- # Install llama-cpp-python prebuilt wheel (CPU)
18
- RUN pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
19
 
20
  # Copy project requirements and install
21
  COPY ./requirements.txt /app/requirements.txt
 
1
  # Use full Python image for compatibility with prebuilt wheels
2
+ FROM python:3.12
3
 
4
  # Set workdir
5
  WORKDIR /app
 
14
  # Upgrade pip
15
  RUN pip install --upgrade pip
16
 
17
+ # # Install llama-cpp-python prebuilt wheel (CPU)
18
+ # RUN pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
19
 
20
  # Copy project requirements and install
21
  COPY ./requirements.txt /app/requirements.txt
app/routes/nlp.py CHANGED
@@ -11,14 +11,14 @@ class ChatRequest(BaseModel):
11
 
12
  # Load model function
13
  def load_model_lama():
14
- MODEL_PATH = "/app/models/Qwen3-8B-Q5_K_M.gguf"
15
- url = "https://huggingface.co/Qwen/Qwen3-8B-GGUF/resolve/main/Qwen3-8B-Q5_K_M.gguf?download=true"
16
-
17
- if not os.path.exists(MODEL_PATH):
18
- os.makedirs("/app/models/", exist_ok=True)
19
- os.system(f"wget -c {url} -O {MODEL_PATH}")
20
-
21
- return Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=8, temperature=0.7, top_p=0.9)
22
 
23
  # FastAPI startup event (w main.py)
24
  # app.state.model_lama = load_model_lama()
 
11
 
12
  # Load model function
13
  def load_model_lama():
14
+ return Llama.from_pretrained(
15
+ repo_id="Qwen/Qwen3-8B-GGUF",
16
+ filename="Qwen3-8B-Q4_K_M.gguf",
17
+ n_ctx=2048,
18
+ n_threads=8,
19
+ temperature=0.7,
20
+ top_p=0.9,
21
+ )
22
 
23
  # FastAPI startup event (w main.py)
24
  # app.state.model_lama = load_model_lama()