3v324v23 committed on
Commit
7b9c753
·
1 Parent(s): a70fe97

Fix Ollama build error and use llama3 model

Browse files
Dockerfile CHANGED
@@ -1,7 +1,7 @@
1
  FROM python:3.11-slim
2
 
3
  # Install necessary tools
4
- RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
5
 
6
  # Install Ollama
7
  RUN curl -fsSL https://ollama.com/install.sh | sh
 
1
  FROM python:3.11-slim
2
 
3
  # Install necessary tools
4
+ RUN apt-get update && apt-get install -y curl zstd && rm -rf /var/lib/apt/lists/*
5
 
6
  # Install Ollama
7
  RUN curl -fsSL https://ollama.com/install.sh | sh
entrypoint.sh CHANGED
@@ -10,10 +10,9 @@ sleep 5
10
  echo "Pulling nomic-embed-text..."
11
  ollama pull nomic-embed-text
12
 
13
- # NOTE: The 20B model used for `gpt-oss:20b-cloud` may exceed Hugging Face Free Tier memory/storage limits.
14
- # Make sure your HF Space has the hardware to support this local LLM, or swap it for a smaller one like `llama3` or `mistral`.
15
- echo "Pulling gpt-oss:20b-cloud... (This might fail if it's a custom local model or exceeds HF limits)"
16
- # ollama pull gpt-oss:20b-cloud
17
 
18
  # Start the FastAPI server on port 7860 (default for HF Spaces)
19
  echo "Starting Application..."
 
10
  echo "Pulling nomic-embed-text..."
11
  ollama pull nomic-embed-text
12
 
13
+ # Pull the small LLM for generating responses (llama3.2:1b)
14
+ echo "Pulling llama3.2:1b... (Lightweight model for HF Free Tier)"
15
+ ollama pull llama3.2:1b
 
16
 
17
  # Start the FastAPI server on port 7860 (default for HF Spaces)
18
  echo "Starting Application..."
src/page_rag/llm_engine.py CHANGED
@@ -10,7 +10,7 @@ import ollama
10
  from .retriever import RetrievedPage, build_context
11
 
12
  # ─── Config ────────────────────────────────────────────────────────────────────
13
- LLM_MODEL = "gpt-oss:20b-cloud" # your local model name in Ollama
14
  # ───────────────────────────────────────────────────────────────────────────────
15
 
16
  SYSTEM_PROMPT = """You are a helpful document assistant.
 
10
  from .retriever import RetrievedPage, build_context
11
 
12
  # ─── Config ────────────────────────────────────────────────────────────────────
13
+ LLM_MODEL = "llama3.2:1b" # your local model name in Ollama
14
  # ───────────────────────────────────────────────────────────────────────────────
15
 
16
  SYSTEM_PROMPT = """You are a helpful document assistant.
src/vector_rag/llm_engine.py CHANGED
@@ -9,7 +9,7 @@ from typing import Generator
9
  import ollama
10
  from .retriever import RetrievedChunk, build_context
11
 
12
- LLM_MODEL = "gpt-oss:20b-cloud"
13
 
14
  SYSTEM_PROMPT = """You are a precise document assistant.
15
  Answer the user's question using ONLY the provided context chunks.
 
9
  import ollama
10
  from .retriever import RetrievedChunk, build_context
11
 
12
+ LLM_MODEL = "llama3.2:1b"
13
 
14
  SYSTEM_PROMPT = """You are a precise document assistant.
15
  Answer the user's question using ONLY the provided context chunks.