Fix Ollama build error and use llama3 model
Browse files- Dockerfile +1 -1
- entrypoint.sh +3 -4
- src/page_rag/llm_engine.py +1 -1
- src/vector_rag/llm_engine.py +1 -1
Dockerfile
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
# Install necessary tools
|
| 4 |
-
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
|
| 5 |
|
| 6 |
# Install Ollama
|
| 7 |
RUN curl -fsSL https://ollama.com/install.sh | sh
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
# Install necessary tools
|
| 4 |
+
RUN apt-get update && apt-get install -y curl zstd && rm -rf /var/lib/apt/lists/*
|
| 5 |
|
| 6 |
# Install Ollama
|
| 7 |
RUN curl -fsSL https://ollama.com/install.sh | sh
|
entrypoint.sh
CHANGED
|
@@ -10,10 +10,9 @@ sleep 5
|
|
| 10 |
echo "Pulling nomic-embed-text..."
|
| 11 |
ollama pull nomic-embed-text
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
# ollama pull gpt-oss:20b-cloud
|
| 17 |
|
| 18 |
# Start the FastAPI server on port 7860 (default for HF Spaces)
|
| 19 |
echo "Starting Application..."
|
|
|
|
| 10 |
echo "Pulling nomic-embed-text..."
|
| 11 |
ollama pull nomic-embed-text
|
| 12 |
|
| 13 |
+
# Pull the small LLM for generating responses (llama3.2:1b)
|
| 14 |
+
echo "Pulling llama3.2:1b... (Lightweight model for HF Free Tier)"
|
| 15 |
+
ollama pull llama3.2:1b
|
|
|
|
| 16 |
|
| 17 |
# Start the FastAPI server on port 7860 (default for HF Spaces)
|
| 18 |
echo "Starting Application..."
|
src/page_rag/llm_engine.py
CHANGED
|
@@ -10,7 +10,7 @@ import ollama
|
|
| 10 |
from .retriever import RetrievedPage, build_context
|
| 11 |
|
| 12 |
# ─── Config ───────────────────────────────────────────────────────────────────
|
| 13 |
-
LLM_MODEL = "
|
| 14 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 15 |
|
| 16 |
SYSTEM_PROMPT = """You are a helpful document assistant.
|
|
|
|
| 10 |
from .retriever import RetrievedPage, build_context
|
| 11 |
|
| 12 |
# ─── Config ───────────────────────────────────────────────────────────────────
|
| 13 |
+
LLM_MODEL = "llama3.2:1b" # your local model name in Ollama
|
| 14 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 15 |
|
| 16 |
SYSTEM_PROMPT = """You are a helpful document assistant.
|
src/vector_rag/llm_engine.py
CHANGED
|
@@ -9,7 +9,7 @@ from typing import Generator
|
|
| 9 |
import ollama
|
| 10 |
from .retriever import RetrievedChunk, build_context
|
| 11 |
|
| 12 |
-
LLM_MODEL = "
|
| 13 |
|
| 14 |
SYSTEM_PROMPT = """You are a precise document assistant.
|
| 15 |
Answer the user's question using ONLY the provided context chunks.
|
|
|
|
| 9 |
import ollama
|
| 10 |
from .retriever import RetrievedChunk, build_context
|
| 11 |
|
| 12 |
+
LLM_MODEL = "llama3.2:1b"
|
| 13 |
|
| 14 |
SYSTEM_PROMPT = """You are a precise document assistant.
|
| 15 |
Answer the user's question using ONLY the provided context chunks.
|