Anurag Shirke committed on
Commit ·
43fe2fe
1
Parent(s): 56d1864
Increase the LLM memory limit, which was hindering performance
Browse files- Dockerfile +3 -0
- docker-compose.yml +2 -1
- scripts/ollama_entrypoint.sh +1 -1
- scripts/wait-for-qdrant.sh +1 -1
- src/__pycache__/main.cpython-311.pyc +0 -0
- src/core/__pycache__/llm.cpython-311.pyc +0 -0
- src/core/__pycache__/vector_store.cpython-311.pyc +0 -0
- src/core/llm.py +1 -1
- src/core/vector_store.py +1 -1
- src/main.py +9 -3
Dockerfile
CHANGED
|
@@ -11,6 +11,9 @@ WORKDIR /app
|
|
| 11 |
# Copy the requirements file into the container
|
| 12 |
COPY requirements.txt .
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
# Install any needed packages specified in requirements.txt
|
| 15 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 16 |
|
|
|
|
| 11 |
# Copy the requirements file into the container
|
| 12 |
COPY requirements.txt .
|
| 13 |
|
| 14 |
+
# Set a higher timeout for pip installations
|
| 15 |
+
ENV PIP_DEFAULT_TIMEOUT=1000
|
| 16 |
+
|
| 17 |
# Install any needed packages specified in requirements.txt
|
| 18 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 19 |
|
docker-compose.yml
CHANGED
|
@@ -15,7 +15,7 @@ services:
|
|
| 15 |
environment:
|
| 16 |
- QDRANT_HOST=qdrant
|
| 17 |
- OLLAMA_HOST=ollama
|
| 18 |
-
entrypoint: ["/app/scripts/wait-for-qdrant.sh", "qdrant:6333", "
|
| 19 |
|
| 20 |
qdrant:
|
| 21 |
image: qdrant/qdrant:latest
|
|
@@ -33,6 +33,7 @@ services:
|
|
| 33 |
volumes:
|
| 34 |
- ./scripts:/app
|
| 35 |
- ollama_data:/root/.ollama
|
|
|
|
| 36 |
|
| 37 |
volumes:
|
| 38 |
qdrant_data:
|
|
|
|
| 15 |
environment:
|
| 16 |
- QDRANT_HOST=qdrant
|
| 17 |
- OLLAMA_HOST=ollama
|
| 18 |
+
entrypoint: ["/app/scripts/wait-for-qdrant.sh", "qdrant:6333", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
| 19 |
|
| 20 |
qdrant:
|
| 21 |
image: qdrant/qdrant:latest
|
|
|
|
| 33 |
volumes:
|
| 34 |
- ./scripts:/app
|
| 35 |
- ollama_data:/root/.ollama
|
| 36 |
+
mem_limit: 6.5g
|
| 37 |
|
| 38 |
volumes:
|
| 39 |
qdrant_data:
|
scripts/ollama_entrypoint.sh
CHANGED
|
@@ -18,7 +18,7 @@ done
|
|
| 18 |
|
| 19 |
# Pull the model
|
| 20 |
echo "Ollama server started. Pulling llama3 model..."
|
| 21 |
-
ollama pull
|
| 22 |
|
| 23 |
# Wait for the background process to exit
|
| 24 |
wait $pid
|
|
|
|
| 18 |
|
| 19 |
# Pull the model
|
| 20 |
echo "Ollama server started. Pulling llama3 model..."
|
| 21 |
+
ollama pull phi3
|
| 22 |
|
| 23 |
# Wait for the background process to exit
|
| 24 |
wait $pid
|
scripts/wait-for-qdrant.sh
CHANGED
|
@@ -14,4 +14,4 @@ until curl -s -f "$host/healthz" > /dev/null; do
|
|
| 14 |
done
|
| 15 |
|
| 16 |
>&2 echo "Qdrant is up - executing command"
|
| 17 |
-
exec $
|
|
|
|
| 14 |
done
|
| 15 |
|
| 16 |
>&2 echo "Qdrant is up - executing command"
|
| 17 |
+
exec "$@"
|
src/__pycache__/main.cpython-311.pyc
CHANGED
|
Binary files a/src/__pycache__/main.cpython-311.pyc and b/src/__pycache__/main.cpython-311.pyc differ
|
|
|
src/core/__pycache__/llm.cpython-311.pyc
CHANGED
|
Binary files a/src/core/__pycache__/llm.cpython-311.pyc and b/src/core/__pycache__/llm.cpython-311.pyc differ
|
|
|
src/core/__pycache__/vector_store.cpython-311.pyc
CHANGED
|
Binary files a/src/core/__pycache__/vector_store.cpython-311.pyc and b/src/core/__pycache__/vector_store.cpython-311.pyc differ
|
|
|
src/core/llm.py
CHANGED
|
@@ -12,7 +12,7 @@ def get_ollama_client():
|
|
| 12 |
|
| 13 |
def format_prompt(query: str, context: list[dict]) -> str:
|
| 14 |
"""Formats the prompt for the LLM with the retrieved context."""
|
| 15 |
-
context_str = "\n".join([item
|
| 16 |
prompt = f"""**Instruction**:
|
| 17 |
Answer the user's query based *only* on the provided context.
|
| 18 |
If the context does not contain the answer, state that you cannot answer the question with the given information.
|
|
|
|
| 12 |
|
| 13 |
def format_prompt(query: str, context: list[dict]) -> str:
|
| 14 |
"""Formats the prompt for the LLM with the retrieved context."""
|
| 15 |
+
context_str = "\n".join([item.payload.get('text') for item in context])
|
| 16 |
prompt = f"""**Instruction**:
|
| 17 |
Answer the user's query based *only* on the provided context.
|
| 18 |
If the context does not contain the answer, state that you cannot answer the question with the given information.
|
src/core/vector_store.py
CHANGED
|
@@ -29,7 +29,7 @@ def upsert_vectors(client: QdrantClient, collection_name: str, vectors, payloads
|
|
| 29 |
client.upsert(
|
| 30 |
collection_name=collection_name,
|
| 31 |
points=models.Batch(
|
| 32 |
-
ids=
|
| 33 |
vectors=vectors,
|
| 34 |
payloads=payloads
|
| 35 |
),
|
|
|
|
| 29 |
client.upsert(
|
| 30 |
collection_name=collection_name,
|
| 31 |
points=models.Batch(
|
| 32 |
+
ids=list(range(len(vectors))), # Generate sequential integer IDs
|
| 33 |
vectors=vectors,
|
| 34 |
payloads=payloads
|
| 35 |
),
|
src/main.py
CHANGED
|
@@ -11,7 +11,7 @@ app = FastAPI()
|
|
| 11 |
# --- Constants ---
|
| 12 |
UPLOADS_DIR = "uploads"
|
| 13 |
QDRANT_COLLECTION_NAME = "knowledge_base"
|
| 14 |
-
OLLAMA_MODEL = "
|
| 15 |
|
| 16 |
# --- Application Startup ---
|
| 17 |
# Create uploads directory if it doesn't exist
|
|
@@ -87,11 +87,17 @@ def query_knowledge_base(request: QueryRequest):
|
|
| 87 |
# 4. Generate a response from the LLM
|
| 88 |
answer = generate_response(ollama_client, OLLAMA_MODEL, prompt)
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# 5. Extract source documents for citation
|
| 91 |
source_documents = [
|
| 92 |
{
|
| 93 |
-
"source": result.payload
|
| 94 |
-
"text": result.payload
|
| 95 |
"score": result.score
|
| 96 |
}
|
| 97 |
for result in search_results
|
|
|
|
| 11 |
# --- Constants ---
|
| 12 |
UPLOADS_DIR = "uploads"
|
| 13 |
QDRANT_COLLECTION_NAME = "knowledge_base"
|
| 14 |
+
OLLAMA_MODEL = "tinyllama"
|
| 15 |
|
| 16 |
# --- Application Startup ---
|
| 17 |
# Create uploads directory if it doesn't exist
|
|
|
|
| 87 |
# 4. Generate a response from the LLM
|
| 88 |
answer = generate_response(ollama_client, OLLAMA_MODEL, prompt)
|
| 89 |
|
| 90 |
+
# Debugging: Print search_results structure
|
| 91 |
+
print(f"Type of search_results: {type(search_results)}")
|
| 92 |
+
if search_results:
|
| 93 |
+
print(f"Type of first element in search_results: {type(search_results[0])}")
|
| 94 |
+
print(f"Content of first element in search_results: {search_results[0]}")
|
| 95 |
+
|
| 96 |
# 5. Extract source documents for citation
|
| 97 |
source_documents = [
|
| 98 |
{
|
| 99 |
+
"source": result.payload.get("source") if result.payload else "Unknown",
|
| 100 |
+
"text": result.payload.get("text") if result.payload else "N/A",
|
| 101 |
"score": result.score
|
| 102 |
}
|
| 103 |
for result in search_results
|