Anurag Shirke committed on
Commit
43fe2fe
·
1 Parent(s): 56d1864

Raise the container memory limit, which was hindering LLM performance

Browse files
Dockerfile CHANGED
@@ -11,6 +11,9 @@ WORKDIR /app
11
  # Copy the requirements file into the container
12
  COPY requirements.txt .
13
 
 
 
 
14
  # Install any needed packages specified in requirements.txt
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
 
11
  # Copy the requirements file into the container
12
  COPY requirements.txt .
13
 
14
+ # Set a higher timeout for pip installations
15
+ ENV PIP_DEFAULT_TIMEOUT=1000
16
+
17
  # Install any needed packages specified in requirements.txt
18
  RUN pip install --no-cache-dir -r requirements.txt
19
 
docker-compose.yml CHANGED
@@ -15,7 +15,7 @@ services:
15
  environment:
16
  - QDRANT_HOST=qdrant
17
  - OLLAMA_HOST=ollama
18
- entrypoint: ["/app/scripts/wait-for-qdrant.sh", "qdrant:6333", "--", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
19
 
20
  qdrant:
21
  image: qdrant/qdrant:latest
@@ -33,6 +33,7 @@ services:
33
  volumes:
34
  - ./scripts:/app
35
  - ollama_data:/root/.ollama
 
36
 
37
  volumes:
38
  qdrant_data:
 
15
  environment:
16
  - QDRANT_HOST=qdrant
17
  - OLLAMA_HOST=ollama
18
+ entrypoint: ["/app/scripts/wait-for-qdrant.sh", "qdrant:6333", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
19
 
20
  qdrant:
21
  image: qdrant/qdrant:latest
 
33
  volumes:
34
  - ./scripts:/app
35
  - ollama_data:/root/.ollama
36
+ mem_limit: 6.5g
37
 
38
  volumes:
39
  qdrant_data:
scripts/ollama_entrypoint.sh CHANGED
@@ -18,7 +18,7 @@ done
18
 
19
  # Pull the model
20
  echo "Ollama server started. Pulling llama3 model..."
21
- ollama pull llama3
22
 
23
  # Wait for the background process to exit
24
  wait $pid
 
18
 
19
  # Pull the model
20
  echo "Ollama server started. Pulling llama3 model..."
21
+ ollama pull phi3
22
 
23
  # Wait for the background process to exit
24
  wait $pid
scripts/wait-for-qdrant.sh CHANGED
@@ -14,4 +14,4 @@ until curl -s -f "$host/healthz" > /dev/null; do
14
  done
15
 
16
  >&2 echo "Qdrant is up - executing command"
17
- exec $cmd
 
14
  done
15
 
16
  >&2 echo "Qdrant is up - executing command"
17
+ exec "$@"
src/__pycache__/main.cpython-311.pyc CHANGED
Binary files a/src/__pycache__/main.cpython-311.pyc and b/src/__pycache__/main.cpython-311.pyc differ
 
src/core/__pycache__/llm.cpython-311.pyc CHANGED
Binary files a/src/core/__pycache__/llm.cpython-311.pyc and b/src/core/__pycache__/llm.cpython-311.pyc differ
 
src/core/__pycache__/vector_store.cpython-311.pyc CHANGED
Binary files a/src/core/__pycache__/vector_store.cpython-311.pyc and b/src/core/__pycache__/vector_store.cpython-311.pyc differ
 
src/core/llm.py CHANGED
@@ -12,7 +12,7 @@ def get_ollama_client():
12
 
13
  def format_prompt(query: str, context: list[dict]) -> str:
14
  """Formats the prompt for the LLM with the retrieved context."""
15
- context_str = "\n".join([item['payload']['text'] for item in context])
16
  prompt = f"""**Instruction**:
17
  Answer the user's query based *only* on the provided context.
18
  If the context does not contain the answer, state that you cannot answer the question with the given information.
 
12
 
13
  def format_prompt(query: str, context: list[dict]) -> str:
14
  """Formats the prompt for the LLM with the retrieved context."""
15
+ context_str = "\n".join([item.payload.get('text') for item in context])
16
  prompt = f"""**Instruction**:
17
  Answer the user's query based *only* on the provided context.
18
  If the context does not contain the answer, state that you cannot answer the question with the given information.
src/core/vector_store.py CHANGED
@@ -29,7 +29,7 @@ def upsert_vectors(client: QdrantClient, collection_name: str, vectors, payloads
29
  client.upsert(
30
  collection_name=collection_name,
31
  points=models.Batch(
32
- ids=None, # Let Qdrant assign IDs
33
  vectors=vectors,
34
  payloads=payloads
35
  ),
 
29
  client.upsert(
30
  collection_name=collection_name,
31
  points=models.Batch(
32
+ ids=list(range(len(vectors))), # Generate sequential integer IDs
33
  vectors=vectors,
34
  payloads=payloads
35
  ),
src/main.py CHANGED
@@ -11,7 +11,7 @@ app = FastAPI()
11
  # --- Constants ---
12
  UPLOADS_DIR = "uploads"
13
  QDRANT_COLLECTION_NAME = "knowledge_base"
14
- OLLAMA_MODEL = "llama3"
15
 
16
  # --- Application Startup ---
17
  # Create uploads directory if it doesn't exist
@@ -87,11 +87,17 @@ def query_knowledge_base(request: QueryRequest):
87
  # 4. Generate a response from the LLM
88
  answer = generate_response(ollama_client, OLLAMA_MODEL, prompt)
89
 
 
 
 
 
 
 
90
  # 5. Extract source documents for citation
91
  source_documents = [
92
  {
93
- "source": result.payload["source"],
94
- "text": result.payload["text"],
95
  "score": result.score
96
  }
97
  for result in search_results
 
11
  # --- Constants ---
12
  UPLOADS_DIR = "uploads"
13
  QDRANT_COLLECTION_NAME = "knowledge_base"
14
+ OLLAMA_MODEL = "tinyllama"
15
 
16
  # --- Application Startup ---
17
  # Create uploads directory if it doesn't exist
 
87
  # 4. Generate a response from the LLM
88
  answer = generate_response(ollama_client, OLLAMA_MODEL, prompt)
89
 
90
+ # Debugging: Print search_results structure
91
+ print(f"Type of search_results: {type(search_results)}")
92
+ if search_results:
93
+ print(f"Type of first element in search_results: {type(search_results[0])}")
94
+ print(f"Content of first element in search_results: {search_results[0]}")
95
+
96
  # 5. Extract source documents for citation
97
  source_documents = [
98
  {
99
+ "source": result.payload.get("source") if result.payload else "Unknown",
100
+ "text": result.payload.get("text") if result.payload else "N/A",
101
  "score": result.score
102
  }
103
  for result in search_results