Spaces:
Configuration error
Configuration error
Upload 11 files
Browse files- .env +3 -0
- .gitignore +13 -0
- .python-version +1 -0
- Dockerfile +23 -0
- README.md +0 -11
- experiments.ipynb +1412 -0
- main.py +88 -0
- pyproject.toml +179 -0
- requirements.txt +11 -0
- test_query.py +27 -0
- uv.lock +0 -0
.env
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GEMINI_API_KEY = "<your-gemini-api-key>"
|
| 2 |
+
PINECONE_API_KEY = "<your-pinecone-api-key>"
|
| 3 |
+
OPENAI_API_KEY = "<your-openai-api-key>"
|
.gitignore
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
| 11 |
+
.env
|
| 12 |
+
env
|
| 13 |
+
.env/
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use official Python image as base
|
| 2 |
+
FROM python:3.12-slim
|
| 3 |
+
|
| 4 |
+
# Set working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Copy project files
|
| 8 |
+
COPY . /app
|
| 9 |
+
|
| 10 |
+
# Install system dependencies (if needed)
|
| 11 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 12 |
+
build-essential \
|
| 13 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 14 |
+
|
| 15 |
+
# Install Python dependencies
|
| 16 |
+
RUN pip install --upgrade pip \
|
| 17 |
+
&& pip install -r requirements.txt
|
| 18 |
+
|
| 19 |
+
# Expose port (if using uvicorn or similar)
|
| 20 |
+
EXPOSE 8000
|
| 21 |
+
|
| 22 |
+
# Default command (update if your entrypoint is different)
|
| 23 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Rag Application
|
| 3 |
-
emoji: 💻
|
| 4 |
-
colorFrom: purple
|
| 5 |
-
colorTo: pink
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
short_description: HackrX hackathon project
|
| 9 |
-
---
|
| 10 |
-
|
| 11 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
experiments.ipynb
ADDED
|
@@ -0,0 +1,1412 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "7896ff7a",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [
|
| 9 |
+
{
|
| 10 |
+
"data": {
|
| 11 |
+
"text/plain": [
|
| 12 |
+
"'c:\\\\code\\\\Bajaj HackRx\\\\Rag_app'"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
"execution_count": 1,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"output_type": "execute_result"
|
| 18 |
+
}
|
| 19 |
+
],
|
| 20 |
+
"source": [
|
| 21 |
+
"%pwd"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 5,
|
| 27 |
+
"id": "8638c1e6",
|
| 28 |
+
"metadata": {},
|
| 29 |
+
"outputs": [
|
| 30 |
+
{
|
| 31 |
+
"name": "stdout",
|
| 32 |
+
"output_type": "stream",
|
| 33 |
+
"text": [
|
| 34 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\__init__.py\n",
|
| 35 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\config\\__init__.py\n",
|
| 36 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\embedding\\__init__.py\n",
|
| 37 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\ingestion\\__init__.py\n",
|
| 38 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\reseasoning\\__init__.py\n",
|
| 39 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\retrieval\\__init__.py\n",
|
| 40 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\schemas\\__init__.py\n",
|
| 41 |
+
"Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\utils\\__init__.py\n"
|
| 42 |
+
]
|
| 43 |
+
}
|
| 44 |
+
],
|
| 45 |
+
"source": [
|
| 46 |
+
"import os \n",
|
| 47 |
+
"\n",
|
| 48 |
+
"for directories in os.walk(\"c:\\\\code\\\\Bajaj HackRx\\\\Rag_app\\\\app\"):\n",
|
| 49 |
+
" init_path = os.path.join(directories[0], '__init__.py')\n",
|
| 50 |
+
" if not os.path.exists(init_path):\n",
|
| 51 |
+
" with open(init_path, 'w') as init_file:\n",
|
| 52 |
+
" init_file.write(\"init file\")\n",
|
| 53 |
+
" print(f\"Created: {init_path}\")"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "markdown",
|
| 58 |
+
"id": "4fedace2",
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"source": [
|
| 61 |
+
"## 1. Input document\n",
|
| 62 |
+
"### Input Requirements:\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"- Process PDFs, DOCX, and email documents\n",
|
| 65 |
+
"- Handle policy/contract data efficiently\n",
|
| 66 |
+
"- Parse natural language queries"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "code",
|
| 71 |
+
"execution_count": 7,
|
| 72 |
+
"id": "d47f278d",
|
| 73 |
+
"metadata": {},
|
| 74 |
+
"outputs": [],
|
| 75 |
+
"source": [
|
| 76 |
+
"import fitz\n",
|
| 77 |
+
"from langchain_core.documents import Document\n",
|
| 78 |
+
"from langchain_groq import ChatGroq\n",
|
| 79 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
| 80 |
+
"from langchain.schema.messages import HumanMessage\n",
|
| 81 |
+
"from langchain_community.vectorstores import FAISS\n",
|
| 82 |
+
"import os \n",
|
| 83 |
+
"from langchain.prompts import PromptTemplate\n",
|
| 84 |
+
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
| 85 |
+
"from dotenv import load_dotenv\n",
|
| 86 |
+
"load_dotenv()\n",
|
| 87 |
+
"import pymupdf"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"execution_count": 2,
|
| 93 |
+
"id": "b7a58fc9",
|
| 94 |
+
"metadata": {},
|
| 95 |
+
"outputs": [],
|
| 96 |
+
"source": [
|
| 97 |
+
"api_key= os.getenv(\"GEMINI_API_KEY\")"
|
| 98 |
+
]
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"cell_type": "code",
|
| 102 |
+
"execution_count": 3,
|
| 103 |
+
"id": "6d065c7c",
|
| 104 |
+
"metadata": {},
|
| 105 |
+
"outputs": [],
|
| 106 |
+
"source": [
|
| 107 |
+
"from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
|
| 108 |
+
"\n",
|
| 109 |
+
"embeddings = GoogleGenerativeAIEmbeddings(model = \"models/gemini-embedding-001\",google_api_key = api_key)\n",
|
| 110 |
+
"vector = embeddings.embed_query(\"hello, world\")"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"cell_type": "code",
|
| 115 |
+
"execution_count": 9,
|
| 116 |
+
"id": "d0706163",
|
| 117 |
+
"metadata": {},
|
| 118 |
+
"outputs": [
|
| 119 |
+
{
|
| 120 |
+
"data": {
|
| 121 |
+
"text/plain": [
|
| 122 |
+
"[-0.02842607907950878,\n",
|
| 123 |
+
" 0.004132709465920925,\n",
|
| 124 |
+
" 0.010386144742369652,\n",
|
| 125 |
+
" -0.09004563093185425,\n",
|
| 126 |
+
" -0.0044305226765573025]"
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
"execution_count": 9,
|
| 130 |
+
"metadata": {},
|
| 131 |
+
"output_type": "execute_result"
|
| 132 |
+
}
|
| 133 |
+
],
|
| 134 |
+
"source": [
|
| 135 |
+
"vector[:5]"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"cell_type": "code",
|
| 140 |
+
"execution_count": 5,
|
| 141 |
+
"id": "01d64928",
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [],
|
| 144 |
+
"source": [
|
| 145 |
+
"import requests\n",
|
| 146 |
+
"url = \"https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D\"\n",
|
| 147 |
+
"response = requests.get(url)"
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"cell_type": "code",
|
| 152 |
+
"execution_count": 94,
|
| 153 |
+
"id": "80cf7260",
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [],
|
| 156 |
+
"source": [
|
| 157 |
+
"import requests\n",
|
| 158 |
+
"url = \"https://docs.google.com/document/d/13pujQKEZS37mEHEfWDnaqb2FlvDnDwzkuJX88Y9w9EA/edit?usp=sharing\"\n",
|
| 159 |
+
"response = requests.get(url)"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 95,
|
| 165 |
+
"id": "56afd5c0",
|
| 166 |
+
"metadata": {},
|
| 167 |
+
"outputs": [
|
| 168 |
+
{
|
| 169 |
+
"data": {
|
| 170 |
+
"text/plain": [
|
| 171 |
+
"'text/html; charset=utf-8'"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
"execution_count": 95,
|
| 175 |
+
"metadata": {},
|
| 176 |
+
"output_type": "execute_result"
|
| 177 |
+
}
|
| 178 |
+
],
|
| 179 |
+
"source": [
|
| 180 |
+
"response.headers['Content-Type']"
|
| 181 |
+
]
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"cell_type": "code",
|
| 185 |
+
"execution_count": 8,
|
| 186 |
+
"id": "4d3fe1fb",
|
| 187 |
+
"metadata": {},
|
| 188 |
+
"outputs": [
|
| 189 |
+
{
|
| 190 |
+
"data": {
|
| 191 |
+
"text/plain": [
|
| 192 |
+
"157"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
"execution_count": 8,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"output_type": "execute_result"
|
| 198 |
+
}
|
| 199 |
+
],
|
| 200 |
+
"source": [
|
| 201 |
+
"\n",
|
| 202 |
+
"response.raise_for_status()\n",
|
| 203 |
+
"pdf_bytes = response.content\n",
|
| 204 |
+
"doc = pymupdf.open(stream=pdf_bytes, filetype=\"pdf\")\n",
|
| 205 |
+
"text = \"\"\n",
|
| 206 |
+
"splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
|
| 207 |
+
"pages = 0\n",
|
| 208 |
+
"from uuid import uuid4\n",
|
| 209 |
+
"uuid = str(uuid4())\n",
|
| 210 |
+
"for i,page in enumerate(doc): \n",
|
| 211 |
+
" text += page.get_text()\n",
|
| 212 |
+
" uuid = str(uuid4())\n",
|
| 213 |
+
" if text.strip():\n",
|
| 214 |
+
" temp_doc = Document(page_content = text, metadata={\n",
|
| 215 |
+
" \"doc_id\": uuid,\n",
|
| 216 |
+
" \"page\":i,\n",
|
| 217 |
+
" \"chunk_id\": f\"{uuid}_p{i}\",\n",
|
| 218 |
+
" \"type\":\"text\"\n",
|
| 219 |
+
" })\n",
|
| 220 |
+
" text_chunks = splitter.split_documents([temp_doc])\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"len(text_chunks)"
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"cell_type": "code",
|
| 227 |
+
"execution_count": 9,
|
| 228 |
+
"id": "08cfbca7",
|
| 229 |
+
"metadata": {},
|
| 230 |
+
"outputs": [
|
| 231 |
+
{
|
| 232 |
+
"data": {
|
| 233 |
+
"text/plain": [
|
| 234 |
+
"list"
|
| 235 |
+
]
|
| 236 |
+
},
|
| 237 |
+
"execution_count": 9,
|
| 238 |
+
"metadata": {},
|
| 239 |
+
"output_type": "execute_result"
|
| 240 |
+
}
|
| 241 |
+
],
|
| 242 |
+
"source": [
|
| 243 |
+
"type(text_chunks)"
|
| 244 |
+
]
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"cell_type": "code",
|
| 248 |
+
"execution_count": null,
|
| 249 |
+
"id": "c8f47031",
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"outputs": [],
|
| 252 |
+
"source": [
|
| 253 |
+
"splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
|
| 254 |
+
"pages = 0\n",
|
| 255 |
+
"from uuid import uuid4\n",
|
| 256 |
+
"uuid = str(uuid4())\n",
|
| 257 |
+
"for i,page in enumerate(doc): \n",
|
| 258 |
+
" text += page.get_text()\n",
|
| 259 |
+
" uuid = str(uuid4())\n",
|
| 260 |
+
" if text.strip():\n",
|
| 261 |
+
" temp_doc = Document(page_content = text, metadata={\n",
|
| 262 |
+
" \"doc_id\": uuid,\n",
|
| 263 |
+
" \"page\":i,\n",
|
| 264 |
+
" \"chunk_id\": f\"{uuid}_p{i}\",\n",
|
| 265 |
+
" \"type\":\"text\"\n",
|
| 266 |
+
" })\n",
|
| 267 |
+
" text_chunks = splitter.split_documents([temp_doc])\n",
|
| 268 |
+
"\n",
|
| 269 |
+
"len(text_chunks)"
|
| 270 |
+
]
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"cell_type": "code",
|
| 274 |
+
"execution_count": 5,
|
| 275 |
+
"id": "6fe8d9ab",
|
| 276 |
+
"metadata": {},
|
| 277 |
+
"outputs": [
|
| 278 |
+
{
|
| 279 |
+
"data": {
|
| 280 |
+
"text/plain": [
|
| 281 |
+
"157"
|
| 282 |
+
]
|
| 283 |
+
},
|
| 284 |
+
"execution_count": 5,
|
| 285 |
+
"metadata": {},
|
| 286 |
+
"output_type": "execute_result"
|
| 287 |
+
}
|
| 288 |
+
],
|
| 289 |
+
"source": [
|
| 290 |
+
"pages = 0\n",
|
| 291 |
+
"from uuid import uuid4\n",
|
| 292 |
+
"uuid = str(uuid4())\n",
|
| 293 |
+
"for i,page in enumerate(doc): \n",
|
| 294 |
+
" text += page.get_text()\n",
|
| 295 |
+
" uuid = str(uuid4())\n",
|
| 296 |
+
" if text.strip():\n",
|
| 297 |
+
" temp_doc = Document(page_content = text, metadata={\n",
|
| 298 |
+
" \"doc_id\": uuid,\n",
|
| 299 |
+
" \"page\":i,\n",
|
| 300 |
+
" \"chunk_id\": f\"{uuid}_p{i}\",\n",
|
| 301 |
+
" \"type\":\"text\"\n",
|
| 302 |
+
" })\n",
|
| 303 |
+
" text_chunks = splitter.split_documents([temp_doc])\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"len(text_chunks)"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": 124,
|
| 311 |
+
"id": "b7456368",
|
| 312 |
+
"metadata": {},
|
| 313 |
+
"outputs": [
|
| 314 |
+
{
|
| 315 |
+
"name": "stdout",
|
| 316 |
+
"output_type": "stream",
|
| 317 |
+
"text": [
|
| 318 |
+
"National Insurance Co. Ltd. \n",
|
| 319 |
+
"Premises No. 18-0374, Plot no. CBD-81, \n",
|
| 320 |
+
"New Town, Kolkata - 700156 \n",
|
| 321 |
+
"Page 1 of 25 \n",
|
| 322 |
+
"National Parivar Mediclaim Plus Policy \n",
|
| 323 |
+
"UIN: NICHLIP25039V032425 \n",
|
| 324 |
+
" \n",
|
| 325 |
+
"National Insurance Company Limited \n",
|
| 326 |
+
" \n",
|
| 327 |
+
" \n",
|
| 328 |
+
" \n",
|
| 329 |
+
" \n",
|
| 330 |
+
" \n",
|
| 331 |
+
"CIN - U10200WB1906GOI001713 \n",
|
| 332 |
+
"IRDAI Regn. No. – 58 \n",
|
| 333 |
+
" \n",
|
| 334 |
+
" Issuing Office \n",
|
| 335 |
+
"National Parivar Mediclaim Plus Policy \n",
|
| 336 |
+
" \n",
|
| 337 |
+
"Whereas the Proposer designated in the schedule hereto has by a Proposal together with Declaration, which shall be the basis of \n",
|
| 338 |
+
"this contract and is deemed to be incorporated herein, has applied to National Insurance Company Ltd. (hereinafter called the \n",
|
| 339 |
+
"Company), for the insurance hereinafter set forth, in respect of person(s)/ family members named in the schedule hereto\n"
|
| 340 |
+
]
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"source": [
|
| 344 |
+
"print(text_chunks[0].page_content)"
|
| 345 |
+
]
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"cell_type": "code",
|
| 349 |
+
"execution_count": 125,
|
| 350 |
+
"id": "84e3b7e6",
|
| 351 |
+
"metadata": {},
|
| 352 |
+
"outputs": [],
|
| 353 |
+
"source": [
|
| 354 |
+
"from uuid import uuid4\n",
|
| 355 |
+
"uuids = [str(uuid4()) for _ in range(len(text_chunks)) ]"
|
| 356 |
+
]
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"cell_type": "markdown",
|
| 360 |
+
"id": "a7b3a0a7",
|
| 361 |
+
"metadata": {},
|
| 362 |
+
"source": [
|
| 363 |
+
"### Setting up Pinecone Vector Store"
|
| 364 |
+
]
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"cell_type": "code",
|
| 368 |
+
"execution_count": 1,
|
| 369 |
+
"id": "6a98c3e3",
|
| 370 |
+
"metadata": {},
|
| 371 |
+
"outputs": [],
|
| 372 |
+
"source": [
|
| 373 |
+
"import os\n",
|
| 374 |
+
"from dotenv import load_dotenv\n",
|
| 375 |
+
"load_dotenv()\n",
|
| 376 |
+
"pinecone_key = os.getenv(\"PINECONE_API_KEY\")\n"
|
| 377 |
+
]
|
| 378 |
+
},
|
| 379 |
+
{
|
| 380 |
+
"cell_type": "code",
|
| 381 |
+
"execution_count": 2,
|
| 382 |
+
"id": "e9bd7561",
|
| 383 |
+
"metadata": {},
|
| 384 |
+
"outputs": [
|
| 385 |
+
{
|
| 386 |
+
"data": {
|
| 387 |
+
"text/plain": [
|
| 388 |
+
"<pinecone.pinecone.Pinecone at 0x1e0239e9e80>"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
"execution_count": 2,
|
| 392 |
+
"metadata": {},
|
| 393 |
+
"output_type": "execute_result"
|
| 394 |
+
}
|
| 395 |
+
],
|
| 396 |
+
"source": [
|
| 397 |
+
"from pinecone import Pinecone\n",
|
| 398 |
+
"pc = Pinecone(api_key=pinecone_key)\n",
|
| 399 |
+
"pc\n"
|
| 400 |
+
]
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"cell_type": "code",
|
| 404 |
+
"execution_count": 105,
|
| 405 |
+
"id": "07746b7f",
|
| 406 |
+
"metadata": {},
|
| 407 |
+
"outputs": [
|
| 408 |
+
{
|
| 409 |
+
"name": "stdout",
|
| 410 |
+
"output_type": "stream",
|
| 411 |
+
"text": [
|
| 412 |
+
"2025-08-13-16-36\n"
|
| 413 |
+
]
|
| 414 |
+
}
|
| 415 |
+
],
|
| 416 |
+
"source": [
|
| 417 |
+
"from pinecone import ServerlessSpec\n",
|
| 418 |
+
"from datetime import datetime\n",
|
| 419 |
+
"current_time = datetime.now()\n",
|
| 420 |
+
"time_string = current_time.strftime(\"%Y-%m-%d-%H-%M\")\n",
|
| 421 |
+
"print(time_string)\n",
|
| 422 |
+
"index_name = f\"hackrx-index{time_string}\"\n",
|
| 423 |
+
"# index_name = \"hackrx-index\"\n",
|
| 424 |
+
"if not pc.has_index(index_name):\n",
|
| 425 |
+
" pc.create_index(\n",
|
| 426 |
+
" name = index_name,\n",
|
| 427 |
+
" dimension=1536,\n",
|
| 428 |
+
" metric=\"cosine\",\n",
|
| 429 |
+
" spec = ServerlessSpec(cloud=\"aws\", region=\"us-east-1\")\n",
|
| 430 |
+
" )\n",
|
| 431 |
+
"\n",
|
| 432 |
+
"index = pc.Index(index_name)"
|
| 433 |
+
]
|
| 434 |
+
},
|
| 435 |
+
{
|
| 436 |
+
"cell_type": "code",
|
| 437 |
+
"execution_count": 4,
|
| 438 |
+
"id": "e6af117d",
|
| 439 |
+
"metadata": {},
|
| 440 |
+
"outputs": [
|
| 441 |
+
{
|
| 442 |
+
"data": {
|
| 443 |
+
"text/plain": [
|
| 444 |
+
"<pinecone.db_data.index.Index at 0x1e023e2c6e0>"
|
| 445 |
+
]
|
| 446 |
+
},
|
| 447 |
+
"execution_count": 4,
|
| 448 |
+
"metadata": {},
|
| 449 |
+
"output_type": "execute_result"
|
| 450 |
+
}
|
| 451 |
+
],
|
| 452 |
+
"source": [
|
| 453 |
+
"index"
|
| 454 |
+
]
|
| 455 |
+
},
|
| 456 |
+
{
|
| 457 |
+
"cell_type": "code",
|
| 458 |
+
"execution_count": 109,
|
| 459 |
+
"id": "7ee7c02b",
|
| 460 |
+
"metadata": {},
|
| 461 |
+
"outputs": [
|
| 462 |
+
{
|
| 463 |
+
"name": "stderr",
|
| 464 |
+
"output_type": "stream",
|
| 465 |
+
"text": [
|
| 466 |
+
"C:\\Users\\hp\\AppData\\Local\\Temp\\ipykernel_9600\\2571001968.py:7: LangChainDeprecationWarning: The class `OpenAIEmbeddings` was deprecated in LangChain 0.0.9 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import OpenAIEmbeddings``.\n",
|
| 467 |
+
" embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n"
|
| 468 |
+
]
|
| 469 |
+
}
|
| 470 |
+
],
|
| 471 |
+
"source": [
|
| 472 |
+
"# from langchain_openai import \n",
|
| 473 |
+
"from langchain.embeddings import OpenAIEmbeddings\n",
|
| 474 |
+
"\n",
|
| 475 |
+
"from dotenv import load_dotenv\n",
|
| 476 |
+
"load_dotenv()\n",
|
| 477 |
+
"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\")\n",
|
| 478 |
+
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
|
| 479 |
+
]
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"cell_type": "code",
|
| 483 |
+
"execution_count": 6,
|
| 484 |
+
"id": "43151b5f",
|
| 485 |
+
"metadata": {},
|
| 486 |
+
"outputs": [],
|
| 487 |
+
"source": [
|
| 488 |
+
"from langchain_pinecone import PineconeVectorStore\n",
|
| 489 |
+
"vector_store = PineconeVectorStore(index = index, embedding=embeddings)"
|
| 490 |
+
]
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"cell_type": "code",
|
| 494 |
+
"execution_count": 133,
|
| 495 |
+
"id": "03fb29a9",
|
| 496 |
+
"metadata": {},
|
| 497 |
+
"outputs": [
|
| 498 |
+
{
|
| 499 |
+
"data": {
|
| 500 |
+
"text/plain": [
|
| 501 |
+
"['174168c5-ee26-4f4a-9aac-82c890cae977',\n",
|
| 502 |
+
" 'cd33ff12-209e-4d3b-9ac9-cfffbb2f160f',\n",
|
| 503 |
+
" '8f7cf61f-8b6a-4412-a45e-90614b04e4bf',\n",
|
| 504 |
+
" '8696beaa-fa55-4ac6-9000-0f2be8f1a39b',\n",
|
| 505 |
+
" 'f89f37a0-5382-4202-a320-c3374ca97470',\n",
|
| 506 |
+
" '38e86c9f-a9a1-49a1-b1b6-0e0fd9242012',\n",
|
| 507 |
+
" '9239c5e9-493e-45cc-b621-962c7cdd18ff',\n",
|
| 508 |
+
" '1e3c9771-f5f2-45fd-b979-a0851d6285ff',\n",
|
| 509 |
+
" '54a9e37f-84d9-4ea5-8a47-be7a500d94b6',\n",
|
| 510 |
+
" '2ed09d29-6900-44d8-83e4-d503a5dcde15',\n",
|
| 511 |
+
" '8103e53c-1a21-47aa-9e23-0a817ddbdca5',\n",
|
| 512 |
+
" '0e879567-5c16-42b1-bd9c-bf919ef5b394',\n",
|
| 513 |
+
" 'fe2003c6-8409-4045-acda-1f1c6c6a6699',\n",
|
| 514 |
+
" '516acaa0-5a4d-4b0e-ac3c-5f03db8084b7',\n",
|
| 515 |
+
" '4bca0e17-ca5a-43bf-b0c9-6f2572bd2f5f',\n",
|
| 516 |
+
" '09a0a182-ee47-4569-a77e-b4a8adb82682',\n",
|
| 517 |
+
" '3239e274-c789-410f-845e-791c0c4b6b03',\n",
|
| 518 |
+
" 'fe825c23-84d9-4ad2-a853-74f8a1d02928',\n",
|
| 519 |
+
" 'e5404438-f6ec-4af5-8a5c-4fa884e7022f',\n",
|
| 520 |
+
" 'c9aeb34b-b556-46cf-b762-56ea338653e8',\n",
|
| 521 |
+
" '9b6d478d-32fa-4ffd-96ca-95822c255cf1',\n",
|
| 522 |
+
" '6652b6b8-1da2-4b15-9aea-26f79bc0a6d5',\n",
|
| 523 |
+
" 'e8d57256-a599-4f9e-93e8-392334098dcb',\n",
|
| 524 |
+
" '5491919b-0aa5-4080-9ee5-c319bc53749b',\n",
|
| 525 |
+
" 'a00fec0d-79bc-47be-97ea-dbdcfefb828f',\n",
|
| 526 |
+
" '71645e15-9440-40f2-b982-0376da83d0e3',\n",
|
| 527 |
+
" '6df619c8-ef76-44de-b74a-2116148d89aa',\n",
|
| 528 |
+
" 'f8151f79-56be-4fed-b364-b22ea4f2a2d0',\n",
|
| 529 |
+
" '739c7bc6-0f9d-45ce-9bf0-db7c7dbc939b',\n",
|
| 530 |
+
" '791401f5-46fe-4a9c-992b-da0135d72ae8',\n",
|
| 531 |
+
" '1e67b489-9e3f-4e48-8807-d694963ede5b',\n",
|
| 532 |
+
" '8e3892a6-7e26-4ecb-b8d0-c6a48126dbca',\n",
|
| 533 |
+
" 'ffbcd2e2-8eb3-4e1b-997d-f0b8f004b95f',\n",
|
| 534 |
+
" 'ac57e24d-8641-46d4-93aa-596664ed5a63',\n",
|
| 535 |
+
" 'fc31c598-4716-4312-9091-75d9c879cac5',\n",
|
| 536 |
+
" 'b08141e7-0519-4e92-bf5c-56849bc45c7f',\n",
|
| 537 |
+
" '2e7a4727-f118-461d-a4aa-7f223ffeffcb',\n",
|
| 538 |
+
" '1dcdf7e5-9d2f-4360-b9a9-ea00b9398fb4',\n",
|
| 539 |
+
" '716f55ba-b624-43ae-98ce-0f77b063f17f',\n",
|
| 540 |
+
" 'a42dbb78-85dc-4ee6-9b9c-c7a77093bae8',\n",
|
| 541 |
+
" '57d5d937-2fef-4aae-86bb-6002523fa30f',\n",
|
| 542 |
+
" '3400762f-1110-4e5f-a726-4f3edea01f02',\n",
|
| 543 |
+
" '2d5b5f30-63d6-4399-8e69-806697e64307',\n",
|
| 544 |
+
" '8feb1525-189a-45c1-ac23-0b33ba4994f3',\n",
|
| 545 |
+
" '0a503b10-b5da-4cb5-a1b8-bd6f0f34ce64',\n",
|
| 546 |
+
" '725010be-ebc8-45ee-be3e-2af2a8d1a5ee',\n",
|
| 547 |
+
" '52016839-2213-4db2-b397-a51e7bca1882',\n",
|
| 548 |
+
" '661398e1-eff3-4e1a-9ff4-b738bd2ce606',\n",
|
| 549 |
+
" '5a37f469-e018-4866-9224-30734dda0161',\n",
|
| 550 |
+
" '8028ec15-908a-4e98-8756-bff08d3ae96f',\n",
|
| 551 |
+
" 'd8541648-8d20-4f79-a5bb-4160106b0795',\n",
|
| 552 |
+
" '57aa5fb9-a383-4174-b88c-fd26e0c2e1e9',\n",
|
| 553 |
+
" '8d539cf3-12bd-4844-b306-9ead1cb12540',\n",
|
| 554 |
+
" '30e4322e-cf5e-4cd7-a56d-5529391d1b48',\n",
|
| 555 |
+
" '81f18135-fb7e-49d5-b1ce-1c833246b7e6',\n",
|
| 556 |
+
" 'c8f70828-1fc6-40cf-acd8-f7910841a3f6',\n",
|
| 557 |
+
" '2ea87054-e3cb-436b-a418-e7dde7b66488',\n",
|
| 558 |
+
" 'ec8797e6-1e89-403d-8f4b-a7528b856a9a',\n",
|
| 559 |
+
" '9ec0c175-c01a-4713-8042-a9dc5057cd6e',\n",
|
| 560 |
+
" 'df1dcb67-07a8-49d6-8bfa-2a0b857907ca',\n",
|
| 561 |
+
" 'ce065535-e2ca-45f4-b278-96d46356f511',\n",
|
| 562 |
+
" '161f51df-b3ca-4c54-b686-c8b68543da7e',\n",
|
| 563 |
+
" '9528a8fc-45e0-43bc-9933-20a0e5d25a91',\n",
|
| 564 |
+
" '6c3840f5-8d8f-4ed2-bf17-f82a88f8cfc4',\n",
|
| 565 |
+
" '46a9b7b4-c460-4ae4-afa7-6bb8478f6072',\n",
|
| 566 |
+
" '76df9287-461a-4cc8-9f34-7420f44ff0bf',\n",
|
| 567 |
+
" '80265ffa-ef7c-4721-956b-6010745f577c',\n",
|
| 568 |
+
" 'd09fc759-c639-4f5c-9a01-5f0f3bc359af',\n",
|
| 569 |
+
" '78b04c1b-38ed-493a-b04c-aebb454c87e0',\n",
|
| 570 |
+
" 'b6e68d75-11fe-428c-898f-56c58a6f2206',\n",
|
| 571 |
+
" '45255213-2a89-428e-a97a-371a1f78bfdd',\n",
|
| 572 |
+
" '516b872e-3c3c-442d-8be3-254da689bc13',\n",
|
| 573 |
+
" 'e16f0153-1bd8-45a0-af2b-35d581e23e66',\n",
|
| 574 |
+
" 'c36378f3-8839-48ba-ac46-86916439de77',\n",
|
| 575 |
+
" '82261df7-bbbc-47d6-9124-386e12f28c63',\n",
|
| 576 |
+
" 'ebd9c5d0-9bec-4901-946d-d663d1e66e89',\n",
|
| 577 |
+
" 'd33ef9e8-db28-4f12-9e1e-45041b0b0193',\n",
|
| 578 |
+
" '867171b7-f18f-451b-8fe0-879fedc98c11',\n",
|
| 579 |
+
" 'fcbed948-b63d-4462-adc5-e3d48cd3f27c',\n",
|
| 580 |
+
" 'c21c5ccd-c110-4790-8a83-6a6f1d6d5bab',\n",
|
| 581 |
+
" '38d43887-22f9-4d11-b94d-dc96de2f3313',\n",
|
| 582 |
+
" 'fdfa031f-8160-4623-8b9a-1e6d2ad9ac0b',\n",
|
| 583 |
+
" 'd51bbdba-7403-4a33-8c3b-e98931fc41a9',\n",
|
| 584 |
+
" 'a11eca40-2a42-4213-9996-d7d4242f41f3',\n",
|
| 585 |
+
" '1e159458-4db8-407f-9729-3b79f57870b2',\n",
|
| 586 |
+
" 'dc3582df-3f5b-4865-b9b3-8122455b22d8',\n",
|
| 587 |
+
" 'd51ea955-c3f9-4b68-a4d8-6f36f0087e90',\n",
|
| 588 |
+
" '8c0b67e8-1355-489f-bdbb-e16eae04a585',\n",
|
| 589 |
+
" '7c35e509-23d3-4d80-9863-3172bc1c7963',\n",
|
| 590 |
+
" '94b2eb43-5a75-4e1f-a9ff-58029cf50431',\n",
|
| 591 |
+
" '9e885f6e-67d6-4fc7-9fc0-25c2443628d6',\n",
|
| 592 |
+
" 'b3cd4d17-6feb-417a-a34d-4fab409e22b3',\n",
|
| 593 |
+
" 'bead51f3-b31f-47b1-b290-1616a3ed8008',\n",
|
| 594 |
+
" '1e197e22-0230-4c09-80ae-c7d63433ac64',\n",
|
| 595 |
+
" '68395eb4-67a4-4262-af1c-9961c787f8cc',\n",
|
| 596 |
+
" '84c5b8dd-098b-40e4-b86f-c341dcf8395d',\n",
|
| 597 |
+
" '1361b7ee-849f-4d68-b3c3-42e9103ec6fa',\n",
|
| 598 |
+
" '4fbe5da0-6f3e-42f3-b99e-6081f44e0cd8',\n",
|
| 599 |
+
" 'ac1975b1-d034-488f-b613-28cf3782d0a8',\n",
|
| 600 |
+
" '0017d512-f135-4123-b694-665bc6e11e64',\n",
|
| 601 |
+
" 'f13798aa-3424-4b38-a7bf-f5abe366120f',\n",
|
| 602 |
+
" '9dfcec61-f8a7-46be-98ae-7f54a5b53e7b',\n",
|
| 603 |
+
" '44f6beba-fca5-4509-b814-662ddf5fad29',\n",
|
| 604 |
+
" 'd87aedeb-16f1-4ccf-8728-fbbdfde6310f',\n",
|
| 605 |
+
" '3eeb99d0-9c2f-4854-befd-3001154ca693',\n",
|
| 606 |
+
" 'dbb2f8f2-d0a4-4288-8bbf-be24cb25360e',\n",
|
| 607 |
+
" '0afd4ed7-a742-4230-a3e8-f59b4ed3af0f',\n",
|
| 608 |
+
" '68cc2545-e9ee-4772-a968-5affe4eb80b7',\n",
|
| 609 |
+
" '772df9fb-4da6-45a3-9cfc-fb5fec014153',\n",
|
| 610 |
+
" 'a5bf6d49-afad-48a6-8ec7-d3b3e1c945d4',\n",
|
| 611 |
+
" '3c4e4cfe-ce5f-48e4-bb2e-7d7080a6d9df',\n",
|
| 612 |
+
" 'fdd184e8-2c0c-4a0d-aad1-425625c0acd8',\n",
|
| 613 |
+
" '6059f8f3-bb47-4e8e-82f1-3b3cf42293ce',\n",
|
| 614 |
+
" '32e22ff0-133e-4f0c-b9f0-d18108eada71',\n",
|
| 615 |
+
" 'cd3a73dc-b2df-494a-80c1-7edd34655e6b',\n",
|
| 616 |
+
" '081bb658-b2de-4470-9bf0-599595e32070',\n",
|
| 617 |
+
" '9c9089be-1648-49ee-a296-4a2b0e629962',\n",
|
| 618 |
+
" 'f90feeb1-dbb3-45c4-a141-aaffac9e9735',\n",
|
| 619 |
+
" '931dbe4e-27ae-407c-b0a1-552cfadcf124',\n",
|
| 620 |
+
" '5d1afa42-297f-475b-bbb3-d82b03b972f8',\n",
|
| 621 |
+
" '75e2ee6b-9b59-4067-b59d-e12a6a29fb1f',\n",
|
| 622 |
+
" 'd1f10078-a911-40d3-8def-36e11119d18b',\n",
|
| 623 |
+
" '7380d7e5-8d51-492e-b6fe-042863fdb84e',\n",
|
| 624 |
+
" '25d829a5-e681-4605-91b3-84b5589bca85',\n",
|
| 625 |
+
" 'b901a4ad-11fa-4241-b6c0-2f1ce4ab5913',\n",
|
| 626 |
+
" 'bcd5b4b2-efa6-4b89-a639-aae6afea19bf',\n",
|
| 627 |
+
" '4f657335-b7e7-4918-b597-664db98ab9e5',\n",
|
| 628 |
+
" '6b21048e-5bde-481a-9450-ea92c219741e',\n",
|
| 629 |
+
" 'e209717c-6925-4cdd-b4a6-26230031d4b3',\n",
|
| 630 |
+
" '2795137e-f3db-4c3a-a8be-4a4a62e2d83c',\n",
|
| 631 |
+
" '43660962-d892-4a23-bdac-9825c5e00623',\n",
|
| 632 |
+
" 'de594872-f941-4575-b7ec-6e66a222ca9d',\n",
|
| 633 |
+
" '8dd521bd-02d5-44d0-b35b-2bc82c68ca87',\n",
|
| 634 |
+
" 'de06a779-7d61-4240-a0e2-ac0b559469b1',\n",
|
| 635 |
+
" '7892dcff-1b29-495e-a2a4-17cef5a7904d',\n",
|
| 636 |
+
" 'a646b497-4d7a-4f4e-9b5d-4b989da6e26d',\n",
|
| 637 |
+
" '2d9453bd-381f-4e20-bac4-27edbab64a5b',\n",
|
| 638 |
+
" '45b2ff28-1a2b-4ac1-b30c-8e19dbd95943',\n",
|
| 639 |
+
" '1f229b56-0839-495f-8ecb-a281eaaaa452',\n",
|
| 640 |
+
" '763e5982-0827-41f9-b077-054d13782e69',\n",
|
| 641 |
+
" 'c48d5c1b-6a70-41f2-8263-ac35244768e7',\n",
|
| 642 |
+
" '65c4c939-b2a3-4dd8-b9c1-8a4585277859',\n",
|
| 643 |
+
" 'c0954e09-6856-4a52-be96-059b9ad381d2',\n",
|
| 644 |
+
" '484985c8-0f45-4289-904c-6143be565287',\n",
|
| 645 |
+
" 'efedb28c-cc8e-4aad-ae86-e1126dfc960a',\n",
|
| 646 |
+
" 'c41269f7-6a1a-4122-9326-9d4f08f7fa46',\n",
|
| 647 |
+
" '8b53ddf1-8f7f-4902-b7f7-8059725ffb2b',\n",
|
| 648 |
+
" '21ae63cb-649f-4b10-a67d-7f900a4185eb',\n",
|
| 649 |
+
" '80610c06-6a16-44e7-9a43-dd95fd89e720',\n",
|
| 650 |
+
" '8af13230-ea5f-425d-a4f0-e37acd8e7242',\n",
|
| 651 |
+
" '3b8c80fa-a860-4324-9faa-d46848cd62c2',\n",
|
| 652 |
+
" 'd358125e-6b25-4845-9303-b9f94ee9b1d9',\n",
|
| 653 |
+
" '6dea7e26-2408-411f-8867-8251fe672319',\n",
|
| 654 |
+
" '98564a4c-aeda-4556-af00-4ebd23cb407b',\n",
|
| 655 |
+
" '7553a543-572c-4418-917f-9a6e7e62d155',\n",
|
| 656 |
+
" 'c44444d0-f3a6-4f0d-b907-666d0b6c0d08',\n",
|
| 657 |
+
" '12b11de7-032b-404e-9cfe-3d9ba260abd7',\n",
|
| 658 |
+
" '9ac75c85-77d7-4418-8842-7997895d4400',\n",
|
| 659 |
+
" 'e5d580eb-7c8c-451b-9e44-00386a72f47c',\n",
|
| 660 |
+
" '923710d5-ff8f-44ed-97a7-ee40cb69ffba',\n",
|
| 661 |
+
" 'b2990aa7-2b84-48cb-8abc-ee30719c5c86',\n",
|
| 662 |
+
" 'f65e8225-3b98-4702-8b46-74b6703407b7',\n",
|
| 663 |
+
" '992a965a-1c9c-46c7-bfca-4f0c99f33bf6',\n",
|
| 664 |
+
" '18a1af9f-a788-45c7-9827-d6fb07c283d1',\n",
|
| 665 |
+
" '5833c1f7-6d17-4308-919c-9c022e4cf98d',\n",
|
| 666 |
+
" '3541476d-4fd7-4249-ae4f-c86d734001ca',\n",
|
| 667 |
+
" '5f66b974-37ae-4e8a-a2eb-7d72f0d75d3c',\n",
|
| 668 |
+
" '400717e9-430e-4bfc-9deb-f019dd5055a9',\n",
|
| 669 |
+
" '356d2916-f094-48f2-8f5d-4658dd4209a5',\n",
|
| 670 |
+
" '3de80d7b-42c5-456e-9a1b-7cdb75749df4',\n",
|
| 671 |
+
" '156e996d-302b-4dbc-a3a5-db7a518f4a4c',\n",
|
| 672 |
+
" '606c46a9-808c-4d33-bcd4-deac1e3b55a8',\n",
|
| 673 |
+
" 'd9a44989-531c-4237-838a-8393479da64d',\n",
|
| 674 |
+
" '2c26cc4c-251c-4d4f-ba17-309692015c4e',\n",
|
| 675 |
+
" '74d76b64-1674-40c0-8b41-19b1eebe05a7',\n",
|
| 676 |
+
" '71d03984-c626-4160-9430-528be9fedf59',\n",
|
| 677 |
+
" 'cd48ebdf-7d40-4e61-87ab-f9eea638a74d',\n",
|
| 678 |
+
" '8e8b2c73-ce6b-456c-897d-89fbfd75fa26',\n",
|
| 679 |
+
" '9434a9f5-8909-45fb-a023-a4c61e8d4764',\n",
|
| 680 |
+
" '7bd524c2-489a-4594-95bc-01b964c2c64d',\n",
|
| 681 |
+
" 'f8e787dd-ad34-464d-b21f-ed6111c3fa30',\n",
|
| 682 |
+
" '1a79c0b3-9b64-4809-813b-0480de369971',\n",
|
| 683 |
+
" '89aa9b61-59e5-4f78-9f94-c145c753625c',\n",
|
| 684 |
+
" '165c1f92-6a6b-4cce-b088-3a73c8c72c24',\n",
|
| 685 |
+
" 'b7e1f173-6f42-4f91-a5f5-cdc2289548b4',\n",
|
| 686 |
+
" '5320958c-6b8d-4445-9aa8-fb8e652198a5',\n",
|
| 687 |
+
" '8efdbae0-1a07-4278-a657-e05cd1435753',\n",
|
| 688 |
+
" '71a4c5c4-93a3-4839-acc7-d80bbcd4f774',\n",
|
| 689 |
+
" '97463e5d-cecf-4cfa-bef5-5d6e9c8f0791',\n",
|
| 690 |
+
" 'fbbd1d5a-7390-4011-b6c6-701fe5cfc1da',\n",
|
| 691 |
+
" '09b40129-1ddc-4e93-bb40-c93865f4219e',\n",
|
| 692 |
+
" '5569a342-0bde-4356-a38e-af426c796693',\n",
|
| 693 |
+
" 'b09d66b0-245f-440f-a7f4-213bff7ba8ca',\n",
|
| 694 |
+
" '5f62dabc-c220-44ce-b6f8-04adf37186c6',\n",
|
| 695 |
+
" '273d9cd4-590d-4ece-9715-1ed201d3b53d',\n",
|
| 696 |
+
" 'eb5c2c74-0de4-4870-aef4-b65a59fa502d',\n",
|
| 697 |
+
" '5f7f5339-9919-44aa-b10f-449f16ed5df4',\n",
|
| 698 |
+
" '3a890f5b-b380-4ab8-bfa9-f67f74dceac3',\n",
|
| 699 |
+
" 'b47959e7-6038-4733-9e72-beaf98a731e4',\n",
|
| 700 |
+
" 'bb1e60c2-bf86-4e9b-b0d0-8e1b859ff220',\n",
|
| 701 |
+
" 'c6afbf5e-857a-48c7-84f2-9c5cb9a1ef00',\n",
|
| 702 |
+
" '808ddb34-ecc5-4c7e-bb4d-0cd5487f8b84',\n",
|
| 703 |
+
" '3c120e24-5edf-4a53-99e4-5b929162f849',\n",
|
| 704 |
+
" '22a911c4-fe04-43e5-867c-c4529501131d',\n",
|
| 705 |
+
" '0295a993-3a58-4550-a522-66004a6dd0fc',\n",
|
| 706 |
+
" 'a54e8316-67d4-4ab7-9747-876ac1413eb3',\n",
|
| 707 |
+
" 'b5068009-bef5-4bce-af38-081905babf3a',\n",
|
| 708 |
+
" '7d57b14e-fcc6-4bc4-8ed7-9f8616268464',\n",
|
| 709 |
+
" 'f119914b-dda3-4fab-92ed-ac7d45be674c',\n",
|
| 710 |
+
" 'a13eb99f-bf84-4918-8308-6aaebfa44522',\n",
|
| 711 |
+
" '02425f7d-3f2c-4f99-b4b3-dae251f79f9a',\n",
|
| 712 |
+
" '8c070a7b-b0fe-4cad-9b25-e1f703c08041',\n",
|
| 713 |
+
" '458439d7-1a8e-4808-b4bf-fe0a75b569a9',\n",
|
| 714 |
+
" '1db0a9d9-dccd-4347-9cf3-283815f1bc05',\n",
|
| 715 |
+
" '5ecc0797-280b-450a-b6db-fd8352c855bc',\n",
|
| 716 |
+
" '85f323ea-ded5-4cea-87d5-e07e951b1fea',\n",
|
| 717 |
+
" 'd5c87f7b-695b-4f80-8502-5e7116721e67',\n",
|
| 718 |
+
" 'a2fd76f1-a851-495a-b463-44fc537d62cd',\n",
|
| 719 |
+
" '737a76b0-d8d8-49a6-b433-f553a8aa2b58',\n",
|
| 720 |
+
" '5b0db327-3234-468a-ba12-734a481d7e73',\n",
|
| 721 |
+
" '70c8cf89-29fe-4349-b7d3-b3293267fd10',\n",
|
| 722 |
+
" '635b04dd-fa10-4dde-bfcb-168cc8a9bd39',\n",
|
| 723 |
+
" '152b468a-b307-4b8d-b7b3-ee7e152aac78',\n",
|
| 724 |
+
" '8942236c-0e8c-42c5-8e95-7fcf84e89677',\n",
|
| 725 |
+
" '2de3d06a-9d37-4e1f-a4ce-45a3e2cdcf08',\n",
|
| 726 |
+
" '4c1dff30-a002-43e8-b47f-f66855fc13e4',\n",
|
| 727 |
+
" '9254fd90-3ffc-48f8-b091-b9fe81d3b56d',\n",
|
| 728 |
+
" 'b4befb2a-8746-4d75-8397-66010d1baf2c',\n",
|
| 729 |
+
" '97ba14a5-ace8-45b7-b25b-be774aa25410',\n",
|
| 730 |
+
" '25d27bce-5d31-430c-93c6-2c30025a030c',\n",
|
| 731 |
+
" 'c15c3ada-bb90-43f3-a41d-f85a832673b8',\n",
|
| 732 |
+
" '4a353418-e1db-4c6e-ab8e-f3a534b03d2e',\n",
|
| 733 |
+
" '668dfef9-37da-4619-8a0b-71ba993c7ac3',\n",
|
| 734 |
+
" 'ba8233d0-1601-4167-886c-f632e3d077bf',\n",
|
| 735 |
+
" '49cf3301-80b4-46e5-93bd-55712adfae99',\n",
|
| 736 |
+
" '51ced038-b27b-46d1-96e6-923867ca4774',\n",
|
| 737 |
+
" 'ac4e9a83-8715-4a7d-b4a3-b4e7b2ecef9e',\n",
|
| 738 |
+
" '6fe1361f-ede5-4359-90be-d007f6eb03e2',\n",
|
| 739 |
+
" 'cf49d0cc-a941-486f-88ab-88c93cc3f211',\n",
|
| 740 |
+
" 'ee21070d-e45e-4f61-ae1c-a412b659035b',\n",
|
| 741 |
+
" '3744d911-37cc-4645-ba2b-b376718e0afd',\n",
|
| 742 |
+
" 'efff5713-c6cf-4e9a-b5ce-ccd4e443a8b9',\n",
|
| 743 |
+
" 'aeae7c83-6a90-40c6-9fa5-65a8488865af',\n",
|
| 744 |
+
" '3b349ca6-fac5-41c1-b8de-5ee32161f023',\n",
|
| 745 |
+
" '39703730-8667-432f-aceb-8a569bb7d3c7',\n",
|
| 746 |
+
" 'd021eee3-ea5f-454e-a5f9-8ff2b354e05f',\n",
|
| 747 |
+
" '29fab8ce-8c8c-4f76-a59e-00291af854df',\n",
|
| 748 |
+
" '186e7eef-e3ab-4f05-b6ee-2ae65d871393',\n",
|
| 749 |
+
" 'c8b02b23-9dc6-43a9-b0ea-fd42533165cf',\n",
|
| 750 |
+
" '48734804-8b5e-45e8-9576-16f5f3d5da13',\n",
|
| 751 |
+
" '56f12823-504b-46b1-a239-78ebf589063e',\n",
|
| 752 |
+
" '5cea0d13-04f6-4b8a-9b71-ec1a2a5eb191',\n",
|
| 753 |
+
" '147a6f26-3ad6-4929-a074-7e397d3ac134',\n",
|
| 754 |
+
" '4e725c5a-f457-437b-ae47-d5e544a058d3',\n",
|
| 755 |
+
" '39b0e4b9-74ea-461f-8375-31a38af1db59',\n",
|
| 756 |
+
" '2d90a41d-20bc-4f87-adbd-8c5ef5d8f33f',\n",
|
| 757 |
+
" '0108b65b-f4b5-4839-80bc-b5a2f61c35c6',\n",
|
| 758 |
+
" '8002651d-7f26-4c99-a60c-d8df30aadd79',\n",
|
| 759 |
+
" 'bef4b953-c0e3-4822-85e0-94d3a41a2a65',\n",
|
| 760 |
+
" '6b02df41-38d7-4a11-bb14-a87ef004a191',\n",
|
| 761 |
+
" '84c9570a-06ee-4edd-b767-dc2ce45603d9',\n",
|
| 762 |
+
" '71a0f38c-4764-428a-a9a9-f3353fd7c768',\n",
|
| 763 |
+
" '15e8567b-4f1c-4f80-8ce7-aaf83a489933',\n",
|
| 764 |
+
" '0dbdae26-1e9d-4b2c-8957-3f0fd056fb6f',\n",
|
| 765 |
+
" '2888971e-530b-44b5-b38a-ba00b8667439',\n",
|
| 766 |
+
" 'c99e5ddc-0741-4a24-8cd8-cab974c32dbc',\n",
|
| 767 |
+
" '5365117a-4e5e-4683-9cf6-5c9015b328c8',\n",
|
| 768 |
+
" '62194dec-507c-49d8-b809-20c03d5caf0b',\n",
|
| 769 |
+
" '358b61b1-e962-4443-9240-835bc75146cc',\n",
|
| 770 |
+
" '10c92ff4-a8cb-483b-ad78-6dc680d50cd8',\n",
|
| 771 |
+
" 'a6cc759f-2599-4199-a08d-3f3b142af66f',\n",
|
| 772 |
+
" '035bb626-5d0a-4a43-a9cb-66b2bd940d2c',\n",
|
| 773 |
+
" '1a1da71a-e9b8-466d-b5e0-b7e10847b857',\n",
|
| 774 |
+
" '09fe8139-5029-4782-bff9-a0edfce73e2f',\n",
|
| 775 |
+
" '939af366-34d7-47c5-8937-59f0122df0d4',\n",
|
| 776 |
+
" 'f74a0d54-0d45-459d-b988-b2c398cacfd5',\n",
|
| 777 |
+
" 'bbe495a3-7964-4dd0-8c6d-b2a22a8494b2',\n",
|
| 778 |
+
" '11616f61-808c-43a4-8c38-9de0db923d68',\n",
|
| 779 |
+
" '65b9f618-b8aa-42d0-b7e4-321207cf81eb',\n",
|
| 780 |
+
" '7d964d73-3b58-49ab-8a13-fe9add8f015e',\n",
|
| 781 |
+
" '90ac205f-f322-431d-a097-a7fce1729cb2',\n",
|
| 782 |
+
" 'ce338839-0430-47a7-b426-87feabab5320',\n",
|
| 783 |
+
" '09cc0450-5e49-43a2-b7c0-c79afb049eb0',\n",
|
| 784 |
+
" '80500abe-37ef-4483-a40c-aa1ac451cf95',\n",
|
| 785 |
+
" '64b77e67-7115-4941-a04b-2ff4771aa71e',\n",
|
| 786 |
+
" 'cc9a5ffa-2c9c-4096-972a-e5fa9b3cefe9',\n",
|
| 787 |
+
" 'be907e7c-f601-4119-b68c-35a24eb9acfa',\n",
|
| 788 |
+
" '79fee5a5-3d8a-46e9-96bc-340ac4a324aa',\n",
|
| 789 |
+
" 'db7c23df-6888-4b4e-a4b6-58a953c174d3',\n",
|
| 790 |
+
" 'f54d787e-5dfb-4801-bfed-842cd18fa332',\n",
|
| 791 |
+
" 'e18688f1-15fc-400d-8ea3-59d313181cb0',\n",
|
| 792 |
+
" 'f7fba163-e8ea-451f-b1f8-cc70f599f23d',\n",
|
| 793 |
+
" '7f7f8847-1483-4801-96da-8c789d6ac93a',\n",
|
| 794 |
+
" 'e356cda5-b603-4e15-9995-e9411bd8f4f4',\n",
|
| 795 |
+
" 'ab3bdbe6-d223-420d-b66b-d572bf4b14d1',\n",
|
| 796 |
+
" 'b2e2ed1a-e738-4197-a878-d8762814c860',\n",
|
| 797 |
+
" 'b0d31fab-a0f8-4978-a3fd-00fc9754f327',\n",
|
| 798 |
+
" 'af6d58f7-3d4d-4a32-96ba-0376dc945960',\n",
|
| 799 |
+
" '863b5fd8-fedc-42c9-8a8e-2d07670d676c',\n",
|
| 800 |
+
" 'dcc803bf-0601-4a82-a90b-3481b7188b73',\n",
|
| 801 |
+
" 'cefcc7ee-9b1b-4586-983c-fceda1417772',\n",
|
| 802 |
+
" 'b895c11d-3f5d-49f5-b4d8-9f0f9307ee3e',\n",
|
| 803 |
+
" '06a62e87-e561-46af-94f3-9657d2a8e0c2',\n",
|
| 804 |
+
" 'f34de4a4-b5e0-4a7e-81e1-5c7abc066221',\n",
|
| 805 |
+
" '3ad33f70-faaa-4ae2-8660-bf7972a401e0',\n",
|
| 806 |
+
" 'b170a51a-cc14-4a3d-9c39-23f18e7405f5',\n",
|
| 807 |
+
" '4d89bda3-00d8-4db7-a89b-da2b06b29a24',\n",
|
| 808 |
+
" 'e28be583-da3a-4b82-bc86-e25dde6fb02b',\n",
|
| 809 |
+
" 'db982e3e-c076-4e06-9ba6-faed32db3527',\n",
|
| 810 |
+
" '86c5d471-60af-4e96-9cf1-a3b7f5295c47',\n",
|
| 811 |
+
" '9e010af0-ea9d-40f0-aad3-973c0789768c',\n",
|
| 812 |
+
" 'e5baba45-762a-42b5-b32d-4f64a4753b27',\n",
|
| 813 |
+
" '772ee256-aa86-4ae5-925e-acfff2ec76f0',\n",
|
| 814 |
+
" '0ad3ab0a-0bae-46ab-92d3-caf165cefed5']"
|
| 815 |
+
]
|
| 816 |
+
},
|
| 817 |
+
"execution_count": 133,
|
| 818 |
+
"metadata": {},
|
| 819 |
+
"output_type": "execute_result"
|
| 820 |
+
}
|
| 821 |
+
],
|
| 822 |
+
"source": [
|
| 823 |
+
"vector_store.add_documents(documents=text_chunks, ids = uuids)"
|
| 824 |
+
]
|
| 825 |
+
},
|
| 826 |
+
{
|
| 827 |
+
"cell_type": "code",
|
| 828 |
+
"execution_count": 11,
|
| 829 |
+
"id": "7b8b7a46",
|
| 830 |
+
"metadata": {},
|
| 831 |
+
"outputs": [
|
| 832 |
+
{
|
| 833 |
+
"name": "stdout",
|
| 834 |
+
"output_type": "stream",
|
| 835 |
+
"text": [
|
| 836 |
+
"* Page 16 of 25 \n",
|
| 837 |
+
"National Parivar Mediclaim Plus Policy \n",
|
| 838 |
+
"UIN: NICHLIP25039V032425 \n",
|
| 839 |
+
" \n",
|
| 840 |
+
"ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
|
| 841 |
+
"a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
|
| 842 |
+
"made during the policy period. \n",
|
| 843 |
+
"b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
|
| 844 |
+
"such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
|
| 845 |
+
"policy year. \n",
|
| 846 |
+
"There shall be no refund for the completed policy year elapsed. [{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n",
|
| 847 |
+
"* Page 16 of 25 \n",
|
| 848 |
+
"National Parivar Mediclaim Plus Policy \n",
|
| 849 |
+
"UIN: NICHLIP25039V032425 \n",
|
| 850 |
+
" \n",
|
| 851 |
+
"ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
|
| 852 |
+
"a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
|
| 853 |
+
"made during the policy period. \n",
|
| 854 |
+
"b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
|
| 855 |
+
"such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
|
| 856 |
+
"policy year. \n",
|
| 857 |
+
"There shall be no refund for the completed policy year elapsed. [{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n"
|
| 858 |
+
]
|
| 859 |
+
}
|
| 860 |
+
],
|
| 861 |
+
"source": [
|
| 862 |
+
"results = vector_store.similarity_search(\n",
|
| 863 |
+
" \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\",\n",
|
| 864 |
+
" k=2\n",
|
| 865 |
+
")\n",
|
| 866 |
+
"for res in results:\n",
|
| 867 |
+
" print(f\"* {res.page_content} [{res.metadata}]\")"
|
| 868 |
+
]
|
| 869 |
+
},
|
| 870 |
+
{
|
| 871 |
+
"cell_type": "code",
|
| 872 |
+
"execution_count": 135,
|
| 873 |
+
"id": "41f27c21",
|
| 874 |
+
"metadata": {},
|
| 875 |
+
"outputs": [
|
| 876 |
+
{
|
| 877 |
+
"name": "stdout",
|
| 878 |
+
"output_type": "stream",
|
| 879 |
+
"text": [
|
| 880 |
+
"[Document(id='f13798aa-3424-4b38-a7bf-f5abe366120f', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}, page_content='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.'), Document(id='8002651d-7f26-4c99-a60c-d8df30aadd79', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}, page_content='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.')]\n"
|
| 881 |
+
]
|
| 882 |
+
}
|
| 883 |
+
],
|
| 884 |
+
"source": [
|
| 885 |
+
"print(results)"
|
| 886 |
+
]
|
| 887 |
+
},
|
| 888 |
+
{
|
| 889 |
+
"cell_type": "code",
|
| 890 |
+
"execution_count": 136,
|
| 891 |
+
"id": "cf7b7568",
|
| 892 |
+
"metadata": {},
|
| 893 |
+
"outputs": [
|
| 894 |
+
{
|
| 895 |
+
"name": "stdout",
|
| 896 |
+
"output_type": "stream",
|
| 897 |
+
"text": [
|
| 898 |
+
"* [SIM=0.678520] Page 16 of 25 \n",
|
| 899 |
+
"National Parivar Mediclaim Plus Policy \n",
|
| 900 |
+
"UIN: NICHLIP25039V032425 \n",
|
| 901 |
+
" \n",
|
| 902 |
+
"ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
|
| 903 |
+
"a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
|
| 904 |
+
"made during the policy period. \n",
|
| 905 |
+
"b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
|
| 906 |
+
"such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
|
| 907 |
+
"policy year. \n",
|
| 908 |
+
"There shall be no refund for the completed policy year elapsed. [{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n"
|
| 909 |
+
]
|
| 910 |
+
}
|
| 911 |
+
],
|
| 912 |
+
"source": [
|
| 913 |
+
"results = vector_store.similarity_search_with_score(\n",
|
| 914 |
+
" \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\", k=1\n",
|
| 915 |
+
")\n",
|
| 916 |
+
"for res, score in results:\n",
|
| 917 |
+
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
|
| 918 |
+
]
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"cell_type": "markdown",
|
| 922 |
+
"id": "7fde8a22",
|
| 923 |
+
"metadata": {},
|
| 924 |
+
"source": [
|
| 925 |
+
"### QUERY PARSING "
|
| 926 |
+
]
|
| 927 |
+
},
|
| 928 |
+
{
|
| 929 |
+
"cell_type": "code",
|
| 930 |
+
"execution_count": 62,
|
| 931 |
+
"id": "20452e98",
|
| 932 |
+
"metadata": {},
|
| 933 |
+
"outputs": [],
|
| 934 |
+
"source": [
|
| 935 |
+
"from pydantic import BaseModel, model_validator,field_validator\n",
|
| 936 |
+
"from typing import List, Dict, Any, Optional\n",
|
| 937 |
+
"import json\n",
|
| 938 |
+
"class QuerySpec(BaseModel):\n",
|
| 939 |
+
" raw_query: str \n",
|
| 940 |
+
" intent: str \n",
|
| 941 |
+
" entities: Dict[str, str]\n",
|
| 942 |
+
" constraints : Dict[str, Any]\n",
|
| 943 |
+
" answer_type: str \n",
|
| 944 |
+
" followups: Optional[List[str]] = []\n",
|
| 945 |
+
"\n",
|
| 946 |
+
" @model_validator(mode = \"before\")\n",
|
| 947 |
+
" @classmethod\n",
|
| 948 |
+
" def parse_nested_json(cls, values):\n",
|
| 949 |
+
" for field in ['entities', 'constraints']:\n",
|
| 950 |
+
" val = values.get(field)\n",
|
| 951 |
+
" if isinstance(val, str):\n",
|
| 952 |
+
" try:\n",
|
| 953 |
+
" values[field] = json.loads(val)\n",
|
| 954 |
+
" except json.JSONDecodeError:\n",
|
| 955 |
+
" pass\n",
|
| 956 |
+
" return values\n",
|
| 957 |
+
"\n",
|
| 958 |
+
"class ClauseHit(BaseModel):\n",
|
| 959 |
+
" doc_id : str\n",
|
| 960 |
+
" page: int\n",
|
| 961 |
+
" chunk_id: str \n",
|
| 962 |
+
" text: str \n",
|
| 963 |
+
" metadata: Dict[str, Any]\n",
|
| 964 |
+
" score: float \n",
|
| 965 |
+
" boost: Optional[float] = None\n",
|
| 966 |
+
" combined_score: Optional[float] = None\n",
|
| 967 |
+
"\n",
|
| 968 |
+
" @field_validator(\"metadata\", mode=\"before\")\n",
|
| 969 |
+
" def parse_metadata(cls, v):\n",
|
| 970 |
+
" if isinstance(v, str):\n",
|
| 971 |
+
" try:\n",
|
| 972 |
+
" return json.loads(v) if v.strip() else {}\n",
|
| 973 |
+
" except json.JSONDecodeError:\n",
|
| 974 |
+
" return {}\n",
|
| 975 |
+
" return v\n",
|
| 976 |
+
"\n",
|
| 977 |
+
"class LogicResult(BaseModel):\n",
|
| 978 |
+
" answer: str\n",
|
| 979 |
+
" decision: str # \"covered\"/\"not_covered\"/\"conditional\"\n",
|
| 980 |
+
" confidence: float\n",
|
| 981 |
+
" evidence: List[ClauseHit]\n",
|
| 982 |
+
" rationale: str\n",
|
| 983 |
+
" \n",
|
| 984 |
+
"\n",
|
| 985 |
+
"class APIResponse(BaseModel):\n",
|
| 986 |
+
" query_spec: QuerySpec\n",
|
| 987 |
+
" logic_result: LogicResult\n",
|
| 988 |
+
" debug: Optional[Dict[str, Any]] = None\n",
|
| 989 |
+
"\n",
|
| 990 |
+
"\n",
|
| 991 |
+
"\n"
|
| 992 |
+
]
|
| 993 |
+
},
|
| 994 |
+
{
|
| 995 |
+
"cell_type": "code",
|
| 996 |
+
"execution_count": 8,
|
| 997 |
+
"id": "3ac1f99f",
|
| 998 |
+
"metadata": {},
|
| 999 |
+
"outputs": [],
|
| 1000 |
+
"source": [
|
| 1001 |
+
"user_question = \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\""
|
| 1002 |
+
]
|
| 1003 |
+
},
|
| 1004 |
+
{
|
| 1005 |
+
"cell_type": "code",
|
| 1006 |
+
"execution_count": 9,
|
| 1007 |
+
"id": "2961e184",
|
| 1008 |
+
"metadata": {},
|
| 1009 |
+
"outputs": [],
|
| 1010 |
+
"source": [
|
| 1011 |
+
"PARSER_PROMPT = f\"\"\"You receive a user's question about an insurance/contract document. Produce a JSON with keys:\n",
|
| 1012 |
+
"- intent (one of: coverage_check, definition, limit_query, waiting_period, exclusions, other)\n",
|
| 1013 |
+
"- entities (map of entity_name -> canonical string)\n",
|
| 1014 |
+
"- constraints (map: plan, time_window, eligible_person, numerical_constraints)\n",
|
| 1015 |
+
"- answer_type (one of: yes_no, short_explain, detailed, clause_list)\n",
|
| 1016 |
+
"Return ONLY the JSON.Make sure that nested fields like \"entities\" and \"constraints\" are JSON objects, not strings.\n",
|
| 1017 |
+
"\"\"\""
|
| 1018 |
+
]
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"cell_type": "code",
|
| 1022 |
+
"execution_count": 10,
|
| 1023 |
+
"id": "a9123e2a",
|
| 1024 |
+
"metadata": {},
|
| 1025 |
+
"outputs": [
|
| 1026 |
+
{
|
| 1027 |
+
"data": {
|
| 1028 |
+
"text/plain": [
|
| 1029 |
+
"ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001E02EF9EA50>, default_metadata=(), model_kwargs={})"
|
| 1030 |
+
]
|
| 1031 |
+
},
|
| 1032 |
+
"execution_count": 10,
|
| 1033 |
+
"metadata": {},
|
| 1034 |
+
"output_type": "execute_result"
|
| 1035 |
+
}
|
| 1036 |
+
],
|
| 1037 |
+
"source": [
|
| 1038 |
+
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
| 1039 |
+
"api_key = os.getenv(\"GEMINI_API_KEY\")\n",
|
| 1040 |
+
"llm = ChatGoogleGenerativeAI(\n",
|
| 1041 |
+
" model=\"gemini-2.5-flash\",\n",
|
| 1042 |
+
" google_api_key = api_key\n",
|
| 1043 |
+
" \n",
|
| 1044 |
+
" )\n",
|
| 1045 |
+
"llm"
|
| 1046 |
+
]
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"cell_type": "code",
|
| 1050 |
+
"execution_count": 30,
|
| 1051 |
+
"id": "290081a1",
|
| 1052 |
+
"metadata": {},
|
| 1053 |
+
"outputs": [],
|
| 1054 |
+
"source": [
|
| 1055 |
+
"def parsing_query(query:str) -> QuerySpec:\n",
|
| 1056 |
+
" # Bind the schema to the model\n",
|
| 1057 |
+
" structured_llm = llm.with_structured_output(QuerySpec)\n",
|
| 1058 |
+
"\n",
|
| 1059 |
+
" # Compose the full prompt with instructions and user question\n",
|
| 1060 |
+
" full_prompt = PARSER_PROMPT + \"\\n\" + query\n",
|
| 1061 |
+
"\n",
|
| 1062 |
+
" # Invoke the model to get structured output parsed as QuerySpec\n",
|
| 1063 |
+
" result: QuerySpec = structured_llm.invoke(full_prompt)\n",
|
| 1064 |
+
" return result\n",
|
| 1065 |
+
" # print(result.json()) # This will print the JSON output matching your schema\n"
|
| 1066 |
+
]
|
| 1067 |
+
},
|
| 1068 |
+
{
|
| 1069 |
+
"cell_type": "markdown",
|
| 1070 |
+
"id": "b5cecc42",
|
| 1071 |
+
"metadata": {},
|
| 1072 |
+
"source": [
|
| 1073 |
+
"## Embedding + Retrieval and Clause Matching"
|
| 1074 |
+
]
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"cell_type": "code",
|
| 1078 |
+
"execution_count": 38,
|
| 1079 |
+
"id": "11fdd288",
|
| 1080 |
+
"metadata": {},
|
| 1081 |
+
"outputs": [],
|
| 1082 |
+
"source": [
|
| 1083 |
+
"def get_query_embedding(embedding_client, query_spec: QuerySpec):\n",
|
| 1084 |
+
" q = query_spec.raw_query\n",
|
| 1085 |
+
" e_main = embedding_client.embed_query(q)\n",
|
| 1086 |
+
" expansions = []\n",
|
| 1087 |
+
" if \"procedure\" in query_spec.entities:\n",
|
| 1088 |
+
" expansions.append(f\"{q} OR {query_spec.entities['procedure']} procedures related\")\n",
|
| 1089 |
+
" return e_main, expansions\n",
|
| 1090 |
+
"\n",
|
| 1091 |
+
"def retrieval_from_pinecone_vectoreStore(pinecone_index, embeddings, top_k= 3, filter_meta = None):\n",
|
| 1092 |
+
" \"\"\"\n",
|
| 1093 |
+
" Retrieve the top matching chunks from Pinecone.\n",
|
| 1094 |
+
" \n",
|
| 1095 |
+
" Args:\n",
|
| 1096 |
+
" pinecone_index: Your Pinecone index object.\n",
|
| 1097 |
+
"        embeddings: The vector embedding of the query.\n",
|
| 1098 |
+
" top_k: How many chunks to retrieve.\n",
|
| 1099 |
+
" filter_meta: Optional metadata filter dict.\n",
|
| 1100 |
+
" \n",
|
| 1101 |
+
" Returns:\n",
|
| 1102 |
+
" List of ClauseHit objects (lightweight container for chunk info).\n",
|
| 1103 |
+
" \"\"\"\n",
|
| 1104 |
+
" res = pinecone_index.query(\n",
|
| 1105 |
+
" vector= embeddings,\n",
|
| 1106 |
+
" top_k =top_k ,\n",
|
| 1107 |
+
" include_metadata = True, \n",
|
| 1108 |
+
" include_values = False, \n",
|
| 1109 |
+
" filter = filter_meta \n",
|
| 1110 |
+
" )\n",
|
| 1111 |
+
" hits= []\n",
|
| 1112 |
+
" for match in res['matches']:\n",
|
| 1113 |
+
" hits.append(ClauseHit(\n",
|
| 1114 |
+
" doc_id=match['metadata']['doc_id'],\n",
|
| 1115 |
+
" page=match['metadata'].get('page', -1),\n",
|
| 1116 |
+
" chunk_id=match['metadata'].get('chunk_id', ''),\n",
|
| 1117 |
+
" text=match['metadata']['text'],\n",
|
| 1118 |
+
" metadata=match['metadata'],\n",
|
| 1119 |
+
" score=match['score']\n",
|
| 1120 |
+
" ))\n",
|
| 1121 |
+
" return hits\n",
|
| 1122 |
+
"\n",
|
| 1123 |
+
" "
|
| 1124 |
+
]
|
| 1125 |
+
},
|
| 1126 |
+
{
|
| 1127 |
+
"cell_type": "markdown",
|
| 1128 |
+
"id": "9707521f",
|
| 1129 |
+
"metadata": {},
|
| 1130 |
+
"source": [
|
| 1131 |
+
"## Logic Evaluation\n",
|
| 1132 |
+
"### Decision processing"
|
| 1133 |
+
]
|
| 1134 |
+
},
|
| 1135 |
+
{
|
| 1136 |
+
"cell_type": "code",
|
| 1137 |
+
"execution_count": 74,
|
| 1138 |
+
"id": "74e49132",
|
| 1139 |
+
"metadata": {},
|
| 1140 |
+
"outputs": [],
|
| 1141 |
+
"source": [
|
| 1142 |
+
"def evaluate_with_llm(raw_query: str, top_clauses: list):\n",
|
| 1143 |
+
" \"\"\"\n",
|
| 1144 |
+
"    Use the LLM to analyze the retrieved clauses and return a structured decision.\n",
|
| 1145 |
+
" \"\"\"\n",
|
| 1146 |
+
"\n",
|
| 1147 |
+
" # Prepare context for the prompt\n",
|
| 1148 |
+
" context_clauses = []\n",
|
| 1149 |
+
" for i, c in enumerate(top_clauses, 1):\n",
|
| 1150 |
+
" context_clauses.append(f\"{i}) [source:{c.doc_id} page:{c.page}] {c.text}\")\n",
|
| 1151 |
+
" print(chr(10).join(context_clauses))\n",
|
| 1152 |
+
" \n",
|
| 1153 |
+
" # Build prompt\n",
|
| 1154 |
+
" prompt = f\"\"\"\n",
|
| 1155 |
+
" You are an insurance policy analyst. Question: \"{raw_query}\"\n",
|
| 1156 |
+
"\n",
|
| 1157 |
+
" Provided clauses (numbered):\n",
|
| 1158 |
+
" {chr(10).join(context_clauses)}\n",
|
| 1159 |
+
"\n",
|
| 1160 |
+
" Task:\n",
|
| 1161 |
+
" 1) Decide: COVERED / NOT_COVERED / CONDITIONAL\n",
|
| 1162 |
+
" 2) Summarize the exact clause(s) that justify your decision.\n",
|
| 1163 |
+
" 3) List any conditions, waiting periods, sublimits, or exclusions relevant.\n",
|
| 1164 |
+
" 4) Provide a concise final answer (1-2 sentences).\n",
|
| 1165 |
+
"\n",
|
| 1166 |
+
" Return JSON with these exact keys:\n",
|
| 1167 |
+
" {{\n",
|
| 1168 |
+
" \"decision\": \"...\",\n",
|
| 1169 |
+
" \"evidence\": [\n",
|
| 1170 |
+
" {{\"doc_id\": \"...\", \"page\": 0, \"snippet\": \"...\", \"reason\": \"...\"}}\n",
|
| 1171 |
+
" ],\n",
|
| 1172 |
+
" \"confidence\": 0.0,\n",
|
| 1173 |
+
" \"rationale\": \"...\",\n",
|
| 1174 |
+
" \"answer\": \"...\"\n",
|
| 1175 |
+
" }}\n",
|
| 1176 |
+
" \"\"\"\n",
|
| 1177 |
+
"\n",
|
| 1178 |
+
" # Directly parse to LogicResult using structured output\n",
|
| 1179 |
+
" structured_llm = llm.with_structured_output(LogicResult)\n",
|
| 1180 |
+
" result: LogicResult = structured_llm.invoke(prompt)\n",
|
| 1181 |
+
" # print(f\"result: {result}\\n result_type{type(result)}\")\n",
|
| 1182 |
+
"\n",
|
| 1183 |
+
" # Attach full text for each evidence\n",
|
| 1184 |
+
" enriched_evidence = []\n",
|
| 1185 |
+
" for ev in result.evidence:\n",
|
| 1186 |
+
" matched = next((c for c in top_clauses if c.doc_id == ev.doc_id and str(c.page) == str(ev.page)), None)\n",
|
| 1187 |
+
" if matched:\n",
|
| 1188 |
+
" ev.text = matched.text # or use a different field if needed\n",
|
| 1189 |
+
" enriched_evidence.append(ev)\n",
|
| 1190 |
+
"\n",
|
| 1191 |
+
" result.evidence = enriched_evidence\n",
|
| 1192 |
+
" # print(enriched_evidence[0])\n",
|
| 1193 |
+
" return result\n"
|
| 1194 |
+
]
|
| 1195 |
+
},
|
| 1196 |
+
{
|
| 1197 |
+
"cell_type": "code",
|
| 1198 |
+
"execution_count": 14,
|
| 1199 |
+
"id": "fe78ab38",
|
| 1200 |
+
"metadata": {},
|
| 1201 |
+
"outputs": [],
|
| 1202 |
+
"source": [
|
| 1203 |
+
"query = \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\""
|
| 1204 |
+
]
|
| 1205 |
+
},
|
| 1206 |
+
{
|
| 1207 |
+
"cell_type": "code",
|
| 1208 |
+
"execution_count": 36,
|
| 1209 |
+
"id": "fea3b1be",
|
| 1210 |
+
"metadata": {},
|
| 1211 |
+
"outputs": [],
|
| 1212 |
+
"source": [
|
| 1213 |
+
"parsed_query = parsing_query(query)\n"
|
| 1214 |
+
]
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"cell_type": "code",
|
| 1218 |
+
"execution_count": 32,
|
| 1219 |
+
"id": "82fcb8bb",
|
| 1220 |
+
"metadata": {},
|
| 1221 |
+
"outputs": [
|
| 1222 |
+
{
|
| 1223 |
+
"data": {
|
| 1224 |
+
"text/plain": [
|
| 1225 |
+
"__main__.QuerySpec"
|
| 1226 |
+
]
|
| 1227 |
+
},
|
| 1228 |
+
"execution_count": 32,
|
| 1229 |
+
"metadata": {},
|
| 1230 |
+
"output_type": "execute_result"
|
| 1231 |
+
}
|
| 1232 |
+
],
|
| 1233 |
+
"source": [
|
| 1234 |
+
"type(parsed_query)"
|
| 1235 |
+
]
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"cell_type": "code",
|
| 1239 |
+
"execution_count": 39,
|
| 1240 |
+
"id": "9b8292f0",
|
| 1241 |
+
"metadata": {},
|
| 1242 |
+
"outputs": [],
|
| 1243 |
+
"source": [
|
| 1244 |
+
"\n",
|
| 1245 |
+
"# Step 1 — Embed\n",
|
| 1246 |
+
"embedding = get_query_embedding(embeddings, parsed_query)"
|
| 1247 |
+
]
|
| 1248 |
+
},
|
| 1249 |
+
{
|
| 1250 |
+
"cell_type": "code",
|
| 1251 |
+
"execution_count": 44,
|
| 1252 |
+
"id": "46790137",
|
| 1253 |
+
"metadata": {},
|
| 1254 |
+
"outputs": [],
|
| 1255 |
+
"source": [
|
| 1256 |
+
"\n",
|
| 1257 |
+
"# Step 2 — Retrieve\n",
|
| 1258 |
+
"top_hits = retrieval_from_pinecone_vectoreStore(index, embedding, top_k=3)"
|
| 1259 |
+
]
|
| 1260 |
+
},
|
| 1261 |
+
{
|
| 1262 |
+
"cell_type": "code",
|
| 1263 |
+
"execution_count": 48,
|
| 1264 |
+
"id": "9c3f4e68",
|
| 1265 |
+
"metadata": {},
|
| 1266 |
+
"outputs": [
|
| 1267 |
+
{
|
| 1268 |
+
"data": {
|
| 1269 |
+
"text/plain": [
|
| 1270 |
+
"[ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', 'type': 'text'}, score=0.678843796, boost=None, combined_score=None),\n",
|
| 1271 |
+
" ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', 'type': 'text'}, score=0.677854538, boost=None, combined_score=None),\n",
|
| 1272 |
+
" ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='all claims made in the aggregate during each policy year. \\n \\n2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \\npayment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \\nand coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \\nCoverage shall not be available during the period for which no premium is received. \\n \\n2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \\nregistered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'all claims made in the aggregate during each policy year. \\n \\n2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \\npayment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \\nand coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \\nCoverage shall not be available during the period for which no premium is received. \\n \\n2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \\nregistered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or', 'type': 'text'}, score=0.64794, boost=None, combined_score=None)]"
|
| 1273 |
+
]
|
| 1274 |
+
},
|
| 1275 |
+
"execution_count": 48,
|
| 1276 |
+
"metadata": {},
|
| 1277 |
+
"output_type": "execute_result"
|
| 1278 |
+
}
|
| 1279 |
+
],
|
| 1280 |
+
"source": [
|
| 1281 |
+
"top_hits"
|
| 1282 |
+
]
|
| 1283 |
+
},
|
| 1284 |
+
{
|
| 1285 |
+
"cell_type": "code",
|
| 1286 |
+
"execution_count": 75,
|
| 1287 |
+
"id": "05cb7ca5",
|
| 1288 |
+
"metadata": {},
|
| 1289 |
+
"outputs": [
|
| 1290 |
+
{
|
| 1291 |
+
"name": "stdout",
|
| 1292 |
+
"output_type": "stream",
|
| 1293 |
+
"text": [
|
| 1294 |
+
"1) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] Page 16 of 25 \n",
|
| 1295 |
+
"National Parivar Mediclaim Plus Policy \n",
|
| 1296 |
+
"UIN: NICHLIP25039V032425 \n",
|
| 1297 |
+
" \n",
|
| 1298 |
+
"ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
|
| 1299 |
+
"a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
|
| 1300 |
+
"made during the policy period. \n",
|
| 1301 |
+
"b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
|
| 1302 |
+
"such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
|
| 1303 |
+
"policy year. \n",
|
| 1304 |
+
"There shall be no refund for the completed policy year elapsed.\n",
|
| 1305 |
+
"2) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] Page 16 of 25 \n",
|
| 1306 |
+
"National Parivar Mediclaim Plus Policy \n",
|
| 1307 |
+
"UIN: NICHLIP25039V032425 \n",
|
| 1308 |
+
" \n",
|
| 1309 |
+
"ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
|
| 1310 |
+
"a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
|
| 1311 |
+
"made during the policy period. \n",
|
| 1312 |
+
"b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
|
| 1313 |
+
"such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
|
| 1314 |
+
"policy year. \n",
|
| 1315 |
+
"There shall be no refund for the completed policy year elapsed.\n",
|
| 1316 |
+
"3) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] all claims made in the aggregate during each policy year. \n",
|
| 1317 |
+
" \n",
|
| 1318 |
+
"2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \n",
|
| 1319 |
+
"payment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \n",
|
| 1320 |
+
"and coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \n",
|
| 1321 |
+
"Coverage shall not be available during the period for which no premium is received. \n",
|
| 1322 |
+
" \n",
|
| 1323 |
+
"2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \n",
|
| 1324 |
+
"registered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or\n"
|
| 1325 |
+
]
|
| 1326 |
+
}
|
| 1327 |
+
],
|
| 1328 |
+
"source": [
|
| 1329 |
+
"# Step 3 — Evaluate with LLM\n",
|
| 1330 |
+
"result = evaluate_with_llm(query, top_hits)"
|
| 1331 |
+
]
|
| 1332 |
+
},
|
| 1333 |
+
{
|
| 1334 |
+
"cell_type": "code",
|
| 1335 |
+
"execution_count": 73,
|
| 1336 |
+
"id": "40c7075b",
|
| 1337 |
+
"metadata": {},
|
| 1338 |
+
"outputs": [
|
| 1339 |
+
{
|
| 1340 |
+
"data": {
|
| 1341 |
+
"text/plain": [
|
| 1342 |
+
"'The grace period for premium payment under the National Parivar Mediclaim Plus Policy is thirty days. However, coverage is not available during this period if no premium is received.'"
|
| 1343 |
+
]
|
| 1344 |
+
},
|
| 1345 |
+
"execution_count": 73,
|
| 1346 |
+
"metadata": {},
|
| 1347 |
+
"output_type": "execute_result"
|
| 1348 |
+
}
|
| 1349 |
+
],
|
| 1350 |
+
"source": [
|
| 1351 |
+
"result.answer"
|
| 1352 |
+
]
|
| 1353 |
+
},
|
| 1354 |
+
{
|
| 1355 |
+
"cell_type": "code",
|
| 1356 |
+
"execution_count": 82,
|
| 1357 |
+
"id": "46ff44ac",
|
| 1358 |
+
"metadata": {},
|
| 1359 |
+
"outputs": [
|
| 1360 |
+
{
|
| 1361 |
+
"name": "stderr",
|
| 1362 |
+
"output_type": "stream",
|
| 1363 |
+
"text": [
|
| 1364 |
+
"C:\\Users\\hp\\AppData\\Local\\Temp\\ipykernel_9600\\3651844483.py:1: PydanticDeprecatedSince20: The `__fields__` attribute is deprecated, use `model_fields` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/\n",
|
| 1365 |
+
" result.evidence[0].__fields__\n"
|
| 1366 |
+
]
|
| 1367 |
+
},
|
| 1368 |
+
{
|
| 1369 |
+
"data": {
|
| 1370 |
+
"text/plain": [
|
| 1371 |
+
"{'doc_id': FieldInfo(annotation=str, required=True),\n",
|
| 1372 |
+
" 'page': FieldInfo(annotation=int, required=True),\n",
|
| 1373 |
+
" 'chunk_id': FieldInfo(annotation=str, required=True),\n",
|
| 1374 |
+
" 'text': FieldInfo(annotation=str, required=True),\n",
|
| 1375 |
+
" 'metadata': FieldInfo(annotation=Dict[str, Any], required=True),\n",
|
| 1376 |
+
" 'score': FieldInfo(annotation=float, required=True),\n",
|
| 1377 |
+
" 'boost': FieldInfo(annotation=Union[float, NoneType], required=False, default=None),\n",
|
| 1378 |
+
" 'combined_score': FieldInfo(annotation=Union[float, NoneType], required=False, default=None)}"
|
| 1379 |
+
]
|
| 1380 |
+
},
|
| 1381 |
+
"execution_count": 82,
|
| 1382 |
+
"metadata": {},
|
| 1383 |
+
"output_type": "execute_result"
|
| 1384 |
+
}
|
| 1385 |
+
],
|
| 1386 |
+
"source": [
|
| 1387 |
+
"result.evidence[0].__fields__"
|
| 1388 |
+
]
|
| 1389 |
+
}
|
| 1390 |
+
],
|
| 1391 |
+
"metadata": {
|
| 1392 |
+
"kernelspec": {
|
| 1393 |
+
"display_name": "rag-app",
|
| 1394 |
+
"language": "python",
|
| 1395 |
+
"name": "python3"
|
| 1396 |
+
},
|
| 1397 |
+
"language_info": {
|
| 1398 |
+
"codemirror_mode": {
|
| 1399 |
+
"name": "ipython",
|
| 1400 |
+
"version": 3
|
| 1401 |
+
},
|
| 1402 |
+
"file_extension": ".py",
|
| 1403 |
+
"mimetype": "text/x-python",
|
| 1404 |
+
"name": "python",
|
| 1405 |
+
"nbconvert_exporter": "python",
|
| 1406 |
+
"pygments_lexer": "ipython3",
|
| 1407 |
+
"version": "3.13.3"
|
| 1408 |
+
}
|
| 1409 |
+
},
|
| 1410 |
+
"nbformat": 4,
|
| 1411 |
+
"nbformat_minor": 5
|
| 1412 |
+
}
|
main.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.utils.model_loader import ModelLoader
|
| 2 |
+
from app.ingestion.file_loader import load_documents_form_url
|
| 3 |
+
from app.ingestion.text_splitter import text_splitting
|
| 4 |
+
from app.reseasoning.query_parser import parsing_query
|
| 5 |
+
from app.reseasoning.descision_maker import evaluate_with_llm
|
| 6 |
+
from app.retrieval.retriever import retrieval_from_pinecone_vectoreStore
|
| 7 |
+
from app.schemas.request_models import QuerySpec,LogicResult, ClauseHit, HackRxRunRequest
|
| 8 |
+
from app.schemas.response_models import APIResponse
|
| 9 |
+
from fastapi import FastAPI, Header, HTTPException, Depends
|
| 10 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
from typing import Optional
|
| 12 |
+
from app.embedding.embeder import get_query_embedding
|
| 13 |
+
from app.embedding.vectore_store import create_vectore_store
|
| 14 |
+
from app.ingestion.file_loader import load_documents_form_url
|
| 15 |
+
from app.ingestion.text_splitter import text_splitting
|
| 16 |
+
|
| 17 |
+
# Single FastAPI application instance; the route below is registered on it.
app = FastAPI(title="RAG app")

# Fully permissive CORS policy: every origin, HTTP method and request header
# is accepted, and credentialed cross-origin requests are allowed.
# NOTE(review): the FastAPI CORS documentation advises against combining
# wildcard origins with allow_credentials=True -- confirm whether credentials
# are actually needed, since the endpoint authenticates via a bearer header.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
|
| 25 |
+
def verify_bearer_token(authorization: Optional[str] = Header(None)):
    """Validate the ``Authorization: Bearer <token>`` header of a request.

    Used as a FastAPI dependency: it either returns ``True`` or aborts the
    request by raising ``HTTPException``.

    Args:
        authorization: Raw value of the ``Authorization`` request header, or
            ``None`` when the header is absent.

    Returns:
        ``True`` when the supplied bearer token matches the expected token.

    Raises:
        HTTPException: 401 when the header is missing or does not start with
            ``"Bearer "``; 403 when the token does not match.
    """
    # Local import keeps this block self-contained (stdlib only).
    import hmac

    # NOTE(review): the expected token is hard-coded in source; consider
    # loading it from configuration or the environment instead.
    expected_token = "cc13b8bb7f4bc1570c8a39bda8c9d4c34b2be6b8abe1044c89abf49b28cee3f8"
    scheme_prefix = "Bearer "

    if authorization is None or not authorization.startswith(scheme_prefix):
        raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")

    # Strip only the leading scheme. Splitting on "Bearer " would discard
    # anything following a second occurrence of the prefix, letting a header
    # such as "Bearer <valid>Bearer junk" pass the check.
    token = authorization[len(scheme_prefix):]

    # Constant-time comparison avoids leaking how many leading characters of
    # the token matched; bytes are used so non-ASCII input cannot raise.
    if not hmac.compare_digest(token.encode("utf-8"), expected_token.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid token")
    return True
|
| 33 |
+
|
| 34 |
+
@app.post("/api/v1/hackrx/run", response_model=APIResponse)
async def run_hackrx(request: HackRxRunRequest, authorized: bool = Depends(verify_bearer_token)):
    """Answer every question in the request against the referenced document.

    Steps performed, in order: load the LLM and the embedding model, load the
    document from ``request.documents``, split it into chunks, build a vector
    store from those chunks, then for each entry of ``request.questions``
    parse the query, embed it, retrieve the top 3 hits and let the LLM
    evaluate them. The ``answer`` field of each evaluation is collected.

    Args:
        request: Request body; ``documents`` and ``questions`` are read.
        authorized: Result of the ``verify_bearer_token`` dependency. It is
            not used in the body; the dependency rejects unauthorized
            requests before this function runs.

    Returns:
        ``APIResponse`` whose ``answers`` list follows the order of
        ``request.questions``.
    """
    # --- Model initialisation ---
    # Both loaders use the ``model_provider`` keyword; the LLM loader
    # previously passed ``model_loader=`` which did not match.
    llm = ModelLoader(model_provider="gemini").load_llm()

    # NOTE(review): the embedding model is also obtained through load_llm();
    # confirm ModelLoader has no dedicated embeddings loader that should be
    # used here instead.
    embedding_model = ModelLoader(model_provider="openai").load_llm()
    print("LLMs are loaded!!")

    answers = []

    # --- Document loading ---
    document_url = request.documents
    pdf_doc = load_documents_form_url(document_url)
    print("file has been loaded")

    # --- Chunking ---
    chunks = text_splitting(doc_content=pdf_doc)
    print("Chunks have been splitted")

    # --- Vector store creation ---
    index, namespace = create_vectore_store(text_chunks=chunks, embedding_model=embedding_model)
    print("Index is created")

    for question in request.questions:
        # 1. Parse the raw question into a structured query.
        parsed_query = parsing_query(query=question, llm=llm)
        print("Query Parsed")

        # 2. Embed the query; the second returned value is not needed here.
        query_embedding, _ = get_query_embedding(query_spec=parsed_query, embedding_model=embedding_model)
        print("Query Embedded")

        # 3. Retrieve the closest chunks from the vector store.
        top_hits = retrieval_from_pinecone_vectoreStore(
            pinecone_index=index,
            embeddings=query_embedding,
            top_k=3,
            namespace=namespace,
        )
        print("Documents retrieved!")

        # 4. Let the LLM decide and phrase the answer from the retrieved clauses.
        result = evaluate_with_llm(raw_query=question, top_clauses=top_hits, llm=llm)
        print("Answer created!")
        answers.append(result.answer)

    print("Answers are appended!")
    print(answers)
    # NOTE(review): the namespace created for this request is never deleted,
    # so vectors accumulate in the index across requests -- confirm whether
    # cleanup (deleting the namespace contents) should happen here.
    return APIResponse(answers=answers)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
|
pyproject.toml
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "rag-app"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "RAG API that answers questions about insurance policy documents"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = "==3.12.4"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"acres==0.5.0",
|
| 9 |
+
"aiohappyeyeballs==2.6.1",
|
| 10 |
+
"aiohttp==3.12.15",
|
| 11 |
+
"aiohttp-retry==2.9.1",
|
| 12 |
+
"aiosignal==1.4.0",
|
| 13 |
+
"annotated-types==0.7.0",
|
| 14 |
+
"anyio==4.10.0",
|
| 15 |
+
"asttokens==3.0.0",
|
| 16 |
+
"attrs==25.3.0",
|
| 17 |
+
"beautifulsoup4==4.13.4",
|
| 18 |
+
"cachetools==5.5.2",
|
| 19 |
+
"certifi==2025.8.3",
|
| 20 |
+
"cffi==1.17.1",
|
| 21 |
+
"charset-normalizer==3.4.3",
|
| 22 |
+
"ci-info==0.3.0",
|
| 23 |
+
"click==8.2.1",
|
| 24 |
+
"colorama==0.4.6",
|
| 25 |
+
"colorclass==2.2.2",
|
| 26 |
+
"comm==0.2.3",
|
| 27 |
+
"compressed-rtf==1.0.7",
|
| 28 |
+
"configobj==5.0.9",
|
| 29 |
+
"configparser==7.2.0",
|
| 30 |
+
"cryptography==45.0.6",
|
| 31 |
+
"dataclasses-json==0.6.7",
|
| 32 |
+
"debugpy==1.8.16",
|
| 33 |
+
"decorator==5.2.1",
|
| 34 |
+
"distro==1.9.0",
|
| 35 |
+
"docx==0.2.4",
|
| 36 |
+
"easygui==0.98.3",
|
| 37 |
+
"ebcdic==1.1.1",
|
| 38 |
+
"etelemetry==0.3.1",
|
| 39 |
+
"executing==2.2.0",
|
| 40 |
+
"extract-msg==0.55.0",
|
| 41 |
+
"fastapi==0.116.1",
|
| 42 |
+
"filelock==3.18.0",
|
| 43 |
+
"filetype==1.2.0",
|
| 44 |
+
"frozenlist==1.7.0",
|
| 45 |
+
"google-ai-generativelanguage==0.6.18",
|
| 46 |
+
"google-api-core==2.25.1",
|
| 47 |
+
"google-auth==2.40.3",
|
| 48 |
+
"googleapis-common-protos==1.70.0",
|
| 49 |
+
"greenlet==3.2.4",
|
| 50 |
+
"groq==0.31.0",
|
| 51 |
+
"grpcio==1.74.0",
|
| 52 |
+
"grpcio-status==1.74.0",
|
| 53 |
+
"h11==0.16.0",
|
| 54 |
+
"httpcore==1.0.9",
|
| 55 |
+
"httplib2==0.22.0",
|
| 56 |
+
"httpx==0.28.1",
|
| 57 |
+
"httpx-sse==0.4.1",
|
| 58 |
+
"idna==3.10",
|
| 59 |
+
"iniconfig==2.1.0",
|
| 60 |
+
"ipykernel==6.30.1",
|
| 61 |
+
"ipython==9.4.0",
|
| 62 |
+
"ipython-pygments-lexers==1.1.1",
|
| 63 |
+
"jedi==0.19.2",
|
| 64 |
+
"jiter==0.10.0",
|
| 65 |
+
"joblib==1.5.1",
|
| 66 |
+
"jsonpatch==1.33",
|
| 67 |
+
"jsonpointer==3.0.0",
|
| 68 |
+
"jupyter-client==8.6.3",
|
| 69 |
+
"jupyter-core==5.8.1",
|
| 70 |
+
"langchain==0.3.27",
|
| 71 |
+
"langchain-community==0.3.27",
|
| 72 |
+
"langchain-core==0.3.74",
|
| 73 |
+
"langchain-google-genai==2.1.9",
|
| 74 |
+
"langchain-groq==0.3.7",
|
| 75 |
+
"langchain-openai==0.3.29",
|
| 76 |
+
"langchain-pinecone==0.2.11",
|
| 77 |
+
"langchain-tests==0.3.20",
|
| 78 |
+
"langchain-text-splitters==0.3.9",
|
| 79 |
+
"langsmith==0.4.13",
|
| 80 |
+
"lark==1.1.9",
|
| 81 |
+
"looseversion==1.3.0",
|
| 82 |
+
"lxml==6.0.0",
|
| 83 |
+
"markdown-it-py==4.0.0",
|
| 84 |
+
"marshmallow==3.26.1",
|
| 85 |
+
"matplotlib-inline==0.1.7",
|
| 86 |
+
"mdurl==0.1.2",
|
| 87 |
+
"msoffcrypto-tool==5.4.2",
|
| 88 |
+
"multidict==6.6.4",
|
| 89 |
+
"mypy-extensions==1.1.0",
|
| 90 |
+
"nest-asyncio==1.6.0",
|
| 91 |
+
"networkx==3.5",
|
| 92 |
+
"nibabel==5.3.2",
|
| 93 |
+
"nipype==1.10.0",
|
| 94 |
+
"numpy==2.3.2",
|
| 95 |
+
"olefile==0.47",
|
| 96 |
+
"oletools==0.60.2",
|
| 97 |
+
"openai==1.99.7",
|
| 98 |
+
"orjson==3.11.1",
|
| 99 |
+
"packaging==24.2",
|
| 100 |
+
"pandas==2.3.1",
|
| 101 |
+
"parso==0.8.4",
|
| 102 |
+
"pathlib==1.0.1",
|
| 103 |
+
"pcodedmp==1.2.6",
|
| 104 |
+
"pillow==11.3.0",
|
| 105 |
+
"pinecone==7.3.0",
|
| 106 |
+
"pinecone-plugin-assistant==1.7.0",
|
| 107 |
+
"pinecone-plugin-interface==0.0.7",
|
| 108 |
+
"platformdirs==4.3.8",
|
| 109 |
+
"pluggy==1.6.0",
|
| 110 |
+
"prompt-toolkit==3.0.51",
|
| 111 |
+
"propcache==0.3.2",
|
| 112 |
+
"proto-plus==1.26.1",
|
| 113 |
+
"protobuf==6.31.1",
|
| 114 |
+
"prov==2.1.1",
|
| 115 |
+
"psutil==7.0.0",
|
| 116 |
+
"pure-eval==0.2.3",
|
| 117 |
+
"puremagic==1.30",
|
| 118 |
+
"py-cpuinfo==9.0.0",
|
| 119 |
+
"pyasn1==0.6.1",
|
| 120 |
+
"pyasn1-modules==0.4.2",
|
| 121 |
+
"pycparser==2.22",
|
| 122 |
+
"pydantic==2.11.7",
|
| 123 |
+
"pydantic-core==2.33.2",
|
| 124 |
+
"pydantic-settings==2.10.1",
|
| 125 |
+
"pydot==4.0.1",
|
| 126 |
+
"pygments==2.19.2",
|
| 127 |
+
"pymupdf==1.26.3",
|
| 128 |
+
"pyparsing==3.2.3",
|
| 129 |
+
"pytest==8.4.1",
|
| 130 |
+
"pytest-asyncio==0.26.0",
|
| 131 |
+
"pytest-benchmark==5.1.0",
|
| 132 |
+
"pytest-codspeed==4.0.0",
|
| 133 |
+
"pytest-recording==0.13.4",
|
| 134 |
+
"pytest-socket==0.7.0",
|
| 135 |
+
"python-dateutil==2.9.0.post0",
|
| 136 |
+
"python-dotenv==1.1.1",
|
| 137 |
+
"pytz==2025.2",
|
| 138 |
+
"pyxnat==1.6.3",
|
| 139 |
+
"pyyaml==6.0.2",
|
| 140 |
+
"pyzmq==27.0.1",
|
| 141 |
+
"rdflib==7.1.4",
|
| 142 |
+
"red-black-tree-mod==1.22",
|
| 143 |
+
"regex==2025.7.34",
|
| 144 |
+
"requests==2.32.4",
|
| 145 |
+
"requests-toolbelt==1.0.0",
|
| 146 |
+
"rich==14.1.0",
|
| 147 |
+
"rsa==4.9.1",
|
| 148 |
+
"rtfde==0.1.2.1",
|
| 149 |
+
"scikit-learn==1.7.1",
|
| 150 |
+
"scipy==1.16.1",
|
| 151 |
+
"simplejson==3.20.1",
|
| 152 |
+
"six==1.17.0",
|
| 153 |
+
"sniffio==1.3.1",
|
| 154 |
+
"soupsieve==2.7",
|
| 155 |
+
"sqlalchemy==2.0.42",
|
| 156 |
+
"stack-data==0.6.3",
|
| 157 |
+
"starlette==0.47.2",
|
| 158 |
+
"syrupy==4.9.1",
|
| 159 |
+
"tenacity==9.1.2",
|
| 160 |
+
"threadpoolctl==3.6.0",
|
| 161 |
+
"tiktoken==0.11.0",
|
| 162 |
+
"tornado==6.5.2",
|
| 163 |
+
"tqdm==4.67.1",
|
| 164 |
+
"traitlets==5.14.3",
|
| 165 |
+
"traits==7.0.2",
|
| 166 |
+
"typing-extensions==4.14.1",
|
| 167 |
+
"typing-inspect==0.9.0",
|
| 168 |
+
"typing-inspection==0.4.1",
|
| 169 |
+
"tzdata==2025.2",
|
| 170 |
+
"tzlocal==5.3.1",
|
| 171 |
+
"urllib3<2",
|
| 172 |
+
"uvicorn>=0.35.0",
|
| 173 |
+
"vcrpy==7.0.0",
|
| 174 |
+
"wcwidth==0.2.13",
|
| 175 |
+
"win-unicode-console==0.5",
|
| 176 |
+
"wrapt==1.17.2",
|
| 177 |
+
"yarl==1.20.1",
|
| 178 |
+
"zstandard==0.23.0",
|
| 179 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain
|
| 2 |
+
langchain-community
|
| 3 |
+
langchain-groq
|
| 4 |
+
langchain-openai
|
| 5 |
+
pymupdf
|
| 6 |
+
pinecone
|
| 7 |
+
langchain-pinecone
|
| 8 |
+
fastapi
|
| 9 |
+
langchain-google-genai
|
| 10 |
+
pydantic
|
| 11 |
+
uvicorn
|
test_query.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
# Sample request payload: one document URL and the questions to ask about it.
test_data = {
    "documents": "https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D",
    "questions": [
        "What is covered for room rent and ICU charges?"
    ],
}

# Request headers, including the bearer token the API expects.
headers = {
    "Authorization": "Bearer cc13b8bb7f4bc1570c8a39bda8c9d4c34b2be6b8abe1044c89abf49b28cee3f8",
    "Content-Type": "application/json",
}

# Upper bound, in seconds, on how long to wait for the server. Without a
# timeout ``requests`` waits indefinitely if the server never responds.
REQUEST_TIMEOUT_SECONDS = 300


def main():
    """Post the sample payload to the locally running API and print the reply."""
    response = requests.post(
        "http://127.0.0.1:8000/api/v1/hackrx/run",
        headers=headers,
        data=json.dumps(test_data),
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    print(f"Status Code: {response.status_code}")
    print(f"Response: {response.text}")


# Guarded so that merely importing this module (for example when pytest
# collects ``test_*.py`` files) does not fire a live HTTP request.
if __name__ == "__main__":
    main()
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|