Kshitijk20 commited on
Commit
3a2c9d3
·
1 Parent(s): 21f5e7a

added code files

Browse files
.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .env
12
+ env
13
+ .env/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python image as base
FROM python:3.12-slim

# Set working directory
WORKDIR /app

# Install system dependencies first: this layer rarely changes, so it stays cached
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency list before installing, so the pip layer is
# re-built only when requirements.txt changes (copying the whole project
# first invalidated the cache on every source edit)
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy the rest of the project files
COPY . /app

# Expose port (if using uvicorn or similar)
EXPOSE 8000

# Default command (update if your entrypoint is different)
# NOTE(review): the app package lives under app/, so the module path may
# need to be "app.main:app" — confirm against the actual entrypoint.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/config/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/config/config.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Model registry consumed by app/utils/config_loader.py and ModelLoader.
llm:
  groq:
    provider: "groq"
    model_name: "qwen/qwen3-32b"
  gemini:
    provider: "gemini"
    # NOTE(review): empty model name — the gemini branch of ModelLoader
    # will fail until a model is configured here.
    model_name: ""

embedding_model:
  openai:
    provider: "openai"
    # Must produce vectors matching the Pinecone index dimension (1536,
    # see create_vectore_store).
    model_name: "text-embedding-3-small"
app/embedding/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/embedding/embeder.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from app.schemas.request_models import QuerySpec
4
+ from typing import Union, List
5
+
6
+
7
def get_query_embedding(query_spec: QuerySpec, embedding_model):
    """Embed the raw query and build optional expansion query strings.

    Args:
        query_spec: Parsed query exposing ``raw_query`` and ``entities``.
        embedding_model: Any object with ``embed_query(text) -> vector``.

    Returns:
        Tuple of (embedding of the raw query, list of expansion strings).
    """
    raw = query_spec.raw_query
    main_embedding = embedding_model.embed_query(raw)

    expansions = []
    if "procedure" in query_spec.entities:
        value = query_spec.entities['procedure']
        # Entity values may arrive as a single string or a list of strings.
        joined = ", ".join(value) if isinstance(value, list) else value
        expansions.append(f"{raw} OR {joined} procedures related")
    return main_embedding, expansions
app/embedding/vectore_store.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from pinecone import Pinecone
4
+ from pinecone import ServerlessSpec
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from datetime import datetime
7
+ from uuid import uuid4
8
+
9
+ current_time = datetime.now()
10
+
11
def create_vectore_store(text_chunks: list, embedding_model):
    """Create (or reuse) the Pinecone index and upsert document chunks.

    Args:
        text_chunks: LangChain ``Document`` chunks to upsert.
        embedding_model: Embedding client for PineconeVectorStore; must
            produce 1536-dimensional vectors to match the index below.

    Returns:
        Tuple of (Pinecone index handle, namespace the chunks were written to).

    NOTE(review): ``current_time`` is captured once at module import, so
    every call in the same process writes to the same namespace — confirm
    this is intended for long-lived workers.
    """
    load_dotenv()
    pinecone_key = os.getenv("PINECONE_API_KEY")
    pc = Pinecone(api_key=pinecone_key)

    index_name = "hackrx-index"  # was an f-string with no placeholders
    if not pc.has_index(index_name):
        pc.create_index(
            name=index_name,
            dimension=1536,  # matches text-embedding-3-small output size
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )

    index = pc.Index(index_name)
    # Timestamped namespace keeps each ingestion run's vectors separate.
    time_string = current_time.strftime("%Y-%m-%d-%H-%M")
    name_space = f"hackrx-index{time_string}"

    uuids = [str(uuid4()) for _ in range(len(text_chunks))]
    vector_store = PineconeVectorStore(index=index, embedding=embedding_model)
    vector_store.add_documents(documents=text_chunks, ids=uuids, namespace=name_space)

    return index, name_space
35
+
36
+
app/ingestion/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/ingestion/file_loader.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymupdf
2
+ import os
3
+ import requests
4
+
5
def load_documents_form_url(url: str):
    """Download a document from *url* and open it for parsing.

    Args:
        url: Direct link to the document blob.

    Returns:
        A ``pymupdf`` document for PDF responses, otherwise the string
        ``"FILE NOT supported"`` (kept for backward compatibility with
        callers that compare against it).

    Raises:
        requests.HTTPError: If the server returns an error status.
    """
    response = requests.get(url, timeout=60)  # avoid hanging on a dead host
    response.raise_for_status()  # Ensure we got a valid response
    # Content-Type may carry parameters (e.g. "application/pdf; charset=...")
    # and may be absent entirely; the original exact-equality check rejected
    # the former and raised KeyError on the latter.
    content_type = response.headers.get('Content-Type', '')
    if content_type.split(';')[0].strip().lower() == 'application/pdf':
        return pymupdf.open(stream=response.content, filetype="pdf")
    return "FILE NOT supported"
17
+
18
+
19
+
app/ingestion/text_splitter.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.documents import Document
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.prompts import PromptTemplate
4
+ from uuid import uuid4
5
+
6
+
7
def text_splitting(doc_content):
    """Split a document's pages into overlapping chunks with metadata.

    Args:
        doc_content: Iterable of page objects exposing ``get_text()``
            (e.g. a ``pymupdf`` document). NOTE(review): the original
            annotated this as ``str``, but the body iterates pages and
            calls ``page.get_text()`` — a document object is expected.

    Returns:
        List of LangChain ``Document`` chunks; empty if the document has
        no extractable text (previously this case raised NameError).
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)

    # Concatenate all page text first. The original re-split the growing
    # buffer inside the loop and discarded every result but the last one —
    # accidental O(n^2) work for an identical final outcome.
    text = ""
    last_page = -1
    for i, page in enumerate(doc_content):
        text += page.get_text()
        last_page = i

    if not text.strip():
        return []

    doc_id = str(uuid4())
    temp_doc = Document(page_content=text, metadata={
        "doc_id": doc_id,
        "page": last_page,
        "chunk_id": f"{doc_id}_p{last_page}",
        "type": "text",
    })
    return splitter.split_documents([temp_doc])
26
+
app/main.py ADDED
File without changes
app/prompts/__init__.py ADDED
File without changes
app/prompts/prompts.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Instruction prompt for the query-parsing LLM call. Plain string: the
# original used an f-string with no placeholders, which would break if a
# literal brace were ever added to the prompt text.
PARSER_PROMPT = """You receive a user's question about an insurance/contract document. Produce a JSON with keys:
- intent (one of: coverage_check, definition, limit_query, waiting_period, exclusions, other)
- entities (map of entity_name -> canonical string)
- constraints (map: plan, time_window, eligible_person, numerical_constraints)
- answer_type (one of: yes_no, short_explain, detailed, clause_list)
Return ONLY the JSON.Make sure that nested fields like "entities" and "constraints" are JSON objects, not strings.
"""
app/reseasoning/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/reseasoning/descision_maker.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.schemas.request_models import QuerySpec, LogicResult
2
+
3
+
4
def evaluate_with_llm(raw_query: str, top_clauses: list, llm):
    """Ask the LLM to judge the retrieved clauses and return a structured decision.

    Args:
        raw_query: The user's original question.
        top_clauses: Retrieved clause objects exposing ``doc_id``, ``page``
            and ``text`` attributes.
        llm: Chat model supporting ``with_structured_output``.

    Returns:
        A ``LogicResult`` whose evidence entries are enriched with the full
        retrieved clause text when a matching clause is found.
    """
    # Number the clauses so the model can cite them unambiguously.
    context_clauses = [
        f"{i}) [source:{c.doc_id} page:{c.page}] {c.text}"
        for i, c in enumerate(top_clauses, 1)
    ]
    # Pre-join once; the original used chr(10) inside the f-string to dodge
    # the pre-3.12 backslash ban, and printed the result as leftover debug.
    clauses_block = "\n".join(context_clauses)

    prompt = f"""
    You are an insurance policy analyst. Question: "{raw_query}"

    Provided clauses (numbered):
    {clauses_block}

    Task:
    1) Decide: COVERED / NOT_COVERED / CONDITIONAL
    2) Summarize the exact clause(s) that justify your decision.
    3) List any conditions, waiting periods, sublimits, or exclusions relevant.
    4) Provide a concise final answer (1-2 sentences).

    Return JSON with these exact keys:
    {{
        "decision": "...",
        "evidence": [
            {{"doc_id": "...", "page": 0, "snippet": "...", "reason": "..."}}
        ],
        "confidence": 0.0,
        "rationale": "...",
        "answer": "..."
    }}
    """

    # Parse the reply directly into LogicResult via structured output.
    structured_llm = llm.with_structured_output(LogicResult)
    result: LogicResult = structured_llm.invoke(prompt)

    # Attach the full retrieved text to each cited evidence entry so the
    # caller does not need to re-query the store.
    enriched_evidence = []
    for ev in result.evidence:
        matched = next(
            (c for c in top_clauses
             if c.doc_id == ev.doc_id and str(c.page) == str(ev.page)),
            None,
        )
        if matched:
            ev.text = matched.text
        enriched_evidence.append(ev)

    result.evidence = enriched_evidence
    return result
app/reseasoning/query_parser.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.utils.model_loader import ModelLoader
2
+ from app.schemas.request_models import QuerySpec
3
+ from app.prompts.prompts import PARSER_PROMPT
4
+
5
def parsing_query(query:str, llm) -> QuerySpec:
    """Parse a natural-language question into a structured ``QuerySpec``.

    Args:
        query: The user's raw question text.
        llm: Chat model supporting ``with_structured_output``.

    Returns:
        The model's reply parsed into a ``QuerySpec`` instance.
    """
    structured_llm = llm.with_structured_output(QuerySpec)

    # Parsing instructions first, then the user's question on its own line.
    full_prompt = "\n".join([PARSER_PROMPT, query])

    return structured_llm.invoke(full_prompt)
app/retrieval/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/retrieval/reranker.py ADDED
File without changes
app/retrieval/retriever.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.schemas.request_models import ClauseHit
2
+
3
+
4
def retrieval_from_pinecone_vectoreStore(pinecone_index, embeddings, top_k= 3, filter_meta = None, namespace= None):
    """Retrieve the top matching chunks from Pinecone.

    Args:
        pinecone_index: Pinecone index object to query.
        embeddings: Vector embedding of the query.
        top_k: Number of chunks to retrieve.
        filter_meta: Optional metadata filter dict.
        namespace: Optional namespace to search within.

    Returns:
        List of ClauseHit objects (lightweight container for chunk info).
    """
    response = pinecone_index.query(
        vector=embeddings,
        top_k=top_k,
        include_metadata=True,
        include_values=False,
        filter=filter_meta,
        namespace=namespace,
    )
    # Each match carries its chunk metadata; missing keys fall back to
    # the same defaults the original used.
    return [
        ClauseHit(
            doc_id=match['metadata']['doc_id'],
            page=match['metadata'].get('page', -1),
            chunk_id=match['metadata'].get('chunk_id', ''),
            text=match['metadata']['text'],
            metadata=match['metadata'],
            score=match['score'],
        )
        for match in response['matches']
    ]
app/schemas/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/schemas/request_models.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, model_validator,field_validator, HttpUrl, Field
2
+ from typing import List, Dict, Any, Optional, Union
3
+ import json
4
class QuerySpec(BaseModel):
    """Structured representation of a parsed user query.

    Produced by the query-parsing LLM call; ``entities`` and ``constraints``
    may arrive as JSON-encoded strings from the model, which the
    ``parse_nested_json`` validator decodes in place.
    """
    raw_query: str  # the user's original question text
    intent: str  # e.g. coverage_check / definition / limit_query (see PARSER_PROMPT)
    entities: Dict[str, Union[str, List[str]]]
    constraints : Dict[str, Any]
    answer_type: str  # e.g. yes_no / short_explain / detailed / clause_list
    followups: Optional[List[str]] = []

    @model_validator(mode = "before")
    @classmethod
    def parse_nested_json(cls, values):
        # LLMs sometimes emit nested objects as JSON strings; decode them so
        # field validation sees real dicts. Invalid JSON is left unchanged
        # and will fail normal field validation downstream.
        # NOTE(review): assumes ``values`` is a dict — confirm the model is
        # never validated from another model instance.
        for field in ['entities', 'constraints']:
            val = values.get(field)
            if isinstance(val, str):
                try:
                    values[field] = json.loads(val)
                except json.JSONDecodeError:
                    pass
        return values
23
+
24
class ClauseHit(BaseModel):
    """One retrieved chunk from the vector store, with its match score."""
    doc_id : str
    page: int
    chunk_id: str
    text: str
    metadata: Dict[str, Any]  # raw per-match metadata from the vector store
    score: float  # similarity score returned by the query
    # NOTE(review): presumably set by a later re-ranking step — not
    # assigned anywhere in this module.
    boost: Optional[float] = None
    combined_score: Optional[float] = None

    @field_validator("metadata", mode="before")
    def parse_metadata(cls, v):
        # Metadata may arrive JSON-encoded; decode it, falling back to an
        # empty dict for blank or malformed strings.
        if isinstance(v, str):
            try:
                return json.loads(v) if v.strip() else {}
            except json.JSONDecodeError:
                return {}
        return v
42
+
43
class LogicResult(BaseModel):
    """Structured decision produced by the LLM evaluation step."""
    answer: str  # concise final answer text
    decision: str # "covered"/"not_covered"/"conditional"
    confidence: float
    evidence: List[ClauseHit]  # clauses cited by the model; text enriched by the caller
    rationale: str
49
+
50
class HackRxRunRequest(BaseModel):
    """Request body for the HackRx run endpoint."""
    documents: HttpUrl = Field(
        ...,
        description="URL to the document (PDF, DOCX, or email blob)"
    )
    questions: List[str] = Field(
        ...,
        description="List of questions to query against the document"
    )
59
+
60
+
61
+
62
+
63
+
app/schemas/response_models.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, model_validator,field_validator
2
+ from typing import List, Dict, Any, Optional
3
+ import json
4
+ from app.schemas.request_models import QuerySpec, LogicResult
5
class APIResponse(BaseModel):
    """Response body: the list of answers returned to the client."""
    # NOTE(review): bare ``List`` — likely List[str]; confirm with the
    # endpoint that builds this response before tightening the annotation.
    answers : List
app/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file is automatically created to mark the directory as a package.
app/utils/config_loader.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+
4
def load_config(config_path: str = "app/config/config.yaml") -> dict:
    """Load the YAML application config.

    Args:
        config_path: Path to the YAML file, relative to the working directory.

    Returns:
        The parsed configuration as a dict.

    Raises:
        FileNotFoundError: If the config file does not exist.
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Explicit encoding: without it, Windows falls back to the ANSI
    # codepage and non-ASCII characters in the config fail to decode.
    with open(config_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)
app/utils/logger.py ADDED
File without changes
app/utils/model_loader.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from config_loader import
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from pydantic import BaseModel, Field
5
+ from typing import Literal, Optional,Any
6
+ from app.utils.config_loader import load_config
7
+ from langchain_groq import ChatGroq
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from dotenv import load_dotenv
10
+ # from langchain_openai import OpenAIEmbeddings
11
+ from langchain_community.embeddings import OpenAIEmbeddings
12
class ConfigLoader:
    """Loads the YAML application config once and exposes dictionary-style
    access to its top-level sections."""

    def __init__(self):
        print("Loading config....")
        self.config = load_config()

    def __getitem__(self, key):
        """Allow ``loader["llm"]``-style lookups into the loaded config."""
        return self.config[key]
19
+
20
+
21
class ModelLoader(BaseModel):
    """Factory for chat/embedding clients, configured via config.yaml.

    ``model_provider`` selects the backend. Note the "openai" branch
    returns an *embeddings* client rather than a chat model, mirroring
    the original design.
    """
    model_provider: Literal["groq", "gemini", "openai"] = "gemini"
    # Populated after construction; excluded from serialization.
    config: Optional[ConfigLoader] = Field(default = None, exclude = True)

    def model_post_init(self, __context: Any) -> None:
        # Pydantic v2 hook, runs after validation: every instance gets a
        # freshly loaded configuration.
        self.config = ConfigLoader()

    class Config:
        arbitrary_types_allowed = True  # ConfigLoader is not a pydantic type

    def load_llm(self):
        """Instantiate and return the client for ``model_provider``.

        Returns:
            ChatGroq or ChatGoogleGenerativeAI for chat providers, or
            OpenAIEmbeddings for the "openai" provider.

        Raises:
            ValueError: If ``model_provider`` is not a supported value.
        """
        print("LLM loading...")
        print("Loading model from provider: ")
        # Load .env once for every branch; the groq branch previously
        # skipped this while gemini and openai called it.
        load_dotenv()
        if self.model_provider == "groq":
            print("Loading model from GROQ:")
            groq_api_key = os.getenv("GROQ_API_KEY")
            model_name = self.config["llm"]["groq"]["model_name"]
            return ChatGroq(model=model_name, api_key=groq_api_key)
        if self.model_provider == "gemini":
            print("Loading model from gemini:")
            gemini_api_key = os.getenv("GEMINI_API_KEY")
            model_name = self.config["llm"]["gemini"]["model_name"]
            return ChatGoogleGenerativeAI(
                model=model_name,
                google_api_key=gemini_api_key,
            )
        if self.model_provider == "openai":
            print("Loading model from openai:")
            api_key = os.getenv("OPENAI_API_KEY")
            model_name = self.config["embedding_model"]["openai"]["model_name"]
            return OpenAIEmbeddings(model=model_name, api_key=api_key)
        raise ValueError(f"Unsupported model provider: {self.model_provider}")
60
+
61
+
62
+
63
+
experiments.ipynb ADDED
@@ -0,0 +1,1412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "7896ff7a",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "'c:\\\\code\\\\Bajaj HackRx\\\\Rag_app'"
13
+ ]
14
+ },
15
+ "execution_count": 1,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "%pwd"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 5,
27
+ "id": "8638c1e6",
28
+ "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "name": "stdout",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\__init__.py\n",
35
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\config\\__init__.py\n",
36
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\embedding\\__init__.py\n",
37
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\ingestion\\__init__.py\n",
38
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\reseasoning\\__init__.py\n",
39
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\retrieval\\__init__.py\n",
40
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\schemas\\__init__.py\n",
41
+ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\utils\\__init__.py\n"
42
+ ]
43
+ }
44
+ ],
45
+ "source": [
46
+ "import os \n",
47
+ "\n",
48
+ "for directories in os.walk(\"c:\\\\code\\\\Bajaj HackRx\\\\Rag_app\\\\app\"):\n",
49
+ " init_path = os.path.join(directories[0], '__init__.py')\n",
50
+ " if not os.path.exists(init_path):\n",
51
+ " with open(init_path, 'w') as init_file:\n",
52
+ " init_file.write(\"init file\")\n",
53
+ " print(f\"Created: {init_path}\")"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "markdown",
58
+ "id": "4fedace2",
59
+ "metadata": {},
60
+ "source": [
61
+ "## 1. Input document\n",
62
+ "### Input Requirements:\n",
63
+ "\n",
64
+ "- Process PDFs, DOCX, and email documents\n",
65
+ "- Handle policy/contract data efficiently\n",
66
+ "- Parse natural language queries"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 7,
72
+ "id": "d47f278d",
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "import fitz\n",
77
+ "from langchain_core.documents import Document\n",
78
+ "from langchain_groq import ChatGroq\n",
79
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
80
+ "from langchain.schema.messages import HumanMessage\n",
81
+ "from langchain_community.vectorstores import FAISS\n",
82
+ "import os \n",
83
+ "from langchain.prompts import PromptTemplate\n",
84
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
85
+ "from dotenv import load_dotenv\n",
86
+ "load_dotenv()\n",
87
+ "import pymupdf"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 2,
93
+ "id": "b7a58fc9",
94
+ "metadata": {},
95
+ "outputs": [],
96
+ "source": [
97
+ "api_key= os.getenv(\"GEMINI_API_KEY\")"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 3,
103
+ "id": "6d065c7c",
104
+ "metadata": {},
105
+ "outputs": [],
106
+ "source": [
107
+ "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
108
+ "\n",
109
+ "embeddings = GoogleGenerativeAIEmbeddings(model = \"models/gemini-embedding-001\",google_api_key = api_key)\n",
110
+ "vector = embeddings.embed_query(\"hello, world\")"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 9,
116
+ "id": "d0706163",
117
+ "metadata": {},
118
+ "outputs": [
119
+ {
120
+ "data": {
121
+ "text/plain": [
122
+ "[-0.02842607907950878,\n",
123
+ " 0.004132709465920925,\n",
124
+ " 0.010386144742369652,\n",
125
+ " -0.09004563093185425,\n",
126
+ " -0.0044305226765573025]"
127
+ ]
128
+ },
129
+ "execution_count": 9,
130
+ "metadata": {},
131
+ "output_type": "execute_result"
132
+ }
133
+ ],
134
+ "source": [
135
+ "vector[:5]"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": 5,
141
+ "id": "01d64928",
142
+ "metadata": {},
143
+ "outputs": [],
144
+ "source": [
145
+ "import requests\n",
146
+ "url = \"https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D\"\n",
147
+ "response = requests.get(url)"
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "code",
152
+ "execution_count": 94,
153
+ "id": "80cf7260",
154
+ "metadata": {},
155
+ "outputs": [],
156
+ "source": [
157
+ "import requests\n",
158
+ "url = \"https://docs.google.com/document/d/13pujQKEZS37mEHEfWDnaqb2FlvDnDwzkuJX88Y9w9EA/edit?usp=sharing\"\n",
159
+ "response = requests.get(url)"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 95,
165
+ "id": "56afd5c0",
166
+ "metadata": {},
167
+ "outputs": [
168
+ {
169
+ "data": {
170
+ "text/plain": [
171
+ "'text/html; charset=utf-8'"
172
+ ]
173
+ },
174
+ "execution_count": 95,
175
+ "metadata": {},
176
+ "output_type": "execute_result"
177
+ }
178
+ ],
179
+ "source": [
180
+ "response.headers['Content-Type']"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": 8,
186
+ "id": "4d3fe1fb",
187
+ "metadata": {},
188
+ "outputs": [
189
+ {
190
+ "data": {
191
+ "text/plain": [
192
+ "157"
193
+ ]
194
+ },
195
+ "execution_count": 8,
196
+ "metadata": {},
197
+ "output_type": "execute_result"
198
+ }
199
+ ],
200
+ "source": [
201
+ "\n",
202
+ "response.raise_for_status()\n",
203
+ "pdf_bytes = response.content\n",
204
+ "doc = pymupdf.open(stream=pdf_bytes, filetype=\"pdf\")\n",
205
+ "text = \"\"\n",
206
+ "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
207
+ "pages = 0\n",
208
+ "from uuid import uuid4\n",
209
+ "uuid = str(uuid4())\n",
210
+ "for i,page in enumerate(doc): \n",
211
+ " text += page.get_text()\n",
212
+ " uuid = str(uuid4())\n",
213
+ " if text.strip():\n",
214
+ " temp_doc = Document(page_content = text, metadata={\n",
215
+ " \"doc_id\": uuid,\n",
216
+ " \"page\":i,\n",
217
+ " \"chunk_id\": f\"{uuid}_p{i}\",\n",
218
+ " \"type\":\"text\"\n",
219
+ " })\n",
220
+ " text_chunks = splitter.split_documents([temp_doc])\n",
221
+ "\n",
222
+ "len(text_chunks)"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": 9,
228
+ "id": "08cfbca7",
229
+ "metadata": {},
230
+ "outputs": [
231
+ {
232
+ "data": {
233
+ "text/plain": [
234
+ "list"
235
+ ]
236
+ },
237
+ "execution_count": 9,
238
+ "metadata": {},
239
+ "output_type": "execute_result"
240
+ }
241
+ ],
242
+ "source": [
243
+ "type(text_chunks)"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "id": "c8f47031",
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": [
253
+ "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
254
+ "pages = 0\n",
255
+ "from uuid import uuid4\n",
256
+ "uuid = str(uuid4())\n",
257
+ "for i,page in enumerate(doc): \n",
258
+ " text += page.get_text()\n",
259
+ " uuid = str(uuid4())\n",
260
+ " if text.strip():\n",
261
+ " temp_doc = Document(page_content = text, metadata={\n",
262
+ " \"doc_id\": uuid,\n",
263
+ " \"page\":i,\n",
264
+ " \"chunk_id\": f\"{uuid}_p{i}\",\n",
265
+ " \"type\":\"text\"\n",
266
+ " })\n",
267
+ " text_chunks = splitter.split_documents([temp_doc])\n",
268
+ "\n",
269
+ "len(text_chunks)"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 5,
275
+ "id": "6fe8d9ab",
276
+ "metadata": {},
277
+ "outputs": [
278
+ {
279
+ "data": {
280
+ "text/plain": [
281
+ "157"
282
+ ]
283
+ },
284
+ "execution_count": 5,
285
+ "metadata": {},
286
+ "output_type": "execute_result"
287
+ }
288
+ ],
289
+ "source": [
290
+ "pages = 0\n",
291
+ "from uuid import uuid4\n",
292
+ "uuid = str(uuid4())\n",
293
+ "for i,page in enumerate(doc): \n",
294
+ " text += page.get_text()\n",
295
+ " uuid = str(uuid4())\n",
296
+ " if text.strip():\n",
297
+ " temp_doc = Document(page_content = text, metadata={\n",
298
+ " \"doc_id\": uuid,\n",
299
+ " \"page\":i,\n",
300
+ " \"chunk_id\": f\"{uuid}_p{i}\",\n",
301
+ " \"type\":\"text\"\n",
302
+ " })\n",
303
+ " text_chunks = splitter.split_documents([temp_doc])\n",
304
+ "\n",
305
+ "len(text_chunks)"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 124,
311
+ "id": "b7456368",
312
+ "metadata": {},
313
+ "outputs": [
314
+ {
315
+ "name": "stdout",
316
+ "output_type": "stream",
317
+ "text": [
318
+ "National Insurance Co. Ltd. \n",
319
+ "Premises No. 18-0374, Plot no. CBD-81, \n",
320
+ "New Town, Kolkata - 700156 \n",
321
+ "Page 1 of 25 \n",
322
+ "National Parivar Mediclaim Plus Policy \n",
323
+ "UIN: NICHLIP25039V032425 \n",
324
+ " \n",
325
+ "National Insurance Company Limited \n",
326
+ " \n",
327
+ " \n",
328
+ " \n",
329
+ " \n",
330
+ " \n",
331
+ "CIN - U10200WB1906GOI001713 \n",
332
+ "IRDAI Regn. No. – 58 \n",
333
+ " \n",
334
+ " Issuing Office \n",
335
+ "National Parivar Mediclaim Plus Policy \n",
336
+ " \n",
337
+ "Whereas the Proposer designated in the schedule hereto has by a Proposal together with Declaration, which shall be the basis of \n",
338
+ "this contract and is deemed to be incorporated herein, has applied to National Insurance Company Ltd. (hereinafter called the \n",
339
+ "Company), for the insurance hereinafter set forth, in respect of person(s)/ family members named in the schedule hereto\n"
340
+ ]
341
+ }
342
+ ],
343
+ "source": [
344
+ "print(text_chunks[0].page_content)"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 125,
350
+ "id": "84e3b7e6",
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "from uuid import uuid4\n",
355
+ "uuids = [str(uuid4()) for _ in range(len(text_chunks)) ]"
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "markdown",
360
+ "id": "a7b3a0a7",
361
+ "metadata": {},
362
+ "source": [
363
+ "### Setting up Pinecone Vectore Store"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": 1,
369
+ "id": "6a98c3e3",
370
+ "metadata": {},
371
+ "outputs": [],
372
+ "source": [
373
+ "import os\n",
374
+ "from dotenv import load_dotenv\n",
375
+ "load_dotenv()\n",
376
+ "pinecone_key = os.getenv(\"PINECONE_API_KEY\")\n"
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "code",
381
+ "execution_count": 2,
382
+ "id": "e9bd7561",
383
+ "metadata": {},
384
+ "outputs": [
385
+ {
386
+ "data": {
387
+ "text/plain": [
388
+ "<pinecone.pinecone.Pinecone at 0x1e0239e9e80>"
389
+ ]
390
+ },
391
+ "execution_count": 2,
392
+ "metadata": {},
393
+ "output_type": "execute_result"
394
+ }
395
+ ],
396
+ "source": [
397
+ "from pinecone import Pinecone\n",
398
+ "pc = Pinecone(api_key=pinecone_key)\n",
399
+ "pc\n"
400
+ ]
401
+ },
402
+ {
403
+ "cell_type": "code",
404
+ "execution_count": 105,
405
+ "id": "07746b7f",
406
+ "metadata": {},
407
+ "outputs": [
408
+ {
409
+ "name": "stdout",
410
+ "output_type": "stream",
411
+ "text": [
412
+ "2025-08-13-16-36\n"
413
+ ]
414
+ }
415
+ ],
416
+ "source": [
417
+ "from pinecone import ServerlessSpec\n",
418
+ "from datetime import datetime\n",
419
+ "current_time = datetime.now()\n",
420
+ "time_string = current_time.strftime(\"%Y-%m-%d-%H-%M\")\n",
421
+ "print(time_string)\n",
422
+ "index_name = f\"hackrx-index{time_string}\"\n",
423
+ "# index_name = \"hackrx-index\"\n",
424
+ "if not pc.has_index(index_name):\n",
425
+ " pc.create_index(\n",
426
+ " name = index_name,\n",
427
+ " dimension=1536,\n",
428
+ " metric=\"cosine\",\n",
429
+ " spec = ServerlessSpec(cloud=\"aws\", region=\"us-east-1\")\n",
430
+ " )\n",
431
+ "\n",
432
+ "index = pc.Index(index_name)"
433
+ ]
434
+ },
435
+ {
436
+ "cell_type": "code",
437
+ "execution_count": 4,
438
+ "id": "e6af117d",
439
+ "metadata": {},
440
+ "outputs": [
441
+ {
442
+ "data": {
443
+ "text/plain": [
444
+ "<pinecone.db_data.index.Index at 0x1e023e2c6e0>"
445
+ ]
446
+ },
447
+ "execution_count": 4,
448
+ "metadata": {},
449
+ "output_type": "execute_result"
450
+ }
451
+ ],
452
+ "source": [
453
+ "index"
454
+ ]
455
+ },
456
+ {
457
+ "cell_type": "code",
458
+ "execution_count": 109,
459
+ "id": "7ee7c02b",
460
+ "metadata": {},
461
+ "outputs": [
462
+ {
463
+ "name": "stderr",
464
+ "output_type": "stream",
465
+ "text": [
466
+ "C:\\Users\\hp\\AppData\\Local\\Temp\\ipykernel_9600\\2571001968.py:7: LangChainDeprecationWarning: The class `OpenAIEmbeddings` was deprecated in LangChain 0.0.9 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import OpenAIEmbeddings``.\n",
467
+ " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n"
468
+ ]
469
+ }
470
+ ],
471
+ "source": [
472
+ "# from langchain_openai import \n",
473
+ "from langchain.embeddings import OpenAIEmbeddings\n",
474
+ "\n",
475
+ "from dotenv import load_dotenv\n",
476
+ "load_dotenv()\n",
477
+ "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\")\n",
478
+ "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
479
+ ]
480
+ },
481
+ {
482
+ "cell_type": "code",
483
+ "execution_count": 6,
484
+ "id": "43151b5f",
485
+ "metadata": {},
486
+ "outputs": [],
487
+ "source": [
488
+ "from langchain_pinecone import PineconeVectorStore\n",
489
+ "vector_store = PineconeVectorStore(index = index, embedding=embeddings)"
490
+ ]
491
+ },
492
+ {
493
+ "cell_type": "code",
494
+ "execution_count": 133,
495
+ "id": "03fb29a9",
496
+ "metadata": {},
497
+ "outputs": [
498
+ {
499
+ "data": {
500
+ "text/plain": [
501
+ "['174168c5-ee26-4f4a-9aac-82c890cae977',\n",
502
+ " 'cd33ff12-209e-4d3b-9ac9-cfffbb2f160f',\n",
503
+ " '8f7cf61f-8b6a-4412-a45e-90614b04e4bf',\n",
504
+ " '8696beaa-fa55-4ac6-9000-0f2be8f1a39b',\n",
505
+ " 'f89f37a0-5382-4202-a320-c3374ca97470',\n",
506
+ " '38e86c9f-a9a1-49a1-b1b6-0e0fd9242012',\n",
507
+ " '9239c5e9-493e-45cc-b621-962c7cdd18ff',\n",
508
+ " '1e3c9771-f5f2-45fd-b979-a0851d6285ff',\n",
509
+ " '54a9e37f-84d9-4ea5-8a47-be7a500d94b6',\n",
510
+ " '2ed09d29-6900-44d8-83e4-d503a5dcde15',\n",
511
+ " '8103e53c-1a21-47aa-9e23-0a817ddbdca5',\n",
512
+ " '0e879567-5c16-42b1-bd9c-bf919ef5b394',\n",
513
+ " 'fe2003c6-8409-4045-acda-1f1c6c6a6699',\n",
514
+ " '516acaa0-5a4d-4b0e-ac3c-5f03db8084b7',\n",
515
+ " '4bca0e17-ca5a-43bf-b0c9-6f2572bd2f5f',\n",
516
+ " '09a0a182-ee47-4569-a77e-b4a8adb82682',\n",
517
+ " '3239e274-c789-410f-845e-791c0c4b6b03',\n",
518
+ " 'fe825c23-84d9-4ad2-a853-74f8a1d02928',\n",
519
+ " 'e5404438-f6ec-4af5-8a5c-4fa884e7022f',\n",
520
+ " 'c9aeb34b-b556-46cf-b762-56ea338653e8',\n",
521
+ " '9b6d478d-32fa-4ffd-96ca-95822c255cf1',\n",
522
+ " '6652b6b8-1da2-4b15-9aea-26f79bc0a6d5',\n",
523
+ " 'e8d57256-a599-4f9e-93e8-392334098dcb',\n",
524
+ " '5491919b-0aa5-4080-9ee5-c319bc53749b',\n",
525
+ " 'a00fec0d-79bc-47be-97ea-dbdcfefb828f',\n",
526
+ " '71645e15-9440-40f2-b982-0376da83d0e3',\n",
527
+ " '6df619c8-ef76-44de-b74a-2116148d89aa',\n",
528
+ " 'f8151f79-56be-4fed-b364-b22ea4f2a2d0',\n",
529
+ " '739c7bc6-0f9d-45ce-9bf0-db7c7dbc939b',\n",
530
+ " '791401f5-46fe-4a9c-992b-da0135d72ae8',\n",
531
+ " '1e67b489-9e3f-4e48-8807-d694963ede5b',\n",
532
+ " '8e3892a6-7e26-4ecb-b8d0-c6a48126dbca',\n",
533
+ " 'ffbcd2e2-8eb3-4e1b-997d-f0b8f004b95f',\n",
534
+ " 'ac57e24d-8641-46d4-93aa-596664ed5a63',\n",
535
+ " 'fc31c598-4716-4312-9091-75d9c879cac5',\n",
536
+ " 'b08141e7-0519-4e92-bf5c-56849bc45c7f',\n",
537
+ " '2e7a4727-f118-461d-a4aa-7f223ffeffcb',\n",
538
+ " '1dcdf7e5-9d2f-4360-b9a9-ea00b9398fb4',\n",
539
+ " '716f55ba-b624-43ae-98ce-0f77b063f17f',\n",
540
+ " 'a42dbb78-85dc-4ee6-9b9c-c7a77093bae8',\n",
541
+ " '57d5d937-2fef-4aae-86bb-6002523fa30f',\n",
542
+ " '3400762f-1110-4e5f-a726-4f3edea01f02',\n",
543
+ " '2d5b5f30-63d6-4399-8e69-806697e64307',\n",
544
+ " '8feb1525-189a-45c1-ac23-0b33ba4994f3',\n",
545
+ " '0a503b10-b5da-4cb5-a1b8-bd6f0f34ce64',\n",
546
+ " '725010be-ebc8-45ee-be3e-2af2a8d1a5ee',\n",
547
+ " '52016839-2213-4db2-b397-a51e7bca1882',\n",
548
+ " '661398e1-eff3-4e1a-9ff4-b738bd2ce606',\n",
549
+ " '5a37f469-e018-4866-9224-30734dda0161',\n",
550
+ " '8028ec15-908a-4e98-8756-bff08d3ae96f',\n",
551
+ " 'd8541648-8d20-4f79-a5bb-4160106b0795',\n",
552
+ " '57aa5fb9-a383-4174-b88c-fd26e0c2e1e9',\n",
553
+ " '8d539cf3-12bd-4844-b306-9ead1cb12540',\n",
554
+ " '30e4322e-cf5e-4cd7-a56d-5529391d1b48',\n",
555
+ " '81f18135-fb7e-49d5-b1ce-1c833246b7e6',\n",
556
+ " 'c8f70828-1fc6-40cf-acd8-f7910841a3f6',\n",
557
+ " '2ea87054-e3cb-436b-a418-e7dde7b66488',\n",
558
+ " 'ec8797e6-1e89-403d-8f4b-a7528b856a9a',\n",
559
+ " '9ec0c175-c01a-4713-8042-a9dc5057cd6e',\n",
560
+ " 'df1dcb67-07a8-49d6-8bfa-2a0b857907ca',\n",
561
+ " 'ce065535-e2ca-45f4-b278-96d46356f511',\n",
562
+ " '161f51df-b3ca-4c54-b686-c8b68543da7e',\n",
563
+ " '9528a8fc-45e0-43bc-9933-20a0e5d25a91',\n",
564
+ " '6c3840f5-8d8f-4ed2-bf17-f82a88f8cfc4',\n",
565
+ " '46a9b7b4-c460-4ae4-afa7-6bb8478f6072',\n",
566
+ " '76df9287-461a-4cc8-9f34-7420f44ff0bf',\n",
567
+ " '80265ffa-ef7c-4721-956b-6010745f577c',\n",
568
+ " 'd09fc759-c639-4f5c-9a01-5f0f3bc359af',\n",
569
+ " '78b04c1b-38ed-493a-b04c-aebb454c87e0',\n",
570
+ " 'b6e68d75-11fe-428c-898f-56c58a6f2206',\n",
571
+ " '45255213-2a89-428e-a97a-371a1f78bfdd',\n",
572
+ " '516b872e-3c3c-442d-8be3-254da689bc13',\n",
573
+ " 'e16f0153-1bd8-45a0-af2b-35d581e23e66',\n",
574
+ " 'c36378f3-8839-48ba-ac46-86916439de77',\n",
575
+ " '82261df7-bbbc-47d6-9124-386e12f28c63',\n",
576
+ " 'ebd9c5d0-9bec-4901-946d-d663d1e66e89',\n",
577
+ " 'd33ef9e8-db28-4f12-9e1e-45041b0b0193',\n",
578
+ " '867171b7-f18f-451b-8fe0-879fedc98c11',\n",
579
+ " 'fcbed948-b63d-4462-adc5-e3d48cd3f27c',\n",
580
+ " 'c21c5ccd-c110-4790-8a83-6a6f1d6d5bab',\n",
581
+ " '38d43887-22f9-4d11-b94d-dc96de2f3313',\n",
582
+ " 'fdfa031f-8160-4623-8b9a-1e6d2ad9ac0b',\n",
583
+ " 'd51bbdba-7403-4a33-8c3b-e98931fc41a9',\n",
584
+ " 'a11eca40-2a42-4213-9996-d7d4242f41f3',\n",
585
+ " '1e159458-4db8-407f-9729-3b79f57870b2',\n",
586
+ " 'dc3582df-3f5b-4865-b9b3-8122455b22d8',\n",
587
+ " 'd51ea955-c3f9-4b68-a4d8-6f36f0087e90',\n",
588
+ " '8c0b67e8-1355-489f-bdbb-e16eae04a585',\n",
589
+ " '7c35e509-23d3-4d80-9863-3172bc1c7963',\n",
590
+ " '94b2eb43-5a75-4e1f-a9ff-58029cf50431',\n",
591
+ " '9e885f6e-67d6-4fc7-9fc0-25c2443628d6',\n",
592
+ " 'b3cd4d17-6feb-417a-a34d-4fab409e22b3',\n",
593
+ " 'bead51f3-b31f-47b1-b290-1616a3ed8008',\n",
594
+ " '1e197e22-0230-4c09-80ae-c7d63433ac64',\n",
595
+ " '68395eb4-67a4-4262-af1c-9961c787f8cc',\n",
596
+ " '84c5b8dd-098b-40e4-b86f-c341dcf8395d',\n",
597
+ " '1361b7ee-849f-4d68-b3c3-42e9103ec6fa',\n",
598
+ " '4fbe5da0-6f3e-42f3-b99e-6081f44e0cd8',\n",
599
+ " 'ac1975b1-d034-488f-b613-28cf3782d0a8',\n",
600
+ " '0017d512-f135-4123-b694-665bc6e11e64',\n",
601
+ " 'f13798aa-3424-4b38-a7bf-f5abe366120f',\n",
602
+ " '9dfcec61-f8a7-46be-98ae-7f54a5b53e7b',\n",
603
+ " '44f6beba-fca5-4509-b814-662ddf5fad29',\n",
604
+ " 'd87aedeb-16f1-4ccf-8728-fbbdfde6310f',\n",
605
+ " '3eeb99d0-9c2f-4854-befd-3001154ca693',\n",
606
+ " 'dbb2f8f2-d0a4-4288-8bbf-be24cb25360e',\n",
607
+ " '0afd4ed7-a742-4230-a3e8-f59b4ed3af0f',\n",
608
+ " '68cc2545-e9ee-4772-a968-5affe4eb80b7',\n",
609
+ " '772df9fb-4da6-45a3-9cfc-fb5fec014153',\n",
610
+ " 'a5bf6d49-afad-48a6-8ec7-d3b3e1c945d4',\n",
611
+ " '3c4e4cfe-ce5f-48e4-bb2e-7d7080a6d9df',\n",
612
+ " 'fdd184e8-2c0c-4a0d-aad1-425625c0acd8',\n",
613
+ " '6059f8f3-bb47-4e8e-82f1-3b3cf42293ce',\n",
614
+ " '32e22ff0-133e-4f0c-b9f0-d18108eada71',\n",
615
+ " 'cd3a73dc-b2df-494a-80c1-7edd34655e6b',\n",
616
+ " '081bb658-b2de-4470-9bf0-599595e32070',\n",
617
+ " '9c9089be-1648-49ee-a296-4a2b0e629962',\n",
618
+ " 'f90feeb1-dbb3-45c4-a141-aaffac9e9735',\n",
619
+ " '931dbe4e-27ae-407c-b0a1-552cfadcf124',\n",
620
+ " '5d1afa42-297f-475b-bbb3-d82b03b972f8',\n",
621
+ " '75e2ee6b-9b59-4067-b59d-e12a6a29fb1f',\n",
622
+ " 'd1f10078-a911-40d3-8def-36e11119d18b',\n",
623
+ " '7380d7e5-8d51-492e-b6fe-042863fdb84e',\n",
624
+ " '25d829a5-e681-4605-91b3-84b5589bca85',\n",
625
+ " 'b901a4ad-11fa-4241-b6c0-2f1ce4ab5913',\n",
626
+ " 'bcd5b4b2-efa6-4b89-a639-aae6afea19bf',\n",
627
+ " '4f657335-b7e7-4918-b597-664db98ab9e5',\n",
628
+ " '6b21048e-5bde-481a-9450-ea92c219741e',\n",
629
+ " 'e209717c-6925-4cdd-b4a6-26230031d4b3',\n",
630
+ " '2795137e-f3db-4c3a-a8be-4a4a62e2d83c',\n",
631
+ " '43660962-d892-4a23-bdac-9825c5e00623',\n",
632
+ " 'de594872-f941-4575-b7ec-6e66a222ca9d',\n",
633
+ " '8dd521bd-02d5-44d0-b35b-2bc82c68ca87',\n",
634
+ " 'de06a779-7d61-4240-a0e2-ac0b559469b1',\n",
635
+ " '7892dcff-1b29-495e-a2a4-17cef5a7904d',\n",
636
+ " 'a646b497-4d7a-4f4e-9b5d-4b989da6e26d',\n",
637
+ " '2d9453bd-381f-4e20-bac4-27edbab64a5b',\n",
638
+ " '45b2ff28-1a2b-4ac1-b30c-8e19dbd95943',\n",
639
+ " '1f229b56-0839-495f-8ecb-a281eaaaa452',\n",
640
+ " '763e5982-0827-41f9-b077-054d13782e69',\n",
641
+ " 'c48d5c1b-6a70-41f2-8263-ac35244768e7',\n",
642
+ " '65c4c939-b2a3-4dd8-b9c1-8a4585277859',\n",
643
+ " 'c0954e09-6856-4a52-be96-059b9ad381d2',\n",
644
+ " '484985c8-0f45-4289-904c-6143be565287',\n",
645
+ " 'efedb28c-cc8e-4aad-ae86-e1126dfc960a',\n",
646
+ " 'c41269f7-6a1a-4122-9326-9d4f08f7fa46',\n",
647
+ " '8b53ddf1-8f7f-4902-b7f7-8059725ffb2b',\n",
648
+ " '21ae63cb-649f-4b10-a67d-7f900a4185eb',\n",
649
+ " '80610c06-6a16-44e7-9a43-dd95fd89e720',\n",
650
+ " '8af13230-ea5f-425d-a4f0-e37acd8e7242',\n",
651
+ " '3b8c80fa-a860-4324-9faa-d46848cd62c2',\n",
652
+ " 'd358125e-6b25-4845-9303-b9f94ee9b1d9',\n",
653
+ " '6dea7e26-2408-411f-8867-8251fe672319',\n",
654
+ " '98564a4c-aeda-4556-af00-4ebd23cb407b',\n",
655
+ " '7553a543-572c-4418-917f-9a6e7e62d155',\n",
656
+ " 'c44444d0-f3a6-4f0d-b907-666d0b6c0d08',\n",
657
+ " '12b11de7-032b-404e-9cfe-3d9ba260abd7',\n",
658
+ " '9ac75c85-77d7-4418-8842-7997895d4400',\n",
659
+ " 'e5d580eb-7c8c-451b-9e44-00386a72f47c',\n",
660
+ " '923710d5-ff8f-44ed-97a7-ee40cb69ffba',\n",
661
+ " 'b2990aa7-2b84-48cb-8abc-ee30719c5c86',\n",
662
+ " 'f65e8225-3b98-4702-8b46-74b6703407b7',\n",
663
+ " '992a965a-1c9c-46c7-bfca-4f0c99f33bf6',\n",
664
+ " '18a1af9f-a788-45c7-9827-d6fb07c283d1',\n",
665
+ " '5833c1f7-6d17-4308-919c-9c022e4cf98d',\n",
666
+ " '3541476d-4fd7-4249-ae4f-c86d734001ca',\n",
667
+ " '5f66b974-37ae-4e8a-a2eb-7d72f0d75d3c',\n",
668
+ " '400717e9-430e-4bfc-9deb-f019dd5055a9',\n",
669
+ " '356d2916-f094-48f2-8f5d-4658dd4209a5',\n",
670
+ " '3de80d7b-42c5-456e-9a1b-7cdb75749df4',\n",
671
+ " '156e996d-302b-4dbc-a3a5-db7a518f4a4c',\n",
672
+ " '606c46a9-808c-4d33-bcd4-deac1e3b55a8',\n",
673
+ " 'd9a44989-531c-4237-838a-8393479da64d',\n",
674
+ " '2c26cc4c-251c-4d4f-ba17-309692015c4e',\n",
675
+ " '74d76b64-1674-40c0-8b41-19b1eebe05a7',\n",
676
+ " '71d03984-c626-4160-9430-528be9fedf59',\n",
677
+ " 'cd48ebdf-7d40-4e61-87ab-f9eea638a74d',\n",
678
+ " '8e8b2c73-ce6b-456c-897d-89fbfd75fa26',\n",
679
+ " '9434a9f5-8909-45fb-a023-a4c61e8d4764',\n",
680
+ " '7bd524c2-489a-4594-95bc-01b964c2c64d',\n",
681
+ " 'f8e787dd-ad34-464d-b21f-ed6111c3fa30',\n",
682
+ " '1a79c0b3-9b64-4809-813b-0480de369971',\n",
683
+ " '89aa9b61-59e5-4f78-9f94-c145c753625c',\n",
684
+ " '165c1f92-6a6b-4cce-b088-3a73c8c72c24',\n",
685
+ " 'b7e1f173-6f42-4f91-a5f5-cdc2289548b4',\n",
686
+ " '5320958c-6b8d-4445-9aa8-fb8e652198a5',\n",
687
+ " '8efdbae0-1a07-4278-a657-e05cd1435753',\n",
688
+ " '71a4c5c4-93a3-4839-acc7-d80bbcd4f774',\n",
689
+ " '97463e5d-cecf-4cfa-bef5-5d6e9c8f0791',\n",
690
+ " 'fbbd1d5a-7390-4011-b6c6-701fe5cfc1da',\n",
691
+ " '09b40129-1ddc-4e93-bb40-c93865f4219e',\n",
692
+ " '5569a342-0bde-4356-a38e-af426c796693',\n",
693
+ " 'b09d66b0-245f-440f-a7f4-213bff7ba8ca',\n",
694
+ " '5f62dabc-c220-44ce-b6f8-04adf37186c6',\n",
695
+ " '273d9cd4-590d-4ece-9715-1ed201d3b53d',\n",
696
+ " 'eb5c2c74-0de4-4870-aef4-b65a59fa502d',\n",
697
+ " '5f7f5339-9919-44aa-b10f-449f16ed5df4',\n",
698
+ " '3a890f5b-b380-4ab8-bfa9-f67f74dceac3',\n",
699
+ " 'b47959e7-6038-4733-9e72-beaf98a731e4',\n",
700
+ " 'bb1e60c2-bf86-4e9b-b0d0-8e1b859ff220',\n",
701
+ " 'c6afbf5e-857a-48c7-84f2-9c5cb9a1ef00',\n",
702
+ " '808ddb34-ecc5-4c7e-bb4d-0cd5487f8b84',\n",
703
+ " '3c120e24-5edf-4a53-99e4-5b929162f849',\n",
704
+ " '22a911c4-fe04-43e5-867c-c4529501131d',\n",
705
+ " '0295a993-3a58-4550-a522-66004a6dd0fc',\n",
706
+ " 'a54e8316-67d4-4ab7-9747-876ac1413eb3',\n",
707
+ " 'b5068009-bef5-4bce-af38-081905babf3a',\n",
708
+ " '7d57b14e-fcc6-4bc4-8ed7-9f8616268464',\n",
709
+ " 'f119914b-dda3-4fab-92ed-ac7d45be674c',\n",
710
+ " 'a13eb99f-bf84-4918-8308-6aaebfa44522',\n",
711
+ " '02425f7d-3f2c-4f99-b4b3-dae251f79f9a',\n",
712
+ " '8c070a7b-b0fe-4cad-9b25-e1f703c08041',\n",
713
+ " '458439d7-1a8e-4808-b4bf-fe0a75b569a9',\n",
714
+ " '1db0a9d9-dccd-4347-9cf3-283815f1bc05',\n",
715
+ " '5ecc0797-280b-450a-b6db-fd8352c855bc',\n",
716
+ " '85f323ea-ded5-4cea-87d5-e07e951b1fea',\n",
717
+ " 'd5c87f7b-695b-4f80-8502-5e7116721e67',\n",
718
+ " 'a2fd76f1-a851-495a-b463-44fc537d62cd',\n",
719
+ " '737a76b0-d8d8-49a6-b433-f553a8aa2b58',\n",
720
+ " '5b0db327-3234-468a-ba12-734a481d7e73',\n",
721
+ " '70c8cf89-29fe-4349-b7d3-b3293267fd10',\n",
722
+ " '635b04dd-fa10-4dde-bfcb-168cc8a9bd39',\n",
723
+ " '152b468a-b307-4b8d-b7b3-ee7e152aac78',\n",
724
+ " '8942236c-0e8c-42c5-8e95-7fcf84e89677',\n",
725
+ " '2de3d06a-9d37-4e1f-a4ce-45a3e2cdcf08',\n",
726
+ " '4c1dff30-a002-43e8-b47f-f66855fc13e4',\n",
727
+ " '9254fd90-3ffc-48f8-b091-b9fe81d3b56d',\n",
728
+ " 'b4befb2a-8746-4d75-8397-66010d1baf2c',\n",
729
+ " '97ba14a5-ace8-45b7-b25b-be774aa25410',\n",
730
+ " '25d27bce-5d31-430c-93c6-2c30025a030c',\n",
731
+ " 'c15c3ada-bb90-43f3-a41d-f85a832673b8',\n",
732
+ " '4a353418-e1db-4c6e-ab8e-f3a534b03d2e',\n",
733
+ " '668dfef9-37da-4619-8a0b-71ba993c7ac3',\n",
734
+ " 'ba8233d0-1601-4167-886c-f632e3d077bf',\n",
735
+ " '49cf3301-80b4-46e5-93bd-55712adfae99',\n",
736
+ " '51ced038-b27b-46d1-96e6-923867ca4774',\n",
737
+ " 'ac4e9a83-8715-4a7d-b4a3-b4e7b2ecef9e',\n",
738
+ " '6fe1361f-ede5-4359-90be-d007f6eb03e2',\n",
739
+ " 'cf49d0cc-a941-486f-88ab-88c93cc3f211',\n",
740
+ " 'ee21070d-e45e-4f61-ae1c-a412b659035b',\n",
741
+ " '3744d911-37cc-4645-ba2b-b376718e0afd',\n",
742
+ " 'efff5713-c6cf-4e9a-b5ce-ccd4e443a8b9',\n",
743
+ " 'aeae7c83-6a90-40c6-9fa5-65a8488865af',\n",
744
+ " '3b349ca6-fac5-41c1-b8de-5ee32161f023',\n",
745
+ " '39703730-8667-432f-aceb-8a569bb7d3c7',\n",
746
+ " 'd021eee3-ea5f-454e-a5f9-8ff2b354e05f',\n",
747
+ " '29fab8ce-8c8c-4f76-a59e-00291af854df',\n",
748
+ " '186e7eef-e3ab-4f05-b6ee-2ae65d871393',\n",
749
+ " 'c8b02b23-9dc6-43a9-b0ea-fd42533165cf',\n",
750
+ " '48734804-8b5e-45e8-9576-16f5f3d5da13',\n",
751
+ " '56f12823-504b-46b1-a239-78ebf589063e',\n",
752
+ " '5cea0d13-04f6-4b8a-9b71-ec1a2a5eb191',\n",
753
+ " '147a6f26-3ad6-4929-a074-7e397d3ac134',\n",
754
+ " '4e725c5a-f457-437b-ae47-d5e544a058d3',\n",
755
+ " '39b0e4b9-74ea-461f-8375-31a38af1db59',\n",
756
+ " '2d90a41d-20bc-4f87-adbd-8c5ef5d8f33f',\n",
757
+ " '0108b65b-f4b5-4839-80bc-b5a2f61c35c6',\n",
758
+ " '8002651d-7f26-4c99-a60c-d8df30aadd79',\n",
759
+ " 'bef4b953-c0e3-4822-85e0-94d3a41a2a65',\n",
760
+ " '6b02df41-38d7-4a11-bb14-a87ef004a191',\n",
761
+ " '84c9570a-06ee-4edd-b767-dc2ce45603d9',\n",
762
+ " '71a0f38c-4764-428a-a9a9-f3353fd7c768',\n",
763
+ " '15e8567b-4f1c-4f80-8ce7-aaf83a489933',\n",
764
+ " '0dbdae26-1e9d-4b2c-8957-3f0fd056fb6f',\n",
765
+ " '2888971e-530b-44b5-b38a-ba00b8667439',\n",
766
+ " 'c99e5ddc-0741-4a24-8cd8-cab974c32dbc',\n",
767
+ " '5365117a-4e5e-4683-9cf6-5c9015b328c8',\n",
768
+ " '62194dec-507c-49d8-b809-20c03d5caf0b',\n",
769
+ " '358b61b1-e962-4443-9240-835bc75146cc',\n",
770
+ " '10c92ff4-a8cb-483b-ad78-6dc680d50cd8',\n",
771
+ " 'a6cc759f-2599-4199-a08d-3f3b142af66f',\n",
772
+ " '035bb626-5d0a-4a43-a9cb-66b2bd940d2c',\n",
773
+ " '1a1da71a-e9b8-466d-b5e0-b7e10847b857',\n",
774
+ " '09fe8139-5029-4782-bff9-a0edfce73e2f',\n",
775
+ " '939af366-34d7-47c5-8937-59f0122df0d4',\n",
776
+ " 'f74a0d54-0d45-459d-b988-b2c398cacfd5',\n",
777
+ " 'bbe495a3-7964-4dd0-8c6d-b2a22a8494b2',\n",
778
+ " '11616f61-808c-43a4-8c38-9de0db923d68',\n",
779
+ " '65b9f618-b8aa-42d0-b7e4-321207cf81eb',\n",
780
+ " '7d964d73-3b58-49ab-8a13-fe9add8f015e',\n",
781
+ " '90ac205f-f322-431d-a097-a7fce1729cb2',\n",
782
+ " 'ce338839-0430-47a7-b426-87feabab5320',\n",
783
+ " '09cc0450-5e49-43a2-b7c0-c79afb049eb0',\n",
784
+ " '80500abe-37ef-4483-a40c-aa1ac451cf95',\n",
785
+ " '64b77e67-7115-4941-a04b-2ff4771aa71e',\n",
786
+ " 'cc9a5ffa-2c9c-4096-972a-e5fa9b3cefe9',\n",
787
+ " 'be907e7c-f601-4119-b68c-35a24eb9acfa',\n",
788
+ " '79fee5a5-3d8a-46e9-96bc-340ac4a324aa',\n",
789
+ " 'db7c23df-6888-4b4e-a4b6-58a953c174d3',\n",
790
+ " 'f54d787e-5dfb-4801-bfed-842cd18fa332',\n",
791
+ " 'e18688f1-15fc-400d-8ea3-59d313181cb0',\n",
792
+ " 'f7fba163-e8ea-451f-b1f8-cc70f599f23d',\n",
793
+ " '7f7f8847-1483-4801-96da-8c789d6ac93a',\n",
794
+ " 'e356cda5-b603-4e15-9995-e9411bd8f4f4',\n",
795
+ " 'ab3bdbe6-d223-420d-b66b-d572bf4b14d1',\n",
796
+ " 'b2e2ed1a-e738-4197-a878-d8762814c860',\n",
797
+ " 'b0d31fab-a0f8-4978-a3fd-00fc9754f327',\n",
798
+ " 'af6d58f7-3d4d-4a32-96ba-0376dc945960',\n",
799
+ " '863b5fd8-fedc-42c9-8a8e-2d07670d676c',\n",
800
+ " 'dcc803bf-0601-4a82-a90b-3481b7188b73',\n",
801
+ " 'cefcc7ee-9b1b-4586-983c-fceda1417772',\n",
802
+ " 'b895c11d-3f5d-49f5-b4d8-9f0f9307ee3e',\n",
803
+ " '06a62e87-e561-46af-94f3-9657d2a8e0c2',\n",
804
+ " 'f34de4a4-b5e0-4a7e-81e1-5c7abc066221',\n",
805
+ " '3ad33f70-faaa-4ae2-8660-bf7972a401e0',\n",
806
+ " 'b170a51a-cc14-4a3d-9c39-23f18e7405f5',\n",
807
+ " '4d89bda3-00d8-4db7-a89b-da2b06b29a24',\n",
808
+ " 'e28be583-da3a-4b82-bc86-e25dde6fb02b',\n",
809
+ " 'db982e3e-c076-4e06-9ba6-faed32db3527',\n",
810
+ " '86c5d471-60af-4e96-9cf1-a3b7f5295c47',\n",
811
+ " '9e010af0-ea9d-40f0-aad3-973c0789768c',\n",
812
+ " 'e5baba45-762a-42b5-b32d-4f64a4753b27',\n",
813
+ " '772ee256-aa86-4ae5-925e-acfff2ec76f0',\n",
814
+ " '0ad3ab0a-0bae-46ab-92d3-caf165cefed5']"
815
+ ]
816
+ },
817
+ "execution_count": 133,
818
+ "metadata": {},
819
+ "output_type": "execute_result"
820
+ }
821
+ ],
822
+ "source": [
823
+ "vector_store.add_documents(documents=text_chunks, ids = uuids)"
824
+ ]
825
+ },
826
+ {
827
+ "cell_type": "code",
828
+ "execution_count": 11,
829
+ "id": "7b8b7a46",
830
+ "metadata": {},
831
+ "outputs": [
832
+ {
833
+ "name": "stdout",
834
+ "output_type": "stream",
835
+ "text": [
836
+ "* Page 16 of 25 \n",
837
+ "National Parivar Mediclaim Plus Policy \n",
838
+ "UIN: NICHLIP25039V032425 \n",
839
+ " \n",
840
+ "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
841
+ "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
842
+ "made during the policy period. \n",
843
+ "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
844
+ "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
845
+ "policy year. \n",
846
+ "There shall be no refund for the completed policy year elapsed. [{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n",
847
+ "* Page 16 of 25 \n",
848
+ "National Parivar Mediclaim Plus Policy \n",
849
+ "UIN: NICHLIP25039V032425 \n",
850
+ " \n",
851
+ "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
852
+ "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
853
+ "made during the policy period. \n",
854
+ "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
855
+ "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
856
+ "policy year. \n",
857
+ "There shall be no refund for the completed policy year elapsed. [{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n"
858
+ ]
859
+ }
860
+ ],
861
+ "source": [
862
+ "results = vector_store.similarity_search(\n",
863
+ " \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\",\n",
864
+ " k=2\n",
865
+ ")\n",
866
+ "for res in results:\n",
867
+ " print(f\"* {res.page_content} [{res.metadata}]\")"
868
+ ]
869
+ },
870
+ {
871
+ "cell_type": "code",
872
+ "execution_count": 135,
873
+ "id": "41f27c21",
874
+ "metadata": {},
875
+ "outputs": [
876
+ {
877
+ "name": "stdout",
878
+ "output_type": "stream",
879
+ "text": [
880
+ "[Document(id='f13798aa-3424-4b38-a7bf-f5abe366120f', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}, page_content='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.'), Document(id='8002651d-7f26-4c99-a60c-d8df30aadd79', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}, page_content='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.')]\n"
881
+ ]
882
+ }
883
+ ],
884
+ "source": [
885
+ "print(results)"
886
+ ]
887
+ },
888
+ {
889
+ "cell_type": "code",
890
+ "execution_count": 136,
891
+ "id": "cf7b7568",
892
+ "metadata": {},
893
+ "outputs": [
894
+ {
895
+ "name": "stdout",
896
+ "output_type": "stream",
897
+ "text": [
898
+ "* [SIM=0.678520] Page 16 of 25 \n",
899
+ "National Parivar Mediclaim Plus Policy \n",
900
+ "UIN: NICHLIP25039V032425 \n",
901
+ " \n",
902
+ "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
903
+ "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
904
+ "made during the policy period. \n",
905
+ "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
906
+ "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
907
+ "policy year. \n",
908
+ "There shall be no refund for the completed policy year elapsed. [{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n"
909
+ ]
910
+ }
911
+ ],
912
+ "source": [
913
+ "results = vector_store.similarity_search_with_score(\n",
914
+ " \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\", k=1\n",
915
+ ")\n",
916
+ "for res, score in results:\n",
917
+ " print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
918
+ ]
919
+ },
920
+ {
921
+ "cell_type": "markdown",
922
+ "id": "7fde8a22",
923
+ "metadata": {},
924
+ "source": [
925
+ "### QUERY PARSING "
926
+ ]
927
+ },
928
+ {
929
+ "cell_type": "code",
930
+ "execution_count": 62,
931
+ "id": "20452e98",
932
+ "metadata": {},
933
+ "outputs": [],
934
+ "source": [
935
+ "from pydantic import BaseModel, model_validator,field_validator\n",
936
+ "from typing import List, Dict, Any, Optional\n",
937
+ "import json\n",
938
+ "class QuerySpec(BaseModel):\n",
939
+ " raw_query: str \n",
940
+ " intent: str \n",
941
+ " entities: Dict[str, str]\n",
942
+ " constraints : Dict[str, Any]\n",
943
+ " answer_type: str \n",
944
+ " followups: Optional[List[str]] = []\n",
945
+ "\n",
946
+ " @model_validator(mode = \"before\")\n",
947
+ " @classmethod\n",
948
+ " def parse_nested_json(cls, values):\n",
949
+ " for field in ['entities', 'constraints']:\n",
950
+ " val = values.get(field)\n",
951
+ " if isinstance(val, str):\n",
952
+ " try:\n",
953
+ " values[field] = json.loads(val)\n",
954
+ " except json.JSONDecodeError:\n",
955
+ " pass\n",
956
+ " return values\n",
957
+ "\n",
958
+ "class ClauseHit(BaseModel):\n",
959
+ " doc_id : str\n",
960
+ " page: int\n",
961
+ " chunk_id: str \n",
962
+ " text: str \n",
963
+ " metadata: Dict[str, Any]\n",
964
+ " score: float \n",
965
+ " boost: Optional[float] = None\n",
966
+ " combined_score: Optional[float] = None\n",
967
+ "\n",
968
+ " @field_validator(\"metadata\", mode=\"before\")\n",
969
+ " def parse_metadata(cls, v):\n",
970
+ " if isinstance(v, str):\n",
971
+ " try:\n",
972
+ " return json.loads(v) if v.strip() else {}\n",
973
+ " except json.JSONDecodeError:\n",
974
+ " return {}\n",
975
+ " return v\n",
976
+ "\n",
977
+ "class LogicResult(BaseModel):\n",
978
+ " answer: str\n",
979
+ " decision: str # \"covered\"/\"not_covered\"/\"conditional\"\n",
980
+ " confidence: float\n",
981
+ " evidence: List[ClauseHit]\n",
982
+ " rationale: str\n",
983
+ " \n",
984
+ "\n",
985
+ "class APIResponse(BaseModel):\n",
986
+ " query_spec: QuerySpec\n",
987
+ " logic_result: LogicResult\n",
988
+ " debug: Optional[Dict[str, Any]] = None\n",
989
+ "\n",
990
+ "\n",
991
+ "\n"
992
+ ]
993
+ },
994
+ {
995
+ "cell_type": "code",
996
+ "execution_count": 8,
997
+ "id": "3ac1f99f",
998
+ "metadata": {},
999
+ "outputs": [],
1000
+ "source": [
1001
+ "user_question = \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\""
1002
+ ]
1003
+ },
1004
+ {
1005
+ "cell_type": "code",
1006
+ "execution_count": 9,
1007
+ "id": "2961e184",
1008
+ "metadata": {},
1009
+ "outputs": [],
1010
+ "source": [
1011
+ "PARSER_PROMPT = f\"\"\"You receive a user's question about an insurance/contract document. Produce a JSON with keys:\n",
1012
+ "- intent (one of: coverage_check, definition, limit_query, waiting_period, exclusions, other)\n",
1013
+ "- entities (map of entity_name -> canonical string)\n",
1014
+ "- constraints (map: plan, time_window, eligible_person, numerical_constraints)\n",
1015
+ "- answer_type (one of: yes_no, short_explain, detailed, clause_list)\n",
1016
+ "Return ONLY the JSON.Make sure that nested fields like \"entities\" and \"constraints\" are JSON objects, not strings.\n",
1017
+ "\"\"\""
1018
+ ]
1019
+ },
1020
+ {
1021
+ "cell_type": "code",
1022
+ "execution_count": 10,
1023
+ "id": "a9123e2a",
1024
+ "metadata": {},
1025
+ "outputs": [
1026
+ {
1027
+ "data": {
1028
+ "text/plain": [
1029
+ "ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001E02EF9EA50>, default_metadata=(), model_kwargs={})"
1030
+ ]
1031
+ },
1032
+ "execution_count": 10,
1033
+ "metadata": {},
1034
+ "output_type": "execute_result"
1035
+ }
1036
+ ],
1037
+ "source": [
1038
+ "from langchain_google_genai import ChatGoogleGenerativeAI\n",
1039
+ "api_key = os.getenv(\"GEMINI_API_KEY\")\n",
1040
+ "llm = ChatGoogleGenerativeAI(\n",
1041
+ " model=\"gemini-2.5-flash\",\n",
1042
+ " google_api_key = api_key\n",
1043
+ " \n",
1044
+ " )\n",
1045
+ "llm"
1046
+ ]
1047
+ },
1048
+ {
1049
+ "cell_type": "code",
1050
+ "execution_count": 30,
1051
+ "id": "290081a1",
1052
+ "metadata": {},
1053
+ "outputs": [],
1054
+ "source": [
1055
+ "def parsing_query(query:str) -> QuerySpec:\n",
1056
+ " # Bind the schema to the model\n",
1057
+ " structured_llm = llm.with_structured_output(QuerySpec)\n",
1058
+ "\n",
1059
+ " # Compose the full prompt with instructions and user question\n",
1060
+ " full_prompt = PARSER_PROMPT + \"\\n\" + query\n",
1061
+ "\n",
1062
+ " # Invoke the model to get structured output parsed as QuerySpec\n",
1063
+ " result: QuerySpec = structured_llm.invoke(full_prompt)\n",
1064
+ " return result\n",
1065
+ " # print(result.json()) # This will print the JSON output matching your schema\n"
1066
+ ]
1067
+ },
1068
+ {
1069
+ "cell_type": "markdown",
1070
+ "id": "b5cecc42",
1071
+ "metadata": {},
1072
+ "source": [
1073
+ "## Embedding + Retrieval and Clause Matching"
1074
+ ]
1075
+ },
1076
+ {
1077
+ "cell_type": "code",
1078
+ "execution_count": 38,
1079
+ "id": "11fdd288",
1080
+ "metadata": {},
1081
+ "outputs": [],
1082
+ "source": [
1083
+ "def get_query_embedding(embedding_client, query_spec: QuerySpec):\n",
1084
+ " q = query_spec.raw_query\n",
1085
+ " e_main = embedding_client.embed_query(q)\n",
1086
+ " expansions = []\n",
1087
+ " if \"procedure\" in query_spec.entities:\n",
1088
+ " expansions.append(f\"{q} OR {query_spec.entities['procedure']} procedures related\")\n",
1089
+ " return e_main, expansions\n",
1090
+ "\n",
1091
+ "def retrieval_from_pinecone_vectoreStore(pinecone_index, embeddings, top_k= 3, filter_meta = None):\n",
1092
+ " \"\"\"\n",
1093
+ " Retrieve the top matching chunks from Pinecone.\n",
1094
+ " \n",
1095
+ " Args:\n",
1096
+ " pinecone_index: Your Pinecone index object.\n",
1097
+ " embedding: The vector embedding of the query.\n",
1098
+ " top_k: How many chunks to retrieve.\n",
1099
+ " filter_meta: Optional metadata filter dict.\n",
1100
+ " \n",
1101
+ " Returns:\n",
1102
+ " List of ClauseHit objects (lightweight container for chunk info).\n",
1103
+ " \"\"\"\n",
1104
+ " res = pinecone_index.query(\n",
1105
+ " vector= embeddings,\n",
1106
+ " top_k =top_k ,\n",
1107
+ " include_metadata = True, \n",
1108
+ " include_values = False, \n",
1109
+ " filter = filter_meta \n",
1110
+ " )\n",
1111
+ " hits= []\n",
1112
+ " for match in res['matches']:\n",
1113
+ " hits.append(ClauseHit(\n",
1114
+ " doc_id=match['metadata']['doc_id'],\n",
1115
+ " page=match['metadata'].get('page', -1),\n",
1116
+ " chunk_id=match['metadata'].get('chunk_id', ''),\n",
1117
+ " text=match['metadata']['text'],\n",
1118
+ " metadata=match['metadata'],\n",
1119
+ " score=match['score']\n",
1120
+ " ))\n",
1121
+ " return hits\n",
1122
+ "\n",
1123
+ " "
1124
+ ]
1125
+ },
1126
+ {
1127
+ "cell_type": "markdown",
1128
+ "id": "9707521f",
1129
+ "metadata": {},
1130
+ "source": [
1131
+ "## Logic Evaluation\n",
1132
+ "### Decision processing"
1133
+ ]
1134
+ },
1135
+ {
1136
+ "cell_type": "code",
1137
+ "execution_count": 74,
1138
+ "id": "74e49132",
1139
+ "metadata": {},
1140
+ "outputs": [],
1141
+ "source": [
1142
+ "def evaluate_with_llm(raw_query: str, top_clauses: list):\n",
1143
+ " \"\"\"\n",
1144
+ " Use the LLM to analyze retrieved clauses and return structured decision.\n",
1145
+ " \"\"\"\n",
1146
+ "\n",
1147
+ " # Prepare context for the prompt\n",
1148
+ " context_clauses = []\n",
1149
+ " for i, c in enumerate(top_clauses, 1):\n",
1150
+ " context_clauses.append(f\"{i}) [source:{c.doc_id} page:{c.page}] {c.text}\")\n",
1151
+ " print(chr(10).join(context_clauses))\n",
1152
+ " \n",
1153
+ " # Build prompt\n",
1154
+ " prompt = f\"\"\"\n",
1155
+ " You are an insurance policy analyst. Question: \"{raw_query}\"\n",
1156
+ "\n",
1157
+ " Provided clauses (numbered):\n",
1158
+ " {chr(10).join(context_clauses)}\n",
1159
+ "\n",
1160
+ " Task:\n",
1161
+ " 1) Decide: COVERED / NOT_COVERED / CONDITIONAL\n",
1162
+ " 2) Summarize the exact clause(s) that justify your decision.\n",
1163
+ " 3) List any conditions, waiting periods, sublimits, or exclusions relevant.\n",
1164
+ " 4) Provide a concise final answer (1-2 sentences).\n",
1165
+ "\n",
1166
+ " Return JSON with these exact keys:\n",
1167
+ " {{\n",
1168
+ " \"decision\": \"...\",\n",
1169
+ " \"evidence\": [\n",
1170
+ " {{\"doc_id\": \"...\", \"page\": 0, \"snippet\": \"...\", \"reason\": \"...\"}}\n",
1171
+ " ],\n",
1172
+ " \"confidence\": 0.0,\n",
1173
+ " \"rationale\": \"...\",\n",
1174
+ " \"answer\": \"...\"\n",
1175
+ " }}\n",
1176
+ " \"\"\"\n",
1177
+ "\n",
1178
+ " # Directly parse to LogicResult using structured output\n",
1179
+ " structured_llm = llm.with_structured_output(LogicResult)\n",
1180
+ " result: LogicResult = structured_llm.invoke(prompt)\n",
1181
+ " # print(f\"result: {result}\\n result_type{type(result)}\")\n",
1182
+ "\n",
1183
+ " # Attach full text for each evidence\n",
1184
+ " enriched_evidence = []\n",
1185
+ " for ev in result.evidence:\n",
1186
+ " matched = next((c for c in top_clauses if c.doc_id == ev.doc_id and str(c.page) == str(ev.page)), None)\n",
1187
+ " if matched:\n",
1188
+ " ev.text = matched.text # or use a different field if needed\n",
1189
+ " enriched_evidence.append(ev)\n",
1190
+ "\n",
1191
+ " result.evidence = enriched_evidence\n",
1192
+ " # print(enriched_evidence[0])\n",
1193
+ " return result\n"
1194
+ ]
1195
+ },
1196
+ {
1197
+ "cell_type": "code",
1198
+ "execution_count": 14,
1199
+ "id": "fe78ab38",
1200
+ "metadata": {},
1201
+ "outputs": [],
1202
+ "source": [
1203
+ "query = \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\""
1204
+ ]
1205
+ },
1206
+ {
1207
+ "cell_type": "code",
1208
+ "execution_count": 36,
1209
+ "id": "fea3b1be",
1210
+ "metadata": {},
1211
+ "outputs": [],
1212
+ "source": [
1213
+ "parsed_query = parsing_query(query)\n"
1214
+ ]
1215
+ },
1216
+ {
1217
+ "cell_type": "code",
1218
+ "execution_count": 32,
1219
+ "id": "82fcb8bb",
1220
+ "metadata": {},
1221
+ "outputs": [
1222
+ {
1223
+ "data": {
1224
+ "text/plain": [
1225
+ "__main__.QuerySpec"
1226
+ ]
1227
+ },
1228
+ "execution_count": 32,
1229
+ "metadata": {},
1230
+ "output_type": "execute_result"
1231
+ }
1232
+ ],
1233
+ "source": [
1234
+ "type(parsed_query)"
1235
+ ]
1236
+ },
1237
+ {
1238
+ "cell_type": "code",
1239
+ "execution_count": 39,
1240
+ "id": "9b8292f0",
1241
+ "metadata": {},
1242
+ "outputs": [],
1243
+ "source": [
1244
+ "\n",
1245
+ "# Step 1 — Embed\n",
1246
+ "embedding = get_query_embedding(embeddings, parsed_query)"
1247
+ ]
1248
+ },
1249
+ {
1250
+ "cell_type": "code",
1251
+ "execution_count": 44,
1252
+ "id": "46790137",
1253
+ "metadata": {},
1254
+ "outputs": [],
1255
+ "source": [
1256
+ "\n",
1257
+ "# Step 2 — Retrieve\n",
1258
+ "top_hits = retrieval_from_pinecone_vectoreStore(index, embedding, top_k=3)"
1259
+ ]
1260
+ },
1261
+ {
1262
+ "cell_type": "code",
1263
+ "execution_count": 48,
1264
+ "id": "9c3f4e68",
1265
+ "metadata": {},
1266
+ "outputs": [
1267
+ {
1268
+ "data": {
1269
+ "text/plain": [
1270
+ "[ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', 'type': 'text'}, score=0.678843796, boost=None, combined_score=None),\n",
1271
+ " ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', 'type': 'text'}, score=0.677854538, boost=None, combined_score=None),\n",
1272
+ " ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='all claims made in the aggregate during each policy year. \\n \\n2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \\npayment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \\nand coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \\nCoverage shall not be available during the period for which no premium is received. \\n \\n2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \\nregistered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'all claims made in the aggregate during each policy year. \\n \\n2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \\npayment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \\nand coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \\nCoverage shall not be available during the period for which no premium is received. \\n \\n2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \\nregistered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or', 'type': 'text'}, score=0.64794, boost=None, combined_score=None)]"
1273
+ ]
1274
+ },
1275
+ "execution_count": 48,
1276
+ "metadata": {},
1277
+ "output_type": "execute_result"
1278
+ }
1279
+ ],
1280
+ "source": [
1281
+ "top_hits"
1282
+ ]
1283
+ },
1284
+ {
1285
+ "cell_type": "code",
1286
+ "execution_count": 75,
1287
+ "id": "05cb7ca5",
1288
+ "metadata": {},
1289
+ "outputs": [
1290
+ {
1291
+ "name": "stdout",
1292
+ "output_type": "stream",
1293
+ "text": [
1294
+ "1) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] Page 16 of 25 \n",
1295
+ "National Parivar Mediclaim Plus Policy \n",
1296
+ "UIN: NICHLIP25039V032425 \n",
1297
+ " \n",
1298
+ "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
1299
+ "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
1300
+ "made during the policy period. \n",
1301
+ "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
1302
+ "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
1303
+ "policy year. \n",
1304
+ "There shall be no refund for the completed policy year elapsed.\n",
1305
+ "2) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] Page 16 of 25 \n",
1306
+ "National Parivar Mediclaim Plus Policy \n",
1307
+ "UIN: NICHLIP25039V032425 \n",
1308
+ " \n",
1309
+ "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n",
1310
+ "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n",
1311
+ "made during the policy period. \n",
1312
+ "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n",
1313
+ "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n",
1314
+ "policy year. \n",
1315
+ "There shall be no refund for the completed policy year elapsed.\n",
1316
+ "3) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] all claims made in the aggregate during each policy year. \n",
1317
+ " \n",
1318
+ "2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \n",
1319
+ "payment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \n",
1320
+ "and coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \n",
1321
+ "Coverage shall not be available during the period for which no premium is received. \n",
1322
+ " \n",
1323
+ "2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \n",
1324
+ "registered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or\n"
1325
+ ]
1326
+ }
1327
+ ],
1328
+ "source": [
1329
+ "# Step 3 — Evaluate with LLM\n",
1330
+ "result = evaluate_with_llm(query, top_hits)"
1331
+ ]
1332
+ },
1333
+ {
1334
+ "cell_type": "code",
1335
+ "execution_count": 73,
1336
+ "id": "40c7075b",
1337
+ "metadata": {},
1338
+ "outputs": [
1339
+ {
1340
+ "data": {
1341
+ "text/plain": [
1342
+ "'The grace period for premium payment under the National Parivar Mediclaim Plus Policy is thirty days. However, coverage is not available during this period if no premium is received.'"
1343
+ ]
1344
+ },
1345
+ "execution_count": 73,
1346
+ "metadata": {},
1347
+ "output_type": "execute_result"
1348
+ }
1349
+ ],
1350
+ "source": [
1351
+ "result.answer"
1352
+ ]
1353
+ },
1354
+ {
1355
+ "cell_type": "code",
1356
+ "execution_count": 82,
1357
+ "id": "46ff44ac",
1358
+ "metadata": {},
1359
+ "outputs": [
1360
+ {
1361
+ "name": "stderr",
1362
+ "output_type": "stream",
1363
+ "text": [
1364
+ "C:\\Users\\hp\\AppData\\Local\\Temp\\ipykernel_9600\\3651844483.py:1: PydanticDeprecatedSince20: The `__fields__` attribute is deprecated, use `model_fields` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/\n",
1365
+ " result.evidence[0].__fields__\n"
1366
+ ]
1367
+ },
1368
+ {
1369
+ "data": {
1370
+ "text/plain": [
1371
+ "{'doc_id': FieldInfo(annotation=str, required=True),\n",
1372
+ " 'page': FieldInfo(annotation=int, required=True),\n",
1373
+ " 'chunk_id': FieldInfo(annotation=str, required=True),\n",
1374
+ " 'text': FieldInfo(annotation=str, required=True),\n",
1375
+ " 'metadata': FieldInfo(annotation=Dict[str, Any], required=True),\n",
1376
+ " 'score': FieldInfo(annotation=float, required=True),\n",
1377
+ " 'boost': FieldInfo(annotation=Union[float, NoneType], required=False, default=None),\n",
1378
+ " 'combined_score': FieldInfo(annotation=Union[float, NoneType], required=False, default=None)}"
1379
+ ]
1380
+ },
1381
+ "execution_count": 82,
1382
+ "metadata": {},
1383
+ "output_type": "execute_result"
1384
+ }
1385
+ ],
1386
+ "source": [
1387
+ "result.evidence[0].__fields__"
1388
+ ]
1389
+ }
1390
+ ],
1391
+ "metadata": {
1392
+ "kernelspec": {
1393
+ "display_name": "rag-app",
1394
+ "language": "python",
1395
+ "name": "python3"
1396
+ },
1397
+ "language_info": {
1398
+ "codemirror_mode": {
1399
+ "name": "ipython",
1400
+ "version": 3
1401
+ },
1402
+ "file_extension": ".py",
1403
+ "mimetype": "text/x-python",
1404
+ "name": "python",
1405
+ "nbconvert_exporter": "python",
1406
+ "pygments_lexer": "ipython3",
1407
+ "version": "3.12.4"
1408
+ }
1409
+ },
1410
+ "nbformat": 4,
1411
+ "nbformat_minor": 5
1412
+ }
main.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.utils.model_loader import ModelLoader
2
+ from app.ingestion.file_loader import load_documents_form_url
3
+ from app.ingestion.text_splitter import text_splitting
4
+ from app.reseasoning.query_parser import parsing_query
5
+ from app.reseasoning.descision_maker import evaluate_with_llm
6
+ from app.retrieval.retriever import retrieval_from_pinecone_vectoreStore
7
+ from app.schemas.request_models import QuerySpec,LogicResult, ClauseHit, HackRxRunRequest
8
+ from app.schemas.response_models import APIResponse
9
+ from fastapi import FastAPI, Header, HTTPException, Depends
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from typing import Optional
12
+ from app.embedding.embeder import get_query_embedding
13
+ from app.embedding.vectore_store import create_vectore_store
14
+ from app.ingestion.file_loader import load_documents_form_url
15
+ from app.ingestion.text_splitter import text_splitting
16
+
17
app = FastAPI(title="RAG app")

# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers under the CORS spec (credentialed requests may not use
# the wildcard origin) — confirm whether credentials are actually required,
# and if so, list explicit origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
25
def verify_bearer_token(authorization: Optional[str] = Header(None)):
    """Validate the ``Authorization: Bearer <token>`` request header.

    Returns:
        True when the presented token matches the expected secret.

    Raises:
        HTTPException: 401 when the header is missing/malformed,
            403 when the token does not match.
    """
    import hmac
    import os

    # Allow the secret to be rotated via the environment; fall back to the
    # original hard-coded value so existing deployments keep working.
    # NOTE(review): a secret should not live in source control — move it to
    # configuration permanently and drop this fallback.
    expected_token = os.getenv(
        "HACKRX_API_TOKEN",
        "cc13b8bb7f4bc1570c8a39bda8c9d4c34b2be6b8abe1044c89abf49b28cee3f8",
    )
    if authorization is None or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
    # split with maxsplit=1 keeps any further spaces inside the token intact.
    token = authorization.split("Bearer ", 1)[1]
    # Constant-time comparison avoids leaking the secret via timing.
    if not hmac.compare_digest(token, expected_token):
        raise HTTPException(status_code=403, detail="Invalid token")
    return True
33
+
34
@app.post("/api/v1/hackrx/run", response_model=APIResponse)
async def run_hackrx(request: HackRxRunRequest, authorized: bool = Depends(verify_bearer_token)):
    """Run the full RAG pipeline for one document and a list of questions.

    Steps: load models → download & split the document → index it in
    Pinecone → for each question: parse, embed, retrieve, and evaluate
    with the LLM. Returns an APIResponse with one answer per question.
    """
    # --- Model initialisation ---
    # BUG FIX: the original passed `model_loader="gemini"`, inconsistent with
    # the sibling call below (`model_provider="openai"`); use the same keyword.
    # Distinct variable names also stop the second loader from silently
    # shadowing the first.
    llm_loader = ModelLoader(model_provider="gemini")
    llm = llm_loader.load_llm()

    # Embedding model
    # NOTE(review): `load_llm()` is also used to obtain the embedding model —
    # presumably ModelLoader returns the embedding client for provider
    # "openai"; confirm there is no dedicated load_embedding() method.
    embedding_loader = ModelLoader(model_provider="openai")
    embedding_model = embedding_loader.load_llm()
    print("LLMs are loaded!!")

    answers = []

    # --- Ingestion: download, split, and index the policy document ---
    document_url = request.documents
    pdf_doc = load_documents_form_url(document_url)
    print("file has been loaded")

    chunks = text_splitting(doc_content=pdf_doc)
    print("Chunks have been splitted")

    index, namespace = create_vectore_store(text_chunks=chunks, embedding_model=embedding_model)
    print("Index is created")

    # --- Answer each question via the retrieve-then-reason loop ---
    for question in request.questions:
        # 1. Parse the raw question into a structured query spec.
        parsed_query = parsing_query(query=question, llm=llm)
        print("Query Parsed")

        # 2. Embed the parsed query (expansions are currently unused here).
        query_embedding, expansions = get_query_embedding(query_spec=parsed_query, embedding_model=embedding_model)
        print("Query Embedded")

        # 3. Retrieve the top-k candidate clauses from Pinecone.
        top_hits = retrieval_from_pinecone_vectoreStore(pinecone_index=index, embeddings=query_embedding, top_k=3, namespace=namespace)
        print("Documents retrieved!")

        # 4. Let the LLM evaluate the clauses and produce a structured answer.
        result = evaluate_with_llm(raw_query=question, top_clauses=top_hits, llm=llm)
        print("Answer created!")
        answers.append(result.answer)

    print("Answers are appended!")
    print(answers)
    # Per-request namespace clean-up is currently disabled:
    # index.delete(delete_all=True,namespace=namespace)
    # print("index is deleted!!")
    return APIResponse(answers=answers)
82
+
83
+
84
+
85
+
86
+
87
+
88
+
pyproject.toml ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "rag-app"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = "==3.12.4"
7
+ dependencies = [
8
+ "acres==0.5.0",
9
+ "aiohappyeyeballs==2.6.1",
10
+ "aiohttp==3.12.15",
11
+ "aiohttp-retry==2.9.1",
12
+ "aiosignal==1.4.0",
13
+ "annotated-types==0.7.0",
14
+ "anyio==4.10.0",
15
+ "asttokens==3.0.0",
16
+ "attrs==25.3.0",
17
+ "beautifulsoup4==4.13.4",
18
+ "cachetools==5.5.2",
19
+ "certifi==2025.8.3",
20
+ "cffi==1.17.1",
21
+ "charset-normalizer==3.4.3",
22
+ "ci-info==0.3.0",
23
+ "click==8.2.1",
24
+ "colorama==0.4.6",
25
+ "colorclass==2.2.2",
26
+ "comm==0.2.3",
27
+ "compressed-rtf==1.0.7",
28
+ "configobj==5.0.9",
29
+ "configparser==7.2.0",
30
+ "cryptography==45.0.6",
31
+ "dataclasses-json==0.6.7",
32
+ "debugpy==1.8.16",
33
+ "decorator==5.2.1",
34
+ "distro==1.9.0",
35
+ "docx==0.2.4",
36
+ "easygui==0.98.3",
37
+ "ebcdic==1.1.1",
38
+ "etelemetry==0.3.1",
39
+ "executing==2.2.0",
40
+ "extract-msg==0.55.0",
41
+ "fastapi==0.116.1",
42
+ "filelock==3.18.0",
43
+ "filetype==1.2.0",
44
+ "frozenlist==1.7.0",
45
+ "google-ai-generativelanguage==0.6.18",
46
+ "google-api-core==2.25.1",
47
+ "google-auth==2.40.3",
48
+ "googleapis-common-protos==1.70.0",
49
+ "greenlet==3.2.4",
50
+ "groq==0.31.0",
51
+ "grpcio==1.74.0",
52
+ "grpcio-status==1.74.0",
53
+ "h11==0.16.0",
54
+ "httpcore==1.0.9",
55
+ "httplib2==0.22.0",
56
+ "httpx==0.28.1",
57
+ "httpx-sse==0.4.1",
58
+ "idna==3.10",
59
+ "iniconfig==2.1.0",
60
+ "ipykernel==6.30.1",
61
+ "ipython==9.4.0",
62
+ "ipython-pygments-lexers==1.1.1",
63
+ "jedi==0.19.2",
64
+ "jiter==0.10.0",
65
+ "joblib==1.5.1",
66
+ "jsonpatch==1.33",
67
+ "jsonpointer==3.0.0",
68
+ "jupyter-client==8.6.3",
69
+ "jupyter-core==5.8.1",
70
+ "langchain==0.3.27",
71
+ "langchain-community==0.3.27",
72
+ "langchain-core==0.3.74",
73
+ "langchain-google-genai==2.1.9",
74
+ "langchain-groq==0.3.7",
75
+ "langchain-openai==0.3.29",
76
+ "langchain-pinecone==0.2.11",
77
+ "langchain-tests==0.3.20",
78
+ "langchain-text-splitters==0.3.9",
79
+ "langsmith==0.4.13",
80
+ "lark==1.1.9",
81
+ "looseversion==1.3.0",
82
+ "lxml==6.0.0",
83
+ "markdown-it-py==4.0.0",
84
+ "marshmallow==3.26.1",
85
+ "matplotlib-inline==0.1.7",
86
+ "mdurl==0.1.2",
87
+ "msoffcrypto-tool==5.4.2",
88
+ "multidict==6.6.4",
89
+ "mypy-extensions==1.1.0",
90
+ "nest-asyncio==1.6.0",
91
+ "networkx==3.5",
92
+ "nibabel==5.3.2",
93
+ "nipype==1.10.0",
94
+ "numpy==2.3.2",
95
+ "olefile==0.47",
96
+ "oletools==0.60.2",
97
+ "openai==1.99.7",
98
+ "orjson==3.11.1",
99
+ "packaging==24.2",
100
+ "pandas==2.3.1",
101
+ "parso==0.8.4",
102
+ "pathlib==1.0.1",
103
+ "pcodedmp==1.2.6",
104
+ "pillow==11.3.0",
105
+ "pinecone==7.3.0",
106
+ "pinecone-plugin-assistant==1.7.0",
107
+ "pinecone-plugin-interface==0.0.7",
108
+ "platformdirs==4.3.8",
109
+ "pluggy==1.6.0",
110
+ "prompt-toolkit==3.0.51",
111
+ "propcache==0.3.2",
112
+ "proto-plus==1.26.1",
113
+ "protobuf==6.31.1",
114
+ "prov==2.1.1",
115
+ "psutil==7.0.0",
116
+ "pure-eval==0.2.3",
117
+ "puremagic==1.30",
118
+ "py-cpuinfo==9.0.0",
119
+ "pyasn1==0.6.1",
120
+ "pyasn1-modules==0.4.2",
121
+ "pycparser==2.22",
122
+ "pydantic==2.11.7",
123
+ "pydantic-core==2.33.2",
124
+ "pydantic-settings==2.10.1",
125
+ "pydot==4.0.1",
126
+ "pygments==2.19.2",
127
+ "pymupdf==1.26.3",
128
+ "pyparsing==3.2.3",
129
+ "pytest==8.4.1",
130
+ "pytest-asyncio==0.26.0",
131
+ "pytest-benchmark==5.1.0",
132
+ "pytest-codspeed==4.0.0",
133
+ "pytest-recording==0.13.4",
134
+ "pytest-socket==0.7.0",
135
+ "python-dateutil==2.9.0.post0",
136
+ "python-dotenv==1.1.1",
137
+ "pytz==2025.2",
138
+ "pyxnat==1.6.3",
139
+ "pyyaml==6.0.2",
140
+ "pyzmq==27.0.1",
141
+ "rdflib==7.1.4",
142
+ "red-black-tree-mod==1.22",
143
+ "regex==2025.7.34",
144
+ "requests==2.32.4",
145
+ "requests-toolbelt==1.0.0",
146
+ "rich==14.1.0",
147
+ "rsa==4.9.1",
148
+ "rtfde==0.1.2.1",
149
+ "scikit-learn==1.7.1",
150
+ "scipy==1.16.1",
151
+ "simplejson==3.20.1",
152
+ "six==1.17.0",
153
+ "sniffio==1.3.1",
154
+ "soupsieve==2.7",
155
+ "sqlalchemy==2.0.42",
156
+ "stack-data==0.6.3",
157
+ "starlette==0.47.2",
158
+ "syrupy==4.9.1",
159
+ "tenacity==9.1.2",
160
+ "threadpoolctl==3.6.0",
161
+ "tiktoken==0.11.0",
162
+ "tornado==6.5.2",
163
+ "tqdm==4.67.1",
164
+ "traitlets==5.14.3",
165
+ "traits==7.0.2",
166
+ "typing-extensions==4.14.1",
167
+ "typing-inspect==0.9.0",
168
+ "typing-inspection==0.4.1",
169
+ "tzdata==2025.2",
170
+ "tzlocal==5.3.1",
171
+ "urllib3<2",
172
+ "uvicorn>=0.35.0",
173
+ "vcrpy==7.0.0",
174
+ "wcwidth==0.2.13",
175
+ "win-unicode-console==0.5",
176
+ "wrapt==1.17.2",
177
+ "yarl==1.20.1",
178
+ "zstandard==0.23.0",
179
+ ]
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-community
3
+ langchain-groq
4
+ langchain-openai
5
+ pymupdf
6
+ pinecone
7
+ langchain-pinecone
8
+ fastapi
9
+ langchain-google-genai
10
+ pydantic
11
+ uvicorn
test_query.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
import json

# Smoke-test payload for the /api/v1/hackrx/run endpoint.
test_data = {
    "documents": "https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D",
    "questions": [
        "What is covered for room rent and ICU charges?"
    ]
}

# NOTE(review): this bearer token is duplicated from main.py — keep the two
# in sync, or read both from the environment.
headers = {
    "Authorization": "Bearer cc13b8bb7f4bc1570c8a39bda8c9d4c34b2be6b8abe1044c89abf49b28cee3f8",
}

# `json=` serialises the payload and sets the Content-Type header for us,
# replacing the manual json.dumps(...) + header bookkeeping. A timeout is
# set because requests otherwise waits forever; the server downloads and
# indexes a whole PDF per call, so it is generous.
response = requests.post(
    "http://127.0.0.1:8000/api/v1/hackrx/run",
    headers=headers,
    json=test_data,
    timeout=300,
)

# Print the response
print(f"Status Code: {response.status_code}")
print(f"Response: {response.text}")
uv.lock ADDED
The diff for this file is too large to render. See raw diff