{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "7896ff7a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'c:\\\\code\\\\Bajaj HackRx\\\\Rag_app'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%pwd" ] },
{ "cell_type": "code", "execution_count": 5, "id": "8638c1e6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\__init__.py\n", "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\config\\__init__.py\n", "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\embedding\\__init__.py\n", "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\ingestion\\__init__.py\n", "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\reseasoning\\__init__.py\n", "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\retrieval\\__init__.py\n", "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\schemas\\__init__.py\n", "Created: c:\\code\\Bajaj HackRx\\Rag_app\\app\\utils\\__init__.py\n" ] } ], "source": [
 "import os\n",
 "\n",
 "# Package root: this directory and every sub-directory gets an __init__.py marker.\n",
 "APP_ROOT = \"c:\\\\code\\\\Bajaj HackRx\\\\Rag_app\\\\app\"\n",
 "\n",
 "for dirpath, _dirnames, _filenames in os.walk(APP_ROOT):\n",
 "    init_path = os.path.join(dirpath, '__init__.py')\n",
 "    # Existing files are never overwritten, so re-running this cell is safe.\n",
 "    if not os.path.exists(init_path):\n",
 "        with open(init_path, 'w') as init_file:\n",
 "            # The marker must be valid Python: the bare text `init file` is a\n",
 "            # SyntaxError when the package is imported, so write it as a comment.\n",
 "            init_file.write(\"# init file\\n\")\n",
 "        print(f\"Created: {init_path}\")" ] },
{ "cell_type": "markdown", "id": "4fedace2", "metadata": {}, "source": [ "## 1. 
Input document\n", "### Input Requirements:\n", "\n", "- Process PDFs, DOCX, and email documents\n", "- Handle policy/contract data efficiently\n", "- Parse natural language queries" ] }, { "cell_type": "code", "execution_count": 7, "id": "d47f278d", "metadata": {}, "outputs": [], "source": [ "import fitz\n", "from langchain_core.documents import Document\n", "from langchain_groq import ChatGroq\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "from langchain.schema.messages import HumanMessage\n", "from langchain_community.vectorstores import FAISS\n", "import os \n", "from langchain.prompts import PromptTemplate\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", "import pymupdf" ] }, { "cell_type": "code", "execution_count": 2, "id": "b7a58fc9", "metadata": {}, "outputs": [], "source": [ "api_key= os.getenv(\"GEMINI_API_KEY\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "6d065c7c", "metadata": {}, "outputs": [], "source": [ "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", "\n", "embeddings = GoogleGenerativeAIEmbeddings(model = \"models/gemini-embedding-001\",google_api_key = api_key)\n", "vector = embeddings.embed_query(\"hello, world\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "d0706163", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[-0.02842607907950878,\n", " 0.004132709465920925,\n", " 0.010386144742369652,\n", " -0.09004563093185425,\n", " -0.0044305226765573025]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vector[:5]" ] }, { "cell_type": "code", "execution_count": 5, "id": "01d64928", "metadata": {}, "outputs": [], "source": [ "import requests\n", "url = \"https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D\"\n", 
"response = requests.get(url)" ] },
{ "cell_type": "code", "execution_count": 94, "id": "80cf7260", "metadata": {}, "outputs": [], "source": [ "import requests\n", "url = \"https://docs.google.com/document/d/13pujQKEZS37mEHEfWDnaqb2FlvDnDwzkuJX88Y9w9EA/edit?usp=sharing\"\n", "response = requests.get(url)" ] },
{ "cell_type": "code", "execution_count": 95, "id": "56afd5c0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'text/html; charset=utf-8'" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "response.headers['Content-Type']" ] },
{ "cell_type": "code", "execution_count": 8, "id": "4d3fe1fb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "157" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "\n",
 "response.raise_for_status()\n",
 "pdf_bytes = response.content\n",
 "doc = pymupdf.open(stream=pdf_bytes, filetype=\"pdf\")\n",
 "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
 "from uuid import uuid4\n",
 "\n",
 "# One id for the whole document, shared by the doc_id of every chunk.\n",
 "doc_id = str(uuid4())\n",
 "text = \"\"  # full document text, kept only for inspection\n",
 "text_chunks = []\n",
 "for i, page in enumerate(doc):\n",
 "    page_text = page.get_text()\n",
 "    text += page_text\n",
 "    if not page_text.strip():\n",
 "        continue  # page has no extractable text, nothing to index\n",
 "    # Split each page on its own so `page` is the page the chunk came from.\n",
 "    # Splitting the accumulated text tagged every chunk with the last page index.\n",
 "    page_doc = Document(page_content=page_text, metadata={\n",
 "        \"doc_id\": doc_id,\n",
 "        \"page\": i,\n",
 "        \"chunk_id\": f\"{doc_id}_p{i}\",\n",
 "        \"type\": \"text\"\n",
 "    })\n",
 "    text_chunks.extend(splitter.split_documents([page_doc]))\n",
 "\n",
 "len(text_chunks)" ] },
{ "cell_type": "code", "execution_count": 9, "id": "08cfbca7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "list" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(text_chunks)" ] },
{ "cell_type": "code", "execution_count": null, "id": "c8f47031", "metadata": {}, "outputs": [], "source": [
 "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
 "from uuid import uuid4\n",
 "\n",
 "# One id for the whole document, shared by the doc_id of every chunk.\n",
 "doc_id = str(uuid4())\n",
 "text = \"\"  # reset here so a re-run does not append the document a second time\n",
 "text_chunks = []\n",
 "for i, page in enumerate(doc):\n",
 "    page_text = page.get_text()\n",
 "    text += page_text\n",
 "    if not page_text.strip():\n",
 "        continue  # page has no extractable text, nothing to index\n",
 "    # Split each page on its own so `page` is the page the chunk came from.\n",
 "    page_doc = Document(page_content=page_text, metadata={\n",
 "        \"doc_id\": doc_id,\n",
 "        \"page\": i,\n",
 "        \"chunk_id\": f\"{doc_id}_p{i}\",\n",
 "        \"type\": \"text\"\n",
 "    })\n",
 "    text_chunks.extend(splitter.split_documents([page_doc]))\n",
 "\n",
 "len(text_chunks)" ] },
{ "cell_type": "code", "execution_count": 5, "id": "6fe8d9ab", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "157" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from uuid import uuid4\n",
 "\n",
 "# One id for the whole document, shared by the doc_id of every chunk.\n",
 "doc_id = str(uuid4())\n",
 "text = \"\"  # reset here so a re-run does not append the document a second time\n",
 "text_chunks = []\n",
 "for i, page in enumerate(doc):\n",
 "    page_text = page.get_text()\n",
 "    text += page_text\n",
 "    if not page_text.strip():\n",
 "        continue  # page has no extractable text, nothing to index\n",
 "    # Split each page on its own so `page` is the page the chunk came from.\n",
 "    page_doc = Document(page_content=page_text, metadata={\n",
 "        \"doc_id\": doc_id,\n",
 "        \"page\": i,\n",
 "        \"chunk_id\": f\"{doc_id}_p{i}\",\n",
 "        \"type\": \"text\"\n",
 "    })\n",
 "    text_chunks.extend(splitter.split_documents([page_doc]))\n",
 "\n",
 "len(text_chunks)" ] },
{ "cell_type": "code", "execution_count": 124, "id": "b7456368", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "National Insurance Co. Ltd. \n", "Premises No. 18-0374, Plot no. CBD-81, \n", "New Town, Kolkata - 700156 \n", "Page 1 of 25 \n", "National Parivar Mediclaim Plus Policy \n", "UIN: NICHLIP25039V032425 \n", " \n", "National Insurance Company Limited \n", " \n", " \n", " \n", " \n", " \n", "CIN - U10200WB1906GOI001713 \n", "IRDAI Regn. No. – 58 \n", " \n", " Issuing Office \n", "National Parivar Mediclaim Plus Policy \n", " \n", "Whereas the Proposer designated in the schedule hereto has by a Proposal together with Declaration, which shall be the basis of \n", "this contract and is deemed to be incorporated herein, has applied to National Insurance Company Ltd. 
(hereinafter called the \n", "Company), for the insurance hereinafter set forth, in respect of person(s)/ family members named in the schedule hereto\n" ] } ], "source": [ "print(text_chunks[0].page_content)" ] }, { "cell_type": "code", "execution_count": 125, "id": "84e3b7e6", "metadata": {}, "outputs": [], "source": [ "from uuid import uuid4\n", "uuids = [str(uuid4()) for _ in range(len(text_chunks)) ]" ] }, { "cell_type": "markdown", "id": "a7b3a0a7", "metadata": {}, "source": [ "### Setting up Pinecone Vector Store" ] }, { "cell_type": "code", "execution_count": 1, "id": "6a98c3e3", "metadata": {}, "outputs": [], "source": [ "import os\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", "pinecone_key = os.getenv(\"PINECONE_API_KEY\")\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "e9bd7561", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from pinecone import Pinecone\n", "pc = Pinecone(api_key=pinecone_key)\n", "pc\n" ] }, { "cell_type": "code", "execution_count": 105, "id": "07746b7f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2025-08-13-16-36\n" ] } ], "source": [ "from pinecone import ServerlessSpec\n", "from datetime import datetime\n", "current_time = datetime.now()\n", "time_string = current_time.strftime(\"%Y-%m-%d-%H-%M\")\n", "print(time_string)\n", "index_name = f\"hackrx-index{time_string}\"\n", "# index_name = \"hackrx-index\"\n", "if not pc.has_index(index_name):\n", " pc.create_index(\n", " name = index_name,\n", " dimension=1536,\n", " metric=\"cosine\",\n", " spec = ServerlessSpec(cloud=\"aws\", region=\"us-east-1\")\n", " )\n", "\n", "index = pc.Index(index_name)" ] }, { "cell_type": "code", "execution_count": 4, "id": "e6af117d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": 
[ "index" ] }, { "cell_type": "code", "execution_count": 109, "id": "7ee7c02b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\hp\\AppData\\Local\\Temp\\ipykernel_9600\\2571001968.py:7: LangChainDeprecationWarning: The class `OpenAIEmbeddings` was deprecated in LangChain 0.0.9 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import OpenAIEmbeddings``.\n", " embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n" ] } ], "source": [ "# from langchain_openai import \n", "from langchain.embeddings import OpenAIEmbeddings\n", "\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\")\n", "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "43151b5f", "metadata": {}, "outputs": [], "source": [ "from langchain_pinecone import PineconeVectorStore\n", "vector_store = PineconeVectorStore(index = index, embedding=embeddings)" ] }, { "cell_type": "code", "execution_count": 133, "id": "03fb29a9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['174168c5-ee26-4f4a-9aac-82c890cae977',\n", " 'cd33ff12-209e-4d3b-9ac9-cfffbb2f160f',\n", " '8f7cf61f-8b6a-4412-a45e-90614b04e4bf',\n", " '8696beaa-fa55-4ac6-9000-0f2be8f1a39b',\n", " 'f89f37a0-5382-4202-a320-c3374ca97470',\n", " '38e86c9f-a9a1-49a1-b1b6-0e0fd9242012',\n", " '9239c5e9-493e-45cc-b621-962c7cdd18ff',\n", " '1e3c9771-f5f2-45fd-b979-a0851d6285ff',\n", " '54a9e37f-84d9-4ea5-8a47-be7a500d94b6',\n", " '2ed09d29-6900-44d8-83e4-d503a5dcde15',\n", " '8103e53c-1a21-47aa-9e23-0a817ddbdca5',\n", " '0e879567-5c16-42b1-bd9c-bf919ef5b394',\n", " 'fe2003c6-8409-4045-acda-1f1c6c6a6699',\n", " '516acaa0-5a4d-4b0e-ac3c-5f03db8084b7',\n", " 
'4bca0e17-ca5a-43bf-b0c9-6f2572bd2f5f',\n", " '09a0a182-ee47-4569-a77e-b4a8adb82682',\n", " '3239e274-c789-410f-845e-791c0c4b6b03',\n", " 'fe825c23-84d9-4ad2-a853-74f8a1d02928',\n", " 'e5404438-f6ec-4af5-8a5c-4fa884e7022f',\n", " 'c9aeb34b-b556-46cf-b762-56ea338653e8',\n", " '9b6d478d-32fa-4ffd-96ca-95822c255cf1',\n", " '6652b6b8-1da2-4b15-9aea-26f79bc0a6d5',\n", " 'e8d57256-a599-4f9e-93e8-392334098dcb',\n", " '5491919b-0aa5-4080-9ee5-c319bc53749b',\n", " 'a00fec0d-79bc-47be-97ea-dbdcfefb828f',\n", " '71645e15-9440-40f2-b982-0376da83d0e3',\n", " '6df619c8-ef76-44de-b74a-2116148d89aa',\n", " 'f8151f79-56be-4fed-b364-b22ea4f2a2d0',\n", " '739c7bc6-0f9d-45ce-9bf0-db7c7dbc939b',\n", " '791401f5-46fe-4a9c-992b-da0135d72ae8',\n", " '1e67b489-9e3f-4e48-8807-d694963ede5b',\n", " '8e3892a6-7e26-4ecb-b8d0-c6a48126dbca',\n", " 'ffbcd2e2-8eb3-4e1b-997d-f0b8f004b95f',\n", " 'ac57e24d-8641-46d4-93aa-596664ed5a63',\n", " 'fc31c598-4716-4312-9091-75d9c879cac5',\n", " 'b08141e7-0519-4e92-bf5c-56849bc45c7f',\n", " '2e7a4727-f118-461d-a4aa-7f223ffeffcb',\n", " '1dcdf7e5-9d2f-4360-b9a9-ea00b9398fb4',\n", " '716f55ba-b624-43ae-98ce-0f77b063f17f',\n", " 'a42dbb78-85dc-4ee6-9b9c-c7a77093bae8',\n", " '57d5d937-2fef-4aae-86bb-6002523fa30f',\n", " '3400762f-1110-4e5f-a726-4f3edea01f02',\n", " '2d5b5f30-63d6-4399-8e69-806697e64307',\n", " '8feb1525-189a-45c1-ac23-0b33ba4994f3',\n", " '0a503b10-b5da-4cb5-a1b8-bd6f0f34ce64',\n", " '725010be-ebc8-45ee-be3e-2af2a8d1a5ee',\n", " '52016839-2213-4db2-b397-a51e7bca1882',\n", " '661398e1-eff3-4e1a-9ff4-b738bd2ce606',\n", " '5a37f469-e018-4866-9224-30734dda0161',\n", " '8028ec15-908a-4e98-8756-bff08d3ae96f',\n", " 'd8541648-8d20-4f79-a5bb-4160106b0795',\n", " '57aa5fb9-a383-4174-b88c-fd26e0c2e1e9',\n", " '8d539cf3-12bd-4844-b306-9ead1cb12540',\n", " '30e4322e-cf5e-4cd7-a56d-5529391d1b48',\n", " '81f18135-fb7e-49d5-b1ce-1c833246b7e6',\n", " 'c8f70828-1fc6-40cf-acd8-f7910841a3f6',\n", " '2ea87054-e3cb-436b-a418-e7dde7b66488',\n", " 
'ec8797e6-1e89-403d-8f4b-a7528b856a9a',\n", " '9ec0c175-c01a-4713-8042-a9dc5057cd6e',\n", " 'df1dcb67-07a8-49d6-8bfa-2a0b857907ca',\n", " 'ce065535-e2ca-45f4-b278-96d46356f511',\n", " '161f51df-b3ca-4c54-b686-c8b68543da7e',\n", " '9528a8fc-45e0-43bc-9933-20a0e5d25a91',\n", " '6c3840f5-8d8f-4ed2-bf17-f82a88f8cfc4',\n", " '46a9b7b4-c460-4ae4-afa7-6bb8478f6072',\n", " '76df9287-461a-4cc8-9f34-7420f44ff0bf',\n", " '80265ffa-ef7c-4721-956b-6010745f577c',\n", " 'd09fc759-c639-4f5c-9a01-5f0f3bc359af',\n", " '78b04c1b-38ed-493a-b04c-aebb454c87e0',\n", " 'b6e68d75-11fe-428c-898f-56c58a6f2206',\n", " '45255213-2a89-428e-a97a-371a1f78bfdd',\n", " '516b872e-3c3c-442d-8be3-254da689bc13',\n", " 'e16f0153-1bd8-45a0-af2b-35d581e23e66',\n", " 'c36378f3-8839-48ba-ac46-86916439de77',\n", " '82261df7-bbbc-47d6-9124-386e12f28c63',\n", " 'ebd9c5d0-9bec-4901-946d-d663d1e66e89',\n", " 'd33ef9e8-db28-4f12-9e1e-45041b0b0193',\n", " '867171b7-f18f-451b-8fe0-879fedc98c11',\n", " 'fcbed948-b63d-4462-adc5-e3d48cd3f27c',\n", " 'c21c5ccd-c110-4790-8a83-6a6f1d6d5bab',\n", " '38d43887-22f9-4d11-b94d-dc96de2f3313',\n", " 'fdfa031f-8160-4623-8b9a-1e6d2ad9ac0b',\n", " 'd51bbdba-7403-4a33-8c3b-e98931fc41a9',\n", " 'a11eca40-2a42-4213-9996-d7d4242f41f3',\n", " '1e159458-4db8-407f-9729-3b79f57870b2',\n", " 'dc3582df-3f5b-4865-b9b3-8122455b22d8',\n", " 'd51ea955-c3f9-4b68-a4d8-6f36f0087e90',\n", " '8c0b67e8-1355-489f-bdbb-e16eae04a585',\n", " '7c35e509-23d3-4d80-9863-3172bc1c7963',\n", " '94b2eb43-5a75-4e1f-a9ff-58029cf50431',\n", " '9e885f6e-67d6-4fc7-9fc0-25c2443628d6',\n", " 'b3cd4d17-6feb-417a-a34d-4fab409e22b3',\n", " 'bead51f3-b31f-47b1-b290-1616a3ed8008',\n", " '1e197e22-0230-4c09-80ae-c7d63433ac64',\n", " '68395eb4-67a4-4262-af1c-9961c787f8cc',\n", " '84c5b8dd-098b-40e4-b86f-c341dcf8395d',\n", " '1361b7ee-849f-4d68-b3c3-42e9103ec6fa',\n", " '4fbe5da0-6f3e-42f3-b99e-6081f44e0cd8',\n", " 'ac1975b1-d034-488f-b613-28cf3782d0a8',\n", " '0017d512-f135-4123-b694-665bc6e11e64',\n", " 
'f13798aa-3424-4b38-a7bf-f5abe366120f',\n", " '9dfcec61-f8a7-46be-98ae-7f54a5b53e7b',\n", " '44f6beba-fca5-4509-b814-662ddf5fad29',\n", " 'd87aedeb-16f1-4ccf-8728-fbbdfde6310f',\n", " '3eeb99d0-9c2f-4854-befd-3001154ca693',\n", " 'dbb2f8f2-d0a4-4288-8bbf-be24cb25360e',\n", " '0afd4ed7-a742-4230-a3e8-f59b4ed3af0f',\n", " '68cc2545-e9ee-4772-a968-5affe4eb80b7',\n", " '772df9fb-4da6-45a3-9cfc-fb5fec014153',\n", " 'a5bf6d49-afad-48a6-8ec7-d3b3e1c945d4',\n", " '3c4e4cfe-ce5f-48e4-bb2e-7d7080a6d9df',\n", " 'fdd184e8-2c0c-4a0d-aad1-425625c0acd8',\n", " '6059f8f3-bb47-4e8e-82f1-3b3cf42293ce',\n", " '32e22ff0-133e-4f0c-b9f0-d18108eada71',\n", " 'cd3a73dc-b2df-494a-80c1-7edd34655e6b',\n", " '081bb658-b2de-4470-9bf0-599595e32070',\n", " '9c9089be-1648-49ee-a296-4a2b0e629962',\n", " 'f90feeb1-dbb3-45c4-a141-aaffac9e9735',\n", " '931dbe4e-27ae-407c-b0a1-552cfadcf124',\n", " '5d1afa42-297f-475b-bbb3-d82b03b972f8',\n", " '75e2ee6b-9b59-4067-b59d-e12a6a29fb1f',\n", " 'd1f10078-a911-40d3-8def-36e11119d18b',\n", " '7380d7e5-8d51-492e-b6fe-042863fdb84e',\n", " '25d829a5-e681-4605-91b3-84b5589bca85',\n", " 'b901a4ad-11fa-4241-b6c0-2f1ce4ab5913',\n", " 'bcd5b4b2-efa6-4b89-a639-aae6afea19bf',\n", " '4f657335-b7e7-4918-b597-664db98ab9e5',\n", " '6b21048e-5bde-481a-9450-ea92c219741e',\n", " 'e209717c-6925-4cdd-b4a6-26230031d4b3',\n", " '2795137e-f3db-4c3a-a8be-4a4a62e2d83c',\n", " '43660962-d892-4a23-bdac-9825c5e00623',\n", " 'de594872-f941-4575-b7ec-6e66a222ca9d',\n", " '8dd521bd-02d5-44d0-b35b-2bc82c68ca87',\n", " 'de06a779-7d61-4240-a0e2-ac0b559469b1',\n", " '7892dcff-1b29-495e-a2a4-17cef5a7904d',\n", " 'a646b497-4d7a-4f4e-9b5d-4b989da6e26d',\n", " '2d9453bd-381f-4e20-bac4-27edbab64a5b',\n", " '45b2ff28-1a2b-4ac1-b30c-8e19dbd95943',\n", " '1f229b56-0839-495f-8ecb-a281eaaaa452',\n", " '763e5982-0827-41f9-b077-054d13782e69',\n", " 'c48d5c1b-6a70-41f2-8263-ac35244768e7',\n", " '65c4c939-b2a3-4dd8-b9c1-8a4585277859',\n", " 'c0954e09-6856-4a52-be96-059b9ad381d2',\n", " 
'484985c8-0f45-4289-904c-6143be565287',\n", " 'efedb28c-cc8e-4aad-ae86-e1126dfc960a',\n", " 'c41269f7-6a1a-4122-9326-9d4f08f7fa46',\n", " '8b53ddf1-8f7f-4902-b7f7-8059725ffb2b',\n", " '21ae63cb-649f-4b10-a67d-7f900a4185eb',\n", " '80610c06-6a16-44e7-9a43-dd95fd89e720',\n", " '8af13230-ea5f-425d-a4f0-e37acd8e7242',\n", " '3b8c80fa-a860-4324-9faa-d46848cd62c2',\n", " 'd358125e-6b25-4845-9303-b9f94ee9b1d9',\n", " '6dea7e26-2408-411f-8867-8251fe672319',\n", " '98564a4c-aeda-4556-af00-4ebd23cb407b',\n", " '7553a543-572c-4418-917f-9a6e7e62d155',\n", " 'c44444d0-f3a6-4f0d-b907-666d0b6c0d08',\n", " '12b11de7-032b-404e-9cfe-3d9ba260abd7',\n", " '9ac75c85-77d7-4418-8842-7997895d4400',\n", " 'e5d580eb-7c8c-451b-9e44-00386a72f47c',\n", " '923710d5-ff8f-44ed-97a7-ee40cb69ffba',\n", " 'b2990aa7-2b84-48cb-8abc-ee30719c5c86',\n", " 'f65e8225-3b98-4702-8b46-74b6703407b7',\n", " '992a965a-1c9c-46c7-bfca-4f0c99f33bf6',\n", " '18a1af9f-a788-45c7-9827-d6fb07c283d1',\n", " '5833c1f7-6d17-4308-919c-9c022e4cf98d',\n", " '3541476d-4fd7-4249-ae4f-c86d734001ca',\n", " '5f66b974-37ae-4e8a-a2eb-7d72f0d75d3c',\n", " '400717e9-430e-4bfc-9deb-f019dd5055a9',\n", " '356d2916-f094-48f2-8f5d-4658dd4209a5',\n", " '3de80d7b-42c5-456e-9a1b-7cdb75749df4',\n", " '156e996d-302b-4dbc-a3a5-db7a518f4a4c',\n", " '606c46a9-808c-4d33-bcd4-deac1e3b55a8',\n", " 'd9a44989-531c-4237-838a-8393479da64d',\n", " '2c26cc4c-251c-4d4f-ba17-309692015c4e',\n", " '74d76b64-1674-40c0-8b41-19b1eebe05a7',\n", " '71d03984-c626-4160-9430-528be9fedf59',\n", " 'cd48ebdf-7d40-4e61-87ab-f9eea638a74d',\n", " '8e8b2c73-ce6b-456c-897d-89fbfd75fa26',\n", " '9434a9f5-8909-45fb-a023-a4c61e8d4764',\n", " '7bd524c2-489a-4594-95bc-01b964c2c64d',\n", " 'f8e787dd-ad34-464d-b21f-ed6111c3fa30',\n", " '1a79c0b3-9b64-4809-813b-0480de369971',\n", " '89aa9b61-59e5-4f78-9f94-c145c753625c',\n", " '165c1f92-6a6b-4cce-b088-3a73c8c72c24',\n", " 'b7e1f173-6f42-4f91-a5f5-cdc2289548b4',\n", " '5320958c-6b8d-4445-9aa8-fb8e652198a5',\n", " 
'8efdbae0-1a07-4278-a657-e05cd1435753',\n", " '71a4c5c4-93a3-4839-acc7-d80bbcd4f774',\n", " '97463e5d-cecf-4cfa-bef5-5d6e9c8f0791',\n", " 'fbbd1d5a-7390-4011-b6c6-701fe5cfc1da',\n", " '09b40129-1ddc-4e93-bb40-c93865f4219e',\n", " '5569a342-0bde-4356-a38e-af426c796693',\n", " 'b09d66b0-245f-440f-a7f4-213bff7ba8ca',\n", " '5f62dabc-c220-44ce-b6f8-04adf37186c6',\n", " '273d9cd4-590d-4ece-9715-1ed201d3b53d',\n", " 'eb5c2c74-0de4-4870-aef4-b65a59fa502d',\n", " '5f7f5339-9919-44aa-b10f-449f16ed5df4',\n", " '3a890f5b-b380-4ab8-bfa9-f67f74dceac3',\n", " 'b47959e7-6038-4733-9e72-beaf98a731e4',\n", " 'bb1e60c2-bf86-4e9b-b0d0-8e1b859ff220',\n", " 'c6afbf5e-857a-48c7-84f2-9c5cb9a1ef00',\n", " '808ddb34-ecc5-4c7e-bb4d-0cd5487f8b84',\n", " '3c120e24-5edf-4a53-99e4-5b929162f849',\n", " '22a911c4-fe04-43e5-867c-c4529501131d',\n", " '0295a993-3a58-4550-a522-66004a6dd0fc',\n", " 'a54e8316-67d4-4ab7-9747-876ac1413eb3',\n", " 'b5068009-bef5-4bce-af38-081905babf3a',\n", " '7d57b14e-fcc6-4bc4-8ed7-9f8616268464',\n", " 'f119914b-dda3-4fab-92ed-ac7d45be674c',\n", " 'a13eb99f-bf84-4918-8308-6aaebfa44522',\n", " '02425f7d-3f2c-4f99-b4b3-dae251f79f9a',\n", " '8c070a7b-b0fe-4cad-9b25-e1f703c08041',\n", " '458439d7-1a8e-4808-b4bf-fe0a75b569a9',\n", " '1db0a9d9-dccd-4347-9cf3-283815f1bc05',\n", " '5ecc0797-280b-450a-b6db-fd8352c855bc',\n", " '85f323ea-ded5-4cea-87d5-e07e951b1fea',\n", " 'd5c87f7b-695b-4f80-8502-5e7116721e67',\n", " 'a2fd76f1-a851-495a-b463-44fc537d62cd',\n", " '737a76b0-d8d8-49a6-b433-f553a8aa2b58',\n", " '5b0db327-3234-468a-ba12-734a481d7e73',\n", " '70c8cf89-29fe-4349-b7d3-b3293267fd10',\n", " '635b04dd-fa10-4dde-bfcb-168cc8a9bd39',\n", " '152b468a-b307-4b8d-b7b3-ee7e152aac78',\n", " '8942236c-0e8c-42c5-8e95-7fcf84e89677',\n", " '2de3d06a-9d37-4e1f-a4ce-45a3e2cdcf08',\n", " '4c1dff30-a002-43e8-b47f-f66855fc13e4',\n", " '9254fd90-3ffc-48f8-b091-b9fe81d3b56d',\n", " 'b4befb2a-8746-4d75-8397-66010d1baf2c',\n", " '97ba14a5-ace8-45b7-b25b-be774aa25410',\n", " 
'25d27bce-5d31-430c-93c6-2c30025a030c',\n", " 'c15c3ada-bb90-43f3-a41d-f85a832673b8',\n", " '4a353418-e1db-4c6e-ab8e-f3a534b03d2e',\n", " '668dfef9-37da-4619-8a0b-71ba993c7ac3',\n", " 'ba8233d0-1601-4167-886c-f632e3d077bf',\n", " '49cf3301-80b4-46e5-93bd-55712adfae99',\n", " '51ced038-b27b-46d1-96e6-923867ca4774',\n", " 'ac4e9a83-8715-4a7d-b4a3-b4e7b2ecef9e',\n", " '6fe1361f-ede5-4359-90be-d007f6eb03e2',\n", " 'cf49d0cc-a941-486f-88ab-88c93cc3f211',\n", " 'ee21070d-e45e-4f61-ae1c-a412b659035b',\n", " '3744d911-37cc-4645-ba2b-b376718e0afd',\n", " 'efff5713-c6cf-4e9a-b5ce-ccd4e443a8b9',\n", " 'aeae7c83-6a90-40c6-9fa5-65a8488865af',\n", " '3b349ca6-fac5-41c1-b8de-5ee32161f023',\n", " '39703730-8667-432f-aceb-8a569bb7d3c7',\n", " 'd021eee3-ea5f-454e-a5f9-8ff2b354e05f',\n", " '29fab8ce-8c8c-4f76-a59e-00291af854df',\n", " '186e7eef-e3ab-4f05-b6ee-2ae65d871393',\n", " 'c8b02b23-9dc6-43a9-b0ea-fd42533165cf',\n", " '48734804-8b5e-45e8-9576-16f5f3d5da13',\n", " '56f12823-504b-46b1-a239-78ebf589063e',\n", " '5cea0d13-04f6-4b8a-9b71-ec1a2a5eb191',\n", " '147a6f26-3ad6-4929-a074-7e397d3ac134',\n", " '4e725c5a-f457-437b-ae47-d5e544a058d3',\n", " '39b0e4b9-74ea-461f-8375-31a38af1db59',\n", " '2d90a41d-20bc-4f87-adbd-8c5ef5d8f33f',\n", " '0108b65b-f4b5-4839-80bc-b5a2f61c35c6',\n", " '8002651d-7f26-4c99-a60c-d8df30aadd79',\n", " 'bef4b953-c0e3-4822-85e0-94d3a41a2a65',\n", " '6b02df41-38d7-4a11-bb14-a87ef004a191',\n", " '84c9570a-06ee-4edd-b767-dc2ce45603d9',\n", " '71a0f38c-4764-428a-a9a9-f3353fd7c768',\n", " '15e8567b-4f1c-4f80-8ce7-aaf83a489933',\n", " '0dbdae26-1e9d-4b2c-8957-3f0fd056fb6f',\n", " '2888971e-530b-44b5-b38a-ba00b8667439',\n", " 'c99e5ddc-0741-4a24-8cd8-cab974c32dbc',\n", " '5365117a-4e5e-4683-9cf6-5c9015b328c8',\n", " '62194dec-507c-49d8-b809-20c03d5caf0b',\n", " '358b61b1-e962-4443-9240-835bc75146cc',\n", " '10c92ff4-a8cb-483b-ad78-6dc680d50cd8',\n", " 'a6cc759f-2599-4199-a08d-3f3b142af66f',\n", " '035bb626-5d0a-4a43-a9cb-66b2bd940d2c',\n", " 
'1a1da71a-e9b8-466d-b5e0-b7e10847b857',\n", " '09fe8139-5029-4782-bff9-a0edfce73e2f',\n", " '939af366-34d7-47c5-8937-59f0122df0d4',\n", " 'f74a0d54-0d45-459d-b988-b2c398cacfd5',\n", " 'bbe495a3-7964-4dd0-8c6d-b2a22a8494b2',\n", " '11616f61-808c-43a4-8c38-9de0db923d68',\n", " '65b9f618-b8aa-42d0-b7e4-321207cf81eb',\n", " '7d964d73-3b58-49ab-8a13-fe9add8f015e',\n", " '90ac205f-f322-431d-a097-a7fce1729cb2',\n", " 'ce338839-0430-47a7-b426-87feabab5320',\n", " '09cc0450-5e49-43a2-b7c0-c79afb049eb0',\n", " '80500abe-37ef-4483-a40c-aa1ac451cf95',\n", " '64b77e67-7115-4941-a04b-2ff4771aa71e',\n", " 'cc9a5ffa-2c9c-4096-972a-e5fa9b3cefe9',\n", " 'be907e7c-f601-4119-b68c-35a24eb9acfa',\n", " '79fee5a5-3d8a-46e9-96bc-340ac4a324aa',\n", " 'db7c23df-6888-4b4e-a4b6-58a953c174d3',\n", " 'f54d787e-5dfb-4801-bfed-842cd18fa332',\n", " 'e18688f1-15fc-400d-8ea3-59d313181cb0',\n", " 'f7fba163-e8ea-451f-b1f8-cc70f599f23d',\n", " '7f7f8847-1483-4801-96da-8c789d6ac93a',\n", " 'e356cda5-b603-4e15-9995-e9411bd8f4f4',\n", " 'ab3bdbe6-d223-420d-b66b-d572bf4b14d1',\n", " 'b2e2ed1a-e738-4197-a878-d8762814c860',\n", " 'b0d31fab-a0f8-4978-a3fd-00fc9754f327',\n", " 'af6d58f7-3d4d-4a32-96ba-0376dc945960',\n", " '863b5fd8-fedc-42c9-8a8e-2d07670d676c',\n", " 'dcc803bf-0601-4a82-a90b-3481b7188b73',\n", " 'cefcc7ee-9b1b-4586-983c-fceda1417772',\n", " 'b895c11d-3f5d-49f5-b4d8-9f0f9307ee3e',\n", " '06a62e87-e561-46af-94f3-9657d2a8e0c2',\n", " 'f34de4a4-b5e0-4a7e-81e1-5c7abc066221',\n", " '3ad33f70-faaa-4ae2-8660-bf7972a401e0',\n", " 'b170a51a-cc14-4a3d-9c39-23f18e7405f5',\n", " '4d89bda3-00d8-4db7-a89b-da2b06b29a24',\n", " 'e28be583-da3a-4b82-bc86-e25dde6fb02b',\n", " 'db982e3e-c076-4e06-9ba6-faed32db3527',\n", " '86c5d471-60af-4e96-9cf1-a3b7f5295c47',\n", " '9e010af0-ea9d-40f0-aad3-973c0789768c',\n", " 'e5baba45-762a-42b5-b32d-4f64a4753b27',\n", " '772ee256-aa86-4ae5-925e-acfff2ec76f0',\n", " '0ad3ab0a-0bae-46ab-92d3-caf165cefed5']" ] }, "execution_count": 133, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "vector_store.add_documents(documents=text_chunks, ids = uuids)" ] }, { "cell_type": "code", "execution_count": 11, "id": "7b8b7a46", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* Page 16 of 25 \n", "National Parivar Mediclaim Plus Policy \n", "UIN: NICHLIP25039V032425 \n", " \n", "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n", "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n", "made during the policy period. \n", "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n", "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n", "policy year. \n", "There shall be no refund for the completed policy year elapsed. [{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n", "* Page 16 of 25 \n", "National Parivar Mediclaim Plus Policy \n", "UIN: NICHLIP25039V032425 \n", " \n", "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n", "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n", "made during the policy period. \n", "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n", "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n", "policy year. \n", "There shall be no refund for the completed policy year elapsed. 
[{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n" ] } ], "source": [ "results = vector_store.similarity_search(\n", " \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\",\n", " k=2\n", ")\n", "for res in results:\n", " print(f\"* {res.page_content} [{res.metadata}]\")" ] }, { "cell_type": "code", "execution_count": 135, "id": "41f27c21", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Document(id='f13798aa-3424-4b38-a7bf-f5abe366120f', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}, page_content='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.'), Document(id='8002651d-7f26-4c99-a60c-d8df30aadd79', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}, page_content='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. 
The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.')]\n" ] } ], "source": [ "print(results)" ] }, { "cell_type": "code", "execution_count": 136, "id": "cf7b7568", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* [SIM=0.678520] Page 16 of 25 \n", "National Parivar Mediclaim Plus Policy \n", "UIN: NICHLIP25039V032425 \n", " \n", "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n", "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n", "made during the policy period. \n", "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n", "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n", "policy year. \n", "There shall be no refund for the completed policy year elapsed. 
[{'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'type': 'text'}]\n" ] } ], "source": [
 "results = vector_store.similarity_search_with_score(\n",
 "    \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\", k=1\n",
 ")\n",
 "for res, score in results:\n",
 "    # `.3f` is three decimals; the previous `3f` only set a minimum field width.\n",
 "    print(f\"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]\")" ] },
{ "cell_type": "markdown", "id": "7fde8a22", "metadata": {}, "source": [ "### QUERY PARSING " ] },
{ "cell_type": "code", "execution_count": 62, "id": "20452e98", "metadata": {}, "outputs": [], "source": [
 "from pydantic import BaseModel, model_validator,field_validator\n",
 "from typing import List, Dict, Any, Optional\n",
 "import json\n",
 "\n",
 "# Structured form of a user question (the schema bound to the LLM in parsing_query).\n",
 "class QuerySpec(BaseModel):\n",
 "    raw_query: str\n",
 "    intent: str\n",
 "    entities: Dict[str, str]\n",
 "    constraints : Dict[str, Any]\n",
 "    answer_type: str\n",
 "    followups: Optional[List[str]] = []\n",
 "\n",
 "    @model_validator(mode = \"before\")\n",
 "    @classmethod\n",
 "    def parse_nested_json(cls, values):\n",
 "        # Decode `entities` / `constraints` when they arrive as JSON strings.\n",
 "        # A before-validator sees the raw input, which is not guaranteed to be a\n",
 "        # dict; anything else is handed to normal validation untouched.\n",
 "        if not isinstance(values, dict):\n",
 "            return values\n",
 "        values = dict(values)  # do not mutate the caller's dict\n",
 "        for field in ['entities', 'constraints']:\n",
 "            val = values.get(field)\n",
 "            if isinstance(val, str):\n",
 "                try:\n",
 "                    values[field] = json.loads(val)\n",
 "                except json.JSONDecodeError:\n",
 "                    # Keep the raw string; field validation then reports it.\n",
 "                    pass\n",
 "        return values\n",
 "\n",
 "# One retrieved chunk with its location metadata and scoring fields.\n",
 "class ClauseHit(BaseModel):\n",
 "    doc_id : str\n",
 "    page: int\n",
 "    chunk_id: str\n",
 "    text: str\n",
 "    metadata: Dict[str, Any]\n",
 "    score: float\n",
 "    boost: Optional[float] = None\n",
 "    combined_score: Optional[float] = None\n",
 "\n",
 "    @field_validator(\"metadata\", mode=\"before\")\n",
 "    @classmethod\n",
 "    def parse_metadata(cls, v):\n",
 "        # Accept metadata given as a JSON string; blank or invalid text becomes {}.\n",
 "        if isinstance(v, str):\n",
 "            try:\n",
 "                return json.loads(v) if v.strip() else {}\n",
 "            except json.JSONDecodeError:\n",
 "                return {}\n",
 "        return v\n",
 "\n",
 "class LogicResult(BaseModel):\n",
 "    answer: str\n",
 "    decision: str # \"covered\"/\"not_covered\"/\"conditional\"\n",
 "    confidence: float\n",
 "    evidence: List[ClauseHit]\n",
 "    rationale: str\n",
 "\n",
 "\n",
 "class APIResponse(BaseModel):\n",
 "    query_spec: QuerySpec\n",
 "    logic_result: LogicResult\n",
 "    debug: Optional[Dict[str, Any]] = None\n",
 "\n",
 "\n",
 "\n" ] },
{ "cell_type": "code", "execution_count": 8, "id": "3ac1f99f", "metadata": {}, "outputs": [], "source": [ "user_question = \"What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?\"" ] }, { "cell_type": "code", "execution_count": 9, "id": "2961e184", "metadata": {}, "outputs": [], "source": [ "PARSER_PROMPT = f\"\"\"You receive a user's question about an insurance/contract document. Produce a JSON with keys:\n", "- intent (one of: coverage_check, definition, limit_query, waiting_period, exclusions, other)\n", "- entities (map of entity_name -> canonical string)\n", "- constraints (map: plan, time_window, eligible_person, numerical_constraints)\n", "- answer_type (one of: yes_no, short_explain, detailed, clause_list)\n", "Return ONLY the JSON.Make sure that nested fields like \"entities\" and \"constraints\" are JSON objects, not strings.\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 10, "id": "a9123e2a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), client=, default_metadata=(), model_kwargs={})" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from langchain_google_genai import ChatGoogleGenerativeAI\n", "api_key = os.getenv(\"GEMINI_API_KEY\")\n", "llm = ChatGoogleGenerativeAI(\n", " model=\"gemini-2.5-flash\",\n", " google_api_key = api_key\n", " \n", " )\n", "llm" ] }, { "cell_type": "code", "execution_count": 30, "id": "290081a1", "metadata": {}, "outputs": [], "source": [ "def parsing_query(query:str) -> QuerySpec:\n", " # Bind the schema to the model\n", " structured_llm = llm.with_structured_output(QuerySpec)\n", "\n", " # Compose the full prompt with instructions 
def parsing_query(query: str) -> "QuerySpec":
    """Turn a free-text user question into a structured ``QuerySpec``.

    The module-level ``llm`` is bound to the ``QuerySpec`` schema with
    ``with_structured_output`` and invoked with ``PARSER_PROMPT`` followed by
    the question on a new line.

    Args:
        query: The user's question, verbatim.

    Returns:
        The object returned by the structured LLM, with ``raw_query``
        overwritten by ``query``.
    """
    # Bind the schema to the model
    structured_llm = llm.with_structured_output(QuerySpec)

    # Instructions first, then the user question
    full_prompt = PARSER_PROMPT + "\n" + query

    result: QuerySpec = structured_llm.invoke(full_prompt)

    # PARSER_PROMPT never asks the model for `raw_query`, and the retrieval
    # step embeds `raw_query`. Pin it to the verbatim input so a paraphrase
    # from the model can never change what gets embedded.
    if result is not None:
        result.raw_query = query
    return result


def get_query_embedding(embedding_client, query_spec: "QuerySpec"):
    """Embed the raw query and build optional expansion strings.

    Args:
        embedding_client: Object exposing ``embed_query(text)``.
        query_spec: Parsed query; ``raw_query`` and ``entities`` are read.

    Returns:
        A 2-tuple ``(vector, expansions)``: the embedding of ``raw_query`` and
        a list of expansion strings. The list holds one entry when the
        entities contain a ``"procedure"`` key, otherwise it is empty.
        Callers that only need the vector must unpack the tuple.
    """
    raw_text = query_spec.raw_query
    query_vector = embedding_client.embed_query(raw_text)

    expansions = []
    if "procedure" in query_spec.entities:
        procedure = query_spec.entities["procedure"]
        expansions.append(f"{raw_text} OR {procedure} procedures related")
    return query_vector, expansions


def retrieval_from_pinecone_vectoreStore(pinecone_index, embeddings, top_k=3, filter_meta=None):
    """
    Retrieve the top matching chunks from Pinecone.

    Args:
        pinecone_index: Your Pinecone index object.
        embeddings: The query vector, or the ``(vector, expansions)`` tuple
            returned by ``get_query_embedding`` (only the vector is used).
        top_k: How many chunks to retrieve.
        filter_meta: Optional metadata filter dict.

    Returns:
        List of ClauseHit objects (lightweight container for chunk info).

    Raises:
        KeyError: If a match's metadata has no ``doc_id`` or ``text``.
    """
    # The notebook passes the result of get_query_embedding() straight in,
    # which is a (vector, expansions) tuple rather than a vector. A plain
    # vector never has a list/tuple as its first element, so this check
    # cannot misfire on a genuine 2-dimensional vector.
    query_vector = embeddings
    if (
        isinstance(embeddings, tuple)
        and len(embeddings) == 2
        and isinstance(embeddings[0], (list, tuple))
    ):
        query_vector = embeddings[0]

    res = pinecone_index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
        include_values=False,
        filter=filter_meta,
    )

    hits = []
    for match in res['matches']:
        meta = match['metadata']
        hits.append(ClauseHit(
            doc_id=meta['doc_id'],
            page=meta.get('page', -1),
            chunk_id=meta.get('chunk_id', ''),
            text=meta['text'],
            metadata=meta,
            score=match['score'],
        ))
    return hits


def _normalise_text(text):
    """Lower-case ``text`` and collapse all whitespace runs to single spaces."""
    return " ".join((text or "").split()).lower()


def _find_matching_clause(evidence, clauses):
    """Pick the retrieved clause that an LLM evidence entry most likely cites.

    Candidates are the clauses sharing the evidence's ``doc_id`` and ``page``.
    Among them, a clause whose text contains the evidence snippet (or is
    contained in it) wins; otherwise the one sharing the most words wins.
    Ties go to the earliest clause. Returns ``None`` without a candidate.
    """
    candidates = [
        c for c in clauses
        if c.doc_id == evidence.doc_id and str(c.page) == str(evidence.page)
    ]
    if not candidates:
        return None

    snippet = _normalise_text(getattr(evidence, "text", ""))
    snippet_words = set(snippet.split())

    def affinity(clause):
        text = _normalise_text(clause.text)
        contained = bool(snippet and text) and (snippet in text or text in snippet)
        return (contained, len(snippet_words & set(text.split())))

    # max() returns the first maximal element, so ties fall back to the
    # earliest clause.
    return max(candidates, key=affinity)


def evaluate_with_llm(raw_query: str, top_clauses: list, verbose: bool = False):
    """
    Use the LLM to analyze retrieved clauses and return structured decision.

    Args:
        raw_query: The user's question.
        top_clauses: Retrieved clauses; ``doc_id``, ``page``, ``text``,
            ``chunk_id``, ``metadata`` and ``score`` are read from each.
        verbose: When True, print the numbered clause context sent to the LLM.

    Returns:
        The ``LogicResult`` from the structured LLM. Its ``evidence`` is
        reduced to the entries that match a retrieved clause, and each kept
        entry carries that clause's text, chunk_id, metadata and score.
    """
    # Numbered context, one line per retrieved clause
    context_clauses = [
        f"{i}) [source:{c.doc_id} page:{c.page}] {c.text}"
        for i, c in enumerate(top_clauses, 1)
    ]
    clause_block = "\n".join(context_clauses)
    if verbose:
        print(clause_block)

    # NOTE(review): the JSON sketch in the prompt lists "snippet"/"reason"
    # for evidence items, which ClauseHit does not define; the structured
    # output is bound to LogicResult. Consider aligning the two.
    prompt = f"""
    You are an insurance policy analyst. Question: "{raw_query}"

    Provided clauses (numbered):
    {clause_block}

    Task:
    1) Decide: COVERED / NOT_COVERED / CONDITIONAL
    2) Summarize the exact clause(s) that justify your decision.
    3) List any conditions, waiting periods, sublimits, or exclusions relevant.
    4) Provide a concise final answer (1-2 sentences).

    Return JSON with these exact keys:
    {{
      "decision": "...",
      "evidence": [
        {{"doc_id": "...", "page": 0, "snippet": "...", "reason": "..."}}
      ],
      "confidence": 0.0,
      "rationale": "...",
      "answer": "..."
    }}
    """

    # Directly parse to LogicResult using structured output
    structured_llm = llm.with_structured_output(LogicResult)
    result: LogicResult = structured_llm.invoke(prompt)

    # Replace model-supplied evidence fields with the retrieved clause's
    # values. The prompt gives the model neither chunk ids nor scores, so
    # whatever it returned for those cannot come from retrieval.
    # Evidence that matches no retrieved clause is dropped.
    enriched_evidence = []
    for ev in result.evidence:
        matched = _find_matching_clause(ev, top_clauses)
        if matched is None:
            continue
        ev.text = matched.text
        ev.chunk_id = matched.chunk_id
        ev.metadata = matched.metadata
        ev.score = matched.score
        enriched_evidence.append(ev)

    result.evidence = enriched_evidence
    return result
\\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', 'type': 'text'}, score=0.678843796, boost=None, combined_score=None),\n", " ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. 
\\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'Page 16 of 25 \\nNational Parivar Mediclaim Plus Policy \\nUIN: NICHLIP25039V032425 \\n \\nii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \\na) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \\nmade during the policy period. \\nb) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \\nsuch policy years has not commenced and refund proportionate premium for unexpired policy period for the current \\npolicy year. \\nThere shall be no refund for the completed policy year elapsed.', 'type': 'text'}, score=0.677854538, boost=None, combined_score=None),\n", " ClauseHit(doc_id='b0a34a7d-f5a1-4777-93aa-c59269013de5', page=24, chunk_id='b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', text='all claims made in the aggregate during each policy year. \\n \\n2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \\npayment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \\nand coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \\nCoverage shall not be available during the period for which no premium is received. 
\\n \\n2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \\nregistered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or', metadata={'chunk_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5_p24', 'doc_id': 'b0a34a7d-f5a1-4777-93aa-c59269013de5', 'page': 24.0, 'text': 'all claims made in the aggregate during each policy year. \\n \\n2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \\npayment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \\nand coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \\nCoverage shall not be available during the period for which no premium is received. \\n \\n2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \\nregistered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or', 'type': 'text'}, score=0.64794, boost=None, combined_score=None)]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_hits" ] }, { "cell_type": "code", "execution_count": 75, "id": "05cb7ca5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] Page 16 of 25 \n", "National Parivar Mediclaim Plus Policy \n", "UIN: NICHLIP25039V032425 \n", " \n", "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n", "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n", "made during the policy period. 
\n", "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n", "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n", "policy year. \n", "There shall be no refund for the completed policy year elapsed.\n", "2) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] Page 16 of 25 \n", "National Parivar Mediclaim Plus Policy \n", "UIN: NICHLIP25039V032425 \n", " \n", "ii. The policyholder may cancel his/her policy at any time during the term, by giving 7 days’ notice in writing. The Insurer shall: \n", "a) refund proportionate premium for unexpired policy period, if the term of policy upto one year and there is no claim (s) \n", "made during the policy period. \n", "b) refund premium for the unexpired policy period, in respect of policies with term more than 1 year and risk coverage for \n", "such policy years has not commenced and refund proportionate premium for unexpired policy period for the current \n", "policy year. \n", "There shall be no refund for the completed policy year elapsed.\n", "3) [source:b0a34a7d-f5a1-4777-93aa-c59269013de5 page:24] all claims made in the aggregate during each policy year. \n", " \n", "2.21 Grace Period means the specified period of time, immediately following the premium due date during which premium \n", "payment can be made to renew or continue a policy in force without loss of continuity benefits pertaining to Waiting Periods \n", "and coverage of Pre-Existing Diseases. The Grace Period for payment of the premium shall be thirty days. \n", "Coverage shall not be available during the period for which no premium is received. 
\n", " \n", "2.22 Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been \n", "registered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or\n" ] } ], "source": [ "# Step 3 — Evaluate with LLM\n", "result = evaluate_with_llm(query, top_hits)" ] }, { "cell_type": "code", "execution_count": 73, "id": "40c7075b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'The grace period for premium payment under the National Parivar Mediclaim Plus Policy is thirty days. However, coverage is not available during this period if no premium is received.'" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.answer" ] }, { "cell_type": "code", "execution_count": 82, "id": "46ff44ac", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\hp\\AppData\\Local\\Temp\\ipykernel_9600\\3651844483.py:1: PydanticDeprecatedSince20: The `__fields__` attribute is deprecated, use `model_fields` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/\n", " result.evidence[0].__fields__\n" ] }, { "data": { "text/plain": [ "{'doc_id': FieldInfo(annotation=str, required=True),\n", " 'page': FieldInfo(annotation=int, required=True),\n", " 'chunk_id': FieldInfo(annotation=str, required=True),\n", " 'text': FieldInfo(annotation=str, required=True),\n", " 'metadata': FieldInfo(annotation=Dict[str, Any], required=True),\n", " 'score': FieldInfo(annotation=float, required=True),\n", " 'boost': FieldInfo(annotation=Union[float, NoneType], required=False, default=None),\n", " 'combined_score': FieldInfo(annotation=Union[float, NoneType], required=False, default=None)}" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.evidence[0].__fields__" ] } ], "metadata": { "kernelspec": { "display_name": "rag-app", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 5 }