niranjan9795 committed on
Commit
90f9b02
·
verified ·
1 Parent(s): 654d722

Upload all files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. Bank.pdf +3 -0
  3. app.py +237 -0
  4. requirements.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Bank.pdf filter=lfs diff=lfs merge=lfs -text
Bank.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65b86fe7f0f01b1478f5516d1f90c6a66e465158bf0d997d9132a80f2c6d5439
3
+ size 438003
app.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Hugging Face Spaces deployment file for a Gradio-based Policy & Claims Agent.
2
+
3
+ from langchain_community.document_loaders import PyPDFLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_community.vectorstores import Chroma
6
+ import os
7
+ from pathlib import Path
8
+ from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint, ChatHuggingFace
9
+ from langchain_community.vectorstores import Chroma
10
+ from langchain_classic.chains import create_retrieval_chain
11
+ from langchain_classic.chains.combine_documents import create_stuff_documents_chain
12
+ from langchain_core.prompts import ChatPromptTemplate
13
+ import getpass
14
+ import gradio as gr
15
+ import os
16
+
17
# Location of the bundled policy document used to ground all answers.
PDF_PATH = Path("./Bank.pdf")

# Read the Hugging Face token from the environment (Space secrets are exposed
# as environment variables). The original code referenced an undefined name
# `HF_KEY`, which raised NameError at import time and crashed the whole app.
HF_KEY = os.environ.get("HF_KEY", "") or os.environ.get("HF_TOKEN", "")
if HF_KEY:
    os.environ["HF_TOKEN"] = HF_KEY
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_KEY
20
+
21
def load_pdf():
    """Load every page of the bundled policy PDF.

    Returns:
        list: one langchain ``Document`` per PDF page (PyPDFLoader's
        default page-per-document behavior).
    """
    loader = PyPDFLoader(str(PDF_PATH))
    pages = loader.load()
    # Fixed garbled log text ("PDF loaed and returing ...").
    print(f"PDF loaded and returning page number {len(pages)} \n")
    return pages
30
+
31
+
32
def split_pages(pages):
    """Split page documents into overlapping ~700-char chunks for retrieval."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)
    chunks = splitter.split_documents(pages)
    print(f"Created {len(chunks)} chunks")
    return chunks
39
+
40
def create_embedding():
    """Build the sentence-transformers embedding model.

    Uses CUDA when a GPU is visible and falls back to CPU otherwise, so the
    app also runs on CPU-only hosts (e.g. a free Hugging Face Space). The
    original hard-coded ``'cuda'`` and crashed on machines without a GPU.
    """
    # Local import: torch is already a transitive dependency of
    # HuggingFaceEmbeddings, so this adds nothing new to requirements.
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": device},
    )
    return embeddings
46
+
47
def create_vector(chunks, embeddings):
    """Embed the chunks into a persistent on-disk Chroma collection.

    Args:
        chunks: split documents produced by ``split_pages``.
        embeddings: embedding model from ``create_embedding``.
    """
    vector_db = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory="./hf_cloud_db",
    )
    # Fixed garbled log text ("vectore DB using huggig face").
    print("Created vector DB using Hugging Face\n")
    return vector_db
55
+
56
def get_retriver(vector_db):
    """Return a top-5 similarity retriever over the vector store.

    NOTE: the misspelled name ``get_retriver`` is kept because callers
    (``initialize_app``) use it; renaming would break the public interface.
    """
    retriever = vector_db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )
    # Fixed garbled log text ("Retreiver created").
    print("Retriever created \n")
    return retriever
62
+
63
def create_model():
    """Instantiate the hosted DeepSeek endpoint and wrap it for chat use."""
    endpoint = HuggingFaceEndpoint(
        repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        task="text-generation",
        max_new_tokens=500,
        # NOTE(review): temperature is typically ignored while
        # do_sample=False (greedy decoding) — confirm this pairing is intended.
        temperature=0.2,
        do_sample=False,
    )
    chat_model = ChatHuggingFace(llm=endpoint)
    print("Model created")
    return chat_model
75
+
76
def build_policy_prompt():
    """Return the ChatPromptTemplate for grounded policy Q&A.

    Template variables: ``{input}`` (user question) and ``{context}``
    (retrieved chunks, filled in by the stuff-documents chain).
    """
    prompt = ChatPromptTemplate.from_template("""
You are a Policy & Claims agent.

Answer the user's question using ONLY the retrieved policy context.

Rules:
1. Do not guess.
2. Do not use outside knowledge.
3. If the answer is not found in the context, say:
"Answer not found in the provided policy document."
4. Keep the answer short and clear.
5. Mention source page number when available.

Question:
{input}

Context:
{context}
""")
    return prompt
97
+
98
def build_claim_prompt():
    """Return the ChatPromptTemplate for the claim pre-check mode.

    Same variables as the policy prompt (``{input}``, ``{context}``), but
    instructs the model to emit a structured pre-check verdict with an
    explicit "not final approval" disclaimer.
    """
    prompt = ChatPromptTemplate.from_template("""
You are a Policy & Claims Copilot.

You are doing a claim pre-check, not final claim approval.

Use ONLY the retrieved policy context.

Rules:
1. Do not guess.
2. Do not use outside knowledge.
3. If evidence is missing, say:
"Unclear based on provided policy context."
4. Give output in this format:

Pre-check Result:
- Likely Covered / Likely Not Covered / Unclear

Reason:
- Short explanation from the policy context

Waiting Period / Limits:
- Mention only if found

Documents Needed:
- Mention only if found

Disclaimer:
- This is only a pre-check, not final claim approval.

User Claim Scenario:
{input}

Context:
{context}
""")
    return prompt
135
+
136
def rag_chaining(retriever, chat_model, prompt):
    """Wire retriever + chat model + prompt into a retrieval-augmented chain."""
    stuff_chain = create_stuff_documents_chain(chat_model, prompt)
    return create_retrieval_chain(retriever, stuff_chain)
140
+
141
def chat_function(message, history, mode):
    """Route a chat turn to the Q&A chain or the claim pre-check chain.

    ``history`` is supplied by gr.ChatInterface but unused; routing depends
    only on the selected ``mode`` radio value.
    """
    if mode in ("Q&A", "Policy Q&A"):
        return ask_policy_question(message, APP["policy_chain"])
    return claim_precheck(message, APP["precheck_chain"])
146
+
147
def format_sources(docs):
    """Render a deduplicated "- File: X, Page: Y" listing for retrieved docs.

    Returns the fallback string "No sources found." for an empty/None input.
    """
    if not docs:
        return "No sources found."

    seen = set()
    lines = []
    for doc in docs:
        meta = doc.metadata
        page = meta.get("page", "N/A")
        source = meta.get("source", "N/A")
        entry = (source, page)
        if entry in seen:
            continue  # skip duplicate (file, page) pairs
        seen.add(entry)
        lines.append(f"- File: {source}, Page: {page}")
    return "\n".join(lines)
164
+
165
def ask_policy_question(message, policy_chain):
    """Invoke the policy Q&A chain and append a sources footer to the answer."""
    result = policy_chain.invoke({"input": message})
    sources = format_sources(result.get("context", []))
    answer = result.get("answer", "")
    return f"{answer}\n\nSources:\n{sources}\n"
176
+
177
def claim_precheck(message, precheck_chain):
    """Invoke the claim pre-check chain and append a sources footer."""
    result = precheck_chain.invoke({"input": message})
    sources = format_sources(result.get("context", []))
    answer = result.get("answer", "")
    return f"{answer}\n\nSources:\n{sources}\n"
187
+ """
188
def launch_ui():
    """Build and return the Gradio ChatInterface (without launching it).

    The original version called ``demo.launch()`` here and returned None, so
    the module-level ``demo = launch_ui()`` left ``demo`` as None and the
    ``demo.launch()`` call under ``__main__`` raised AttributeError.
    Returning the Blocks object lets the caller — or the Spaces runtime,
    which looks for a module-level ``demo`` — decide when to launch.
    """
    mode_input = gr.Radio(
        choices=["Q&A", "Claim Pre-check"],
        value="Q&A",
        label="Mode",
    )

    demo = gr.ChatInterface(
        fn=chat_function,
        additional_inputs=[mode_input],
        title="Policy & Claims Agent",
        description=(
            "Ask policy questions or run a basic claim pre-check using the uploaded PDF. "
            "Responses are grounded in retrieved document chunks."
        ),
        examples=[
            ["What is covered under hospitalization?", "Q&A"],
            ["What documents are needed to submit a claim?", "Q&A"],
            ["My policy started 4 months ago and I want to claim for a surgery. Is it likely covered?", "Claim Pre-check"],
        ],
        cache_examples=False,
    )
    return demo
212
+
213
+
214
def initialize_app():
    """One-time pipeline setup: PDF -> chunks -> vectors -> RAG chains.

    Returns a dict with the two ready-to-invoke chains keyed as
    "policy_chain" and "precheck_chain".
    """
    pages = load_pdf()
    embeddings = create_embedding()
    vector_db = create_vector(split_pages(pages), embeddings)
    retriever = get_retriver(vector_db)
    chat_model = create_model()

    return {
        "policy_chain": rag_chaining(retriever, chat_model, build_policy_prompt()),
        "precheck_chain": rag_chaining(retriever, chat_model, build_claim_prompt()),
    }
232
+
233
# Module-level setup: Spaces imports this file, so the chains and the `demo`
# Blocks object must exist at import time.
APP = initialize_app()
demo = launch_ui()

if __name__ == "__main__":
    # launch_ui() historically launched internally and returned None;
    # guard so `.launch()` is never called on None (AttributeError).
    if demo is not None:
        demo.launch()
requirements.txt ADDED
File without changes