kk20krishna commited on
Commit
74d52fc
·
verified ·
1 Parent(s): 649b40b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -0
app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class AbbyyVantage:
2
+ """
3
+ A client to interact with the ABBYY Vantage public API.
4
+ Handles authentication, skill listing, transaction initiation, and result retrieval.
5
+ """
6
+
7
+ def __init__(self, client_id, client_secret, region="au"):
8
+ """
9
+ Initializes the AbbyyVantageClient by authenticating using client credentials.
10
+
11
+ Args:
12
+ client_id (str): Your ABBYY Vantage client ID.
13
+ client_secret (str): Your ABBYY Vantage client secret.
14
+ region (str): ABBYY Vantage region ('eu', 'us', 'au', etc.). Defaults to 'au'.
15
+
16
+ Raises:
17
+ Exception: If authentication fails or access token is not returned.
18
+ """
19
+ self.client_id = client_id
20
+ self.client_secret = client_secret
21
+ self.token_url = f"https://vantage-{region}.abbyy.com/auth2/connect/token"
22
+ self.api_base = f"https://vantage-{region}.abbyy.com/api/publicapi/v1"
23
+
24
+ try:
25
+ # Prepare data for token request using client credentials
26
+ data = {
27
+ 'grant_type': 'client_credentials',
28
+ 'client_id': self.client_id,
29
+ 'client_secret': self.client_secret
30
+ }
31
+
32
+ # Request access token from ABBYY OAuth2 endpoint
33
+ res = requests.post(self.token_url, data=data)
34
+ res.raise_for_status()
35
+
36
+ # Extract access token from response
37
+ token = res.json().get('access_token')
38
+ if not token:
39
+ raise ValueError("No access token returned from ABBYY")
40
+
41
+ # Set authorization headers for future API calls
42
+ self._headers = {
43
+ "Authorization": f"Bearer {token}",
44
+ "accept": "application/json"
45
+ }
46
+ except Exception as e:
47
+ print(f"Error during authentication: {e}")
48
+ raise
49
+
50
+ def get_skills(self):
51
+ """
52
+ Retrieves a list of available document processing skills from ABBYY Vantage.
53
+
54
+ Returns:
55
+ dict or None: A JSON object containing skill metadata or None if the request fails.
56
+ """
57
+ try:
58
+ # Send GET request to fetch all available skills
59
+ res = requests.get(f'{self.api_base}/skills', headers=self._headers)
60
+ res.raise_for_status()
61
+ return res.json()
62
+ except Exception as e:
63
+ print(f"Failed to fetch skills: {e}")
64
+ return None
65
+
66
+ def process_document(self, file_path, skill_id):
67
+ """
68
+ Starts a new transaction by uploading a file to be processed using a specific skill.
69
+
70
+ Args:
71
+ file_path (str): Path to the local PDF file to be uploaded.
72
+ skill_id (str): The ID of the skill to be used for processing.
73
+
74
+ Returns:
75
+ str or None: The transaction ID returned by the API or None if the request fails.
76
+ """
77
+ try:
78
+ # Prepare API URL with query parameter for the skill ID
79
+ url = f"{self.api_base}/transactions/launch?skillId={skill_id}"
80
+
81
+ # Open the file in binary mode for upload
82
+ with open(file_path, "rb") as f:
83
+ files = {
84
+ "Files": (os.path.basename(file_path), f, "application/pdf")
85
+ }
86
+
87
+ # Post the file to ABBYY API to start a transaction
88
+ res = requests.post(url, headers=self._headers, files=files)
89
+ res.raise_for_status()
90
+
91
+ # Extract and return the transaction ID
92
+ return res.json().get('transactionId')
93
+ except Exception as e:
94
+ print(f"Failed to start transaction: {e}")
95
+ return None
96
+
97
+ def get_document_results(self, transaction_id, output_path="result_file.txt"):
98
+ """
99
+ Checks the transaction status and downloads the result file if processing is complete.
100
+
101
+ Args:
102
+ transaction_id (str): The transaction ID to monitor.
103
+ output_path (str): Local file path to save the result file. Defaults to "result_file.txt".
104
+
105
+ Returns:
106
+ str or None: Path to the saved result file, or None if processing is incomplete or fails.
107
+ """
108
+ try:
109
+ # Get transaction status and metadata
110
+ url = f"{self.api_base}/transactions/{transaction_id}"
111
+ res = requests.get(url, headers=self._headers)
112
+ res.raise_for_status()
113
+ data = res.json()
114
+ except Exception as e:
115
+ print(f"Failed to fetch transaction details: {e}")
116
+ return None
117
+
118
+ # Extract processing status
119
+ status = data.get('status')
120
+ print(f"Transaction status: {status}")
121
+
122
+ # Handle status outcomes
123
+ if status == 'Processing':
124
+ print("File is still being processed. Try again later.")
125
+ return 'Processing'
126
+ elif status != 'Processed':
127
+ print(f"Unexpected status: {status}")
128
+ return f"Unexpected status: {status}"
129
+
130
+ try:
131
+ # Navigate to the result file ID in the JSON structure
132
+ file_id = data['documents'][0]['resultFiles'][0]['fileId']
133
+
134
+ # Build the download URL using transaction ID and file ID
135
+ download_url = f"{self.api_base}/transactions/{transaction_id}/files/{file_id}/download"
136
+
137
+ # Download the result file
138
+ res = requests.get(download_url, headers=self._headers)
139
+ res.raise_for_status()
140
+
141
+ # Save the file to the specified path
142
+ with open(output_path, 'wb') as f:
143
+ f.write(res.content)
144
+
145
+ print(f"File downloaded and saved to: {output_path}")
146
+ return 'Processed'
147
+
148
+ except (KeyError, IndexError) as e:
149
+ print(f"Error accessing file ID in response JSON: {e}")
150
+ except Exception as e:
151
+ print(f"Failed to download or save file: {e}")
152
+
153
+ # df = pd.DataFrame(client.get_skills())
154
+ # df
155
+
156
+ # ----------- Process OCR & Setup Retrieval Agent -------------
157
+ def process_pdf_ocr(file):
158
+ print('process_pdf_ocr', file)
159
+ client = AbbyyVantage(client_id=os.getenv("ABBY_CLIENT_ID"),
160
+ client_secret=os.getenv("ABBY_CLIENT_SECRET"),
161
+ region="au" # or "us", "au", etc.
162
+ )
163
+ skill_id = '1681402d-2931-41cb-9717-bb7612bc09aa'
164
+ trans_id = client.process_document(file_path=file, skill_id=skill_id)
165
+
166
+ @retry(stop=stop_after_delay(60), wait=wait_fixed(3))
167
+ def wait_for_processing():
168
+ status = client.get_document_results(trans_id, output_path="/tmp/result_file.txt")
169
+ print(f"Status: {status}")
170
+ if status == 'Processed':
171
+ print("|-- Processed")
172
+ return status
173
+ raise Exception("Still Processing")
174
+
175
+ try:
176
+ status = wait_for_processing()
177
+ print("|--OCR Successful")
178
+ setup_agent("/tmp/result_file.txt")
179
+ print("|--Chatbot is ready")
180
+ return "OCR Successful. Chatbot is ready"
181
+ except RetryError:
182
+ print("|--OCR Failed or Timed Out")
183
+ return "OCR Failed or Timed Out"
184
+
185
+
186
+ # Global state
187
+ retrieval_chain = None
188
+ agent_executor = None
189
+
190
+
191
+ # ----------- Setup LangChain Retrieval Agent -------------
192
+ def setup_agent(file):
193
+ global retrieval_chain, agent_executor
194
+
195
+ if not os.path.exists(file):
196
+ return "Please process a PDF first."
197
+
198
+ loader = TextLoader(file)
199
+ documents = loader.load()
200
+
201
+ splitter = SpacyTextSplitter()
202
+ chunks = splitter.split_documents(documents)
203
+
204
+ embeddings = OpenAIEmbeddings()
205
+ vectordb = Chroma.from_documents(chunks, embedding=embeddings, collection_name=f"temp_collection_{uuid.uuid4().hex}")
206
+ retriever = vectordb.as_retriever(search_kwargs={"k": 10})
207
+
208
+ memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
209
+
210
+ retrieval_chain = ConversationalRetrievalChain.from_llm(
211
+ llm=ChatOpenAI(model="gpt-3.5-turbo"),
212
+ retriever=retriever,
213
+ memory=memory,
214
+ return_source_documents=False
215
+ )
216
+
217
+ tools = [
218
+ Tool(
219
+ name="PolicyRetrievalRAG",
220
+ func=retrieval_chain.run,
221
+ description="Use this to retreive policy clauses from the policy."
222
+ )
223
+ ]
224
+
225
+ agent_executor = initialize_agent(
226
+ tools=tools,
227
+ llm=ChatOpenAI(model="gpt-3.5-turbo"),
228
+ agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
229
+ #agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
230
+ memory=memory,
231
+ verbose=True
232
+ )
233
+
234
+ print("|--Agent setup complete")
235
+ return "Agent is ready."
236
+
237
+ # ----------- Chat Interface Handler -------------
238
+ def ask_question(message, history):
239
+ if agent_executor is None:
240
+ return "❗ Chatbot not ready. Please upload and process a PDF first."
241
+
242
+ advisory_prompt = (
243
+ "Reterive policy information using PolicyRetrievalRAG tool and answer the user questions."
244
+ "Always use data returned by the policy. Do not makeup information."
245
+ #"In addition to answering the question based on the insurance policy, "
246
+ #"give practical advice to the user on how they might use or take advantage of any relevant clause."
247
+ )
248
+ prompt = f"{advisory_prompt}\nQuestion: {message}"
249
+
250
+ try:
251
+ response = agent_executor.run(prompt)
252
+ return response
253
+ except Exception as e:
254
+ return f"❌ Error: {str(e)}"
255
+
256
+ # ----------- Gradio UI -------------
257
+ with gr.Blocks(gr.themes.Soft(), title="📄 Insurance Policy AIdvisor") as demo:
258
+ gr.Markdown("# Welcome to the Insurance Policy AIdvisor App")
259
+ gr.Markdown("## Upload policy and converse")
260
+ with gr.Tab("📄 Upload PDF"):
261
+ gr.Markdown("### Upload a PDF. And Intellignet Automation Processng will automatically processing it using ABBYY Vantage, ChromaDB and LangChain")
262
+ pdf_file = gr.File(label="📤 Upload a PDF", file_types=[".pdf"])
263
+ ocr_status = gr.Textbox(label="Processing Status", interactive=False)
264
+
265
+ pdf_file.change(process_pdf_ocr, inputs=[pdf_file], outputs=[ocr_status])
266
+
267
+ gr.Examples(
268
+ examples=[["small-insudoc.pdf"],["Principal-Sample-Life-Insurance-Policy.pdf"]],
269
+ inputs=[pdf_file],
270
+ label="Example PDFs"
271
+ )
272
+
273
+ with gr.Tab("💬 Chatbot"):
274
+ gr.Markdown("### Ask about the policy and get advice.")
275
+ chat = gr.ChatInterface(fn=ask_question, chatbot=gr.Chatbot())
276
+
277
+ gr.Examples(
278
+ examples=[
279
+ "In what forms are the certificate avalaible?",
280
+ "How many employees should enroll if the member is to not contribute premium?",
281
+ "Can insurer contest this policy?",
282
+ "when can insurer make changes to the policy?",
283
+ "I gave incorrect age in the policy, what to do now?",
284
+ "Can the data I filled in the application form to get the insurance policy be used against me?"
285
+ ],
286
+ inputs=chat.textbox
287
+ )
288
+ with gr.Tab("System Sequence Design"):
289
+ gr.Markdown("[![](https://mermaid.ink/img/pako:eNrtVl1v2jAU_SuWn1qJIkIIhUirRGEgHrZVDCZ1QkImMcFqYme2s41V_e-7jhOapKnW91VIwbHP_b7HuY84ECHFPlb0R0Z5QGeMRJIkW45QSqRmAUsJ12i5mk3WiKh8sWyebhSV5tD8N88WkoRMbJbmvFw3MakUAVVql4aHnQhyVcvZXRM12e9Pp2-wIhE1kMnt7f09KjaaYEV1lu7gANYGmy_mJNBCnprY6VGKhMxuDbBcNzErqiWjP22cq8misrEWIn7ha83wlptzkmnBs2RvcyRpoJGM9hd9x-mg50ev27805wh9FpqimB40Eoc8tb7NPprH4peF2LJc3dw0U-ijTRoLEiISx-huNlcW30y0kaym1UeTTB_BYxYQME54iCKqkRYPlL9VQ4kIRZAloOriwGLaQeqBxfGOhUVwtWJetUagJeEKKsYEBzkrFguRog34B1FZPA2RgBqwhIpMW9BbvISwzh7uJFVZrNVF3eJlqe0NvipNdKbQh9IrxiPj1tlHq4rysC36NoVle6HCt9ezX-l0H301LyCTC5MYEdt-RrbKiFaTC6j0rEjJa_ZeGGwINI2UdPJhlfGHvKM-AgXCsB1_ZhUISGp6UJY77QIT60YBNsSclCEX2X4mmucBx_JHf_APouU3GvCmwrV8y5gsb7EzxwBnIee7rjW_a8miqFD7ZboqAdAq7-R8J-f_Sc6cesCLY869Ar-4WyO366F1JveiRmHn2jWfySE83N6rFK4Q1HB2Kjh4qIjpnCqVG1w2ezADqWdYjc6FbzUY-sX00dQhSYs6TlqTZT7CJvpyUkCV6aFeCNinkjJorbwkReHOA0pTcQVT0Vhxd1Vt0MK5etx2E3Cp4Iq2BG4vxDnj0LGygMGlg7Ji2MsraX64gyPJQuxrmdEOTqhMiHnFjwa2xXB3JXSLfViG9EDAry3e8icQg5HpuxBJKSlFFh2xfyCxgrcsDaF7irn0DAGrVE5FxjX2R6NcBfYf8W_sX426o6HrjLzr_nDoek5v3MEn2HaHTtfz-uOB43g9Z-iNnzr4T27V6Q561-7A7Q9H7ngwuO71O5iGDEbFT3Y4zmfkp7_uT8Ws?type=png)](https://mermaid.live/edit#pako:eNrtVl1v2jAU_SuWn1qJIkIIhUirRGEgHrZVDCZ1QkImMcFqYme2s41V_e-7jhOapKnW91VIwbHP_b7HuY84ECHFPlb0R0Z5QGeMRJIkW45QSqRmAUsJ12i5mk3WiKh8sWyebhSV5tD8N88WkoRMbJbmvFw3MakUAVVql4aHnQhyVcvZXRM12e9Pp2-wIhE1kMnt7f09KjaaYEV1lu7gANYGmy_mJNBCnprY6VGKhMxuDbBcNzErqiWjP22cq8misrEWIn7ha83wlptzkmnBs2RvcyRpoJGM9hd9x-mg50ev27805wh9FpqimB40Eoc8tb7NPprH4peF2LJc3dw0U-ijTRoLEiISx-huNlcW30y0kaym1UeTTB_BYxYQME54iCKqkRYPlL9VQ4kIRZAloOriwGLaQeqBxfGOhUVwtWJetUagJeEKKsYEBzkrFguRog34B1FZPA2RgBqwhIpMW9BbvISwzh7uJFVZrNVF3eJlqe0NvipNdKbQh9IrxiPj1tlHq4rysC36NoVle6HCt9ezX-l0H301LyCTC5MYEdt-RrbKiFaTC6j0rEjJa_ZeGGwINI2UdPJhlfGHvKM-AgXCsB1_ZhUISGp6UJY77QIT60YBNsSclCEX2X4mmucBx_JHf_APouU3GvCmwrV8y5gsb7EzxwBnIee7rjW_a8miqFD7ZboqAdAq7-R8J-f_Sc6cesCLY869Ar-4WyO366F1JveiRmHn2jWfySE83N6rFK4Q1HB2Kjh4qIjpnCqVG1w2ezADqWdYjc6FbzUY-sX00dQhSYs6TlqTZT7CJvpyUkCV6aFeCNinkjJorbwkReHOA0pTcQVT0Vhxd1Vt0MK5etx2E3Cp4Iq2BG4vxDnj0LGygMGlg7Ji2MsraX64gyPJQuxrmdEOTqhMiHnFjwa2xXB3JXSLfViG9EDAry3e8icQg5HpuxBJKSlFFh2xfyCxgrcsDaF7irn0DAGrVE5FxjX2R6NcBfYf8W_sX426o6HrjLzr_nDoek5v3MEn2HaHTtfz-uOB43g9Z-iNnzr4T27V6Q561-7A7Q9H7ngwuO71O5iGDEbFT3Y4zmfkp7_uT8Ws)")
290
+
291
+ demo.launch(debug=True)