athmikha committed on
Commit
e954fd0
·
verified ·
1 Parent(s): 30f869b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -38
app.py CHANGED
@@ -12,10 +12,11 @@ import gradio as gr
12
  from docx import Document
13
  import PyPDF2
14
  import csv
 
15
 
16
  warnings.filterwarnings("ignore")
 
17
 
18
- # Function to extract text from PDF
19
  def extract_text_from_pdf(pdf_path):
20
  with open(pdf_path, "rb") as f:
21
  pdf_reader = PyPDF2.PdfReader(f)
@@ -25,7 +26,6 @@ def extract_text_from_pdf(pdf_path):
25
  text += page.extract_text()
26
  return text
27
 
28
- # Function to extract text from DOCX
29
  def extract_text_from_docx(docx_path):
30
  doc = Document(docx_path)
31
  full_text = []
@@ -33,7 +33,6 @@ def extract_text_from_docx(docx_path):
33
  full_text.append(para.text)
34
  return '\n\n'.join(full_text)
35
 
36
- # Function to extract text from TXT
37
  def extract_text_from_txt(txt_path):
38
  try:
39
  with open(txt_path, "r", encoding='utf-8') as f:
@@ -42,7 +41,17 @@ def extract_text_from_txt(txt_path):
42
  with open(txt_path, "r", encoding='latin-1') as f:
43
  return f.read()
44
 
 
 
 
 
 
 
 
 
 
45
 
 
46
  def read_and_structure_csv(csv_path):
47
  structured_data = []
48
  with open(csv_path, mode='r', encoding='utf-8-sig') as file:
@@ -54,33 +63,30 @@ def read_and_structure_csv(csv_path):
54
  plan_details += f" - **{key.replace('_', ' ').title()}**: {value}\n"
55
  structured_data.append(plan_details)
56
  return "\n\n".join(structured_data)
57
- # List of paths to your input files (PDF, DOCX, TXT)
58
  file_paths = ["./csvrecommend - Sheet1.csv","./dummymedicare.txt"]
59
 
60
- # Extract text from files
61
- texts = []
62
  for path in file_paths:
63
  if path.endswith(".pdf"):
64
- texts.append(extract_text_from_pdf(path))
65
  elif path.endswith(".docx"):
66
- texts.append(extract_text_from_docx(path))
67
  elif path.endswith(".txt"):
68
  txt_content = extract_text_from_txt(path)
69
- texts.append(txt_content)
70
- print("TXT file content:", txt_content)
71
- print(f"Added content from {path}: {txt_content[:500]}...")
72
  elif path.endswith(".csv"):
73
- texts.append(read_and_structure_csv(path))
 
 
74
 
75
- context = "\n\n".join(texts)
76
 
77
- print(context,"context\n\n\n")
78
 
79
- # Initialize text splitter and embeddings
80
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=1200)
81
  texts = text_splitter.split_text(context)
82
 
83
- print("\n\n\n",texts,"texts")
84
  api_key = "AIzaSyCqEKwd23ztVuk-dkCXypjeHWlcs41aCSM"
85
  if not api_key:
86
  raise ValueError("API key not found. Please set your GEMINI_API_KEY in the environment.")
@@ -95,7 +101,7 @@ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_a
95
  vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})
96
 
97
  # Create QA chain
98
- template = """You are a highly knowledgeable and detail-oriented medical assistant specializing in insurance plans. Your task is to recommend only those insurance plans that strictly align with all the needs and preferences provided by the user.
99
  Ensure that each recommended plan meets every single requirement specified by the user. Do not recommend plans that only partially meet the requirements.
100
  Context:
101
  {context}
@@ -110,18 +116,17 @@ qa_chain = RetrievalQA.from_chain_type(
110
  chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
111
  )
112
 
113
- # Load history from file if it exists
114
- history_file = "history.json"
115
 
116
  def load_history():
117
  if os.path.exists(history_file):
118
  with open(history_file, "r") as f:
119
  try:
120
  data = json.load(f)
121
- if isinstance(data, list): # Ensure that the data is a list
122
  return data
123
  except json.JSONDecodeError:
124
- pass # If there's an error decoding, return an empty list
125
  return []
126
 
127
  def save_history(history):
@@ -130,32 +135,55 @@ def save_history(history):
130
 
131
  history = load_history()
132
 
133
- # Function to handle questions and maintain history
 
 
 
 
 
 
 
 
 
 
 
 
134
  def ask_question(question):
135
  global history
136
  if question.strip().lower() == "exit":
137
- history = [] # Clear history
138
  save_history(history)
139
  return "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"
 
 
 
 
140
  result = qa_chain({"query": question})
141
  answer = result["result"]
142
- history.append({"question": question, "answer": answer})
143
  save_history(history)
144
- # Format history for display
 
 
 
 
 
 
 
 
 
145
  history_md = ""
146
  for entry in history:
147
- history_md += f"**USER:** {entry['question']}\n\n**BOT:** {entry['answer']}\n\n---\n\n"
148
  return history_md
149
 
150
- # Format history for initial display
151
  initial_history_md = ""
152
  if not history:
153
  initial_history_md = "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"
154
  else:
155
  for entry in history:
156
- initial_history_md += f"**User:** {entry['question']}\n\n**Bot:** {entry['answer']}\n\n---\n\n"
157
 
158
- # Create Gradio interface using Blocks
159
  with gr.Blocks() as demo:
160
  gr.HTML(
161
  """
@@ -177,22 +205,15 @@ with gr.Blocks() as demo:
177
  """
178
  )
179
 
180
- # Markdown block to display history
181
  history_output = gr.Markdown(value=initial_history_md, elem_classes="scrollable-history")
182
 
183
- # Row for question input and submit button
184
  with gr.Row(elem_classes="fixed-bottom"):
185
- # Column for question input and submit button
186
  with gr.Column():
187
- # Text area for question input
188
  question_input = gr.Textbox(lines=2, placeholder="Type your question here...", show_label=False)
189
- # Submit button
190
  submit_button = gr.Button("Submit")
191
- # Function to handle submit action
192
  submit_button.click(ask_question, inputs=question_input, outputs=history_output)
193
- submit_button.click(lambda: "", None, question_input) # Clears the input box
194
 
195
- # Display history above the question input and submit button pair
196
  history_output
197
 
198
  demo.launch()
 
12
  from docx import Document
13
  import PyPDF2
14
  import csv
15
+ import google.generativeai as genai
16
 
# Suppress every warning for the whole process (library deprecation chatter
# would otherwise flood the app log). The former `global context` statement was
# dropped: at module scope `global` has no effect, names here are already global.
warnings.filterwarnings("ignore")
19
 
 
20
  def extract_text_from_pdf(pdf_path):
21
  with open(pdf_path, "rb") as f:
22
  pdf_reader = PyPDF2.PdfReader(f)
 
26
  text += page.extract_text()
27
  return text
28
 
 
29
  def extract_text_from_docx(docx_path):
30
  doc = Document(docx_path)
31
  full_text = []
 
33
  full_text.append(para.text)
34
  return '\n\n'.join(full_text)
35
 
 
36
  def extract_text_from_txt(txt_path):
37
  try:
38
  with open(txt_path, "r", encoding='utf-8') as f:
 
41
  with open(txt_path, "r", encoding='latin-1') as f:
42
  return f.read()
43
 
def extract_text_from_json(json_path):
    """Return the JSON document at ``json_path`` re-serialized as indented text.

    The text is meant to be fed to an embedding/LLM pipeline, so non-ASCII
    characters are written out literally (``ensure_ascii=False``) instead of
    being turned into escape sequences that the model would have to decode.

    Args:
        json_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The document pretty-printed with a 4-space indent, or ``""`` when the
        file contains an empty/falsy value or is not valid JSON.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        # Keep the try body to the one call that can raise JSONDecodeError.
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            return ""
    if not data:
        return ""
    return json.dumps(data, indent=4, ensure_ascii=False)
53
 
54
+
55
  def read_and_structure_csv(csv_path):
56
  structured_data = []
57
  with open(csv_path, mode='r', encoding='utf-8-sig') as file:
 
63
  plan_details += f" - **{key.replace('_', ' ').title()}**: {value}\n"
64
  structured_data.append(plan_details)
65
  return "\n\n".join(structured_data)
66
+
# Input documents that make up the retrieval context.
file_paths = ["./csvrecommend - Sheet1.csv","./dummymedicare.txt"]

# (extension, extractor) pairs, checked in order. Matching is done on the
# lower-cased path so "REPORT.PDF" is handled the same as "report.pdf".
_EXTRACTORS = (
    (".pdf", extract_text_from_pdf),
    (".docx", extract_text_from_docx),
    (".txt", extract_text_from_txt),
    (".csv", read_and_structure_csv),
    (".json", extract_text_from_json),
)

texts1 = []
for path in file_paths:
    lowered = path.lower()
    extractor = next(
        (fn for ext, fn in _EXTRACTORS if lowered.endswith(ext)),
        None,
    )
    # Paths with an unrecognized extension are skipped, as before.
    if extractor is not None:
        texts1.append(extractor(path))

# One string holding every document, separated by blank lines.
context = "\n\n".join(texts1)

# Split the combined context into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=1300)
texts = text_splitter.split_text(context)
89
 
 
# Read the Gemini key from the environment rather than committing it to the
# repository (the error message below already tells the operator to do so).
# GOOGLE_API_KEY is accepted as a fallback name.
# NOTE(review): the key that was previously hard-coded here has been published
# in the commit history and should be revoked.
api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("API key not found. Please set your GEMINI_API_KEY in the environment.")
 
101
  vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})
102
 
103
  # Create QA chain
104
+ template = """You are a highly knowledgeable and detail-oriented medical assistant specializing in insurance plans. Your task is to answer questions to user and recommend only those insurance plans that strictly align with all the needs and preferences provided by the user.
105
  Ensure that each recommended plan meets every single requirement specified by the user. Do not recommend plans that only partially meet the requirements.
106
  Context:
107
  {context}
 
116
  chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
117
  )
118
 
# JSON file that persists the chat history between runs.
history_file = "history2.json"

# Keys every stored entry must carry; the rendering code indexes both directly.
_HISTORY_KEYS = ("USER", "answer")


def load_history():
    """Load the persisted chat history from ``history_file``.

    Returns:
        A list of entry dicts, each containing the "USER" and "answer" keys.
        An empty list is returned when the file is missing, cannot be decoded,
        or does not hold a JSON list. Entries that are not dicts or lack one of
        the required keys are dropped so that later rendering cannot fail with
        ``KeyError``.
    """
    try:
        with open(history_file, "r") as f:
            data = json.load(f)
    except FileNotFoundError:
        # No history saved yet.
        return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Corrupt file: start over with an empty history instead of crashing.
        return []

    if not isinstance(data, list):
        return []
    return [
        entry
        for entry in data
        if isinstance(entry, dict) and all(key in entry for key in _HISTORY_KEYS)
    ]
131
 
132
  def save_history(history):
 
135
 
136
  history = load_history()
137
 
def summarize_history(history):
    """Ask Gemini to summarize the user's preferences from the chat history.

    Args:
        history: Iterable of chat entries; the user's message is read from the
            "USER" key of each dict entry. Entries without a non-empty "USER"
            value are ignored.

    Returns:
        The summary text produced by the model, or ``""`` when there are no
        user messages to summarize or the response carries no text.
    """
    user_messages = [
        entry["USER"]
        for entry in history
        if isinstance(entry, dict) and entry.get("USER")
    ]
    if not user_messages:
        # Nothing to summarize: skip the API round trip.
        return ""

    # Reuse the module-level key instead of keeping a second copy of the secret
    # here and rewriting os.environ on every call.
    genai.configure(api_key=api_key)

    user_history = "\n".join(user_messages)
    prompt = f"Summarize the important points on preferences from the following user history Discard unwanted details only give users preferences :\n\n{user_history}"
    model1 = genai.GenerativeModel('gemini-pro')
    summary_response = model1.generate_content(prompt)

    try:
        return summary_response.text
    except ValueError:
        # The `.text` accessor raises when the response has no usable text part
        # (for example a blocked response); treat that as "no summary".
        return ""
150
+
_GREETING = "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"


def _render_history(entries):
    """Format chat entries as the Markdown transcript shown in the UI."""
    return "".join(
        f"**USER:** {entry['USER']}\n\n**BOT:** {entry['answer']}\n\n---\n\n"
        for entry in entries
    )


def ask_question(question):
    """Answer one user question and return the updated transcript as Markdown.

    Args:
        question: Text typed by the user. "exit" (any case, surrounding
            whitespace ignored) clears the stored history. A blank question is
            not sent to the model.

    Returns:
        The greeting after "exit" (or for a blank question with no history);
        otherwise the full transcript, one USER/BOT pair per entry.
    """
    global history
    stripped = (question or "").strip()

    if not stripped:
        # Nothing to ask: leave history untouched and show what we already have.
        return _render_history(history) if history else _GREETING

    if stripped.lower() == "exit":
        history = []
        save_history(history)
        return _GREETING

    # Explicit encoding so non-ASCII questions do not depend on the platform default.
    with open("./chat_history.txt", "a", encoding="utf-8") as f:
        f.write(f"USER: {question}\n")

    result = qa_chain({"query": question})
    answer = result["result"]
    history.append({"USER": question, "answer": answer})
    save_history(history)

    # Summarize the chat history
    summary = summarize_history(history)

    # Combine the context and summary for the text splitter
    combined_text = context + "\n\n" + "MY PREFERENCES "+summary
    # NOTE(review): `texts` and `vector_index` are locals here (only `history`
    # is declared global), so this does not rebind the module-level retriever.
    # Whether `qa_chain` ever sees these documents depends on how Chroma shares
    # its in-memory collection - TODO confirm, then either wire the new
    # retriever into the chain or drop this rebuild.
    texts = text_splitter.split_text(combined_text)
    vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})

    return _render_history(history)
179
 
 
# Markdown shown when the page first loads: the saved transcript if there is
# one, otherwise the greeting.
if history:
    initial_history_md = "".join(
        f"**USER:** {entry['USER']}\n\n**BOT:** {entry['answer']}\n\n---\n\n"
        for entry in history
    )
else:
    initial_history_md = "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"
186
 
 
187
  with gr.Blocks() as demo:
188
  gr.HTML(
189
  """
 
205
  """
206
  )
207
 
 
208
  history_output = gr.Markdown(value=initial_history_md, elem_classes="scrollable-history")
209
 
 
210
  with gr.Row(elem_classes="fixed-bottom"):
 
211
  with gr.Column():
 
212
  question_input = gr.Textbox(lines=2, placeholder="Type your question here...", show_label=False)
 
213
  submit_button = gr.Button("Submit")
 
214
  submit_button.click(ask_question, inputs=question_input, outputs=history_output)
215
+ submit_button.click(lambda: "", None, question_input)
216
 
 
217
  history_output
218
 
219
  demo.launch()