Update app.py
Browse files
app.py
CHANGED
|
@@ -12,10 +12,11 @@ import gradio as gr
|
|
| 12 |
from docx import Document
|
| 13 |
import PyPDF2
|
| 14 |
import csv
|
|
|
|
| 15 |
|
| 16 |
warnings.filterwarnings("ignore")
|
|
|
|
| 17 |
|
| 18 |
-
# Function to extract text from PDF
|
| 19 |
def extract_text_from_pdf(pdf_path):
|
| 20 |
with open(pdf_path, "rb") as f:
|
| 21 |
pdf_reader = PyPDF2.PdfReader(f)
|
|
@@ -25,7 +26,6 @@ def extract_text_from_pdf(pdf_path):
|
|
| 25 |
text += page.extract_text()
|
| 26 |
return text
|
| 27 |
|
| 28 |
-
# Function to extract text from DOCX
|
| 29 |
def extract_text_from_docx(docx_path):
|
| 30 |
doc = Document(docx_path)
|
| 31 |
full_text = []
|
|
@@ -33,7 +33,6 @@ def extract_text_from_docx(docx_path):
|
|
| 33 |
full_text.append(para.text)
|
| 34 |
return '\n\n'.join(full_text)
|
| 35 |
|
| 36 |
-
# Function to extract text from TXT
|
| 37 |
def extract_text_from_txt(txt_path):
|
| 38 |
try:
|
| 39 |
with open(txt_path, "r", encoding='utf-8') as f:
|
|
@@ -42,7 +41,17 @@ def extract_text_from_txt(txt_path):
|
|
| 42 |
with open(txt_path, "r", encoding='latin-1') as f:
|
| 43 |
return f.read()
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
|
|
|
| 46 |
def read_and_structure_csv(csv_path):
|
| 47 |
structured_data = []
|
| 48 |
with open(csv_path, mode='r', encoding='utf-8-sig') as file:
|
|
@@ -54,33 +63,30 @@ def read_and_structure_csv(csv_path):
|
|
| 54 |
plan_details += f" - **{key.replace('_', ' ').title()}**: {value}\n"
|
| 55 |
structured_data.append(plan_details)
|
| 56 |
return "\n\n".join(structured_data)
|
| 57 |
-
|
| 58 |
file_paths = ["./csvrecommend - Sheet1.csv","./dummymedicare.txt"]
|
| 59 |
|
| 60 |
-
|
| 61 |
-
texts = []
|
| 62 |
for path in file_paths:
|
| 63 |
if path.endswith(".pdf"):
|
| 64 |
-
|
| 65 |
elif path.endswith(".docx"):
|
| 66 |
-
|
| 67 |
elif path.endswith(".txt"):
|
| 68 |
txt_content = extract_text_from_txt(path)
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
print(f"Added content from {path}: {txt_content[:500]}...")
|
| 72 |
elif path.endswith(".csv"):
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
context = "\n\n".join(texts)
|
| 76 |
|
| 77 |
-
|
| 78 |
|
| 79 |
-
|
| 80 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=1200)
|
| 81 |
texts = text_splitter.split_text(context)
|
| 82 |
|
| 83 |
-
print("\n\n\n",texts,"texts")
|
| 84 |
# Read the Gemini API key from the environment rather than committing it to
# source control. A key that has already been pushed to a public repository
# should be treated as leaked and revoked.
api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("API key not found. Please set your GEMINI_API_KEY in the environment.")
|
|
@@ -95,7 +101,7 @@ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_a
|
|
| 95 |
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})
|
| 96 |
|
| 97 |
# Create QA chain
|
| 98 |
-
template = """You are a highly knowledgeable and detail-oriented medical assistant specializing in insurance plans. Your task is to recommend only those insurance plans that strictly align with all the needs and preferences provided by the user.
|
| 99 |
Ensure that each recommended plan meets every single requirement specified by the user. Do not recommend plans that only partially meet the requirements.
|
| 100 |
Context:
|
| 101 |
{context}
|
|
@@ -110,18 +116,17 @@ qa_chain = RetrievalQA.from_chain_type(
|
|
| 110 |
chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
|
| 111 |
)
|
| 112 |
|
| 113 |
-
|
| 114 |
-
history_file = "history.json"
|
| 115 |
|
| 116 |
def load_history():
    """Load the saved chat history from ``history_file``.

    Returns:
        The decoded JSON content when the file exists and its top-level
        value is a list; otherwise an empty list (missing file, invalid
        JSON, or a non-list top-level value).
    """
    if not os.path.exists(history_file):
        return []
    with open(history_file, "r") as fh:
        try:
            loaded = json.load(fh)
        except json.JSONDecodeError:
            # Unreadable history is treated the same as no history.
            return []
    return loaded if isinstance(loaded, list) else []
|
| 126 |
|
| 127 |
def save_history(history):
|
|
@@ -130,32 +135,55 @@ def save_history(history):
|
|
| 130 |
|
| 131 |
history = load_history()
|
| 132 |
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
def ask_question(question):
|
| 135 |
global history
|
| 136 |
if question.strip().lower() == "exit":
|
| 137 |
-
history = []
|
| 138 |
save_history(history)
|
| 139 |
return "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
result = qa_chain({"query": question})
|
| 141 |
answer = result["result"]
|
| 142 |
-
history.append({"
|
| 143 |
save_history(history)
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
history_md = ""
|
| 146 |
for entry in history:
|
| 147 |
-
history_md += f"**USER:** {entry['
|
| 148 |
return history_md
|
| 149 |
|
| 150 |
-
# Format history for initial display
|
| 151 |
initial_history_md = ""
|
| 152 |
if not history:
|
| 153 |
initial_history_md = "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"
|
| 154 |
else:
|
| 155 |
for entry in history:
|
| 156 |
-
initial_history_md += f"**
|
| 157 |
|
| 158 |
-
# Create Gradio interface using Blocks
|
| 159 |
with gr.Blocks() as demo:
|
| 160 |
gr.HTML(
|
| 161 |
"""
|
|
@@ -177,22 +205,15 @@ with gr.Blocks() as demo:
|
|
| 177 |
"""
|
| 178 |
)
|
| 179 |
|
| 180 |
-
# Markdown block to display history
|
| 181 |
history_output = gr.Markdown(value=initial_history_md, elem_classes="scrollable-history")
|
| 182 |
|
| 183 |
-
# Row for question input and submit button
|
| 184 |
with gr.Row(elem_classes="fixed-bottom"):
|
| 185 |
-
# Column for question input and submit button
|
| 186 |
with gr.Column():
|
| 187 |
-
# Text area for question input
|
| 188 |
question_input = gr.Textbox(lines=2, placeholder="Type your question here...", show_label=False)
|
| 189 |
-
# Submit button
|
| 190 |
submit_button = gr.Button("Submit")
|
| 191 |
-
# Function to handle submit action
|
| 192 |
submit_button.click(ask_question, inputs=question_input, outputs=history_output)
|
| 193 |
-
submit_button.click(lambda: "", None, question_input)
|
| 194 |
|
| 195 |
-
# Display history above the question input and submit button pair
|
| 196 |
history_output
|
| 197 |
|
| 198 |
demo.launch()
|
|
|
|
| 12 |
from docx import Document
|
| 13 |
import PyPDF2
|
| 14 |
import csv
|
| 15 |
+
import google.generativeai as genai
|
| 16 |
|
| 17 |
warnings.filterwarnings("ignore")
|
| 18 |
+
global context
|
| 19 |
|
|
|
|
| 20 |
def extract_text_from_pdf(pdf_path):
|
| 21 |
with open(pdf_path, "rb") as f:
|
| 22 |
pdf_reader = PyPDF2.PdfReader(f)
|
|
|
|
| 26 |
text += page.extract_text()
|
| 27 |
return text
|
| 28 |
|
|
|
|
| 29 |
def extract_text_from_docx(docx_path):
|
| 30 |
doc = Document(docx_path)
|
| 31 |
full_text = []
|
|
|
|
| 33 |
full_text.append(para.text)
|
| 34 |
return '\n\n'.join(full_text)
|
| 35 |
|
|
|
|
| 36 |
def extract_text_from_txt(txt_path):
|
| 37 |
try:
|
| 38 |
with open(txt_path, "r", encoding='utf-8') as f:
|
|
|
|
| 41 |
with open(txt_path, "r", encoding='latin-1') as f:
|
| 42 |
return f.read()
|
| 43 |
|
| 44 |
+
def extract_text_from_json(json_path):
    """Return the contents of a JSON file as pretty-printed text.

    Args:
        json_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The parsed document re-serialized with a 4-space indent, or an empty
        string when the file is not valid JSON or parses to an empty/falsy
        value.
    """
    with open(json_path, "r", encoding='utf-8') as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            return ""
    if not data:
        return ""
    # ensure_ascii=False keeps non-ASCII characters readable in the returned
    # text instead of emitting \uXXXX escape sequences.
    return json.dumps(data, indent=4, ensure_ascii=False)
|
| 53 |
|
| 54 |
+
|
| 55 |
def read_and_structure_csv(csv_path):
|
| 56 |
structured_data = []
|
| 57 |
with open(csv_path, mode='r', encoding='utf-8-sig') as file:
|
|
|
|
| 63 |
plan_details += f" - **{key.replace('_', ' ').title()}**: {value}\n"
|
| 64 |
structured_data.append(plan_details)
|
| 65 |
return "\n\n".join(structured_data)
|
| 66 |
+
|
| 67 |
# Source documents whose text is loaded into the retrieval context.
file_paths = ["./csvrecommend - Sheet1.csv","./dummymedicare.txt"]

# File-extension -> extractor lookup, checked in this order. A path that
# matches none of the suffixes is skipped.
_EXTRACTORS = {
    ".pdf": extract_text_from_pdf,
    ".docx": extract_text_from_docx,
    ".txt": extract_text_from_txt,
    ".csv": read_and_structure_csv,
    ".json": extract_text_from_json,
}

texts1 = []
for path in file_paths:
    for suffix, extractor in _EXTRACTORS.items():
        if path.endswith(suffix):
            texts1.append(extractor(path))
            break

# One combined document, separated by blank lines between sources.
context = "\n\n".join(texts1)

# Split the combined document into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=1300)
texts = text_splitter.split_text(context)
|
| 89 |
|
|
|
|
| 90 |
# Read the Gemini API key from the environment rather than committing it to
# source control. A key that has already been pushed to a public repository
# should be treated as leaked and revoked.
api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("API key not found. Please set your GEMINI_API_KEY in the environment.")
|
|
|
|
| 101 |
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})
|
| 102 |
|
| 103 |
# Create QA chain
|
| 104 |
+
template = """You are a highly knowledgeable and detail-oriented medical assistant specializing in insurance plans. Your task is to answer questions to user and recommend only those insurance plans that strictly align with all the needs and preferences provided by the user.
|
| 105 |
Ensure that each recommended plan meets every single requirement specified by the user. Do not recommend plans that only partially meet the requirements.
|
| 106 |
Context:
|
| 107 |
{context}
|
|
|
|
| 116 |
chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
|
| 117 |
)
|
| 118 |
|
| 119 |
+
history_file = "history2.json"
|
|
|
|
| 120 |
|
| 121 |
def load_history():
    """Load the saved chat history from ``history_file``.

    Returns:
        The decoded JSON content when the file exists and its top-level
        value is a list; otherwise an empty list (missing file, invalid
        JSON, or a non-list top-level value).
    """
    if not os.path.exists(history_file):
        return []
    with open(history_file, "r") as fh:
        try:
            loaded = json.load(fh)
        except json.JSONDecodeError:
            # Unreadable history is treated the same as no history.
            return []
    return loaded if isinstance(loaded, list) else []
|
| 131 |
|
| 132 |
def save_history(history):
|
|
|
|
| 135 |
|
| 136 |
history = load_history()
|
| 137 |
|
| 138 |
+
def summarize_history(history):
    """Ask the Gemini model to summarize the user's stated preferences.

    Args:
        history: List of chat entries; each entry's "USER" value is the text
            the user typed.

    Returns:
        The summary text returned by the model, or an empty string when
        there is no user text to summarize.

    Raises:
        KeyError: If an entry has no "USER" key.
    """
    user_history = "\n".join([entry["USER"] for entry in history])
    if not user_history.strip():
        # Nothing to summarize; skip the model call entirely.
        return ""

    # Prefer a key supplied through the environment and fall back to the
    # module-level ``api_key`` instead of embedding a second copy of the
    # secret here. The environment variable is still populated afterwards,
    # as before, but an operator-provided value is no longer overwritten.
    key = os.environ.get("GOOGLE_API_KEY") or api_key
    os.environ['GOOGLE_API_KEY'] = key
    genai.configure(api_key=key)

    # Summarize the user's preferences from the chat history
    prompt = f"Summarize the important points on preferences from the following user history Discard unwanted details only give users preferences :\n\n{user_history}"
    model1 = genai.GenerativeModel('gemini-pro')

    summary_response = model1.generate_content(prompt)
    print(summary_response.text,"summary_response")
    return summary_response.text
|
| 150 |
+
|
| 151 |
def ask_question(question):
    """Answer one user question and return the chat history as Markdown.

    Entering "exit" (any case, surrounding whitespace ignored) clears the
    stored history and returns the greeting text instead.

    Args:
        question: Text typed by the user.

    Returns:
        The greeting string after a reset, otherwise every history entry
        rendered as a USER/BOT Markdown section.
    """
    global history

    if question.strip().lower() == "exit":
        history = []
        save_history(history)
        return "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"

    # Append the raw question to the plain-text log.
    with open("./chat_history.txt", "a") as log:
        log.write(f"USER: {question}\n")

    answer = qa_chain({"query": question})["result"]
    history.append({"USER": question, "answer": answer})
    save_history(history)

    # Summarize the chat history
    preferences = summarize_history(history)

    # Combine the context and summary for the text splitter
    combined_text = context + "\n\n" + "MY PREFERENCES " + preferences
    print(combined_text,"combined_text\n\n")
    chunks = text_splitter.split_text(combined_text)
    # NOTE(review): the retriever built here is not handed to qa_chain and is
    # not stored anywhere; confirm whether this rebuild is meant to influence
    # later answers.
    Chroma.from_texts(chunks, embeddings).as_retriever(search_kwargs={"k": 5})

    return "".join(
        f"**USER:** {entry['USER']}\n\n**BOT:** {entry['answer']}\n\n---\n\n"
        for entry in history
    )
|
| 179 |
|
|
|
|
| 180 |
# Markdown shown in the history pane at startup: the greeting when no history
# is stored, otherwise one USER/BOT section per saved entry.
if history:
    initial_history_md = "".join(
        f"**USER:** {entry['USER']}\n\n**BOT:** {entry['answer']}\n\n---\n\n"
        for entry in history
    )
else:
    initial_history_md = "Hey there! I'm your Medicare assistant. You can ask me questions related to different type of insurances and I'll help you. Let's get started!"
|
| 186 |
|
|
|
|
| 187 |
with gr.Blocks() as demo:
|
| 188 |
gr.HTML(
|
| 189 |
"""
|
|
|
|
| 205 |
"""
|
| 206 |
)
|
| 207 |
|
|
|
|
| 208 |
history_output = gr.Markdown(value=initial_history_md, elem_classes="scrollable-history")
|
| 209 |
|
|
|
|
| 210 |
with gr.Row(elem_classes="fixed-bottom"):
|
|
|
|
| 211 |
with gr.Column():
|
|
|
|
| 212 |
question_input = gr.Textbox(lines=2, placeholder="Type your question here...", show_label=False)
|
|
|
|
| 213 |
submit_button = gr.Button("Submit")
|
|
|
|
| 214 |
submit_button.click(ask_question, inputs=question_input, outputs=history_output)
|
| 215 |
+
submit_button.click(lambda: "", None, question_input)
|
| 216 |
|
|
|
|
| 217 |
history_output
|
| 218 |
|
| 219 |
demo.launch()
|