File size: 7,869 Bytes
71000f9
de51edb
 
71000f9
71e7de4
71000f9
 
71e7de4
71000f9
 
e185ce5
71e7de4
de51edb
7038376
e954fd0
71000f9
e185ce5
e954fd0
3148aad
2d05ef2
81edbc6
71000f9
 
de51edb
71000f9
 
 
 
 
 
 
 
fc8d9ff
 
 
 
 
 
81edbc6
e954fd0
 
 
 
 
 
 
 
 
fc8d9ff
e58e77c
 
 
 
 
cb0d755
e58e77c
cb0d755
e58e77c
 
 
e954fd0
81edbc6
d550535
e954fd0
71000f9
 
e954fd0
71000f9
81edbc6
3040bbb
e954fd0
 
 
71000f9
e954fd0
71000f9
81edbc6
142f7d1
71000f9
 
13ea200
e58e77c
 
 
de51edb
142f7d1
e58e77c
de51edb
 
 
e58e77c
e185ce5
71000f9
e185ce5
7b4a2fc
71e7de4
 
71d856b
71000f9
 
 
b43c382
71000f9
 
 
 
 
 
2c9ed37
 
81edbc6
476f8f1
de51edb
 
 
 
884f183
 
e954fd0
884f183
 
e954fd0
de51edb
2c9ed37
de51edb
 
 
 
 
 
71e7de4
13ea200
71e7de4
13ea200
 
71e7de4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13ea200
 
71e7de4
 
 
e954fd0
e185ce5
81edbc6
 
6adfa18
e954fd0
db42f47
13ea200
81edbc6
 
 
 
 
 
 
e954fd0
 
 
71000f9
e185ce5
e954fd0
de51edb
13ea200
 
 
 
 
 
 
e954fd0
e185ce5
de51edb
e954fd0
e185ce5
 
de51edb
884f183
81edbc6
884f183
 
e954fd0
3148aad
bc7bbda
929bd99
 
 
 
 
 
 
 
 
 
 
 
 
81edbc6
929bd99
 
 
 
 
 
de51edb
929bd99
de51edb
 
 
 
e954fd0
de51edb
 
 
fc8d9ff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import warnings
import os
import json
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import gradio as gr

import PyPDF2
import csv
import google.generativeai as genai

warnings.filterwarnings("ignore")
global context


# Define functions for extracting text from different file types
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Fixes a latent crash in the original: ``page.extract_text()`` may return
    ``None`` for pages with no extractable text (e.g. scanned images), which
    made ``text += ...`` raise TypeError.  Such pages now contribute "".
    """
    with open(pdf_path, "rb") as f:
        pdf_reader = PyPDF2.PdfReader(f)
        # Iterate pages directly instead of indexing range(len(...)).
        return "".join((page.extract_text() or "") for page in pdf_reader.pages)


def extract_text_from_txt(txt_path):
    """Read a text file as UTF-8, falling back to latin-1 if decoding fails."""
    for encoding in ("utf-8", "latin-1"):
        try:
            with open(txt_path, "r", encoding=encoding) as handle:
                return handle.read()
        except UnicodeDecodeError:
            # Not valid in this encoding; try the next (latin-1 never fails).
            continue

def extract_text_from_json(json_path):
    """Load a JSON file and return it pretty-printed.

    Returns "" when the file is empty/falsy JSON or not valid JSON at all.
    """
    with open(json_path, "r", encoding='utf-8') as handle:
        try:
            payload = json.load(handle)
        except json.JSONDecodeError:
            return ""
    if not payload:
        return ""
    return json.dumps(payload, indent=4)

def read_and_structure_csv(csv_path):
    """Render each CSV row as a markdown bullet list headed by its plan_type.

    Rows are separated by blank lines; every non-plan_type column becomes a
    "**Column Name**: value" bullet.
    """
    row_blocks = []
    # utf-8-sig strips a BOM that spreadsheet exports often prepend.
    with open(csv_path, mode='r', encoding='utf-8-sig') as handle:
        for row in csv.DictReader(handle):
            lines = [f"plan_type: {row['plan_type']}\n"]
            lines.extend(
                f"  - **{field.replace('_', ' ').title()}**: {value}\n"
                for field, value in row.items()
                if field != 'plan_type'
            )
            row_blocks.append("".join(lines))
    return "\n\n".join(row_blocks)

# Initial setup: load files and extract text
# Source documents for the retriever; currently two Medigap CSV exports
# expected to sit next to this script.
file_paths = ["./Final Medigap  - Medigap Generic Plan Details - Medigap Generic Plan Details CSV.csv","finalll - Sheet1.csv"]
texts1 = []  # one extracted-text string per input file, in file_paths order
for path in file_paths:
    # Dispatch on file extension to the matching extractor defined above;
    # unknown extensions are silently skipped.
    if path.endswith(".pdf"):
        texts1.append(extract_text_from_pdf(path))
    elif path.endswith(".txt"):
        texts1.append(extract_text_from_txt(path))
    elif path.endswith(".csv"):
        texts1.append(read_and_structure_csv(path))
    elif path.endswith(".json"):
        texts1.append(extract_text_from_json(path))

# Full document corpus; ask_question() later rebuilds this same string and
# appends the user's preferences before re-indexing.
context = "\n\n".join(texts1)

# Initialize text splitter and vector index
# Large chunks (11k chars, 1.7k overlap) — presumably chosen to keep whole
# plan descriptions inside a single chunk; TODO confirm against chunk sizes.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=11000, chunk_overlap=1700)
texts = text_splitter.split_text(context)

# SECURITY FIX: the Gemini API key was hard-coded in source (and therefore
# committed to version control), which also made the `if not api_key` guard
# below dead code. Read it from the environment instead; the original error
# message already instructed users to set GEMINI_API_KEY.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("API key not found. Please set your GEMINI_API_KEY in the environment.")

# Chat model used by the RetrievalQA chain; low temperature keeps plan
# recommendations close to the retrieved context.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro", 
    google_api_key=api_key,
    temperature=0.1, 
    convert_system_message_to_human=True
)
# Embeddings + Chroma store over the split corpus; retriever returns the
# top-5 most similar chunks per query.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})

# Create QA chain
# Prompt that grounds the model strictly in the retrieved context; the
# {context} and {question} placeholders are filled in by RetrievalQA.
template = """You are a highly knowledgeable and detail-oriented medical assistant specializing in recommending insurance plans.\n
Ensure that each recommended plan meets every single requirement specified by the user.\n
Use only the information provided in the context. Do not generate any information that is not explicitly mentioned in the context\n
Context:
{context}
Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
# Retrieval-augmented QA chain: retrieves top-k chunks from vector_index,
# formats them into the prompt above, and returns the source documents
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

# History management
history_file = "./history2.json"

def load_history():
    """Return the saved chat history as a list.

    Yields [] when the file is missing, contains invalid JSON, or holds
    anything other than a JSON list.
    """
    if not os.path.exists(history_file):
        return []
    with open(history_file, "r") as handle:
        try:
            stored = json.load(handle)
        except json.JSONDecodeError:
            return []
    return stored if isinstance(stored, list) else []

def save_history(history):
    """Persist the chat history list to history_file as pretty-printed JSON."""
    with open(history_file, "w") as handle:
        json.dump(history, handle, indent=4)

# Restore the transcript from the previous session (empty list if none).
history = load_history()

def userPreference():
    """Summarize the user's stated preferences from the saved chat history.

    Sends the history JSON to a separate Gemini chat model and returns its
    plain-text summary (only statements phrased as want/prefer/preference
    are supposed to count). Returns the model's raw response text.
    """
    # SECURITY FIX: the API key was hard-coded here (a second copy of the
    # same leaked secret); read it from the environment instead.
    genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

    user_history = extract_text_from_json("./history2.json")
    print(user_history,"user history\n\n\n")

    # Higher temperature than the QA chain — this call paraphrases rather
    # than answers from context.
    generation_config = {
        "temperature": 0.9,
        "top_p": 1,
        "max_output_tokens": 2048,
        "response_mime_type": "text/plain",
    }

    model1 = genai.GenerativeModel(
        model_name="gemini-1.0-pro",
        generation_config=generation_config,
    )

    # Fresh chat each call; the history is passed inside the message itself.
    chat_session = model1.start_chat(history=[])

    response = chat_session.send_message(f"""{str(user_history)}
    Take user's preference only if they mention as want ,prefer or preference, etc.List user's preference.when user ask's to list something don't take it as preference.
    """)
    print("\n\n\n",response.text,"response \n\n")
    return response.text

def ask_question(question):
    """Answer *question* through the RetrievalQA chain and return the full
    transcript rendered as markdown for the Gradio output pane.

    Typing "exit" clears the persisted history and rebuilds the retriever
    over the original document corpus (dropping previously appended
    preferences), returning the greeting message instead of an answer.
    Mutates module globals: history, context, vector_index.
    """
    global history, context, vector_index
    
    if question.strip().lower() == "exit":
        history = []  
        save_history(history)
       
        # Reinitialize context and vector index
        context = "\n\n".join(texts1)
        texts = text_splitter.split_text(context)
        vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})

        return "Hey there! I'm your Medicare assistant. You can ask me questions related to different types of insurances and I'll help you. Let's get started!"

    # Append-only plain-text audit log of raw user questions.
    with open("./chat_history.txt", "a") as f:
        f.write(f"USER: {question}\n")

    # NOTE(review): the new retriever built below only takes effect on the
    # NEXT question — qa_chain was constructed with the previous retriever.
    result = qa_chain({"query": question})
    answer = result["result"]
    history.append({"USER": question, "answer": answer})
    save_history(history)
     
    # Re-derive the user's preferences from the saved history and rebuild
    # the vector index over corpus + preferences to personalize later answers.
    pref = userPreference()
    print("\n\n",pref,"pref\n\n\n")
    context = "\n\n".join(texts1)
    context1 = context +"USER'S PREFERENCE"+pref
    texts = text_splitter.split_text(context1)
    vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})

    # Render the whole transcript as markdown.
    history_md = ""
    for entry in history:
        history_md += f"**USER:** {entry['USER']}\n\n**BOT:** {entry['answer']}\n\n---\n\n"
    return history_md

# Seed the chat pane: greeting when there is no saved history, otherwise the
# transcript rendered in the same markdown format ask_question() produces.
if history:
    initial_history_md = "".join(
        f"**USER:** {entry['USER']}\n\n**BOT:** {entry['answer']}\n\n---\n\n"
        for entry in history
    )
else:
    initial_history_md = "Hey there! I'm your Medicare assistant. You can ask me questions related to different types of insurances and I'll help you. Let's get started!"

# Gradio UI: scrollable transcript on top, fixed question box at the bottom.
with gr.Blocks() as demo:
    # Inline CSS: pin the input row to the bottom of the viewport and keep
    # the transcript scrollable above it.
    gr.HTML(
        """
        <style>
            .fixed-bottom {
                position: fixed;
                bottom: 0;
                width: 100%;
                padding: 10px;
                box-shadow: 0 -1px 10px rgba(0, 0, 0, 0.1);
            }
            .scrollable-history {
                max-height: 80vh;
                overflow-y: auto;
                margin-bottom: 100px;
            }
        </style>
        """
    )
    
    # Markdown pane showing the running transcript; updated by ask_question.
    history_output = gr.Markdown(value=initial_history_md, elem_classes="scrollable-history")
    
    with gr.Row(elem_classes="fixed-bottom"):
        with gr.Column():
            question_input = gr.Textbox(lines=2, placeholder="Type your question here...", show_label=False)
            submit_button = gr.Button("Submit")
            # Two click handlers: first answers the question, second clears
            # the input box.
            submit_button.click(ask_question, inputs=question_input, outputs=history_output)
            submit_button.click(lambda: "", None, question_input)  
    
    # NOTE(review): bare expression — has no effect; likely leftover code.
    history_output

demo.launch()