Yash Sharma committed on
Commit
d31ac17
·
1 Parent(s): 873c82e

Added app.py file

Browse files
Files changed (1) hide show
  1. app.py +321 -0
app.py ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # # import streamlit as st
2
+ # # # from dotenv import load_dotenv
3
+ # # # from PyPDF2 import PdfReader
4
+ # # # from langchain.text_splitter import CharacterTextSplitter
5
+ # # # from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
+ # # # from langchain.vectorstores import FAISS
7
+ # # # from langchain.chat_models import ChatOpenAI
8
+ # # # from langchain.memory import ConversationBufferMemory
9
+ # # # from langchain.chains import ConversationalRetrievalChain
10
+ # # # from htmlTemplates import css, bot_template, user_template
11
+ # # # from langchain.llms import HuggingFaceHub
12
+
13
+ # # # def get_pdf_text(pdf_docs):
14
+ # # # text = ""
15
+ # # # for pdf in pdf_docs:
16
+ # # # pdf_reader = PdfReader(pdf)
17
+ # # # for page in pdf_reader.pages:
18
+ # # # text += page.extract_text()
19
+ # # # return text
20
+
21
+
22
+ # # # def get_text_chunks(text):
23
+ # # # text_splitter = CharacterTextSplitter(
24
+ # # # separator="\n",
25
+ # # # chunk_size=1000,
26
+ # # # chunk_overlap=200,
27
+ # # # length_function=len
28
+ # # # )
29
+ # # # chunks = text_splitter.split_text(text)
30
+ # # # return chunks
31
+
32
+
33
+ # # # def get_vectorstore(text_chunks):
34
+ # # # embeddings = OpenAIEmbeddings()
35
+ # # # # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
36
+ # # # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
37
+ # # # return vectorstore
38
+
39
+
40
+ # # # def get_conversation_chain(vectorstore):
41
+ # # # llm = ChatOpenAI()
42
+ # # # # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
43
+
44
+ # # # memory = ConversationBufferMemory(
45
+ # # # memory_key='chat_history', return_messages=True)
46
+ # # # conversation_chain = ConversationalRetrievalChain.from_llm(
47
+ # # # llm=llm,
48
+ # # # retriever=vectorstore.as_retriever(),
49
+ # # # memory=memory
50
+ # # # )
51
+ # # # return conversation_chain
52
+
53
+
54
+ # # # def handle_userinput(user_question):
55
+ # # # response = st.session_state.conversation({'question': user_question})
56
+ # # # st.session_state.chat_history = response['chat_history']
57
+
58
+ # # # for i, message in enumerate(st.session_state.chat_history):
59
+ # # # if i % 2 == 0:
60
+ # # # st.write(user_template.replace(
61
+ # # # "{{MSG}}", message.content), unsafe_allow_html=True)
62
+ # # # else:
63
+ # # # st.write(bot_template.replace(
64
+ # # # "{{MSG}}", message.content), unsafe_allow_html=True)
65
+
66
+
67
+ # # # def main():
68
+ # # # load_dotenv()
69
+ # # # st.set_page_config(page_title="Mental Health Support",
70
+ # # # page_icon=":books:")
71
+ # # # st.write(css, unsafe_allow_html=True)
72
+
73
+ # # # if "conversation" not in st.session_state:
74
+ # # # st.session_state.conversation = None
75
+ # # # if "chat_history" not in st.session_state:
76
+ # # # st.session_state.chat_history = None
77
+
78
+ # # # st.header("Mental Health Support :brain:")
79
+ # # # user_question = st.text_input("Ask a question about your documents:")
80
+ # # # if user_question:
81
+ # # # handle_userinput(user_question)
82
+
83
+ # # # with st.sidebar:
84
+ # # # st.subheader("Your documents")
85
+ # # # pdf_docs = st.file_uploader(
86
+ # # # "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
87
+ # # # if st.button("Process"):
88
+ # # # with st.spinner("Processing"):
89
+ # # # # get pdf text
90
+ # # # raw_text = get_pdf_text(pdf_docs)
91
+
92
+ # # # # get the text chunks
93
+ # # # text_chunks = get_text_chunks(raw_text)
94
+
95
+ # # # # create vector store
96
+ # # # vectorstore = get_vectorstore(text_chunks)
97
+
98
+ # # # # create conversation chain
99
+ # # # st.session_state.conversation = get_conversation_chain(
100
+ # # # vectorstore)
101
+
102
+
103
+
104
+ # # # if __name__ == '__main__':
105
+ # # # main()
106
+ # # # import streamlit as st
107
+ # # # from dotenv import load_dotenv
108
+ # # # from PyPDF2 import PdfReader
109
+ # # # from langchain.text_splitter import CharacterTextSplitter
110
+ # # # from langchain.embeddings import OpenAIEmbeddings
111
+ # # # # from langchain.embeddings import HuggingFaceInstructEmbeddings
112
+ # # # from langchain.vectorstores import FAISS
113
+ # # # from langchain.chat_models import ChatOpenAI
114
+ # # # from langchain.memory import ConversationBufferMemory
115
+ # # # from langchain.chains import ConversationalRetrievalChain
116
+ # # # from htmlTemplates import css, bot_template, user_template
117
+ # # # # from langchain.llms import HuggingFaceHub
118
+ # # # # from streamlit_option_menu import option_menu
119
+ # # # import pyttsx3
120
+
121
+ # # # def get_pdf_text(pdf_paths):
122
+ # # # text = ""
123
+ # # # for pdf_path in pdf_paths:
124
+ # # # with open(pdf_path, 'rb') as pdf_file:
125
+ # # # pdf_reader = PdfReader(pdf_file)
126
+ # # # for page in pdf_reader.pages:
127
+ # # # text += page.extract_text()
128
+ # # # return text
129
+
130
+ # # # def get_text_chunks(text):
131
+ # # # text_splitter = CharacterTextSplitter(
132
+ # # # separator="\n",
133
+ # # # chunk_size=1000,
134
+ # # # chunk_overlap=200,
135
+ # # # length_function=len
136
+ # # # )
137
+ # # # chunks = text_splitter.split_text(text)
138
+ # # # return chunks
139
+
140
+ # # # def get_vectorstore(text_chunks):
141
+ # # # embeddings = OpenAIEmbeddings()
142
+ # # # #embeddings = HuggingFaceInstructEmbeddings(model_name="nomic-ai/gpt4all-j")
143
+ # # # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
144
+ # # # return vectorstore
145
+
146
+ # # # def get_conversation_chain(vectorstore):
147
+ # # # llm = ChatOpenAI()
148
+ # # # #llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
149
+ # # # memory = ConversationBufferMemory(
150
+ # # # memory_key='chat_history', return_messages=True)
151
+ # # # conversation_chain = ConversationalRetrievalChain.from_llm(
152
+ # # # llm=llm,
153
+ # # # retriever=vectorstore.as_retriever(),
154
+ # # # memory=memory
155
+ # # # )
156
+ # # # return conversation_chain
157
+
158
+
159
+
160
+ # # # def handle_userinput(user_question):
161
+ # # # response = st.session_state.conversation({'question': user_question})
162
+ # # # st.session_state.chat_history = response['chat_history']
163
+
164
+
165
+ # # # for i, message in enumerate(st.session_state.chat_history):
166
+ # # # if i % 2 == 0:
167
+ # # # st.write(user_template.replace(
168
+ # # # "{{MSG}}", message.content), unsafe_allow_html=True)
169
+ # # # else:
170
+ # # # st.write(bot_template.replace(
171
+ # # # "{{MSG}}", message.content), unsafe_allow_html=True)
172
+
173
+ # # # engine = pyttsx3.init()
174
+ # # # engine.say(response['answer'])
175
+ # # # engine.runAndWait()
176
+
177
+ # # # def main():
178
+ # # # load_dotenv()
179
+ # # # st.set_page_config(page_title="Mental Health Support", page_icon=":brain:")
180
+ # # # st.write(css, unsafe_allow_html=True)
181
+
182
+
183
+ # # # if "conversation" not in st.session_state:
184
+ # # # st.session_state.conversation = None
185
+ # # # if "chat_history" not in st.session_state:
186
+ # # # st.session_state.chat_history = None
187
+
188
+ # # # st.header("Mental Health Support :brain:")
189
+ # # # pdf_paths = [
190
+ # # # 'C:/Users/sharm/Downloads/ask-multiple-pdfs-main/ask-multiple-pdfs-main/Chat_data.pdf',
191
+ # # # 'C:/Users/sharm/Downloads/ask-multiple-pdfs-main/ask-multiple-pdfs-main/class 10 history ch 3.pdf'
192
+ # # # ]
193
+
194
+
195
+ # # # # get pdf text
196
+ # # # raw_text = get_pdf_text(pdf_paths)
197
+
198
+ # # # # get the text chunks
199
+ # # # text_chunks = get_text_chunks(raw_text)
200
+
201
+ # # # # create vector store
202
+ # # # vectorstore = get_vectorstore(text_chunks)
203
+
204
+ # # # # create conversation chain
205
+ # # # st.session_state.conversation = get_conversation_chain(vectorstore)
206
+
207
+ # # # user_question = st.text_input("Your therapist is there for you!:")
208
+ # # # if user_question and st.session_state.conversation:
209
+ # # # handle_userinput(user_question)
210
+
211
+ # # # if __name__ == '__main__':
212
+ # # # main()
213
+ import streamlit as st
214
+ from dotenv import load_dotenv
215
+ from PyPDF2 import PdfReader
216
+ from langchain.text_splitter import CharacterTextSplitter
217
+ from langchain.embeddings import OpenAIEmbeddings,HuggingFaceInstructEmbeddings
218
+ from langchain.vectorstores import FAISS
219
+ from langchain.chat_models import ChatOpenAI
220
+ from langchain.memory import ConversationBufferMemory
221
+ from langchain.chains import ConversationalRetrievalChain
222
+ from langchain.llms import HuggingFaceHub
223
+ from htmlTemplates import css, bot_template, user_template
224
+ #from InstructorEmbedding import INSTRUCTOR
225
+ import tempfile
226
+ import ttsmms
227
+ import soundfile as sf
228
+ from streamlit.components.v1 import html
229
+
230
def get_pdf_text(pdf_paths):
    """Concatenate the extracted text of every page of every PDF in *pdf_paths*.

    Args:
        pdf_paths: iterable of filesystem paths to PDF files.

    Returns:
        A single string containing all extracted page text, in order.
    """
    text = ""
    for pdf_path in pdf_paths:
        with open(pdf_path, 'rb') as pdf_file:
            pdf_reader = PdfReader(pdf_file)
            for page in pdf_reader.pages:
                # extract_text() can return None for pages with no
                # extractable text; guard so the concatenation never
                # raises TypeError.
                text += page.extract_text() or ""
    return text
238
+
239
def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding.

    Uses newline-separated chunks of 1000 characters with a 200-character
    overlap, measured by ``len``.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
248
+
249
def get_vectorstore(text_chunks):
    """Embed *text_chunks* with Instructor embeddings and index them in FAISS."""
    # OpenAI embeddings were used in an earlier revision:
    # embeddings = OpenAIEmbeddings()
    instructor = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")
    return FAISS.from_texts(texts=text_chunks, embedding=instructor)
254
+
255
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over *vectorstore*.

    Uses a ChatOpenAI LLM and a buffer memory keyed on ``chat_history``
    so the chain can answer follow-up questions in context.
    """
    chat_llm = ChatOpenAI()
    buffer = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        memory=buffer,
    )
    return chain
265
+
266
def handle_userinput(user_question):
    """Send *user_question* to the conversation chain, render the chat, and speak the answer.

    Reads ``st.session_state.conversation`` (must already be initialised)
    and stores the chain's running history in ``st.session_state.chat_history``.
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # History alternates user / bot messages, starting with the user.
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)
        else:
            st.write(bot_template.replace(
                "{{MSG}}", message.content), unsafe_allow_html=True)

    # Synthesize speech for the latest answer. Close the temp-file handle
    # immediately — only the path is needed, and the original code leaked
    # the open file object returned by NamedTemporaryFile.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp.close()
    audio_path = tmp.name
    tts = ttsmms.TTS("data/eng")  # assumes TTS model files live at data/eng — TODO confirm
    wav = tts.synthesis(response['answer'])
    sf.write(audio_path, wav["x"], wav["sampling_rate"])

    # NOTE(review): .wav files accumulate in the temp dir across reruns;
    # consider removing audio_path once st.audio has consumed it.
    st.audio(audio_path, format="audio/wav", start_time=0, sample_rate=wav["sampling_rate"])
284
+
285
def main():
    """App entry point: configure the page, build the RAG pipeline once, then chat.

    Streamlit re-executes this function on every widget interaction, so the
    expensive embed/index/chain construction is cached in session state.
    """
    load_dotenv()
    st.set_page_config(page_title="Mental Health Support", page_icon=":brain:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Mental Health Support :brain:")
    # TODO(review): hard-coded developer paths — move to config or env vars.
    pdf_paths = [
        'C:/Users/sharm/Downloads/ask-multiple-pdfs-main/ask-multiple-pdfs-main/Chat_data.pdf',
        'C:/Users/sharm/Downloads/ask-multiple-pdfs-main/ask-multiple-pdfs-main/class 10 history ch 3.pdf'
    ]

    # Build the pipeline only once per session: the original code re-read
    # the PDFs, re-embedded them, and rebuilt the FAISS index and chain on
    # every Streamlit rerun (i.e. on every keystroke submission).
    if st.session_state.conversation is None:
        raw_text = get_pdf_text(pdf_paths)
        text_chunks = get_text_chunks(raw_text)
        vectorstore = get_vectorstore(text_chunks)
        st.session_state.conversation = get_conversation_chain(vectorstore)

    user_question = st.text_input("Your therapist is there for you!:")
    if user_question and st.session_state.conversation:
        handle_userinput(user_question)


if __name__ == '__main__':
    main()
312
+ # my_js = """
313
+ # alert("Please don't forget to enter your daily details!!!");
314
+ # """
315
+
316
+ # # Wrap the JavaScript as HTML code
317
+ # my_html = f"<script>{my_js}</script>"
318
+
319
+ # # Execute your app
320
+
321
+ # html(my_html)