InnoAIWarrior committed on
Commit
6ccbc82
·
1 Parent(s): f847e34

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +95 -1
  2. env +2 -0
  3. htmlTemplates.py +44 -0
app.py CHANGED
@@ -1,7 +1,101 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def main():
4
- st.write("Hello world")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  if __name__ == '__main__':
7
  main()
 
1
  import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chat_models import ChatOpenAI
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from htmlTemplates import css, bot_template, user_template
11
+ from langchain.llms import HuggingFaceHub
12
+
13
def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` accepts (in this
            app, the uploaded files from the sidebar). ``None`` is treated as
            "no documents".

    Returns:
        One string containing the page texts in document and page order,
        joined without any separator. Pages that yield no text contribute
        nothing.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page may yield no text (e.g. scanned/image-only pages);
            # `or ""` keeps a None result from raising TypeError, which the
            # original `text += page.extract_text()` would have done.
            page_texts.append(page.extract_text() or "")
    # A single join avoids repeated string re-allocation on large documents.
    return "".join(page_texts)
20
+
21
+
22
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks.

    The text is split on newlines into chunks of at most 1000 characters
    (measured with ``len``), with 200 characters of overlap between
    neighbouring chunks.

    Args:
        text: The full document text to split.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
31
+
32
+
33
def get_vectorstore(text_chunks):
    """Build a FAISS vector store from the chunks using OpenAI embeddings.

    Args:
        text_chunks: The text chunks to embed and index.

    Returns:
        The vector store created by ``FAISS.from_texts``.
    """
    # Alternative embedding model that was left disabled by the author:
    # HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    embedding_model = OpenAIEmbeddings()
    return FAISS.from_texts(texts=text_chunks, embedding=embedding_model)
38
+
39
+
40
def get_conversation_chain(vectorstore):
    """Create a conversational retrieval chain backed by ``vectorstore``.

    Args:
        vectorstore: Object exposing ``as_retriever()``; used as the chain's
            retriever.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm`` with a
        ``ChatOpenAI`` model and a buffer memory stored under the
        ``chat_history`` key.
    """
    chat_model = ChatOpenAI()
    # Alternative model that was left disabled by the author:
    # HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})

    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
51
+
52
+
53
def handle_userinput(user_question):
    """Send the question to the conversation chain and render the chat.

    The chain stored in ``st.session_state.conversation`` is called with the
    question, the returned ``chat_history`` is saved back to the session
    state, and each message is rendered through the user/bot HTML templates
    (even positions use the user template, odd positions the bot template).

    Args:
        user_question: The question text typed by the user.

    Returns:
        None. If no conversation chain exists yet, a warning is shown and
        nothing else happens.
    """
    # Local import keeps this block self-contained (stdlib only).
    from html import escape

    conversation = st.session_state.conversation
    if conversation is None:
        # main() initialises the chain to None until "Process" has run;
        # calling None would raise TypeError.
        st.warning("Please upload your PDFs and click on 'Process' before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The templates are rendered with unsafe_allow_html=True, so message
        # text (user input or model output) must be escaped before it is
        # substituted into the markup.
        st.write(template.replace("{{MSG}}", escape(message.content)), unsafe_allow_html=True)
62
+
63
 
64
def main():
    """Run the Streamlit app: page setup, question box and PDF sidebar.

    Loads environment variables, configures the page, initialises the
    ``conversation`` and ``chat_history`` session entries, handles a typed
    question, and — when "Process" is clicked — turns the uploaded PDFs into
    a conversation chain stored in the session state.
    """
    load_dotenv()
    st.set_page_config(page_title="AIusBOT",
                       page_icon=":pdf:")
    st.write(css, unsafe_allow_html=True)

    # Session entries must exist before any widget callback reads them.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("AIusBOT :pdf:")
    user_question = st.text_input("Ask a question about your PDF?:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            if not pdf_docs:
                # Nothing uploaded: there is no text to index, so skip the
                # pipeline instead of failing inside it.
                st.warning("Please upload at least one PDF before clicking 'Process'.")
            else:
                with st.spinner("Processing"):
                    # get pdf text
                    raw_text = get_pdf_text(pdf_docs)

                    # get the text chunks
                    text_chunks = get_text_chunks(raw_text)

                    if not text_chunks:
                        # e.g. PDFs with no extractable text; an empty chunk
                        # list gives the vector store nothing to index.
                        st.error("No extractable text was found in the uploaded PDFs.")
                    else:
                        # create vector store
                        vectorstore = get_vectorstore(text_chunks)

                        # create conversation chain
                        st.session_state.conversation = get_conversation_chain(vectorstore)


if __name__ == '__main__':
    main()
env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ OPENAI_API_KEY=<your-openai-api-key>
2
+ HUGGINGFACEHUB_API_TOKEN=<your-huggingface-hub-token>
htmlTemplates.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet for the chat bubbles used by bot_template and user_template.
# The <style> element is now explicitly closed; the original string left it
# open, which is malformed HTML.
css = '''
<style>
.chat-message {
padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
background-color: #2b313e
}
.chat-message.bot {
background-color: #475063
}
.chat-message .avatar {
width: 20%;
}
.chat-message .avatar img {
max-width: 78px;
max-height: 78px;
border-radius: 50%;
object-fit: cover;
}
.chat-message .message {
width: 80%;
padding: 0 1.5rem;
color: #fff;
}
</style>
'''
27
+
28
# Markup for one bot chat bubble; the {{MSG}} placeholder is replaced with
# the message text by the caller.
bot_template = """
<div class="chat-message bot">
<div class="avatar">
<img src="https://images.squarespace-cdn.com/content/v1/56000e61e4b0009a00ff8cf5/f2f1bbb8-f0a6-45cb-add0-a2a3e82c1cda/TNQ-Logo-%2810cm%29.png?format=78w" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
</div>
<div class="message">{{MSG}}</div>
</div>
"""
36
+
37
# Markup for one user chat bubble; the {{MSG}} placeholder is replaced with
# the message text by the caller.
user_template = """
<div class="chat-message user">
<div class="avatar">
<img src="https://gravatar.com/avatar/efe06f34526ba62f479f33b1bd05c9cf?s=200&d=mp&r=r">
</div>
<div class="message">{{MSG}}</div>
</div>
"""