prsdm committed on
Commit
fca82a2
·
verified ·
1 Parent(s): b5f5448

Rename app (1).py to app.py

Browse files
Files changed (1) hide show
  1. app (1).py → app.py +4 -85
app (1).py → app.py RENAMED
@@ -1,18 +1,10 @@
1
- """
2
- Question Answering with Retrieval QA and LangChain Language Models featuring FAISS vector stores.
3
- This script uses the LangChain Language Model API to answer questions using Retrieval QA
4
- and FAISS vector stores. It also uses the Mistral huggingface inference endpoint to
5
- generate responses.
6
- """
7
 
8
  import os
9
  import streamlit as st
10
- from dotenv import load_dotenv
11
  from PyPDF2 import PdfReader
12
  from langchain.text_splitter import CharacterTextSplitter
13
  from langchain.embeddings import HuggingFaceBgeEmbeddings
14
  from langchain.vectorstores import FAISS
15
- from langchain.chat_models import ChatOpenAI
16
  from langchain.memory import ConversationBufferMemory
17
  from langchain.chains import ConversationalRetrievalChain
18
  from htmlTemplates import css, bot_template, user_template
@@ -20,20 +12,6 @@ from langchain.llms import HuggingFaceHub
20
 
21
 
22
  def get_pdf_text(pdf_docs):
23
- """
24
- Extract text from a list of PDF documents.
25
-
26
- Parameters
27
- ----------
28
- pdf_docs : list
29
- List of PDF documents to extract text from.
30
-
31
- Returns
32
- -------
33
- str
34
- Extracted text from all the PDF documents.
35
-
36
- """
37
  text = ""
38
  for pdf in pdf_docs:
39
  pdf_reader = PdfReader(pdf)
@@ -43,20 +21,6 @@ def get_pdf_text(pdf_docs):
43
 
44
 
45
  def get_text_chunks(text):
46
- """
47
- Split the input text into chunks.
48
-
49
- Parameters
50
- ----------
51
- text : str
52
- The input text to be split.
53
-
54
- Returns
55
- -------
56
- list
57
- List of text chunks.
58
-
59
- """
60
  text_splitter = CharacterTextSplitter(
61
  separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
62
  )
@@ -65,20 +29,6 @@ def get_text_chunks(text):
65
 
66
 
67
  def get_vectorstore(text_chunks):
68
- """
69
- Generate a vector store from a list of text chunks using HuggingFace BgeEmbeddings.
70
-
71
- Parameters
72
- ----------
73
- text_chunks : list
74
- List of text chunks to be embedded.
75
-
76
- Returns
77
- -------
78
- FAISS
79
- A FAISS vector store containing the embeddings of the text chunks.
80
-
81
- """
82
  model = "BAAI/bge-base-en-v1.5"
83
  encode_kwargs = {
84
  "normalize_embeddings": True
@@ -91,20 +41,6 @@ def get_vectorstore(text_chunks):
91
 
92
 
93
  def get_conversation_chain(vectorstore):
94
- """
95
- Create a conversational retrieval chain using a vector store and a language model.
96
-
97
- Parameters
98
- ----------
99
- vectorstore : FAISS
100
- A FAISS vector store containing the embeddings of the text chunks.
101
-
102
- Returns
103
- -------
104
- ConversationalRetrievalChain
105
- A conversational retrieval chain for generating responses.
106
-
107
- """
108
  llm = HuggingFaceHub(
109
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
110
  model_kwargs={"temperature": 0.5, "max_length": 1048},
@@ -119,45 +55,29 @@ def get_conversation_chain(vectorstore):
119
 
120
 
121
  def handle_userinput(user_question):
122
- """
123
- Handle user input and generate a response using the conversational retrieval chain.
124
- Parameters
125
- ----------
126
- user_question : str
127
- The user's question.
128
- """
129
  response = st.session_state.conversation({"question": user_question})
130
  st.session_state.chat_history = response["chat_history"]
131
 
132
  for i, message in enumerate(st.session_state.chat_history):
133
  if i % 2 == 0:
134
- st.write("//_^ User: " + message.content)
135
  else:
136
  st.write("πŸ€– ChatBot: " + message.content)
137
 
138
 
139
  def main():
140
- """
141
- Putting it all together.
142
- """
143
  st.set_page_config(
144
- page_title="Chat with a Bot that tries to answer questions about multiple PDFs",
145
- page_icon=":books:",
146
  )
147
 
148
- st.markdown("# Chat with a Bot")
149
- st.markdown("This bot tries to answer questions about multiple PDFs. Let the processing of the PDF finish before adding your question. πŸ™πŸΎ")
150
 
151
  st.write(css, unsafe_allow_html=True)
152
 
153
- # set huggingface hub token in st.text_input widget
154
- # then hide the input
155
  huggingface_token = st.text_input("Enter your HuggingFace Hub token", type="password")
156
- #openai_api_key = st.text_input("Enter your OpenAI API key", type="password")
157
 
158
- # set this key as an environment variable
159
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token
160
- #os.environ["OPENAI_API_KEY"] = openai_api_key
161
 
162
 
163
  if "conversation" not in st.session_state:
@@ -165,7 +85,6 @@ def main():
165
  if "chat_history" not in st.session_state:
166
  st.session_state.chat_history = None
167
 
168
- st.header("Chat with a Bot πŸ€–πŸ¦Ύ that tries to answer questions about multiple PDFs :books:")
169
  user_question = st.text_input("Ask a question about your documents:")
170
  if user_question:
171
  handle_userinput(user_question)
 
 
 
 
 
 
 
1
 
2
  import os
3
  import streamlit as st
 
4
  from PyPDF2 import PdfReader
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain.embeddings import HuggingFaceBgeEmbeddings
7
  from langchain.vectorstores import FAISS
 
8
  from langchain.memory import ConversationBufferMemory
9
  from langchain.chains import ConversationalRetrievalChain
10
  from htmlTemplates import css, bot_template, user_template
 
12
 
13
 
14
  def get_pdf_text(pdf_docs):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  text = ""
16
  for pdf in pdf_docs:
17
  pdf_reader = PdfReader(pdf)
 
21
 
22
 
23
  def get_text_chunks(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  text_splitter = CharacterTextSplitter(
25
  separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
26
  )
 
29
 
30
 
31
  def get_vectorstore(text_chunks):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  model = "BAAI/bge-base-en-v1.5"
33
  encode_kwargs = {
34
  "normalize_embeddings": True
 
41
 
42
 
43
  def get_conversation_chain(vectorstore):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  llm = HuggingFaceHub(
45
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
46
  model_kwargs={"temperature": 0.5, "max_length": 1048},
 
55
 
56
 
57
  def handle_userinput(user_question):
 
 
 
 
 
 
 
58
  response = st.session_state.conversation({"question": user_question})
59
  st.session_state.chat_history = response["chat_history"]
60
 
61
  for i, message in enumerate(st.session_state.chat_history):
62
  if i % 2 == 0:
63
+ st.write("πŸ‘€ User: " + message.content)
64
  else:
65
  st.write("πŸ€– ChatBot: " + message.content)
66
 
67
 
68
  def main():
 
 
 
69
  st.set_page_config(
70
+ page_title="Chat with multiple PDFs",
71
+ page_icon="πŸ“„",
72
  )
73
 
74
+ st.markdown("# Chat with multiple PDFs πŸ“„")
 
75
 
76
  st.write(css, unsafe_allow_html=True)
77
 
 
 
78
  huggingface_token = st.text_input("Enter your HuggingFace Hub token", type="password")
 
79
 
 
80
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token
 
81
 
82
 
83
  if "conversation" not in st.session_state:
 
85
  if "chat_history" not in st.session_state:
86
  st.session_state.chat_history = None
87
 
 
88
  user_question = st.text_input("Ask a question about your documents:")
89
  if user_question:
90
  handle_userinput(user_question)