Dua Rajper committed on
Commit
3740dca
·
verified ·
1 Parent(s): f1f3635

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -41
app.py CHANGED
@@ -4,13 +4,11 @@ from dotenv import load_dotenv
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
- # from langchain.embeddings import HuggingFaceInstructEmbeddings
8
- from langchain_cohere import CohereEmbeddings
9
  from langchain.vectorstores import FAISS
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain.chains import ConversationalRetrievalChain
12
- # from langchain.llms import Ollama
13
- from langchain_groq import ChatGroq
14
 
15
  # Load environment variables
16
  load_dotenv()
@@ -21,16 +19,14 @@ logging.basicConfig(
21
  format='%(asctime)s - %(levelname)s - %(message)s'
22
  )
23
 
24
- # Function to extract text from PDF files
25
  def get_pdf_text(pdf_docs):
26
  text = ""
27
  for pdf in pdf_docs:
28
  pdf_reader = PdfReader(pdf)
29
  for page in pdf_reader.pages:
30
- text += page.extract_text()
31
  return text
32
 
33
- # Function to split the extracted text into chunks
34
  def get_text_chunks(text):
35
  text_splitter = CharacterTextSplitter(
36
  separator="\n",
@@ -38,74 +34,54 @@ def get_text_chunks(text):
38
  chunk_overlap=200,
39
  length_function=len
40
  )
41
- chunks = text_splitter.split_text(text)
42
- return chunks
43
-
44
- # Function to create a FAISS vectorstore
45
- # def get_vectorstore(text_chunks):
46
- # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
47
- # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
48
- # return vectorstore
49
 
50
  def get_vectorstore(text_chunks):
51
- cohere_api_key = os.getenv("COHERE_API_KEY")
52
- embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
53
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
54
- return vectorstore
55
 
56
- # Function to set up the conversational retrieval chain
57
  def get_conversation_chain(vectorstore):
58
  try:
59
- # llm = Ollama(model="llama3.2:1b")
60
- llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
61
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
62
 
63
- conversation_chain = ConversationalRetrievalChain.from_llm(
64
  llm=llm,
65
  retriever=vectorstore.as_retriever(),
66
  memory=memory
67
  )
68
-
69
- logging.info("Conversation chain created successfully.")
70
- return conversation_chain
71
  except Exception as e:
72
  logging.error(f"Error creating conversation chain: {e}")
73
  st.error("An error occurred while setting up the conversation chain.")
 
74
 
75
- # Handle user input
76
  def handle_userinput(user_question):
77
- if st.session_state.conversation is not None:
78
  response = st.session_state.conversation({'question': user_question})
79
  st.session_state.chat_history = response['chat_history']
80
-
81
  for i, message in enumerate(st.session_state.chat_history):
82
- if i % 2 == 0:
83
- st.write(f"*User:* {message.content}")
84
- else:
85
- st.write(f"*Bot:* {message.content}")
86
  else:
87
  st.warning("Please process the documents first.")
88
 
89
- # Main function to run the Streamlit app
90
  def main():
91
- load_dotenv()
92
- st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
93
 
94
  if "conversation" not in st.session_state:
95
  st.session_state.conversation = None
96
  if "chat_history" not in st.session_state:
97
  st.session_state.chat_history = None
98
 
99
- st.header("Chat with multiple PDFs :books:")
100
  user_question = st.text_input("Ask a question about your documents:")
101
  if user_question:
102
  handle_userinput(user_question)
103
 
104
  with st.sidebar:
105
  st.subheader("Your documents")
106
- pdf_docs = st.file_uploader(
107
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
108
- )
109
  if st.button("Process"):
110
  with st.spinner("Processing..."):
111
  raw_text = get_pdf_text(pdf_docs)
@@ -114,4 +90,4 @@ def main():
114
  st.session_state.conversation = get_conversation_chain(vectorstore)
115
 
116
  if __name__ == '__main__':
117
- main()
 
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain.embeddings import HuggingFaceEmbeddings
 
8
  from langchain.vectorstores import FAISS
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.llms import HuggingFaceHub
 
12
 
13
  # Load environment variables
14
  load_dotenv()
 
19
  format='%(asctime)s - %(levelname)s - %(message)s'
20
  )
21
 
 
22
def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page in the uploaded PDFs.

    Pages where PyPDF2 cannot extract text (``extract_text()`` returns
    ``None``) contribute an empty string instead of raising.
    """
    pages = []
    for pdf in pdf_docs:
        reader = PdfReader(pdf)
        pages.extend(page.extract_text() or "" for page in reader.pages)
    return "".join(pages)
29
 
 
30
  def get_text_chunks(text):
31
  text_splitter = CharacterTextSplitter(
32
  separator="\n",
 
34
  chunk_overlap=200,
35
  length_function=len
36
  )
37
+ return text_splitter.split_text(text)
 
 
 
 
 
 
 
38
 
39
def get_vectorstore(text_chunks):
    """Embed *text_chunks* and index them in an in-memory FAISS store."""
    # MiniLM is a small sentence-transformer that runs locally (no API key).
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    return store
 
 
42
 
 
43
def get_conversation_chain(vectorstore):
    """Build a ConversationalRetrievalChain over *vectorstore*.

    Returns the chain on success; on failure, logs the error, surfaces a
    Streamlit error message, and returns None.
    """
    try:
        llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={"temperature": 0.5})
        # Buffered chat history lets the chain resolve follow-up questions.
        memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            memory=memory,
        )
    except Exception as e:
        logging.error(f"Error creating conversation chain: {e}")
        st.error("An error occurred while setting up the conversation chain.")
        return None
    return chain
57
 
 
58
def handle_userinput(user_question):
    """Send *user_question* through the conversation chain and render the chat.

    Warns the user when no chain exists yet (documents not processed).
    """
    chain = st.session_state.conversation
    if not chain:
        st.warning("Please process the documents first.")
        return
    response = chain({'question': user_question})
    st.session_state.chat_history = response['chat_history']
    # History alternates strictly: even indices are user turns, odd are bot replies.
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(f"*User:* {message.content}")
        else:
            st.write(f"*Bot:* {message.content}")
68
 
 
69
  def main():
70
+ st.set_page_config(page_title="Chat with PDFs", page_icon=":books:")
 
71
 
72
  if "conversation" not in st.session_state:
73
  st.session_state.conversation = None
74
  if "chat_history" not in st.session_state:
75
  st.session_state.chat_history = None
76
 
77
+ st.header("Chat with PDFs :books:")
78
  user_question = st.text_input("Ask a question about your documents:")
79
  if user_question:
80
  handle_userinput(user_question)
81
 
82
  with st.sidebar:
83
  st.subheader("Your documents")
84
+ pdf_docs = st.file_uploader("Upload PDFs and click 'Process'", accept_multiple_files=True)
 
 
85
  if st.button("Process"):
86
  with st.spinner("Processing..."):
87
  raw_text = get_pdf_text(pdf_docs)
 
90
  st.session_state.conversation = get_conversation_chain(vectorstore)
91
 
92
  if __name__ == '__main__':
93
+ main()