yousifalishah committed on
Commit
517d6f4
·
verified ·
1 Parent(s): 0cc78f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -23
app.py CHANGED
@@ -4,11 +4,11 @@ from dotenv import load_dotenv
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
 
7
  from langchain_community.vectorstores import FAISS
8
- from langchain_community.embeddings import SentenceTransformerEmbeddings
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
11
- from langchain.chat_models import ChatGroq
12
 
13
  # Load environment variables
14
  load_dotenv()
@@ -19,8 +19,8 @@ logging.basicConfig(
19
  format='%(asctime)s - %(levelname)s - %(message)s'
20
  )
21
 
 
22
  def get_pdf_text(pdf_docs):
23
- """Extract text from uploaded PDF files."""
24
  text = ""
25
  for pdf in pdf_docs:
26
  pdf_reader = PdfReader(pdf)
@@ -28,8 +28,8 @@ def get_pdf_text(pdf_docs):
28
  text += page.extract_text() or ""
29
  return text
30
 
 
31
  def get_text_chunks(text):
32
- """Split the extracted text into manageable chunks."""
33
  text_splitter = CharacterTextSplitter(
34
  separator="\n",
35
  chunk_size=1000,
@@ -38,38 +38,32 @@ def get_text_chunks(text):
38
  )
39
  return text_splitter.split_text(text)
40
 
 
41
  def get_vectorstore(text_chunks):
42
- """Create a FAISS vectorstore from text chunks."""
43
- try:
44
- embedding_function = SentenceTransformerEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
45
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embedding_function)
46
- logging.info("Vectorstore created successfully.")
47
- return vectorstore
48
- except Exception as e:
49
- logging.error(f"Error creating vectorstore: {e}", exc_info=True)
50
- st.error(f"An error occurred while creating the vectorstore: {e}")
51
- return None
52
 
 
53
  def get_conversation_chain(vectorstore):
54
- """Set up the conversational retrieval chain using Groq's API."""
55
  try:
56
- groq_api_key = os.getenv("GROQ_API_KEY")
57
- llm = ChatGroq(model_name="mixtral-8x7b-32768", temperature=0.5, api_key=groq_api_key)
58
 
59
  conversation_chain = ConversationalRetrievalChain.from_llm(
60
  llm=llm,
61
  retriever=vectorstore.as_retriever(),
62
- memory=ConversationBufferMemory(memory_key='chat_history', return_messages=True)
63
  )
 
64
  logging.info("Conversation chain created successfully.")
65
  return conversation_chain
66
  except Exception as e:
67
- logging.error(f"Error creating conversation chain: {e}", exc_info=True)
68
- st.error(f"An error occurred while setting up the conversation chain: {e}")
69
- return None
70
 
 
71
  def handle_userinput(user_question):
72
- """Handle user input and generate a response."""
73
  if st.session_state.conversation is not None:
74
  response = st.session_state.conversation({'question': user_question})
75
  st.session_state.chat_history = response.get('chat_history', [])
@@ -82,8 +76,8 @@ def handle_userinput(user_question):
82
  else:
83
  st.warning("Please process the documents first.")
84
 
 
85
  def main():
86
- """Run the Streamlit app."""
87
  load_dotenv()
88
  st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
89
 
 
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import FAISS
 
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
11
+ from langchain_groq import ChatGroq
12
 
13
  # Load environment variables
14
  load_dotenv()
 
19
  format='%(asctime)s - %(levelname)s - %(message)s'
20
  )
21
 
22
+ # Function to extract text from PDF files
23
  def get_pdf_text(pdf_docs):
 
24
  text = ""
25
  for pdf in pdf_docs:
26
  pdf_reader = PdfReader(pdf)
 
28
  text += page.extract_text() or ""
29
  return text
30
 
31
+ # Function to split the extracted text into chunks
32
  def get_text_chunks(text):
 
33
  text_splitter = CharacterTextSplitter(
34
  separator="\n",
35
  chunk_size=1000,
 
38
  )
39
  return text_splitter.split_text(text)
40
 
41
# Function to create a FAISS vectorstore using Hugging Face embeddings
def get_vectorstore(text_chunks, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Build a FAISS vectorstore from a list of text chunks.

    Args:
        text_chunks: List of strings to embed and index.
        model_name: Hugging Face sentence-transformers model used for the
            embeddings. Defaults to the lightweight all-MiniLM-L6-v2, which
            preserves the original hard-coded behavior.

    Returns:
        A FAISS vectorstore indexing the given chunks.
    """
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
 
 
 
 
 
 
 
46
 
47
# Function to set up the conversational retrieval chain
def get_conversation_chain(vectorstore):
    """Create a ConversationalRetrievalChain backed by Groq's chat API.

    Args:
        vectorstore: A vectorstore exposing ``as_retriever()``, used for
            document retrieval.

    Returns:
        The configured ConversationalRetrievalChain, or ``None`` when setup
        fails (the error is logged and surfaced in the Streamlit UI).
    """
    try:
        # ChatGroq picks up GROQ_API_KEY from the environment (load_dotenv
        # is called at module import time) — TODO confirm the key is set.
        llm = ChatGroq(model="mixtral-8x7b-32768", temperature=0.5)
        memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            memory=memory
        )

        logging.info("Conversation chain created successfully.")
        return conversation_chain
    except Exception as e:
        # exc_info=True keeps the traceback in the log (it was dropped in
        # this revision); return None explicitly so callers can test for it.
        logging.error(f"Error creating conversation chain: {e}", exc_info=True)
        st.error("An error occurred while setting up the conversation chain.")
        return None
 
64
 
65
+ # Handle user input
66
  def handle_userinput(user_question):
 
67
  if st.session_state.conversation is not None:
68
  response = st.session_state.conversation({'question': user_question})
69
  st.session_state.chat_history = response.get('chat_history', [])
 
76
  else:
77
  st.warning("Please process the documents first.")
78
 
79
+ # Main function to run the Streamlit app
80
  def main():
 
81
  load_dotenv()
82
  st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
83