Spaces:

yousifalishah
/

chatWithMultiplePDF1

Sleeping

App Files Files Community

yousifalishah committed on Feb 26, 2025

Commit

517d6f4

verified ·

1 Parent(s): 0cc78f9

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -23

app.py CHANGED Viewed

@@ -4,11 +4,11 @@ from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import SentenceTransformerEmbeddings
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
-from langchain.chat_models import ChatGroq
 # Load environment variables
 load_dotenv()
@@ -19,8 +19,8 @@ logging.basicConfig(
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 def get_pdf_text(pdf_docs):
-    """Extract text from uploaded PDF files."""
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
@@ -28,8 +28,8 @@ def get_pdf_text(pdf_docs):
             text += page.extract_text() or ""
     return text
 def get_text_chunks(text):
-    """Split the extracted text into manageable chunks."""
     text_splitter = CharacterTextSplitter(
         separator="\n",
         chunk_size=1000,
@@ -38,38 +38,32 @@ def get_text_chunks(text):
     )
     return text_splitter.split_text(text)
 def get_vectorstore(text_chunks):
-    """Create a FAISS vectorstore from text chunks."""
-    try:
-        embedding_function = SentenceTransformerEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
-        vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embedding_function)
-        logging.info("Vectorstore created successfully.")
-        return vectorstore
-    except Exception as e:
-        logging.error(f"Error creating vectorstore: {e}", exc_info=True)
-        st.error(f"An error occurred while creating the vectorstore: {e}")
-        return None
 def get_conversation_chain(vectorstore):
-    """Set up the conversational retrieval chain using Groq's API."""
     try:
-        groq_api_key = os.getenv("GROQ_API_KEY")
-        llm = ChatGroq(model_name="mixtral-8x7b-32768", temperature=0.5, api_key=groq_api_key)
         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
             retriever=vectorstore.as_retriever(),
-            memory=ConversationBufferMemory(memory_key='chat_history', return_messages=True)
         )
         logging.info("Conversation chain created successfully.")
         return conversation_chain
     except Exception as e:
-        logging.error(f"Error creating conversation chain: {e}", exc_info=True)
-        st.error(f"An error occurred while setting up the conversation chain: {e}")
-        return None
 def handle_userinput(user_question):
-    """Handle user input and generate a response."""
     if st.session_state.conversation is not None:
         response = st.session_state.conversation({'question': user_question})
         st.session_state.chat_history = response.get('chat_history', [])
@@ -82,8 +76,8 @@ def handle_userinput(user_question):
     else:
         st.warning("Please process the documents first.")
 def main():
-    """Run the Streamlit app."""
     load_dotenv()
     st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")

 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
+from langchain_groq import ChatGroq
 # Load environment variables
 load_dotenv()
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
+# Function to extract text from PDF files
 def get_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
             text += page.extract_text() or ""
     return text
+# Function to split the extracted text into chunks
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
         chunk_size=1000,
     )
     return text_splitter.split_text(text)
+# Function to create a FAISS vectorstore using Hugging Face embeddings
 def get_vectorstore(text_chunks):
     """Embed the text chunks and index them in a FAISS vector store.

     Args:
         text_chunks: Texts handed to ``FAISS.from_texts`` for indexing.

     Returns:
         The FAISS vector store built from ``text_chunks``.
     """
     # Embedding model applied to every chunk before it is indexed.
     embedder = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2"
     )
     return FAISS.from_texts(texts=text_chunks, embedding=embedder)
+# Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
     """Build the conversational retrieval chain on top of the vector store.

     Args:
         vectorstore: Store whose ``as_retriever()`` supplies the retriever
             used by the chain.

     Returns:
         The ``ConversationalRetrievalChain`` on success, or ``None`` when
         setup fails. A failure is logged with its traceback and reported
         in the Streamlit UI.
     """
     try:
         llm = ChatGroq(model="mixtral-8x7b-32768", temperature=0.5)
         # Buffer memory keeps prior turns under the 'chat_history' key so
         # follow-up questions are answered in context.
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
             retriever=vectorstore.as_retriever(),
             memory=memory
         )
     except Exception as e:
         # Top-level UI boundary: keep the app alive, but record the full
         # traceback so the root cause is not lost.
         logging.error(f"Error creating conversation chain: {e}", exc_info=True)
         st.error("An error occurred while setting up the conversation chain.")
         return None
     else:
         logging.info("Conversation chain created successfully.")
         return conversation_chain
+# Handle user input
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
         response = st.session_state.conversation({'question': user_question})
         st.session_state.chat_history = response.get('chat_history', [])
     else:
         st.warning("Please process the documents first.")
+# Main function to run the Streamlit app
 def main():
     load_dotenv()
     st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")