Spaces:

Azmathussainthebo
/

Cheat_With_Multiple_Pdf

Build error

App Files Community

Azmathussainthebo committed on Feb 25, 2025

Commit

a29a1ab

verified ·

1 Parent(s): 08b9a7c

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -15

app.py CHANGED Viewed

@@ -1,16 +1,23 @@
 import os
 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
-from langchain_cohere import CohereEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
-from langchain_openai import ChatOpenAI
 # Load environment variables
-cohere_api_key = os.environ.get("COHERE_API_KEY")
-openai_api_key = os.environ.get("OPENAI_API_KEY")
 # Function to extract text from PDF files
 def get_pdf_text(pdf_docs):
@@ -32,28 +39,36 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
-# Function to create a FAISS vectorstore
 def get_vectorstore(text_chunks):
-    embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 # Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
-    llm = ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=openai_api_key)
-    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
-    conversation_chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        retriever=vectorstore.as_retriever(),
-        memory=memory
-    )
-    return conversation_chain
 # Handle user input
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
         response = st.session_state.conversation({'question': user_question})
         st.session_state.chat_history = response['chat_history']
         for i, message in enumerate(st.session_state.chat_history):
             if i % 2 == 0:
                 st.write(f"*User:* {message.content}")
@@ -64,7 +79,9 @@ def handle_userinput(user_question):
 # Main function to run the Streamlit app
 def main():
     st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
@@ -77,7 +94,9 @@ def main():
     with st.sidebar:
         st.subheader("Your documents")
-        pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)

 import os
+import logging
+from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
+from langchain_groq import ChatGroq
 # Load environment variables
+load_dotenv()
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 # Function to extract text from PDF files
 def get_pdf_text(pdf_docs):
     chunks = text_splitter.split_text(text)
     return chunks
+# Function to create a FAISS vectorstore using Hugging Face embeddings
 def get_vectorstore(text_chunks):
+    embeddings = HuggingFaceInstructEmbeddings(model_name="all-MiniLM-L6-v2")
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 # Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
+    try:
+        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
+        memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+        conversation_chain = ConversationalRetrievalChain.from_llm(
+            llm=llm,
+            retriever=vectorstore.as_retriever(),
+            memory=memory
+        )
+        logging.info("Conversation chain created successfully.")
+        return conversation_chain
+    except Exception as e:
+        logging.error(f"Error creating conversation chain: {e}")
+        st.error("An error occurred while setting up the conversation chain.")
 # Handle user input
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
         response = st.session_state.conversation({'question': user_question})
         st.session_state.chat_history = response['chat_history']
         for i, message in enumerate(st.session_state.chat_history):
             if i % 2 == 0:
                 st.write(f"*User:* {message.content}")
 # Main function to run the Streamlit app
 def main():
+    load_dotenv()
     st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
     with st.sidebar:
         st.subheader("Your documents")
+        pdf_docs = st.file_uploader(
+            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
+        )
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)