Spaces:

Azmathussainthebo
/

Cheat_With_Multiple_Pdf

Build error

App Files Community

Azmathussainthebo committed on Feb 25, 2025

Commit

506eddb

verified ·

1 Parent(s): f1ac50a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -40

app.py CHANGED Viewed

@@ -1,25 +1,16 @@
 import os
-import logging
-from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
-# from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain_cohere import CohereEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
-# from langchain.llms import Ollama
-from langchain_groq import ChatGroq
 # Load environment variables
-load_dotenv()
-# Set up logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
 # Function to extract text from PDF files
 def get_pdf_text(pdf_docs):
@@ -42,42 +33,27 @@ def get_text_chunks(text):
     return chunks
 # Function to create a FAISS vectorstore
-# def get_vectorstore(text_chunks):
-#     embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
-#     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-#     return vectorstore
 def get_vectorstore(text_chunks):
-    cohere_api_key = os.getenv("COHERE_API_KEY")
     embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 # Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
-    try:
-        # llm = Ollama(model="llama3.2:1b")
-        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
-        memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
-        conversation_chain = ConversationalRetrievalChain.from_llm(
-            llm=llm,
-            retriever=vectorstore.as_retriever(),
-            memory=memory
-        )
-        logging.info("Conversation chain created successfully.")
-        return conversation_chain
-    except Exception as e:
-        logging.error(f"Error creating conversation chain: {e}")
-        st.error("An error occurred while setting up the conversation chain.")
 # Handle user input
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
         response = st.session_state.conversation({'question': user_question})
         st.session_state.chat_history = response['chat_history']
         for i, message in enumerate(st.session_state.chat_history):
             if i % 2 == 0:
                 st.write(f"*User:* {message.content}")
@@ -88,9 +64,7 @@ def handle_userinput(user_question):
 # Main function to run the Streamlit app
 def main():
-    load_dotenv()
     st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
@@ -103,9 +77,7 @@ def main():
     with st.sidebar:
         st.subheader("Your documents")
-        pdf_docs = st.file_uploader(
-            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
-        )
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)

 import os
 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_cohere import CohereEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
+from langchain_openai import ChatOpenAI
 # Load environment variables
+cohere_api_key = os.environ.get("COHERE_API_KEY")
+openai_api_key = os.environ.get("OPENAI_API_KEY")
 # Function to extract text from PDF files
 def get_pdf_text(pdf_docs):
     return chunks
 # Function to create a FAISS vectorstore
 def get_vectorstore(text_chunks):
     embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 # Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
+    llm = ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=openai_api_key)
+    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        memory=memory
+    )
+    return conversation_chain
 # Handle user input
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
         response = st.session_state.conversation({'question': user_question})
         st.session_state.chat_history = response['chat_history']
         for i, message in enumerate(st.session_state.chat_history):
             if i % 2 == 0:
                 st.write(f"*User:* {message.content}")
 # Main function to run the Streamlit app
 def main():
     st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
     with st.sidebar:
         st.subheader("Your documents")
+        pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)