Spaces:

Manasa1
/

CHAT_WITH_PDF_USING_DEEPSEEK

Sleeping

App Files Files Community

Manasa1 committed on Feb 11, 2025

Commit

4e886c9

verified ·

1 Parent(s): e7eb78a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -32

app.py CHANGED Viewed

@@ -9,24 +9,17 @@ from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 import os
 import nltk
-# Load environment variables
-load_dotenv()
-# Download necessary NLTK data
-nltk.download('punkt')
-nltk.download('averaged_perceptron_tagger')
 # Install Poppler and Tesseract in the runtime environment
 os.system("apt-get update && apt-get install -y poppler-utils tesseract-ocr")
-# Retrieve API key
 secret = os.getenv('GROQ_API_KEY')
-# Get the working directory
 working_dir = os.path.dirname(os.path.abspath(__file__))
 def load_documents(file_path):
     loader = UnstructuredPDFLoader(
         file_path,
         poppler_path="/usr/bin",
@@ -38,7 +31,7 @@ def load_documents(file_path):
 def setup_vectorstore(documents):
     embeddings = HuggingFaceEmbeddings()
     text_splitter = CharacterTextSplitter(
-        separator="\n",
         chunk_size=1000,
         chunk_overlap=200
     )
@@ -54,6 +47,8 @@ def create_chain(vectorstores):
     )
     retriever = vectorstores.as_retriever()
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         return_messages=True
     )
@@ -68,7 +63,7 @@ def create_chain(vectorstores):
 # Streamlit page configuration
 st.set_page_config(
     page_title="Chat with your documents",
-    page_icon="📁",
     layout="centered"
 )
@@ -91,23 +86,21 @@ if uploaded_file:
     if "conversation_chain" not in st.session_state:
         st.session_state.conversation_chain = create_chain(st.session_state.vectorstores)
-    # Display chat history
-    for message in st.session_state.chat_history:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-    # User input handling
-    user_input = st.chat_input("Ask any questions relevant to uploaded pdf")
-    if user_input:
-        st.session_state.chat_history.append({"role": "user", "content": user_input})
-        with st.chat_message("user"):
-            st.markdown(user_input)
-        with st.chat_message("assistant"):
-            response = st.session_state.conversation_chain({"question": user_input})
-            assistant_response = response["answer"]
-            st.markdown(assistant_response)
-            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
-else:
-    st.info("Please upload a PDF to start the conversation.")

 from langchain.chains import ConversationalRetrievalChain
 import os
 import nltk
+nltk.download('punkt_tab')
+nltk.download('averaged_perceptron_tagger_eng')
 # Install Poppler and Tesseract in the runtime environment
 os.system("apt-get update && apt-get install -y poppler-utils tesseract-ocr")
 secret = os.getenv('GROQ_API_KEY')
 working_dir = os.path.dirname(os.path.abspath(__file__))
 def load_documents(file_path):
+    # Specify poppler_path and tesseract_path to ensure compatibility
     loader = UnstructuredPDFLoader(
         file_path,
         poppler_path="/usr/bin",
 def setup_vectorstore(documents):
     embeddings = HuggingFaceEmbeddings()
     text_splitter = CharacterTextSplitter(
+        separator="/n",
         chunk_size=1000,
         chunk_overlap=200
     )
     )
     retriever = vectorstores.as_retriever()
     memory = ConversationBufferMemory(
+        llm=llm,
+        output_key="answer",
         memory_key="chat_history",
         return_messages=True
     )
 # Streamlit page configuration
 st.set_page_config(
     page_title="Chat with your documents",
+    page_icon="📑",
     layout="centered"
 )
     if "conversation_chain" not in st.session_state:
         st.session_state.conversation_chain = create_chain(st.session_state.vectorstores)
+# Display chat history
+for message in st.session_state.chat_history:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# User input handling
+user_input = st.chat_input("Ask any questions relevant to uploaded pdf")
+if user_input:
+    st.session_state.chat_history.append({"role": "user", "content": user_input})
+    with st.chat_message("user"):
+        st.markdown(user_input)
+    with st.chat_message("assistant"):
+        response = st.session_state.conversation_chain({"question": user_input})
+        assistant_response = response["answer"]
+        st.markdown(assistant_response)
+        st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})