Spaces:

agnixcode
/

chat_pDF

Sleeping

App Files Community

Dua Rajper committed on Feb 25, 2025

Commit

7a19be8

verified ·

1 Parent(s): 39e042c

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -23

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
@@ -14,10 +14,7 @@ from langchain_groq import ChatGroq
 load_dotenv()
 # Set up logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
 # Function to extract text from PDF files
 def get_pdf_text(pdf_docs):
@@ -25,31 +22,25 @@ def get_pdf_text(pdf_docs):
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
         for page in pdf_reader.pages:
-            text += page.extract_text()
     return text
-# Function to split the extracted text into chunks
 def get_text_chunks(text):
-    text_splitter = CharacterTextSplitter(
-        separator="\n",
-        chunk_size=1000,
-        chunk_overlap=200,
-        length_function=len
-    )
     chunks = text_splitter.split_text(text)
     return chunks
-# Function to create a FAISS vectorstore
 def get_vectorstore(text_chunks):
-    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 # Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
     try:
-        groq_api_key = os.getenv("GROQ_API_KEY")
-        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5, api_key=groq_api_key)
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
         conversation_chain = ConversationalRetrievalChain.from_llm(
@@ -81,23 +72,21 @@ def handle_userinput(user_question):
 # Main function to run the Streamlit app
 def main():
     load_dotenv()
-    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
-    st.header("Chat with multiple PDFs :books:")
     user_question = st.text_input("Ask a question about your documents:")
     if user_question:
         handle_userinput(user_question)
     with st.sidebar:
-        st.subheader("Your documents")
-        pdf_docs = st.file_uploader(
-            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
-        )
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)

 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 load_dotenv()
 # Set up logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 # Function to extract text from PDF files
 def get_pdf_text(pdf_docs):
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
         for page in pdf_reader.pages:
+            text += page.extract_text() or ""
     return text
+# Function to split extracted text into chunks
 def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
     chunks = text_splitter.split_text(text)
     return chunks
+# Function to create a FAISS vectorstore using Hugging Face Embeddings
 def get_vectorstore(text_chunks):
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 # Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
     try:
+        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
         conversation_chain = ConversationalRetrievalChain.from_llm(
 # Main function to run the Streamlit app
 def main():
     load_dotenv()
+    st.set_page_config(page_title="Chat with PDFs", page_icon="📄")
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
+    st.header("Chat with your PDFs 📄🤖")
     user_question = st.text_input("Ask a question about your documents:")
     if user_question:
         handle_userinput(user_question)
     with st.sidebar:
+        st.subheader("Upload your PDFs")
+        pdf_docs = st.file_uploader("Upload PDFs and click 'Process'", accept_multiple_files=True)
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)