Spaces:

sreedeepEK
/

ChatwithPDF

Sleeping

App Files Files Community

sreedeepEK committed on Aug 17, 2024

Commit

ee2a124

verified ·

1 Parent(s): f554ca3

Upload 3 files

Browse files

Files changed (3) hide show

app.py +39 -0
helper_function.py +69 -0
requirements.txt +12 -0

app.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import gradio as gr
+from dotenv import load_dotenv
+from helper_function import get_conversation_chain, get_pdf_text, get_text_chunks, get_vectorstore
+load_dotenv()
+def chat_with_pdf(user_question, pdf_docs):
+    if not pdf_docs:
+        return "Please upload PDFs to process."
+    # Process all uploaded files
+    raw_text = get_pdf_text(pdf_docs)
+    text_chunks = get_text_chunks(raw_text)
+    vectorstore = get_vectorstore(text_chunks)
+    # Create conversation chain
+    conversation_chain = get_conversation_chain(vectorstore)
+    # Handle user input using the appropriate method
+    response = conversation_chain.run({'question': user_question})
+    return response
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=chat_with_pdf,
+    inputs=[
+        gr.Textbox(label="Ask a question about your documents:"),
+        gr.File(label="Upload your PDFs", type="binary", file_count="multiple")
+    ],
+    outputs="text",
+    title="Chat with PDFs ",
+    description="Upload your PDF files and ask questions about their content.",
+    examples=[["What is the summary of this document?", None]]
+)
+# Launch the Gradio interface
+if __name__ == '__main__':
+    interface.launch(debug=True)

helper_function.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import io
+import streamlit as st
+from PyPDF2 import PdfReader
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from langchain_community.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from PyPDF2 import PdfReader
+import io
+from PyPDF2 import PdfReader
+import io
+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(io.BytesIO(pdf))
+        for page in pdf_reader.pages:
+            text += page.extract_text() or ""
+    return text
+def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+def get_vectorstore(text_chunks):
+    embeddings = HuggingFaceInstructEmbeddings(model_name="all-MiniLM-L12-v2")
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    return vectorstore
+def get_conversation_chain(vectorstore):
+    llm = ChatGroq(model="llama3-70b-8192")
+    memory = ConversationBufferMemory(
+        memory_key='chat_history', return_messages=True
+    )
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        memory=memory
+    )
+    return conversation_chain
+def handle_userinput(user_question):
+    if 'conversation' not in st.session_state:
+        st.error("Conversation not initialized. Please upload and process PDF documents first.")
+        return
+    conversation_chain = st.session_state.conversation
+    # Process user input using the appropriate method
+    response = conversation_chain.run({'question': user_question})
+    final_answer = response.get('answer', 'Sorry, I couldn\'t find an answer.')
+    st.markdown(f"**Response:** {final_answer}")
+    st.markdown("---")

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+streamlit==1.22.0
+faiss-cpu==1.7.4
+sentence-transformers==2.2.2
+pyPDF2
+python-dotenv
+langchain_groq
+langchain_community
+python-dotenv==1.0.0
+pydantic==1.10.7
+tiktoken==0.4.0
+langchain_core
+langchain