sreedeepEK committed on
Commit
ee2a124
·
verified ·
1 Parent(s): f554ca3

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +39 -0
  2. helper_function.py +69 -0
  3. requirements.txt +12 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from dotenv import load_dotenv
3
+ from helper_function import get_conversation_chain, get_pdf_text, get_text_chunks, get_vectorstore
4
+
5
+ load_dotenv()
6
+
7
def chat_with_pdf(user_question, pdf_docs):
    """Answer a question about the uploaded PDFs.

    The uploads are converted to text, split into chunks, embedded into a
    vector store and queried through a newly built conversation chain. The
    whole pipeline is rebuilt on every call.

    Args:
        user_question: The question typed by the user.
        pdf_docs: The uploaded PDF payloads, passed on to ``get_pdf_text``;
            ``None`` or empty when nothing was uploaded.

    Returns:
        The chain's answer, or a short instruction message when the input
        cannot be processed.
    """
    if not pdf_docs:
        return "Please upload PDFs to process."

    # A blank question would still trigger the full embed-and-query pipeline.
    if not user_question or not user_question.strip():
        return "Please enter a question about your documents."

    # Process all uploaded files
    raw_text = get_pdf_text(pdf_docs)
    text_chunks = get_text_chunks(raw_text)

    # PDFs without extractable text (e.g. scanned images) leave nothing to
    # index, so report that instead of failing inside the vector store.
    if not text_chunks:
        return "No readable text was found in the uploaded PDFs."

    vectorstore = get_vectorstore(text_chunks)

    # Create conversation chain
    conversation_chain = get_conversation_chain(vectorstore)

    # ``run`` hands back the chain's single output value for this question.
    return conversation_chain.run({'question': user_question})
22
+
23
+
24
# Define the Gradio interface: a question box plus a multi-file PDF upload,
# both fed to chat_with_pdf, with the reply shown as plain text.
_question_box = gr.Textbox(label="Ask a question about your documents:")
_pdf_upload = gr.File(label="Upload your PDFs", type="binary", file_count="multiple")

interface = gr.Interface(
    fn=chat_with_pdf,
    inputs=[_question_box, _pdf_upload],
    outputs="text",
    title="Chat with PDFs ",
    description="Upload your PDF files and ask questions about their content.",
    examples=[["What is the summary of this document?", None]],
)

# Launch the Gradio interface only when executed as a script
if __name__ == '__main__':
    interface.launch(debug=True)
helper_function.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from dotenv import load_dotenv
5
+ from langchain_groq import ChatGroq
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain.memory import ConversationBufferMemory
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.text_splitter import CharacterTextSplitter
10
+ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
11
+
12
+ from PyPDF2 import PdfReader
13
+ import io
14
+
15
+ from PyPDF2 import PdfReader
16
+ import io
17
+
18
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of every PDF.

    Args:
        pdf_docs: Iterable of PDF sources. Raw ``bytes``/``bytearray``
            payloads are wrapped in an in-memory stream; anything else
            (a file path or a binary file-like object) is handed to
            ``PdfReader`` unchanged. ``None`` is treated as no documents.

    Returns:
        One string holding the page texts in document order, with no
        separator inserted between pages. Pages for which ``extract_text``
        returns nothing contribute an empty string.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        # Only raw byte payloads need wrapping; paths and streams are
        # accepted by PdfReader directly.
        if isinstance(pdf, (bytes, bytearray)):
            source = io.BytesIO(pdf)
        else:
            source = pdf
        pdf_reader = PdfReader(source)
        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)
    # Join once instead of growing a string with repeated +=.
    return "".join(page_texts)
26
+
27
+
28
def get_text_chunks(text):
    """Split *text* into chunks with a newline-based character splitter.

    The splitter is configured with ``chunk_size=1000`` and
    ``chunk_overlap=200``, both measured with ``len``.

    Args:
        text: The full document text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` produces for *text*.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
37
+
38
def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: The strings to embed and index.

    Returns:
        The store created by ``FAISS.from_texts``.
    """
    # NOTE(review): "all-MiniLM-L12-v2" looks like a plain
    # sentence-transformers model rather than an instructor model -- confirm
    # that HuggingFaceInstructEmbeddings is the intended wrapper.
    embedder = HuggingFaceInstructEmbeddings(model_name="all-MiniLM-L12-v2")
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
42
+
43
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain on top of *vectorstore*.

    Args:
        vectorstore: A store exposing ``as_retriever()``.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a Groq chat model, the
        store's retriever, and a new buffer memory kept under the
        ``chat_history`` key.
    """
    chat_model = ChatGroq(model="llama3-70b-8192")
    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=retriever,
        memory=history,
    )
54
+
55
+
56
+
57
def handle_userinput(user_question):
    """Send the question through the stored chain and render the answer.

    The chain is read from ``st.session_state.conversation``. When no such
    entry exists, an error is shown on the page and nothing else happens.

    Args:
        user_question: The question text to pass to the chain.

    Returns:
        None. The answer is written to the Streamlit page.
    """
    if 'conversation' not in st.session_state:
        st.error("Conversation not initialized. Please upload and process PDF documents first.")
        return

    conversation_chain = st.session_state.conversation

    # ``run`` returns the chain's single output value (the answer text), not
    # a dict, so calling ``.get('answer')`` on it would raise AttributeError.
    response = conversation_chain.run({'question': user_question})

    # Still accept a mapping in case the stored chain object returns one.
    if isinstance(response, dict):
        final_answer = response.get('answer')
    else:
        final_answer = response
    if not final_answer:
        final_answer = 'Sorry, I couldn\'t find an answer.'

    st.markdown(f"**Response:** {final_answer}")
    st.markdown("---")
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.22.0
2
+ faiss-cpu==1.7.4
3
+ sentence-transformers==2.2.2
4
+ pyPDF2
5
+ python-dotenv
6
+ langchain_groq
7
+ langchain_community
8
+ python-dotenv==1.0.0
9
+ pydantic==1.10.7
10
+ tiktoken==0.4.0
11
+ langchain_core
12
+ langchain