Surat96 committed on
Commit
b013e38
·
verified ·
1 Parent(s): 8e9090e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
4
+ import os, getpass
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ import google.generativeai as genai
7
+ from langchain.vectorstores import FAISS
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain.chains.question_answering import load_qa_chain
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain.chains import ConversationalRetrievalChain
12
+ from langchain.memory import ConversationBufferMemory
13
+
14
+
15
# Gemini key
# SECURITY: the key is read from the environment (for example a deployment
# secret or a local shell export) so that no credential is stored in source
# control. The variable name is the same one the original code populated.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it or add it as a deployment "
        "secret before starting the app."
    )
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
18
+
19
+
20
def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every uploaded PDF.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (the
            script passes the files returned by the Streamlit uploader).
            ``None`` or an empty iterable yields an empty string.

    Returns:
        A single string with the page texts joined in document/page order,
        with no separator inserted between pages.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: treat a page with no extractable text (empty or
            # None result) as an empty string instead of failing the upload.
            page_texts.append(page.extract_text() or "")
    # Join once at the end rather than growing a string with += per page.
    return "".join(page_texts)
27
+
28
+
29
def get_text_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split raw text into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum length of a chunk, measured with ``len``.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. It must stay well below ``chunk_size``; the previous
            hard-coded value equalled the chunk size, which makes
            consecutive chunks almost identical and inflates the index.

    Returns:
        A list of text chunks; an empty list when ``text`` is empty.
    """
    if not text:
        # Nothing to split; skip building the splitter.
        return []
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)
34
+
35
+
36
def get_vector_store(text_chunks):
    """Embed the text chunks and index them in an in-memory FAISS store.

    Args:
        text_chunks: Non-empty list of strings to embed.

    Returns:
        The FAISS vector store built from ``text_chunks``.

    Raises:
        ValueError: If ``text_chunks`` is empty (for example when the
            uploaded PDFs contain no extractable text).
    """
    if not text_chunks:
        # Fail early with an actionable message instead of letting the
        # index construction fail on an empty embedding list.
        raise ValueError("No text chunks to index; the PDFs contain no extractable text.")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return FAISS.from_texts(text_chunks, embedding=embeddings)
40
+
41
+
42
def get_conversational_chain(Fvs):
    """Build the retrieval chat chain over the given vector store.

    Args:
        Fvs: Vector store exposing ``as_retriever()`` (the script passes the
            FAISS store built from the uploaded PDFs).

    Returns:
        A ``ConversationalRetrievalChain`` that keeps its chat history in a
        ``ConversationBufferMemory`` under the ``chat_history`` key and
        answers using the custom prompt below.
    """
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.3)
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # The prompt was previously created but never handed to the chain, so the
    # "answer only from the context" instruction had no effect. Passing it via
    # combine_docs_chain_kwargs applies it to the answer-generation step.
    chain = ConversationalRetrievalChain.from_llm(
        llm=model,
        retriever=Fvs.as_retriever(),
        memory=memory,
        combine_docs_chain_kwargs={"prompt": prompt},
    )

    return chain
59
+
60
+
61
+
62
def user_input(user_question):
    """Send a question to the stored chain and render the whole chat so far.

    Reads the chain from ``st.session_state.conversation``, stores the
    returned history in ``st.session_state.chatHistory`` and writes every
    message to the page.

    Args:
        user_question: The question text typed by the user.
    """
    conversation = st.session_state.get("conversation")
    if conversation is None:
        # No PDFs have been processed yet, so there is no chain to call.
        st.warning("Please upload and process your PDF files before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chatHistory = response['chat_history']
    for i, message in enumerate(st.session_state.chatHistory):
        # Messages are rendered by position: even index as the user's turn,
        # odd index as the model's reply.
        if i % 2 == 0:
            st.write("Human: ", message.content)
        else:
            st.write("Bot: ", message.content)
70
+
71
## streamlit app
st.set_page_config("Chat With Multiple PDF")
st.header("Chat with Multiple PDF :books:")

# Initialise the session keys on every run (not only when the ask button is
# pressed) so later code can read them safely.
if "conversation" not in st.session_state:
    st.session_state.conversation = None
if "chatHistory" not in st.session_state:
    st.session_state.chatHistory = None

user_question = st.text_input("Ask a Question from the PDF Files")
submit = st.button("Ask the question")

## If ask button is clicked
if submit and user_question:
    if st.session_state.conversation is None:
        # Asking before any PDF has been processed would call a missing chain.
        st.warning("Please upload and process your PDF files before asking a question.")
    else:
        user_input(user_question)

with st.sidebar:
    st.title("Menu:")
    pdf_docs = st.file_uploader("Upload your PDF Files", accept_multiple_files=True)
    if st.button("Submit & Process"):
        if not pdf_docs:
            st.warning("Please upload at least one PDF file first.")
        else:
            with st.spinner("Processing..."):
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                if text_chunks:
                    Fvs = get_vector_store(text_chunks)
                    st.session_state.conversation = get_conversational_chain(Fvs)
            if text_chunks:
                st.success("Done")
            else:
                # Nothing to index, e.g. scanned PDFs without a text layer.
                st.error("No extractable text was found in the uploaded PDF files.")

    # NOTE(review): the original indentation was lost; this button is assumed
    # to live in the sidebar - confirm against the deployed layout.
    if st.button("Clear Chat Window", use_container_width=True, type="primary"):
        # Reset the key the app actually uses for the transcript, and wipe the
        # chain's memory so earlier turns do not reappear with the next answer.
        st.session_state.chatHistory = None
        if st.session_state.conversation is not None:
            st.session_state.conversation.memory.clear()
        st.rerun()

footer = """
---
#### Made By [Surat Banerjee](https://www.linkedin.com/in/surat-banerjee/)
For Any Queries, Reach out on [Portfolio](https://suratbanerjee.wixsite.com/myportfoliods)
"""

st.markdown(footer, unsafe_allow_html=True)