dimoZ committed on
Commit
6268b6e
·
verified ·
1 Parent(s): b4d7c38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -115
app.py CHANGED
@@ -1,115 +1,113 @@
1
- import streamlit as st
2
- from PyPDF2 import PdfReader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- import os
5
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
- import google.generativeai as genai
7
- from langchain.vectorstores import FAISS
8
- from langchain_google_genai import ChatGoogleGenerativeAI
9
- from langchain.chains.question_answering import load_qa_chain
10
- from langchain.prompts import PromptTemplate
11
- from dotenv import load_dotenv
12
-
13
- # Load environment variables
14
- load_dotenv()
15
- google_api_key = os.getenv("GOOGLE_API_KEY")
16
- if google_api_key is None:
17
- st.error("GOOGLE_API_KEY is not set. Please set it in the .env file.")
18
- else:
19
- genai.configure(api_key=google_api_key)
20
-
21
- # Global variable to store chat history
22
- if 'chat_history' not in st.session_state:
23
- st.session_state.chat_history = []
24
-
25
- # Function to extract text from PDF files
26
- def get_pdf_text(pdf_docs):
27
- text = ""
28
- for pdf in pdf_docs:
29
- pdf_reader = PdfReader(pdf)
30
- for page in pdf_reader.pages:
31
- text += page.extract_text()
32
- return text
33
-
34
- # Function to split text into chunks
35
- def get_text_chunks(text):
36
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
37
- chunks = text_splitter.split_text(text)
38
- return chunks
39
-
40
- # Function to create and save a vector store from text chunks
41
- def get_vector_store(text_chunks):
42
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
43
- vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
44
- vector_store.save_local("faiss_index")
45
-
46
- # Function to load a question-answering chain
47
- def get_conversational_chain():
48
- prompt_template = """
49
- Answer the question as detailed as possible from the provided context, make sure to provide all the details. If the answer is not in
50
- provided context, just say, "Answer is not available in the context." Don't provide the wrong answer.\n\n
51
- Context:\n{context}\n
52
- Question:\n{question}\n
53
- Answer:
54
- """
55
- model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.5)
56
- prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
57
- chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
58
- return chain
59
-
60
- # Function to handle user input and generate a response
61
- def user_input(user_question):
62
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
63
-
64
- new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
65
- docs = new_db.similarity_search(user_question)
66
-
67
- chain = get_conversational_chain()
68
-
69
- response = chain(
70
- {"input_documents": docs, "question": user_question},
71
- return_only_outputs=True
72
- )
73
-
74
- response_text = response["output_text"]
75
-
76
- # Update chat history
77
- st.session_state.chat_history.append({"question": user_question, "answer": response_text})
78
-
79
- st.write("Reply: ", response_text)
80
-
81
- # Function to download chat history as a .txt file
82
- def download_chat_history_txt():
83
- chat_lines = [f"Question: {entry['question']}\nAnswer: {entry['answer']}\n" for entry in st.session_state.chat_history]
84
- chat_text = "\n".join(chat_lines)
85
- return chat_text
86
-
87
- # Main function to run the Streamlit app
88
- def main():
89
- st.set_page_config(page_title="Chat PDF", layout="wide")
90
- st.header("Chat with PDF")
91
-
92
- user_question = st.text_input("Ask a Question from the PDF Files")
93
-
94
- if user_question:
95
- user_input(user_question)
96
-
97
- with st.sidebar:
98
- st.title("Menu:")
99
- pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
100
- if st.button("Submit & Process"):
101
- if pdf_docs:
102
- with st.spinner("Processing..."):
103
- raw_text = get_pdf_text(pdf_docs)
104
- text_chunks = get_text_chunks(raw_text)
105
- get_vector_store(text_chunks)
106
- st.success("Done")
107
- else:
108
- st.error("Please upload at least one PDF file.")
109
-
110
- # Only one download button
111
- chat_txt = download_chat_history_txt()
112
- st.download_button("Download Chat History (TXT)", chat_txt, file_name="chat_history.txt", mime="text/plain")
113
-
114
- if __name__ == "__main__":
115
- main()
 
1
+ import os
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.vectorstores import FAISS
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from dotenv import load_dotenv
10
+ from fuzzywuzzy import process
11
+
12
# Load environment variables (python-dotenv reads a local .env file if present)
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")
if not google_api_key:
    # A missing *or empty* key makes every later Gemini call fail with a far
    # less obvious error, so report it and halt this script run right here.
    # Halting also prevents main() from calling st.set_page_config() after
    # st.error() has already rendered; Streamlit releases that require
    # set_page_config() to be the first command raise in that situation.
    st.error("GOOGLE_API_KEY is not set. Please set it in the .env file.")
    st.stop()
else:
    # The SDK is imported only on this branch, i.e. once a key is available.
    from google.generativeai import configure
    configure(api_key=google_api_key)

# Per-session chat history. The script is re-executed on every interaction,
# so the list is created only when the session does not already hold one.
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
24
+
25
# Function to extract text from uploaded PDF files
def extract_text_from_pdfs(files):
    """Return the concatenated text of every page of every given PDF.

    Args:
        files: Iterable of objects that ``PdfReader`` can open (the app
            passes the files returned by the Streamlit uploader).

    Returns:
        One string containing the page texts in iteration order. Pages for
        which no text can be extracted contribute nothing.
    """
    page_texts = []
    for pdf in files:
        reader = PdfReader(pdf)
        for page in reader.pages:
            # ``or ""`` keeps a page with no extractable text (a falsy
            # result) from aborting the whole run with a TypeError.
            page_texts.append(page.extract_text() or "")
    # A single join avoids rebuilding the string once per page.
    return "".join(page_texts)
33
+
34
# Function to split text into manageable chunks
def split_text_into_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum chunk length handed to the splitter. Defaults to
            the value this app previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to the previously hard-coded value.

    Returns:
        A list of chunk strings; an empty list when ``text`` is empty.
    """
    if not text:
        # Nothing to split; skip building a splitter at all.
        return []
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
38
+
39
# Create and store embeddings
def create_vector_store(chunks):
    """Embed ``chunks``, build a FAISS index, and persist it to ``faiss_index``.

    Args:
        chunks: Non-empty list of text chunks to embed.

    Returns:
        The FAISS vector store that was built and saved.

    Raises:
        ValueError: If ``chunks`` is empty. An index cannot be built from
            zero texts, and failing here gives a clearer message than the
            error that would otherwise surface from inside the library.
    """
    if not chunks:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    # Persisted locally so later script runs can reload it from disk.
    vector_store.save_local("faiss_index")
    return vector_store
45
+
46
# Load a previously created vector store
def load_vector_store():
    """Reload the FAISS index saved under ``faiss_index``.

    Returns:
        The vector store loaded from disk, using the same embedding model
        name that is used when the index is created.
    """
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(security): allow_dangerous_deserialization=True opts in to
    # pickle-based loading. Only load an index directory this app wrote.
    store = FAISS.load_local(
        "faiss_index",
        embedding_model,
        allow_dangerous_deserialization=True,
    )
    return store
50
+
51
# Generate a response using Gemini
def generate_response(question, vector_store):
    """Answer ``question`` from the documents most similar to it.

    Args:
        question: The user's question text.
        vector_store: Store exposing ``similarity_search``.

    Returns:
        The ``"output_text"`` entry of the QA chain's result.
    """
    matching_docs = vector_store.similarity_search(question)
    qa_chain = get_qa_chain()
    chain_inputs = {"input_documents": matching_docs, "question": question}
    result = qa_chain(chain_inputs, return_only_outputs=True)
    return result["output_text"]
57
+
58
# Load the question-answering chain
def get_qa_chain():
    """Build the "stuff" question-answering chain backed by ``gemini-pro``.

    Returns:
        The chain produced by ``load_qa_chain`` for a prompt that takes
        ``context`` and ``question`` variables.
    """
    # NOTE(review): whitespace at the start of each template line is
    # reproduced as shown in the diff view; confirm against the real file.
    template_text = """
Use the provided context to answer the question in detail. If the answer is unavailable, respond with "Answer not found in the provided context."
Context:\n{context}\n
Question:\n{question}\n
Answer:"""
    qa_prompt = PromptTemplate(
        template=template_text,
        input_variables=["context", "question"],
    )
    gemini = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.5)
    qa_chain = load_qa_chain(gemini, chain_type="stuff", prompt=qa_prompt)
    return qa_chain
70
+
71
# Suggest questions or keywords dynamically
def suggest_keywords(query, all_texts):
    """Return up to five candidates that best fuzzy-match ``query``.

    Args:
        query: Text to match against.
        all_texts: Candidates to rank. Items may be plain strings or objects
            exposing a ``page_content`` attribute (the app passes the results
            of ``similarity_search``). A mapping is forwarded untouched.

    Returns:
        The result of ``process.extract`` with ``limit=5``; an empty list
        when there are no candidates.
    """
    if hasattr(all_texts, "items"):
        # Mapping input: leave it exactly as the caller supplied it.
        return process.extract(query, all_texts, limit=5)

    # The fuzzy matcher's default processor works on strings, so unwrap
    # document-like objects to their text before ranking them.
    candidates = [getattr(item, "page_content", item) for item in all_texts]
    if not candidates:
        return []
    return process.extract(query, candidates, limit=5)
74
+
75
# Main app function
def main():
    """Render the app: PDF upload and indexing, Q&A with suggestions, history export."""
    st.set_page_config(page_title="Virtual Agent App", layout="wide")
    st.title("Virtual Agent Powered by Gemini")

    # Sidebar for uploading files
    with st.sidebar:
        st.header("Upload Documents")
        uploaded_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
        if st.button("Process Files"):
            if uploaded_files:
                raw_text = extract_text_from_pdfs(uploaded_files)
                text_chunks = split_text_into_chunks(raw_text)
                if text_chunks:
                    create_vector_store(text_chunks)
                    st.success("Documents processed successfully!")
                else:
                    # PDFs with no extractable text produce no chunks, and
                    # an index cannot be built from nothing.
                    st.error("No extractable text was found in the uploaded PDFs.")
            else:
                st.error("Please upload at least one PDF.")

    # Main interface for questions and suggestions
    user_question = st.text_input("Ask your question here (suggestions below):")
    if user_question:
        if not os.path.isdir("faiss_index"):
            # No index has been saved yet, so loading it would raise.
            st.warning("Please upload and process at least one PDF before asking a question.")
        else:
            # Load vector store and generate suggestions
            vector_store = load_vector_store()
            related_docs = vector_store.similarity_search(user_question, k=10)
            # The fuzzy matcher ranks strings, so hand it the document text.
            candidate_texts = [doc.page_content for doc in related_docs]
            suggestions = suggest_keywords(user_question, candidate_texts)
            st.write("Suggestions:", [s[0] for s in suggestions])

            # Generate and display response
            if st.button("Submit Question"):
                response = generate_response(user_question, vector_store)
                st.write("Answer:", response)
                st.session_state.chat_history.append({"question": user_question, "answer": response})

    # Chat history download (two steps: the download button is rendered only
    # on the run in which the first button reports a click).
    if st.sidebar.button("Download Chat History"):
        chat_history = "\n".join(
            f"Q: {item['question']}\nA: {item['answer']}"
            for item in st.session_state.chat_history
        )
        st.sidebar.download_button("Download History", chat_history, file_name="chat_history.txt", mime="text/plain")


if __name__ == "__main__":
    main()