dimoZ committed on
Commit
c61da78
·
verified ·
1 Parent(s): 47ae0f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -69
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import os
2
  import streamlit as st
3
  from PyPDF2 import PdfReader
 
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.vectorstores import FAISS
6
- from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
7
- from langchain.prompts import PromptTemplate
8
  from langchain.chains.question_answering import load_qa_chain
 
9
  from dotenv import load_dotenv
10
  from fuzzywuzzy import process
11
 
@@ -14,100 +15,114 @@ load_dotenv()
14
  google_api_key = os.getenv("GOOGLE_API_KEY")
15
  if google_api_key is None:
16
  st.error("GOOGLE_API_KEY is not set. Please set it in the .env file.")
17
- else:
18
- from google.generativeai import configure
19
- configure(api_key=google_api_key)
20
 
21
- # Global variable to store chat history
22
- if 'chat_history' not in st.session_state:
 
 
 
23
  st.session_state.chat_history = []
24
 
25
- # Function to extract text from uploaded PDF files
26
- def extract_text_from_pdfs(files):
 
 
 
 
 
 
 
 
 
27
  text = ""
28
- for pdf in files:
29
- reader = PdfReader(pdf)
30
- for page in reader.pages:
31
  text += page.extract_text()
32
  return text
33
 
34
- # Function to split text into manageable chunks
 
 
 
 
 
 
 
 
 
35
  def split_text_into_chunks(text):
36
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
37
  return splitter.split_text(text)
38
 
39
- # Create and store embeddings
40
- def create_vector_store(chunks):
41
  embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
42
- vector_store = FAISS.from_texts(chunks, embedding=embeddings)
43
  vector_store.save_local("faiss_index")
44
- return vector_store
45
 
46
- # Load a previously created vector store
47
- def load_vector_store():
48
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
49
- return FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
 
 
 
 
 
 
 
50
 
51
- # Generate a response using Gemini
52
- def generate_response(question, vector_store):
 
 
53
  docs = vector_store.similarity_search(question)
54
- chain = get_qa_chain()
55
  response = chain({"input_documents": docs, "question": question}, return_only_outputs=True)
56
  return response["output_text"]
57
 
58
- # Load the question-answering chain
59
- def get_qa_chain():
60
- prompt = PromptTemplate(
61
- template="""
62
- Use the provided context to answer the question in detail. If the answer is unavailable, respond with "Answer not found in the provided context."
63
- Context:\n{context}\n
64
- Question:\n{question}\n
65
- Answer:""",
66
- input_variables=["context", "question"]
67
- )
68
- llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.5)
69
- return load_qa_chain(llm, chain_type="stuff", prompt=prompt)
70
-
71
- # Suggest questions or keywords dynamically
72
- def suggest_keywords(query, all_texts):
73
- return process.extract(query, all_texts, limit=5)
74
-
75
  # Main app function
76
  def main():
77
- st.set_page_config(page_title="Virtual Agent App", layout="wide")
78
- st.title("Virtual Agent Powered by Gemini")
79
 
80
- # Sidebar for uploading files
 
 
 
 
 
 
 
 
81
  with st.sidebar:
82
  st.header("Upload Documents")
83
- uploaded_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
84
- if st.button("Process Files"):
85
- if uploaded_files:
86
- raw_text = extract_text_from_pdfs(uploaded_files)
87
- text_chunks = split_text_into_chunks(raw_text)
 
 
 
 
88
  create_vector_store(text_chunks)
89
  st.success("Documents processed successfully!")
90
  else:
91
- st.error("Please upload at least one PDF.")
92
-
93
- # Main interface for questions and suggestions
94
- user_question = st.text_input("Ask your question here (suggestions below):")
95
- if user_question:
96
- # Load vector store and generate suggestions
97
- vector_store = load_vector_store()
98
- suggestions = suggest_keywords(user_question, vector_store.similarity_search(user_question, k=10))
99
- st.write("Suggestions:", [s[0] for s in suggestions])
100
-
101
- # Generate and display response
102
- if st.button("Submit Question"):
103
- response = generate_response(user_question, vector_store)
104
- st.write("Answer:", response)
105
- st.session_state.chat_history.append({"question": user_question, "answer": response})
106
-
107
- # Chat history download
108
  if st.sidebar.button("Download Chat History"):
109
- chat_history = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in st.session_state.chat_history])
110
- st.sidebar.download_button("Download History", chat_history, file_name="chat_history.txt", mime="text/plain")
111
 
112
  if __name__ == "__main__":
113
  main()
 
1
  import os
2
  import streamlit as st
3
  from PyPDF2 import PdfReader
4
+ from docx import Document
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.vectorstores import FAISS
7
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
 
8
  from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.prompts import PromptTemplate
10
  from dotenv import load_dotenv
11
  from fuzzywuzzy import process
12
 
 
# Read the Gemini API key from the environment.
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    st.error("GOOGLE_API_KEY is not set. Please set it in the .env file.")
else:
    # Configure the Gemini API only when a key is actually available.
    # The import lives here because no top-of-file import binds the name
    # `genai`; calling genai.configure() without it raises NameError.
    import google.generativeai as genai

    genai.configure(api_key=google_api_key)

# Global variables: chat history lives in session state and is created once.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# List of predefined questions that typed input is fuzzy-matched against
# to produce the clickable suggestions.
suggested_questions = [
    "What is the revenue of the company?",
    "Who are the board members?",
    "What are the key achievements mentioned in the report?",
    "What is the company's growth strategy?",
    "What are the major risks highlighted?",
]
34
+
35
+ # Function to extract text from PDF
36
def extract_text_from_pdf(pdf_docs):
    """Return the concatenated text of every page of every PDF in *pdf_docs*.

    Args:
        pdf_docs: Iterable of objects that ``PdfReader`` can open (main()
            passes the files returned by the PDF uploader).

    Returns:
        A single string with the page texts in document order, then page
        order; the empty string when *pdf_docs* is empty.
    """
    # Defensive: a falsy result from extract_text() (e.g. None for a page
    # without a text layer) is treated as empty text so it can never break
    # the concatenation with a TypeError.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
43
 
44
+ # Function to extract text from .docx
45
def extract_text_from_docx(docx_docs):
    """Return the paragraph text of every .docx in *docx_docs*.

    Each paragraph's text is followed by a newline; documents are processed
    in the order given. Returns the empty string when *docx_docs* is empty.
    """
    return "".join(
        paragraph.text + "\n"
        for source in docx_docs
        for paragraph in Document(source).paragraphs
    )
52
+
53
def split_text_into_chunks(text):
    """Split *text* into chunks using a recursive character splitter.

    The splitter is configured with chunk_size=1000 and chunk_overlap=100;
    the value returned is whatever its ``split_text`` call produces.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    ).split_text(text)
57
 
58
+ # Function to create vector store
59
def create_vector_store(text_chunks):
    """Embed *text_chunks*, build a FAISS index and save it to "faiss_index".

    Nothing is returned; the index is read back from disk when a question
    is answered.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    FAISS.from_texts(text_chunks, embedding=embedder).save_local("faiss_index")
 
63
 
64
+ # Function to load a QA chain
65
def load_qa_chain_model():
    """Build the "stuff" question-answering chain backed by Gemini.

    The chain uses the ``gemini-pro`` chat model at temperature 0.5 and a
    prompt with two variables, ``context`` and ``question``.
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.5)
    # The whitespace inside the literal is part of the prompt text, so the
    # template lines keep their in-function indentation.
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="""
    Use the context provided to answer the question accurately. If the answer is not found, respond with "Answer not available in the context."
    Context:\n{context}\n
    Question:\n{question}\n
    Answer:
    """,
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
75
 
76
+ # Function to process user questions
77
def process_user_question(question):
    """Answer *question* from the saved "faiss_index" vector store.

    Loads the index from disk, retrieves the documents most similar to the
    question, runs them through the QA chain and returns the chain's
    ``output_text``.
    """
    # NOTE(review): allow_dangerous_deserialization=True lets the loader
    # unpickle the stored index; that is only acceptable while the
    # "faiss_index" directory is written exclusively by this app.
    index = FAISS.load_local(
        "faiss_index",
        GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
        allow_dangerous_deserialization=True,
    )
    matches = index.similarity_search(question)
    qa_chain = load_qa_chain_model()
    result = qa_chain(
        {"input_documents": matches, "question": question},
        return_only_outputs=True,
    )
    return result["output_text"]
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def _apply_suggestion(text):
    """Button callback: copy a suggested question into the question box."""
    st.session_state["question_input"] = text


# Main app function
def main():
    """Render the Streamlit page: question box, suggestions, upload, answers.

    The script is re-executed on every widget interaction, so the last
    question/answer pair is cached in ``st.session_state["last_qa"]``. This
    keeps unrelated clicks (suggestion buttons, sidebar buttons) from
    re-querying the model and appending duplicate chat-history entries.
    """
    st.set_page_config(page_title="Virtual Agent", layout="wide")
    st.title("Ask Questions from Your Company Documents")

    # Real-time suggestion box
    user_input = st.text_input(
        "Type your question",
        placeholder="Ask a question...",
        key="question_input",
    )
    if user_input:
        st.markdown("**Suggestions:**")
        matches = process.extract(user_input, suggested_questions, limit=5)
        for suggestion, _score in matches:
            st.button(suggestion, on_click=_apply_suggestion, args=(suggestion,))

    # Sidebar for file upload
    with st.sidebar:
        st.header("Upload Documents")
        pdf_docs = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
        docx_docs = st.file_uploader("Upload .docx files", type="docx", accept_multiple_files=True)
        if st.button("Process Documents"):
            if pdf_docs or docx_docs:
                # st.spinner only shows while used as a context manager.
                with st.spinner("Processing..."):
                    pdf_text = extract_text_from_pdf(pdf_docs) if pdf_docs else ""
                    docx_text = extract_text_from_docx(docx_docs) if docx_docs else ""
                    text_chunks = split_text_into_chunks(pdf_text + docx_text)
                    if text_chunks:
                        create_vector_store(text_chunks)
                if text_chunks:
                    # The index changed: drop the cached answer so the
                    # current question is answered against the new documents.
                    st.session_state.pop("last_qa", None)
                    st.success("Documents processed successfully!")
                else:
                    # An index cannot be built from zero chunks.
                    st.error("No extractable text was found in the uploaded documents.")
            else:
                st.error("Please upload at least one document.")

    # Handle question input and response
    if user_input:
        if not os.path.isdir("faiss_index"):
            # Nothing has been indexed yet; loading the store would raise.
            st.warning("Please upload and process documents before asking a question.")
        else:
            cached = st.session_state.get("last_qa")
            if cached is not None and cached["question"] == user_input:
                # Rerun triggered by some other widget: reuse the answer.
                answer = cached["answer"]
            else:
                with st.spinner("Generating response..."):
                    answer = process_user_question(user_input)
                entry = {"question": user_input, "answer": answer}
                st.session_state.chat_history.append(entry)
                st.session_state["last_qa"] = entry
            st.write(f"**Answer:** {answer}")

    # Chat history download option
    if st.sidebar.button("Download Chat History"):
        chat_history = "\n".join(
            f"Q: {entry['question']}\nA: {entry['answer']}"
            for entry in st.session_state.chat_history
        )
        st.sidebar.download_button("Download", chat_history, file_name="chat_history.txt", mime="text/plain")


if __name__ == "__main__":
    main()