Wajahat698 commited on
Commit
459f5f2
·
verified ·
1 Parent(s): 42edc02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -65
app.py CHANGED
@@ -164,9 +164,9 @@ def merge_markdown_contents(contents):
164
 
165
  def upload_to_firebase(user_id, file):
166
  """
167
- Upload document to Firebase and extract content for querying.
168
  """
169
- content = convert_file_to_md(file) # Ensure this function is working as expected
170
  if not content:
171
  return None, "Failed to extract content from the file."
172
 
@@ -176,15 +176,41 @@ def upload_to_firebase(user_id, file):
176
  # Save document to Firebase
177
  db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
178
 
179
- # Update session state
180
- if "documents" not in st.session_state:
181
- st.session_state["documents"] = {}
182
- st.session_state["documents"][doc_id] = document_data
 
 
 
183
 
184
- st.sidebar.success(f"Document '{file.name}' uploaded successfully!")
185
  return content, None
186
 
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
  def fetch_trustbuilders(user_id):
190
  """
@@ -1060,21 +1086,23 @@ def google_search(query):
1060
 
1061
  def rag_response(query):
1062
  """
1063
- Handle RAG-based queries when uploaded document context is not mentioned.
1064
  """
1065
  try:
1066
- if "uploaded document" in query.lower():
1067
- # Handle document-specific queries
1068
- return handle_document_query(query)
1069
-
1070
- # Proceed with the existing knowledge base logic
1071
- retrieved_docs = search_knowledge_base(query) # Replace with actual KB search logic
1072
- if not retrieved_docs:
 
 
1073
  return "No relevant information found in the knowledge base."
1074
 
1075
- context = "\n".join(doc.page_content for doc in retrieved_docs)
1076
  prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
1077
- llm = ChatOpenAI(model="gpt-4o", temperature=0.3, api_key=openai_api_key)
1078
  response = llm.invoke(prompt)
1079
 
1080
  return response.content
@@ -1083,7 +1111,6 @@ def rag_response(query):
1083
  return "An error occurred during the RAG response generation process."
1084
 
1085
 
1086
-
1087
  # Define tools
1088
  @tool
1089
  def knowledge_base_tool(query: str):
@@ -1799,54 +1826,7 @@ def load_user_memory(user_id):
1799
  st.session_state["documents"] = {}
1800
  st.session_state["vector_store"] = {}
1801
 
1802
def get_document_content(doc_name=None):
    """
    Look up an uploaded document's content in Streamlit session state.

    Returns a (content, error) pair: on success the error part is None; on
    failure the content part is None and the error is a human-readable
    message. When no name is given, the most recently uploaded document wins.
    """
    docs = st.session_state.get("documents", {})
    if not docs:
        return None, "No documents have been uploaded."

    if not doc_name:
        # No name supplied: fall back to the most recently uploaded document.
        newest = list(docs.values())[-1]
        return newest.get("content"), None

    wanted = doc_name.strip().lower()
    match = next(
        (
            entry
            for entry in docs.values()
            if entry.get("name", "").strip().lower() == wanted
        ),
        None,
    )
    if match is not None:
        return match.get("content"), None
    return None, f"Document '{wanted}' not found."
1822
def handle_document_query(query):
    """
    Answer a user query that refers to a previously uploaded document.

    Extracts a file name (pdf/docx/doc/txt, optionally quoted) from the query
    text, fetches that document's content from session state, and asks the
    LLM to respond with the document as context. Returns either the model's
    answer or an error message string.
    """
    # Pull a file name such as report.pdf out of the query text.
    name_match = re.search(
        r"[\"']?([^\"']+\.(pdf|docx|doc|txt))[\"']?", query, re.IGNORECASE
    )
    if name_match is None:
        return "Please specify a document name in your query."
    target_name = name_match.group(1)

    document_text, lookup_error = get_document_content(target_name)
    if lookup_error:
        return lookup_error

    # Build the grounded prompt and query the model.
    prompt = f"Document Content:\n{document_text}\n\nUser Query: {query}\n\nResponse:"
    try:
        chat_model = ChatOpenAI(model="gpt-4o", temperature=0.5, api_key=openai_api_key)
        answer = chat_model.invoke(prompt)
        return answer.content
    except Exception as e:
        logger.error(f"Error generating response using the document: {e}")
        return f"Error generating response using the document: {e}"
1851
  if "missing_trustbucket_content" not in st.session_state:
1852
  st.session_state["missing_trustbucket_content"] = None
 
164
 
165
  def upload_to_firebase(user_id, file):
166
  """
167
+ Upload document to Firebase, extract content, and add it to the knowledge base.
168
  """
169
+ content = convert_file_to_md(file) # Ensure this function extracts content correctly
170
  if not content:
171
  return None, "Failed to extract content from the file."
172
 
 
176
  # Save document to Firebase
177
  db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
178
 
179
+ # Add content to the knowledge base
180
+ if "knowledge_base" not in st.session_state:
181
+ st.session_state["knowledge_base"] = []
182
+ st.session_state["knowledge_base"].append({"doc_id": doc_id, "content": content})
183
+
184
+ # Index the document content for semantic search
185
+ index_document_content(content, doc_id)
186
 
187
+ st.sidebar.success(f"Document '{file.name}' uploaded successfully and added to the knowledge base!")
188
  return content, None
189
 
190
 
191
+ def index_document_content(doc_content, doc_id):
192
+ """
193
+ Indexes the document content by splitting it into chunks and creating embeddings.
194
+ """
195
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
196
+ texts = text_splitter.split_text(doc_content)
197
+
198
+ # Create embeddings for each chunk
199
+ embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
200
+ doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
201
+ vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
202
+
203
+ # Save the vector store in session state
204
+ if "vector_store" not in st.session_state:
205
+ st.session_state["vector_store"] = {}
206
+ st.session_state["vector_store"][doc_id] = vector_store
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
 
215
  def fetch_trustbuilders(user_id):
216
  """
 
1086
 
1087
  def rag_response(query):
1088
  """
1089
+ Handle queries by searching both static and dynamically uploaded knowledge base.
1090
  """
1091
  try:
1092
+ # Retrieve relevant chunks from the vector store
1093
+ results = []
1094
+ if "vector_store" in st.session_state:
1095
+ for vector_store in st.session_state["vector_store"].values():
1096
+ results.extend(vector_store.similarity_search(query, k=3)) # Adjust `k` for the number of results
1097
+
1098
+ # Combine results into a context
1099
+ context = "\n".join([result.page_content for result in results])
1100
+ if not context:
1101
  return "No relevant information found in the knowledge base."
1102
 
1103
+ # Generate AI response with the retrieved context
1104
  prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
1105
+ llm = ChatOpenAI(model="gpt-4", temperature=0.3, api_key=openai_api_key)
1106
  response = llm.invoke(prompt)
1107
 
1108
  return response.content
 
1111
  return "An error occurred during the RAG response generation process."
1112
 
1113
 
 
1114
  # Define tools
1115
  @tool
1116
  def knowledge_base_tool(query: str):
 
1826
  st.session_state["documents"] = {}
1827
  st.session_state["vector_store"] = {}
1828
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1830
 
1831
  if "missing_trustbucket_content" not in st.session_state:
1832
  st.session_state["missing_trustbucket_content"] = None