Wajahat698 committed on
Commit
6fedd5b
·
verified ·
1 Parent(s): 652f6fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -188,14 +188,17 @@ def extract_text_from_file(file):
188
 
189
 
190
  def upload_to_firebase(user_id, file):
191
- content = extract_text_from_file (file)
 
 
 
192
  if not content:
193
- return None, "Failed to convert file to content."
194
 
195
  doc_id = str(uuid.uuid4())
196
  document_data = {"content": content, "name": file.name}
197
 
198
- # Save to Firebase
199
  db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
200
 
201
  # Update session state
@@ -203,25 +206,22 @@ def upload_to_firebase(user_id, file):
203
  st.session_state["documents"] = {}
204
  st.session_state["documents"][doc_id] = document_data
205
 
206
- # Index the document content
207
  index_document_content(content, doc_id)
208
 
209
- st.sidebar.success(f"Document '{file.name}' uploaded successfully!")
210
- return content,None
 
211
 
212
  def index_document_content(doc_content, doc_id):
213
  """
214
  Indexes the document content by splitting it into chunks and creating embeddings.
215
  """
216
- # Split the document into chunks
217
- text_splitter = RecursiveCharacterTextSplitter(
218
- chunk_size=500,
219
- chunk_overlap=50,
220
- )
221
  texts = text_splitter.split_text(doc_content)
222
 
223
  # Create embeddings for each chunk
224
- embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
225
  doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
226
  vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
227
 
@@ -230,7 +230,6 @@ def index_document_content(doc_content, doc_id):
230
  st.session_state["vector_store"] = {}
231
  st.session_state["vector_store"][doc_id] = vector_store
232
 
233
-
234
  def fetch_trustbuilders(user_id):
235
  """
236
  Retrieve TrustBuilders from Firebase for a specific user.
@@ -1125,7 +1124,8 @@ def rag_response(query):
1125
  return response.content
1126
  except Exception as e:
1127
  logger.error(f"Error generating RAG response: {e}")
1128
- return "Error occurred during RAG response generation."
 
1129
 
1130
 
1131
  # Define tools
@@ -1878,12 +1878,18 @@ def handle_document_query(query):
1878
  # Extract document name from the query
1879
  doc_name_match = re.search(r"[\"']?([^\"']+\.(pdf|docx|doc|txt))[\"']?", query, re.IGNORECASE)
1880
  doc_name = doc_name_match.group(1) if doc_name_match else None
 
 
 
 
1881
 
1882
  # Fetch document content
1883
  doc_content, error = get_document_content(doc_name)
1884
  if error:
1885
  return error
1886
 
 
 
1887
  # Generate AI response using document context
1888
  full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
1889
  try:
@@ -1893,7 +1899,6 @@ def handle_document_query(query):
1893
  except Exception as e:
1894
  logger.error(f"Error generating response using the document: {e}")
1895
  return f"Error generating response using the document: {e}"
1896
-
1897
 
1898
 
1899
  if "missing_trustbucket_content" not in st.session_state:
 
188
 
189
 
190
  def upload_to_firebase(user_id, file):
191
+ """
192
+ Upload document to Firebase and extract content for querying.
193
+ """
194
+ content = extract_text_from_file(file)
195
  if not content:
196
+ return None, "Failed to extract content from the file."
197
 
198
  doc_id = str(uuid.uuid4())
199
  document_data = {"content": content, "name": file.name}
200
 
201
+ # Save document to Firebase
202
  db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
203
 
204
  # Update session state
 
206
  st.session_state["documents"] = {}
207
  st.session_state["documents"][doc_id] = document_data
208
 
209
+ # Index the document content for semantic search
210
  index_document_content(content, doc_id)
211
 
212
+ st.sidebar.success(f"Document '{file.name}' uploaded successfully!")
213
+ return content, None
214
+
215
 
216
  def index_document_content(doc_content, doc_id):
217
  """
218
  Indexes the document content by splitting it into chunks and creating embeddings.
219
  """
220
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
 
 
 
 
221
  texts = text_splitter.split_text(doc_content)
222
 
223
  # Create embeddings for each chunk
224
+ embeddings = OpenAIEmbeddings(openai_api_key="your_openai_api_key_here")
225
  doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
226
  vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
227
 
 
230
  st.session_state["vector_store"] = {}
231
  st.session_state["vector_store"][doc_id] = vector_store
232
 
 
233
  def fetch_trustbuilders(user_id):
234
  """
235
  Retrieve TrustBuilders from Firebase for a specific user.
 
1124
  return response.content
1125
  except Exception as e:
1126
  logger.error(f"Error generating RAG response: {e}")
1127
+ return "An error occurred during the RAG response generation process."
1128
+
1129
 
1130
 
1131
  # Define tools
 
1878
  # Extract document name from the query
1879
  doc_name_match = re.search(r"[\"']?([^\"']+\.(pdf|docx|doc|txt))[\"']?", query, re.IGNORECASE)
1880
  doc_name = doc_name_match.group(1) if doc_name_match else None
1881
+ if not doc_name:
1882
+ return "Please specify a document name in your query."
1883
+
1884
+ st.write("Extracted Document Name:", doc_name)
1885
 
1886
  # Fetch document content
1887
  doc_content, error = get_document_content(doc_name)
1888
  if error:
1889
  return error
1890
 
1891
+ st.write("Document Content Extracted:", doc_content)
1892
+
1893
  # Generate AI response using document context
1894
  full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
1895
  try:
 
1899
  except Exception as e:
1900
  logger.error(f"Error generating response using the document: {e}")
1901
  return f"Error generating response using the document: {e}"
 
1902
 
1903
 
1904
  if "missing_trustbucket_content" not in st.session_state: