Spaces:

trustlogic
/

Copy-AI

Sleeping

App Files Community

Wajahat698 committed on Nov 24, 2024

Commit

6fedd5b

verified ·

1 Parent(s): 652f6fe

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -15

app.py CHANGED Viewed

@@ -188,14 +188,17 @@ def extract_text_from_file(file):
 def upload_to_firebase(user_id, file):
-    content = extract_text_from_file (file)
     if not content:
-        return None, "Failed to convert file to content."
     doc_id = str(uuid.uuid4())
     document_data = {"content": content, "name": file.name}
-    # Save to Firebase
     db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
     # Update session state
@@ -203,25 +206,22 @@ def upload_to_firebase(user_id, file):
         st.session_state["documents"] = {}
     st.session_state["documents"][doc_id] = document_data
-    # Index the document content
     index_document_content(content, doc_id)
-    st.sidebar.success(f"Document '{file.name}' uploaded  successfully!")
-    return content,None
 def index_document_content(doc_content, doc_id):
     """
     Indexes the document content by splitting it into chunks and creating embeddings.
     """
-    # Split the document into chunks
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500,
-        chunk_overlap=50,
-    )
     texts = text_splitter.split_text(doc_content)
     # Create embeddings for each chunk
-    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
     doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
     vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
@@ -230,7 +230,6 @@ def index_document_content(doc_content, doc_id):
         st.session_state["vector_store"] = {}
     st.session_state["vector_store"][doc_id] = vector_store
 def fetch_trustbuilders(user_id):
     """
     Retrieve TrustBuilders from Firebase for a specific user.
@@ -1125,7 +1124,8 @@ def rag_response(query):
         return response.content
     except Exception as e:
         logger.error(f"Error generating RAG response: {e}")
-        return "Error occurred during RAG response generation."
 # Define tools
@@ -1878,12 +1878,18 @@ def handle_document_query(query):
     # Extract document name from the query
     doc_name_match = re.search(r"[\"']?([^\"']+\.(pdf|docx|doc|txt))[\"']?", query, re.IGNORECASE)
     doc_name = doc_name_match.group(1) if doc_name_match else None
     # Fetch document content
     doc_content, error = get_document_content(doc_name)
     if error:
         return error
     # Generate AI response using document context
     full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
     try:
@@ -1893,7 +1899,6 @@ def handle_document_query(query):
     except Exception as e:
         logger.error(f"Error generating response using the document: {e}")
         return f"Error generating response using the document: {e}"
 if "missing_trustbucket_content" not in st.session_state:

 def upload_to_firebase(user_id, file):
+    """
+    Upload document to Firebase and extract content for querying.
+    """
+    content = extract_text_from_file(file)
     if not content:
+        return None, "Failed to extract content from the file."
     doc_id = str(uuid.uuid4())
     document_data = {"content": content, "name": file.name}
+    # Save document to Firebase
     db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
     # Update session state
         st.session_state["documents"] = {}
     st.session_state["documents"][doc_id] = document_data
+    # Index the document content for semantic search
     index_document_content(content, doc_id)
+    st.sidebar.success(f"Document '{file.name}' uploaded successfully!")
+    return content, None
 def index_document_content(doc_content, doc_id):
     """
     Indexes the document content by splitting it into chunks and creating embeddings.
     """
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     texts = text_splitter.split_text(doc_content)
     # Create embeddings for each chunk
+    embeddings = OpenAIEmbeddings(openai_api_key="your_openai_api_key_here")
     doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
     vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
         st.session_state["vector_store"] = {}
     st.session_state["vector_store"][doc_id] = vector_store
 def fetch_trustbuilders(user_id):
     """
     Retrieve TrustBuilders from Firebase for a specific user.
         return response.content
     except Exception as e:
         logger.error(f"Error generating RAG response: {e}")
+        return "An error occurred during the RAG response generation process."
 # Define tools
     # Extract document name from the query
     doc_name_match = re.search(r"[\"']?([^\"']+\.(pdf|docx|doc|txt))[\"']?", query, re.IGNORECASE)
     doc_name = doc_name_match.group(1) if doc_name_match else None
+    if not doc_name:
+        return "Please specify a document name in your query."
+    st.write("Extracted Document Name:", doc_name)
     # Fetch document content
     doc_content, error = get_document_content(doc_name)
     if error:
         return error
+    st.write("Document Content Extracted:", doc_content)
     # Generate AI response using document context
     full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
     try:
     except Exception as e:
         logger.error(f"Error generating response using the document: {e}")
         return f"Error generating response using the document: {e}"
 if "missing_trustbucket_content" not in st.session_state: