Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -188,14 +188,17 @@ def extract_text_from_file(file):
|
|
| 188 |
|
| 189 |
|
| 190 |
def upload_to_firebase(user_id, file):
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
| 192 |
if not content:
|
| 193 |
-
return None, "Failed to
|
| 194 |
|
| 195 |
doc_id = str(uuid.uuid4())
|
| 196 |
document_data = {"content": content, "name": file.name}
|
| 197 |
|
| 198 |
-
# Save to Firebase
|
| 199 |
db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
|
| 200 |
|
| 201 |
# Update session state
|
|
@@ -203,25 +206,22 @@ def upload_to_firebase(user_id, file):
|
|
| 203 |
st.session_state["documents"] = {}
|
| 204 |
st.session_state["documents"][doc_id] = document_data
|
| 205 |
|
| 206 |
-
# Index the document content
|
| 207 |
index_document_content(content, doc_id)
|
| 208 |
|
| 209 |
-
st.sidebar.success(f"Document '{file.name}' uploaded
|
| 210 |
-
return content,None
|
|
|
|
| 211 |
|
| 212 |
def index_document_content(doc_content, doc_id):
|
| 213 |
"""
|
| 214 |
Indexes the document content by splitting it into chunks and creating embeddings.
|
| 215 |
"""
|
| 216 |
-
|
| 217 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
| 218 |
-
chunk_size=500,
|
| 219 |
-
chunk_overlap=50,
|
| 220 |
-
)
|
| 221 |
texts = text_splitter.split_text(doc_content)
|
| 222 |
|
| 223 |
# Create embeddings for each chunk
|
| 224 |
-
embeddings = OpenAIEmbeddings(openai_api_key=
|
| 225 |
doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
|
| 226 |
vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
|
| 227 |
|
|
@@ -230,7 +230,6 @@ def index_document_content(doc_content, doc_id):
|
|
| 230 |
st.session_state["vector_store"] = {}
|
| 231 |
st.session_state["vector_store"][doc_id] = vector_store
|
| 232 |
|
| 233 |
-
|
| 234 |
def fetch_trustbuilders(user_id):
|
| 235 |
"""
|
| 236 |
Retrieve TrustBuilders from Firebase for a specific user.
|
|
@@ -1125,7 +1124,8 @@ def rag_response(query):
|
|
| 1125 |
return response.content
|
| 1126 |
except Exception as e:
|
| 1127 |
logger.error(f"Error generating RAG response: {e}")
|
| 1128 |
-
return "
|
|
|
|
| 1129 |
|
| 1130 |
|
| 1131 |
# Define tools
|
|
@@ -1878,12 +1878,18 @@ def handle_document_query(query):
|
|
| 1878 |
# Extract document name from the query
|
| 1879 |
doc_name_match = re.search(r"[\"']?([^\"']+\.(pdf|docx|doc|txt))[\"']?", query, re.IGNORECASE)
|
| 1880 |
doc_name = doc_name_match.group(1) if doc_name_match else None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1881 |
|
| 1882 |
# Fetch document content
|
| 1883 |
doc_content, error = get_document_content(doc_name)
|
| 1884 |
if error:
|
| 1885 |
return error
|
| 1886 |
|
|
|
|
|
|
|
| 1887 |
# Generate AI response using document context
|
| 1888 |
full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
|
| 1889 |
try:
|
|
@@ -1893,7 +1899,6 @@ def handle_document_query(query):
|
|
| 1893 |
except Exception as e:
|
| 1894 |
logger.error(f"Error generating response using the document: {e}")
|
| 1895 |
return f"Error generating response using the document: {e}"
|
| 1896 |
-
|
| 1897 |
|
| 1898 |
|
| 1899 |
if "missing_trustbucket_content" not in st.session_state:
|
|
|
|
| 188 |
|
| 189 |
|
| 190 |
def upload_to_firebase(user_id, file):
|
| 191 |
+
"""
|
| 192 |
+
Upload document to Firebase and extract content for querying.
|
| 193 |
+
"""
|
| 194 |
+
content = extract_text_from_file(file)
|
| 195 |
if not content:
|
| 196 |
+
return None, "Failed to extract content from the file."
|
| 197 |
|
| 198 |
doc_id = str(uuid.uuid4())
|
| 199 |
document_data = {"content": content, "name": file.name}
|
| 200 |
|
| 201 |
+
# Save document to Firebase
|
| 202 |
db.child("users").child(user_id).child("KnowledgeBase").child(doc_id).set(document_data)
|
| 203 |
|
| 204 |
# Update session state
|
|
|
|
| 206 |
st.session_state["documents"] = {}
|
| 207 |
st.session_state["documents"][doc_id] = document_data
|
| 208 |
|
| 209 |
+
# Index the document content for semantic search
|
| 210 |
index_document_content(content, doc_id)
|
| 211 |
|
| 212 |
+
st.sidebar.success(f"Document '{file.name}' uploaded successfully!")
|
| 213 |
+
return content, None
|
| 214 |
+
|
| 215 |
|
| 216 |
def index_document_content(doc_content, doc_id):
|
| 217 |
"""
|
| 218 |
Indexes the document content by splitting it into chunks and creating embeddings.
|
| 219 |
"""
|
| 220 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
texts = text_splitter.split_text(doc_content)
|
| 222 |
|
| 223 |
# Create embeddings for each chunk
|
| 224 |
+
embeddings = OpenAIEmbeddings(openai_api_key="your_openai_api_key_here")
|
| 225 |
doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
|
| 226 |
vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
|
| 227 |
|
|
|
|
| 230 |
st.session_state["vector_store"] = {}
|
| 231 |
st.session_state["vector_store"][doc_id] = vector_store
|
| 232 |
|
|
|
|
| 233 |
def fetch_trustbuilders(user_id):
|
| 234 |
"""
|
| 235 |
Retrieve TrustBuilders from Firebase for a specific user.
|
|
|
|
| 1124 |
return response.content
|
| 1125 |
except Exception as e:
|
| 1126 |
logger.error(f"Error generating RAG response: {e}")
|
| 1127 |
+
return "An error occurred during the RAG response generation process."
|
| 1128 |
+
|
| 1129 |
|
| 1130 |
|
| 1131 |
# Define tools
|
|
|
|
| 1878 |
# Extract document name from the query
|
| 1879 |
doc_name_match = re.search(r"[\"']?([^\"']+\.(pdf|docx|doc|txt))[\"']?", query, re.IGNORECASE)
|
| 1880 |
doc_name = doc_name_match.group(1) if doc_name_match else None
|
| 1881 |
+
if not doc_name:
|
| 1882 |
+
return "Please specify a document name in your query."
|
| 1883 |
+
|
| 1884 |
+
st.write("Extracted Document Name:", doc_name)
|
| 1885 |
|
| 1886 |
# Fetch document content
|
| 1887 |
doc_content, error = get_document_content(doc_name)
|
| 1888 |
if error:
|
| 1889 |
return error
|
| 1890 |
|
| 1891 |
+
st.write("Document Content Extracted:", doc_content)
|
| 1892 |
+
|
| 1893 |
# Generate AI response using document context
|
| 1894 |
full_prompt = f"Document Content:\n{doc_content}\n\nUser Query: {query}\n\nResponse:"
|
| 1895 |
try:
|
|
|
|
| 1899 |
except Exception as e:
|
| 1900 |
logger.error(f"Error generating response using the document: {e}")
|
| 1901 |
return f"Error generating response using the document: {e}"
|
|
|
|
| 1902 |
|
| 1903 |
|
| 1904 |
if "missing_trustbucket_content" not in st.session_state:
|