Spaces:

Ismetdh
/

SimpleRAG

Sleeping

App Files Files Community

Ismetdh committed on Mar 17, 2025

Commit

330a910

verified ·

1 Parent(s): 0463792

Update app.py

Browse files

Allow the app to accept multiple files. The user can now also delete the chat history and the attached files.

Files changed (1) hide show

app.py +85 -38

app.py CHANGED Viewed

@@ -5,14 +5,15 @@ import os
 import re
 import numpy as np
 import google.generativeai as palm
-from sklearn.metrics.pairwise import cosine_similarity
 import logging
 import time
 import uuid
 import json
 import firebase_admin
 from firebase_admin import credentials, firestore
 def init_firebase():
     if not firebase_admin._apps:
         data = json.loads(os.getenv("FIREBASE_CRED"))
@@ -68,10 +69,11 @@ def update_feedback_in_firestore(session_id, conversation_id, feedback):
 class Config:
     CHUNK_WORDS = 300
-    EMBEDDING_MODEL = "models/text-embedding-004"
     TOP_N = 5
     SYSTEM_PROMPT = (
-        "You are a helpful assistant. Answer the question using the provided context below. Answer based on your knowledge if the context given is not enough."
     )
     GENERATION_MODEL = "models/gemini-1.5-flash"
@@ -179,9 +181,6 @@ def chunk_text(text: str) -> list[str]:
 def process_document(uploaded_file) -> None:
     try:
-        keys_to_clear = ["document_text", "document_chunks", "document_embeddings"]
-        for key in keys_to_clear:
-            st.session_state.pop(key, None)
         file_text = extract_text_from_file(uploaded_file)
         if not file_text.strip():
             logger.error("Uploaded file contains no valid text.")
@@ -197,21 +196,34 @@ def process_document(uploaded_file) -> None:
             logger.error("All embeddings are zero vectors.")
             st.error("Failed to generate valid embeddings.")
             return
-        st.session_state.update({
             "document_text": file_text,
             "document_chunks": chunks,
-            "document_embeddings": embeddings
-        })
-        if not st.session_state.get("doc_processed", False):
-            message_placeholder = st.empty()
-            message_placeholder.success("Document processing complete! You can now start chatting.")
-            st.session_state.doc_processed = True
     except Exception as e:
         logger.error("Document processing failed: %s", e)
         st.error(f"An error occurred while processing the document: {e}")
 def search_query(query: str) -> list[tuple[str, float]]:
-    if "document_embeddings" not in st.session_state or len(st.session_state["document_embeddings"]) == 0:
         logger.error("No valid document embeddings found in session state.")
         st.error("No valid document embeddings found. Please upload a valid document.")
         return []
@@ -221,10 +233,15 @@ def search_query(query: str) -> list[tuple[str, float]]:
         st.error("Failed to generate a valid query embedding.")
         return []
     query_embedding = query_embedding.reshape(1, -1)
-    doc_embeddings = np.vstack(st.session_state["document_embeddings"])
     similarities = cosine_similarity(query_embedding, doc_embeddings)[0]
     top_indices = np.argsort(similarities)[-Config.TOP_N:][::-1]
-    results = [(st.session_state["document_chunks"][i], similarities[i]) for i in top_indices]
     return results
 def generate_answer(user_query: str, context: str) -> str:
@@ -276,34 +293,64 @@ def chat_app():
             "user_question": user_input,
             "assistant_answer": answer,
         })
-        if "feedback" not in st.session_state.conversations[-1]:
-            col1, col2, col3, col4, col5, col6, col7, col8, col9, col10 = st.columns(10)
-            col1.button("👍", key=f"feedback_like_{len(st.session_state.conversations)}", on_click=handle_feedback, args=("positive",))
-            col2.button("👎", key=f"feedback_dislike_{len(st.session_state.conversations)}", on_click=handle_feedback, args=("negative",))
 def main():
     st.title("Chat with your files")
-    st.sidebar.header("Upload Document")
-    uploaded_file = st.sidebar.file_uploader("Upload (.txt, .pdf, .docx)", type=["txt", "pdf", "docx"])
-    if uploaded_file and not st.session_state.get("doc_processed", False):
-        process_document(uploaded_file)
-    if "document_text" in st.session_state:
         chat_app()
     else:
-        st.info("Please upload and process a document from the sidebar to start chatting.")
     st.markdown(
-    """
-    <div style="position: fixed; right: 10px; bottom: 10px; font-size: 12px; z-index: 9999; text-align: right;">
-    Made by Danny.<br>
-    Your questions, our response as well as your feedback will be saved for evaluation purposes.
-    </div>
-    """,
-    unsafe_allow_html=True
-)
 if __name__ == "__main__":
     main()

 import re
 import numpy as np
 import google.generativeai as palm
 import logging
 import time
 import uuid
 import json
 import firebase_admin
 from firebase_admin import credentials, firestore
+from sklearn.metrics.pairwise import cosine_similarity
+# Initialize Firebase
 def init_firebase():
     if not firebase_admin._apps:
         data = json.loads(os.getenv("FIREBASE_CRED"))
 class Config:
     CHUNK_WORDS = 300
+    EMBEDDING_MODEL = "models/gemini-embedding-exp-03-07"
     TOP_N = 5
     SYSTEM_PROMPT = (
+        "You are a helpful assistant. Answer the question using the provided context below. "
+        "Answer based on your knowledge if the context given is not enough."
     )
     GENERATION_MODEL = "models/gemini-1.5-flash"
 def process_document(uploaded_file) -> None:
     try:
         file_text = extract_text_from_file(uploaded_file)
         if not file_text.strip():
             logger.error("Uploaded file contains no valid text.")
             logger.error("All embeddings are zero vectors.")
             st.error("Failed to generate valid embeddings.")
             return
+        doc_entry = {
+            "file_name": uploaded_file.name,
             "document_text": file_text,
             "document_chunks": chunks,
+            "document_embeddings": embeddings,
+        }
+        if "documents" not in st.session_state:
+            st.session_state["documents"] = []
+        st.session_state.documents.append(doc_entry)
+        st.session_state.doc_processed = True
+        st.success(f"Document '{uploaded_file.name}' processing complete! You can now start chatting.")
     except Exception as e:
         logger.error("Document processing failed: %s", e)
         st.error(f"An error occurred while processing the document: {e}")
+def clear_documents():
+    # Clear attached documents and chat messages from session state.
+    if "documents" in st.session_state:
+        del st.session_state["documents"]
+    if "conversations" in st.session_state:
+        del st.session_state["conversations"]
+    # Update the dynamic key for the file uploader to force reinitialization.
+    st.session_state["uploaded_files_key"] = str(uuid.uuid4())
+    st.session_state.doc_processed = False
+    st.success("All documents and chat messages have been cleared.")
 def search_query(query: str) -> list[tuple[str, float]]:
+    if "documents" not in st.session_state or len(st.session_state["documents"]) == 0:
         logger.error("No valid document embeddings found in session state.")
         st.error("No valid document embeddings found. Please upload a valid document.")
         return []
         st.error("Failed to generate a valid query embedding.")
         return []
     query_embedding = query_embedding.reshape(1, -1)
+    all_chunks = []
+    all_embeddings = []
+    for doc in st.session_state.documents:
+        all_chunks.extend(doc["document_chunks"])
+        all_embeddings.extend(doc["document_embeddings"])
+    doc_embeddings = np.vstack(all_embeddings)
     similarities = cosine_similarity(query_embedding, doc_embeddings)[0]
     top_indices = np.argsort(similarities)[-Config.TOP_N:][::-1]
+    results = [(all_chunks[i], similarities[i]) for i in top_indices]
     return results
 def generate_answer(user_query: str, context: str) -> str:
             "user_question": user_input,
             "assistant_answer": answer,
         })
+        col1, col2 ,col3,col4,col5= st.columns(5)
+        col1.button("👍", key=f"feedback_like_{len(st.session_state.conversations)}", on_click=handle_feedback, args=("positive",))
+        col2.button("👎", key=f"feedback_dislike_{len(st.session_state.conversations)}", on_click=handle_feedback, args=("negative",))
+# Define the clear confirmation dialog using st.dialog decorator.
+@st.dialog("Confirm Clear")
+def clear_confirm_dialog():
+    st.write("This will erase all attached documents and chat history. Do you want to proceed?")
+    col1, col2 = st.columns(2)
+    with col1:
+        if st.button("Confirm Clear"):
+            clear_documents()
+            st.success("Documents and chat history have been cleared.")
+            st.rerun()
+    with col2:
+        if st.button("Cancel"):
+            st.write("Operation cancelled.")
+            st.rerun()
 def main():
     """Render the page: upload sidebar, clear button, chat area and footer.

     Streamlit re-executes the script on every user interaction, so each file
     still listed in the uploader is seen again on every run. Files are
     therefore handed to ``process_document`` only once, keyed by file name.
     """
     st.title("Chat with your files")
     st.sidebar.header("Upload Documents")
     # The uploader's widget key lives in session state so that
     # clear_documents() can replace it and thereby reset the widget.
     if "uploaded_files_key" not in st.session_state:
         st.session_state["uploaded_files_key"] = str(uuid.uuid4())
     uploaded_files = st.sidebar.file_uploader(
         "Upload (.txt, .pdf, .docx)",
         type=["txt", "pdf", "docx"],
         accept_multiple_files=True,
         key=st.session_state["uploaded_files_key"],
     )
     # Skip files whose name is already stored: without this guard every rerun
     # would re-embed each uploaded file and append a duplicate entry to
     # st.session_state["documents"].
     # NOTE: identity is the file name only, so a different file uploaded later
     # under an existing name is ignored until the documents are cleared.
     indexed_names = {
         doc["file_name"] for doc in st.session_state.get("documents", [])
     }
     for file in uploaded_files or []:
         if file.name not in indexed_names:
             process_document(file)
     # Offer the clear button whenever there is anything to clear: stored
     # documents, chat history, or files currently held by the uploader.
     has_documents = bool(st.session_state.get("documents"))
     has_conversations = bool(st.session_state.get("conversations"))
     if has_documents or has_conversations or uploaded_files:
         if st.sidebar.button("Clear Documents & Chat History"):
             clear_confirm_dialog()
     if st.session_state.get("doc_processed", False):
         chat_app()
     else:
         st.info("Please upload and process at least one document from the sidebar to start chatting.")
     # Fixed-position footer with the data-collection notice.
     st.markdown(
         """
         <div style="position: fixed; right: 10px; bottom: 10px; font-size: 12px; z-index: 9999; text-align: right;">
             Your questions, our response as well as your feedback will be saved for evaluation purposes.
         </div>
         """,
         unsafe_allow_html=True
     )
 if __name__ == "__main__":
     main()