Spaces:

anasmkh
/

QdrantVectorStore_Llamaindex

Sleeping

App Files Files Community

anasmkh committed on Feb 13, 2025

Commit

fdd2048

verified ·

1 Parent(s): 6b3a267

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -25

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import gradio as gr
 import qdrant_client
 from getpass import getpass
-# Set your OpenAI API key from environmnt variables.
 openai_api_key = os.getenv('OPENAI_API_KEY')
 # -------------------------------------------------------
@@ -33,38 +33,138 @@ client = None
 vector_store = None
 storage_context = None
 # -------------------------------------------------------
-# Function to process uploaded files and build the index.
 # -------------------------------------------------------
 def process_upload(files):
     """
-    Accepts a list of uploaded file paths, saves them to a local folder,
-    loads them as documents, and builds the vector index and chat engine.
     """
-    upload_dir = "uploaded_files"
-    if not os.path.exists(upload_dir):
-        os.makedirs(upload_dir)
-    else:
-        # Clear any existing files in the folder.
-        for f in os.listdir(upload_dir):
-            os.remove(os.path.join(upload_dir, f))
-    # 'files' is a list of file paths (Gradio's File component with type="file")
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
-        shutil.copy(file_path, dest)
-    # Load documents from the saved folder.
-    documents = SimpleDirectoryReader(upload_dir).load_data()
-    # Build the index and chat engine using Qdrant as the vector store.
-    global client, vector_store, storage_context, index, query_engine, memory, chat_engine
-    client = qdrant_client.QdrantClient(location=":memory:")
-    vector_store = QdrantVectorStore(
-        collection_name="paper",
-        client=client,
-        enable_hybrid=True,
-        batch_size=20,
-    )

 import qdrant_client
 from getpass import getpass
+# Set your OpenAI API key from environment variables.
 openai_api_key = os.getenv('OPENAI_API_KEY')
 # -------------------------------------------------------
 vector_store = None
 storage_context = None
+# Use a persistent folder to keep uploaded files.
+upload_dir = "uploaded_files"
+# exist_ok=True makes creation idempotent and avoids a check-then-create race.
+os.makedirs(upload_dir, exist_ok=True)
+# Base names of files that process_upload has already accepted. This set lives
+# only in memory, so it starts empty on every launch even though the folder persists.
+processed_files = set()
 # -------------------------------------------------------
+# Function to process uploaded files and update the index.
 # -------------------------------------------------------
 def process_upload(files):
     """
+    Copy newly uploaded files into ``upload_dir`` and add them to the index.
+
+    Files whose base name is already in ``processed_files`` are skipped. When no
+    index exists yet, an in-memory Qdrant collection and a ``VectorStoreIndex``
+    are created; otherwise the new documents are inserted into the existing
+    index. The query and chat engines are rebuilt after every successful update.
+
+    Args:
+        files: List of uploaded file paths. May be ``None`` or empty when
+            nothing was selected.
+
+    Returns:
+        A human-readable status message.
     """
+    global client, vector_store, storage_context, index, query_engine, memory, chat_engine, processed_files
+    # Nothing selected: return a message instead of failing on iteration.
+    if not files:
+        return "No files were uploaded."
+    # Maps base name -> stored path for the files accepted in this call.
+    new_files = {}
     for file_path in files:
         file_name = os.path.basename(file_path)
         dest = os.path.join(upload_dir, file_name)
+        # Skip names handled by an earlier call or earlier in this same batch.
+        if file_name in processed_files or file_name in new_files:
+            continue
+        # Keep an existing copy in the folder rather than overwriting it.
+        if not os.path.exists(dest):
+            shutil.copy(file_path, dest)
+        new_files[file_name] = dest
+    if not new_files:
+        return "No new documents to add."
+    # Load only the new documents.
+    new_documents = SimpleDirectoryReader(input_files=list(new_files.values())).load_data()
+    # If this is the first upload, build the index from scratch.
+    if index is None:
+        # (Here we use an in-memory Qdrant client. Change ":memory:" to a persistent path if needed.)
+        client = qdrant_client.QdrantClient(location=":memory:")
+        vector_store = QdrantVectorStore(
+            collection_name="paper",
+            client=client,
+            enable_hybrid=True,
+            batch_size=20,
+        )
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+        index = VectorStoreIndex.from_documents(new_documents, storage_context=storage_context)
+    else:
+        # The index API inserts one document per insert() call.
+        for document in new_documents:
+            index.insert(document)
+    # Record the names only after indexing succeeded, so a failed load or
+    # insert can be retried by uploading the same file again.
+    processed_files.update(new_files)
+    # Reinitialize query and chat engines so they use the updated index.
+    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
+    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
+    chat_engine = index.as_chat_engine(
+        chat_mode="context",
+        memory=memory,
+        system_prompt=(
+            "You are an AI assistant who answers the user questions, "
+            "use the schema fields to generate appropriate and valid json queries"
+        ),
+    )
+    return "Documents uploaded and index updated successfully!"
+# -------------------------------------------------------
+# Chat function that uses the built chat engine.
+# -------------------------------------------------------
+def chat_with_ai(user_input, chat_history):
+    """
+    Send ``user_input`` to the chat engine and append the exchange to the history.
+
+    Args:
+        user_input: The question typed by the user.
+        chat_history: List of ``(question, answer)`` tuples; it is appended to in place.
+
+    Returns:
+        A ``(chat_history, text)`` pair. ``text`` is an empty string after a
+        normal reply or a blank question, and a hint message when no chat
+        engine has been built yet.
+    """
+    global chat_engine
+    # Check if the chat engine is initialized.
+    if chat_engine is None:
+        return chat_history, "Please upload documents first."
+    # Do not spend a model call on a blank question.
+    if not user_input or not user_input.strip():
+        return chat_history, ""
+    response = chat_engine.chat(user_input)
+    # Collect the distinct source file names, keeping first-seen order.
+    ref = []
+    for node in response.source_nodes:
+        file_name = node.metadata.get('file_name')
+        if file_name and file_name not in ref:
+            ref.append(file_name)
+    complete_response = str(response) + "\n\n"
+    # List the files the retrieved source nodes came from, when any are known.
+    if ref:
+        complete_response += "References: " + ", ".join(ref)
+    chat_history.append((user_input, complete_response))
+    return chat_history, ""
+# -------------------------------------------------------
+# Function to clear the chat history.
+# -------------------------------------------------------
+def clear_history():
+    """Return the empty values used to reset the chat display and the input box."""
+    empty_chat = []
+    empty_input = ""
+    return empty_chat, empty_input
+# -------------------------------------------------------
+# Build the Gradio interface.
+# -------------------------------------------------------
+def gradio_interface():
+    """
+    Build the two-tab Gradio UI: one tab for uploading documents, one for chat.
+
+    Returns:
+        The ``gr.Blocks`` app; the caller is responsible for launching it.
+    """
+    with gr.Blocks() as demo:
+        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")
+        with gr.Tab("Upload Documents"):
+            gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
+            # The file upload widget: we specify allowed file types.
+            file_upload = gr.File(
+                label="Upload Files",
+                file_count="multiple",
+                file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
+                type="filepath"  # returns file paths
+            )
+            upload_status = gr.Textbox(label="Upload Status", interactive=False)
+            upload_button = gr.Button("Process Upload")
+            upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)
+        with gr.Tab("Chat"):
+            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
+            user_input = gr.Textbox(
+                placeholder="Ask a question...", label="Enter your question"
+            )
+            submit_button = gr.Button("Send")
+            btn_clear = gr.Button("Clear History")
+            # A State to hold the chat history.
+            chat_history = gr.State([])
+
+            def _reset_chat():
+                # Clear the visible chat and the input box, and also the stored
+                # history State; otherwise the next message would bring the old
+                # turns back into the chatbot.
+                cleared_chat, cleared_input = clear_history()
+                # Drop the engine's conversation memory too, once an engine exists.
+                if chat_engine is not None:
+                    chat_engine.reset()
+                return cleared_chat, cleared_input, []
+
+            submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
+            user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
+            btn_clear.click(_reset_chat, outputs=[chatbot, user_input, chat_history])
+    return demo
+# Build the interface and launch the Gradio app. This statement is at module
+# level with no __main__ guard, so it also runs whenever the module is imported.
+gradio_interface().launch(debug=True)