Update app.py
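This commit replaces a hard-coded, import-time indexing script with a self-contained Gradio app: documents are uploaded through the UI, saved locally, indexed into an in-memory Qdrant collection (hybrid mode), and queried through a context chat engine with conversation memory.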
app.py CHANGED
@@ -1,119 +1,163 @@
Before:

import os
from getpass import getpass

openai_api_key = os.getenv('OPENAI_API_KEY')
openai_api_key = openai_api_key

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

Settings.llm = OpenAI(model="gpt-3.5-turbo",temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client

client = qdrant_client.QdrantClient(
    location=":memory:",
)

vector_store = QdrantVectorStore(
    collection_name = "paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid"
)

from llama_index.core.memory import ChatMemoryBuffer

#

def chat_with_ai(user_input, chat_history):
    #
    # response = "you're wlocome"
    # chat_history.append((user_input, response))
    # return chat_history, ""
    response = chat_engine.chat(user_input)
    references = response.source_nodes
    ref,pages = [],[]
    return chat_history, ""

def clear_history():
    return [], ""

with gr.Blocks() as demo:
    gr.Markdown("# Chat Interface for LlamaIndex")
    return demo
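The old version could not run as written: it indexes an undefined `documents` variable at import time, uses `gr` without importing gradio, never constructs the `chat_engine` that `chat_with_ai` calls, and places `return demo` at module level, which is a syntax error. The rewrite below moves indexing into an upload handler and wraps the UI in a function.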
After:

import os
import shutil
import gradio as gr
import qdrant_client
from getpass import getpass

# Set your OpenAI API key from environment variables.
openai_api_key = os.getenv('OPENAI_API_KEY')

# -------------------------------------------------------
# Configure LlamaIndex with OpenAI LLM and Embeddings
# -------------------------------------------------------
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# -------------------------------------------------------
# Import document readers, index, vector store, memory, etc.
# -------------------------------------------------------
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer

# Global variables to hold the index and chat engine.
chat_engine = None
index = None
query_engine = None
memory = None
client = None
vector_store = None
storage_context = None

# -------------------------------------------------------
# Function to process uploaded files and build the index.
# -------------------------------------------------------
def process_upload(files):
    """
    Accepts a list of uploaded file paths, saves them to a local folder,
    loads them as documents, and builds the vector index and chat engine.
    """
    upload_dir = "uploaded_files"
    if not os.path.exists(upload_dir):
        os.makedirs(upload_dir)
    else:
        # Clear any existing files in the folder.
        for f in os.listdir(upload_dir):
            os.remove(os.path.join(upload_dir, f))

    # 'files' is a list of file paths from Gradio's File component.
    for file_path in files:
        file_name = os.path.basename(file_path)
        dest = os.path.join(upload_dir, file_name)
        shutil.copy(file_path, dest)

    # Load documents from the saved folder.
    documents = SimpleDirectoryReader(upload_dir).load_data()

    # Build the index and chat engine using Qdrant as the vector store.
    global client, vector_store, storage_context, index, query_engine, memory, chat_engine
    client = qdrant_client.QdrantClient(location=":memory:")

    vector_store = QdrantVectorStore(
        collection_name="paper",
        client=client,
        enable_hybrid=True,
        batch_size=20,
    )

    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

    query_engine = index.as_query_engine(vector_store_query_mode="hybrid")

    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt=(
            "You are an AI assistant who answers the user questions, "
            "use the schema fields to generate appropriate and valid json queries"
        ),
    )

    return "Documents uploaded and index built successfully!"

# -------------------------------------------------------
# Chat function that uses the built chat engine.
# -------------------------------------------------------
def chat_with_ai(user_input, chat_history):
    global chat_engine
    # Check if the chat engine is initialized.
    if chat_engine is None:
        return chat_history, "Please upload documents first."

    response = chat_engine.chat(user_input)
    references = response.source_nodes
    ref, pages = [], []

    # Extract file names from the source nodes (if available)
    for node in references:
        file_name = node.metadata.get('file_name')
        if file_name and file_name not in ref:
            ref.append(file_name)

    complete_response = str(response) + "\n\n"
    if ref or pages:
        chat_history.append((user_input, complete_response))
    else:
        chat_history.append((user_input, str(response)))
    return chat_history, ""

# -------------------------------------------------------
# Function to clear the chat history.
# -------------------------------------------------------
def clear_history():
    return [], ""

# -------------------------------------------------------
# Build the Gradio interface.
# -------------------------------------------------------
def gradio_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex with File Upload")

        # Use Tabs to separate the file upload and chat interfaces.
        with gr.Tab("Upload Documents"):
            gr.Markdown("Upload PDF, Excel, CSV, DOC/DOCX, or TXT files below:")
            # The file upload widget: we specify allowed file types.
            file_upload = gr.File(
                label="Upload Files",
                file_count="multiple",
                file_types=[".pdf", ".csv", ".txt", ".xlsx", ".xls", ".doc", ".docx"],
                type="filepath"  # return plain file paths (replaces the invalid type="file")
            )
            upload_status = gr.Textbox(label="Upload Status", interactive=False)
            upload_button = gr.Button("Process Upload")

            upload_button.click(process_upload, inputs=file_upload, outputs=upload_status)

        with gr.Tab("Chat"):
            chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
            user_input = gr.Textbox(
                placeholder="Ask a question...", label="Enter your question"
            )
            submit_button = gr.Button("Send")
            btn_clear = gr.Button("Clear History")

            # A State to hold the chat history.
            chat_history = gr.State([])

            submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
            user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
            btn_clear.click(clear_history, outputs=[chatbot, user_input])

    return demo

# Launch the Gradio app.
gradio_interface().launch(debug=True)
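Since process_upload and chat_with_ai are plain functions, the flow can be smoke-tested without the browser. A minimal sketch, assuming the launch() call at the bottom of app.py is wrapped in an `if __name__ == "__main__":` guard (so importing the module does not start the server), that OPENAI_API_KEY is set in the environment, and that "sample.pdf" is a hypothetical stand-in for any local document:

# smoke_test.py - hypothetical driver for the functions defined in app.py.
from app import process_upload, chat_with_ai

# Build the index from one local file (the path is a placeholder).
print(process_upload(["sample.pdf"]))

# Ask a question; chat_with_ai returns (updated_history, cleared_textbox_value).
history, _ = chat_with_ai("What is this document about?", [])
for question, answer in history:
    print(f"Q: {question}\nA: {answer}")

Note that enable_hybrid=True on QdrantVectorStore typically pulls in the optional fastembed dependency for the sparse half of the hybrid search, so `pip install fastembed` may be needed alongside the llama-index Qdrant integration.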