daniel-was-taken commited on
Commit
ace5cd4
·
1 Parent(s): 1db8512

Deployment ready

Browse files
Files changed (9) hide show
  1. .chainlit/config.toml +130 -0
  2. .env.example +17 -0
  3. .gitignore +18 -0
  4. Dockerfile +24 -0
  5. app.py +290 -0
  6. chainlit.md +14 -0
  7. compose.yml +82 -0
  8. populate_db.py +170 -0
  9. requirements.txt +14 -0
.chainlit/config.toml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: false). No personal data is collected.
3
+ enable_telemetry = false
4
+
5
+
6
+ # List of environment variables to be provided by each user to use the app.
7
+ user_env = []
8
+
9
+ # Duration (in seconds) during which the session is saved when the connection is lost
10
+ session_timeout = 3600
11
+
12
+ # Duration (in seconds) of the user session expiry
13
+ user_session_timeout = 1296000 # 15 days
14
+
15
+ # Enable third parties caching (e.g., LangChain cache)
16
+ cache = false
17
+
18
+ # Authorized origins
19
+ allow_origins = ["*"]
20
+
21
+ [features]
22
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
23
+ unsafe_allow_html = false
24
+
25
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
26
+ latex = false
27
+
28
+ # Autoscroll new user messages at the top of the window
29
+ user_message_autoscroll = true
30
+
31
+ # Automatically tag threads with the current chat profile (if a chat profile is used)
32
+ auto_tag_thread = true
33
+
34
+ # Allow users to edit their own messages
35
+ edit_message = true
36
+
37
+ # Authorize users to spontaneously upload files with messages
38
+ [features.spontaneous_file_upload]
39
+ enabled = false
40
+ # Define accepted file types using MIME types
41
+ # Examples:
42
+ # 1. For specific file types:
43
+ # accept = ["image/jpeg", "image/png", "application/pdf"]
44
+ # 2. For all files of certain type:
45
+ # accept = ["image/*", "audio/*", "video/*"]
46
+ # 3. For specific file extensions:
47
+ # accept = { "application/octet-stream" = [".xyz", ".pdb"] }
48
+ # Note: Using "*/*" is not recommended as it may cause browser warnings
49
+ accept = ["*/*"]
50
+ max_files = 20
51
+ max_size_mb = 500
52
+
53
+ [features.audio]
54
+ # Sample rate of the audio
55
+ sample_rate = 24000
56
+
57
+ [features.mcp.sse]
58
+ enabled = true
59
+
60
+ [features.mcp.stdio]
61
+ enabled = true
62
+ # Only the executables in the allow list can be used for MCP stdio server.
63
+ # Only need the base name of the executable, e.g. "npx", not "/usr/bin/npx".
64
+ # Please don't comment this line for now, we need it to parse the executable name.
65
+ allowed_executables = [ "npx", "uvx" ]
66
+
67
+ [UI]
68
+ # Name of the assistant.
69
+ name = "Assistant"
70
+
71
+ # default_theme = "dark"
72
+
73
+ # layout = "wide"
74
+
75
+ # default_sidebar_state = "open"
76
+
77
+ # Description of the assistant. This is used for HTML tags.
78
+ # description = ""
79
+
80
+ # Chain of Thought (CoT) display mode. Can be "hidden", "tool_call" or "full".
81
+ cot = "full"
82
+
83
+ # Specify a CSS file that can be used to customize the user interface.
84
+ # The CSS file can be served from the public directory or via an external link.
85
+ # custom_css = "/public/test.css"
86
+
87
+ # Specify additional attributes for a custom CSS file
88
+ # custom_css_attributes = "media=\"print\""
89
+
90
+ # Specify a JavaScript file that can be used to customize the user interface.
91
+ # The JavaScript file can be served from the public directory.
92
+ # custom_js = "/public/test.js"
93
+
94
+ # The style of alert boxes. Can be "classic" or "modern".
95
+ alert_style = "classic"
96
+
97
+ # Specify additional attributes for custom JS file
98
+ # custom_js_attributes = "async type = \"module\""
99
+
100
+ # Custom login page image, relative to public directory or external URL
101
+ # login_page_image = "/public/custom-background.jpg"
102
+
103
+ # Custom login page image filter (Tailwind internal filters, no dark/light variants)
104
+ # login_page_image_filter = "brightness-50 grayscale"
105
+ # login_page_image_dark_filter = "contrast-200 blur-sm"
106
+
107
+
108
+ # Specify a custom meta image url.
109
+ # custom_meta_image_url = "https://chainlit-cloud.s3.eu-west-3.amazonaws.com/logo/chainlit_banner.png"
110
+
111
+ # Load assistant logo directly from URL.
112
+ logo_file_url = ""
113
+
114
+ # Load assistant avatar image directly from URL.
115
+ default_avatar_file_url = ""
116
+
117
+ # Specify a custom build directory for the frontend.
118
+ # This can be used to customize the frontend code.
119
+ # Be careful: If this is a relative path, it should not start with a slash.
120
+ # custom_build = "./public/build"
121
+
122
+ # Specify optional one or more custom links in the header.
123
+ # [[UI.header_links]]
124
+ # name = "Issues"
125
+ # display_name = "Report Issue"
126
+ # icon_url = "https://avatars.githubusercontent.com/u/128686189?s=200&v=4"
127
+ # url = "https://github.com/Chainlit/chainlit/issues"
128
+
129
+ [meta]
130
+ generated_by = "2.6.0"
.env.example ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ACCESS_TOKEN=''
2
+
3
+ NEBIUS_API_KEY=''
4
+
5
+ PASSWORD=''
6
+
7
+ OPENAI_API_KEY=''
8
+
9
+ CHAINLIT_AUTH_SECRET=""
10
+
11
+ DATABASE_URL=''
12
+
13
+ OAUTH_GOOGLE_CLIENT_ID=''
14
+ OAUTH_GOOGLE_CLIENT_SECRET=''
15
+ OAUTH_URI=""
16
+
17
+ DOCKER_VOLUME_DIRECTORY=/opt/app/volumes
.gitignore CHANGED
@@ -205,3 +205,21 @@ cython_debug/
205
  marimo/_static/
206
  marimo/_lsp/
207
  __marimo__/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  marimo/_static/
206
  marimo/_lsp/
207
  __marimo__/
208
+
209
+
210
+ # Google Client Secrets
211
+ *.json
212
+
213
+ # Production secrets and environment files
214
+ .env
215
+ .env.local
216
+ .env.production
217
+ .env.staging
218
+
219
+ secrets/
220
+
221
+ # DigitalOcean App Platform
222
+ .do/
223
+
224
+ # Docker volumes (production)
225
+ volumes/
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.12-slim
3
+
4
+ # Set the working directory in the container
5
+ WORKDIR /app
6
+
7
+ # Copy the requirements file into the container at /app
8
+ COPY requirements.txt .
9
+
10
+ # Install any needed packages specified in requirements.txt
11
+ RUN pip install --no-cache-dir -r requirements.txt
12
+
13
+ # Copy the rest of the application's code into the container at /app
14
+ COPY . .
15
+
16
+ # Make port 8000 available to the world outside this container
17
+ EXPOSE 8000
18
+
19
+ # Define environment variable
20
+ ENV CHAINLIT_HOST=0.0.0.0
21
+ ENV CHAINLIT_PORT=8000
22
+
23
+ # Run app.py when the container launches
24
+ CMD ["chainlit", "run", "app.py"]
app.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, List, Optional
3
+ from operator import itemgetter
4
+
5
+ import chainlit as cl
6
+ from chainlit.types import ThreadDict
7
+ from chainlit.data.sql_alchemy import SQLAlchemyDataLayer
8
+ from pydantic import SecretStr
9
+
10
+
11
+ from langchain.chains import create_retrieval_chain
12
+ from langchain.chains.combine_documents import create_stuff_documents_chain
13
+ from langchain_nebius import ChatNebius
14
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
15
+ from langchain.schema.output_parser import StrOutputParser
16
+ from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
17
+ from langchain.schema.runnable.config import RunnableConfig
18
+ from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
19
+
20
+ from pymilvus import MilvusClient
21
+ from sentence_transformers import SentenceTransformer
22
+ from chainlit.input_widget import Select, Switch, Slider
23
+
24
+ from langchain_core.documents import Document
25
+ from typing_extensions import List
26
+
27
+ from populate_db import main
28
+
29
# Initialize Milvus client and embedding model.
# MILVUS_URI defaults to a local standalone instance; docker compose overrides
# it to point at the `standalone` service (see compose.yml).
MILVUS_URI = os.getenv("MILVUS_URI", "http://localhost:19530")
milvus_client = MilvusClient(uri=MILVUS_URI)
collection_name = "my_rag_collection"

# First run: populate the vector store if the collection does not exist yet.
# NOTE(review): populate_db.main() is a long, blocking import-time side effect
# (document parsing + embedding for every file) — confirm this is acceptable
# during app startup.
if not milvus_client.has_collection(collection_name):
    main()

# Embedding model must match the one used by populate_db.py
# (BAAI/bge-small-en-v1.5, 384-dimensional vectors).
embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")

# Initialize LLM (Nebius-hosted Qwen3, streaming enabled).
# NOTE(review): the key is read from OPENAI_API_KEY even though this is a
# Nebius client and .env.example also defines NEBIUS_API_KEY — confirm which
# variable is intended. If the variable is unset, os.getenv returns None and
# SecretStr(None) raises at import time.
model = ChatNebius(
    model="Qwen/Qwen3-14B",
    streaming=True,
    temperature=0.7,
    top_p=0.95,
    api_key=SecretStr(os.getenv("OPENAI_API_KEY")),
)
47
+
48
+ # Define application steps
49
+
50
def emb_text(text: str) -> List[float]:
    """Embed a single string and return its normalized vector as a list of floats."""
    vectors = embedding_model.encode([text], normalize_embeddings=True)
    return vectors.tolist()[0]
53
+
54
def retrieve_relevant_documents(query: str, limit: int = 5) -> List[Dict]:
    """Search Milvus for the `limit` chunks most similar to `query`.

    Returns a list of {"text", "metadata", "score"} dicts; on any failure
    the error is logged and an empty list is returned.
    """
    try:
        hits = milvus_client.search(
            collection_name=collection_name,
            data=[emb_text(query)],
            limit=limit,
            output_fields=["text", "metadata"]
        )
        # Flatten the first (and only) query's hits into plain dicts.
        return [
            {
                "text": hit['entity']['text'],
                "metadata": hit['entity']['metadata'],
                "score": hit['distance']
            }
            for hit in hits[0]
        ]
    except Exception as e:
        print(f"Error retrieving documents: {e}")
        return []
78
+
79
def format_docs_with_id(docs: List[Dict]) -> str:
    """Format retrieved documents as numbered citation entries.

    Each entry looks like:
        [n] Source: <filename>, Page: <page>, Score: <score>
        Content: <text>

    Entries are separated by a blank line. Returns "" for an empty list.
    """
    formatted = []
    for i, doc in enumerate(docs):
        # Extract title and page_number from metadata, with fallbacks for
        # documents indexed without full metadata.
        metadata = doc.get('metadata', {})
        title = metadata.get('filename', 'Unknown Document')  # Use filename as fallback for title
        page_number = metadata.get('page_number', 'Unknown')
        score = doc.get('score', 'N/A')  # Use score if available
        text_content = doc.get('text', '')

        formatted_doc = f"[{i+1}] Source: {title}, Page: {page_number}, Score: {score}\nContent: {text_content}"
        formatted.append(formatted_doc)

    # BUG FIX: removed a leftover debug print that dumped every formatted
    # document (full text included) to stdout on each request.
    return "\n\n".join(formatted)
94
+
95
+
96
def setup_rag_chain():
    """Setup the RAG chain with context retrieval.

    Reads the "settings" and "messages" entries from the Chainlit user
    session, so the chain must be rebuilt whenever settings change
    (see setup_agent / on_chat_start).
    """

    def get_context_and_history(inputs):
        """Retrieve context and get conversation history."""
        query = inputs["question"]
        relevant_docs = retrieve_relevant_documents(query, limit=5)
        print("Relevant documents:", relevant_docs[0] if relevant_docs else "No documents found")

        # Convert dictionaries to Document objects for LangChain
        doc_objects = []
        for doc in relevant_docs:
            doc_obj = Document(
                page_content=doc.get('text', ''),
                metadata=doc.get('metadata', {})
            )
            doc_objects.append(doc_obj)

        # Format citations for reference
        citations = format_docs_with_id(relevant_docs)

        # Add citations to the last document's metadata so it's available to the prompt
        if doc_objects:
            doc_objects[-1].metadata['formatted_citations'] = citations

        return {
            "question": query,
            "context": doc_objects,
            "history": cl.user_session.get("messages", [])
        }

    system_prompt = """You are a helpful assistant specialising in disability support, reasonable adjustments, and equality legislation.

When answering questions, you should:
1. Use the provided context documents to inform your response
2. Be accurate and helpful
3. If the context doesn't contain relevant information, say so clearly
4. Always reply in English
5. Provide clear recommendations wherever applicable
6. Do not make assumptions about the user's knowledge or background
7. If the user asks for a specific law or regulation, provide a brief explanation and cite relevant documents if available.
8. Do not overlook the importance of accessibility and inclusivity in your responses.
9. Do not overemphasize disability in your responses, but rather focus on the support and adjustments that can be made to ensure equality and inclusivity.
10. If the user asks about a specific disability, provide general information and resources, but do not make assumptions about the individual's experience or needs.
11. If the user query explicitly asks for a disability-related topic, provide a well-informed response based on the context documents.

Context documents:
{context}

Please provide a clear response using the above context"""

    # Get the current settings to check if Think mode is enabled
    settings = cl.user_session.get("settings", {})
    use_think = settings.get("Think", True)  # Default to True as per the initial setting

    # '/no_think' is prepended when Think mode is off — presumably a Qwen
    # directive that disables the model's thinking phase (TODO confirm
    # against the Qwen3 prompt documentation).
    if not use_think:
        system_prompt = '/no_think ' + system_prompt

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ])

    # create_stuff_documents_chain "stuffs" all retrieved Documents into the
    # {context} slot of the prompt, then pipes the result through the model.
    question_answer_chain = create_stuff_documents_chain(model, prompt)

    # Use a custom chain that properly handles our context and history
    def process_input_and_format(inputs):
        context_data = get_context_and_history(inputs)
        return {
            "context": context_data["context"],
            "question": context_data["question"],
            "history": context_data["history"]
        }

    chain = RunnableLambda(process_input_and_format) | question_answer_chain

    return chain
173
+
174
+
175
+ # ============== Application Setup ==============
176
+
177
+
178
+ # Authentication
179
+ @cl.password_auth_callback
180
+ def auth(username: str, password: str) -> Optional[cl.User]:
181
+ if (username, password) == ("admin", SecretStr(os.getenv("PASSWORD"))):
182
+ return cl.User(
183
+ identifier="admin",
184
+ metadata={"role": "admin", "provider": "credentials"},
185
+ id="admin_id"
186
+ )
187
+ return None
188
+
189
@cl.oauth_callback
def oauth_callback(
    provider_id: str,
    token: str,
    raw_user_data: Dict[str, str],
    default_user: cl.User,
) -> Optional[cl.PersistedUser]:
    """Accept every successful OAuth login as-is.

    No extra authorization checks are performed; the user object built by
    Chainlit from the provider's data is returned unchanged.
    """
    return default_user
197
+
198
# Starters
@cl.set_starters
async def set_starters():
    """Suggested conversation starters shown on the welcome screen."""
    prompts = [
        ("Considerations for Autistic People",
         "What considerations should be made for autistic people?"),
        ("Explain Equality Act 2010",
         "Explain the Equality Act 2010 in simple terms."),
    ]
    return [cl.Starter(label=label, message=message) for label, message in prompts]
211
+
212
+
213
# Chat lifecycle
@cl.on_chat_start
async def on_chat_start():
    """Initialize a chat session: settings widget, RAG chain, empty history.

    FIX: the original kept its docstring as a dead string literal in the
    middle of the function and bound the ChatSettings result to an unused
    `settings` variable; both are cleaned up here (behavior unchanged).
    """
    # Render the settings panel; the session default below must match the
    # Switch's `initial` value.
    await cl.ChatSettings(
        [
            Switch(id="Think", label="Use Deep Thinking", initial=True),
        ]
    ).send()

    # Store initial settings
    cl.user_session.set("settings", {"Think": True})

    chain = setup_rag_chain()
    cl.user_session.set("chain", chain)
    cl.user_session.set("messages", [])
229
+
230
+
231
@cl.on_settings_update
async def setup_agent(settings):
    """Persist updated chat settings and rebuild the RAG chain with them."""
    print("on_settings_update", settings)
    # Persist first so setup_rag_chain() reads the new values from the session.
    cl.user_session.set("settings", settings)
    cl.user_session.set("chain", setup_rag_chain())
240
+
241
+
242
@cl.on_chat_resume
async def on_chat_resume(thread: ThreadDict):
    """Rebuild conversation history and the RAG chain for a resumed thread."""
    history = []
    for step in thread["steps"]:
        # Only top-level steps (no parent) are part of the transcript.
        if step["parentId"] is not None:
            continue
        if step["type"] == "user_message":
            history.append(HumanMessage(content=step["output"]))
        else:
            history.append(AIMessage(content=step["output"]))

    cl.user_session.set("messages", history)

    cl.user_session.set("chain", setup_rag_chain())
259
+
260
@cl.on_message
async def on_message(message: cl.Message):
    """Handle incoming messages with RAG and conversation history."""
    chain = cl.user_session.get("chain")
    messages = cl.user_session.get("messages", [])
    # Stream the final answer; tokens after "</think> " are treated as the
    # answer — apparently the model emits reasoning inside <think> tags
    # (TODO confirm against Qwen3 output format).
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["</think> "]
    )

    try:
        # Get the relevant documents for citations.
        # NOTE(review): this duplicates the retrieval done inside the chain
        # (setup_rag_chain -> get_context_and_history), so every message
        # triggers two identical Milvus searches — consider sharing one.
        relevant_docs = retrieve_relevant_documents(message.content, limit=5)
        citations = format_docs_with_id(relevant_docs)

        answer = await chain.ainvoke({"question": message.content}, config=RunnableConfig(callbacks=[cb]))

        # Show the citations in a collapsible "References" step.
        async with cl.Step(name="References") as step:
            if relevant_docs:
                step.output = citations
            else:
                step.output = "No relevant documents found for this query."

        # Update conversation history
        messages.append(HumanMessage(content=message.content))
        messages.append(AIMessage(content=answer))

        cl.user_session.set("messages", messages)

    except Exception as e:
        await cl.Message(content=f"Sorry, I encountered an error: {str(e)}").send()
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! 🚀🤖
2
+
3
+ Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links 🔗
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! 💻😊
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
compose.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.5'
2
+
3
+ services:
4
+ etcd:
5
+ container_name: milvus-etcd
6
+ image: quay.io/coreos/etcd:v3.5.18
7
+ environment:
8
+ - ETCD_AUTO_COMPACTION_MODE=revision
9
+ - ETCD_AUTO_COMPACTION_RETENTION=1000
10
+ - ETCD_QUOTA_BACKEND_BYTES=4294967296
11
+ - ETCD_SNAPSHOT_COUNT=50000
12
+ volumes:
13
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
14
+ command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
15
+ healthcheck:
16
+ test: ["CMD", "etcdctl", "endpoint", "health"]
17
+ interval: 30s
18
+ timeout: 20s
19
+ retries: 3
20
+
21
+ minio:
22
+ container_name: milvus-minio
23
+ image: minio/minio:RELEASE.2024-05-28T17-19-04Z
24
+ environment:
25
+ MINIO_ACCESS_KEY: minioadmin
26
+ MINIO_SECRET_KEY: minioadmin
27
+ ports:
28
+ - "9001:9001"
29
+ - "9000:9000"
30
+ volumes:
31
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
32
+ command: minio server /minio_data --console-address ":9001"
33
+ healthcheck:
34
+ test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
35
+ interval: 30s
36
+ timeout: 20s
37
+ retries: 3
38
+
39
+ standalone:
40
+ container_name: milvus-standalone
41
+ image: milvusdb/milvus:v2.5.14
42
+ command: ["milvus", "run", "standalone"]
43
+ security_opt:
44
+ - seccomp:unconfined
45
+ environment:
46
+ MINIO_REGION: us-east-1
47
+ ETCD_ENDPOINTS: etcd:2379
48
+ MINIO_ADDRESS: minio:9000
49
+ volumes:
50
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
51
+ healthcheck:
52
+ test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
53
+ interval: 30s
54
+ start_period: 90s
55
+ timeout: 20s
56
+ retries: 3
57
+ ports:
58
+ - "19530:19530"
59
+ - "9091:9091"
60
+ depends_on:
61
+ etcd:
62
+ condition: service_healthy
63
+ minio:
64
+ condition: service_healthy
65
+
66
+ app:
67
+ container_name: rag-app
68
+ image: rag-app
69
+ build:
70
+ context: .
71
+ dockerfile: Dockerfile
72
+ ports:
73
+ - "8000:8000"
74
+ environment:
75
+ - MILVUS_URI=http://standalone:19530
76
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
77
+ depends_on:
78
+ - standalone
79
+
80
+ networks:
81
+ default:
82
+ name: milvus
populate_db.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from pathlib import Path
3
+ from unstructured.cleaners.core import clean_extra_whitespace, replace_unicode_quotes, clean_dashes, group_broken_paragraphs
4
+ from langchain_unstructured import UnstructuredLoader
5
+ from sentence_transformers import SentenceTransformer
6
+ from pymilvus import MilvusClient, DataType
7
+
8
import os

# Initialize Milvus client and collection setup.
# FIX: the URI was hardcoded to localhost, but app.py imports main() from
# this module and connects via the MILVUS_URI env var (set to
# http://standalone:19530 in compose.yml) — the hardcoded value would make
# the import-time population fail inside the container. Use the same
# env-var-with-localhost-default scheme as app.py.
MILVUS_URI = os.getenv("MILVUS_URI", "http://localhost:19530")
milvus_client = MilvusClient(uri=MILVUS_URI)
collection_name = "my_rag_collection"

# Drop existing collection if it exists, so re-runs start from a clean slate.
if milvus_client.has_collection(collection_name):
    milvus_client.drop_collection(collection_name)

# Initialize embedding model (must match the one used by app.py).
embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
18
+
19
def emb_text(text):
    """Return the normalized embedding of `text` as a plain list of floats."""
    encoded = embedding_model.encode([text], normalize_embeddings=True)
    return encoded.tolist()[0]
22
+
23
# Create Milvus collection schema: explicit integer primary key, the
# embedding vector, the raw chunk text, and free-form JSON metadata.
schema = milvus_client.create_schema(auto_id=False, enable_dynamic_field=False)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=384)  # BGE-small-en-v1.5 dimension
schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=32768)  # max_length is in characters
schema.add_field(field_name="metadata", datatype=DataType.JSON)

# Create index for vector search. COSINE matches the normalized embeddings
# produced by emb_text; AUTOINDEX lets Milvus pick the index implementation.
index_params = MilvusClient.prepare_index_params()
index_params.add_index(
    field_name="vector",
    metric_type="COSINE",
    index_type="AUTOINDEX",
)

# Create and load collection ("Strong" consistency: reads see prior writes).
milvus_client.create_collection(
    collection_name=collection_name,
    schema=schema,
    index_params=index_params,
    consistency_level="Strong",
)
milvus_client.load_collection(collection_name=collection_name)

# Directory scanned by unstructured_document_loader() for source documents.
directory_path = "data/"
49
+
50
def main():
    """Main function to load documents and insert them into Milvus.

    Loads and chunks everything under `directory_path`, embeds each chunk,
    inserts the rows into the collection, and returns the loaded documents.
    """
    docs = unstructured_document_loader()

    # Prepare data for insertion
    data_to_insert = []

    print(f"Processing {len(docs)} documents for insertion...")

    for i, doc in enumerate(docs):
        # Check text length and truncate if necessary — the VARCHAR field
        # was created with max_length=32768 above.
        text_content = doc.page_content
        if len(text_content) > 32000:  # Leave some buffer below 32KB limit
            text_content = text_content[:32000]
            print(f"Document {i+1} truncated from {len(doc.page_content)} to {len(text_content)} characters")

        # Generate embedding for the document content
        embedding = emb_text(text_content)

        # Prepare the data entry; `id` is the chunk's position in `docs`
        # since the collection was created with auto_id=False.
        data_entry = {
            "id": i,
            "vector": embedding,
            "text": text_content,
            "metadata": doc.metadata if doc.metadata else {}
        }

        data_to_insert.append(data_entry)

        # Print progress every 100 documents
        if (i + 1) % 100 == 0:
            print(f"Processed {i + 1}/{len(docs)} documents")

    print(f"Inserting {len(data_to_insert)} documents into Milvus...")

    # Insert data into Milvus
    insert_result = milvus_client.insert(
        collection_name=collection_name,
        data=data_to_insert
    )

    print(f"Successfully inserted {insert_result['insert_count']} documents")
    print(f"Primary keys: {insert_result['ids'][:10]}...")  # Show first 10 IDs

    return docs
+
96
def unstructured_document_loader():
    """Load and chunk PDF/DOCX/HTML files from `directory_path`.

    Uses UnstructuredLoader with title-based chunking plus text-cleaning
    post-processors. Returns a list of LangChain documents — possibly
    empty when the data directory contains no matching files.
    """
    # Collect file paths for PDF, DOCX, and HTML files
    file_extensions = ["*.pdf", "*.docx", "*.html"]
    file_paths = []

    for ext in file_extensions:
        file_paths.extend(Path(directory_path).glob(ext))

    # Convert Path objects to strings (UnstructuredLoader expects str paths)
    file_paths = [str(file) for file in file_paths]

    # Configure UnstructuredLoader with post-processors
    loader = UnstructuredLoader(
        file_paths,
        chunking_strategy="by_title",
        include_orig_elements=False,
        post_processors=[
            clean_extra_whitespace,
            replace_unicode_quotes,
            clean_dashes,
            group_broken_paragraphs
        ]
    )

    docs = loader.load()
    print(f"Number of LangChain documents: {len(docs)}")
    # BUG FIX: the original indexed docs[0] unconditionally, raising
    # IndexError when the data directory was empty.
    if docs:
        print(f"Length of first document: {len(docs[0].page_content)} characters")
        print(f"First document preview: {docs[0].page_content[:200]}...")

    return docs
127
+
128
def verify_insertion():
    """Verify that data was successfully inserted into Milvus.

    Prints the collection statistics, then runs one sample vector search
    and prints the top-3 hits (score, text preview, metadata).
    """
    # Get collection statistics
    stats = milvus_client.get_collection_stats(collection_name)
    print(f"Collection stats: {stats}")

    # Test search functionality with a sample query
    test_query = "Questions by staff to other staff"
    test_embedding = emb_text(test_query)

    search_results = milvus_client.search(
        collection_name=collection_name,
        data=[test_embedding],
        limit=3,
        output_fields=["text", "metadata"]
    )

    print(f"\nTest search results for '{test_query}':")
    for i, result in enumerate(search_results[0]):
        print(f"Result {i+1}:")
        print(f"  Score: {result['distance']:.4f}")
        print(f"  Text preview: {result['entity']['text'][:200]}...")
        print(f"  Metadata: {result['entity']['metadata']}")
        print("-" * 50)
152
+
153
+
154
if __name__ == "__main__":
    # Time the whole ingestion + verification run.
    start_time = time.time()

    banner = "=" * 60
    print(banner)
    print("STARTING DOCUMENT PROCESSING AND MILVUS INSERTION")
    print(banner)

    main()

    print("\n" + "=" * 50)
    print("VERIFYING DATA INSERTION")
    print("=" * 50)
    verify_insertion()

    elapsed_time = time.time() - start_time
    print(f"\nTotal execution time: {elapsed_time:.2f} seconds")
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ chainlit>=1.0.0
2
+ openai>=1.0.0
3
+ huggingface_hub>=0.20.0
4
+ pymilvus>=2.3.0
5
+ sentence-transformers>=2.2.0
6
+ unstructured[all-docs]>=0.10.0
7
+ langchain-unstructured>=0.1.0
8
+ langchain>=0.1.0
9
+ langchain-core>=0.1.0
10
+ python-dotenv>=1.0.0
11
+ pydantic>=2.0.0
12
+ fastapi>=0.100.0
13
+ uvicorn>=0.20.0
14
+ langchain_nebius>=0.1.0