vamsidharmuthireddy committed on
Commit
3cfb95f
·
verified ·
1 Parent(s): 4c85569

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +6 -4
  2. app.py +36 -233
  3. graph.py +182 -0
  4. load_vector_db.py +105 -0
  5. logging_config.py +47 -0
  6. requirements.txt +1 -0
  7. utils.py +25 -0
Dockerfile CHANGED
@@ -5,8 +5,6 @@ ENV PYTHONDONTWRITEBYTECODE=1
5
  ENV PYTHONUNBUFFERED=1
6
  ENV STREAMLIT_HOME=/app/.streamlit
7
 
8
- ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
9
-
10
  RUN useradd -m -u 1000 appuser
11
 
12
  # Set working directory
@@ -31,9 +29,13 @@ COPY requirements.txt .
31
  RUN pip install --no-cache-dir --upgrade pip && pip install -r requirements.txt
32
 
33
  # Copy app files
34
- COPY app.py .
35
  COPY docs .
36
- COPY vectordb_milvus.db .
 
 
 
 
 
37
 
38
  # Create required directories and fix permissions
39
  RUN mkdir -p $STREAMLIT_HOME && \
 
5
  ENV PYTHONUNBUFFERED=1
6
  ENV STREAMLIT_HOME=/app/.streamlit
7
 
 
 
8
  RUN useradd -m -u 1000 appuser
9
 
10
  # Set working directory
 
29
  RUN pip install --no-cache-dir --upgrade pip && pip install -r requirements.txt
30
 
31
# Copy app files
# NOTE: `COPY <dir> .` copies the directory's *contents* into WORKDIR, so
# the relative paths the Python code opens — "docs/" (load_vector_db.py)
# and "db/vectordb_milvus.db" (Milvus Lite URI) — would not exist inside
# the image. Copy into named subdirectories to preserve the layout.
COPY docs ./docs
COPY db ./db
COPY logging_config.py .
COPY load_vector_db.py .
COPY app.py .
COPY graph.py .
COPY utils.py .
39
 
40
  # Create required directories and fix permissions
41
  RUN mkdir -p $STREAMLIT_HOME && \
app.py CHANGED
@@ -16,16 +16,25 @@ from langchain_core.tools import tool
16
  from langchain_core.messages import SystemMessage
17
  from langgraph.prebuilt import ToolNode, tools_condition
18
  from langchain_milvus import Milvus
 
 
 
 
 
 
 
19
 
20
 
21
  # Load environment variables
22
- load_dotenv()
23
 
24
  # Set AWS credentials from environment variables
25
  os.environ["AWS_ACCESS_KEY_ID"] = os.environ.get("aws_access_key_id")
26
  os.environ["AWS_SECRET_ACCESS_KEY"] = os.environ.get("aws_secret_access_key")
27
  os.environ["AWS_SESSION_TOKEN"] = os.environ.get("aws_session_token")
28
  os.environ["AWS_DEFAULT_REGION"] = os.environ.get("AWS_DEFAULT_REGION")
 
 
29
 
30
  # Initialize session state variables if they don't exist
31
  if "messages" not in st.session_state:
@@ -33,235 +42,7 @@ if "messages" not in st.session_state:
33
  if "initialized" not in st.session_state:
34
  st.session_state.initialized = False
35
 
36
- def init_vector_db(embeddings):
37
- # Initialize vector store
38
- URI = "./vectordb_milvus.db"
39
- collection_name = "my_collection"
40
-
41
-
42
- # Check if the collection already exists
43
- try:
44
- # if os.path.exists(URI):
45
- # st.info("Found existing Milvus db.")
46
-
47
- # First try to connect to existing collection
48
- st.info("Checking for existing Milvus db...")
49
- vector_store = Milvus(
50
- embedding_function=embeddings,
51
- connection_args={"uri": URI},
52
- auto_id=True,
53
- collection_name=collection_name,
54
- index_params={"index_type": "FLAT", "metric_type": "COSINE"},
55
- )
56
-
57
- results = vector_store.similarity_search("test query", k=1)
58
-
59
- if len(results) > 0:
60
- st.success("Document data found in existing collection.")
61
- documents_loaded = True
62
- else:
63
- st.info("Collection exists but might be empty. Will check for documents.")
64
- documents_loaded = False
65
-
66
- except Exception as e:
67
- st.info("Creating new Milvus collection...")
68
- vector_store = Milvus(
69
- embedding_function=embeddings,
70
- connection_args={"uri": URI},
71
- auto_id=True,
72
- collection_name=collection_name,
73
- index_params={"index_type": "FLAT", "metric_type": "COSINE"},
74
- )
75
- documents_loaded = False
76
-
77
- # Load documents if needed
78
- if not documents_loaded:
79
- folder_path = "docs"
80
- loader = DirectoryLoader(
81
- folder_path,
82
- glob="**/*.pdf",
83
- loader_cls=PyPDFLoader
84
- )
85
-
86
- try:
87
- documents = loader.load()
88
- st.info(f"Loaded {len(documents)} PDF pages.")
89
-
90
- if len(documents) > 0:
91
- # Split documents
92
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
93
- all_splits = text_splitter.split_documents(documents)
94
- st.info(f"Total Document splits: {len(all_splits)}")
95
-
96
- # Add documents to vector store
97
- _ = vector_store.add_documents(documents=all_splits)
98
- st.success("Documents added to vector store.")
99
- else:
100
- st.warning("No PDF documents found in the 'docs' folder.")
101
- except Exception as e:
102
- st.error(f"Error loading documents: {e}")
103
-
104
- return vector_store
105
-
106
-
107
-
108
- def init_app():
109
- """Initialize the app components and return them."""
110
- with st.spinner("Initializing PDF chat application..."):
111
- # Initialize LLM
112
- llm = init_chat_model(
113
- "anthropic.claude-3-5-sonnet-20240620-v1:0",
114
- model_provider="bedrock_converse",
115
- temperature=0
116
- )
117
 
118
- # Initialize embeddings
119
- embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1")
120
-
121
- vector_store = init_vector_db(embeddings)
122
-
123
- class State(MessagesState):
124
- context: List[Document]
125
-
126
- # Create a retrieval tool that captures the vector_store
127
- @tool(response_format="content_and_artifact")
128
- def retrieve_tool(query: str):
129
- """Retrieve information related to a query."""
130
- retrieved_docs = vector_store.similarity_search(query, k=5)
131
- serialized = "\n\n".join(
132
- (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
133
- for doc in retrieved_docs
134
- )
135
- print(f"retrieved_docs : {retrieved_docs}")
136
- return serialized, retrieved_docs
137
-
138
- # Create the LLM tool-calling function with direct reference to llm
139
- def query_or_respond_fn(state: State):
140
- """Generate tool call for retrieval or respond."""
141
- print(f"state['messages'] : {state["messages"]}")
142
- valid_messages = [
143
- msg for msg in state["messages"]
144
- if msg.content
145
- ]
146
-
147
- if not valid_messages:
148
- return {"messages": []}
149
- llm_with_tools = llm.bind_tools([retrieve_tool])
150
- response = llm_with_tools.invoke(state["messages"])
151
- # MessagesState appends messages to state instead of overwriting
152
- return {"messages": [response]}
153
-
154
- # Create the generate function with direct reference to llm
155
- def generate_fn(state: State):
156
- """Generate answer."""
157
- # Get generated ToolMessages
158
- recent_tool_messages = []
159
- for message in reversed(state["messages"]):
160
- if message.type == "tool":
161
- recent_tool_messages.append(message)
162
- else:
163
- break
164
- tool_messages = recent_tool_messages[::-1]
165
-
166
- # Format into prompt
167
- sources_text = ""
168
- # print(f"tool_messages { tool_messages}")
169
- print(f"tool_messages { len(tool_messages)}")
170
-
171
- tool_messages_latest = tool_messages[0]
172
- for artifact in tool_messages_latest.artifact:
173
- # artifact = i.artifact
174
- page_label = artifact.metadata.get('page_label')
175
- page = artifact.metadata.get('page')
176
- source = artifact.metadata.get('source')
177
-
178
- sources_text += f"Source: {source}, Page: {page}, Page Label: {page_label}\n"
179
-
180
- # print(source, page, page_label)
181
- print(f"sources_text { sources_text}")
182
-
183
- docs_content = "\n\n".join(doc.content for doc in tool_messages)
184
- system_message_content = (
185
- "You are an assistant for question-answering tasks."
186
- "Use the following pieces of retrieved context to answer the question."
187
- "This is your only source of knowledge."
188
- "If you don't know the answer, say that you don't know and STOP - do not provide related information."
189
- "You are not allowed to make up answers."
190
- "You are not allowed to use any external knowledge."
191
- "You are not allowed to make assumptions."
192
- "If the query is not clearly and directly addressed in the knowledge source, simply state that you don't have enough information and DO NOT elaborate with tangentially related content."
193
- "Keep your answers strictly limited to information that directly answers the user's specific question."
194
- "When information is insufficient, acknowledge this limitation in one sentence without expanding into related topics."
195
- "If the query is not clear, ask for clarification."
196
- "If the query is not related to the given knowledge source, mention that you can only answer from the knowledge base."
197
- "Keep your answers accurate and concise to the source content."
198
- "\n\n"
199
- f"{docs_content}"
200
-
201
- # "Provide the source of the answer like this format at the end of the response: 'Page: Page Number, Source: Source Name' There could be multiple sources, so adjust the response accordingly. Each new source reference should be listed on a new line following this format very strictly. "
202
- # """####Example: This format is **strictly required**. Do not combine multiple sources in the same line. No of lines and sources acn be dynamic.
203
- # Page: 1, Source: Source 1
204
- # Page: 2, Source: Source 2
205
- # Page: 3, Source: Source 3
206
- # """
207
- # f"Paste this content as is {sources_text}"
208
-
209
- )
210
- conversation_messages = [
211
- message
212
- for message in state["messages"]
213
- if message.type in ("human", "system")
214
- or (message.type == "ai" and not message.tool_calls)
215
- ]
216
- prompt = [SystemMessage(system_message_content)] + conversation_messages
217
-
218
- # Run
219
- response = llm.invoke(prompt)
220
- # return {"messages": [response]}
221
- context = []
222
- for tool_message in tool_messages:
223
- context.extend(tool_message.artifact)
224
- return {"messages": [response], "context": context}
225
-
226
- # Execute the retrieval
227
- tools_node = ToolNode([retrieve_tool])
228
-
229
- # Build the graph
230
- graph_builder = StateGraph(MessagesState)
231
- graph_builder.add_node("query_or_respond", query_or_respond_fn)
232
- graph_builder.add_node("tools", tools_node)
233
- graph_builder.add_node("generate", generate_fn)
234
- graph_builder.set_entry_point("query_or_respond")
235
- graph_builder.add_conditional_edges(
236
- "query_or_respond",
237
- tools_condition,
238
- {END: END, "tools": "tools"},
239
- )
240
- graph_builder.add_edge("tools", "generate")
241
- graph_builder.add_edge("generate", END)
242
- graph = graph_builder.compile()
243
-
244
- st.success("Initialization complete!")
245
- return {"graph": graph}
246
-
247
- def extract_text_from_content(content):
248
- """Extract text from various message content formats."""
249
- if isinstance(content, str):
250
- return content
251
- elif isinstance(content, list):
252
- # Handle list of text items or dictionaries
253
- text_parts = []
254
- for item in content:
255
- if isinstance(item, dict):
256
- # Extract text from dictionary format
257
- if 'text' in item:
258
- text_parts.append(item['text'])
259
- elif isinstance(item, str):
260
- text_parts.append(item)
261
- return ''.join(text_parts)
262
- else:
263
- # Fallback for any other format
264
- return str(content)
265
 
266
  def run_graph(graph, input_message: str):
267
  """Run the graph with the input message."""
@@ -286,6 +67,8 @@ def run_graph(graph, input_message: str):
286
  response_chunks = []
287
  values = []
288
 
 
 
289
  for mode, mode_chunk in graph.stream(
290
  input_message_formatted,
291
  stream_mode=["messages", "values"],
@@ -295,8 +78,24 @@ def run_graph(graph, input_message: str):
295
  elif mode == "messages":
296
  message, metadata = mode_chunk
297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  if metadata["langgraph_node"] == "generate":
299
  if hasattr(message, 'content'):
 
 
 
300
  content = message.content
301
  # Extract text depending on content format
302
  chunk_text = extract_text_from_content(content)
@@ -306,6 +105,9 @@ def run_graph(graph, input_message: str):
306
  yield chunk_text, values
307
  full_response = ''.join(response_chunks)
308
 
 
 
 
309
  # print(f"Full text: {full_response}")
310
  # print(f"full values: {values}")
311
  st.conversation_history.append({
@@ -320,7 +122,7 @@ st.title("PDF Question-Answering Chat")
320
  # Initialize the app if not already done
321
  if not st.session_state.initialized:
322
  try:
323
- app_components = init_app()
324
  st.session_state.app_components = app_components
325
  st.session_state.initialized = True
326
  st.conversation_history = []
@@ -353,13 +155,14 @@ if prompt := st.chat_input("Ask a question about your PDFs"):
353
  values = {}
354
  for chunk, values in run_graph(st.session_state.app_components["graph"], prompt):
355
  if chunk: # Only process non-empty chunks
 
356
  full_response += chunk
357
  message_placeholder.markdown(full_response + "▌")
358
 
359
  values = values[-1]
360
  # print(f"values: {values}")
361
- print(f"values keys: {values.keys()}")
362
- print(f"'context' in values: { 'context' in values }")
363
  if 'context' in values:
364
  pages_dict = {}
365
 
@@ -388,7 +191,7 @@ if prompt := st.chat_input("Ask a question about your PDFs"):
388
 
389
 
390
  message_placeholder.markdown(full_response)
391
- print(f"Full response: {full_response}")
392
 
393
  # Add assistant response to chat history
394
  st.session_state.messages.append({"role": "assistant", "content": full_response})
 
16
  from langchain_core.messages import SystemMessage
17
  from langgraph.prebuilt import ToolNode, tools_condition
18
  from langchain_milvus import Milvus
19
+ from utils import extract_text_from_content
20
+ from logging_config import setup_logger
21
+ from load_vector_db import init_vector_db
22
+ from graph import init_graph
23
+ import time
24
+
25
+ logger = setup_logger(__name__)
26
 
27
 
28
# Load environment variables (.env values take precedence over the shell).
load_dotenv(override=True)

# Map the lowercase .env credential names onto the canonical AWS variables
# read by the Bedrock clients. os.environ rejects None values, so fail with
# a clear message when a source variable is missing instead of raising an
# opaque TypeError at assignment time.
for _target, _source in [
    ("AWS_ACCESS_KEY_ID", "aws_access_key_id"),
    ("AWS_SECRET_ACCESS_KEY", "aws_secret_access_key"),
    ("AWS_SESSION_TOKEN", "aws_session_token"),
    ("AWS_DEFAULT_REGION", "AWS_DEFAULT_REGION"),
]:
    _value = os.environ.get(_source)
    if _value is None:
        raise RuntimeError(f"Missing required environment variable: {_source}")
    os.environ[_target] = _value

# SECURITY: the previous `print(os.environ["AWS_ACCESS_KEY_ID"])` wrote a
# live credential to stdout (and any captured logs); never print secrets.
38
 
39
  # Initialize session state variables if they don't exist
40
  if "messages" not in st.session_state:
 
42
  if "initialized" not in st.session_state:
43
  st.session_state.initialized = False
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def run_graph(graph, input_message: str):
48
  """Run the graph with the input message."""
 
67
  response_chunks = []
68
  values = []
69
 
70
+ start = time.time()
71
+ time_to_start_streaming = None
72
  for mode, mode_chunk in graph.stream(
73
  input_message_formatted,
74
  stream_mode=["messages", "values"],
 
78
  elif mode == "messages":
79
  message, metadata = mode_chunk
80
 
81
+ # if metadata["langgraph_node"] == "query_or_respond":
82
+ # logger.info(f"message.tool_calls: {message.tool_calls}")
83
+ # if not message.tool_calls:
84
+ # content = message.content
85
+ # logger.info(f"query_or_respond content type: {isinstance(content, str)}")
86
+ # logger.info(f"query_or_respond content: {content}")
87
+ # if isinstance(content, str):
88
+ # chunk_text = content
89
+ # # chunk_text = extract_text_from_content(content)
90
+ # if chunk_text:
91
+ # response_chunks.append(chunk_text)
92
+ # yield chunk_text, values
93
+
94
  if metadata["langgraph_node"] == "generate":
95
  if hasattr(message, 'content'):
96
+ if time_to_start_streaming is None:
97
+ time_to_start_streaming = time.time() - start
98
+ logger.info(f"Time taken to start streaming: {time_to_start_streaming} seconds")
99
  content = message.content
100
  # Extract text depending on content format
101
  chunk_text = extract_text_from_content(content)
 
105
  yield chunk_text, values
106
  full_response = ''.join(response_chunks)
107
 
108
+ logger.info(f"Time taken for complete generation: {time.time() - start} seconds")
109
+
110
+
111
  # print(f"Full text: {full_response}")
112
  # print(f"full values: {values}")
113
  st.conversation_history.append({
 
122
  # Initialize the app if not already done
123
  if not st.session_state.initialized:
124
  try:
125
+ app_components = init_graph()
126
  st.session_state.app_components = app_components
127
  st.session_state.initialized = True
128
  st.conversation_history = []
 
155
  values = {}
156
  for chunk, values in run_graph(st.session_state.app_components["graph"], prompt):
157
  if chunk: # Only process non-empty chunks
158
+ # print(f"Chunk: {chunk}")
159
  full_response += chunk
160
  message_placeholder.markdown(full_response + "▌")
161
 
162
  values = values[-1]
163
  # print(f"values: {values}")
164
+ logger.info(f"values keys: {values.keys()}")
165
+ logger.info(f"'context' in values: { 'context' in values }")
166
  if 'context' in values:
167
  pages_dict = {}
168
 
 
191
 
192
 
193
  message_placeholder.markdown(full_response)
194
+ logger.info(f"Full response: {full_response}")
195
 
196
  # Add assistant response to chat history
197
  st.session_state.messages.append({"role": "assistant", "content": full_response})
graph.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from dotenv import load_dotenv
import os
import streamlit as st
from langchain_aws import BedrockEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.chat_models import init_chat_model
from langchain_core.documents import Document
from typing_extensions import List, Dict

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph, END

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langgraph.graph import MessagesState
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_milvus import Milvus
from utils import extract_text_from_content
# FIX: setup_logger was imported twice (duplicate line removed).
from logging_config import setup_logger
from load_vector_db import init_vector_db
import time

# Module-level logger shared by the graph node functions.
logger = setup_logger(__name__)
26
+
27
+
28
def init_graph():
    """Build and return the LangGraph RAG pipeline.

    Initializes the Bedrock chat model and embeddings, opens the Milvus
    vector store plus the reranking retriever, then wires up the
    query_or_respond -> tools -> generate graph.

    Returns:
        dict: {"graph": compiled LangGraph}, consumed by app.py.
    """
    with st.spinner("Initializing PDF chat application..."):
        # Initialize LLM (temperature 0 for deterministic QA answers).
        llm = init_chat_model(
            "anthropic.claude-3-5-sonnet-20240620-v1:0",
            model_provider="bedrock_converse",
            temperature=0
        )

        # Initialize embeddings
        embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1")

        # vector_store is kept for potential direct similarity_search use;
        # retrieval itself goes through the reranking compression_retriever.
        vector_store, compression_retriever = init_vector_db(embeddings)

        # Graph state: conversation messages plus the retrieved documents
        # ("context"), which the UI uses for source attribution.
        class State(MessagesState):
            context: List[Document]

        # Create a retrieval tool that closes over compression_retriever.
        @tool(response_format="content_and_artifact")
        def retrieve_tool(query: str):
            """Retrieve information related to a query."""
            start = time.time()
            # retrieved_docs = vector_store.similarity_search(query, k=50)
            # NOTE(review): the compressor already caps results via top_n=10;
            # the extra k=10 kwarg here is presumably ignored — confirm
            # against the ContextualCompressionRetriever API.
            retrieved_docs = compression_retriever.invoke(input=query, k=10)
            serialized = "\n\n".join(
                (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
                for doc in retrieved_docs
            )
            end = time.time()
            logger.info(f"Time taken for vectordb retrieval: {end - start} seconds")
            logger.info(f"retrieved_docs num: {len(retrieved_docs)}")
            logger.info(f"retrieved_docs : {retrieved_docs}")
            return serialized, retrieved_docs

        # Create the LLM tool-calling function with direct reference to llm.
        def query_or_respond_fn(state: State):
            """Generate tool call for retrieval or respond directly."""
            start = time.time()
            logger.info(f"state['messages'] : {state['messages']}")
            # Skip entirely-empty messages (e.g. stripped tool stubs).
            valid_messages = [
                msg for msg in state["messages"]
                if msg.content
            ]

            if not valid_messages:
                return {"messages": []}
            llm_with_tools = llm.bind_tools([retrieve_tool])
            response = llm_with_tools.invoke(state["messages"])
            end = time.time()
            logger.info(f"Time taken for query_or_respond_fn LLM invocation: {end - start} seconds")
            # MessagesState appends messages to state instead of overwriting
            return {"messages": [response]}

        # Create the generate function with direct reference to llm.
        def generate_fn(state: State):
            """Generate the final answer from the retrieved context."""
            start = time.time()
            # Collect the ToolMessages produced by the most recent tool run
            # (walk backwards until a non-tool message is hit).
            recent_tool_messages = []
            for message in reversed(state["messages"]):
                if message.type == "tool":
                    recent_tool_messages.append(message)
                else:
                    break
            tool_messages = recent_tool_messages[::-1]
            logger.info(f"tool_messages {tool_messages}")

            # Build a human-readable source list from the latest retrieval
            # (logged only; not injected into the prompt).
            sources_text = ""
            tool_messages_latest = tool_messages[0]
            for artifact in tool_messages_latest.artifact:
                page_label = artifact.metadata.get('page_label')
                page = artifact.metadata.get('page')
                source = artifact.metadata.get('source')
                sources_text += f"Source: {source}, Page: {page}, Page Label: {page_label}\n"
            logger.info(f"sources_text {sources_text}")

            docs_content = "\n\n".join(doc.content for doc in tool_messages)
            system_message_content = (
                "You are an assistant for question-answering tasks."
                "Use the following pieces of retrieved context to answer the question."
                "This is your only source of knowledge."
                "If you don't know the answer, say that you don't know and STOP - do not provide related information."
                "You are not allowed to make up answers."
                "You are not allowed to use any external knowledge."
                "You are not allowed to make assumptions."
                "If the query is not clearly and directly addressed in the knowledge source, simply state that you don't have enough information and DO NOT elaborate with tangentially related content."
                "Keep your answers strictly limited to information that directly answers the user's specific question."
                "When information is insufficient, acknowledge this limitation in one sentence without expanding into related topics."
                "If the query is single word or phrase, ask the user to provide a complete question."
                "If the query is not clear, ask for clarification."
                "If the query is not a complete question, ask the user to provide a complete question and provide some sample questions."
                "If the query contains multiple questions, answer only the first question and ask the user to ask the next question."
                "If the query contains complex or compound questions, break them down into simpler parts and answer each part separately."
                "If the query is not related to the given knowledge source, mention that you can only answer from the knowledge base."
                "Keep your answers accurate and concise to the source content."
                "\n\n"
                f"{docs_content}"

            )
            # Keep only human/system turns and final (non-tool-calling) AI turns.
            conversation_messages = [
                message
                for message in state["messages"]
                if message.type in ("human", "system")
                or (message.type == "ai" and not message.tool_calls)
            ]
            prompt = [SystemMessage(system_message_content)] + conversation_messages

            # Run the answer generation.
            start_llm = time.time()
            response = llm.invoke(prompt)
            # Surface every retrieved document in the "context" channel.
            context = []
            for tool_message in tool_messages:
                context.extend(tool_message.artifact)

            end = time.time()
            logger.info(f"Time taken for generate_fn : {end - start} seconds")
            logger.info(f"Time taken for generate_fn LLM invocation: {end - start_llm} seconds")

            return {"messages": [response], "context": context}

        # Execute the retrieval
        tools_node = ToolNode([retrieve_tool])

        # Build the graph.
        # FIX: use State (which declares the "context" channel) as the graph
        # schema — the previous StateGraph(MessagesState) had no "context"
        # key, so generate_fn's {"context": ...} update targeted an
        # undeclared channel.
        graph_builder = StateGraph(State)
        graph_builder.add_node("query_or_respond", query_or_respond_fn)
        graph_builder.add_node("tools", tools_node)
        graph_builder.add_node("generate", generate_fn)
        graph_builder.set_entry_point("query_or_respond")
        graph_builder.add_conditional_edges(
            "query_or_respond",
            tools_condition,
            {END: END, "tools": "tools"},
        )
        graph_builder.add_edge("tools", "generate")
        graph_builder.add_edge("generate", END)
        graph = graph_builder.compile()

        st.success("Initialization complete!")
        return {"graph": graph}
load_vector_db.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from langchain_aws import BedrockEmbeddings
4
+
5
+ from langchain.chat_models import init_chat_model
6
+ from langchain_core.documents import Document
7
+ from typing_extensions import List, Dict, TypedDict
8
+
9
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
10
+ from langgraph.graph import START, StateGraph, END
11
+
12
+ from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
13
+ from langgraph.graph import MessagesState
14
+ from langchain_core.tools import tool
15
+ from langchain_core.messages import SystemMessage
16
+ from langgraph.prebuilt import ToolNode, tools_condition
17
+ from langchain_milvus import Milvus
18
+ from langchain_openai import ChatOpenAI
19
+ from pydantic import BaseModel, Field
20
+ from logging_config import setup_logger
21
+ from flashrank import Ranker
22
+ from langchain_community.document_compressors import FlashrankRerank
23
+ from langchain.retrievers import ContextualCompressionRetriever
24
+
25
+ logger = setup_logger(__name__)
26
+
27
+
28
def init_vector_db(embeddings):
    """Open (or build) the Milvus vector store and a reranking retriever.

    Connects to the local Milvus Lite database, ingesting the PDFs under
    ``docs/`` when the collection is missing or empty, then wraps the
    store's retriever in a FlashRank-based contextual compression retriever.

    Args:
        embeddings: Embedding model used by the vector store.

    Returns:
        tuple: (vector_store, compression_retriever)
    """
    # Initialize vector store
    URI = "db/vectordb_milvus.db"
    collection_name = "my_collection"

    # Milvus Lite needs the DB file's parent directory to exist.
    os.makedirs(os.path.dirname(URI), exist_ok=True)

    # Check if the collection already exists
    try:
        st.info("Checking for existing Milvus db...")
        vector_store = Milvus(
            embedding_function=embeddings,
            connection_args={"uri": URI},
            auto_id=True,
            collection_name=collection_name,
            index_params={"index_type": "FLAT", "metric_type": "COSINE"},
        )

        # A probe query tells us whether the collection holds any data.
        results = vector_store.similarity_search("test query", k=1)

        if len(results) > 0:
            st.success("Document data found in existing collection.")
            documents_loaded = True
        else:
            st.info("Collection exists but might be empty. Will check for documents.")
            documents_loaded = False

    except Exception as e:
        # FIX: the failure was previously swallowed silently; record it so a
        # broken DB file is distinguishable from a simple first run.
        logger.warning(f"Could not open existing collection ({e}); creating a new one.")
        st.info("Creating new Milvus collection...")
        vector_store = Milvus(
            embedding_function=embeddings,
            connection_args={"uri": URI},
            auto_id=True,
            collection_name=collection_name,
            index_params={"index_type": "FLAT", "metric_type": "COSINE"},
        )
        documents_loaded = False

    # Load documents if needed
    if not documents_loaded:
        folder_path = "docs"
        loader = DirectoryLoader(
            folder_path,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader
        )

        try:
            documents = loader.load()
            st.info(f"Loaded {len(documents)} PDF pages.")

            if len(documents) > 0:
                # Split documents into overlapping chunks for retrieval.
                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
                all_splits = text_splitter.split_documents(documents)
                st.info(f"Total Document splits: {len(all_splits)}")

                # Add documents to vector store
                _ = vector_store.add_documents(documents=all_splits)
                st.success("Documents added to vector store.")
            else:
                st.warning("No PDF documents found in the 'docs' folder.")
        except Exception as e:
            # FIX: log the full traceback instead of only flashing it in the UI.
            logger.exception("Error loading documents")
            st.error(f"Error loading documents: {e}")

    # Over-fetch 50 candidates, then let the reranker keep the 10 best.
    retriever = vector_store.as_retriever(search_kwargs={"k": 50})

    ranker_client = Ranker(model_name="ms-marco-MultiBERT-L-12",
                           cache_dir="./models")

    compressor = FlashrankRerank(client=ranker_client, top_n=10)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
    )

    return vector_store, compression_retriever
logging_config.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# logging_config.py
import logging
import os
from datetime import datetime

LOGS_DIR = "logs"

# Make sure the log directory exists before any handler opens a file.
os.makedirs(LOGS_DIR, exist_ok=True)

# One log file per day, named by date.
log_filename = os.path.join(LOGS_DIR, f"app_{datetime.now().strftime('%Y%m%d')}.log")


def setup_logger(name):
    """Return a logger that writes DEBUG+ to the daily file and INFO+ to the console.

    Calling this again with the same name reuses the already-configured
    logger without attaching duplicate handlers.
    """
    logger = logging.getLogger(name)

    if logger.handlers:
        # Already configured on a previous call — hand it back as-is.
        return logger

    logger.setLevel(logging.DEBUG)

    # File handler: verbose, with source location in every record.
    to_file = logging.FileHandler(log_filename)
    to_file.setLevel(logging.DEBUG)
    to_file.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    ))

    # Console handler: terser format, less verbose level.
    to_console = logging.StreamHandler()
    to_console.setLevel(logging.INFO)
    to_console.setFormatter(logging.Formatter(
        '%(levelname)s - %(name)s - %(message)s'
    ))

    logger.addHandler(to_file)
    logger.addHandler(to_console)

    return logger
requirements.txt CHANGED
@@ -31,6 +31,7 @@ durationpy==0.9
31
  executing==2.2.0
32
  fastapi==0.115.12
33
  filelock==3.18.0
 
34
  flatbuffers==25.2.10
35
  frozenlist==1.6.0
36
  fsspec==2025.3.2
 
31
  executing==2.2.0
32
  fastapi==0.115.12
33
  filelock==3.18.0
34
+ FlashRank==0.2.10
35
  flatbuffers==25.2.10
36
  frozenlist==1.6.0
37
  fsspec==2025.3.2
utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from logging_config import setup_logger
2
+
3
+
4
+ logger = setup_logger(__name__)
5
+
6
+
7
+
8
def extract_text_from_content(content):
    """Normalize a message ``content`` payload to a plain string.

    Handles the shapes message content can take here: a plain string is
    returned unchanged; a list is flattened by concatenating its string
    items and the ``'text'`` values of its dict items (other items are
    skipped); anything else is stringified as a fallback.
    """
    if isinstance(content, str):
        return content

    if not isinstance(content, list):
        # Fallback for any other format.
        return str(content)

    # Handle a list of text items and/or dictionaries.
    pieces = []
    for part in content:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and 'text' in part:
            pieces.append(part['text'])
    return ''.join(pieces)