Update app.py

app.py CHANGED
@@ -13,10 +13,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.docstore.document import Document
-from langchain.chains import LLMChain, RetrievalQA, ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-from langchain.prompts import PromptTemplate
-from langchain_community.llms import HuggingFaceHub
 
 # Import document parsers
 import PyPDF2
@@ -34,12 +30,8 @@ HF_TOKEN = os.getenv("hf_token")
 if not HF_TOKEN:
     raise ValueError("HuggingFace token not found in environment variables")
 
-# Initialize HuggingFace
-
-    repo_id="meta-llama/Llama-3.1-8B-Instruct",
-    huggingfacehub_api_token=HF_TOKEN,
-    model_kwargs={"temperature": 0.7, "max_length": 512}
-)
+# Initialize HuggingFace Inference Client
+client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct", token=HF_TOKEN)
 
 # Initialize embeddings
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
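The removed block above was dead code: bare keyword arguments with no surrounding `llm = HuggingFaceHub(` call, which is a module-level SyntaxError. The replacement switches to huggingface_hub's InferenceClient. The matching import is not visible in any hunk of this commit, so the sketch below assumes the standard one; the streaming call mirrors how the client is used later in this diff.

# Assumed import (not shown in this commit's hunks)
from huggingface_hub import InferenceClient

client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct", token=HF_TOKEN)

# With stream=True, chat_completion yields chunks whose text lives in
# chunk.choices[0].delta.content (None on the final bookkeeping chunk).
for chunk in client.chat_completion(
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=32,
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")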
@@ -205,60 +197,29 @@ class IngestionAgent:
         self.message_bus.publish(response)
 
 class RetrievalAgent:
-    """Agent responsible for embedding and semantic retrieval
+    """Agent responsible for embedding and semantic retrieval"""
 
     def __init__(self, message_bus: MessageBus):
         self.name = "RetrievalAgent"
         self.message_bus = message_bus
         self.message_bus.subscribe(self.name, self.handle_message)
         self.vector_store = None
-        self.retriever = None
-        self.qa_chain = None
-        self.conversation_chain = None
-        self.memory = ConversationBufferMemory(
-            memory_key="chat_history",
-            return_messages=True,
-            output_key="answer"
-        )
 
     def handle_message(self, message: MCPMessage):
         """Handle incoming MCP messages"""
         if message.type == "INGESTION_COMPLETE":
             self.create_vector_store(message)
         elif message.type == "RETRIEVAL_REQUEST":
-            self.
+            self.retrieve_context(message)
 
     def create_vector_store(self, message: MCPMessage):
-        """Create vector store
+        """Create vector store from processed documents"""
         documents = message.payload.get("documents", [])
 
         if documents:
             try:
                 self.vector_store = FAISS.from_documents(documents, embeddings)
-
-                    search_type="similarity",
-                    search_kwargs={"k": 3}
-                )
-
-                # Create QA chain
-                self.qa_chain = RetrievalQA.from_chain_type(
-                    llm=llm,
-                    chain_type="stuff",
-                    retriever=self.retriever,
-                    return_source_documents=True,
-                    verbose=True
-                )
-
-                # Create conversational chain
-                self.conversation_chain = ConversationalRetrievalChain.from_llm(
-                    llm=llm,
-                    retriever=self.retriever,
-                    memory=self.memory,
-                    return_source_documents=True,
-                    verbose=True
-                )
-
-                logger.info(f"Vector store and chains created with {len(documents)} documents")
+                logger.info(f"Vector store created with {len(documents)} documents")
 
                 # Notify completion
                 response = MCPMessage(
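MessageBus and MCPMessage are used throughout these hunks but defined outside the changed region, so the commit never shows them. A minimal sketch of the contract the agents appear to rely on; the field names are inferred from usage in this diff (message.type, message.payload, message.trace_id, the sender/receiver/msg_type keywords), not taken from the real definitions:

import uuid
from typing import Any, Callable, Dict

class MCPMessage:
    """Envelope passed between agents (fields inferred from usage in this diff)."""
    def __init__(self, sender: str, receiver: str, msg_type: str,
                 payload: Dict[str, Any], trace_id: str = None):
        self.sender = sender
        self.receiver = receiver
        self.type = msg_type            # handlers branch on message.type
        self.payload = payload          # handlers read message.payload.get(...)
        self.trace_id = trace_id or str(uuid.uuid4())

class MessageBus:
    """Routes a published message to the handler subscribed under its receiver name."""
    def __init__(self):
        self.subscribers: Dict[str, Callable] = {}

    def subscribe(self, name: str, handler: Callable) -> None:
        self.subscribers[name] = handler

    def publish(self, message: MCPMessage) -> None:
        handler = self.subscribers.get(message.receiver)
        if handler:
            handler(message)            # messages to unknown receivers are dropped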
@@ -272,60 +233,102 @@ class RetrievalAgent:
             except Exception as e:
                 logger.error(f"Error creating vector store: {e}")
 
-    def
-    """
+    def retrieve_context(self, message: MCPMessage):
+        """Retrieve relevant context for a query"""
         query = message.payload.get("query", "")
-
+        k = message.payload.get("k", 3)
 
-        if
-
+        if self.vector_store and query:
+            try:
+                docs = self.vector_store.similarity_search(query, k=k)
+                context = [{"content": doc.page_content, "source": doc.metadata.get("source", "")}
+                           for doc in docs]
+
+                response = MCPMessage(
+                    sender=self.name,
+                    receiver="LLMResponseAgent",
+                    msg_type="CONTEXT_RESPONSE",
+                    trace_id=message.trace_id,
+                    payload={
+                        "query": query,
+                        "retrieved_context": context,
+                        "top_chunks": [doc.page_content for doc in docs]
+                    }
+                )
+                self.message_bus.publish(response)
+            except Exception as e:
+                logger.error(f"Error retrieving context: {e}")
+
+class LLMResponseAgent:
+    """Agent responsible for generating LLM responses"""
+
+    def __init__(self, message_bus: MessageBus):
+        self.name = "LLMResponseAgent"
+        self.message_bus = message_bus
+        self.message_bus.subscribe(self.name, self.handle_message)
+
+    def handle_message(self, message: MCPMessage):
+        """Handle incoming MCP messages"""
+        if message.type == "CONTEXT_RESPONSE":
+            self.generate_response(message)
+
+    def generate_response(self, message: MCPMessage):
+        """Generate response using retrieved context"""
+        query = message.payload.get("query", "")
+        context = message.payload.get("retrieved_context", [])
+
+        # Build context string
+        context_text = "\n\n".join([f"Source: {ctx['source']}\nContent: {ctx['content']}"
+                                    for ctx in context])
 
+        # Create messages for conversational format
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant. Based on the provided context below, answer the user's question accurately and comprehensively. Cite the sources if possible.",
+            },
+            {
+                "role": "user",
+                "content": f"Context:\n\n{context_text}\n\nQuestion: {query}"
+            }
+        ]
+
         try:
-
-
-
-
-
-
-
-            result = self.qa_chain({"query": query})
-            answer = result["result"]
-            source_docs = result.get("source_documents", [])
-
-            # Format sources
-            sources = []
-            for doc in source_docs:
-                sources.append({
-                    "content": doc.page_content[:200] + "...",
-                    "source": doc.metadata.get("source", "Unknown")
-                })
+            # Use client.chat_completion for conversational models
+            response_stream = client.chat_completion(
+                messages=messages,
+                max_tokens=512,
+                temperature=0.7,
+                stream=True
+            )
 
+            # Send streaming response
             response = MCPMessage(
                 sender=self.name,
                 receiver="CoordinatorAgent",
-                msg_type="
+                msg_type="LLM_RESPONSE_STREAM",
                 trace_id=message.trace_id,
                 payload={
                     "query": query,
-                    "
-                    "
+                    "response_stream": response_stream,
+                    "context": context
                 }
             )
             self.message_bus.publish(response)
 
         except Exception as e:
-            logger.error(f"Error
-            # Send error
+            logger.error(f"Error generating response: {e}")
+            # Send an error stream back
+            error_msg = f"Error from LLM: {e}"
+            def error_generator():
+                yield error_msg
+
             response = MCPMessage(
                 sender=self.name,
                 receiver="CoordinatorAgent",
-                msg_type="
+                msg_type="LLM_RESPONSE_STREAM",
                 trace_id=message.trace_id,
-                payload={
-                    "query": query,
-                    "answer": f"Error processing query: {str(e)}",
-                    "sources": []
-                }
+                payload={"response_stream": error_generator()}
             )
             self.message_bus.publish(response)
 
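Taken together, the two hunks above replace the old RetrievalQA/conversational chains with a three-hop message flow: RETRIEVAL_REQUEST -> CONTEXT_RESPONSE -> LLM_RESPONSE_STREAM. A rough smoke test of that flow, assuming the MessageBus sketch above plus the module-level embeddings and client from earlier hunks (the document and file name are made up, and the LLM hop needs a valid HF token):

from langchain.docstore.document import Document

bus = MessageBus()
retrieval = RetrievalAgent(bus)
llm_agent = LLMResponseAgent(bus)

# Hypothetical one-document corpus
docs = [Document(page_content="FAISS does approximate nearest-neighbour search.",
                 metadata={"source": "notes.txt"})]
retrieval.create_vector_store(MCPMessage(
    sender="test", receiver="RetrievalAgent",
    msg_type="INGESTION_COMPLETE", payload={"documents": docs}))

# Drives retrieval, then generation; VECTORSTORE_READY and LLM_RESPONSE_STREAM
# target a CoordinatorAgent that isn't built here, so the sketch bus drops them.
bus.publish(MCPMessage(
    sender="test", receiver="RetrievalAgent",
    msg_type="RETRIEVAL_REQUEST", payload={"query": "What does FAISS do?"}))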
@@ -336,15 +339,15 @@ class CoordinatorAgent:
         self.name = "CoordinatorAgent"
         self.message_bus = message_bus
         self.message_bus.subscribe(self.name, self.handle_message)
+        self.current_response_stream = None
         self.vector_store_ready = False
-        self.current_response = None
 
     def handle_message(self, message: MCPMessage):
         """Handle incoming MCP messages"""
         if message.type == "VECTORSTORE_READY":
             self.vector_store_ready = True
-        elif message.type == "
-            self.
+        elif message.type == "LLM_RESPONSE_STREAM":
+            self.current_response_stream = message.payload.get("response_stream")
 
     def process_files(self, files):
         """Process uploaded files"""
@@ -364,49 +367,55 @@ class CoordinatorAgent:
 
         return f"Processing {len(files)} files: {', '.join([os.path.basename(fp) for fp in file_paths])}"
 
-    def handle_query(self, query: str):
-        """Handle user query
+    def handle_query(self, query: str, history: List) -> Generator[str, None, None]:
+        """Handle user query and return streaming response"""
         if not self.vector_store_ready:
-
+            yield "Please upload and process documents first."
+            return
 
         # Send retrieval request
         message = MCPMessage(
             sender=self.name,
             receiver="RetrievalAgent",
             msg_type="RETRIEVAL_REQUEST",
-            payload={"query": query
+            payload={"query": query}
         )
         self.message_bus.publish(message)
 
-        # Wait for response
+        # Wait for response and stream
         import time
-        timeout =
+        timeout = 20 # seconds
         start_time = time.time()
 
-        while not self.
+        while not self.current_response_stream and (time.time() - start_time) < timeout:
             time.sleep(0.1)
 
-        if self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if self.current_response_stream:
+            try:
+                # Stream tokens directly
+                for chunk in self.current_response_stream:
+                    # The token is in chunk.choices[0].delta.content for chat_completion
+                    if hasattr(chunk, 'choices') and chunk.choices:
+                        token = chunk.choices[0].delta.content
+                        if token:
+                            yield token
+                    else:
+                        # Fallback for different response format
+                        if hasattr(chunk, 'token'):
+                            yield chunk.token
+                        elif isinstance(chunk, str):
+                            yield chunk
+            except Exception as e:
+                yield f"Error streaming response: {e}"
+            finally:
+                self.current_response_stream = None # Reset for next query
         else:
-
+            yield "Timeout: No response received from LLM agent."
 
 # Initialize agents
 ingestion_agent = IngestionAgent(message_bus)
 retrieval_agent = RetrievalAgent(message_bus)
+llm_response_agent = LLMResponseAgent(message_bus)
 coordinator_agent = CoordinatorAgent(message_bus)
 
 def create_interface():
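The new handle_query signature annotates with List and Generator, so app.py presumably already imports them from typing; that import is not part of this diff. One design note: the coordinator parks the stream in a single self.current_response_stream and busy-waits on it, so two concurrent queries would race, and a per-trace_id map of pending streams would be safer. The polling core in isolation, as a sketch with hypothetical names:

from typing import Callable, Generator, Optional
import time

def wait_then_stream(get_stream: Callable[[], Optional[object]],
                     timeout: float = 20.0) -> Generator[str, None, None]:
    """Poll until a producer hands over a stream, then drain it token by token."""
    start = time.time()
    while get_stream() is None and (time.time() - start) < timeout:
        time.sleep(0.1)
    stream = get_stream()
    if stream is None:
        yield "Timeout: no response received."
        return
    for token in stream:
        yield str(token)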
@@ -453,23 +462,26 @@ def create_interface():
         font-size: 0.9rem;
     }
 
-    /* Chat area */
+    /* Chat area - REDUCED HEIGHT */
     .chat-container {
         flex: 1;
         display: flex;
         flex-direction: column;
-        max-width:
+        max-width: 1000px;
         margin: 0 auto;
         width: 100%;
         padding: 1rem;
+        height: calc(100vh - 200px) !important; /* Reduced height */
     }
 
-    /* Chatbot styling */
+    /* Chatbot styling - SMALLER */
     .gradio-chatbot {
-
+        height: 300px !important; /* Reduced from 500px */
+        max-height: 300px !important;
         background: transparent !important;
         border: none !important;
         margin-bottom: 1rem;
+        overflow-y: auto !important;
     }
 
     /* Input area */
@@ -479,6 +491,8 @@ def create_interface():
         padding: 1rem;
         border: 1px solid rgba(255, 193, 7, 0.2);
         backdrop-filter: blur(10px);
+        position: sticky;
+        bottom: 0;
     }
 
     /* File upload */
@@ -491,7 +505,16 @@ def create_interface():
         transition: all 0.3s ease;
     }
 
-    /* Buttons */
+    /* Buttons - YELLOW SEND BUTTON */
+    .send-btn {
+        background: linear-gradient(135deg, #ffc107 0%, #ff8f00 100%) !important;
+        color: #000000 !important;
+        border: none !important;
+        border-radius: 8px !important;
+        font-weight: 600 !important;
+        min-height: 40px !important;
+    }
+
     .primary-btn {
         background: linear-gradient(135deg, #ffc107 0%, #ff8f00 100%) !important;
         color: #000000 !important;
@@ -518,6 +541,19 @@ def create_interface():
         color: #ffc107;
         text-align: center;
     }
+
+    /* Input row styling */
+    .input-row {
+        display: flex !important;
+        gap: 10px !important;
+        align-items: end !important;
+    }
+
+    /* Message input */
+    .message-input {
+        flex: 1 !important;
+        min-height: 40px !important;
+    }
     """,
     title="Agentic RAG Assistant"
     ) as iface:
@@ -528,48 +564,92 @@ def create_interface():
         gr.HTML("""
         <div class="header">
             <h1>🤖 Agentic RAG Assistant</h1>
-            <p>Upload documents and ask questions - powered by
+            <p>Upload documents and ask questions - powered by Multi-Agent Architecture</p>
         </div>
         """)
 
-        # Main chat
+        # Main layout with sidebar and chat
        with gr.Row():
-
+            # Left sidebar for file upload
+            with gr.Column(scale=1):
+                gr.Markdown("### 📁 Document Upload")
+
+                file_upload = gr.File(
+                    file_count="multiple",
+                    file_types=[".pdf", ".pptx", ".csv", ".docx", ".txt", ".md"],
+                    label="Upload Documents",
+                    elem_classes=["upload-area"]
+                )
+
+                processing_status = gr.HTML(visible=False)
 
-
+                process_btn = gr.Button(
+                    "Process Documents",
+                    variant="primary",
+                    elem_classes=["primary-btn"]
+                )
+
+                gr.Markdown("### ℹ️ Architecture")
+                gr.Markdown("""
+                **Multi-Agent System:**
+                - 📄 **IngestionAgent**: Document parsing
+                - 🔍 **RetrievalAgent**: Semantic search
+                - 🤖 **LLMAgent**: Response generation
+                - 🎯 **CoordinatorAgent**: Workflow orchestration
+
+                **Features:**
+                - Streaming responses
+                - Multi-format support
+                - Context-aware answers
+                """)
+
+            # Right side - Chat interface
+            with gr.Column(scale=2):
+                gr.Markdown("### 💬 Chat Interface")
+
+                # Chatbot with reduced height
                 chatbot = gr.Chatbot(
-
-
-                    show_copy_button=True
+                    height=300, # Reduced height
+                    elem_classes=["gradio-chatbot"],
+                    show_copy_button=True,
+                    type="messages",
+                    placeholder="Upload documents first, then start chatting!"
                 )
 
-                # Input area
-                with gr.
-
-
-
-
-
-
+                # Input area with improved layout
+                with gr.Row(elem_classes=["input-row"]):
+                    msg_input = gr.Textbox(
+                        placeholder="Ask about your documents...",
+                        label="Message",
+                        scale=4,
+                        elem_classes=["message-input"],
+                        show_label=False,
+                        autofocus=True
                 )
-
-
-
-
-
-
-
-
-
-
-
-
+                    send_btn = gr.Button(
+                        "Send",
+                        scale=1,
+                        elem_classes=["send-btn"],
+                        size="sm"
+                    )
+
+                # Examples
+                gr.Examples(
+                    examples=[
+                        "What are the main topics discussed?",
+                        "Summarize the key findings",
+                        "What metrics are mentioned?",
+                        "What are the recommendations?"
+                    ],
+                    inputs=msg_input,
+                    label="💡 Example Questions"
+                )
 
         # State to track document processing
         doc_processed = gr.State(False)
 
         # Event handlers
-        def
+        def handle_file_upload_and_process(files):
             if not files:
                 return gr.update(visible=False), False
 
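One detail worth flagging in this hunk: type="messages" switches gr.Chatbot history from [user, bot] pairs to OpenAI-style role/content dicts, which is exactly the shape the rewritten respond() in the next hunk appends (the removed history.append([message, response]) would no longer match). The two formats side by side, with made-up content:

# Old tuple-style history (what the removed history.append([message, response]) produced)
history = [["What is FAISS?", "A vector search library."]]

# New "messages" history (what type="messages" expects)
history = [
    {"role": "user", "content": "What is FAISS?"},
    {"role": "assistant", "content": "A vector search library."},
]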
@@ -607,37 +687,46 @@ def create_interface():
 
         def respond(message, history, doc_ready):
             if not doc_ready:
-
+                # Show error message
+                history.append({"role": "user", "content": message})
+                history.append({"role": "assistant", "content": "⚠️ Please upload and process documents first."})
+                return history, ""
 
             if not message.strip():
                 return history, message
 
-            #
-
-
-            # Add to chat history
-            history.append([message, response])
+            # Add user message
+            history.append({"role": "user", "content": message})
+            history.append({"role": "assistant", "content": ""})
 
-
+            # Stream response
+            try:
+                for token in coordinator_agent.handle_query(message, history):
+                    history[-1]["content"] += token
+                    yield history, ""
+            except Exception as e:
+                history[-1]["content"] = f"❌ Error: {str(e)}"
+                yield history, ""
 
-        #
-
-
+        # Event bindings
+        process_btn.click(
+            handle_file_upload_and_process,
             inputs=[file_upload],
             outputs=[processing_status, doc_processed]
         )
 
-        # Send message
         send_btn.click(
             respond,
             inputs=[msg_input, chatbot, doc_processed],
-            outputs=[chatbot, msg_input]
+            outputs=[chatbot, msg_input],
+            show_progress=True
         )
 
         msg_input.submit(
             respond,
             inputs=[msg_input, chatbot, doc_processed],
-            outputs=[chatbot, msg_input]
+            outputs=[chatbot, msg_input],
+            show_progress=True
         )
 
         return iface
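Because respond() is now a generator, Gradio repaints the chatbot on every yield, which is what makes the token stream visible; yielding (history, "") also clears the textbox on the first update. The same pattern in miniature, with hypothetical component names and the input echoed in place of LLM tokens:

import gradio as gr

def echo_stream(message, history):
    history = history + [{"role": "user", "content": message},
                         {"role": "assistant", "content": ""}]
    for ch in message:                 # stand-in for streamed LLM tokens
        history[-1]["content"] += ch
        yield history, ""              # each yield repaints the Chatbot

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    box = gr.Textbox()
    box.submit(echo_stream, inputs=[box, chat], outputs=[chat, box])

demo.launch()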