eeshanyaj committed on
Commit
fc73f93
·
1 Parent(s): c463ae7

added sse features

Browse files
app/api/v1/conversation_routes.py CHANGED
@@ -18,6 +18,9 @@ from fastapi import APIRouter, HTTPException, status, Depends
18
  from pydantic import BaseModel, Field
19
  from typing import List, Dict, Optional
20
  from datetime import datetime
 
 
 
21
 
22
  from app.services.chat_service import chat_service
23
  from app.services.conversation_service import conversation_service
@@ -637,6 +640,439 @@ async def remove_reaction(
637
  )
638
 
639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
  # ============================================================================
641
  # HEALTH CHECK
642
  # ============================================================================
 
18
  from pydantic import BaseModel, Field
19
  from typing import List, Dict, Optional
20
  from datetime import datetime
21
+ from fastapi.responses import StreamingResponse
22
+ from app.services.streaming_service import streaming_service
23
+ import json
24
 
25
  from app.services.chat_service import chat_service
26
  from app.services.conversation_service import conversation_service
 
640
  )
641
 
642
 
643
+ # ========================================================================
644
+ # 🆕 STREAMING ENDPOINTS - Add after existing chat endpoint
645
+ # ========================================================================
646
+
647
@router.post("/stream")
async def chat_stream(
    request: ChatRequest,
    current_user: TokenData = Depends(get_current_user)
):
    """
    💬 Send message and get AI response via Server-Sent Events (SSE).

    **Streaming endpoint** - returns the response in real-time chunks.

    Events sent:
    - `conversation_id`: ID of the conversation the exchange is saved under
    - `status`: progress updates (retrieval, generation)
    - `content`: response text chunks
    - `metadata`: final statistics (policy action, docs retrieved, timing)
    - `done`: stream completion
    - `error`: emitted if an error occurs mid-stream

    The frontend should consume the stream with the EventSource / fetch API,
    appending `content` chunks to the UI and closing on `done`.

    Requires JWT authentication.

    Raises:
        HTTPException 404: conversation_id supplied but unknown.
        HTTPException 403: conversation belongs to another user.
        HTTPException 500: unexpected failure before streaming starts.
    """
    try:
        user_id = current_user.user_id

        # --------------------------------------------------------------
        # STEP 1: Get or create the conversation (same as non-streaming)
        # --------------------------------------------------------------
        conversation_id = request.conversation_id

        if conversation_id:
            conversation = await conversation_repository.get_conversation(conversation_id)

            if not conversation:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail="Conversation not found"
                )

            if conversation["user_id"] != user_id:
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Access denied"
                )
        else:
            # No conversation supplied: start a new one seeded with this query.
            from app.models.conversation import CreateConversationRequest
            create_req = CreateConversationRequest(
                title=None,
                first_message=request.query
            )

            new_conversation = await conversation_service.create_conversation(
                user_id=user_id,
                request=create_req,
                llm_manager=None
            )

            conversation_id = str(new_conversation.id)

        # --------------------------------------------------------------
        # STEP 2: Recent history for the LLM context window
        # --------------------------------------------------------------
        history = await conversation_repository.get_conversation_history(
            conversation_id=conversation_id,
            max_messages=10
        )

        # --------------------------------------------------------------
        # STEP 3: Persist the user's message before streaming begins
        # --------------------------------------------------------------
        await conversation_repository.add_message(
            conversation_id=conversation_id,
            message={
                'role': 'user',
                'content': request.query,
                'timestamp': datetime.utcnow(),
                'metadata': None
            }
        )

        # --------------------------------------------------------------
        # STEP 4: Stream the response, mirroring it locally so the final
        # assistant message can be persisted after the stream completes.
        # --------------------------------------------------------------
        def _parse_sse_data(sse_event: str):
            """Return the JSON-decoded `data:` payload of an SSE frame, or None.

            FIX: the previous implementation scraped the payload with the regex
            '"text":\\s*"([^"]*)"', which truncated any chunk containing an
            escaped quote and never unescaped JSON sequences (\\n, \\"), so the
            saved transcript could differ from what was streamed.
            """
            for line in sse_event.splitlines():
                if line.startswith("data: "):
                    try:
                        return json.loads(line[len("data: "):])
                    except ValueError:
                        return None
            return None

        async def generate_stream():
            """Generator that adds conversation_id to the first event and
            saves the assistant response once streaming finishes."""

            # Send conversation_id first (so frontend knows where to save).
            yield f"event: conversation_id\ndata: {json.dumps({'conversation_id': conversation_id})}\n\n"

            # Collect the full response and final metadata for persistence.
            full_response = ""
            final_metadata = {}

            async for sse_event in streaming_service.stream_chat_response(
                query=request.query,
                conversation_history=history,
                user_id=user_id
            ):
                yield sse_event

                if "event: content" in sse_event:
                    payload = _parse_sse_data(sse_event)
                    if payload and isinstance(payload.get("text"), str):
                        full_response += payload["text"]
                elif "event: metadata" in sse_event:
                    payload = _parse_sse_data(sse_event)
                    if payload is not None:
                        final_metadata = payload

            # Save assistant response after streaming completes.
            await conversation_repository.add_message(
                conversation_id=conversation_id,
                message={
                    'role': 'assistant',
                    'content': full_response,
                    'timestamp': datetime.utcnow(),
                    'metadata': {
                        'policy_action': final_metadata.get('policy_action'),
                        'policy_confidence': final_metadata.get('policy_confidence'),
                        'documents_retrieved': final_metadata.get('documents_retrieved'),
                        'top_doc_score': final_metadata.get('top_doc_score'),
                        'retrieval_time_ms': final_metadata.get('retrieval_time_ms'),
                        'generation_time_ms': final_metadata.get('generation_time_ms')
                    }
                }
            )

            # Log retrieval analytics for this exchange.
            await conversation_repository.log_retrieval({
                'conversation_id': conversation_id,
                'user_id': user_id,
                'query': request.query,
                'policy_action': final_metadata.get('policy_action'),
                'policy_confidence': final_metadata.get('policy_confidence'),
                'should_retrieve': final_metadata.get('documents_retrieved', 0) > 0,
                'documents_retrieved': final_metadata.get('documents_retrieved', 0),
                'top_doc_score': final_metadata.get('top_doc_score'),
                'response': full_response,
                'retrieval_time_ms': final_metadata.get('retrieval_time_ms'),
                'generation_time_ms': final_metadata.get('generation_time_ms'),
                'total_time_ms': final_metadata.get('total_time_ms'),
                'retrieved_docs_metadata': final_metadata.get('retrieved_docs_metadata', []),
                'timestamp': datetime.utcnow()
            })

        # Return the SSE stream.
        return StreamingResponse(
            generate_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"  # Disable nginx buffering
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        print(f"❌ Streaming endpoint error: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to stream response: {str(e)}"
        )
834
+
835
+
836
+ # ========================================================================
837
+ # 🆕 REGENERATE RESPONSE (with streaming)
838
+ # ========================================================================
839
+
840
@router.post("/conversation/{conversation_id}/regenerate")
async def regenerate_last_response(
    conversation_id: str,
    current_user: TokenData = Depends(get_current_user)
):
    """
    🔄 Regenerate the last assistant response.

    - Removes the last assistant message
    - Re-processes the last user query
    - Returns a streaming (SSE) response

    User must own the conversation.

    Raises:
        HTTPException 404: conversation not found (or not owned by user).
        HTTPException 400: fewer than 2 messages, or no user message found.
        HTTPException 500: unexpected failure before streaming starts.
    """
    try:
        # Get conversation (service scopes the lookup to this user).
        conversation = await conversation_service.get_conversation(
            conversation_id=conversation_id,
            user_id=current_user.user_id
        )

        if not conversation:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Conversation not found"
            )

        if len(conversation.messages) < 2:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Need at least 2 messages to regenerate"
            )

        # Most recent user message is the query to re-run.
        last_user_msg = next(
            (msg for msg in reversed(conversation.messages) if msg.role == 'user'),
            None
        )

        if not last_user_msg:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No user message found to regenerate from"
            )

        # Drop the stale assistant message before regenerating.
        await conversation_repository.remove_last_assistant_message(conversation_id)

        # Get updated history.
        history = await conversation_repository.get_conversation_history(
            conversation_id=conversation_id,
            max_messages=10
        )

        def _parse_sse_data(sse_event: str):
            """Return the JSON-decoded `data:` payload of an SSE frame, or None.

            FIX: replaces regex scraping of the payload, which truncated chunks
            containing escaped quotes and left JSON escapes unexpanded.
            """
            for line in sse_event.splitlines():
                if line.startswith("data: "):
                    try:
                        return json.loads(line[len("data: "):])
                    except ValueError:
                        return None
            return None

        async def generate_stream():
            """Stream the regenerated response and persist it afterwards."""
            yield f"event: conversation_id\ndata: {json.dumps({'conversation_id': conversation_id})}\n\n"

            full_response = ""
            final_metadata = {}

            async for sse_event in streaming_service.stream_chat_response(
                query=last_user_msg.content,
                conversation_history=history,
                user_id=current_user.user_id
            ):
                yield sse_event

                if "event: content" in sse_event:
                    payload = _parse_sse_data(sse_event)
                    if payload and isinstance(payload.get("text"), str):
                        full_response += payload["text"]
                elif "event: metadata" in sse_event:
                    payload = _parse_sse_data(sse_event)
                    if payload is not None:
                        final_metadata = payload

            # Save the regenerated response.
            await conversation_repository.add_message(
                conversation_id=conversation_id,
                message={
                    'role': 'assistant',
                    'content': full_response,
                    'timestamp': datetime.utcnow(),
                    'metadata': {
                        'policy_action': final_metadata.get('policy_action'),
                        'policy_confidence': final_metadata.get('policy_confidence'),
                        'documents_retrieved': final_metadata.get('documents_retrieved'),
                        'regenerated': True  # Flag for analytics
                    }
                }
            )

        return StreamingResponse(
            generate_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive"
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        print(f"❌ Regenerate error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to regenerate: {str(e)}"
        )
954
+
955
+
956
+ # ========================================================================
957
+ # 🆕 EDIT LAST MESSAGE (then regenerate)
958
+ # ========================================================================
959
+
960
class EditMessageRequest(BaseModel):
    """Payload for editing the last user message in a conversation."""
    # 1–2000 characters; emptiness after stripping is handled by the endpoint.
    new_content: str = Field(..., min_length=1, max_length=2000)
963
+
964
+
965
@router.post("/conversation/{conversation_id}/edit")
async def edit_and_regenerate(
    conversation_id: str,
    request: EditMessageRequest,
    current_user: TokenData = Depends(get_current_user)
):
    """
    ✏️ Edit last user message and regenerate response.

    - Updates last user message content
    - Removes last assistant response
    - Regenerates with the new message
    - Returns a streaming (SSE) response

    User must own the conversation.

    Raises:
        HTTPException 404: conversation not found (or not owned by user).
        HTTPException 400: the message update failed.
        HTTPException 500: unexpected failure before streaming starts.
    """
    try:
        # Get conversation (service scopes the lookup to this user).
        conversation = await conversation_service.get_conversation(
            conversation_id=conversation_id,
            user_id=current_user.user_id
        )

        if not conversation:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Conversation not found"
            )

        # Update last user message (whitespace-trimmed).
        success = await conversation_repository.update_last_user_message(
            conversation_id=conversation_id,
            new_content=request.new_content.strip()
        )

        if not success:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Failed to update message"
            )

        # Remove last assistant message so it can be regenerated.
        await conversation_repository.remove_last_assistant_message(conversation_id)

        # Get updated history.
        history = await conversation_repository.get_conversation_history(
            conversation_id=conversation_id,
            max_messages=10
        )

        def _parse_sse_data(sse_event: str):
            """Return the JSON-decoded `data:` payload of an SSE frame, or None.

            FIX: replaces regex scraping of the payload, which truncated chunks
            containing escaped quotes and left JSON escapes unexpanded.
            """
            for line in sse_event.splitlines():
                if line.startswith("data: "):
                    try:
                        return json.loads(line[len("data: "):])
                    except ValueError:
                        return None
            return None

        async def generate_stream():
            """Stream the response to the edited query and persist it."""
            yield f"event: conversation_id\ndata: {json.dumps({'conversation_id': conversation_id})}\n\n"

            full_response = ""
            final_metadata = {}

            async for sse_event in streaming_service.stream_chat_response(
                query=request.new_content,
                conversation_history=history,
                user_id=current_user.user_id
            ):
                yield sse_event

                if "event: content" in sse_event:
                    payload = _parse_sse_data(sse_event)
                    if payload and isinstance(payload.get("text"), str):
                        full_response += payload["text"]
                elif "event: metadata" in sse_event:
                    payload = _parse_sse_data(sse_event)
                    if payload is not None:
                        final_metadata = payload

            # Save the new response.
            await conversation_repository.add_message(
                conversation_id=conversation_id,
                message={
                    'role': 'assistant',
                    'content': full_response,
                    'timestamp': datetime.utcnow(),
                    'metadata': {
                        'policy_action': final_metadata.get('policy_action'),
                        'policy_confidence': final_metadata.get('policy_confidence'),
                        'documents_retrieved': final_metadata.get('documents_retrieved'),
                        'edited': True  # Flag for analytics
                    }
                }
            )

        return StreamingResponse(
            generate_stream(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive"
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        print(f"❌ Edit error: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to edit message: {str(e)}"
        )
1074
+
1075
+
1076
  # ============================================================================
1077
  # HEALTH CHECK
1078
  # ============================================================================
app/core/llm_manager.py CHANGED
@@ -17,11 +17,12 @@ Fallback Logic:
17
  """
18
 
19
  import time
20
- from typing import List, Dict, Optional, Literal
21
  from langchain_groq import ChatGroq
22
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
23
  from huggingface_hub import InferenceClient
24
  from app.config import settings
 
25
 
26
  # ============================================================================
27
  # GROQ MANAGER WITH FALLBACK
@@ -299,7 +300,7 @@ class LLMManager:
299
  messages: List[Dict[str, str]],
300
  system_prompt: Optional[str] = None,
301
  task: Literal["chat", "evaluation"] = "chat"
302
- ) -> str:
303
  """
304
  Generate response with cascading fallback logic.
305
 
@@ -344,6 +345,140 @@ class LLMManager:
344
 
345
  raise ValueError("No LLM provider available")
346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  async def generate_chat_response(
348
  self,
349
  query: str,
 
17
  """
18
 
19
  import time
20
+ from typing import AsyncGenerator, List, Dict, Optional, Literal
21
  from langchain_groq import ChatGroq
22
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
23
  from huggingface_hub import InferenceClient
24
  from app.config import settings
25
+ import asyncio
26
 
27
  # ============================================================================
28
  # GROQ MANAGER WITH FALLBACK
 
300
  messages: List[Dict[str, str]],
301
  system_prompt: Optional[str] = None,
302
  task: Literal["chat", "evaluation"] = "chat"
303
+ ) -> str:
304
  """
305
  Generate response with cascading fallback logic.
306
 
 
345
 
346
  raise ValueError("No LLM provider available")
347
 
348
+ # ============================================================================
349
+ # ADD TO: backend/app/core/llm_manager.py
350
+ # Add this method to LLMManager class
351
+ # ============================================================================
352
+
353
async def stream_chat_response(
    self,
    query: str,
    context: str = "",
    history: Optional[List[Dict[str, str]]] = None,
    max_tokens: int = 1000,
    temperature: float = 0.7
) -> AsyncGenerator[str, None]:
    """
    Stream a chat response, yielding text chunks as they are generated.

    Tries Groq first (native streaming), falls back to HuggingFace
    (full generation with simulated word-by-word streaming). If both
    providers fail, yields a single apology message instead of raising.

    FIX: `history` was annotated `List[Dict[str, str]]` while defaulting to
    None — the annotation now reflects that (`Optional[...]`).

    Args:
        query: User query
        context: Retrieved context
        history: Conversation history (role/content dicts); None means empty
        max_tokens: Max response length
        temperature: Sampling temperature

    Yields:
        str: Response chunks
    """
    if history is None:
        history = []

    # System prompt shared by both providers.
    system_prompt = """You are an expert banking assistant specialized in Indian financial regulations and banking practices.

Instructions:
- Answer accurately using provided context when available
- If context is insufficient, still respond helpfully
- Keep responses clear and concise
- Never fabricate specific policies or rates
- Maintain a professional tone"""

    # Wrap the query with retrieved context when any is available.
    user_message = query
    if context:
        user_message = f"""Context from knowledge base:
{context}

User Query: {query}

Please answer the query using the context above when relevant."""

    # ====================================================================
    # TRY GROQ (STREAMING SUPPORTED)
    # ====================================================================
    # NOTE(review): the module imports LangChain's ChatGroq, but this path
    # uses the OpenAI-style `chat.completions.create` API — confirm that
    # `self.groq` is the raw Groq SDK client, not the LangChain wrapper.
    if self.groq:
        try:
            # OpenAI-style message list: system + recent history + query.
            messages = [{"role": "system", "content": system_prompt}]
            messages.extend(
                {"role": msg['role'], "content": msg['content']}
                for msg in history[-10:]  # last 10 messages
            )
            messages.append({"role": "user", "content": user_message})

            stream = self.groq.chat.completions.create(
                model=self.groq_model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True  # Enable streaming
            )

            # NOTE(review): this iterates a *synchronous* stream inside an
            # async generator, blocking the event loop between chunks —
            # consider the async Groq client if one is available.
            for chunk in stream:
                if chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content

            return  # Success, exit

        except Exception as e:
            print(f"⚠️ Groq streaming failed: {e}")
            # Fall through to HuggingFace

    # ====================================================================
    # FALLBACK: HUGGINGFACE (NO STREAMING - SIMULATE)
    # ====================================================================
    if self.huggingface:
        try:
            print("⚠️ Using HuggingFace (simulated streaming)")

            # Flat Human/Assistant transcript prompt for HF text-generation.
            prompt = f"{system_prompt}\n\n"
            for msg in history[-5:]:
                role = "Human" if msg['role'] == 'user' else "Assistant"
                prompt += f"{role}: {msg['content']}\n"
            prompt += f"Human: {user_message}\nAssistant:"

            # Generate the full response in one call (no native streaming).
            response = self.huggingface(
                prompt,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                return_full_text=False
            )[0]['generated_text']

            # Simulate streaming by emitting one word at a time.
            words = response.split()
            for i, word in enumerate(words):
                # Add a leading space except for the first word.
                yield word if i == 0 else f" {word}"

                # Small delay so the client perceives a stream.
                await asyncio.sleep(0.05)  # 50ms per word

            return

        except Exception as e:
            print(f"❌ HuggingFace streaming failed: {e}")

    # ====================================================================
    # BOTH FAILED - RETURN ERROR
    # ====================================================================
    yield "I apologize, but I'm unable to generate a response at the moment. Please try again."
481
+
482
  async def generate_chat_response(
483
  self,
484
  query: str,
app/db/repositories/conversation_repository.py CHANGED
@@ -626,6 +626,145 @@ class ConversationRepository:
626
  except Exception as e:
627
  print(f"❌ Update reaction error: {e}")
628
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
629
 
630
 
631
  # ============================================================================
 
626
  except Exception as e:
627
  print(f"❌ Update reaction error: {e}")
628
  return False
629
+
630
+
631
+
632
+ # ============================================================================
633
+ # ADD TO: backend/app/db/repositories/conversation_repository.py
634
+ # Add these methods to ConversationRepository class
635
+ # ============================================================================
636
+
637
async def remove_last_assistant_message(
    self,
    conversation_id: str
) -> bool:
    """
    Delete the most recent assistant message from a conversation.

    Supports the "regenerate response" feature: the stale assistant turn
    is dropped before a fresh one is streamed.

    Args:
        conversation_id: Conversation ID

    Returns:
        bool: True if a message was removed
    """
    try:
        from bson import ObjectId

        doc = await self.collection.find_one(
            {"_id": ObjectId(conversation_id)}
        )
        if not doc:
            return False

        messages = doc.get('messages', [])

        # Walk backwards to locate the newest assistant turn.
        target = next(
            (idx for idx in range(len(messages) - 1, -1, -1)
             if messages[idx].get('role') == 'assistant'),
            None
        )

        if target is None:
            print("⚠️ No assistant message to remove")
            return False

        del messages[target]

        # Write the trimmed message list back in one update.
        result = await self.collection.update_one(
            {"_id": ObjectId(conversation_id)},
            {
                "$set": {
                    "messages": messages,
                    "message_count": len(messages),
                    "updated_at": datetime.utcnow()
                }
            }
        )

        if result.modified_count > 0:
            print(f"✅ Removed last assistant message from conversation {conversation_id}")
            return True
        return False

    except Exception as e:
        print(f"❌ Remove last assistant message error: {e}")
        return False
700
+
701
+
702
async def update_last_user_message(
    self,
    conversation_id: str,
    new_content: str
) -> bool:
    """
    Replace the content of the most recent user message.

    Supports the "edit message" feature; the edited message is flagged
    and re-timestamped.

    Args:
        conversation_id: Conversation ID
        new_content: New message content

    Returns:
        bool: True if a message was updated
    """
    try:
        from bson import ObjectId

        doc = await self.collection.find_one(
            {"_id": ObjectId(conversation_id)}
        )
        if not doc:
            return False

        messages = doc.get('messages', [])

        # Walk backwards to locate the newest user turn.
        target = next(
            (idx for idx in range(len(messages) - 1, -1, -1)
             if messages[idx].get('role') == 'user'),
            None
        )

        if target is None:
            print("⚠️ No user message to update")
            return False

        # Rewrite the message in place and mark it as edited.
        edited = messages[target]
        edited['content'] = new_content
        edited['timestamp'] = datetime.utcnow()
        edited['edited'] = True  # Flag as edited

        result = await self.collection.update_one(
            {"_id": ObjectId(conversation_id)},
            {
                "$set": {
                    "messages": messages,
                    "updated_at": datetime.utcnow()
                }
            }
        )

        if result.modified_count > 0:
            print(f"✅ Updated last user message in conversation {conversation_id}")
            return True
        return False

    except Exception as e:
        print(f"❌ Update last user message error: {e}")
        return False
768
 
769
 
770
  # ============================================================================
app/services/streaming_service.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # backend/app/services/streaming_service.py - NEW FILE
3
+ # ============================================================================
4
+
5
+ """
6
+ Streaming Service - Server-Sent Events (SSE)
7
+
8
+ Handles real-time streaming of AI responses.
9
+ Integrates with chat_service.py RAG pipeline.
10
+ """
11
+
12
+ import asyncio
13
+ import json
14
+ from typing import AsyncGenerator, Dict, Any, List, Optional
15
+ from datetime import datetime
16
+
17
+ from app.config import settings
18
+ from app.ml.policy_network import predict_policy_action
19
+ from app.ml.retriever import retrieve_documents, format_context
20
+ from app.core.llm_manager import llm_manager
21
+
22
+
23
+ # ============================================================================
24
+ # STREAMING SERVICE
25
+ # ============================================================================
26
+
27
class StreamingService:
    """
    Handles SSE streaming for real-time chat responses.

    Events sent:
    - status: Progress updates (retrieval, generation stages)
    - content: Response chunks (word by word)
    - metadata: Final stats (policy action, docs retrieved, etc.)
    - done: Stream completion signal
    - error: Error occurred
    """

    def __init__(self):
        print("🌊 StreamingService initialized")

    async def stream_chat_response(
        self,
        query: str,
        conversation_history: Optional[List[Dict[str, str]]] = None,
        user_id: Optional[str] = None
    ) -> AsyncGenerator[str, None]:
        """
        Stream a chat response with progress updates.

        Pipeline: policy decision -> (optional) retrieval -> streamed
        generation -> metadata -> done. Any exception is converted into a
        single `error` event rather than propagating to the caller.

        FIX: `conversation_history` was annotated `List[Dict[str, str]]`
        while defaulting to None — now `Optional[...]`.

        Args:
            query: User query
            conversation_history: Previous messages (role/content dicts)
            user_id: User ID (currently unused here; kept for API symmetry)

        Yields:
            str: SSE-formatted events (`event: ...\\ndata: ...\\n\\n`)
        """
        import time
        start_time = time.time()

        if conversation_history is None:
            conversation_history = []

        try:
            # ================================================================
            # STAGE 1: Policy Decision
            # ================================================================
            yield self._format_sse_event(
                event="status",
                data={"stage": "policy", "message": "Analyzing query..."}
            )

            await asyncio.sleep(0.1)  # Small delay for UX

            policy_result = predict_policy_action(
                query=query,
                history=conversation_history,
                return_probs=True
            )

            # ================================================================
            # STAGE 2: Retrieval (only when the policy asks for it)
            # ================================================================
            retrieved_docs = []
            context = ""
            retrieval_time = 0

            if policy_result['should_retrieve']:
                yield self._format_sse_event(
                    event="status",
                    data={"stage": "retrieval", "message": "Searching knowledge base..."}
                )

                retrieval_start = time.time()

                try:
                    retrieved_docs = retrieve_documents(
                        query=query,
                        top_k=settings.TOP_K,
                        min_similarity=settings.SIMILARITY_THRESHOLD
                    )

                    retrieval_time = (time.time() - retrieval_start) * 1000

                    if retrieved_docs:
                        context = format_context(
                            retrieved_docs,
                            max_context_length=settings.MAX_CONTEXT_LENGTH
                        )

                        yield self._format_sse_event(
                            event="status",
                            data={
                                "stage": "retrieval",
                                "message": f"Found {len(retrieved_docs)} relevant documents"
                            }
                        )

                except Exception as e:
                    # Retrieval is best-effort: degrade to no-context generation.
                    print(f"⚠️ Retrieval error during streaming: {e}")

            # ================================================================
            # STAGE 3: Stream Generation
            # ================================================================
            yield self._format_sse_event(
                event="status",
                data={"stage": "generation", "message": "Generating response..."}
            )

            generation_start = time.time()
            full_response = ""

            # Stream chunks straight from the LLM manager.
            async for chunk in llm_manager.stream_chat_response(
                query=query,
                context=context,
                history=conversation_history
            ):
                full_response += chunk

                yield self._format_sse_event(
                    event="content",
                    data={"text": chunk}
                )

            generation_time = (time.time() - generation_start) * 1000
            total_time = (time.time() - start_time) * 1000

            # ================================================================
            # STAGE 4: Send Metadata
            # ================================================================
            metadata = {
                "policy_action": policy_result['action'],
                "policy_confidence": policy_result['confidence'],
                "documents_retrieved": len(retrieved_docs),
                "top_doc_score": retrieved_docs[0]['score'] if retrieved_docs else None,
                "retrieval_time_ms": round(retrieval_time, 2),
                "generation_time_ms": round(generation_time, 2),
                "total_time_ms": round(total_time, 2),
                "timestamp": datetime.now().isoformat()
            }

            # Per-document provenance, only when something was retrieved.
            if retrieved_docs:
                metadata['retrieved_docs_metadata'] = [
                    {
                        'faq_id': doc['faq_id'],
                        'score': doc['score'],
                        'category': doc['category'],
                        'rank': doc['rank']
                    }
                    for doc in retrieved_docs
                ]

            yield self._format_sse_event(
                event="metadata",
                data=metadata
            )

            # ================================================================
            # STAGE 5: Done
            # ================================================================
            yield self._format_sse_event(
                event="done",
                data={"message": "Stream completed"}
            )

        except Exception as e:
            print(f"❌ Streaming error: {e}")
            import traceback
            traceback.print_exc()

            yield self._format_sse_event(
                event="error",
                data={"error": str(e), "message": "An error occurred during streaming"}
            )

    def _format_sse_event(self, event: str, data: Dict[str, Any]) -> str:
        """
        Format data as a single SSE frame.

        SSE format:
            event: <event_name>
            data: <json_data>

        followed by a blank line separating frames. `ensure_ascii=False`
        keeps non-ASCII text readable in the payload.
        """
        json_data = json.dumps(data, ensure_ascii=False)
        return f"event: {event}\ndata: {json_data}\n\n"


# ============================================================================
# GLOBAL INSTANCE
# ============================================================================

streaming_service = StreamingService()
frontend_integration_example.js ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ============================================================================
2
+ // Frontend Integration - How to Consume SSE Streams
3
+ // ============================================================================
4
+
5
+ // ============================================================================
6
+ // VANILLA JAVASCRIPT EXAMPLE
7
+ // ============================================================================
8
+
9
/**
 * Send a chat message and consume the server's SSE response stream.
 *
 * Fixes over the naive reader loop:
 *  - decodes with { stream: true } so a multi-byte UTF-8 character split
 *    across network chunks is not mangled into replacement characters;
 *  - buffers across reads and only parses up to the last complete event
 *    terminator ("\n\n"), so an SSE event split across two chunks is not
 *    silently dropped.
 *
 * @param {string} query - The user's message text.
 * @param {string|null} conversationId - Existing conversation id, or null to start a new conversation.
 * @returns {Promise<{response: string, conversationId: string|null}>} Full assistant text and the (possibly new) conversation id.
 * @throws {Error} On non-2xx HTTP responses or network failures.
 */
async function sendStreamingMessage(query, conversationId = null) {
  const token = localStorage.getItem('jwt_token');

  // Prepare request
  const requestData = {
    query: query,
    conversation_id: conversationId
  };

  try {
    // Make POST request to get stream
    const response = await fetch('http://localhost:8000/api/v1/chat/stream', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${token}`
      },
      body: JSON.stringify(requestData)
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    // Read stream
    const reader = response.body.getReader();
    const decoder = new TextDecoder();

    let fullResponse = '';
    let currentConversationId = conversationId;

    // Holds bytes-decoded text that has not yet formed a complete event.
    let buffer = '';

    while (true) {
      const { done, value } = await reader.read();

      if (done) break;

      // { stream: true } keeps incomplete multi-byte sequences pending
      // until the next chunk instead of emitting U+FFFD.
      buffer += decoder.decode(value, { stream: true });

      // Parse only up to the last complete event separator; the trailing
      // partial event (if any) stays buffered for the next read.
      const boundary = buffer.lastIndexOf('\n\n');
      if (boundary === -1) continue; // no complete event buffered yet

      const completeEvents = buffer.slice(0, boundary + 2);
      buffer = buffer.slice(boundary + 2);

      // Parse SSE events
      const events = parseSSE(completeEvents);

      for (const event of events) {
        switch (event.type) {
          case 'conversation_id':
            currentConversationId = event.data.conversation_id;
            console.log('Conversation ID:', currentConversationId);
            break;

          case 'status':
            console.log('Status:', event.data.message);
            // Update UI with status
            updateStatusIndicator(event.data.message);
            break;

          case 'content':
            fullResponse += event.data.text;
            // Append to UI in real-time
            appendToMessageBubble(event.data.text);
            break;

          case 'metadata':
            console.log('Metadata:', event.data);
            // Save metadata if needed
            break;

          case 'done':
            console.log('Stream completed');
            hideStatusIndicator();
            break;

          case 'error':
            console.error('Stream error:', event.data.error);
            showError(event.data.message);
            break;
        }
      }
    }

    return {
      response: fullResponse,
      conversationId: currentConversationId
    };

  } catch (error) {
    console.error('Streaming error:', error);
    throw error;
  }
}
98
+
99
+ // Parse SSE format
100
/**
 * Parse a string containing zero or more complete SSE events.
 *
 * Each event looks like:
 *
 *   event: <name>
 *   data: <json>
 *   (blank line)
 *
 * @param {string} text - Raw SSE text made of complete events.
 * @returns {Array<{type: string, data: Object}>} One entry per event whose
 *   data parses as JSON; malformed events are warned about and skipped.
 */
function parseSSE(text) {
  const events = [];
  // Events are separated by a blank line.
  const blocks = text.split('\n\n');

  for (const block of blocks) {
    if (!block.trim()) continue;

    // Anchor to line starts (multiline flag): an "event:" or "data:"
    // substring inside the JSON payload must not be mistaken for a field.
    const eventMatch = block.match(/^event: (.+)$/m);
    const dataMatch = block.match(/^data: (.+)$/m);

    if (eventMatch && dataMatch) {
      try {
        events.push({
          type: eventMatch[1],
          data: JSON.parse(dataMatch[1])
        });
      } catch (e) {
        console.warn('Failed to parse SSE event:', e);
      }
    }
  }

  return events;
}
124
+
125
+
126
+ // ============================================================================
127
+ // REACT EXAMPLE (with hooks)
128
+ // ============================================================================
129
+
130
+ import { useState, useRef } from 'react';
131
+
132
/**
 * Chat UI that streams assistant replies over SSE.
 *
 * State:
 *  - messages: ordered list of { role, content } (assistant entries also
 *    carry an `id` so streamed chunks can be appended to the right bubble);
 *  - isStreaming: true while a stream is in flight (shows the Stop button);
 *  - statusMessage: transient server status text shown below the messages.
 *
 * NOTE(review): relies on a `parseSSE` helper being in scope; chunks are
 * decoded and parsed per read, so an event split across network chunks may
 * be dropped — confirm against the backend's chunking behavior.
 */
function ChatComponent() {
  const [messages, setMessages] = useState([]);
  const [isStreaming, setIsStreaming] = useState(false);
  const [statusMessage, setStatusMessage] = useState('');
  // Lets stopStreaming() cancel the in-flight fetch from outside the closure.
  const abortControllerRef = useRef(null);

  /**
   * POST the query and stream the assistant's reply into the message list.
   * Returns the conversation id (new one if the server created a conversation).
   */
  const sendStreamingMessage = async (query, conversationId) => {
    setIsStreaming(true);
    setStatusMessage('');

    // Add user message immediately
    const userMessage = { role: 'user', content: query };
    setMessages(prev => [...prev, userMessage]);

    // Create assistant message placeholder; streamed chunks append to it.
    // Date.now() as id assumes at most one message created per millisecond.
    const assistantMessageId = Date.now();
    setMessages(prev => [...prev, {
      id: assistantMessageId,
      role: 'assistant',
      content: ''
    }]);

    try {
      const token = localStorage.getItem('jwt_token');

      // Create abort controller for cancellation
      abortControllerRef.current = new AbortController();

      const response = await fetch('http://localhost:8000/api/v1/chat/stream', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${token}`
        },
        body: JSON.stringify({ query, conversation_id: conversationId }),
        signal: abortControllerRef.current.signal
      });

      const reader = response.body.getReader();
      const decoder = new TextDecoder();

      let newConversationId = conversationId;

      // Read until the server closes the stream (or the fetch is aborted).
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        const chunk = decoder.decode(value);
        const events = parseSSE(chunk);

        for (const event of events) {
          switch (event.type) {
            case 'conversation_id':
              // Server assigned/confirmed the conversation id.
              newConversationId = event.data.conversation_id;
              break;

            case 'status':
              setStatusMessage(event.data.message);
              break;

            case 'content':
              // Append this text chunk to the assistant placeholder bubble.
              setMessages(prev => prev.map(msg =>
                msg.id === assistantMessageId
                  ? { ...msg, content: msg.content + event.data.text }
                  : msg
              ));
              break;

            case 'done':
              setStatusMessage('');
              break;

            case 'error':
              console.error('Error:', event.data);
              setStatusMessage('');
              break;
          }
        }
      }

      return newConversationId;

    } catch (error) {
      // AbortError means the user clicked "Stop Generating" — not a failure.
      if (error.name === 'AbortError') {
        console.log('Stream cancelled by user');
      } else {
        console.error('Streaming error:', error);
      }
    } finally {
      // Always reset streaming UI state, even on error/cancel.
      setIsStreaming(false);
      setStatusMessage('');
      abortControllerRef.current = null;
    }
  };

  // Cancel the in-flight stream, if any.
  const stopStreaming = () => {
    if (abortControllerRef.current) {
      abortControllerRef.current.abort();
    }
  };

  return (
    <div className="chat-container">
      <div className="messages">
        {messages.map((msg, idx) => (
          <div key={idx} className={`message ${msg.role}`}>
            {msg.content}
          </div>
        ))}

        {statusMessage && (
          <div className="status-indicator">
            {statusMessage}
          </div>
        )}
      </div>

      {isStreaming && (
        <button onClick={stopStreaming}>
          Stop Generating
        </button>
      )}
    </div>
  );
}
258
+
259
+
260
+ // ============================================================================
261
+ // REGENERATE & EDIT EXAMPLES
262
+ // ============================================================================
263
+
264
+ // Regenerate last response
265
async function regenerateResponse(conversationId) {
  // Ask the server to regenerate the assistant's last reply for this
  // conversation; the endpoint answers with the same SSE stream shape
  // as /chat/stream.
  const token = localStorage.getItem('jwt_token');

  const url = `http://localhost:8000/api/v1/chat/conversation/${conversationId}/regenerate`;
  const options = {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${token}`
    }
  };

  const response = await fetch(url, options);

  // Process stream same as sendStreamingMessage
  // ...
}
281
+
282
+ // Edit last message
283
async function editLastMessage(conversationId, newContent) {
  // Replace the user's last message and let the server regenerate the
  // reply; the endpoint answers with the same SSE stream as /chat/stream.
  const token = localStorage.getItem('jwt_token');

  const url = `http://localhost:8000/api/v1/chat/conversation/${conversationId}/edit`;
  const payload = { new_content: newContent };
  const options = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${token}`
    },
    body: JSON.stringify(payload)
  };

  const response = await fetch(url, options);

  // Process stream
  // ...
}