redhairedshanks1 commited on
Commit
307eb51
·
1 Parent(s): 3f5051e

Non-streaming (all-in-one), Streaming (live progress)

Browse files
Files changed (1) hide show
  1. api_routes_v2.py +644 -112
api_routes_v2.py CHANGED
@@ -400,7 +400,7 @@
400
 
401
  # api_routes_v2.py
402
 
403
- from fastapi import APIRouter, HTTPException, UploadFile, File
404
  from fastapi.responses import StreamingResponse
405
  from pydantic import BaseModel
406
  from typing import Optional, List, Dict, Any, Generator, Callable, Tuple
@@ -409,7 +409,6 @@ import os
409
  from datetime import datetime
410
  from urllib.parse import urlparse
411
  import tempfile
412
- from pathlib import Path
413
 
414
  # AWS S3 (server-side access, no presigned URLs)
415
  import boto3
@@ -428,10 +427,13 @@ router = APIRouter(prefix="/api/v2", tags=["MasterLLM API V2 - Enhanced"])
428
  # CONFIG: S3
429
  # ========================
430
 
431
- AWS_REGION = os.getenv("AWS_REGION", "us-east-1")
432
- S3_BUCKET = os.getenv("S3_BUCKET_NAME", "your-bucket")
433
  S3_PREFIX = os.getenv("S3_PREFIX", "masterllm")
434
 
 
 
 
435
  s3 = boto3.client("s3", region_name=AWS_REGION)
436
 
437
  # ========================
@@ -468,12 +470,9 @@ def _ensure_chat(chat_id: Optional[str]) -> str:
468
  """
469
  Ensure a chat exists; if not provided or missing, create a new one.
470
  """
471
- if chat_id:
472
- if session_manager.get_session(chat_id):
473
- return chat_id
474
- # Create a brand new chat
475
  new_id = session_manager.create_session()
476
- # Warm/init
477
  session_manager.get_session(new_id)
478
  return new_id
479
 
@@ -509,7 +508,7 @@ def _normalize_history_for_api(chat_id: str) -> List[Message]:
509
  def _assistant_response_payload(
510
  chat_id: str,
511
  friendly_response: str,
512
- intent_data: Dict[str, Any],
513
  api_data: Dict[str, Any],
514
  state: str
515
  ) -> ChatResponse:
@@ -520,7 +519,7 @@ def _assistant_response_payload(
520
  return ChatResponse(
521
  assistant_response=friendly_response,
522
  api_response=api_data,
523
- intent=intent_data,
524
  chat_id=chat_id,
525
  state=state,
526
  history=history
@@ -575,38 +574,649 @@ def download_to_temp_file(file_ref: Optional[str]) -> Tuple[Optional[str], Calla
575
  return file_ref, noop
576
 
577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  # ========================
579
- # SMART CHAT (non-streaming)
580
  # ========================
581
 
582
  @router.post("/chat", response_model=ChatResponse)
583
  async def smart_chat(request: ChatRequest):
584
  """
585
- Intelligent chat endpoint aligned with app.py:
586
- - Classifies intent (casual_chat, question, unclear, pipeline_request, approval, rejection)
587
- - Casual/chat/question/unclear: friendly reply
588
- - Pipeline requests: generates + proposes plan
589
- - Approvals/rejections when in 'pipeline_proposed' state
590
- Returns:
591
- - assistant_response (LLM-visible reply), plus full history like Gradio UI
592
  """
593
  chat_id = _ensure_chat(request.chat_id)
594
  session = _get_session_or_init(chat_id)
595
 
596
- # Update file if provided (can be local path or s3://)
597
  if request.file_path:
598
  session_manager.update_session(chat_id, {"current_file": request.file_path})
599
  session = _get_session_or_init(chat_id)
600
 
601
- # Add user message
602
  session_manager.add_message(chat_id, "user", request.message)
603
 
604
- # Classify intent
605
  intent_data = intent_classifier.classify_intent(request.message)
606
  current_state = session.get("state", "initial")
607
 
608
  try:
609
- # Casual chat
610
  if intent_data["intent"] == "casual_chat":
611
  friendly = intent_classifier.get_friendly_response("casual_chat", request.message)
612
  api_data = {
@@ -621,13 +1231,11 @@ async def smart_chat(request: ChatRequest):
621
  }
622
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
623
 
624
- # Questions
625
  if intent_data["intent"] == "question":
626
  friendly = intent_classifier.get_friendly_response("question", request.message)
627
  api_data = {"type": "informational_response", "message": friendly, "intent_classification": intent_data}
628
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
629
 
630
- # Unclear
631
  if intent_data["intent"] == "unclear":
632
  friendly = intent_classifier.get_friendly_response("unclear", request.message)
633
  api_data = {
@@ -642,37 +1250,29 @@ async def smart_chat(request: ChatRequest):
642
  }
643
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
644
 
645
- # Approval
646
  if intent_data["intent"] == "approval" and current_state == "pipeline_proposed":
647
  proposed = session.get("proposed_pipeline")
648
  if not proposed:
649
  msg = "No pipeline to approve. Please request a task first."
650
- return _assistant_response_payload(
651
- chat_id, msg, intent_data, {"type": "error", "message": msg}, current_state
652
- )
653
 
654
  session_manager.update_session(chat_id, {"state": "executing"})
655
- friendly = (
656
- f"✅ Great! Executing the pipeline: {proposed.get('pipeline_name')}\n\n"
657
- f"⏳ Processing... (Use the streaming endpoint for real-time updates)"
658
- )
659
  api_data = {
660
  "type": "pipeline_approved",
661
  "message": "Pipeline execution started",
662
  "pipeline": proposed,
663
  "execution_status": "started",
664
- "note": "Use /api/v2/chats/{chat_id}/pipeline/execute/stream for real-time progress"
665
  }
666
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "executing")
667
 
668
- # Rejection
669
  if intent_data["intent"] == "rejection" and current_state == "pipeline_proposed":
670
  session_manager.update_session(chat_id, {"state": "initial", "proposed_pipeline": None})
671
  friendly = "👍 No problem! The pipeline has been cancelled. What else would you like me to help you with?"
672
  api_data = {"type": "pipeline_rejected", "message": "Pipeline cancelled by user", "state_reset": True}
673
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "initial")
674
 
675
- # Pipeline request
676
  if intent_data["intent"] == "pipeline_request" and intent_data.get("requires_pipeline", False):
677
  if not session.get("current_file"):
678
  friendly = (
@@ -687,7 +1287,6 @@ async def smart_chat(request: ChatRequest):
687
  }
688
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
689
 
690
- # Generate pipeline (no need to download file)
691
  try:
692
  pipeline = generate_pipeline(
693
  user_input=request.message,
@@ -743,7 +1342,6 @@ async def smart_chat(request: ChatRequest):
743
  }
744
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
745
 
746
- # Modify when pipeline_proposed and user describes changes
747
  if current_state == "pipeline_proposed":
748
  if len(request.message.strip()) > 5:
749
  try:
@@ -775,7 +1373,6 @@ async def smart_chat(request: ChatRequest):
775
  friendly = f"```json\n{json.dumps(api_data, indent=2)}\n```"
776
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
777
 
778
- # Waiting for confirmation
779
  api_data = {
780
  "type": "waiting_for_confirmation",
781
  "message": "Please type 'approve', 'reject', or describe changes",
@@ -784,7 +1381,6 @@ async def smart_chat(request: ChatRequest):
784
  friendly = f"```json\n{json.dumps(api_data, indent=2)}\n```"
785
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
786
 
787
- # Default nudge
788
  friendly = (
789
  "I'm here to help process documents! Please tell me what you'd like to do with your document.\n\n"
790
  "For example:\n- 'extract text and summarize'\n- 'get tables from pages 2-5'\n- 'translate to Spanish'\n\n"
@@ -799,7 +1395,6 @@ async def smart_chat(request: ChatRequest):
799
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
800
 
801
  except Exception as e:
802
- # Global error handler
803
  error_msg = f"An unexpected error occurred: {str(e)}"
804
  history = _normalize_history_for_api(chat_id)
805
  return ChatResponse(
@@ -812,34 +1407,23 @@ async def smart_chat(request: ChatRequest):
812
  )
813
 
814
 
815
- # ========================
816
- # STREAMING CHAT (NDJSON)
817
- # ========================
818
-
819
  @router.post("/chat/stream")
820
  def smart_chat_stream(request: ChatRequest):
821
  """
822
- Streaming variant of /chat. Emits NDJSON lines mirroring Gradio-like updates.
823
- Content-Type: application/x-ndjson
824
- Events:
825
- - assistant_final: final assistant message
826
- - assistant_delta: accumulated content (during execution approval path)
827
- - status/info/error: additional signals
828
  """
829
 
830
  def gen() -> Generator[bytes, None, None]:
831
  chat_id = _ensure_chat(request.chat_id)
832
  session = _get_session_or_init(chat_id)
833
 
834
- # Update file if provided
835
  if request.file_path:
836
  session_manager.update_session(chat_id, {"current_file": request.file_path})
837
  session = _get_session_or_init(chat_id)
838
 
839
- # Add user message
840
  session_manager.add_message(chat_id, "user", request.message)
841
 
842
- # Classify
843
  intent_data = intent_classifier.classify_intent(request.message)
844
  current_state = session.get("state", "initial")
845
 
@@ -849,14 +1433,12 @@ def smart_chat_stream(request: ChatRequest):
849
  line = json.dumps(obj, ensure_ascii=False).encode("utf-8") + b"\n"
850
  return line
851
 
852
- # Casual / question / unclear at initial
853
  if intent_data["intent"] in {"casual_chat", "question", "unclear"} and current_state == "initial":
854
  friendly = intent_classifier.get_friendly_response(intent_data["intent"], request.message)
855
  session_manager.add_message(chat_id, "assistant", friendly)
856
  yield emit({"type": "assistant_final", "content": friendly, "intent": intent_data, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
857
  return
858
 
859
- # Initial: pipeline request or nudge
860
  if current_state == "initial":
861
  if not intent_data.get("requires_pipeline", False):
862
  friendly = (
@@ -874,7 +1456,6 @@ def smart_chat_stream(request: ChatRequest):
874
  yield emit({"type": "assistant_final", "content": friendly, "intent": intent_data, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
875
  return
876
 
877
- # Generate pipeline (no need to download file)
878
  yield emit({"type": "status", "message": "Analyzing request and creating a pipeline..."})
879
  try:
880
  pipeline = generate_pipeline(
@@ -906,7 +1487,6 @@ def smart_chat_stream(request: ChatRequest):
906
  yield emit({"type": "assistant_final", "content": friendly, "error": str(e), "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
907
  return
908
 
909
- # Pipeline proposed: handle approval, rejection, or edit
910
  if current_state == "pipeline_proposed":
911
  if intent_data["intent"] == "approval":
912
  session_manager.update_session(chat_id, {"state": "executing"})
@@ -922,7 +1502,6 @@ def smart_chat_stream(request: ChatRequest):
922
  executor_used = "unknown"
923
  accumulated = initial
924
 
925
- # Download to temp for execution and ensure cleanup
926
  file_ref = session.get("current_file")
927
  local_path, cleanup = download_to_temp_file(file_ref)
928
 
@@ -977,7 +1556,6 @@ def smart_chat_stream(request: ChatRequest):
977
  yield emit({"type": "assistant_final", "content": friendly_err, "error": err, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
978
  return
979
 
980
- # Finalize
981
  if final_payload:
982
  session_manager.update_session(chat_id, {"pipeline_result": final_payload, "state": "initial"})
983
  session_manager.save_pipeline_execution(
@@ -1013,7 +1591,6 @@ def smart_chat_stream(request: ChatRequest):
1013
  yield emit({"type": "assistant_final", "content": friendly_err, "error": str(e), "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1014
  return
1015
  finally:
1016
- # Clear the temp file after using it
1017
  try:
1018
  cleanup()
1019
  except Exception:
@@ -1027,7 +1604,6 @@ def smart_chat_stream(request: ChatRequest):
1027
  return
1028
 
1029
  else:
1030
- # Treat as edit/modify
1031
  try:
1032
  original_plan = session.get("proposed_pipeline", {})
1033
  edit_context = f"Original: {original_plan.get('pipeline_name')}. User wants: {request.message}"
@@ -1048,7 +1624,6 @@ def smart_chat_stream(request: ChatRequest):
1048
  yield emit({"type": "assistant_final", "content": friendly, "error": str(e), "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1049
  return
1050
 
1051
- # Default
1052
  friendly = "Please upload a document and tell me what you'd like me to do (e.g., extract text, summarize, translate)."
1053
  session_manager.add_message(chat_id, "assistant", friendly)
1054
  yield emit({"type": "assistant_final", "content": friendly, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
@@ -1057,15 +1632,11 @@ def smart_chat_stream(request: ChatRequest):
1057
 
1058
 
1059
  # ========================
1060
- # PIPELINE EXECUTE (non-streaming)
1061
  # ========================
1062
 
1063
  @router.post("/chats/{chat_id}/pipeline/execute", response_model=ChatResponse)
1064
  async def execute_pipeline_now(chat_id: str):
1065
- """
1066
- Execute the currently proposed pipeline and return final result (non-streaming).
1067
- Downloads the file to a temp path for execution and deletes it afterward.
1068
- """
1069
  session = session_manager.get_session(chat_id)
1070
  if not session:
1071
  raise HTTPException(status_code=404, detail="Chat not found")
@@ -1087,14 +1658,13 @@ async def execute_pipeline_now(chat_id: str):
1087
  session_manager.update_session(chat_id, {"pipeline_result": result, "state": "initial"})
1088
  friendly = "🎉 Pipeline completed. Ready for your next task!"
1089
  api_data = {"type": "pipeline_completed", "result": result, "pipeline": plan}
1090
- return _assistant_response_payload(chat_id, friendly, intent={"intent": "pipeline_execute"}, api_data=api_data, state="initial")
1091
  except Exception as e:
1092
  session_manager.update_session(chat_id, {"state": "initial"})
1093
  friendly = f"❌ Pipeline execution failed: {str(e)}"
1094
  api_data = {"type": "error", "error_code": "PIPELINE_EXECUTION_FAILED", "message": str(e)}
1095
- return _assistant_response_payload(chat_id, friendly, intent={"intent": "pipeline_execute"}, api_data=api_data, state="initial")
1096
  finally:
1097
- # Clear temp file after use
1098
  try:
1099
  cleanup()
1100
  except Exception:
@@ -1103,10 +1673,6 @@ async def execute_pipeline_now(chat_id: str):
1103
 
1104
  @router.post("/chats/{chat_id}/pipeline/execute/stream")
1105
  def execute_pipeline_stream_endpoint(chat_id: str):
1106
- """
1107
- Stream the execution of the currently proposed pipeline (NDJSON).
1108
- Downloads the file to a temp path for execution and deletes it afterward.
1109
- """
1110
  session = session_manager.get_session(chat_id)
1111
  if not session:
1112
  raise HTTPException(status_code=404, detail="Chat not found")
@@ -1132,18 +1698,14 @@ def execute_pipeline_stream_endpoint(chat_id: str):
1132
  prefer_bedrock=True
1133
  ):
1134
  yield emit(event)
1135
-
1136
- # No final event? Return summary info
1137
  yield emit({"type": "info", "message": "Execution finished."})
1138
  except Exception as e:
1139
  yield emit({"type": "error", "error": str(e)})
1140
  finally:
1141
- # Clear temp file after use
1142
  try:
1143
  cleanup()
1144
  except Exception:
1145
  pass
1146
- # Reset state to initial after stream ends
1147
  session_manager.update_session(chat_id, {"state": "initial"})
1148
 
1149
  return StreamingResponse(gen(), media_type="application/x-ndjson")
@@ -1192,52 +1754,22 @@ def get_chat_stats(chat_id: str):
1192
 
1193
  @router.post("/chats/{chat_id}/messages", response_model=ChatResponse)
1194
  async def send_message_to_chat(chat_id: str, payload: ChatRequest):
1195
- """
1196
- Alias for POST /api/v2/chat, bound to a specific chat_id.
1197
- Returns assistant response plus full history (role/content), same as Gradio.
1198
- """
1199
  payload.chat_id = chat_id
1200
  return await smart_chat(payload)
1201
 
1202
 
1203
  # ========================
1204
- # FILE UPLOAD (to S3, no presigned URLs)
1205
  # ========================
1206
 
1207
  @router.post("/chats/{chat_id}/upload")
1208
  async def upload_file_to_chat(chat_id: str, file: UploadFile = File(...)):
1209
- """
1210
- Streams the uploaded file directly to S3; stores only s3:// URI in chat state.
1211
- """
1212
  chat_id = _ensure_chat(chat_id)
1213
- key = f"{S3_PREFIX}/{chat_id}/{file.filename}"
1214
- config = TransferConfig(multipart_threshold=8 * 1024 * 1024, max_concurrency=4)
1215
-
1216
- try:
1217
- # Stream from request to S3 (no full in-memory read, no local disk)
1218
- s3.upload_fileobj(
1219
- Fileobj=file.file,
1220
- Bucket=S3_BUCKET,
1221
- Key=key,
1222
- ExtraArgs={"ContentType": file.content_type or "application/octet-stream"},
1223
- Config=config
1224
- )
1225
- except ClientError as e:
1226
- code = e.response.get("Error", {}).get("Code", "Unknown")
1227
- msg = f"S3 upload failed: {code}. Check AWS credentials, permissions (s3:PutObject), region and bucket."
1228
- raise HTTPException(
1229
- status_code=403 if code in ("AccessDenied", "InvalidAccessKeyId", "SignatureDoesNotMatch") else 500,
1230
- detail=msg
1231
- )
1232
-
1233
- s3_uri = f"s3://{S3_BUCKET}/{key}"
1234
- session_manager.update_session(chat_id, {"current_file": s3_uri, "state": "initial"})
1235
- session_manager.add_message(chat_id, "system", f"File uploaded to S3: {s3_uri}")
1236
-
1237
  return {
1238
  "status": "success",
1239
  "message": "File uploaded to S3",
1240
- "file": {"bucket": S3_BUCKET, "key": key, "s3_uri": s3_uri},
1241
  "chat_id": chat_id,
1242
  "next_action": "💬 Now tell me what you'd like to do with this document"
1243
  }
 
400
 
401
  # api_routes_v2.py
402
 
403
+ from fastapi import APIRouter, HTTPException, UploadFile, File, Request, Form
404
  from fastapi.responses import StreamingResponse
405
  from pydantic import BaseModel
406
  from typing import Optional, List, Dict, Any, Generator, Callable, Tuple
 
409
  from datetime import datetime
410
  from urllib.parse import urlparse
411
  import tempfile
 
412
 
413
  # AWS S3 (server-side access, no presigned URLs)
414
  import boto3
 
427
  # CONFIG: S3
428
  # ========================
429
 
430
+ AWS_REGION = os.getenv("AWS_REGION") or os.getenv("AWS_DEFAULT_REGION") or "us-east-1"
431
+ S3_BUCKET = os.getenv("S3_BUCKET") or os.getenv("S3_BUCKET_NAME")
432
  S3_PREFIX = os.getenv("S3_PREFIX", "masterllm")
433
 
434
+ if not S3_BUCKET:
435
+ raise RuntimeError("Missing S3 bucket. Set S3_BUCKET (or S3_BUCKET_NAME).")
436
+
437
  s3 = boto3.client("s3", region_name=AWS_REGION)
438
 
439
  # ========================
 
470
  """
471
  Ensure a chat exists; if not provided or missing, create a new one.
472
  """
473
+ if chat_id and session_manager.get_session(chat_id):
474
+ return chat_id
 
 
475
  new_id = session_manager.create_session()
 
476
  session_manager.get_session(new_id)
477
  return new_id
478
 
 
508
  def _assistant_response_payload(
509
  chat_id: str,
510
  friendly_response: str,
511
+ intent: Dict[str, Any],
512
  api_data: Dict[str, Any],
513
  state: str
514
  ) -> ChatResponse:
 
519
  return ChatResponse(
520
  assistant_response=friendly_response,
521
  api_response=api_data,
522
+ intent=intent,
523
  chat_id=chat_id,
524
  state=state,
525
  history=history
 
574
  return file_ref, noop
575
 
576
 
577
def upload_stream_to_s3(chat_id: str, file: UploadFile) -> str:
    """
    Stream an UploadFile directly to S3 and return its s3:// URI.

    The upload goes straight from the request body to S3 (multipart above
    8 MiB, up to 4 concurrent parts) — no full in-memory read, no local disk.
    On success the chat session's ``current_file`` is set to the S3 URI and a
    system message is recorded.

    Optional server-side encryption via env vars:
      - S3_SSE=AES256            -> SSE-S3
      - S3_SSE=KMS (or aws:kms)  -> SSE-KMS, optionally with S3_KMS_KEY_ID

    Args:
        chat_id: Chat/session identifier; becomes part of the object key.
        file: The incoming FastAPI UploadFile.

    Returns:
        The ``s3://bucket/key`` URI of the stored object.

    Raises:
        HTTPException: 403 for credential/permission errors, 500 for other
            S3 client errors.
    """
    # Never trust the client-supplied filename: strip any directory parts
    # (both "/" and "\") so a name like "../other-chat/x" cannot escape the
    # per-chat prefix, and fall back to "upload" if the name is empty/None.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/")) or "upload"
    key = f"{S3_PREFIX}/{chat_id}/{safe_name}"
    config = TransferConfig(multipart_threshold=8 * 1024 * 1024, max_concurrency=4)

    extra_args = {"ContentType": file.content_type or "application/octet-stream"}
    sse = os.getenv("S3_SSE", "").upper()
    if sse == "AES256":
        extra_args["ServerSideEncryption"] = "AES256"
    elif sse in ("KMS", "AWS:KMS"):  # accept both the shorthand and the header value
        extra_args["ServerSideEncryption"] = "aws:kms"
        kms_key = os.getenv("S3_KMS_KEY_ID")
        if kms_key:
            extra_args["SSEKMSKeyId"] = kms_key

    try:
        s3.upload_fileobj(
            Fileobj=file.file,
            Bucket=S3_BUCKET,
            Key=key,
            ExtraArgs=extra_args,
            Config=config
        )
    except ClientError as e:
        code = e.response.get("Error", {}).get("Code", "Unknown")
        msg = f"S3 upload failed: {code}. Check AWS credentials, permissions (s3:PutObject), region and bucket."
        # Chain the original boto3 error so server logs keep the root cause.
        raise HTTPException(
            status_code=403 if code in ("AccessDenied", "InvalidAccessKeyId", "SignatureDoesNotMatch") else 500,
            detail=msg
        ) from e

    s3_uri = f"s3://{S3_BUCKET}/{key}"
    session_manager.update_session(chat_id, {"current_file": s3_uri, "state": "initial"})
    session_manager.add_message(chat_id, "system", f"File uploaded to S3: {s3_uri}")
    return s3_uri
615
+
616
+
617
+ # ========================
618
+ # UNIFIED CHAT (non-streaming)
619
+ # ========================
620
+
621
+ @router.post("/chat/unified", response_model=ChatResponse)
622
+ async def chat_unified(
623
+ request: Request,
624
+ chat_id: Optional[str] = Form(None),
625
+ message: Optional[str] = Form(None),
626
+ prefer_bedrock: Optional[bool] = Form(True),
627
+ file: Optional[UploadFile] = File(None),
628
+ ):
629
+ """
630
+ One endpoint that behaves like the Gradio chatbot:
631
+ - Accepts multipart/form-data (file + message) OR application/json.
632
+ - If a file is included, it uploads to S3 and sets current_file.
633
+ - Handles casual chat, pipeline request, approve/reject, and edits.
634
+ - On approval, executes the pipeline (non-stream) and returns the final result.
635
+ Returns assistant_response + full history (role/content).
636
+ """
637
+
638
+ # Support JSON payloads too
639
+ content_type = (request.headers.get("content-type") or "").lower()
640
+ file_path_from_json = None
641
+ if "application/json" in content_type:
642
+ body = await request.json()
643
+ chat_id = body.get("chat_id") or chat_id
644
+ message = body.get("message") if "message" in body else message
645
+ prefer_bedrock = body.get("prefer_bedrock", True) if "prefer_bedrock" in body else prefer_bedrock
646
+ file_path_from_json = body.get("file_path")
647
+
648
+ chat_id = _ensure_chat(chat_id)
649
+ session = _get_session_or_init(chat_id)
650
+
651
+ # If JSON included a file_path (e.g., s3://...), attach it
652
+ if file_path_from_json:
653
+ session_manager.update_session(chat_id, {"current_file": file_path_from_json})
654
+ session_manager.add_message(chat_id, "system", f"File attached: {file_path_from_json}")
655
+ session = _get_session_or_init(chat_id)
656
+
657
+ # If a file is included in the form, upload to S3 and attach it
658
+ file_info = None
659
+ if file is not None:
660
+ s3_uri = upload_stream_to_s3(chat_id, file)
661
+ file_info = {"bucket": S3_BUCKET, "key": s3_uri.split(f"s3://{S3_BUCKET}/", 1)[1], "s3_uri": s3_uri}
662
+ session = _get_session_or_init(chat_id)
663
+
664
+ # If no message and only a file was sent, respond with an acknowledgement
665
+ if (message is None or str(message).strip() == "") and file_info:
666
+ friendly = "📁 File uploaded successfully. Tell me what you'd like to do with it (e.g., extract text, get tables, summarize)."
667
+ api_data = {"type": "file_uploaded", "file": file_info, "next_action": "send_instruction"}
668
+ return _assistant_response_payload(chat_id, friendly, {"intent": "file_uploaded"}, api_data, session.get("state", "initial"))
669
+
670
+ # If still no message, nudge the user
671
+ if message is None or str(message).strip() == "":
672
+ friendly = "Please provide a message (e.g., 'extract text', 'get tables', 'summarize')."
673
+ api_data = {"type": "missing_message"}
674
+ return _assistant_response_payload(chat_id, friendly, {"intent": "missing_message"}, api_data, session.get("state", "initial"))
675
+
676
+ # Add user message
677
+ session_manager.add_message(chat_id, "user", message)
678
+
679
+ # Classify intent
680
+ intent_data = intent_classifier.classify_intent(message)
681
+ current_state = session.get("state", "initial")
682
+
683
+ try:
684
+ # Casual chat
685
+ if intent_data["intent"] == "casual_chat":
686
+ friendly = intent_classifier.get_friendly_response("casual_chat", message)
687
+ api_data = {
688
+ "type": "casual_response",
689
+ "message": friendly,
690
+ "intent_classification": intent_data,
691
+ "suggestions": [
692
+ "Upload a document to get started",
693
+ "Ask 'what can you do?' to see capabilities",
694
+ "Type 'help' for usage instructions"
695
+ ]
696
+ }
697
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
698
+
699
+ # Questions
700
+ if intent_data["intent"] == "question":
701
+ friendly = intent_classifier.get_friendly_response("question", message)
702
+ api_data = {"type": "informational_response", "message": friendly, "intent_classification": intent_data}
703
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
704
+
705
+ # Unclear
706
+ if intent_data["intent"] == "unclear":
707
+ friendly = intent_classifier.get_friendly_response("unclear", message)
708
+ api_data = {
709
+ "type": "clarification_needed",
710
+ "message": friendly,
711
+ "intent_classification": intent_data,
712
+ "suggestions": [
713
+ "Be more specific about what you want to do",
714
+ "Use keywords like: extract, summarize, translate, etc.",
715
+ "Type 'help' for examples"
716
+ ]
717
+ }
718
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
719
+
720
+ # Approval (execute now in unified endpoint)
721
+ if intent_data["intent"] == "approval" and current_state == "pipeline_proposed":
722
+ proposed = session.get("proposed_pipeline")
723
+ if not proposed:
724
+ msg = "No pipeline to approve. Please request a task first."
725
+ return _assistant_response_payload(chat_id, msg, intent_data, {"type": "error", "message": msg}, current_state)
726
+
727
+ file_ref = session.get("current_file")
728
+ local_path, cleanup = download_to_temp_file(file_ref)
729
+ session_manager.update_session(chat_id, {"state": "executing"})
730
+
731
+ try:
732
+ result = execute_pipeline(
733
+ pipeline=proposed,
734
+ file_path=local_path,
735
+ session_id=chat_id,
736
+ prefer_bedrock=bool(prefer_bedrock),
737
+ )
738
+ session_manager.update_session(chat_id, {"pipeline_result": result, "state": "initial"})
739
+ friendly = (
740
+ f"🎉 Pipeline completed successfully!\n"
741
+ f"✅ All done! What else would you like me to help you with?"
742
+ )
743
+ api_data = {
744
+ "type": "pipeline_completed",
745
+ "result": result,
746
+ "pipeline": proposed
747
+ }
748
+ return _assistant_response_payload(chat_id, friendly, {"intent": "pipeline_execute"}, api_data, "initial")
749
+ except Exception as e:
750
+ session_manager.update_session(chat_id, {"state": "initial"})
751
+ friendly = f"❌ Pipeline execution failed: {str(e)}"
752
+ api_data = {"type": "error", "error_code": "PIPELINE_EXECUTION_FAILED", "message": str(e)}
753
+ return _assistant_response_payload(chat_id, friendly, {"intent": "pipeline_execute"}, api_data, "initial")
754
+ finally:
755
+ try:
756
+ cleanup()
757
+ except Exception:
758
+ pass
759
+
760
+ # Rejection
761
+ if intent_data["intent"] == "rejection" and current_state == "pipeline_proposed":
762
+ session_manager.update_session(chat_id, {"state": "initial", "proposed_pipeline": None})
763
+ friendly = "👍 No problem! The pipeline has been cancelled. What else would you like me to help you with?"
764
+ api_data = {"type": "pipeline_rejected", "message": "Pipeline cancelled by user", "state_reset": True}
765
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "initial")
766
+
767
+ # Pipeline request
768
+ if intent_data["intent"] == "pipeline_request" and intent_data.get("requires_pipeline", False):
769
+ if not session.get("current_file"):
770
+ friendly = (
771
+ "📁 Please upload a document first before I can process it!\n\n"
772
+ "Once you upload a file, I'll be happy to help you with that task."
773
+ )
774
+ api_data = {
775
+ "type": "error",
776
+ "error_code": "NO_FILE_UPLOADED",
777
+ "message": "Document required before pipeline generation",
778
+ "action_required": "upload_file"
779
+ }
780
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
781
+
782
+ try:
783
+ pipeline = generate_pipeline(
784
+ user_input=message,
785
+ file_path=session.get("current_file"),
786
+ prefer_bedrock=bool(prefer_bedrock),
787
+ )
788
+
789
+ session_manager.update_session(chat_id, {"proposed_pipeline": pipeline, "state": "pipeline_proposed"})
790
+
791
+ pipeline_name = pipeline.get("pipeline_name", "Document Processing")
792
+ steps_list = pipeline.get("pipeline_steps", [])
793
+ steps_summary = "\n".join([f" {i+1}. {step.get('tool', 'Unknown')}" for i, step in enumerate(steps_list)])
794
+
795
+ friendly = (
796
+ f"🎯 **Pipeline Created: {pipeline_name}**\n"
797
+ f"Here's what I'll do:\n{steps_summary}\n"
798
+ f"**Ready to proceed?**\n"
799
+ f"- Type 'approve' or 'yes' to execute\n"
800
+ f"- Type 'reject' or 'no' to cancel\n"
801
+ f"- Describe changes to modify the plan"
802
+ )
803
+
804
+ api_data = {
805
+ "type": "pipeline_generated",
806
+ "message": "Pipeline successfully created",
807
+ "pipeline": pipeline,
808
+ "pipeline_summary": {
809
+ "name": pipeline_name,
810
+ "total_steps": len(steps_list),
811
+ "steps": steps_list,
812
+ "generator": pipeline.get("_generator"),
813
+ "model": pipeline.get("_model")
814
+ },
815
+ "required_action": "approval",
816
+ "next_steps": {
817
+ "approve": "Type 'approve' or 'yes'",
818
+ "reject": "Type 'reject' or 'no'",
819
+ "modify": "Describe your changes"
820
+ }
821
+ }
822
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
823
+
824
+ except Exception as e:
825
+ friendly = (
826
+ f"❌ Oops! I encountered an error while creating the pipeline:\n\n{str(e)}\n\n"
827
+ "Please try rephrasing your request or type 'help' for examples."
828
+ )
829
+ api_data = {
830
+ "type": "error",
831
+ "error_code": "PIPELINE_GENERATION_FAILED",
832
+ "message": str(e),
833
+ "traceback": str(e),
834
+ }
835
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
836
+
837
+ # Modify when pipeline_proposed and user describes changes
838
+ if current_state == "pipeline_proposed":
839
+ if len(message.strip()) > 5:
840
+ try:
841
+ original_plan = session.get("proposed_pipeline", {})
842
+ edit_context = f"Original: {original_plan.get('pipeline_name')}. User wants: {message}"
843
+ new_pipeline = generate_pipeline(
844
+ user_input=edit_context,
845
+ file_path=session.get("current_file"),
846
+ prefer_bedrock=bool(prefer_bedrock)
847
+ )
848
+ session_manager.update_session(chat_id, {"proposed_pipeline": new_pipeline, "state": "pipeline_proposed"})
849
+
850
+ formatted = format_pipeline_for_display(new_pipeline)
851
+ friendly = formatted + f"\n\n```json\n{json.dumps(new_pipeline, indent=2)}\n```"
852
+
853
+ api_data = {
854
+ "type": "pipeline_modified",
855
+ "message": "Pipeline updated based on user's edits",
856
+ "pipeline": new_pipeline
857
+ }
858
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
859
+ except Exception as e:
860
+ api_data = {
861
+ "type": "edit_failed",
862
+ "error": str(e),
863
+ "message": "Could not modify the plan",
864
+ "action": "Try 'approve' to run as-is, or 'reject' to start over"
865
+ }
866
+ friendly = f"```json\n{json.dumps(api_data, indent=2)}\n```"
867
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
868
+
869
+ # Waiting for confirmation
870
+ api_data = {
871
+ "type": "waiting_for_confirmation",
872
+ "message": "Please type 'approve', 'reject', or describe changes",
873
+ "hint": "You can also say 'edit' for modification hints"
874
+ }
875
+ friendly = f"```json\n{json.dumps(api_data, indent=2)}\n```"
876
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
877
+
878
+ # Default nudge
879
+ friendly = (
880
+ "I'm here to help process documents! Please tell me what you'd like to do with your document.\n\n"
881
+ "For example:\n- 'extract text and summarize'\n- 'get tables from pages 2-5'\n- 'translate to Spanish'\n\n"
882
+ "Type 'help' to see all capabilities!"
883
+ )
884
+ api_data = {
885
+ "type": "unclear_intent",
886
+ "message": "Could not determine appropriate action",
887
+ "intent_classification": intent_data,
888
+ "current_state": current_state
889
+ }
890
+ return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
891
+
892
+ except Exception as e:
893
+ error_msg = f"An unexpected error occurred: {str(e)}"
894
+ history = _normalize_history_for_api(chat_id)
895
+ return ChatResponse(
896
+ assistant_response=error_msg,
897
+ api_response={"type": "unexpected_error", "error": str(e)},
898
+ intent=intent_data if isinstance(intent_data, dict) else {"intent": "unknown"},
899
+ chat_id=chat_id,
900
+ state=current_state if isinstance(current_state, str) else "initial",
901
+ history=history
902
+ )
903
+
904
+
905
+ # ========================
906
+ # UNIFIED CHAT (streaming, NDJSON)
907
+ # ========================
908
+
909
@router.post("/chat/unified/stream")
async def chat_unified_stream(
    request: Request,
    chat_id: Optional[str] = Form(None),
    message: Optional[str] = Form(None),
    prefer_bedrock: Optional[bool] = Form(True),
    file: Optional[UploadFile] = File(None),
):
    """
    Unified streaming endpoint (NDJSON), same behavior as Gradio:
      - Accepts multipart/form-data (file + message) OR a JSON body.
      - Uploads the file to S3 if one is included.
      - On approval, streams pipeline execution progress and the final result.

    Each NDJSON line is a JSON object carrying at least ``type``, ``chat_id``
    and ``state``. Terminal events use type ``assistant_final`` and include
    the full serialized chat ``history``.

    FIX: the original version declared this endpoint as a plain ``def`` and
    returned ``stream_wrapper()`` — a coroutine object from an inner
    ``async def`` that FastAPI never awaited (sync endpoints run in a
    threadpool and their return value is serialized as-is). The endpoint is
    now ``async def`` and awaits the preparation step itself.
    """

    async def prepare() -> Tuple[str, Optional[str], Optional[bool], Optional[Dict[str, Any]]]:
        """Resolve inputs from form or JSON, ensure the chat, attach/upload files."""
        content_type = (request.headers.get("content-type") or "").lower()
        file_path_from_json = None
        _chat_id, _message, _prefer_bedrock, _file = chat_id, message, prefer_bedrock, file

        # JSON clients send everything in the body; form values act as fallback.
        if "application/json" in content_type:
            body = await request.json()
            _chat_id = body.get("chat_id") or _chat_id
            _message = body.get("message") if "message" in body else _message
            _prefer_bedrock = body.get("prefer_bedrock", True) if "prefer_bedrock" in body else _prefer_bedrock
            file_path_from_json = body.get("file_path")

        _chat_id = _ensure_chat(_chat_id)
        _get_session_or_init(_chat_id)  # warm/init only; the snapshot itself is unused here

        # JSON clients may reference an already-accessible file path instead of uploading.
        if file_path_from_json:
            session_manager.update_session(_chat_id, {"current_file": file_path_from_json})
            session_manager.add_message(_chat_id, "system", f"File attached: {file_path_from_json}")

        # Multipart clients may include the document itself; push it to S3 server-side.
        uploaded_file_info = None
        if _file is not None:
            s3_uri = upload_stream_to_s3(_chat_id, _file)
            uploaded_file_info = {
                "bucket": S3_BUCKET,
                "key": s3_uri.split(f"s3://{S3_BUCKET}/", 1)[1],
                "s3_uri": s3_uri,
            }

        return _chat_id, _message, _prefer_bedrock, uploaded_file_info

    def make_stream(
        chat_id_local: str,
        msg: Optional[str],
        prefer_bedrock_local: bool,
        uploaded_file_info: Optional[Dict[str, Any]],
    ):
        """Build the NDJSON byte generator for this turn."""

        def emit(obj: Dict[str, Any]) -> bytes:
            # Stamp every event with chat id and current state; tolerate a
            # session that disappeared mid-stream instead of raising.
            obj.setdefault("chat_id", chat_id_local)
            obj.setdefault("state", (session_manager.get_session(chat_id_local) or {}).get("state", "initial"))
            return json.dumps(obj, ensure_ascii=False).encode("utf-8") + b"\n"

        def gen() -> Generator[bytes, None, None]:
            session = _get_session_or_init(chat_id_local)

            def _hist() -> List[Dict[str, Any]]:
                # Serialized transcript attached to every terminal event.
                return [m.dict() for m in _normalize_history_for_api(chat_id_local)]

            # File-only request: acknowledge the upload and stop.
            if (msg is None or str(msg).strip() == "") and uploaded_file_info:
                friendly = "📁 File uploaded successfully. Tell me what you'd like to do with it (e.g., extract text, get tables, summarize)."
                session_manager.add_message(chat_id_local, "assistant", friendly)
                yield emit({"type": "assistant_final", "content": friendly, "file": uploaded_file_info, "history": _hist()})
                return

            # No message and no file: nudge the caller.
            if msg is None or str(msg).strip() == "":
                friendly = "Please provide a message (e.g., 'extract text', 'get tables', 'summarize')."
                session_manager.add_message(chat_id_local, "assistant", friendly)
                yield emit({"type": "assistant_final", "content": friendly, "history": _hist()})
                return

            # Record the user turn, then classify it.
            session_manager.add_message(chat_id_local, "user", msg)
            intent_data = intent_classifier.classify_intent(msg)
            current_state = session.get("state", "initial")

            # Small talk / questions / unclear input outside of any workflow.
            if intent_data["intent"] in {"casual_chat", "question", "unclear"} and current_state == "initial":
                friendly = intent_classifier.get_friendly_response(intent_data["intent"], msg)
                session_manager.add_message(chat_id_local, "assistant", friendly)
                yield emit({"type": "assistant_final", "content": friendly, "intent": intent_data, "history": _hist()})
                return

            if current_state == "initial":
                # Not a pipeline request: explain what this assistant can do.
                if not intent_data.get("requires_pipeline", False):
                    friendly = (
                        "I'm here to help process documents! Please tell me what you'd like to do with your document.\n\n"
                        "For example:\n- 'extract text and summarize'\n- 'get tables from pages 2-5'\n- 'translate to Spanish'\n\n"
                        "Type 'help' to see all capabilities!"
                    )
                    session_manager.add_message(chat_id_local, "assistant", friendly)
                    yield emit({"type": "assistant_final", "content": friendly, "intent": intent_data, "history": _hist()})
                    return

                # A pipeline needs a document attached to the session first.
                if not session.get("current_file"):
                    friendly = "📁 Please upload a document first before I can process it!\n\nClick 'Upload Document' to get started."
                    session_manager.add_message(chat_id_local, "assistant", friendly)
                    yield emit({"type": "assistant_final", "content": friendly, "intent": intent_data, "history": _hist()})
                    return

                # Propose a pipeline (no need to download the file for planning).
                yield emit({"type": "status", "message": "Analyzing request and creating a pipeline..."})
                try:
                    pipeline = generate_pipeline(
                        user_input=msg,
                        file_path=session.get("current_file"),
                        prefer_bedrock=bool(prefer_bedrock_local)
                    )
                    session_manager.update_session(chat_id_local, {"proposed_pipeline": pipeline, "state": "pipeline_proposed"})

                    pipeline_name = pipeline.get("pipeline_name", "Document Processing")
                    steps_list = pipeline.get("pipeline_steps", [])
                    steps_summary = "\n".join([f" {i+1}. {step.get('tool', 'Unknown')}" for i, step in enumerate(steps_list)])

                    friendly = (
                        f"🎯 **Pipeline Created: {pipeline_name}**\n"
                        f"Here's what I'll do:\n{steps_summary}\n"
                        f"**Ready to proceed?**\n"
                        f"- Type 'approve' or 'yes' to execute\n"
                        f"- Type 'reject' or 'no' to cancel\n"
                        f"- Describe changes to modify the plan"
                    )

                    session_manager.add_message(chat_id_local, "assistant", friendly)
                    yield emit({"type": "assistant_final", "content": friendly, "pipeline": pipeline, "history": _hist()})
                    return
                except Exception as e:
                    friendly = f"❌ Error generating pipeline: {str(e)}"
                    session_manager.add_message(chat_id_local, "assistant", friendly)
                    yield emit({"type": "assistant_final", "content": friendly, "error": str(e), "history": _hist()})
                    return

            # A pipeline has been proposed: handle approval, rejection, or edits.
            if current_state == "pipeline_proposed":
                if intent_data["intent"] == "approval":
                    session_manager.update_session(chat_id_local, {"state": "executing"})
                    plan = session.get("proposed_pipeline", {})
                    initial = (
                        f"✅ Approved! Starting execution of: **{plan.get('pipeline_name', 'pipeline')}**\n\n"
                        f"🚀 Processing, please wait...\n_(Using {plan.get('_generator', 'AI')} - {plan.get('_model', 'model')})_"
                    )
                    yield emit({"type": "assistant_delta", "content": initial})

                    steps_completed = []
                    final_payload = None
                    executor_used = "unknown"
                    accumulated = initial  # running transcript echoed on each delta

                    # Execution needs a local copy of the document (e.g. from S3).
                    file_ref = session.get("current_file")
                    local_path, cleanup = download_to_temp_file(file_ref)

                    try:
                        for event in execute_pipeline_streaming(
                            pipeline=plan,
                            file_path=local_path,
                            session_id=chat_id_local,
                            prefer_bedrock=bool(prefer_bedrock_local)
                        ):
                            etype = event.get("type")

                            if etype == "info":
                                msg2 = f"ℹ️ {event.get('message')} _(Executor: {event.get('executor', 'unknown')})_"
                                accumulated += "\n\n" + msg2
                                yield emit({"type": "assistant_delta", "content": accumulated})

                            elif etype == "step":
                                step_num = event.get("step", 0)
                                tool_name = event.get("tool", "processing")
                                status = event.get("status", "running")
                                if status == "completed" and "observation" in event:
                                    obs_preview = str(event.get("observation"))[:80]
                                    step_msg = f"✅ Step {step_num}: {tool_name} - Completed!\n Preview: {obs_preview}..."
                                elif status == "executing":
                                    step_msg = f"⏳ Step {step_num}: {tool_name} - Processing..."
                                else:
                                    step_msg = f"📍 Step {step_num}: {tool_name}"

                                steps_completed.append({
                                    "step": step_num,
                                    "tool": tool_name,
                                    "status": status,
                                    "executor": event.get("executor", "unknown"),
                                    "observation": event.get("observation"),
                                    "input": event.get("input"),
                                })
                                executor_used = event.get("executor", executor_used)

                                accumulated += "\n\n" + step_msg
                                yield emit({"type": "assistant_delta", "content": accumulated})

                            elif etype == "final":
                                final_payload = event.get("data")
                                executor_used = event.get("executor", executor_used)

                            elif etype == "error":
                                err = event.get("error", "Unknown error")
                                friendly_err = f"❌ Pipeline Failed\n\nError: {err}\n\nCompleted {len(steps_completed)} step(s) before failure."
                                session_manager.update_session(chat_id_local, {"state": "initial"})
                                session_manager.add_message(chat_id_local, "assistant", friendly_err)
                                yield emit({"type": "assistant_final", "content": friendly_err, "error": err, "history": _hist()})
                                return

                        # Stream drained: persist and report the outcome.
                        if final_payload:
                            session_manager.update_session(chat_id_local, {"pipeline_result": final_payload, "state": "initial"})
                            session_manager.save_pipeline_execution(
                                session_id=chat_id_local,
                                pipeline=plan,
                                result=final_payload,
                                file_path=file_ref,
                                executor=executor_used
                            )
                            success_count = len([s for s in steps_completed if s.get("status") == "completed"])
                            friendly_final = (
                                f"🎉 Pipeline Completed Successfully!\n"
                                f"- Pipeline: {plan.get('pipeline_name', 'Document Processing')}\n"
                                f"- Total Steps: {len(steps_completed)}\n"
                                f"- Successful: {success_count}\n"
                                f"- Executor: {executor_used}\n"
                                f"✅ All done! What else would you like me to help you with?"
                            )
                            session_manager.add_message(chat_id_local, "assistant", friendly_final)
                            yield emit({"type": "assistant_final", "content": friendly_final, "result": final_payload, "history": _hist()})
                            return
                        else:
                            done = f"✅ Pipeline Completed! Executed {len(steps_completed)} steps using {executor_used}."
                            session_manager.update_session(chat_id_local, {"state": "initial"})
                            session_manager.add_message(chat_id_local, "assistant", done)
                            yield emit({"type": "assistant_final", "content": done, "history": _hist()})
                            return

                    except Exception as e:
                        friendly_err = f"❌ Pipeline Execution Failed\n\nError: {str(e)}\n\nCompleted {len(steps_completed)} step(s) before failure."
                        session_manager.update_session(chat_id_local, {"state": "initial"})
                        session_manager.add_message(chat_id_local, "assistant", friendly_err)
                        yield emit({"type": "assistant_final", "content": friendly_err, "error": str(e), "history": _hist()})
                        return
                    finally:
                        # Always remove the temporary local copy of the document.
                        try:
                            cleanup()
                        except Exception:
                            pass

                elif intent_data["intent"] == "rejection":
                    session_manager.update_session(chat_id_local, {"state": "initial", "proposed_pipeline": None})
                    friendly = "👍 No problem! Pipeline cancelled. What else would you like me to help you with?"
                    session_manager.add_message(chat_id_local, "assistant", friendly)
                    yield emit({"type": "assistant_final", "content": friendly, "history": _hist()})
                    return

                else:
                    # Any other input while a plan is pending is treated as an edit request.
                    try:
                        original_plan = session.get("proposed_pipeline", {})
                        edit_context = f"Original: {original_plan.get('pipeline_name')}. User wants: {msg}"
                        new_pipeline = generate_pipeline(
                            user_input=edit_context,
                            file_path=session.get("current_file"),
                            prefer_bedrock=bool(prefer_bedrock_local)
                        )
                        session_manager.update_session(chat_id_local, {"proposed_pipeline": new_pipeline, "state": "pipeline_proposed"})
                        formatted = format_pipeline_for_display(new_pipeline)
                        friendly = formatted + f"\n\n```json\n{json.dumps(new_pipeline, indent=2)}\n```"
                        session_manager.add_message(chat_id_local, "assistant", friendly)
                        yield emit({"type": "assistant_final", "content": friendly, "pipeline": new_pipeline, "history": _hist()})
                        return
                    except Exception as e:
                        friendly = f"❌ Edit failed: {str(e)}"
                        session_manager.add_message(chat_id_local, "assistant", friendly)
                        yield emit({"type": "assistant_final", "content": friendly, "error": str(e), "history": _hist()})
                        return

            # Fallback for any unhandled state.
            friendly = "Please upload a document and tell me what you'd like me to do (e.g., extract text, summarize, translate)."
            session_manager.add_message(chat_id_local, "assistant", friendly)
            yield emit({"type": "assistant_final", "content": friendly, "history": _hist()})

        return gen()

    # BUG FIX: previously a sync `def` endpoint returned the un-awaited
    # coroutine of an inner async wrapper; await the preparation here and
    # hand FastAPI a real StreamingResponse.
    chat_id_local, msg_local, prefer_local, uploaded_info = await prepare()
    return StreamingResponse(
        make_stream(chat_id_local, msg_local, bool(prefer_local), uploaded_info),
        media_type="application/x-ndjson",
    )
1195
+
1196
+
1197
  # ========================
1198
+ # LEGACY SMART CHAT (kept for compatibility)
1199
  # ========================
1200
 
1201
  @router.post("/chat", response_model=ChatResponse)
1202
  async def smart_chat(request: ChatRequest):
1203
  """
1204
+ Kept for compatibility with existing clients (non-stream).
1205
+ For a single all-in-one endpoint, use /api/v2/chat/unified.
 
 
 
 
 
1206
  """
1207
  chat_id = _ensure_chat(request.chat_id)
1208
  session = _get_session_or_init(chat_id)
1209
 
 
1210
  if request.file_path:
1211
  session_manager.update_session(chat_id, {"current_file": request.file_path})
1212
  session = _get_session_or_init(chat_id)
1213
 
 
1214
  session_manager.add_message(chat_id, "user", request.message)
1215
 
 
1216
  intent_data = intent_classifier.classify_intent(request.message)
1217
  current_state = session.get("state", "initial")
1218
 
1219
  try:
 
1220
  if intent_data["intent"] == "casual_chat":
1221
  friendly = intent_classifier.get_friendly_response("casual_chat", request.message)
1222
  api_data = {
 
1231
  }
1232
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
1233
 
 
1234
  if intent_data["intent"] == "question":
1235
  friendly = intent_classifier.get_friendly_response("question", request.message)
1236
  api_data = {"type": "informational_response", "message": friendly, "intent_classification": intent_data}
1237
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
1238
 
 
1239
  if intent_data["intent"] == "unclear":
1240
  friendly = intent_classifier.get_friendly_response("unclear", request.message)
1241
  api_data = {
 
1250
  }
1251
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
1252
 
 
1253
  if intent_data["intent"] == "approval" and current_state == "pipeline_proposed":
1254
  proposed = session.get("proposed_pipeline")
1255
  if not proposed:
1256
  msg = "No pipeline to approve. Please request a task first."
1257
+ return _assistant_response_payload(chat_id, msg, intent_data, {"type": "error", "message": msg}, current_state)
 
 
1258
 
1259
  session_manager.update_session(chat_id, {"state": "executing"})
1260
+ friendly = f"✅ Great! Executing the pipeline: {proposed.get('pipeline_name')}\n\n⏳ Processing... (Use the streaming endpoint for real-time updates)"
 
 
 
1261
  api_data = {
1262
  "type": "pipeline_approved",
1263
  "message": "Pipeline execution started",
1264
  "pipeline": proposed,
1265
  "execution_status": "started",
1266
+ "note": "Use /api/v2/chat/unified/stream for real-time progress"
1267
  }
1268
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "executing")
1269
 
 
1270
  if intent_data["intent"] == "rejection" and current_state == "pipeline_proposed":
1271
  session_manager.update_session(chat_id, {"state": "initial", "proposed_pipeline": None})
1272
  friendly = "👍 No problem! The pipeline has been cancelled. What else would you like me to help you with?"
1273
  api_data = {"type": "pipeline_rejected", "message": "Pipeline cancelled by user", "state_reset": True}
1274
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "initial")
1275
 
 
1276
  if intent_data["intent"] == "pipeline_request" and intent_data.get("requires_pipeline", False):
1277
  if not session.get("current_file"):
1278
  friendly = (
 
1287
  }
1288
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
1289
 
 
1290
  try:
1291
  pipeline = generate_pipeline(
1292
  user_input=request.message,
 
1342
  }
1343
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
1344
 
 
1345
  if current_state == "pipeline_proposed":
1346
  if len(request.message.strip()) > 5:
1347
  try:
 
1373
  friendly = f"```json\n{json.dumps(api_data, indent=2)}\n```"
1374
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
1375
 
 
1376
  api_data = {
1377
  "type": "waiting_for_confirmation",
1378
  "message": "Please type 'approve', 'reject', or describe changes",
 
1381
  friendly = f"```json\n{json.dumps(api_data, indent=2)}\n```"
1382
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, "pipeline_proposed")
1383
 
 
1384
  friendly = (
1385
  "I'm here to help process documents! Please tell me what you'd like to do with your document.\n\n"
1386
  "For example:\n- 'extract text and summarize'\n- 'get tables from pages 2-5'\n- 'translate to Spanish'\n\n"
 
1395
  return _assistant_response_payload(chat_id, friendly, intent_data, api_data, current_state)
1396
 
1397
  except Exception as e:
 
1398
  error_msg = f"An unexpected error occurred: {str(e)}"
1399
  history = _normalize_history_for_api(chat_id)
1400
  return ChatResponse(
 
1407
  )
1408
 
1409
 
 
 
 
 
1410
  @router.post("/chat/stream")
1411
  def smart_chat_stream(request: ChatRequest):
1412
  """
1413
+ Kept for compatibility (streaming NDJSON).
1414
+ For the all-in-one streaming flow, use /api/v2/chat/unified/stream.
 
 
 
 
1415
  """
1416
 
1417
  def gen() -> Generator[bytes, None, None]:
1418
  chat_id = _ensure_chat(request.chat_id)
1419
  session = _get_session_or_init(chat_id)
1420
 
 
1421
  if request.file_path:
1422
  session_manager.update_session(chat_id, {"current_file": request.file_path})
1423
  session = _get_session_or_init(chat_id)
1424
 
 
1425
  session_manager.add_message(chat_id, "user", request.message)
1426
 
 
1427
  intent_data = intent_classifier.classify_intent(request.message)
1428
  current_state = session.get("state", "initial")
1429
 
 
1433
  line = json.dumps(obj, ensure_ascii=False).encode("utf-8") + b"\n"
1434
  return line
1435
 
 
1436
  if intent_data["intent"] in {"casual_chat", "question", "unclear"} and current_state == "initial":
1437
  friendly = intent_classifier.get_friendly_response(intent_data["intent"], request.message)
1438
  session_manager.add_message(chat_id, "assistant", friendly)
1439
  yield emit({"type": "assistant_final", "content": friendly, "intent": intent_data, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1440
  return
1441
 
 
1442
  if current_state == "initial":
1443
  if not intent_data.get("requires_pipeline", False):
1444
  friendly = (
 
1456
  yield emit({"type": "assistant_final", "content": friendly, "intent": intent_data, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1457
  return
1458
 
 
1459
  yield emit({"type": "status", "message": "Analyzing request and creating a pipeline..."})
1460
  try:
1461
  pipeline = generate_pipeline(
 
1487
  yield emit({"type": "assistant_final", "content": friendly, "error": str(e), "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1488
  return
1489
 
 
1490
  if current_state == "pipeline_proposed":
1491
  if intent_data["intent"] == "approval":
1492
  session_manager.update_session(chat_id, {"state": "executing"})
 
1502
  executor_used = "unknown"
1503
  accumulated = initial
1504
 
 
1505
  file_ref = session.get("current_file")
1506
  local_path, cleanup = download_to_temp_file(file_ref)
1507
 
 
1556
  yield emit({"type": "assistant_final", "content": friendly_err, "error": err, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1557
  return
1558
 
 
1559
  if final_payload:
1560
  session_manager.update_session(chat_id, {"pipeline_result": final_payload, "state": "initial"})
1561
  session_manager.save_pipeline_execution(
 
1591
  yield emit({"type": "assistant_final", "content": friendly_err, "error": str(e), "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1592
  return
1593
  finally:
 
1594
  try:
1595
  cleanup()
1596
  except Exception:
 
1604
  return
1605
 
1606
  else:
 
1607
  try:
1608
  original_plan = session.get("proposed_pipeline", {})
1609
  edit_context = f"Original: {original_plan.get('pipeline_name')}. User wants: {request.message}"
 
1624
  yield emit({"type": "assistant_final", "content": friendly, "error": str(e), "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
1625
  return
1626
 
 
1627
  friendly = "Please upload a document and tell me what you'd like me to do (e.g., extract text, summarize, translate)."
1628
  session_manager.add_message(chat_id, "assistant", friendly)
1629
  yield emit({"type": "assistant_final", "content": friendly, "history": [m.dict() for m in _normalize_history_for_api(chat_id)]})
 
1632
 
1633
 
1634
  # ========================
1635
+ # PIPELINE EXECUTION ENDPOINTS (keep for compatibility)
1636
  # ========================
1637
 
1638
  @router.post("/chats/{chat_id}/pipeline/execute", response_model=ChatResponse)
1639
  async def execute_pipeline_now(chat_id: str):
 
 
 
 
1640
  session = session_manager.get_session(chat_id)
1641
  if not session:
1642
  raise HTTPException(status_code=404, detail="Chat not found")
 
1658
  session_manager.update_session(chat_id, {"pipeline_result": result, "state": "initial"})
1659
  friendly = "🎉 Pipeline completed. Ready for your next task!"
1660
  api_data = {"type": "pipeline_completed", "result": result, "pipeline": plan}
1661
+ return _assistant_response_payload(chat_id, friendly, {"intent": "pipeline_execute"}, api_data, "initial")
1662
  except Exception as e:
1663
  session_manager.update_session(chat_id, {"state": "initial"})
1664
  friendly = f"❌ Pipeline execution failed: {str(e)}"
1665
  api_data = {"type": "error", "error_code": "PIPELINE_EXECUTION_FAILED", "message": str(e)}
1666
+ return _assistant_response_payload(chat_id, friendly, {"intent": "pipeline_execute"}, api_data, "initial")
1667
  finally:
 
1668
  try:
1669
  cleanup()
1670
  except Exception:
 
1673
 
1674
  @router.post("/chats/{chat_id}/pipeline/execute/stream")
1675
  def execute_pipeline_stream_endpoint(chat_id: str):
 
 
 
 
1676
  session = session_manager.get_session(chat_id)
1677
  if not session:
1678
  raise HTTPException(status_code=404, detail="Chat not found")
 
1698
  prefer_bedrock=True
1699
  ):
1700
  yield emit(event)
 
 
1701
  yield emit({"type": "info", "message": "Execution finished."})
1702
  except Exception as e:
1703
  yield emit({"type": "error", "error": str(e)})
1704
  finally:
 
1705
  try:
1706
  cleanup()
1707
  except Exception:
1708
  pass
 
1709
  session_manager.update_session(chat_id, {"state": "initial"})
1710
 
1711
  return StreamingResponse(gen(), media_type="application/x-ndjson")
 
1754
 
1755
@router.post("/chats/{chat_id}/messages", response_model=ChatResponse)
async def send_message_to_chat(chat_id: str, payload: ChatRequest):
    """
    Post a message to a specific chat (REST-style alias).

    Overrides any chat_id carried in the request body with the path
    parameter, then delegates to the legacy non-streaming smart_chat
    handler, returning its ChatResponse unchanged.
    """
    payload.chat_id = chat_id
    return await smart_chat(payload)
1759
 
1760
 
1761
  # ========================
1762
+ # FILE UPLOAD (to S3, no presigned URLs) — still available
1763
  # ========================
1764
 
1765
@router.post("/chats/{chat_id}/upload")
async def upload_file_to_chat(chat_id: str, file: UploadFile = File(...)):
    """
    Upload a document into a chat's workspace.

    Ensures the chat exists (creating one if needed), streams the file to S3
    using server-side credentials (no presigned URLs), and returns the S3
    location plus a hint about the next step.
    """
    chat_id = _ensure_chat(chat_id)
    s3_uri = upload_stream_to_s3(chat_id, file)

    # Derive the object key from the URI returned by the uploader.
    s3_key = s3_uri.split(f"s3://{S3_BUCKET}/", 1)[1]

    return {
        "status": "success",
        "message": "File uploaded to S3",
        "file": {"bucket": S3_BUCKET, "key": s3_key, "s3_uri": s3_uri},
        "chat_id": chat_id,
        "next_action": "💬 Now tell me what you'd like to do with this document"
    }