ganesh-vilje committed on
Commit
aaaba76
·
1 Parent(s): 6648afb

feat: Unified pipeline lifecycle - single S3 file per pipeline - Pipeline now stored in S3 with pipeline_id at creation - Proposals stored in S3, only reference in MongoDB - Results appended to same S3 file on completion - Workflow save uses pipeline_id, works at any stage - pipelines_history now in session API response

Browse files
api_routes_v2.py CHANGED
@@ -2553,13 +2553,19 @@ def get_session_complete_history(session_id: str, limit: int = 50):
2553
 
2554
  messages = session_manager.get_messages(session_id, limit=limit, include_content=True)
2555
 
 
 
 
 
 
2556
  return {
2557
  "session_id": session_id,
2558
  "chat_name": session.get("chat_name"),
2559
  "created_at": session.get("created_at"),
2560
  "last_activity": session.get("last_activity"),
2561
  "state": session.get("state"),
2562
- "messages": messages
 
2563
  }
2564
  except HTTPException:
2565
  raise
@@ -2584,6 +2590,69 @@ def get_session_pipeline_executions(session_id: str, limit: int = 50):
2584
  raise HTTPException(status_code=500, detail=f"Failed to get pipeline executions: {str(e)}")
2585
 
2586
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2587
  # ========================
2588
  # HEALTH
2589
  # ========================
 
2553
 
2554
  messages = session_manager.get_messages(session_id, limit=limit, include_content=True)
2555
 
2556
+ # V3: Get pipeline history
2557
+ from services.pipeline_manager import get_pipeline_manager
2558
+ pipeline_mgr = get_pipeline_manager()
2559
+ pipelines_history = pipeline_mgr.get_session_pipelines(session_id, limit=50)
2560
+
2561
  return {
2562
  "session_id": session_id,
2563
  "chat_name": session.get("chat_name"),
2564
  "created_at": session.get("created_at"),
2565
  "last_activity": session.get("last_activity"),
2566
  "state": session.get("state"),
2567
+ "messages": messages,
2568
+ "pipelines_history": pipelines_history # NEW: Pipeline history
2569
  }
2570
  except HTTPException:
2571
  raise
 
2590
  raise HTTPException(status_code=500, detail=f"Failed to get pipeline executions: {str(e)}")
2591
 
2592
 
2593
+ # ========================
2594
+ # V3 WORKFLOW SAVE API
2595
+ # ========================
2596
+
2597
+ @router.post("/workflows/save")
2598
+ def save_workflow_from_pipeline(data: dict):
2599
+ """
2600
+ Save workflow from pipeline (works at any stage: proposed, executing, completed)
2601
+ Uses pipeline_id instead of execution_id
2602
+ """
2603
+ pipeline_id = data.get("pipeline_id")
2604
+ workflow_name = data.get("workflow_name") # Optional override
2605
+
2606
+ if not pipeline_id:
2607
+ raise HTTPException(status_code=400, detail="pipeline_id is required")
2608
+
2609
+ try:
2610
+ from services.pipeline_manager import get_pipeline_manager
2611
+ from services.workflow_manager import get_workflow_manager
2612
+
2613
+ pipeline_mgr = get_pipeline_manager()
2614
+ workflow_mgr = get_workflow_manager()
2615
+
2616
+ # Get pipeline metadata
2617
+ pipeline_metadata = pipeline_mgr.get_pipeline_metadata(pipeline_id)
2618
+ if not pipeline_metadata:
2619
+ raise HTTPException(status_code=404, detail="Pipeline not found")
2620
+
2621
+ # Download full pipeline document from S3
2622
+ pipeline_doc = pipeline_mgr.get_full_pipeline_document(pipeline_id)
2623
+ if not pipeline_doc:
2624
+ raise HTTPException(status_code=404, detail="Pipeline document not found in S3")
2625
+
2626
+ # Extract definition
2627
+ pipeline_def = pipeline_doc.get("definition", {})
2628
+
2629
+ # Override name if provided
2630
+ if workflow_name:
2631
+ pipeline_def["pipeline_name"] = workflow_name
2632
+
2633
+ # Save as workflow with source tracking
2634
+ workflow_id = workflow_mgr.save_workflow(
2635
+ session_id=pipeline_doc.get("session_id", "unknown"),
2636
+ pipeline_definition=pipeline_def,
2637
+ user_message=f"Saved from pipeline {pipeline_id}",
2638
+ source_pipeline_id=pipeline_id,
2639
+ pipeline_status=pipeline_doc.get("status", "unknown")
2640
+ )
2641
+
2642
+ return {
2643
+ "workflow_id": workflow_id,
2644
+ "pipeline_name": pipeline_def.get("pipeline_name", "Untitled"),
2645
+ "source_pipeline_id": pipeline_id,
2646
+ "pipeline_status": pipeline_doc.get("status"),
2647
+ "message": "Workflow saved successfully"
2648
+ }
2649
+
2650
+ except HTTPException:
2651
+ raise
2652
+ except Exception as e:
2653
+ raise HTTPException(status_code=500, detail=f"Failed to save workflow: {str(e)}")
2654
+
2655
+
2656
  # ========================
2657
  # HEALTH
2658
  # ========================
app.py CHANGED
@@ -978,29 +978,32 @@ def chatbot_response_streaming(message: str, history: List, session_id: str, fil
978
  workflow_mgr = get_workflow_manager()
979
  s3 = get_s3_manager()
980
 
981
- # Get pipeline from pipelines collection
982
- pipeline_record = pipeline_mgr.get_pipeline(pending_workflow["execution_id"])
 
983
 
984
- if pipeline_record and pipeline_record.get("pipeline_definition_s3_key"):
985
- # Download pipeline definition from S3
986
- pipeline_def = s3.download_json(pipeline_record["pipeline_definition_s3_key"], add_prefix=False)
987
 
988
- # Save as workflow
989
  workflow_id = workflow_mgr.save_workflow(
990
  session_id=session_id,
991
  pipeline_definition=pipeline_def,
992
- user_message=message
 
 
993
  )
994
 
995
  # Clear pending
996
  session_manager.update_session(session_id, {"pending_workflow_save": None})
997
 
998
- response = f"✅ **Workflow Saved!**\n\nWorkflow ID: `{workflow_id}`\nName: {pending_workflow['pipeline_name']}\n\nYou can now reuse this workflow anytime!\n\nWhat else can I help you with?"
999
  session_manager.add_message(session_id, "assistant", response)
1000
  yield format_chat_history(history, message, response)
1001
  return
1002
  else:
1003
- # Pipeline record not found
1004
  session_manager.update_session(session_id, {"pending_workflow_save": None})
1005
  response = "⚠️ Sorry, I couldn't find the pipeline to save. The workflow save request has expired.\n\nWhat else can I help you with?"
1006
  session_manager.add_message(session_id, "assistant", response)
@@ -1043,9 +1046,44 @@ def chatbot_response_streaming(message: str, history: List, session_id: str, fil
1043
  prefer_bedrock=True
1044
  )
1045
 
1046
- # Save proposed pipeline to session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1047
  session_manager.update_session(session_id, {
1048
- "proposed_pipeline": pipeline,
 
1049
  "state": ConversationState.PIPELINE_PROPOSED
1050
  })
1051
 
@@ -1088,25 +1126,41 @@ Here's what I'll do:
1088
  if "approve" in user_input or "yes" in user_input:
1089
  session_manager.update_session(session_id, {"state": ConversationState.EXECUTING})
1090
 
1091
- plan = session.get("proposed_pipeline", {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1092
 
1093
- # V3: Initialize execution_id before use (fallback to session_id)
1094
- execution_id = session_id
 
 
 
 
 
 
 
 
1095
 
1096
- # V3: Create pipeline record BEFORE execution
1097
- try:
1098
- pipeline_mgr = get_pipeline_manager()
1099
- execution_id = pipeline_mgr.create_pipeline_record(
1100
- session_id=session_id,
1101
- pipeline_definition=plan,
1102
- created_from="request",
1103
- created_by_message=message
1104
- )
1105
- print(f"✅ Created pipeline record: {execution_id}")
1106
- except Exception as e:
1107
- print(f"⚠️ Failed to create pipeline record: {e}")
1108
- print(f" Using session_id as fallback")
1109
- # execution_id already initialized to session_id
1110
 
1111
  # Initial status - User-friendly
1112
  initial_message = f"✅ **Approved!** Starting execution of: **{plan.get('pipeline_name', 'pipeline')}**\n\n🚀 Processing... please wait...\n_(Using {plan.get('_generator', 'AI')} - {plan.get('_model', 'model')})_"
@@ -1215,17 +1269,42 @@ Here's what I'll do:
1215
  return
1216
 
1217
  # Process final result
1218
- if final_payload:
1219
- # V3: DO NOT store results in session - only in S3 via pipeline_manager
1220
- # The pipeline_manager already handled S3 storage during execution
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1221
  session_manager.update_session(session_id, {
1222
  "state": ConversationState.INITIAL
1223
  })
1224
 
1225
- # V3: Store pending workflow save info
1226
  session_manager.update_session(session_id, {
1227
  "pending_workflow_save": {
1228
- "execution_id": execution_id,
1229
  "pipeline_name": plan.get("pipeline_name", "Untitled")
1230
  }
1231
  })
 
978
  workflow_mgr = get_workflow_manager()
979
  s3 = get_s3_manager()
980
 
981
+ # V3: Get full pipeline document from S3 using pipeline_id
982
+ pipeline_id = pending_workflow.get("pipeline_id")
983
+ pipeline_doc = pipeline_mgr.get_full_pipeline_document(pipeline_id)
984
 
985
+ if pipeline_doc and pipeline_doc.get("definition"):
986
+ # Extract definition from pipeline document
987
+ pipeline_def = pipeline_doc["definition"]
988
 
989
+ # Save as workflow with source tracking
990
  workflow_id = workflow_mgr.save_workflow(
991
  session_id=session_id,
992
  pipeline_definition=pipeline_def,
993
+ user_message=message,
994
+ source_pipeline_id=pipeline_id,
995
+ pipeline_status=pipeline_doc.get("status", "unknown")
996
  )
997
 
998
  # Clear pending
999
  session_manager.update_session(session_id, {"pending_workflow_save": None})
1000
 
1001
+ response = f"✅ **Workflow Saved!**\n\nWorkflow ID: `{workflow_id}`\nName: {pending_workflow['pipeline_name']}\nSource Pipeline: `{pipeline_id[:8]}...`\n\nYou can now reuse this workflow anytime!\n\nWhat else can I help you with?"
1002
  session_manager.add_message(session_id, "assistant", response)
1003
  yield format_chat_history(history, message, response)
1004
  return
1005
  else:
1006
+ # Pipeline document not found
1007
  session_manager.update_session(session_id, {"pending_workflow_save": None})
1008
  response = "⚠️ Sorry, I couldn't find the pipeline to save. The workflow save request has expired.\n\nWhat else can I help you with?"
1009
  session_manager.add_message(session_id, "assistant", response)
 
1046
  prefer_bedrock=True
1047
  )
1048
 
1049
+ # V3: Create pipeline_id and upload to S3
1050
+ pipeline_id = str(uuid.uuid4())
1051
+
1052
+ # Build initial pipeline document
1053
+ pipeline_doc = {
1054
+ "pipeline_id": pipeline_id,
1055
+ "session_id": session_id,
1056
+ "pipeline_name": pipeline.get("pipeline_name"),
1057
+ "status": "proposed",
1058
+ "created_at": datetime.utcnow().isoformat() + "Z",
1059
+ "created_by_message": message,
1060
+ "definition": pipeline,
1061
+ "execution": None,
1062
+ "results": None
1063
+ }
1064
+
1065
+ # Upload to S3
1066
+ from services.s3_manager import get_s3_manager
1067
+ s3 = get_s3_manager()
1068
+ pipeline_s3_key = f"sessions/{session_id}/pipelines/{pipeline_id}.json"
1069
+ s3.upload_json(pipeline_s3_key, pipeline_doc, add_prefix=False)
1070
+
1071
+ # Create metadata in MongoDB
1072
+ from services.pipeline_manager import get_pipeline_manager
1073
+ pipeline_mgr = get_pipeline_manager()
1074
+ pipeline_mgr.create_pipeline_metadata(
1075
+ pipeline_id=pipeline_id,
1076
+ session_id=session_id,
1077
+ pipeline_name=pipeline.get("pipeline_name"),
1078
+ s3_key=pipeline_s3_key,
1079
+ status="proposed",
1080
+ created_by_message=message
1081
+ )
1082
+
1083
+ # Update session with reference only (not full pipeline)
1084
  session_manager.update_session(session_id, {
1085
+ "current_pipeline_id": pipeline_id,
1086
+ "current_pipeline_s3_key": pipeline_s3_key,
1087
  "state": ConversationState.PIPELINE_PROPOSED
1088
  })
1089
 
 
1126
  if "approve" in user_input or "yes" in user_input:
1127
  session_manager.update_session(session_id, {"state": ConversationState.EXECUTING})
1128
 
1129
+ # V3: Get pipeline references from session
1130
+ pipeline_s3_key = session.get("current_pipeline_s3_key")
1131
+ pipeline_id = session.get("current_pipeline_id")
1132
+
1133
+ # Download pipeline from S3
1134
+ from services.s3_manager import get_s3_manager
1135
+ s3 = get_s3_manager()
1136
+ from services.pipeline_manager import get_pipeline_manager
1137
+ pipeline_mgr = get_pipeline_manager()
1138
+
1139
+ if pipeline_s3_key:
1140
+ try:
1141
+ pipeline_doc = s3.download_json(pipeline_s3_key, add_prefix=False)
1142
+ plan = pipeline_doc["definition"]
1143
+ except Exception as e:
1144
+ print(f"⚠️ Failed to download pipeline from S3: {e}")
1145
+ plan = {}
1146
+ pipeline_id = session_id
1147
+ else:
1148
+ # Fallback for old sessions without S3 storage
1149
+ plan = session.get("proposed_pipeline", {})
1150
+ pipeline_id = session_id
1151
 
1152
+ # Update pipeline status to executing
1153
+ if pipeline_s3_key and pipeline_doc:
1154
+ pipeline_doc["status"] = "executing"
1155
+ pipeline_doc["execution"] = {
1156
+ "started_at": datetime.utcnow().isoformat() + "Z",
1157
+ "executor": "unknown",
1158
+ "components_status": []
1159
+ }
1160
+ s3.upload_json(pipeline_s3_key, pipeline_doc, add_prefix=False)
1161
+ pipeline_mgr.update_pipeline_status(pipeline_id, "executing")
1162
 
1163
+ execution_id = pipeline_id
 
 
 
 
 
 
 
 
 
 
 
 
 
1164
 
1165
  # Initial status - User-friendly
1166
  initial_message = f"✅ **Approved!** Starting execution of: **{plan.get('pipeline_name', 'pipeline')}**\n\n🚀 Processing... please wait...\n_(Using {plan.get('_generator', 'AI')} - {plan.get('_model', 'model')})_"
 
1269
  return
1270
 
1271
  # Process final result
1272
+ # V3: Update pipeline document with results in S3
1273
+ if pipeline_s3_key:
1274
+ try:
1275
+ pipeline_doc = s3.download_json(pipeline_s3_key, add_prefix=False)
1276
+ pipeline_doc["status"] = "completed"
1277
+ if pipeline_doc.get("execution"):
1278
+ pipeline_doc["execution"]["completed_at"] = datetime.utcnow().isoformat() + "Z"
1279
+ pipeline_doc["execution"]["executor"] = executor_used
1280
+ pipeline_doc["results"] = {
1281
+ "final_output_url": final_payload.get("final_output_url"),
1282
+ "final_output_expires_at": final_payload.get("final_output_expires_at"),
1283
+ "components_executed": final_payload.get("components_executed"),
1284
+ "last_node_output": final_payload.get("last_node_output"),
1285
+ "workflow_status": "completed"
1286
+ }
1287
+ s3.upload_json(pipeline_s3_key, pipeline_doc, add_prefix=False)
1288
+
1289
+ # Update MongoDB metadata
1290
+ pipeline_mgr.update_pipeline_status(
1291
+ pipeline_id,
1292
+ "completed",
1293
+ final_output_url=final_payload.get("final_output_url"),
1294
+ final_output_expires_at=final_payload.get("final_output_expires_at")
1295
+ )
1296
+ except Exception as e:
1297
+ print(f"⚠️ Failed to update pipeline document: {e}")
1298
+
1299
+ # Update session state
1300
  session_manager.update_session(session_id, {
1301
  "state": ConversationState.INITIAL
1302
  })
1303
 
1304
+ # V3: Store pending workflow save info with pipeline_id
1305
  session_manager.update_session(session_id, {
1306
  "pending_workflow_save": {
1307
+ "pipeline_id": pipeline_id,
1308
  "pipeline_name": plan.get("pipeline_name", "Untitled")
1309
  }
1310
  })
services/pipeline_manager.py CHANGED
@@ -41,6 +41,82 @@ class PipelineManager:
41
  # S3 manager
42
  self.s3 = get_s3_manager()
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def create_pipeline_record(
45
  self,
46
  session_id: str,
 
41
  # S3 manager
42
  self.s3 = get_s3_manager()
43
 
44
+
45
+ def create_pipeline_metadata(
46
+ self,
47
+ pipeline_id: str,
48
+ session_id: str,
49
+ pipeline_name: str,
50
+ s3_key: str,
51
+ status: str = "proposed",
52
+ created_by_message: str = ""
53
+ ) -> bool:
54
+ """Create metadata record for pipeline (MongoDB only). Full document in S3."""
55
+ now = datetime.utcnow()
56
+
57
+ metadata = {
58
+ "pipeline_id": pipeline_id,
59
+ "session_id": session_id,
60
+ "pipeline_name": pipeline_name,
61
+ "status": status,
62
+ "s3_key": s3_key,
63
+ "final_output_url": None,
64
+ "created_at": now.isoformat() + "Z",
65
+ "updated_at": now.isoformat() + "Z",
66
+ "created_by_message": created_by_message
67
+ }
68
+
69
+ self.pipelines_collection.insert_one(metadata)
70
+ return True
71
+
72
+ def update_pipeline_status(
73
+ self,
74
+ pipeline_id: str,
75
+ status: str,
76
+ final_output_url: str = None,
77
+ final_output_expires_at: str = None
78
+ ) -> bool:
79
+ """Update pipeline status in metadata."""
80
+ update_data = {
81
+ "status": status,
82
+ "updated_at": datetime.utcnow().isoformat() + "Z"
83
+ }
84
+
85
+ if final_output_url:
86
+ update_data["final_output_url"] = final_output_url
87
+ if final_output_expires_at:
88
+ update_data["final_output_expires_at"] = final_output_expires_at
89
+
90
+ result = self.pipelines_collection.update_one(
91
+ {"pipeline_id": pipeline_id},
92
+ {"$set": update_data}
93
+ )
94
+ return result.modified_count > 0
95
+
96
+ def get_pipeline_metadata(self, pipeline_id: str) -> Optional[Dict[str, Any]]:
97
+ """Get pipeline metadata by ID."""
98
+ return self.pipelines_collection.find_one(
99
+ {"pipeline_id": pipeline_id},
100
+ {"_id": 0}
101
+ )
102
+
103
+ def get_full_pipeline_document(self, pipeline_id: str) -> Optional[Dict[str, Any]]:
104
+ """Get full pipeline document from S3 via metadata lookup."""
105
+ metadata = self.get_pipeline_metadata(pipeline_id)
106
+ if not metadata:
107
+ return None
108
+
109
+ s3_key = metadata.get("s3_key")
110
+ if not s3_key:
111
+ return None
112
+
113
+ try:
114
+ return self.s3.download_json(s3_key, add_prefix=False)
115
+ except Exception as e:
116
+ print(f"⚠️ Failed to download pipeline document: {e}")
117
+ return None
118
+
119
+
120
  def create_pipeline_record(
121
  self,
122
  session_id: str,
services/schemas.py CHANGED
@@ -153,6 +153,8 @@ class WorkflowSchema(BaseModel):
153
  pipeline_preview: str # "Extract text → Summarize → Translate"
154
  user_confirmed: bool = True # User explicitly confirmed save
155
  tags: List[str] = Field(default_factory=list)
 
 
156
  metadata: Dict[str, Any] = Field(default_factory=dict)
157
 
158
  class Config:
 
153
  pipeline_preview: str # "Extract text → Summarize → Translate"
154
  user_confirmed: bool = True # User explicitly confirmed save
155
  tags: List[str] = Field(default_factory=list)
156
+ source_pipeline_id: Optional[str] = None # Pipeline ID this workflow came from
157
+ pipeline_status: Optional[str] = None # Status when saved: "proposed", "completed"
158
  metadata: Dict[str, Any] = Field(default_factory=dict)
159
 
160
  class Config:
services/workflow_manager.py CHANGED
@@ -40,7 +40,9 @@ class WorkflowManager:
40
  self,
41
  session_id: str,
42
  pipeline_definition: Dict[str, Any],
43
- user_message: str
 
 
44
  ) -> str:
45
  """
46
  Save a pipeline as a workflow
@@ -51,6 +53,8 @@ class WorkflowManager:
51
  session_id: Session where workflow was created
52
  pipeline_definition: Full pipeline definition
53
  user_message: User's message when confirming save
 
 
54
 
55
  Returns:
56
  workflow_id: Unique workflow ID
@@ -76,7 +80,9 @@ class WorkflowManager:
76
  pipeline_definition_s3_key=s3_key,
77
  pipeline_name=pipeline_definition.get("pipeline_name", "Untitled Workflow"),
78
  pipeline_preview=pipeline_preview,
79
- user_confirmed=True
 
 
80
  )
81
 
82
  # Insert into MongoDB
 
40
  self,
41
  session_id: str,
42
  pipeline_definition: Dict[str, Any],
43
+ user_message: str,
44
+ source_pipeline_id: str = None,
45
+ pipeline_status: str = None
46
  ) -> str:
47
  """
48
  Save a pipeline as a workflow
 
53
  session_id: Session where workflow was created
54
  pipeline_definition: Full pipeline definition
55
  user_message: User's message when confirming save
56
+ source_pipeline_id: Pipeline ID this workflow came from
57
+ pipeline_status: Pipeline status when saved ("proposed", "completed")
58
 
59
  Returns:
60
  workflow_id: Unique workflow ID
 
80
  pipeline_definition_s3_key=s3_key,
81
  pipeline_name=pipeline_definition.get("pipeline_name", "Untitled Workflow"),
82
  pipeline_preview=pipeline_preview,
83
+ user_confirmed=True,
84
+ source_pipeline_id=source_pipeline_id,
85
+ pipeline_status=pipeline_status
86
  )
87
 
88
  # Insert into MongoDB