Commit 4555a81
Parent(s): a955a4b
Update services/pipeline_executor.py

services/pipeline_executor.py  +364 -364  CHANGED

# services/pipeline_executor.py
"""
Unified pipeline executor with Bedrock LangChain (priority) and CrewAI (fallback)
"""
import json
import os
from typing import Dict, Any, Optional, Generator, List

# For Bedrock LangChain
try:
    from langchain_aws import ChatBedrock
    from langchain.agents import AgentExecutor, create_tool_calling_agent
    from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
    from services.master_tools import get_master_tools as get_langchain_tools
    BEDROCK_AVAILABLE = True
except ImportError:
    BEDROCK_AVAILABLE = False
    print("Warning: LangChain Bedrock not available")

# For CrewAI fallback
from services.agent_crewai import run_agent_streaming as crewai_run_streaming


# ========================
# BEDROCK LANGCHAIN EXECUTOR
# ========================

def execute_pipeline_bedrock(
    pipeline: Dict[str, Any],
    file_path: str,
    session_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Execute pipeline using Bedrock + LangChain (priority method)
    """
    if not BEDROCK_AVAILABLE:
        raise RuntimeError("Bedrock LangChain not available")

    try:
        llm = ChatBedrock(
            model_id=os.getenv("BEDROCK_MODEL", "anthropic.claude-3-5-sonnet-20241022-v2:0"),
            region_name=os.getenv("AWS_REGION", "us-east-1"),
            temperature=0.0,
        )

        tools = get_langchain_tools()

        # Braces in the JSON example are doubled so the prompt template treats
        # them as literals rather than input variables.
        system_instructions = """You are MasterLLM, a precise document processing agent.

Execute the provided pipeline components in ORDER. For each component:
1. Call the corresponding tool with exact parameters
2. Wait for the result
3. Move to the next component

IMPORTANT:
- Follow the pipeline order strictly
- Use the file_path provided for all file-based operations
- For text-processing tools (summarize, classify, NER, translate), use extracted text from previous steps
- At the end, call the 'finalize' tool with complete results

Pipeline components will be in this format:
{{
    "tool_name": "extract_text",
    "start_page": 1,
    "end_page": 5,
    "params": {{}}
}}"""

        prompt = ChatPromptTemplate.from_messages([
            ("system", system_instructions),
            ("system", "File path: {file_path}"),
            ("system", "Pipeline to execute: {pipeline_json}"),
            ("system", "Session ID: {session_id}"),
            ("human", "Execute the pipeline. Process each component in order and finalize with complete JSON results."),
            # create_tool_calling_agent requires an agent_scratchpad placeholder
            MessagesPlaceholder("agent_scratchpad"),
        ])

        agent = create_tool_calling_agent(llm, tools, prompt)
        executor = AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
            max_iterations=15,
            handle_parsing_errors=True,
        )

        result = executor.invoke({
            "input": f"Execute pipeline: {pipeline['pipeline_name']}",
            "file_path": file_path,
            "pipeline_json": json.dumps(pipeline, indent=2),
            "session_id": session_id or "unknown"
        })

        return result

    except Exception as e:
        raise RuntimeError(f"Bedrock execution failed: {str(e)}")


def execute_pipeline_bedrock_streaming(
    pipeline: Dict[str, Any],
    file_path: str,
    session_id: Optional[str] = None
) -> Generator[Dict[str, Any], None, None]:
    """
    Execute pipeline using Bedrock + LangChain with streaming
    """
    if not BEDROCK_AVAILABLE:
        raise RuntimeError("Bedrock LangChain not available")

    try:
        llm = ChatBedrock(
            model_id=os.getenv("BEDROCK_MODEL", "anthropic.claude-3-5-sonnet-20241022-v2:0"),
            region_name=os.getenv("AWS_REGION", "us-east-1"),
            temperature=0.0,
        )

        tools = get_langchain_tools()

        system_instructions = """You are MasterLLM. Execute the pipeline components in ORDER.

For each component, call the tool and wait for results."""

        prompt = ChatPromptTemplate.from_messages([
            ("system", system_instructions),
            ("system", "File: {file_path}"),
            ("system", "Pipeline: {pipeline_json}"),
            ("human", "Execute the pipeline"),
            # create_tool_calling_agent requires an agent_scratchpad placeholder
            MessagesPlaceholder("agent_scratchpad"),
        ])

        agent = create_tool_calling_agent(llm, tools, prompt)
        executor = AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
            max_iterations=15,
            handle_parsing_errors=True,
        )

        # Yield initial status
        yield {
            "type": "status",
            "message": "Initializing Bedrock executor...",
            "executor": "bedrock"
        }

        step_count = 0

        # Stream execution
        for event in executor.stream({
            "input": f"Execute: {pipeline['pipeline_name']}",
            "file_path": file_path,
            "pipeline_json": json.dumps(pipeline, indent=2)
        }):
            if "actions" in event:
                for action in event.get("actions", []):
                    step_count += 1
                    tool = getattr(action, "tool", "unknown")
                    yield {
                        "type": "step",
                        "step": step_count,
                        "tool": tool,
                        "status": "executing",
                        "executor": "bedrock"
                    }

            elif "steps" in event:
                for step in event.get("steps", []):
                    observation = str(getattr(step, "observation", ""))[:500]
                    yield {
                        "type": "step",
                        "step": step_count,
                        "status": "completed",
                        "observation": observation,
                        "executor": "bedrock"
                    }

            elif "output" in event:
                yield {
                    "type": "final",
                    "data": event.get("output"),
                    "executor": "bedrock"
                }
                return

    except Exception as e:
        yield {
            "type": "error",
            "error": str(e),
            "executor": "bedrock"
        }


# ========================
# CREWAI EXECUTOR (FALLBACK)
# ========================

def execute_pipeline_crewai_streaming(
    pipeline: Dict[str, Any],
    file_path: str,
    session_id: Optional[str] = None
) -> Generator[Dict[str, Any], None, None]:
    """
    Execute pipeline using CrewAI (fallback method)
    """
    try:
        # Yield initial status
        yield {
            "type": "status",
            "message": "Using CrewAI executor (fallback)...",
            "executor": "crewai"
        }

        # Use existing CrewAI streaming function
        execution_goal = (
            f"Execute the approved plan: {pipeline['pipeline_name']}. "
            f"Process {len(pipeline.get('components', []))} components in order."
        )

        for event in crewai_run_streaming(
            user_input=execution_goal,
            session_file_path=file_path,
            plan=pipeline,
            chat_history=[]
        ):
            # Pass through CrewAI events with executor tag
            if isinstance(event, dict):
                event["executor"] = "crewai"
                yield event

    except Exception as e:
        yield {
            "type": "error",
            "error": str(e),
            "executor": "crewai"
        }


# ========================
# UNIFIED EXECUTOR WITH FALLBACK
# ========================

def execute_pipeline_streaming(
    pipeline: Dict[str, Any],
    file_path: str,
    session_id: Optional[str] = None,
    prefer_bedrock: bool = True
) -> Generator[Dict[str, Any], None, None]:
    """
    Execute pipeline with fallback mechanism.

    Priority:
    1. Try Bedrock + LangChain - if available
    2. Fall back to CrewAI - if Bedrock fails

    Yields:
        Status updates and final results
    """
    # Try Bedrock first (priority)
    if prefer_bedrock and BEDROCK_AVAILABLE:
        try:
            print(f"Executing pipeline with Bedrock: {pipeline['pipeline_name']}")
            yield {
                "type": "info",
                "message": "Attempting execution with Bedrock LangChain...",
                "executor": "bedrock"
            }

            # Try to execute with Bedrock
            error_occurred = False
            for event in execute_pipeline_bedrock_streaming(pipeline, file_path, session_id):
                yield event

                # Check if an error occurred
                if event.get("type") == "error":
                    error_occurred = True
                    bedrock_error = event.get("error")
                    print(f"Bedrock execution failed: {bedrock_error}")
                    print("Falling back to CrewAI...")

                    yield {
                        "type": "info",
                        "message": f"Bedrock failed: {bedrock_error}. Switching to CrewAI...",
                        "executor": "fallback"
                    }
                    break

                # If final result, we're done
                if event.get("type") == "final":
                    print(f"Bedrock execution completed: {pipeline['pipeline_name']}")
                    return

            # If we got here with an error, fall back to CrewAI
            if error_occurred:
                # Fall through to CrewAI
                pass
            else:
                # Successful completion (shouldn't reach here normally)
                return

        except Exception as bedrock_error:
            print(f"Bedrock execution exception: {str(bedrock_error)}")
            print("Falling back to CrewAI...")
            yield {
                "type": "info",
                "message": f"Bedrock exception: {str(bedrock_error)}. Switching to CrewAI...",
                "executor": "fallback"
            }

    # Fallback to CrewAI
    print(f"Executing pipeline with CrewAI: {pipeline['pipeline_name']}")
    for event in execute_pipeline_crewai_streaming(pipeline, file_path, session_id):
        yield event

        if event.get("type") == "final":
            print(f"CrewAI execution completed: {pipeline['pipeline_name']}")
            return


# ========================
# NON-STREAMING EXECUTOR
# ========================

def execute_pipeline(
    pipeline: Dict[str, Any],
    file_path: str,
    session_id: Optional[str] = None,
    prefer_bedrock: bool = True
) -> Dict[str, Any]:
    """
    Execute pipeline (non-streaming) with fallback
    """
    final_result = None

    for event in execute_pipeline_streaming(pipeline, file_path, session_id, prefer_bedrock):
        if event.get("type") == "final":
            final_result = event.get("data")
            break

    if final_result is None:
        raise RuntimeError("Pipeline execution completed without a final result")

    return final_result


if __name__ == "__main__":
    # Test
    test_pipeline = {
        "pipeline_name": "test-extraction",
        "components": [
            {
                "tool_name": "extract_text",
                "start_page": 1,
                "end_page": 1,
                "params": {}
            }
        ],
        "_generator": "test"
    }

    test_file = "test.pdf"

    print("Testing streaming execution...")
    for event in execute_pipeline_streaming(test_pipeline, test_file):
        print(f"Event: {event}")