redhairedshanks1 committed on
Commit
c895fea
·
1 Parent(s): d79e2c1

Update services/pipeline_executor.py

Browse files
Files changed (1) hide show
  1. services/pipeline_executor.py +46 -41
services/pipeline_executor.py CHANGED
@@ -9,8 +9,9 @@ from typing import Dict, Any, Optional, Generator, List
9
  # For Bedrock LangChain
10
  try:
11
  from langchain_aws import ChatBedrock
12
- from langchain.agents import AgentExecutor, create_tool_calling_agent
13
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 
14
  from services.master_tools import get_master_tools as get_langchain_tools
15
  BEDROCK_AVAILABLE = True
16
  print("✅ Bedrock LangChain imports successful - BEDROCK_AVAILABLE = True")
@@ -107,65 +108,71 @@ def execute_pipeline_bedrock_streaming(
107
  session_id: Optional[str] = None
108
  ) -> Generator[Dict[str, Any], None, None]:
109
  """
110
- Execute pipeline using Bedrock + LangChain with streaming
111
  """
112
  if not BEDROCK_AVAILABLE:
113
  raise RuntimeError("Bedrock LangChain not available")
114
 
115
  try:
 
116
  llm = ChatBedrock(
117
  model_id="mistral.mistral-large-2402-v1:0",
118
- region_name=os.getenv("AWS_REGION", "us-east-1")
 
 
 
 
119
  )
120
 
121
  tools = get_langchain_tools()
122
 
123
- # STRONGER prompt that FORCES tool usage
124
- system_instructions = """You are MasterLLM, an AI agent that MUST execute document processing tools.
125
 
126
- CRITICAL INSTRUCTIONS:
127
- 1. You MUST call the tools provided to you - do NOT just describe what you would do
128
- 2. Execute the pipeline components IN ORDER
129
- 3. For each component in the pipeline, you MUST:
130
- - Call the corresponding tool with the specified parameters
131
- - Wait for the actual result from the tool
132
- - Use the file_path provided for file operations
133
- - Store the results to pass to the next component
134
- 4. After ALL components are executed, call the 'finalize' tool with the collected results
135
- 5. DO NOT generate placeholder text like "TEXT_EXTRACTION_RESULT" - call the actual tools!
136
 
137
- The pipeline components are structured as:
138
- {{
139
- "tool_name": "extract_text",
140
- "start_page": 1,
141
- "end_page": -1,
142
- "params": {{}}
143
- }}
144
 
145
- You must call tools, not generate descriptions. This is mandatory."""
146
-
147
- prompt = ChatPromptTemplate.from_messages([
148
- ("system", system_instructions),
149
- ("system", "File to process: {file_path}"),
150
- ("system", "Pipeline configuration: {pipeline_json}"),
151
- ("human", "Execute ALL the tools in the pipeline. Call each tool and get real results. Do not describe, actually execute!"),
152
- MessagesPlaceholder(variable_name="agent_scratchpad") # REQUIRED for LangChain agent
153
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
- agent = create_tool_calling_agent(llm, tools, prompt)
 
156
  executor = AgentExecutor(
157
  agent=agent,
158
  tools=tools,
159
  verbose=True,
160
- max_iterations=25, # Increased for multi-step pipelines
161
  handle_parsing_errors=True,
162
- return_intermediate_steps=True # Important: get intermediate results
163
  )
164
 
165
  # Yield initial status
166
  yield {
167
  "type": "status",
168
- "message": "Initializing Bedrock executor...",
169
  "executor": "bedrock"
170
  }
171
 
@@ -175,7 +182,7 @@ You must call tools, not generate descriptions. This is mandatory."""
175
 
176
  # Stream execution
177
  for event in executor.stream({
178
- "input": f"Execute the pipeline '{pipeline['pipeline_name']}' by calling each tool in the components list",
179
  "file_path": file_path,
180
  "pipeline_json": json.dumps(pipeline, indent=2)
181
  }):
@@ -221,16 +228,15 @@ You must call tools, not generate descriptions. This is mandatory."""
221
 
222
  # Check if tools were actually called
223
  if not has_called_tools:
224
- # Agent didn't call tools, just generated text - this is a failure
225
  yield {
226
  "type": "error",
227
- "error": "Bedrock agent generated text instead of calling tools. Falling back to CrewAI.",
228
  "executor": "bedrock",
229
  "debug_output": str(output)[:500]
230
  }
231
  return
232
 
233
- # If we have tool results, structure them properly
234
  if tool_results:
235
  structured_result = {
236
  "status": "completed",
@@ -248,7 +254,6 @@ You must call tools, not generate descriptions. This is mandatory."""
248
  "executor": "bedrock"
249
  }
250
  else:
251
- # No tool results collected, likely a problem
252
  yield {
253
  "type": "error",
254
  "error": "No tool results collected from Bedrock execution",
 
9
  # For Bedrock LangChain
10
  try:
11
  from langchain_aws import ChatBedrock
12
+ from langchain.agents import AgentExecutor, create_react_agent # Using ReAct instead of tool_calling
13
+ from langchain_core.prompts import PromptTemplate
14
+ from langchain import hub
15
  from services.master_tools import get_master_tools as get_langchain_tools
16
  BEDROCK_AVAILABLE = True
17
  print("✅ Bedrock LangChain imports successful - BEDROCK_AVAILABLE = True")
 
108
  session_id: Optional[str] = None
109
  ) -> Generator[Dict[str, Any], None, None]:
110
  """
111
+ Execute pipeline using Bedrock + LangChain with ReAct agent (works with Mistral)
112
  """
113
  if not BEDROCK_AVAILABLE:
114
  raise RuntimeError("Bedrock LangChain not available")
115
 
116
  try:
117
+ # Use Mistral (the only model you have access to)
118
  llm = ChatBedrock(
119
  model_id="mistral.mistral-large-2402-v1:0",
120
+ region_name=os.getenv("AWS_REGION", "us-east-1"),
121
+ model_kwargs={
122
+ "temperature": 0.0,
123
+ "max_tokens": 4096
124
+ }
125
  )
126
 
127
  tools = get_langchain_tools()
128
 
129
+ # ReAct prompt template - uses text-based reasoning
130
+ react_prompt = PromptTemplate.from_template("""You are MasterLLM, a document processing assistant that executes tools step-by-step.
131
 
132
+ You have access to these tools:
133
+ {tools}
 
 
 
 
 
 
 
 
134
 
135
+ Tool names: {tool_names}
 
 
 
 
 
 
136
 
137
+ Use the following format EXACTLY:
138
+
139
+ Thought: Think about what you need to do
140
+ Action: tool_name
141
+ Action Input: {{"param1": "value1", "param2": value2}}
142
+ Observation: [result will appear here]
143
+ ... (repeat Thought/Action/Action Input/Observation as needed)
144
+ Thought: I have completed all steps
145
+ Final Answer: [summarize what was done]
146
+
147
+ CRITICAL RULES:
148
+ 1. You MUST use the Action/Action Input format to call tools
149
+ 2. Action Input MUST be valid JSON
150
+ 3. After Observation, think again and take the next action
151
+ 4. Call tools for EACH pipeline component
152
+ 5. When done, provide Final Answer
153
+
154
+ File path: {file_path}
155
+ Pipeline to execute: {pipeline_json}
156
+
157
+ Begin! Execute each component in the pipeline.
158
+
159
+ {agent_scratchpad}""")
160
 
161
+ # Create ReAct agent
162
+ agent = create_react_agent(llm, tools, react_prompt)
163
  executor = AgentExecutor(
164
  agent=agent,
165
  tools=tools,
166
  verbose=True,
167
+ max_iterations=25,
168
  handle_parsing_errors=True,
169
+ return_intermediate_steps=True
170
  )
171
 
172
  # Yield initial status
173
  yield {
174
  "type": "status",
175
+ "message": "Initializing Bedrock ReAct executor...",
176
  "executor": "bedrock"
177
  }
178
 
 
182
 
183
  # Stream execution
184
  for event in executor.stream({
185
+ "input": f"Execute the pipeline '{pipeline['pipeline_name']}' by running each tool in the components list",
186
  "file_path": file_path,
187
  "pipeline_json": json.dumps(pipeline, indent=2)
188
  }):
 
228
 
229
  # Check if tools were actually called
230
  if not has_called_tools:
 
231
  yield {
232
  "type": "error",
233
+ "error": "Bedrock ReAct agent didn't call tools properly. Falling back to CrewAI.",
234
  "executor": "bedrock",
235
  "debug_output": str(output)[:500]
236
  }
237
  return
238
 
239
+ # If we have tool results, structure them
240
  if tool_results:
241
  structured_result = {
242
  "status": "completed",
 
254
  "executor": "bedrock"
255
  }
256
  else:
 
257
  yield {
258
  "type": "error",
259
  "error": "No tool results collected from Bedrock execution",