""" Workflow for running coding tasks using Claude coding subagent. """ import sys import tempfile from pathlib import Path from loguru import logger # Add parent directory to path to find scider and bench modules sys.path.insert(0, str(Path(__file__).parent.parent)) from bench_workflows.register_models.gemini import ( register_gemini3_medium_high_models, register_gemini_low_medium_models, register_gemini_medium_high_models, ) from bench_workflows.register_models.gpt import ( register_gpt_low_medium_models, register_gpt_medium_high_models, ) from scider.agents.experiment_agent.coding_subagent_v3_claude import build from scider.agents.experiment_agent.coding_subagent_v3_claude.state import ClaudeCodingAgentState from scider.core.code_env import LocalEnv def run_coding_workflow(user_query: str, workspace_dir: str | Path | None = None) -> str: """ Run a simple coding workflow using Claude coding subagent. Args: user_query: The coding task description workspace_dir: The working directory for the coding task. If None, a temporary directory is created. Returns: The output from Claude's coding execution """ logger.info(f"Starting coding workflow with query: {user_query[:100]}...") # Create workspace environment if workspace_dir is None: workspace_dir = tempfile.mkdtemp(prefix="scider_coding_") logger.info(f"Using temporary workspace: {workspace_dir}") workspace = LocalEnv(working_dir=workspace_dir, create_dir_if_missing=True) # Create agent state coding_state = ClaudeCodingAgentState( user_query=user_query, workspace=workspace, data_summary="", # No background data needed for simple coding tasks intermediate_full_output=True, # Store full output in intermediate state skip_summary=True, # Skip final summary to get full output directly ) # Build and compile the graph coding_graph = build().compile() # Execute the workflow logger.info("Executing coding graph...") result_state = coding_graph.invoke(coding_state) # Extract intermediate states and find the last 'claude' node output intermediate_states = result_state.get("intermediate_state", []) # Filter for claude nodes and get the last one claude_states = [state for state in intermediate_states if state.get("node_name") == "claude"] if not claude_states: logger.warning("No claude node found in intermediate states") return "" # Get the output from the last claude node last_claude_output = claude_states[-1].get("_raw_claude_result", {}).get("final_result", None) assert last_claude_output is not None, "No final_result found in the last claude node output" return last_claude_output if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description="SciCodeBench Workflow - Run simple coding tasks using Claude coding subagent", prog="python -m bench_workflows.scicodebench_workflow", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Run with inline query python -m bench_workflows.scicodebench_workflow \\ --query "Create a function to calculate fibonacci numbers" # Run with query from file python -m bench_workflows.scicodebench_workflow \\ --query-file task.txt \\ --workspace ./my_workspace # Use specific model configuration python -m bench_workflows.scicodebench_workflow \\ --query "Implement a binary search tree" \\ --models gpt-medium-high """, ) # Query input (mutually exclusive) query_group = parser.add_mutually_exclusive_group(required=True) query_group.add_argument( "--query", "-q", help="Coding task query (inline text)", ) query_group.add_argument( "--query-file", "-f", help="Path to file containing the coding task query", ) # Optional arguments parser.add_argument( "--workspace", "-w", default=None, help="Workspace directory for the coding task (default: creates temp directory)", ) parser.add_argument( "--models", choices=[ "gpt-low-medium", "gpt-medium-high", "gemini-low-medium", "gemini-medium-high", "gemini3-medium-high", ], default="gemini-low-medium", help="Model configuration to use (default: gemini-low-medium)", ) parser.add_argument( "--output", "-o", default=None, help="Optional output file to save the result", ) args = parser.parse_args() # Register models based on choice logger.info(f"Registering models: {args.models}") match args.models: case "gpt-low-medium": register_gpt_low_medium_models() case "gpt-medium-high": register_gpt_medium_high_models() case "gemini-low-medium": register_gemini_low_medium_models() case "gemini-medium-high": register_gemini_medium_high_models() case "gemini3-medium-high": register_gemini3_medium_high_models() # Get user query if args.query: user_query = args.query else: query_path = Path(args.query_file) if not query_path.exists(): raise FileNotFoundError(f"Query file not found: {query_path}") user_query = query_path.read_text(encoding="utf-8") logger.info(f"User query length: {len(user_query)} chars") # Run coding workflow result = run_coding_workflow( user_query=user_query, workspace_dir=args.workspace, ) # Print result print("\n" + "=" * 80) print("CODING WORKFLOW RESULT") print("=" * 80) print(result) print("=" * 80) # Save to file if requested if args.output: output_path = Path(args.output) output_path.write_text(result, encoding="utf-8") logger.info(f"Result saved to: {output_path}") output_path.write_text(result, encoding="utf-8") logger.info(f"Result saved to: {output_path}")