# Extracted from a hosted file view (Spaces status: Running; file size: 6,201 bytes; revision 978fed5).
"""
Workflow for running coding tasks using Claude coding subagent.
"""
import sys
import tempfile
from pathlib import Path
from loguru import logger
# Add parent directory to path to find scider and bench modules
sys.path.insert(0, str(Path(__file__).parent.parent))
from bench_workflows.register_models.gemini import (
register_gemini3_medium_high_models,
register_gemini_low_medium_models,
register_gemini_medium_high_models,
)
from bench_workflows.register_models.gpt import (
register_gpt_low_medium_models,
register_gpt_medium_high_models,
)
from scider.agents.experiment_agent.coding_subagent_v3_claude import build
from scider.agents.experiment_agent.coding_subagent_v3_claude.state import ClaudeCodingAgentState
from scider.core.code_env import LocalEnv
def run_coding_workflow(user_query: str, workspace_dir: str | Path | None = None) -> str:
    """
    Run a simple coding workflow using Claude coding subagent.

    Args:
        user_query: The coding task description.
        workspace_dir: The working directory for the coding task. If None, a
            temporary directory is created; this function does not remove it
            afterwards.

    Returns:
        The ``final_result`` stored under ``_raw_claude_result`` of the last
        intermediate state whose ``node_name`` is ``"claude"``, or an empty
        string when no such state exists.

    Raises:
        AssertionError: If the last ``claude`` state has no ``final_result``.
    """
    logger.info(f"Starting coding workflow with query: {user_query[:100]}...")

    # Create workspace environment
    if workspace_dir is None:
        workspace_dir = tempfile.mkdtemp(prefix="scider_coding_")
        logger.info(f"Using temporary workspace: {workspace_dir}")
    workspace = LocalEnv(working_dir=workspace_dir, create_dir_if_missing=True)

    # Create agent state
    coding_state = ClaudeCodingAgentState(
        user_query=user_query,
        workspace=workspace,
        data_summary="",  # No background data needed for simple coding tasks
        intermediate_full_output=True,  # Store full output in intermediate state
        skip_summary=True,  # Skip final summary to get full output directly
    )

    # Build and compile the graph
    coding_graph = build().compile()

    # Execute the workflow
    logger.info("Executing coding graph...")
    result_state = coding_graph.invoke(coding_state)

    # `or []` also covers the key being present but explicitly set to None.
    intermediate_states = result_state.get("intermediate_state") or []

    # Keep only the states emitted by the 'claude' node; the last one is the answer.
    claude_states = [state for state in intermediate_states if state.get("node_name") == "claude"]
    if not claude_states:
        logger.warning("No claude node found in intermediate states")
        return ""

    # `or {}` keeps a None raw result from surfacing as an unrelated AttributeError.
    raw_result = claude_states[-1].get("_raw_claude_result") or {}
    last_claude_output = raw_result.get("final_result")
    if last_claude_output is None:
        # Raised explicitly (rather than via `assert`) so the check survives `python -O`.
        raise AssertionError("No final_result found in the last claude node output")
    return last_claude_output
def _build_arg_parser(model_choices):
    """Build the command-line parser for the SciCodeBench workflow script.

    Args:
        model_choices: Accepted values for ``--models``, in display order.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="SciCodeBench Workflow - Run simple coding tasks using Claude coding subagent",
        prog="python -m bench_workflows.scicodebench_workflow",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Run with inline query
python -m bench_workflows.scicodebench_workflow \\
--query "Create a function to calculate fibonacci numbers"
# Run with query from file
python -m bench_workflows.scicodebench_workflow \\
--query-file task.txt \\
--workspace ./my_workspace
# Use specific model configuration
python -m bench_workflows.scicodebench_workflow \\
--query "Implement a binary search tree" \\
--models gpt-medium-high
""",
    )

    # Query input (mutually exclusive): exactly one of --query / --query-file is required.
    query_group = parser.add_mutually_exclusive_group(required=True)
    query_group.add_argument(
        "--query",
        "-q",
        help="Coding task query (inline text)",
    )
    query_group.add_argument(
        "--query-file",
        "-f",
        help="Path to file containing the coding task query",
    )

    # Optional arguments
    parser.add_argument(
        "--workspace",
        "-w",
        default=None,
        help="Workspace directory for the coding task (default: creates temp directory)",
    )
    parser.add_argument(
        "--models",
        choices=list(model_choices),
        default="gemini-low-medium",
        help="Model configuration to use (default: gemini-low-medium)",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Optional output file to save the result",
    )
    return parser


def main() -> None:
    """Parse CLI arguments, register the chosen models, run the workflow, and report the result.

    Raises:
        FileNotFoundError: If ``--query-file`` points to a path that does not exist.
    """
    # Maps each --models choice to the function that registers that configuration.
    # The key order is also the order shown in --help.
    registrars = {
        "gpt-low-medium": register_gpt_low_medium_models,
        "gpt-medium-high": register_gpt_medium_high_models,
        "gemini-low-medium": register_gemini_low_medium_models,
        "gemini-medium-high": register_gemini_medium_high_models,
        "gemini3-medium-high": register_gemini3_medium_high_models,
    }
    args = _build_arg_parser(list(registrars)).parse_args()

    # Register models based on choice (argparse has already validated the value).
    logger.info(f"Registering models: {args.models}")
    registrars[args.models]()

    # Get user query. Compare against None so an explicit empty --query is not
    # mistaken for "use --query-file" (which would then be None).
    if args.query is not None:
        user_query = args.query
    else:
        query_path = Path(args.query_file)
        if not query_path.exists():
            raise FileNotFoundError(f"Query file not found: {query_path}")
        user_query = query_path.read_text(encoding="utf-8")
    logger.info(f"User query length: {len(user_query)} chars")

    # Run coding workflow
    result = run_coding_workflow(
        user_query=user_query,
        workspace_dir=args.workspace,
    )

    # Print result
    print("\n" + "=" * 80)
    print("CODING WORKFLOW RESULT")
    print("=" * 80)
    print(result)
    print("=" * 80)

    # Save to file if requested (written exactly once).
    if args.output:
        output_path = Path(args.output)
        output_path.write_text(result, encoding="utf-8")
        logger.info(f"Result saved to: {output_path}")


if __name__ == "__main__":
    main()