File size: 6,201 Bytes
978fed5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""
Workflow for running coding tasks using the Claude coding subagent.
"""

import sys
import tempfile
from pathlib import Path

from loguru import logger

# Add parent directory to path to find scider and bench modules
sys.path.insert(0, str(Path(__file__).parent.parent))

from bench_workflows.register_models.gemini import (
    register_gemini3_medium_high_models,
    register_gemini_low_medium_models,
    register_gemini_medium_high_models,
)
from bench_workflows.register_models.gpt import (
    register_gpt_low_medium_models,
    register_gpt_medium_high_models,
)
from scider.agents.experiment_agent.coding_subagent_v3_claude import build
from scider.agents.experiment_agent.coding_subagent_v3_claude.state import ClaudeCodingAgentState
from scider.core.code_env import LocalEnv


def run_coding_workflow(user_query: str, workspace_dir: str | Path | None = None) -> str:
    """
    Run a simple coding workflow using the Claude coding subagent.

    Builds a ``ClaudeCodingAgentState`` for the query, compiles and invokes the
    coding graph, and returns the ``final_result`` recorded by the last
    ``claude`` node found in the resulting ``intermediate_state`` list.

    Args:
        user_query: The coding task description.
        workspace_dir: The working directory for the coding task. If None, a
            temporary directory is created (this function does not remove it).

    Returns:
        The ``final_result`` of the last ``claude`` node, or an empty string
        if no ``claude`` node appears in the intermediate states.

    Raises:
        AssertionError: If the last ``claude`` node carries no ``final_result``.
    """
    logger.info(f"Starting coding workflow with query: {user_query[:100]}...")

    # Create workspace environment
    if workspace_dir is None:
        workspace_dir = tempfile.mkdtemp(prefix="scider_coding_")
        logger.info(f"Using temporary workspace: {workspace_dir}")

    workspace = LocalEnv(working_dir=workspace_dir, create_dir_if_missing=True)

    # Create agent state
    coding_state = ClaudeCodingAgentState(
        user_query=user_query,
        workspace=workspace,
        data_summary="",  # No background data needed for simple coding tasks
        intermediate_full_output=True,  # Store full output in intermediate state
        skip_summary=True,  # Skip final summary to get full output directly
    )

    # Build and compile the graph
    coding_graph = build().compile()

    # Execute the workflow
    logger.info("Executing coding graph...")
    result_state = coding_graph.invoke(coding_state)

    # Extract intermediate states and keep only the ones emitted by 'claude' nodes
    intermediate_states = result_state.get("intermediate_state", [])
    claude_states = [state for state in intermediate_states if state.get("node_name") == "claude"]

    if not claude_states:
        logger.warning("No claude node found in intermediate states")
        return ""

    # `or {}` also covers a "_raw_claude_result" key that is present but None,
    # so that case reaches the explicit error below instead of an AttributeError.
    raw_result = claude_states[-1].get("_raw_claude_result") or {}
    last_claude_output = raw_result.get("final_result")

    # Raise explicitly rather than `assert`: assert statements are removed under
    # `python -O`, which would let None leak out of a function declared `-> str`.
    # AssertionError is kept so the exception type seen by callers is unchanged.
    if last_claude_output is None:
        raise AssertionError("No final_result found in the last claude node output")

    return last_claude_output


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="SciCodeBench Workflow - Run simple coding tasks using Claude coding subagent",
        prog="python -m bench_workflows.scicodebench_workflow",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run with inline query
  python -m bench_workflows.scicodebench_workflow \\
      --query "Create a function to calculate fibonacci numbers"

  # Run with query from file
  python -m bench_workflows.scicodebench_workflow \\
      --query-file task.txt \\
      --workspace ./my_workspace

  # Use specific model configuration
  python -m bench_workflows.scicodebench_workflow \\
      --query "Implement a binary search tree" \\
      --models gpt-medium-high
        """,
    )

    # Query input (mutually exclusive)
    query_group = parser.add_mutually_exclusive_group(required=True)
    query_group.add_argument(
        "--query",
        "-q",
        help="Coding task query (inline text)",
    )
    query_group.add_argument(
        "--query-file",
        "-f",
        help="Path to file containing the coding task query",
    )

    # Optional arguments
    parser.add_argument(
        "--workspace",
        "-w",
        default=None,
        help="Workspace directory for the coding task (default: creates temp directory)",
    )
    parser.add_argument(
        "--models",
        choices=[
            "gpt-low-medium",
            "gpt-medium-high",
            "gemini-low-medium",
            "gemini-medium-high",
            "gemini3-medium-high",
        ],
        default="gemini-low-medium",
        help="Model configuration to use (default: gemini-low-medium)",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Optional output file to save the result",
    )

    args = parser.parse_args()

    # Register models based on choice
    logger.info(f"Registering models: {args.models}")
    match args.models:
        case "gpt-low-medium":
            register_gpt_low_medium_models()
        case "gpt-medium-high":
            register_gpt_medium_high_models()
        case "gemini-low-medium":
            register_gemini_low_medium_models()
        case "gemini-medium-high":
            register_gemini_medium_high_models()
        case "gemini3-medium-high":
            register_gemini3_medium_high_models()

    # Get user query
    if args.query:
        user_query = args.query
    else:
        query_path = Path(args.query_file)
        if not query_path.exists():
            raise FileNotFoundError(f"Query file not found: {query_path}")
        user_query = query_path.read_text(encoding="utf-8")

    logger.info(f"User query length: {len(user_query)} chars")

    # Run coding workflow
    result = run_coding_workflow(
        user_query=user_query,
        workspace_dir=args.workspace,
    )

    # Print result
    print("\n" + "=" * 80)
    print("CODING WORKFLOW RESULT")
    print("=" * 80)
    print(result)
    print("=" * 80)

    # Save to file if requested
    if args.output:
        output_path = Path(args.output)
        output_path.write_text(result, encoding="utf-8")
        logger.info(f"Result saved to: {output_path}")
        output_path.write_text(result, encoding="utf-8")
        logger.info(f"Result saved to: {output_path}")