victordibia commited on
Commit
cbd95af
·
1 Parent(s): c1ec9a0

Deploy 2026-01-28 10:56:31

Browse files
Files changed (40) hide show
  1. .env.example +29 -0
  2. README.md +10 -6
  3. src/flow/cli/app.py +6 -6
  4. src/flow/cli/optimize.py +99 -135
  5. src/flow/experiments/__init__.py +52 -96
  6. src/flow/experiments/ablation.py +76 -248
  7. src/flow/experiments/config_export.py +0 -184
  8. src/flow/experiments/models.py +517 -0
  9. src/flow/experiments/optimizer.py +81 -147
  10. src/flow/experiments/types.py +2 -2
  11. src/flow/harness/maf/agent.py +36 -26
  12. src/flow/harness/maf/tools/__init__.py +157 -0
  13. src/flow/{tools → harness/maf/tools}/coding.py +0 -0
  14. src/flow/{tools → harness/maf/tools}/core.py +0 -0
  15. src/flow/{tools → harness/maf/tools}/execution.py +0 -0
  16. src/flow/{tools → harness/maf/tools}/memory.py +0 -0
  17. src/flow/{tools → harness/maf/tools}/sub_agent.py +14 -6
  18. src/flow/prompts.py +234 -97
  19. src/flow/tools/__init__.py +0 -172
  20. src/flow/ui/api/configs.py +81 -111
  21. src/flow/ui/api/jobs.py +6 -6
  22. src/flow/ui/api/runs.py +13 -13
  23. src/flow/ui/database.py +1 -57
  24. src/flow/ui/models/config.py +4 -4
  25. src/flow/ui/models/job.py +1 -1
  26. src/flow/ui/models/run.py +1 -1
  27. src/flow/ui/models/task.py +1 -1
  28. src/flow/ui/schemas/__init__.py +4 -4
  29. src/flow/ui/schemas/config.py +33 -29
  30. src/flow/ui/schemas/job.py +3 -3
  31. src/flow/ui/schemas/run.py +2 -2
  32. src/flow/ui/services/optimizer_service.py +38 -48
  33. src/flow/ui/tests/test_e2e_user_journey.py +6 -6
  34. src/flow/ui/ui/assets/index-2zMAgGgo.js +0 -0
  35. src/flow/ui/ui/assets/index-BG9n9RHB.js +0 -0
  36. src/flow/ui/ui/assets/index-BHAF8mLj.css +1 -0
  37. src/flow/ui/ui/assets/index-Bx-_JS_6.js +0 -0
  38. src/flow/ui/ui/assets/index-VFZIS3uv.js +0 -0
  39. src/flow/ui/ui/assets/index-_IRgS-wR.css +1 -0
  40. src/flow/ui/ui/index.html +2 -2
.env.example ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Flow UI Deployment Environment
2
+ # Copy this to deploy/.env and fill in values
3
+ # This file is gitignored - secrets stay local
4
+
5
+ # --- Azure OpenAI ---
6
+ AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
7
+ AZURE_OPENAI_API_KEY=your-key
8
+ AZURE_OPENAI_DEPLOYMENT=gpt-4o
9
+
10
+ # --- Authentication ---
11
+ AUTH_ENABLED=true
12
+ AUTH_MODE=github
13
+ AUTH_SECRET=change-me-to-a-random-string
14
+
15
+ # For GitHub OAuth (create app at https://github.com/settings/developers):
16
+ # Homepage URL: https://victordibia-flow.hf.space
17
+ # Callback URL: https://victordibia-flow.hf.space/api/auth/github/callback
18
+ AUTH_GITHUB_CLIENT_ID=your-client-id
19
+ AUTH_GITHUB_CLIENT_SECRET=your-client-secret
20
+ AUTH_GITHUB_ALLOWED_USERS=victordibia,teammate1,teammate2
21
+
22
+ # For basic auth (simpler, no GitHub app needed):
23
+ # AUTH_MODE=basic
24
+ # AUTH_BASIC_USERNAME=admin
25
+ # AUTH_BASIC_PASSWORD=your-password
26
+
27
+ # --- Optional ---
28
+ # AUTH_SESSION_HOURS=24
29
+ # UVICORN_WORKERS=2
README.md CHANGED
@@ -83,13 +83,17 @@ Flow tests different **context engineering strategies**:
83
  Example configurations:
84
 
85
  ```python
86
- from flow.experiments.ablation import AblationConfig
87
 
88
- configs = [
89
- AblationConfig(name="baseline", enable_message_compaction=False),
90
- AblationConfig(name="compaction", enable_message_compaction=True, compaction_head_size=10),
91
- AblationConfig(name="full", enable_message_compaction=True, enable_memory_tool=True),
92
- ]
 
 
 
 
93
  ```
94
 
95
  ## Task Format
 
83
  Example configurations:
84
 
85
  ```python
86
+ from flow.experiments.models import Agent, CompactionConfig, GridSearchStrategy
87
 
88
+ # Define a base agent
89
+ base = Agent(name="my_agent", enable_memory=True)
90
+
91
+ # Generate candidates via grid search
92
+ strategy = GridSearchStrategy(variations={
93
+ "enable_memory": [True, False],
94
+ "compaction": [CompactionConfig.head_tail(10, 40), CompactionConfig.none()],
95
+ })
96
+ candidates = strategy.generate(base, budget=10)
97
  ```
98
 
99
  ## Task Format
src/flow/cli/app.py CHANGED
@@ -107,13 +107,13 @@ async def _run_single_task(
107
  from flow.harness.maf import MAFHarness
108
 
109
  if config_path:
110
- # Load config from optimization result
111
- from flow.experiments.config_export import load_config
112
- from flow.experiments.ablation import create_harness_from_config
113
 
114
- ablation_config = load_config(config_path)
115
- console.print(f"[dim]Using config: {ablation_config.name}[/]")
116
- harness = create_harness_from_config(ablation_config, workspace)
117
  else:
118
  harness = MAFHarness(workspace=workspace, memory_path=memory_path)
119
 
 
107
  from flow.harness.maf import MAFHarness
108
 
109
  if config_path:
110
+ # Load agent config from optimization result
111
+ from flow.experiments.models import load_agent
112
+ from flow.experiments.ablation import create_harness_from_agent
113
 
114
+ agent_config = load_agent(config_path)
115
+ console.print(f"[dim]Using agent config: {agent_config.name}[/]")
116
+ harness = create_harness_from_agent(agent_config, workspace)
117
  else:
118
  harness = MAFHarness(workspace=workspace, memory_path=memory_path)
119
 
src/flow/cli/optimize.py CHANGED
@@ -13,13 +13,9 @@ from typing import Annotated, Any
13
  import typer
14
  from rich.console import Console
15
 
16
- from flow.experiments.ablation import AblationConfig, CONTEXT_ENGINEERING_CONFIGS
17
- from flow.experiments.optimizer import (
18
- FlowOptimizer,
19
- generate_grid_configs,
20
- load_tasks_from_jsonl,
21
- )
22
- from flow.experiments.types import EvalCriterion, Task
23
 
24
  console = Console()
25
 
@@ -36,21 +32,21 @@ def optimize(
36
  Path | None,
37
  typer.Option(
38
  "--config", "-c",
39
- help="Path to Python config file with CONFIGS or VARIATIONS",
40
  ),
41
  ] = None,
42
  agent: Annotated[
43
  Path | None,
44
  typer.Option(
45
  "--agent", "-a",
46
- help="Path to base agent Python file (for optimization)",
47
  ),
48
  ] = None,
49
  suite: Annotated[
50
  str | None,
51
  typer.Option(
52
  "--suite", "-s",
53
- help="Built-in task suite: coding, research",
54
  ),
55
  ] = None,
56
  parallel: Annotated[
@@ -60,18 +56,11 @@ def optimize(
60
  help="Max concurrent experiments",
61
  ),
62
  ] = 4,
63
- mode: Annotated[
64
- str,
65
- typer.Option(
66
- "--mode", "-m",
67
- help="Config mode: named (use CONFIGS), grid (use VARIATIONS)",
68
- ),
69
- ] = "named",
70
  vary: Annotated[
71
  str | None,
72
  typer.Option(
73
  "--vary", "-v",
74
- help="Comma-separated params to vary: compaction,memory,model",
75
  ),
76
  ] = None,
77
  output: Annotated[
@@ -88,28 +77,35 @@ def optimize(
88
  help="Disable LLM-as-Judge evaluation (faster, less accurate)",
89
  ),
90
  ] = False,
 
 
 
 
 
 
 
91
  ) -> None:
92
  """Find the best agent configuration through experimentation.
93
 
94
  Runs experiments in parallel, evaluates with LLM-as-Judge,
95
- ranks via Pareto analysis, and exports winning configs.
96
 
97
  Examples:
98
 
99
- # Run with task file and default configs
100
  flow optimize --tasks tasks.jsonl
101
 
102
- # Use custom configs from Python file
103
  flow optimize --config my_configs.py --tasks tasks.jsonl
104
 
105
- # Grid search over variations
106
- flow optimize --config my_configs.py --tasks tasks.jsonl --mode grid
107
 
108
  # Use built-in task suite
109
  flow optimize --suite coding --parallel 2
110
 
111
- # Vary specific parameters
112
- flow optimize --vary compaction,memory --tasks tasks.jsonl
113
  """
114
  asyncio.run(_run_optimize(
115
  tasks_path=tasks,
@@ -117,10 +113,10 @@ def optimize(
117
  agent_path=agent,
118
  suite=suite,
119
  parallel=parallel,
120
- mode=mode,
121
  vary=vary,
122
  output_dir=output,
123
  use_llm_eval=not no_llm_eval,
 
124
  ))
125
 
126
 
@@ -130,10 +126,10 @@ async def _run_optimize(
130
  agent_path: Path | None,
131
  suite: str | None,
132
  parallel: int,
133
- mode: str,
134
  vary: str | None,
135
  output_dir: Path | None,
136
  use_llm_eval: bool,
 
137
  ) -> None:
138
  """Run the optimization."""
139
  # Load tasks
@@ -142,19 +138,23 @@ async def _run_optimize(
142
  console.print("[red]Error:[/] No tasks specified. Use --tasks or --suite")
143
  raise typer.Exit(1)
144
 
145
- # Load configs
146
- configs = _load_configs(config_path, mode, vary)
147
- if not configs:
148
- console.print("[red]Error:[/] No configs to test. Use --config or --vary")
 
 
 
149
  raise typer.Exit(1)
150
 
 
151
  console.print(f"\n[bold]Tasks:[/] {len(tasks)}")
152
  for t in tasks:
153
  console.print(f" - {t.name}")
154
 
155
- console.print(f"\n[bold]Configs:[/] {len(configs)}")
156
- for c in configs:
157
- console.print(f" - {c.name}")
158
 
159
  # Run optimizer
160
  optimizer = FlowOptimizer(
@@ -164,12 +164,12 @@ async def _run_optimize(
164
  )
165
 
166
  try:
167
- result = await optimizer.optimize(configs, tasks)
168
 
169
  console.print("\n[bold green]Optimization complete![/]")
170
- console.print(f"\nBest configs exported to: [cyan]{result.output_dir / 'configs'}[/]")
171
- console.print("\nTo use a config:")
172
- console.print(f" [dim]flow run --config {result.output_dir / 'configs' / 'best_score.yaml'} \"your task\"[/]")
173
 
174
  except KeyboardInterrupt:
175
  console.print("\n[yellow]Optimization cancelled.[/]")
@@ -185,116 +185,73 @@ def _load_tasks(tasks_path: Path | None, suite: str | None) -> list[Task]:
185
  return load_tasks_from_jsonl(tasks_path)
186
 
187
  if suite:
188
- return _get_builtin_suite(suite)
189
-
190
- # Default: simple test suite
191
- return _get_builtin_suite("quick")
192
-
193
-
194
- def _get_builtin_suite(name: str) -> list[Task]:
195
- """Get a built-in task suite."""
196
- suites = {
197
- "quick": [
198
- Task(
199
- name="hello_world",
200
- prompt="Create a Python script 'hello.py' that prints 'Hello, World!' and run it.",
201
- criteria=[
202
- EvalCriterion(name="file_created", instruction="hello.py should be created"),
203
- EvalCriterion(name="correct_output", instruction="Output should include 'Hello, World!'"),
204
- ],
205
- ),
206
- ],
207
- "coding": [
208
- Task(
209
- name="fizzbuzz",
210
- prompt="Create fizzbuzz.py that prints 1-30 with Fizz/Buzz/FizzBuzz rules. Run it.",
211
- criteria=[
212
- EvalCriterion(name="file_created", instruction="fizzbuzz.py should be created"),
213
- EvalCriterion(name="correct_output", instruction="Output shows correct FizzBuzz pattern"),
214
- ],
215
- metadata={"category": "short"},
216
- ),
217
- Task(
218
- name="rest_api",
219
- prompt="Create a FastAPI app with a /health endpoint that returns JSON {'status': 'ok'}. Save as api.py.",
220
- criteria=[
221
- EvalCriterion(name="file_created", instruction="api.py should be created"),
222
- EvalCriterion(name="fastapi_used", instruction="Should use FastAPI"),
223
- EvalCriterion(name="endpoint_defined", instruction="Should have /health endpoint"),
224
- ],
225
- metadata={"category": "medium"},
226
- ),
227
- Task(
228
- name="data_pipeline",
229
- prompt="""Create a data processing pipeline:
230
- 1. data_types.py - DataRecord dataclass (id, name, value)
231
- 2. validators.py - validate_id, validate_name functions
232
- 3. pipeline.py - chain validators together
233
- 4. test_pipeline.py - tests for the pipeline
234
- Run the tests.""",
235
- criteria=[
236
- EvalCriterion(name="modules_created", instruction="All 4 Python files created"),
237
- EvalCriterion(name="tests_run", instruction="Tests should be executed"),
238
- ],
239
- metadata={"category": "long"},
240
- ),
241
- ],
242
- "research": [
243
- Task(
244
- name="codebase_analysis",
245
- prompt="""Analyze this workspace:
246
- 1. Explore the directory structure
247
- 2. Identify Python files and their purposes
248
- 3. Create analysis_report.md with findings""",
249
- criteria=[
250
- EvalCriterion(name="exploration", instruction="Should explore directory"),
251
- EvalCriterion(name="report_created", instruction="analysis_report.md created"),
252
- ],
253
- metadata={"category": "research"},
254
- ),
255
- ],
256
- }
257
 
258
- if name not in suites:
259
- console.print(f"[red]Error:[/] Unknown suite '{name}'. Available: {list(suites.keys())}")
 
 
 
260
  raise typer.Exit(1)
261
 
262
- return suites[name]
 
 
 
 
 
 
 
 
 
 
263
 
264
 
265
- def _load_configs(
266
  config_path: Path | None,
267
- mode: str,
268
  vary: str | None,
269
- ) -> list[AblationConfig]:
270
- """Load configs from file or generate from variations."""
271
- # Load from Python file
 
272
  if config_path:
273
  if not config_path.exists():
274
  console.print(f"[red]Error:[/] Config file not found: {config_path}")
275
  raise typer.Exit(1)
276
 
277
- configs, variations = _load_python_config(config_path)
278
 
279
- if mode == "grid" and variations:
280
- return generate_grid_configs("grid", variations)
281
- elif configs:
282
- return configs
 
283
  else:
284
- console.print("[red]Error:[/] Config file has no CONFIGS or VARIATIONS")
285
  raise typer.Exit(1)
286
 
287
- # Generate from --vary flag
288
  if vary:
289
  variations = _parse_vary_flag(vary)
290
- return generate_grid_configs("vary", variations)
291
-
292
- # Default: use context engineering configs
293
- return CONTEXT_ENGINEERING_CONFIGS
 
 
 
 
 
 
 
 
294
 
295
 
296
- def _load_python_config(path: Path) -> tuple[list[AblationConfig], dict[str, Any]]:
297
- """Load CONFIGS and VARIATIONS from a Python file."""
298
  spec = importlib.util.spec_from_file_location("config_module", path)
299
  if spec is None or spec.loader is None:
300
  raise ValueError(f"Cannot load {path}")
@@ -303,29 +260,36 @@ def _load_python_config(path: Path) -> tuple[list[AblationConfig], dict[str, Any
303
  sys.modules["config_module"] = module
304
  spec.loader.exec_module(module)
305
 
306
- configs = getattr(module, "CONFIGS", [])
307
  variations = getattr(module, "VARIATIONS", {})
308
 
309
- return configs, variations
310
 
311
 
312
  def _parse_vary_flag(vary: str) -> dict[str, Any]:
313
  """Parse --vary flag into variations dict."""
314
- variations = {}
315
 
316
  for param in vary.split(","):
317
  param = param.strip().lower()
318
 
319
  if param in ("compaction", "compact"):
320
- variations["enable_message_compaction"] = [True, False]
 
 
 
321
  elif param in ("memory", "mem"):
322
- variations["enable_memory_tool"] = [True, False]
323
  elif param in ("subagent", "sub"):
324
  variations["enable_sub_agent"] = [True, False]
325
  elif param in ("head", "head_size"):
326
- variations["compaction_head_size"] = [5, 10, 20]
 
 
327
  elif param in ("tail", "tail_size"):
328
- variations["compaction_tail_size"] = [20, 40, 60]
 
 
329
  else:
330
  console.print(f"[yellow]Warning:[/] Unknown vary param: {param}")
331
 
 
13
  import typer
14
  from rich.console import Console
15
 
16
+ from flow.experiments.models import Agent, Candidate, CompactionConfig, GridSearchStrategy
17
+ from flow.experiments.optimizer import FlowOptimizer, load_tasks_from_jsonl
18
+ from flow.experiments.types import Task, get_task_suite
 
 
 
 
19
 
20
  console = Console()
21
 
 
32
  Path | None,
33
  typer.Option(
34
  "--config", "-c",
35
+ help="Path to Python config file with CANDIDATES or VARIATIONS",
36
  ),
37
  ] = None,
38
  agent: Annotated[
39
  Path | None,
40
  typer.Option(
41
  "--agent", "-a",
42
+ help="Path to base agent YAML file (for optimization)",
43
  ),
44
  ] = None,
45
  suite: Annotated[
46
  str | None,
47
  typer.Option(
48
  "--suite", "-s",
49
+ help="Built-in task suite: quick, core, coding",
50
  ),
51
  ] = None,
52
  parallel: Annotated[
 
56
  help="Max concurrent experiments",
57
  ),
58
  ] = 4,
 
 
 
 
 
 
 
59
  vary: Annotated[
60
  str | None,
61
  typer.Option(
62
  "--vary", "-v",
63
+ help="Comma-separated params to vary: compaction,memory,subagent",
64
  ),
65
  ] = None,
66
  output: Annotated[
 
77
  help="Disable LLM-as-Judge evaluation (faster, less accurate)",
78
  ),
79
  ] = False,
80
+ budget: Annotated[
81
+ int,
82
+ typer.Option(
83
+ "--budget", "-b",
84
+ help="Maximum number of candidates to generate",
85
+ ),
86
+ ] = 100,
87
  ) -> None:
88
  """Find the best agent configuration through experimentation.
89
 
90
  Runs experiments in parallel, evaluates with LLM-as-Judge,
91
+ ranks via Pareto analysis, and exports winning agent configs.
92
 
93
  Examples:
94
 
95
+ # Run with task file and default candidates
96
  flow optimize --tasks tasks.jsonl
97
 
98
+ # Use custom candidates from Python file
99
  flow optimize --config my_configs.py --tasks tasks.jsonl
100
 
101
+ # Vary specific parameters
102
+ flow optimize --vary compaction,memory --tasks tasks.jsonl
103
 
104
  # Use built-in task suite
105
  flow optimize --suite coding --parallel 2
106
 
107
+ # Start from a base agent definition
108
+ flow optimize --agent base_agent.yaml --vary compaction,memory --tasks tasks.jsonl
109
  """
110
  asyncio.run(_run_optimize(
111
  tasks_path=tasks,
 
113
  agent_path=agent,
114
  suite=suite,
115
  parallel=parallel,
 
116
  vary=vary,
117
  output_dir=output,
118
  use_llm_eval=not no_llm_eval,
119
+ budget=budget,
120
  ))
121
 
122
 
 
126
  agent_path: Path | None,
127
  suite: str | None,
128
  parallel: int,
 
129
  vary: str | None,
130
  output_dir: Path | None,
131
  use_llm_eval: bool,
132
+ budget: int,
133
  ) -> None:
134
  """Run the optimization."""
135
  # Load tasks
 
138
  console.print("[red]Error:[/] No tasks specified. Use --tasks or --suite")
139
  raise typer.Exit(1)
140
 
141
+ # Load base agent
142
+ base = _load_base_agent(agent_path)
143
+
144
+ # Load/generate candidates
145
+ candidates = _load_candidates(config_path, vary, base, budget)
146
+ if not candidates:
147
+ console.print("[red]Error:[/] No candidates to test. Use --config or --vary")
148
  raise typer.Exit(1)
149
 
150
+ console.print(f"\n[bold]Base Agent:[/] {base.name}")
151
  console.print(f"\n[bold]Tasks:[/] {len(tasks)}")
152
  for t in tasks:
153
  console.print(f" - {t.name}")
154
 
155
+ console.print(f"\n[bold]Candidates:[/] {len(candidates)}")
156
+ for c in candidates:
157
+ console.print(f" - {c.agent.name}")
158
 
159
  # Run optimizer
160
  optimizer = FlowOptimizer(
 
164
  )
165
 
166
  try:
167
+ result = await optimizer.optimize(candidates, tasks)
168
 
169
  console.print("\n[bold green]Optimization complete![/]")
170
+ console.print(f"\nBest agents exported to: [cyan]{result.output_dir / 'agents'}[/]")
171
+ console.print("\nTo use an agent config:")
172
+ console.print(f" [dim]flow run --config {result.output_dir / 'agents' / 'best_score.yaml'} \"your task\"[/]")
173
 
174
  except KeyboardInterrupt:
175
  console.print("\n[yellow]Optimization cancelled.[/]")
 
185
  return load_tasks_from_jsonl(tasks_path)
186
 
187
  if suite:
188
+ try:
189
+ return get_task_suite(suite)
190
+ except ValueError as e:
191
+ console.print(f"[red]Error:[/] {e}")
192
+ raise typer.Exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
+ # Default: quick suite
195
+ try:
196
+ return get_task_suite("quick")
197
+ except ValueError:
198
+ console.print("[red]Error:[/] No built-in suites available. Use --tasks to specify a JSONL file.")
199
  raise typer.Exit(1)
200
 
201
+
202
+ def _load_base_agent(agent_path: Path | None) -> Agent:
203
+ """Load base agent from YAML or use defaults."""
204
+ if agent_path:
205
+ if not agent_path.exists():
206
+ console.print(f"[red]Error:[/] Agent file not found: {agent_path}")
207
+ raise typer.Exit(1)
208
+ from flow.experiments.models import load_agent
209
+ return load_agent(agent_path)
210
+
211
+ return Agent(name="flow_agent")
212
 
213
 
214
+ def _load_candidates(
215
  config_path: Path | None,
 
216
  vary: str | None,
217
+ base: Agent,
218
+ budget: int,
219
+ ) -> list[Candidate]:
220
+ """Load candidates from file or generate from variations."""
221
  if config_path:
222
  if not config_path.exists():
223
  console.print(f"[red]Error:[/] Config file not found: {config_path}")
224
  raise typer.Exit(1)
225
 
226
+ candidates, variations = _load_python_config(config_path)
227
 
228
+ if variations:
229
+ strategy = GridSearchStrategy(variations)
230
+ return strategy.generate(base, budget)
231
+ elif candidates:
232
+ return candidates
233
  else:
234
+ console.print("[red]Error:[/] Config file has no CANDIDATES or VARIATIONS")
235
  raise typer.Exit(1)
236
 
 
237
  if vary:
238
  variations = _parse_vary_flag(vary)
239
+ strategy = GridSearchStrategy(variations)
240
+ return strategy.generate(base, budget)
241
+
242
+ # Default: explore context engineering dimensions
243
+ strategy = GridSearchStrategy(variations={
244
+ "enable_memory": [True, False],
245
+ "compaction": [
246
+ CompactionConfig.head_tail(10, 40),
247
+ CompactionConfig.none(),
248
+ ],
249
+ })
250
+ return strategy.generate(base, budget)
251
 
252
 
253
+ def _load_python_config(path: Path) -> tuple[list[Candidate], dict[str, Any]]:
254
+ """Load CANDIDATES and VARIATIONS from a Python file."""
255
  spec = importlib.util.spec_from_file_location("config_module", path)
256
  if spec is None or spec.loader is None:
257
  raise ValueError(f"Cannot load {path}")
 
260
  sys.modules["config_module"] = module
261
  spec.loader.exec_module(module)
262
 
263
+ candidates = getattr(module, "CANDIDATES", [])
264
  variations = getattr(module, "VARIATIONS", {})
265
 
266
+ return candidates, variations
267
 
268
 
269
  def _parse_vary_flag(vary: str) -> dict[str, Any]:
270
  """Parse --vary flag into variations dict."""
271
+ variations: dict[str, Any] = {}
272
 
273
  for param in vary.split(","):
274
  param = param.strip().lower()
275
 
276
  if param in ("compaction", "compact"):
277
+ variations["compaction"] = [
278
+ CompactionConfig.head_tail(10, 40),
279
+ CompactionConfig.none(),
280
+ ]
281
  elif param in ("memory", "mem"):
282
+ variations["enable_memory"] = [True, False]
283
  elif param in ("subagent", "sub"):
284
  variations["enable_sub_agent"] = [True, False]
285
  elif param in ("head", "head_size"):
286
+ variations["compaction"] = [
287
+ CompactionConfig.head_tail(h, 40) for h in [5, 10, 20]
288
+ ]
289
  elif param in ("tail", "tail_size"):
290
+ variations["compaction"] = [
291
+ CompactionConfig.head_tail(10, t) for t in [20, 40, 60]
292
+ ]
293
  else:
294
  console.print(f"[yellow]Warning:[/] Unknown vary param: {param}")
295
 
src/flow/experiments/__init__.py CHANGED
@@ -3,96 +3,59 @@
3
  """Experiments framework for running and evaluating Flow agent tasks.
4
 
5
  This package provides a structured way to:
6
- - Define tasks with evaluation criteria
 
7
  - Run agents on tasks and collect OpenTelemetry traces
8
  - Evaluate agent outputs using LLM, heuristic, or trace-based evaluators
9
  - Extract metrics from execution traces
10
- - Run ablation studies comparing different configurations
11
 
12
  Example usage:
13
- from flow.harness.maf import MAFHarness
14
  from flow.experiments import (
15
- FlowExperimentRunner,
 
 
 
16
  Task,
17
  EvalCriterion,
18
- TraceEvaluator,
19
- HeuristicEvaluator,
20
- extract_metrics,
21
- format_metrics_summary,
22
- setup_tracing,
23
  )
24
 
25
- # Setup tracing (call once at startup)
26
- setup_tracing("my-experiment")
27
 
28
- # Define a task
29
- task = Task(
30
- name="hello_world",
31
- prompt="Write a Python function that prints 'Hello, World!'",
32
- criteria=[
33
- EvalCriterion(
34
- name="correctness",
35
- instruction="The function should print exactly 'Hello, World!'",
36
- ),
37
- ],
38
- )
39
-
40
- # Run the experiment
41
- harness = MAFHarness()
42
- runner = FlowExperimentRunner(keep_workspace=True)
43
- result = await runner.run(harness, task)
44
-
45
- # Extract metrics
46
- metrics = extract_metrics(result.trace)
47
- print(format_metrics_summary(metrics))
48
-
49
- # Evaluate the result
50
- evaluator = HeuristicEvaluator()
51
- eval_result = await evaluator.evaluate(result)
52
- print(f"Score: {eval_result.score}, Passed: {eval_result.passed}")
53
-
54
- await harness.close()
55
-
56
- Ablation studies:
57
- from flow.experiments import run_ablations, AblationConfig
58
 
59
- configs = [
60
- AblationConfig(name="baseline", enable_message_compaction=False),
61
- AblationConfig(name="with_compaction", enable_message_compaction=True),
62
- ]
63
-
64
- results = await run_ablations(
65
- configs,
66
- task_prompt="Create a simple HTTP server",
67
- )
68
  """
69
 
70
- # Types
71
- # Ablation
 
 
 
 
 
 
 
 
 
 
 
 
72
  from .ablation import (
73
- AGENT_MEMORY_ONLY,
74
- ALL_CONTEXT_ENGINEERING,
75
- COMPACTION_ONLY,
76
- # Context engineering configs
77
- CONTEXT_ENG_BASELINE,
78
- CONTEXT_ENGINEERING_CONFIGS,
79
- ISOLATION_ONLY,
80
- AblationConfig,
81
- AblationResult,
82
- # Shared utilities
83
  compute_pareto_frontier,
84
- create_harness_from_config,
85
  generate_recommendation,
86
- run_ablations,
87
- run_context_engineering_comparison,
88
- run_single_ablation,
89
- )
90
-
91
- # Config export
92
- from .config_export import (
93
- export_config,
94
- export_optimization_configs,
95
- load_config,
96
  )
97
 
98
  # Evaluators
@@ -116,11 +79,10 @@ from .metrics import (
116
 
117
  # Optimizer
118
  from .optimizer import (
119
- ConfigSummary,
120
  FlowOptimizer,
121
  OptimizationResult,
122
  TaskResult,
123
- generate_grid_configs,
124
  load_tasks_from_jsonl,
125
  )
126
 
@@ -142,6 +104,16 @@ from .trace_collector import FlowTraceCollector
142
  from .types import CriterionResult, EvalCriterion, EvalResult, RunResult, Task
143
 
144
  __all__ = [ # noqa: RUF022 # Intentionally grouped by category
 
 
 
 
 
 
 
 
 
 
145
  # Types
146
  "Task",
147
  "EvalCriterion",
@@ -173,32 +145,16 @@ __all__ = [ # noqa: RUF022 # Intentionally grouped by category
173
  "print_metrics_summary",
174
  "print_comparison_table",
175
  "print_eval_result",
176
- # Ablation
177
- "AblationConfig",
178
- "AblationResult",
179
- "run_ablations",
180
- "run_single_ablation",
181
- "create_harness_from_config",
182
- # Context engineering configs
183
- "CONTEXT_ENG_BASELINE",
184
- "COMPACTION_ONLY",
185
- "AGENT_MEMORY_ONLY",
186
- "ISOLATION_ONLY",
187
- "ALL_CONTEXT_ENGINEERING",
188
- "CONTEXT_ENGINEERING_CONFIGS",
189
- "run_context_engineering_comparison",
190
- # Shared utilities
191
  "compute_pareto_frontier",
192
  "generate_recommendation",
193
  # Optimizer
194
  "FlowOptimizer",
195
  "OptimizationResult",
196
- "ConfigSummary",
197
  "TaskResult",
198
- "generate_grid_configs",
199
  "load_tasks_from_jsonl",
200
- # Config export
201
- "export_config",
202
- "load_config",
203
- "export_optimization_configs",
204
  ]
 
3
  """Experiments framework for running and evaluating Flow agent tasks.
4
 
5
  This package provides a structured way to:
6
+ - Define agents with the Agent dataclass
7
+ - Generate candidate variants via CandidateStrategy implementations
8
  - Run agents on tasks and collect OpenTelemetry traces
9
  - Evaluate agent outputs using LLM, heuristic, or trace-based evaluators
10
  - Extract metrics from execution traces
11
+ - Run optimization studies comparing different candidates
12
 
13
  Example usage:
 
14
  from flow.experiments import (
15
+ Agent,
16
+ Candidate,
17
+ GridSearchStrategy,
18
+ FlowOptimizer,
19
  Task,
20
  EvalCriterion,
 
 
 
 
 
21
  )
22
 
23
+ # Define a base agent
24
+ base = Agent(name="my_agent", enable_memory=True)
25
 
26
+ # Generate candidates
27
+ strategy = GridSearchStrategy(variations={
28
+ "enable_memory": [True, False],
29
+ })
30
+ candidates = strategy.generate(base, budget=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ # Run optimization
33
+ optimizer = FlowOptimizer(parallel=4)
34
+ tasks = [Task(name="test", prompt="Create hello world")]
35
+ result = await optimizer.optimize(candidates, tasks)
36
+ print(f"Best: {result.rank_by_score[0]}")
 
 
 
 
37
  """
38
 
39
+ # Core models
40
+ from .models import (
41
+ Agent,
42
+ Candidate,
43
+ CandidateStrategy,
44
+ CompactionConfig,
45
+ ExperimentResult,
46
+ GridSearchStrategy,
47
+ export_agent,
48
+ export_optimization_results,
49
+ load_agent,
50
+ )
51
+
52
+ # Experiment runner + Pareto analysis
53
  from .ablation import (
 
 
 
 
 
 
 
 
 
 
54
  compute_pareto_frontier,
55
+ create_harness_from_agent,
56
  generate_recommendation,
57
+ run_experiments,
58
+ run_single_experiment,
 
 
 
 
 
 
 
 
59
  )
60
 
61
  # Evaluators
 
79
 
80
  # Optimizer
81
  from .optimizer import (
82
+ CandidateSummary,
83
  FlowOptimizer,
84
  OptimizationResult,
85
  TaskResult,
 
86
  load_tasks_from_jsonl,
87
  )
88
 
 
104
  from .types import CriterionResult, EvalCriterion, EvalResult, RunResult, Task
105
 
106
  __all__ = [ # noqa: RUF022 # Intentionally grouped by category
107
+ # Core models
108
+ "Agent",
109
+ "Candidate",
110
+ "CandidateStrategy",
111
+ "CompactionConfig",
112
+ "ExperimentResult",
113
+ "GridSearchStrategy",
114
+ "export_agent",
115
+ "load_agent",
116
+ "export_optimization_results",
117
  # Types
118
  "Task",
119
  "EvalCriterion",
 
145
  "print_metrics_summary",
146
  "print_comparison_table",
147
  "print_eval_result",
148
+ # Experiment runner
149
+ "create_harness_from_agent",
150
+ "run_experiments",
151
+ "run_single_experiment",
 
 
 
 
 
 
 
 
 
 
 
152
  "compute_pareto_frontier",
153
  "generate_recommendation",
154
  # Optimizer
155
  "FlowOptimizer",
156
  "OptimizationResult",
157
+ "CandidateSummary",
158
  "TaskResult",
 
159
  "load_tasks_from_jsonl",
 
 
 
 
160
  ]
src/flow/experiments/ablation.py CHANGED
@@ -1,137 +1,91 @@
1
  # Copyright (c) Microsoft. All rights reserved.
2
 
3
- """Ablation runner for comparing Flow agent configurations.
4
 
5
  This module provides:
6
- - AblationConfig: Dataclass for agent configuration parameters
7
  - Pareto analysis utilities for multi-objective optimization
8
- - Pre-defined configurations for context engineering strategies
9
- - Convenience functions for running ablation studies
10
  """
11
 
12
  from __future__ import annotations
13
 
14
  import json
15
  import logging
16
- from dataclasses import asdict, dataclass
17
  from datetime import datetime
18
  from pathlib import Path
19
- from typing import TYPE_CHECKING
20
 
21
  from .evaluators import HeuristicEvaluator
22
- from .metrics import TraceMetrics, extract_metrics, metrics_to_dict
 
23
  from .reporters import print_comparison_table, save_run_result
24
  from .runner import FlowExperimentRunner, setup_tracing
25
- from .types import EvalCriterion, RunResult, Task
26
 
27
  if TYPE_CHECKING:
28
  from flow.harness.maf import MAFHarness
29
 
30
- from .optimizer import ConfigSummary
31
 
32
  logger = logging.getLogger(__name__)
33
 
34
 
35
- @dataclass
36
- class AblationConfig:
37
- """Configuration for a single ablation run.
38
-
39
- Each config represents a different agent configuration to test.
40
- The name is used as an identifier in comparison results.
41
-
42
- Attributes:
43
- name: Unique identifier for this configuration
44
- enable_message_compaction: Whether to enable message compaction
45
- enable_memory_tool: Whether to enable agent-managed memory
46
- enable_sub_agent: Whether to enable sub-agent for isolated research
47
- compaction_head_size: Number of initial messages to keep
48
- compaction_tail_size: Number of recent messages to keep
49
- bash_timeout: Timeout for bash commands in seconds
50
- """
51
-
52
- name: str
53
- enable_message_compaction: bool = True
54
- enable_memory_tool: bool = True
55
- enable_sub_agent: bool = False
56
- compaction_head_size: int = 10
57
- compaction_tail_size: int = 40
58
- bash_timeout: int = 120
59
-
60
-
61
- @dataclass
62
- class AblationResult:
63
- """Result of a single ablation run.
64
-
65
- Contains all data from the run including raw results,
66
- extracted metrics, and evaluation scores.
67
- """
68
-
69
- config: AblationConfig
70
- run_result: RunResult
71
- metrics: TraceMetrics
72
- eval_score: float
73
- eval_passed: bool
74
- eval_reasoning: str
75
-
76
-
77
- def create_harness_from_config(config: AblationConfig, workspace: Path) -> MAFHarness:
78
- """Create a MAFHarness from an ablation config.
79
 
80
  Args:
81
- config: The ablation configuration
82
  workspace: Working directory
83
 
84
  Returns:
85
  A configured MAFHarness
86
  """
 
87
  from flow.harness.maf import MAFHarness
88
 
 
 
 
89
  return MAFHarness(
90
  workspace=workspace,
91
  memory_path=workspace / "memory",
92
- enable_compaction=config.enable_message_compaction,
93
- enable_memory_tool=config.enable_memory_tool,
94
- enable_sub_agent=config.enable_sub_agent,
95
- compaction_head_size=config.compaction_head_size,
96
- compaction_tail_size=config.compaction_tail_size,
97
- bash_timeout=config.bash_timeout,
98
  )
99
 
100
 
101
- async def run_single_ablation(
102
- config: AblationConfig,
103
  task: Task,
104
  workspace: Path,
105
- ) -> AblationResult:
106
- """Run a single ablation with trace capture and evaluation.
107
 
108
  Args:
109
- config: The ablation configuration
110
  task: The task to run
111
  workspace: Working directory
112
 
113
  Returns:
114
- AblationResult with metrics and evaluation
115
  """
116
- # Create harness from config
117
- harness = create_harness_from_config(config, workspace)
118
 
119
  try:
120
- # Create runner
121
  runner = FlowExperimentRunner(keep_workspace=True)
122
-
123
- # Run the experiment
124
  run_result = await runner.run(harness, task, workspace=workspace)
125
-
126
- # Extract metrics
127
  metrics = extract_metrics(run_result.trace)
128
 
129
- # Evaluate the result
130
  evaluator = HeuristicEvaluator()
131
  eval_result = await evaluator.evaluate(run_result)
132
 
133
- return AblationResult(
134
- config=config,
135
  run_result=run_result,
136
  metrics=metrics,
137
  eval_score=eval_result.score,
@@ -142,26 +96,20 @@ async def run_single_ablation(
142
  await harness.close()
143
 
144
 
145
- def save_ablation_result(result: AblationResult, output_dir: Path) -> None:
146
- """Save ablation result to files.
147
-
148
- Creates a subdirectory for the config with all result files.
149
-
150
- Args:
151
- result: The ablation result to save
152
- output_dir: Base directory for output
153
- """
154
- config_dir = output_dir / result.config.name
155
  save_run_result(
156
  result.run_result,
157
  config_dir,
158
  metrics=result.metrics,
159
  )
160
 
161
- # Save ablation-specific data
162
- with open(config_dir / "ablation.json", "w") as f:
163
  json.dump({
164
- "config": asdict(result.config),
 
 
165
  "evaluation": {
166
  "score": result.eval_score,
167
  "passed": result.eval_passed,
@@ -170,37 +118,29 @@ def save_ablation_result(result: AblationResult, output_dir: Path) -> None:
170
  }, f, indent=2)
171
 
172
 
173
- async def run_ablations(
174
- configs: list[AblationConfig],
175
  task_prompt: str,
176
  output_dir: Path | None = None,
177
- task_name: str = "ablation_task",
178
- ) -> list[AblationResult]:
179
- """Run multiple ablation configurations and compare.
180
-
181
- This function:
182
- 1. Sets up tracing
183
- 2. Runs each configuration on the same task
184
- 3. Collects metrics and evaluation scores
185
- 4. Saves results and prints comparison
186
 
187
  Args:
188
- configs: List of configurations to test
189
  task_prompt: The task prompt to run
190
- output_dir: Base directory for output (default: ~/.flow/ablations)
191
- task_name: Name for the task (used in file paths)
192
 
193
  Returns:
194
- List of ablation results
195
  """
196
- # Setup output directory
197
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
198
  if output_dir is None:
199
- output_dir = Path.home() / ".flow" / "ablations"
200
  output_dir = output_dir / timestamp
201
  output_dir.mkdir(parents=True, exist_ok=True)
202
 
203
- # Create task
204
  task = Task(
205
  name=task_name,
206
  prompt=task_prompt,
@@ -212,52 +152,47 @@ async def run_ablations(
212
  ],
213
  )
214
 
215
- # Save configs
216
  with open(output_dir / "config.json", "w") as f: # noqa: ASYNC230
217
  json.dump({
218
  "task": task_prompt,
219
  "timestamp": timestamp,
220
- "configs": [asdict(c) for c in configs],
221
  }, f, indent=2)
222
 
223
  print("=" * 80)
224
- print(" FLOW ABLATION RUNNER")
225
  print("=" * 80)
226
- print(f" Task: {task_prompt[:60]}{'...' if len(task_prompt) > 60 else ''}")
227
- print(f" Configs: {len(configs)}")
228
- print(f" Output: {output_dir}")
229
  print("=" * 80)
230
 
231
- # Setup tracing once
232
- setup_tracing("flow-ablation")
233
 
234
- results = []
235
- for i, config in enumerate(configs, 1):
236
- print(f"\n[{i}/{len(configs)}] Running: {config.name}")
237
  print("-" * 40)
238
 
239
- # Each config gets its own workspace
240
- workspace = output_dir / config.name / "workspace"
241
  workspace.mkdir(parents=True, exist_ok=True)
242
 
243
- result = await run_single_ablation(
244
- config=config,
245
  task=task,
246
  workspace=workspace,
247
  )
248
 
249
  results.append(result)
250
- save_ablation_result(result, output_dir)
251
 
252
- # Quick status
253
  status = "OK" if result.run_result.success else "FAIL"
254
  print(f" {status} | {result.run_result.duration_seconds:.1f}s | "
255
  f"Tokens: {result.metrics.total_tokens} | Tools: {result.metrics.tool_call_count}")
256
 
257
- # Save comparison
258
  comparison_data = [
259
  {
260
- "name": r.config.name,
261
  "success": r.run_result.success,
262
  "duration_seconds": r.run_result.duration_seconds,
263
  "metrics": metrics_to_dict(r.metrics),
@@ -272,152 +207,48 @@ async def run_ablations(
272
  with open(output_dir / "comparison.json", "w") as f: # noqa: ASYNC230
273
  json.dump({"task": task_prompt, "results": comparison_data}, f, indent=2)
274
 
275
- # Print comparison
276
- print_comparison_table(comparison_data, "Ablation Comparison")
277
-
278
  print(f"\nResults saved to: {output_dir}")
279
 
280
  return results
281
 
282
 
283
  # =============================================================================
284
- # Context Engineering Baseline Configurations
285
- # =============================================================================
286
- # These configurations demonstrate the three main context engineering strategies:
287
- # 1. Compaction - Reactive trimming via message stores
288
- # 2. Agent-Managed Memory - Agent controls when to write/read/delete
289
- # 3. Isolation - Sub-agent architecture prevents context pollution
290
-
291
-
292
- # Baseline: No context engineering (for comparison)
293
- CONTEXT_ENG_BASELINE = AblationConfig(
294
- name="no_context_engineering",
295
- enable_message_compaction=False,
296
- enable_memory_tool=False,
297
- enable_sub_agent=False,
298
- )
299
-
300
- # Strategy 1: Compaction via Message Stores
301
- # Uses HeadTailCompactingMessageStore to keep first N + last M messages
302
- # Good for: Long-running sessions where middle context is less important
303
- COMPACTION_ONLY = AblationConfig(
304
- name="compaction_only",
305
- enable_message_compaction=True,
306
- enable_memory_tool=False,
307
- enable_sub_agent=False,
308
- compaction_head_size=10, # Keep task context
309
- compaction_tail_size=40, # Keep recent work
310
- )
311
-
312
- # Strategy 2: Agent-Managed Memory
313
- # Agent decides when to save/retrieve information from persistent storage
314
- # Good for: Cross-session memory, learning patterns, storing decisions
315
- AGENT_MEMORY_ONLY = AblationConfig(
316
- name="agent_memory_only",
317
- enable_message_compaction=False,
318
- enable_memory_tool=True,
319
- enable_sub_agent=False,
320
- )
321
-
322
- # Strategy 3: Isolation via Sub-Agent
323
- # Delegate heavy research to sub-agent with isolated context
324
- # Good for: Complex research tasks that would pollute main context
325
- ISOLATION_ONLY = AblationConfig(
326
- name="isolation_only",
327
- enable_message_compaction=False,
328
- enable_memory_tool=False,
329
- enable_sub_agent=True,
330
- )
331
-
332
- # Combined: All context engineering strategies
333
- # Uses compaction + memory + isolation together
334
- # Good for: Production systems with long-running, complex tasks
335
- ALL_CONTEXT_ENGINEERING = AblationConfig(
336
- name="all_context_engineering",
337
- enable_message_compaction=True,
338
- enable_memory_tool=True,
339
- enable_sub_agent=True,
340
- compaction_head_size=10,
341
- compaction_tail_size=40,
342
- )
343
-
344
- # Predefined list for running context engineering comparison
345
- CONTEXT_ENGINEERING_CONFIGS = [
346
- CONTEXT_ENG_BASELINE,
347
- COMPACTION_ONLY,
348
- AGENT_MEMORY_ONLY,
349
- ISOLATION_ONLY,
350
- ALL_CONTEXT_ENGINEERING,
351
- ]
352
-
353
-
354
- async def run_context_engineering_comparison(
355
- task_prompt: str,
356
- output_dir: Path | None = None,
357
- ) -> list[AblationResult]:
358
- """Run a comparison of all context engineering strategies.
359
-
360
- This is a convenience function that runs all context engineering
361
- baseline configurations against a single task for comparison.
362
-
363
- Args:
364
- task_prompt: The task to run (should benefit from context management)
365
- output_dir: Optional output directory for results
366
-
367
- Returns:
368
- List of AblationResult for each strategy
369
-
370
- Example:
371
- >>> results = await run_context_engineering_comparison(
372
- ... "Research the authentication patterns in this codebase and "
373
- ... "create a summary document with recommendations."
374
- ... )
375
- """
376
- return await run_ablations(
377
- configs=CONTEXT_ENGINEERING_CONFIGS,
378
- task_prompt=task_prompt,
379
- output_dir=output_dir,
380
- task_name="context_engineering_comparison",
381
- )
382
-
383
-
384
- # =============================================================================
385
- # Shared Utilities for Pareto Analysis
386
  # =============================================================================
387
 
388
 
389
  def compute_pareto_frontier(
390
- summaries: list[ConfigSummary],
391
  score_key: str = "avg_score",
392
  cost_key: str = "avg_tokens",
393
  ) -> list[str]:
394
  """Compute Pareto frontier for multi-objective optimization.
395
 
396
- Identifies configurations that are not dominated by any other configuration.
397
- A config is dominated if another config has better score AND lower tokens.
398
 
399
  Args:
400
- summaries: List of ConfigSummary objects (or dicts with score/token keys)
401
  score_key: Attribute name for the score metric (higher is better)
402
  cost_key: Attribute name for the cost metric (lower is better)
403
 
404
  Returns:
405
  List of names of Pareto-optimal configurations
406
  """
407
- # Sort by cost (ascending)
408
- def get_val(s: object, key: str) -> float:
409
  if isinstance(s, dict):
410
  return float(s.get(key, 0))
411
  return float(getattr(s, key, 0))
412
 
413
- def get_name(s: object) -> str:
414
  if isinstance(s, dict):
415
  return str(s.get("name", ""))
416
  return str(getattr(s, "name", ""))
417
 
418
  sorted_summaries = sorted(summaries, key=lambda s: get_val(s, cost_key))
419
 
420
- pareto_names = []
421
  best_score = -1.0
422
 
423
  for summary in sorted_summaries:
@@ -430,40 +261,37 @@ def compute_pareto_frontier(
430
 
431
 
432
  def generate_recommendation(
433
- summaries: list[ConfigSummary],
434
  pareto_names: list[str],
435
  min_score: float = 0.7,
436
  ) -> tuple[str | None, str]:
437
  """Generate a recommendation based on Pareto analysis.
438
 
439
  Args:
440
- summaries: List of ConfigSummary objects
441
- pareto_names: Names of Pareto-optimal configs
442
  min_score: Minimum acceptable score threshold
443
 
444
  Returns:
445
- Tuple of (recommended_config_name, recommendation_text)
446
  """
447
- def get_val(s: object, key: str) -> float:
448
  if isinstance(s, dict):
449
  return float(s.get(key, 0))
450
  return float(getattr(s, key, 0))
451
 
452
- def get_name(s: object) -> str:
453
  if isinstance(s, dict):
454
  return str(s.get("name", ""))
455
  return str(getattr(s, "name", ""))
456
 
457
- # Filter to acceptable configs
458
  acceptable = [s for s in summaries if get_val(s, "avg_score") >= min_score]
459
  if not acceptable:
460
  return None, "No configuration met the minimum score threshold."
461
 
462
- # Prefer Pareto-optimal configs
463
  pareto_acceptable = [s for s in acceptable if get_name(s) in pareto_names]
464
  candidates = pareto_acceptable if pareto_acceptable else acceptable
465
 
466
- # Pick the one with lowest tokens among candidates
467
  best = min(candidates, key=lambda s: get_val(s, "avg_tokens"))
468
  name = get_name(best)
469
  tokens = get_val(best, "avg_tokens")
 
1
  # Copyright (c) Microsoft. All rights reserved.
2
 
3
+ """Experiment runner for comparing agent configurations.
4
 
5
  This module provides:
6
+ - Functions for running experiments with Agent/Candidate models
7
  - Pareto analysis utilities for multi-objective optimization
8
+ - Convenience functions for running optimization studies
 
9
  """
10
 
11
  from __future__ import annotations
12
 
13
  import json
14
  import logging
15
+ from dataclasses import asdict
16
  from datetime import datetime
17
  from pathlib import Path
18
+ from typing import TYPE_CHECKING, Any
19
 
20
  from .evaluators import HeuristicEvaluator
21
+ from .metrics import extract_metrics, metrics_to_dict
22
+ from .models import Agent, Candidate, ExperimentResult
23
  from .reporters import print_comparison_table, save_run_result
24
  from .runner import FlowExperimentRunner, setup_tracing
25
+ from .types import EvalCriterion, Task
26
 
27
  if TYPE_CHECKING:
28
  from flow.harness.maf import MAFHarness
29
 
30
+ from .optimizer import CandidateSummary
31
 
32
  logger = logging.getLogger(__name__)
33
 
34
 
35
+ def create_harness_from_agent(agent: Agent, workspace: Path) -> MAFHarness:
36
+ """Create a MAFHarness from an Agent definition.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  Args:
39
+ agent: The agent definition
40
  workspace: Working directory
41
 
42
  Returns:
43
  A configured MAFHarness
44
  """
45
+ from flow.experiments.models import resolve_tools
46
  from flow.harness.maf import MAFHarness
47
 
48
+ # Resolve tools to dict form
49
+ tools_spec = resolve_tools(agent.tools)
50
+
51
  return MAFHarness(
52
  workspace=workspace,
53
  memory_path=workspace / "memory",
54
+ enable_compaction=agent.compaction.enabled,
55
+ compaction_head_size=agent.compaction.head_size,
56
+ compaction_tail_size=agent.compaction.tail_size,
57
+ tools=tools_spec,
58
+ instructions=agent.instructions,
 
59
  )
60
 
61
 
62
+ async def run_single_experiment(
63
+ candidate: Candidate,
64
  task: Task,
65
  workspace: Path,
66
+ ) -> ExperimentResult:
67
+ """Run a single experiment with trace capture and evaluation.
68
 
69
  Args:
70
+ candidate: The candidate to test
71
  task: The task to run
72
  workspace: Working directory
73
 
74
  Returns:
75
+ ExperimentResult with metrics and evaluation
76
  """
77
+ harness = create_harness_from_agent(candidate.agent, workspace)
 
78
 
79
  try:
 
80
  runner = FlowExperimentRunner(keep_workspace=True)
 
 
81
  run_result = await runner.run(harness, task, workspace=workspace)
 
 
82
  metrics = extract_metrics(run_result.trace)
83
 
 
84
  evaluator = HeuristicEvaluator()
85
  eval_result = await evaluator.evaluate(run_result)
86
 
87
+ return ExperimentResult(
88
+ candidate=candidate,
89
  run_result=run_result,
90
  metrics=metrics,
91
  eval_score=eval_result.score,
 
96
  await harness.close()
97
 
98
 
99
+ def save_experiment_result(result: ExperimentResult, output_dir: Path) -> None:
100
+ """Save experiment result to files."""
101
+ config_dir = output_dir / result.candidate.agent.name
 
 
 
 
 
 
 
102
  save_run_result(
103
  result.run_result,
104
  config_dir,
105
  metrics=result.metrics,
106
  )
107
 
108
+ with open(config_dir / "experiment.json", "w") as f:
 
109
  json.dump({
110
+ "agent": asdict(result.candidate.agent),
111
+ "mutations": result.candidate.mutations,
112
+ "rationale": result.candidate.rationale,
113
  "evaluation": {
114
  "score": result.eval_score,
115
  "passed": result.eval_passed,
 
118
  }, f, indent=2)
119
 
120
 
121
+ async def run_experiments(
122
+ candidates: list[Candidate],
123
  task_prompt: str,
124
  output_dir: Path | None = None,
125
+ task_name: str = "experiment_task",
126
+ ) -> list[ExperimentResult]:
127
+ """Run multiple candidates and compare.
 
 
 
 
 
 
128
 
129
  Args:
130
+ candidates: List of candidates to test
131
  task_prompt: The task prompt to run
132
+ output_dir: Base directory for output (default: ~/.flow/experiments)
133
+ task_name: Name for the task
134
 
135
  Returns:
136
+ List of experiment results
137
  """
 
138
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
139
  if output_dir is None:
140
+ output_dir = Path.home() / ".flow" / "experiments"
141
  output_dir = output_dir / timestamp
142
  output_dir.mkdir(parents=True, exist_ok=True)
143
 
 
144
  task = Task(
145
  name=task_name,
146
  prompt=task_prompt,
 
152
  ],
153
  )
154
 
 
155
  with open(output_dir / "config.json", "w") as f: # noqa: ASYNC230
156
  json.dump({
157
  "task": task_prompt,
158
  "timestamp": timestamp,
159
+ "candidates": [asdict(c) for c in candidates],
160
  }, f, indent=2)
161
 
162
  print("=" * 80)
163
+ print(" FLOW EXPERIMENT RUNNER")
164
  print("=" * 80)
165
+ print(f" Task: {task_prompt[:60]}{'...' if len(task_prompt) > 60 else ''}")
166
+ print(f" Candidates: {len(candidates)}")
167
+ print(f" Output: {output_dir}")
168
  print("=" * 80)
169
 
170
+ setup_tracing("flow-experiment")
 
171
 
172
+ results: list[ExperimentResult] = []
173
+ for i, candidate in enumerate(candidates, 1):
174
+ print(f"\n[{i}/{len(candidates)}] Running: {candidate.agent.name}")
175
  print("-" * 40)
176
 
177
+ workspace = output_dir / candidate.agent.name / "workspace"
 
178
  workspace.mkdir(parents=True, exist_ok=True)
179
 
180
+ result = await run_single_experiment(
181
+ candidate=candidate,
182
  task=task,
183
  workspace=workspace,
184
  )
185
 
186
  results.append(result)
187
+ save_experiment_result(result, output_dir)
188
 
 
189
  status = "OK" if result.run_result.success else "FAIL"
190
  print(f" {status} | {result.run_result.duration_seconds:.1f}s | "
191
  f"Tokens: {result.metrics.total_tokens} | Tools: {result.metrics.tool_call_count}")
192
 
 
193
  comparison_data = [
194
  {
195
+ "name": r.candidate.agent.name,
196
  "success": r.run_result.success,
197
  "duration_seconds": r.run_result.duration_seconds,
198
  "metrics": metrics_to_dict(r.metrics),
 
207
  with open(output_dir / "comparison.json", "w") as f: # noqa: ASYNC230
208
  json.dump({"task": task_prompt, "results": comparison_data}, f, indent=2)
209
 
210
+ print_comparison_table(comparison_data, "Experiment Comparison")
 
 
211
  print(f"\nResults saved to: {output_dir}")
212
 
213
  return results
214
 
215
 
216
  # =============================================================================
217
+ # Pareto Analysis Utilities
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  # =============================================================================
219
 
220
 
221
  def compute_pareto_frontier(
222
+ summaries: list[CandidateSummary],
223
  score_key: str = "avg_score",
224
  cost_key: str = "avg_tokens",
225
  ) -> list[str]:
226
  """Compute Pareto frontier for multi-objective optimization.
227
 
228
+ Identifies configurations that are not dominated by any other.
229
+ A config is dominated if another has better score AND lower tokens.
230
 
231
  Args:
232
+ summaries: List of CandidateSummary objects (or dicts)
233
  score_key: Attribute name for the score metric (higher is better)
234
  cost_key: Attribute name for the cost metric (lower is better)
235
 
236
  Returns:
237
  List of names of Pareto-optimal configurations
238
  """
239
+ def get_val(s: CandidateSummary | dict[str, Any], key: str) -> float:
 
240
  if isinstance(s, dict):
241
  return float(s.get(key, 0))
242
  return float(getattr(s, key, 0))
243
 
244
+ def get_name(s: CandidateSummary | dict[str, Any]) -> str:
245
  if isinstance(s, dict):
246
  return str(s.get("name", ""))
247
  return str(getattr(s, "name", ""))
248
 
249
  sorted_summaries = sorted(summaries, key=lambda s: get_val(s, cost_key))
250
 
251
+ pareto_names: list[str] = []
252
  best_score = -1.0
253
 
254
  for summary in sorted_summaries:
 
261
 
262
 
263
  def generate_recommendation(
264
+ summaries: list[CandidateSummary],
265
  pareto_names: list[str],
266
  min_score: float = 0.7,
267
  ) -> tuple[str | None, str]:
268
  """Generate a recommendation based on Pareto analysis.
269
 
270
  Args:
271
+ summaries: List of CandidateSummary objects
272
+ pareto_names: Names of Pareto-optimal candidates
273
  min_score: Minimum acceptable score threshold
274
 
275
  Returns:
276
+ Tuple of (recommended_name, recommendation_text)
277
  """
278
+ def get_val(s: CandidateSummary | dict[str, Any], key: str) -> float:
279
  if isinstance(s, dict):
280
  return float(s.get(key, 0))
281
  return float(getattr(s, key, 0))
282
 
283
+ def get_name(s: CandidateSummary | dict[str, Any]) -> str:
284
  if isinstance(s, dict):
285
  return str(s.get("name", ""))
286
  return str(getattr(s, "name", ""))
287
 
 
288
  acceptable = [s for s in summaries if get_val(s, "avg_score") >= min_score]
289
  if not acceptable:
290
  return None, "No configuration met the minimum score threshold."
291
 
 
292
  pareto_acceptable = [s for s in acceptable if get_name(s) in pareto_names]
293
  candidates = pareto_acceptable if pareto_acceptable else acceptable
294
 
 
295
  best = min(candidates, key=lambda s: get_val(s, "avg_tokens"))
296
  name = get_name(best)
297
  tokens = get_val(best, "avg_tokens")
src/flow/experiments/config_export.py DELETED
@@ -1,184 +0,0 @@
1
- # Copyright (c) Microsoft. All rights reserved.
2
-
3
- """Config export/import utilities for optimizer results.
4
-
5
- Exports winning configurations as YAML files that can be loaded
6
- and used directly with `flow run --config <path>`.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- from dataclasses import asdict
12
- from pathlib import Path
13
- from typing import Any
14
-
15
- import yaml
16
-
17
- from .ablation import AblationConfig
18
-
19
-
20
- def export_config(
21
- config: AblationConfig,
22
- metrics: dict[str, Any],
23
- path: Path,
24
- ) -> None:
25
- """Export an AblationConfig as a reusable YAML file.
26
-
27
- The exported YAML includes:
28
- - All config parameters (directly loadable)
29
- - Optimization metadata prefixed with _ (ignored when loading)
30
-
31
- Args:
32
- config: The AblationConfig to export
33
- metrics: Optimization metrics (score, tokens, etc.)
34
- path: Path to write the YAML file
35
-
36
- Example output:
37
- name: compaction_head10_tail40
38
- enable_message_compaction: true
39
- compaction_head_size: 10
40
- ...
41
- _optimization:
42
- timestamp: "2026-01-26T14:30:22"
43
- avg_score: 0.89
44
- avg_tokens: 12400
45
- """
46
- data = asdict(config)
47
- data["_optimization"] = metrics
48
- path.parent.mkdir(parents=True, exist_ok=True)
49
- path.write_text(yaml.dump(data, default_flow_style=False, sort_keys=False))
50
-
51
-
52
- def load_config(path: Path) -> AblationConfig:
53
- """Load an AblationConfig from a YAML file.
54
-
55
- Ignores any keys prefixed with _ (optimization metadata).
56
-
57
- Args:
58
- path: Path to the YAML config file
59
-
60
- Returns:
61
- AblationConfig instance
62
-
63
- Raises:
64
- FileNotFoundError: If the config file doesn't exist
65
- ValueError: If the config is invalid
66
- """
67
- if not path.exists():
68
- raise FileNotFoundError(f"Config file not found: {path}")
69
-
70
- data = yaml.safe_load(path.read_text())
71
-
72
- # Filter out metadata keys (prefixed with _)
73
- config_data = {k: v for k, v in data.items() if not k.startswith("_")}
74
-
75
- try:
76
- return AblationConfig(**config_data)
77
- except TypeError as e:
78
- raise ValueError(f"Invalid config file {path}: {e}") from e
79
-
80
-
81
- def export_optimization_configs(
82
- summaries: list[dict[str, Any]],
83
- pareto_names: list[str],
84
- output_dir: Path,
85
- timestamp: str,
86
- ) -> dict[str, Path]:
87
- """Export all notable configs from an optimization run.
88
-
89
- Exports:
90
- - best_score.yaml: Highest quality config
91
- - best_cost.yaml: Lowest token usage config
92
- - best_efficiency.yaml: Best score/token ratio
93
- - pareto/<name>.yaml: All Pareto-optimal configs
94
-
95
- Args:
96
- summaries: List of ConfigSummary dicts with metrics
97
- pareto_names: Names of Pareto-optimal configs
98
- output_dir: Directory to write configs
99
- timestamp: Optimization timestamp for metadata
100
-
101
- Returns:
102
- Dict mapping config type to file path
103
- """
104
- configs_dir = output_dir / "configs"
105
- configs_dir.mkdir(parents=True, exist_ok=True)
106
-
107
- exported: dict[str, Path] = {}
108
-
109
- if not summaries:
110
- return exported
111
-
112
- # Find best by different criteria
113
- best_score = max(summaries, key=lambda s: s.get("avg_score", 0))
114
- best_cost = min(summaries, key=lambda s: s.get("avg_tokens", float("inf")))
115
- best_efficiency = max(
116
- summaries,
117
- key=lambda s: s.get("avg_score", 0) / max(s.get("avg_tokens", 1), 1),
118
- )
119
-
120
- # Export best configs
121
- for label, summary in [
122
- ("best_score", best_score),
123
- ("best_cost", best_cost),
124
- ("best_efficiency", best_efficiency),
125
- ]:
126
- config = _summary_to_config(summary)
127
- metrics = _extract_metrics(summary, timestamp, label)
128
- path = configs_dir / f"{label}.yaml"
129
- export_config(config, metrics, path)
130
- exported[label] = path
131
-
132
- # Export Pareto-optimal configs
133
- pareto_dir = configs_dir / "pareto"
134
- pareto_dir.mkdir(exist_ok=True)
135
-
136
- for summary in summaries:
137
- name = summary.get("name", "unknown")
138
- if name in pareto_names:
139
- config = _summary_to_config(summary)
140
- metrics = _extract_metrics(summary, timestamp, "pareto")
141
- metrics["is_pareto_optimal"] = True
142
- path = pareto_dir / f"{name}.yaml"
143
- export_config(config, metrics, path)
144
- exported[f"pareto/{name}"] = path
145
-
146
- return exported
147
-
148
-
149
- def _summary_to_config(summary: dict[str, Any]) -> AblationConfig:
150
- """Convert a summary dict back to an AblationConfig."""
151
- # Extract config fields from summary
152
- config_fields = {
153
- "name": summary.get("name", "unknown"),
154
- "enable_message_compaction": summary.get("enable_message_compaction", True),
155
- "enable_memory_tool": summary.get("enable_memory_tool", True),
156
- "enable_sub_agent": summary.get("enable_sub_agent", False),
157
- "compaction_head_size": summary.get("compaction_head_size", 10),
158
- "compaction_tail_size": summary.get("compaction_tail_size", 40),
159
- "bash_timeout": summary.get("bash_timeout", 120),
160
- }
161
-
162
- # Also check nested config if present
163
- if "config" in summary:
164
- config_fields.update(summary["config"])
165
-
166
- return AblationConfig(**config_fields)
167
-
168
-
169
- def _extract_metrics(
170
- summary: dict[str, Any],
171
- timestamp: str,
172
- selection_reason: str,
173
- ) -> dict[str, Any]:
174
- """Extract optimization metrics from a summary."""
175
- return {
176
- "timestamp": timestamp,
177
- "selection_reason": selection_reason,
178
- "avg_score": summary.get("avg_score", 0),
179
- "avg_tokens": summary.get("avg_tokens", 0),
180
- "avg_duration": summary.get("avg_duration", 0),
181
- "pass_rate": summary.get("pass_rate", 0),
182
- "pareto_rank": summary.get("pareto_rank"),
183
- "is_pareto_optimal": summary.get("is_pareto_optimal", False),
184
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/flow/experiments/models.py ADDED
@@ -0,0 +1,517 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ """Core data models for the optimization framework.
4
+
5
+ Defines:
6
+ - CompactionConfig: Extensible compaction strategy configuration
7
+ - Agent: Framework-agnostic agent definition (what the customer brings)
8
+ - Candidate: A mutated agent variant produced by optimization
9
+ - CandidateStrategy: Protocol for generating candidates from a base agent
10
+ - GridSearchStrategy: Brute-force grid search over parameter combinations
11
+ - TOOL_PRESETS: Standard tool configurations for agents
12
+ - resolve_tools: Normalize tool specification to dict form
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import asdict, dataclass, field
18
+ from itertools import product as itertools_product
19
+ from pathlib import Path
20
+ from typing import Any, Protocol, runtime_checkable
21
+
22
+ import yaml
23
+
24
+
25
+ # =============================================================================
26
+ # Tool Configuration
27
+ # =============================================================================
28
+
29
# Tool presets define common tool configurations.
# Each preset maps tool names to their configuration dicts
# (an empty dict means "use the tool's defaults").
TOOL_PRESETS: dict[str, dict[str, dict[str, Any]]] = {
    # "full": the standard toolset plus a delegating sub-agent.
    "full": {
        "read_file": {},
        "write_file": {},
        "list_directory": {},
        "grep_search": {},
        "bash_execute": {"timeout": 120},
        "check_processes": {},
        "python_repl": {},
        "think": {},
        "task_done": {},
        "memory": {},
        "sub_agent": {"model": "gpt-4o-mini"},
    },
    # "standard": file access, search, execution, reflection, and memory.
    "standard": {
        "read_file": {},
        "write_file": {},
        "list_directory": {},
        "grep_search": {},
        "bash_execute": {"timeout": 120},
        "check_processes": {},
        "python_repl": {},
        "think": {},
        "task_done": {},
        "memory": {},
    },
    # "minimal": smallest toolset that can still modify the workspace.
    "minimal": {
        "read_file": {},
        "write_file": {},
        "bash_execute": {"timeout": 120},
        "task_done": {},
    },
    # "readonly": inspection only — no writes, no command execution.
    "readonly": {
        "read_file": {},
        "list_directory": {},
        "grep_search": {},
        "think": {},
        "task_done": {},
    },
}
71
+
72
+
73
+ def resolve_tools(tools: str | list[str] | dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]]:
74
+ """Normalize tool specification to dict form.
75
+
76
+ Accepts three input formats:
77
+ - str: Preset name (e.g., "standard", "minimal", "full", "readonly")
78
+ - list[str]: List of tool names with default configs
79
+ - dict[str, dict]: Full specification with per-tool configs
80
+
81
+ Args:
82
+ tools: Tool specification in any supported format
83
+
84
+ Returns:
85
+ Dict mapping tool names to their configuration dicts
86
+
87
+ Raises:
88
+ ValueError: If preset name is unknown
89
+
90
+ Example:
91
+ >>> resolve_tools("standard")
92
+ {"read_file": {}, "write_file": {}, ...}
93
+
94
+ >>> resolve_tools(["read_file", "bash_execute"])
95
+ {"read_file": {}, "bash_execute": {}}
96
+
97
+ >>> resolve_tools({"bash_execute": {"timeout": 60}})
98
+ {"bash_execute": {"timeout": 60}}
99
+ """
100
+ if isinstance(tools, str):
101
+ if tools not in TOOL_PRESETS:
102
+ raise ValueError(f"Unknown tool preset: {tools}. Available: {list(TOOL_PRESETS.keys())}")
103
+ # Return a copy to prevent mutation of the preset
104
+ return {k: dict(v) for k, v in TOOL_PRESETS[tools].items()}
105
+ elif isinstance(tools, list):
106
+ return {name: {} for name in tools}
107
+ else:
108
+ # Already a dict, return a copy
109
+ return {k: dict(v) for k, v in tools.items()}
110
+
111
+
112
@dataclass
class CompactionConfig:
    """Extensible compaction strategy configuration.

    Uses a tagged-union pattern: ``strategy`` names the algorithm and
    ``params`` carries its strategy-specific settings. Known strategies:

    - "head_tail": keep the first N and the last M messages (default)
    - "last_n":    keep only the last N messages
    - "none":      disable compaction entirely

    New strategies (e.g. "summarize") are just new (strategy, params)
    combinations and require no changes to existing code.

    Attributes:
        strategy: The compaction strategy name
        params: Strategy-specific parameters
    """

    strategy: str = "head_tail"
    params: dict[str, Any] = field(default_factory=lambda: {"head_size": 10, "tail_size": 40})

    @staticmethod
    def head_tail(head_size: int = 10, tail_size: int = 40) -> CompactionConfig:
        """Build a config that keeps the first/last slices of the history."""
        return CompactionConfig(
            strategy="head_tail",
            params={"head_size": head_size, "tail_size": tail_size},
        )

    @staticmethod
    def last_n(n: int = 50) -> CompactionConfig:
        """Build a config that keeps only the trailing N messages."""
        return CompactionConfig(strategy="last_n", params={"n": n})

    @staticmethod
    def none() -> CompactionConfig:
        """Build a config with compaction switched off."""
        return CompactionConfig(strategy="none", params={})

    @property
    def enabled(self) -> bool:
        """True unless the strategy is "none"."""
        return self.strategy != "none"

    @property
    def head_size(self) -> int:
        """Head size for the head_tail strategy; 0 for any other strategy."""
        return self.params.get("head_size", 0)

    @property
    def tail_size(self) -> int:
        """Tail size for the head_tail strategy; 0 for any other strategy."""
        return self.params.get("tail_size", 0)
161
+
162
+
163
@dataclass
class Agent:
    """Framework-agnostic agent definition.

    This is what the customer brings to the optimization service.
    It describes the agent's identity, model, tools, and context
    engineering settings — everything needed to instantiate and
    run the agent on any supported framework harness.

    Attributes:
        name: Unique identifier for this agent
        description: Human-readable description
        instructions: System prompt / instructions (optional, uses framework default if None)
        model: Model deployment name (e.g., "gpt-4o")
        compaction: Compaction strategy configuration
        tools: Tool configuration - can be:
            - str: Preset name ("standard", "minimal", "full", "readonly")
            - list[str]: List of tool names with default configs
            - dict[str, dict]: Full specification with per-tool configs
    """

    name: str
    description: str = ""
    instructions: str | None = None  # None -> framework-default system prompt
    model: str | None = None  # None presumably falls back to a harness default — TODO confirm
    compaction: CompactionConfig = field(default_factory=CompactionConfig)  # head_tail(10, 40) by default
    tools: str | list[str] | dict[str, dict[str, Any]] = "standard"  # normalized via resolve_tools()
190
+
191
+
192
@dataclass
class Candidate:
    """A mutated agent variant produced by the optimization process.

    Each candidate is derived from a base Agent with specific mutations
    applied. The mutations dict records what was changed, and the
    rationale explains why.

    Attributes:
        agent: The mutated agent configuration
        mutations: Dict describing what was changed from the base
        rationale: Human-readable explanation of why this candidate exists
    """

    agent: Agent
    mutations: dict[str, Any] = field(default_factory=dict)  # e.g. {"tools": "minimal"}; empty for baseline
    rationale: str = ""  # e.g. "Grid search: tools=minimal"
209
+
210
+
211
@dataclass
class ExperimentResult:
    """Result of running a single experiment (one candidate on one task).

    Attributes:
        candidate: The candidate configuration that was executed.
        run_result: RunResult from types.py — typed as Any here,
            presumably to avoid importing the runner types in this
            module; confirm against callers.
        metrics: TraceMetrics from metrics.py (same Any-typing caveat).
        eval_score: Evaluator score; 0.0 until an evaluation runs.
        eval_passed: Whether the evaluator judged the run as passing.
        eval_reasoning: Evaluator's textual justification.
    """

    candidate: Candidate
    run_result: Any  # RunResult from types.py
    metrics: Any  # TraceMetrics from metrics.py
    eval_score: float = 0.0
    eval_passed: bool = False
    eval_reasoning: str = ""
221
+
222
+
223
@runtime_checkable
class CandidateStrategy(Protocol):
    """Protocol for generating candidate variants from a base agent.

    Decorated with @runtime_checkable so implementations can be
    verified with isinstance() at runtime.

    Implementations explore different regions of the optimization space:
    - GridSearchStrategy: Exhaustive grid over parameter combinations
    - (Future) HeuristicStrategy: Rule-based mutations from telemetry
    - (Future) BayesianStrategy: Bayesian optimization over parameters
    """

    def generate(self, base: Agent, budget: int) -> list[Candidate]:
        """Generate candidate variants from a base agent.

        Args:
            base: The base agent to mutate
            budget: Maximum number of candidates to generate

        Returns:
            List of Candidate objects (at most `budget` items)
        """
        ...
244
+
245
+
246
class GridSearchStrategy:
    """Brute-force grid search over parameter combinations.

    Generates candidates by taking the Cartesian product of all
    specified parameter variations.

    Example:
        strategy = GridSearchStrategy(variations={
            "tools": ["standard", "minimal", "full"],
            "compaction": [
                CompactionConfig.head_tail(10, 40),
                CompactionConfig.head_tail(5, 20),
                CompactionConfig.none(),
            ],
        })
        candidates = strategy.generate(base_agent, budget=20)
    """

    def __init__(self, variations: dict[str, list[Any]]) -> None:
        """Initialize with parameter variations.

        Args:
            variations: Dict mapping Agent field names to lists of values to try.
                Special keys:
                - "compaction": Accepts CompactionConfig objects
                - "tools": Accepts preset strings, lists, or dicts
        """
        self.variations = variations

    def generate(self, base: Agent, budget: int) -> list[Candidate]:
        """Generate all grid combinations up to budget.

        With no variations, returns the base agent as a single
        "baseline" candidate. Otherwise iterates the Cartesian product
        of all variation values and stops once `budget` candidates
        have been produced.
        """
        if not self.variations:
            return [Candidate(agent=base, mutations={}, rationale="baseline")]

        param_names = list(self.variations.keys())
        param_values = list(self.variations.values())

        candidates = []
        for values in itertools_product(*param_values):
            # Budget cap: product size can explode combinatorially.
            if len(candidates) >= budget:
                break

            # strict=True guards against a names/values length mismatch.
            mutations = dict(zip(param_names, values, strict=True))

            # Build mutated agent: work on a deep dict copy of the base
            # (asdict also converts the nested CompactionConfig to a dict),
            # then overlay the mutation values. Keys that are not Agent
            # fields are silently skipped here but still recorded in
            # `mutations` below.
            agent_dict = asdict(base)
            for key, value in mutations.items():
                if key == "compaction" and isinstance(value, CompactionConfig):
                    agent_dict["compaction"] = asdict(value)
                elif key in agent_dict:
                    agent_dict[key] = value

            # Reconstruct CompactionConfig from dict
            comp_data = agent_dict.pop("compaction")
            if isinstance(comp_data, dict):
                compaction = CompactionConfig(**comp_data)
            else:
                compaction = comp_data

            # Handle tools field - keep as-is (str, list, or dict)
            tools = agent_dict.pop("tools", "standard")

            mutated = Agent(
                **{k: v for k, v in agent_dict.items() if k not in ("compaction", "tools")},
                compaction=compaction,
                tools=tools,
            )

            # Build a human-readable name from the mutations; insertion
            # order of `mutations` (and hence `variations`) determines
            # the segment order.
            name_parts = []
            for k, v in mutations.items():
                if isinstance(v, CompactionConfig):
                    name_parts.append(f"{v.strategy}")
                    if v.strategy == "head_tail":
                        name_parts.append(f"h{v.head_size}_t{v.tail_size}")
                elif k == "tools":
                    # Format tools for name
                    if isinstance(v, str):
                        name_parts.append(f"tools={v}")
                    elif isinstance(v, list):
                        name_parts.append(f"tools=[{len(v)}]")
                    else:
                        # NOTE(review): dict specs get the same "[len]" label
                        # as lists, so two different specs of equal size are
                        # indistinguishable by name — confirm this is intended.
                        name_parts.append(f"tools=[{len(v)}]")
                elif isinstance(v, bool):
                    name_parts.append(f"{k}={'on' if v else 'off'}")
                else:
                    name_parts.append(f"{k}={v}")

            mutated.name = f"{base.name}_{'_'.join(name_parts)}"

            # Serialize mutations for storage (convert non-serializable types)
            serializable_mutations = {}
            for k, v in mutations.items():
                if isinstance(v, CompactionConfig):
                    serializable_mutations[k] = asdict(v)
                else:
                    serializable_mutations[k] = v

            candidates.append(Candidate(
                agent=mutated,
                mutations=serializable_mutations,
                rationale=f"Grid search: {', '.join(name_parts)}",
            ))

        return candidates
351
+
352
+
353
+ # =============================================================================
354
+ # Agent YAML Export / Import
355
+ # =============================================================================
356
+
357
+
358
def export_agent(
    agent: Agent,
    path: Path,
    metrics: dict[str, Any] | None = None,
) -> None:
    """Export an Agent as a reusable YAML file.

    Args:
        agent: The Agent to export
        path: Path to write the YAML file
        metrics: Optional optimization metrics. Stored under the
            "_optimization" key, which load_agent ignores on re-import.
    """
    payload = asdict(agent)
    if metrics:
        payload["_optimization"] = metrics
    # Make sure the destination directory exists before writing.
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = yaml.dump(payload, default_flow_style=False, sort_keys=False)
    path.write_text(serialized)
375
+
376
+
377
def load_agent(path: Path) -> Agent:
    """Load an Agent from a YAML file.

    Keys prefixed with _ (optimization metadata written by export_agent)
    are ignored.

    Args:
        path: Path to the YAML config file

    Returns:
        Agent instance

    Raises:
        FileNotFoundError: If the file doesn't exist
        ValueError: If the config is invalid
    """
    if not path.exists():
        raise FileNotFoundError(f"Agent config file not found: {path}")

    raw = yaml.safe_load(path.read_text())
    cfg = {key: value for key, value in raw.items() if not key.startswith("_")}

    # compaction is serialized as a plain mapping; rebuild the dataclass.
    if isinstance(cfg.get("compaction"), dict):
        cfg["compaction"] = CompactionConfig(**cfg["compaction"])

    try:
        return Agent(**cfg)
    except TypeError as e:
        # Unknown/missing fields surface as TypeError from the constructor.
        raise ValueError(f"Invalid agent config file {path}: {e}") from e
406
+
407
+
408
def export_optimization_results(
    summaries: list[dict[str, Any]],
    pareto_names: list[str],
    output_dir: Path,
    timestamp: str,
) -> dict[str, Path]:
    """Export notable agents from an optimization run as YAML files.

    Exports:
    - best_score.yaml: Highest quality agent
    - best_cost.yaml: Lowest token usage agent
    - best_efficiency.yaml: Best score/token ratio
    - pareto/<name>.yaml: All Pareto-optimal agents

    Args:
        summaries: List of summary dicts with metrics
        pareto_names: Names of Pareto-optimal agents
        output_dir: Directory to write agent files
        timestamp: Optimization timestamp for metadata

    Returns:
        Dict mapping label to file path (empty if `summaries` is empty)
    """
    configs_dir = output_dir / "agents"
    configs_dir.mkdir(parents=True, exist_ok=True)

    exported: dict[str, Path] = {}

    if not summaries:
        return exported

    best_score = max(summaries, key=lambda s: s.get("avg_score", 0))
    best_cost = min(summaries, key=lambda s: s.get("avg_tokens", float("inf")))
    # Efficiency = quality per token; max(..., 1) guards against a
    # zero or missing token count.
    best_efficiency = max(
        summaries,
        key=lambda s: s.get("avg_score", 0) / max(s.get("avg_tokens", 1), 1),
    )

    for label, summary in [
        ("best_score", best_score),
        ("best_cost", best_cost),
        ("best_efficiency", best_efficiency),
    ]:
        agent = _summary_to_agent(summary)
        metrics = _extract_metrics(summary, timestamp, label)
        path = configs_dir / f"{label}.yaml"
        export_agent(agent, path, metrics)
        exported[label] = path

    # Export Pareto-optimal agents
    pareto_dir = configs_dir / "pareto"
    pareto_dir.mkdir(exist_ok=True)

    # Set membership is O(1); the original tested against the list per
    # summary, which is O(len(summaries) * len(pareto_names)).
    pareto_set = set(pareto_names)

    for summary in summaries:
        name = summary.get("name", "unknown")
        if name in pareto_set:
            agent = _summary_to_agent(summary)
            metrics = _extract_metrics(summary, timestamp, "pareto")
            metrics["is_pareto_optimal"] = True
            path = pareto_dir / f"{name}.yaml"
            export_agent(agent, path, metrics)
            exported[f"pareto/{name}"] = path

    return exported
472
+
473
+
474
def _summary_to_agent(summary: dict[str, Any]) -> Agent:
    """Convert a summary dict back to an Agent.

    Prefers the nested "agent" dict; falls back to legacy flat summary
    fields. Works on a copy of the nested dict so the caller's summary
    is never mutated (the original replaced summary["agent"]["compaction"]
    in place with a CompactionConfig instance).

    Args:
        summary: Summary dict, optionally containing a nested "agent" dict.

    Returns:
        Agent reconstructed from the summary.
    """
    agent_data = summary.get("agent", {})
    if agent_data:
        # Copy before rehydrating so the caller's dict keeps plain values.
        agent_data = dict(agent_data)
        if isinstance(agent_data.get("compaction"), dict):
            agent_data["compaction"] = CompactionConfig(**agent_data["compaction"])
        # tools field can be str, list, or dict - all are valid, keep as-is
        return Agent(**agent_data)

    # Fallback: build from flat summary fields (legacy format)
    if summary.get("enable_message_compaction", True):
        compaction = CompactionConfig.head_tail(
            head_size=summary.get("compaction_head_size", 10),
            tail_size=summary.get("compaction_tail_size", 40),
        )
    else:
        compaction = CompactionConfig.none()

    # Determine tools from legacy fields if present
    tools: str | list[str] | dict[str, dict[str, Any]] = summary.get("tools", "standard")

    return Agent(
        name=summary.get("name", "unknown"),
        compaction=compaction,
        tools=tools,
    )
500
+
501
+
502
+ def _extract_metrics(
503
+ summary: dict[str, Any],
504
+ timestamp: str,
505
+ selection_reason: str,
506
+ ) -> dict[str, Any]:
507
+ """Extract optimization metrics from a summary."""
508
+ return {
509
+ "timestamp": timestamp,
510
+ "selection_reason": selection_reason,
511
+ "avg_score": summary.get("avg_score", 0),
512
+ "avg_tokens": summary.get("avg_tokens", 0),
513
+ "avg_duration": summary.get("avg_duration", 0),
514
+ "pass_rate": summary.get("pass_rate", 0),
515
+ "pareto_rank": summary.get("pareto_rank"),
516
+ "is_pareto_optimal": summary.get("is_pareto_optimal", False),
517
+ }
src/flow/experiments/optimizer.py CHANGED
@@ -3,7 +3,7 @@
3
  """Optimizer service for finding best agent configurations.
4
 
5
  Runs experiments in parallel, evaluates with LLM-as-Judge,
6
- ranks via Pareto analysis, and exports reusable configs.
7
  """
8
 
9
  from __future__ import annotations
@@ -15,31 +15,32 @@ import os
15
  from collections.abc import Callable
16
  from dataclasses import asdict, dataclass, field
17
  from datetime import datetime
18
- from itertools import product
19
  from pathlib import Path
20
  from typing import Any
21
 
22
  from openai import AsyncAzureOpenAI
23
 
24
  from .ablation import (
25
- AblationConfig,
26
  compute_pareto_frontier,
27
- create_harness_from_config,
28
  )
29
- from .config_export import export_optimization_configs
30
  from .evaluators import LLMEvaluator
31
  from .metrics import TraceMetrics, extract_metrics
 
 
 
 
32
  from .runner import FlowExperimentRunner, setup_tracing
33
- from .types import EvalCriterion, RunResult, Task
34
 
35
  logger = logging.getLogger(__name__)
36
 
37
 
38
  @dataclass
39
  class TaskResult:
40
- """Result for a single config-task pair."""
41
 
42
- config_name: str
43
  task_name: str
44
  run_result: RunResult
45
  metrics: TraceMetrics
@@ -49,12 +50,12 @@ class TaskResult:
49
 
50
 
51
  @dataclass
52
- class ConfigSummary:
53
- """Aggregated summary for a configuration across all tasks."""
54
 
55
  name: str
56
- config: AblationConfig
57
- task_results: list[TaskResult] = field(default_factory=list)
58
 
59
  # Aggregated metrics
60
  avg_score: float = 0.0
@@ -72,7 +73,9 @@ class ConfigSummary:
72
  """Convert to dictionary for serialization."""
73
  return {
74
  "name": self.name,
75
- "config": asdict(self.config),
 
 
76
  "avg_score": self.avg_score,
77
  "avg_tokens": self.avg_tokens,
78
  "avg_duration": self.avg_duration,
@@ -90,21 +93,21 @@ class OptimizationResult:
90
 
91
  timestamp: str
92
  output_dir: Path
93
- summaries: list[ConfigSummary]
94
  pareto_frontier: list[str]
95
- exported_configs: dict[str, Path]
96
 
97
  # Rankings
98
- rank_by_score: list[str] = field(default_factory=list)
99
- rank_by_tokens: list[str] = field(default_factory=list)
100
- rank_by_efficiency: list[str] = field(default_factory=list)
101
 
102
  # Stats
103
  total_experiments: int = 0
104
  total_duration_seconds: float = 0.0
105
 
106
- def get_best_config(self, criterion: str = "score") -> ConfigSummary | None:
107
- """Get the best config by a criterion."""
108
  if criterion == "score":
109
  names = self.rank_by_score
110
  elif criterion == "tokens":
@@ -126,17 +129,18 @@ class OptimizationResult:
126
  class FlowOptimizer:
127
  """Optimizer for finding best agent configurations.
128
 
129
- Runs experiments in parallel, evaluates results, performs
130
- Pareto analysis, and exports winning configs.
 
131
 
132
  Example:
 
 
 
133
  optimizer = FlowOptimizer(parallel=4)
134
- configs = [
135
- AblationConfig(name="baseline", enable_message_compaction=False),
136
- AblationConfig(name="compaction", enable_message_compaction=True),
137
- ]
138
- tasks = [Task(name="test", prompt="Create hello world")]
139
- result = await optimizer.optimize(configs, tasks)
140
  print(f"Best: {result.rank_by_score[0]}")
141
  """
142
 
@@ -146,69 +150,55 @@ class FlowOptimizer:
146
  use_llm_evaluator: bool = True,
147
  output_dir: Path | None = None,
148
  ) -> None:
149
- """Initialize the optimizer.
150
-
151
- Args:
152
- parallel: Max concurrent experiments
153
- use_llm_evaluator: Whether to use LLM for evaluation
154
- output_dir: Base directory for results
155
- """
156
  self.parallel = parallel
157
  self.use_llm_evaluator = use_llm_evaluator
158
  self.output_dir = output_dir or Path.home() / ".flow" / "optimizations"
159
 
160
  async def optimize(
161
  self,
162
- configs: list[AblationConfig],
163
  tasks: list[Task],
164
  progress_callback: Callable[[int, int, str, str], None] | None = None,
165
  ) -> OptimizationResult:
166
- """Run optimization across all configs and tasks.
167
 
168
  Args:
169
- configs: Configurations to test
170
- tasks: Tasks to run each config on
171
- progress_callback: Optional callback(completed, total, config, task)
172
 
173
  Returns:
174
- OptimizationResult with rankings and exported configs
175
  """
176
  start_time = datetime.now()
177
  timestamp = start_time.strftime("%Y%m%d_%H%M%S")
178
  run_dir = self.output_dir / timestamp
179
  run_dir.mkdir(parents=True, exist_ok=True)
180
 
181
- # Setup
182
  setup_tracing("flow-optimizer")
183
- self._save_config(configs, tasks, run_dir)
184
 
185
  print("=" * 70)
186
  print(" FLOW OPTIMIZER")
187
  print("=" * 70)
188
- print(f" Configs: {len(configs)}")
189
- print(f" Tasks: {len(tasks)}")
190
- print(f" Total: {len(configs) * len(tasks)} experiments")
191
- print(f" Parallel: {self.parallel}")
192
- print(f" Output: {run_dir}")
193
  print("=" * 70)
194
 
195
- # Create LLM evaluator if needed
196
  evaluator = None
197
  if self.use_llm_evaluator:
198
  evaluator = self._create_evaluator()
199
 
200
- # Run all experiments in parallel
201
  task_results = await self._run_parallel(
202
- configs, tasks, run_dir, evaluator, progress_callback
203
  )
204
 
205
- # Aggregate by config
206
- summaries = self._aggregate_results(task_results, configs)
207
-
208
- # Pareto analysis
209
  pareto_names = self._compute_pareto(summaries)
210
 
211
- # Compute rankings
212
  rank_by_score = sorted(summaries, key=lambda s: s.avg_score, reverse=True)
213
  rank_by_tokens = sorted(summaries, key=lambda s: s.avg_tokens)
214
  rank_by_efficiency = sorted(
@@ -217,9 +207,8 @@ class FlowOptimizer:
217
  reverse=True,
218
  )
219
 
220
- # Export configs
221
  summary_dicts = [s.to_dict() for s in summaries]
222
- exported = export_optimization_configs(
223
  summary_dicts, pareto_names, run_dir, timestamp
224
  )
225
 
@@ -230,7 +219,7 @@ class FlowOptimizer:
230
  output_dir=run_dir,
231
  summaries=summaries,
232
  pareto_frontier=pareto_names,
233
- exported_configs=exported,
234
  rank_by_score=[s.name for s in rank_by_score],
235
  rank_by_tokens=[s.name for s in rank_by_tokens],
236
  rank_by_efficiency=[s.name for s in rank_by_efficiency],
@@ -238,56 +227,49 @@ class FlowOptimizer:
238
  total_duration_seconds=(end_time - start_time).total_seconds(),
239
  )
240
 
241
- # Save results
242
  self._save_results(result, run_dir)
243
-
244
- # Print summary
245
  self._print_summary(result)
246
 
247
  return result
248
 
249
  async def _run_parallel(
250
  self,
251
- configs: list[AblationConfig],
252
  tasks: list[Task],
253
  run_dir: Path,
254
  evaluator: LLMEvaluator | None,
255
  progress_callback: Callable[[int, int, str, str], None] | None,
256
  ) -> list[TaskResult]:
257
- """Run all config-task pairs in parallel with semaphore control."""
258
  semaphore = asyncio.Semaphore(self.parallel)
259
- total = len(configs) * len(tasks)
260
  completed = 0
261
  lock = asyncio.Lock()
262
 
263
- async def run_one(config: AblationConfig, task: Task) -> TaskResult:
264
  nonlocal completed
265
  async with semaphore:
266
- workspace = run_dir / "workspaces" / config.name / task.name
267
  workspace.mkdir(parents=True, exist_ok=True)
268
 
269
- result = await self._run_single(config, task, workspace, evaluator)
270
 
271
  async with lock:
272
  completed += 1
273
  status = "✓" if result.eval_passed else "✗"
274
  print(
275
- f" [{completed}/{total}] {config.name}/{task.name}: "
276
  f"{status} score={result.eval_score:.2f} "
277
  f"tokens={result.metrics.total_tokens:,}"
278
  )
279
  if progress_callback:
280
- progress_callback(completed, total, config.name, task.name)
281
 
282
  return result
283
 
284
- # Create all tasks
285
- coroutines = [run_one(config, task) for config in configs for task in tasks]
286
-
287
- # Run with gather
288
  gather_results = await asyncio.gather(*coroutines, return_exceptions=True)
289
 
290
- # Filter out exceptions
291
  valid_results: list[TaskResult] = []
292
  for r in gather_results:
293
  if isinstance(r, BaseException):
@@ -299,33 +281,31 @@ class FlowOptimizer:
299
 
300
  async def _run_single(
301
  self,
302
- config: AblationConfig,
303
  task: Task,
304
  workspace: Path,
305
  evaluator: LLMEvaluator | None,
306
  ) -> TaskResult:
307
- """Run a single config-task experiment."""
308
- harness = create_harness_from_config(config, workspace)
309
 
310
  try:
311
  runner = FlowExperimentRunner(keep_workspace=True)
312
  run_result = await runner.run(harness, task, workspace=workspace)
313
  metrics = extract_metrics(run_result.trace)
314
 
315
- # Evaluate
316
  if evaluator:
317
  eval_result = await evaluator.evaluate(run_result)
318
  eval_score = eval_result.score
319
  eval_passed = eval_result.passed
320
  eval_reasoning = eval_result.reasoning
321
  else:
322
- # Simple heuristic: passed if no error
323
  eval_score = 1.0 if run_result.success else 0.0
324
  eval_passed = run_result.success
325
  eval_reasoning = "Success" if run_result.success else run_result.error or "Failed"
326
 
327
  return TaskResult(
328
- config_name=config.name,
329
  task_name=task.name,
330
  run_result=run_result,
331
  metrics=metrics,
@@ -339,25 +319,25 @@ class FlowOptimizer:
339
  def _aggregate_results(
340
  self,
341
  task_results: list[TaskResult],
342
- configs: list[AblationConfig],
343
- ) -> list[ConfigSummary]:
344
- """Aggregate task results into config summaries."""
345
- config_map = {c.name: c for c in configs}
346
- results_by_config: dict[str, list[TaskResult]] = {c.name: [] for c in configs}
347
 
348
  for result in task_results:
349
- if result.config_name in results_by_config:
350
- results_by_config[result.config_name].append(result)
351
 
352
  summaries = []
353
- for name, results in results_by_config.items():
354
  if not results:
355
  continue
356
 
357
- config = config_map[name]
358
- summary = ConfigSummary(
359
  name=name,
360
- config=config,
361
  task_results=results,
362
  avg_score=sum(r.eval_score for r in results) / len(results),
363
  avg_tokens=sum(r.metrics.total_tokens for r in results) / len(results),
@@ -370,19 +350,17 @@ class FlowOptimizer:
370
 
371
  return summaries
372
 
373
- def _compute_pareto(self, summaries: list[ConfigSummary]) -> list[str]:
374
  """Compute Pareto frontier (maximize score, minimize tokens)."""
375
- # Use shared utility
376
  pareto_names = compute_pareto_frontier(summaries)
377
 
378
- # Mark summaries with Pareto status
379
  for summary in summaries:
380
  if summary.name in pareto_names:
381
  summary.is_pareto_optimal = True
382
  summary.pareto_rank = 0
383
  else:
384
  summary.is_pareto_optimal = False
385
- summary.pareto_rank = 1 # Simplified: all non-Pareto get rank 1
386
 
387
  return pareto_names
388
 
@@ -410,7 +388,7 @@ class FlowOptimizer:
410
 
411
  def _save_config(
412
  self,
413
- configs: list[AblationConfig],
414
  tasks: list[Task],
415
  run_dir: Path,
416
  ) -> None:
@@ -418,7 +396,7 @@ class FlowOptimizer:
418
  with open(run_dir / "optimization_config.json", "w") as f:
419
  json.dump(
420
  {
421
- "configs": [asdict(c) for c in configs],
422
  "tasks": [{"name": t.name, "prompt": t.prompt} for t in tasks],
423
  "parallel": self.parallel,
424
  "use_llm_evaluator": self.use_llm_evaluator,
@@ -437,7 +415,7 @@ class FlowOptimizer:
437
  "rank_by_score": result.rank_by_score,
438
  "rank_by_tokens": result.rank_by_tokens,
439
  "rank_by_efficiency": result.rank_by_efficiency,
440
- "exported_configs": {k: str(v) for k, v in result.exported_configs.items()},
441
  "summaries": [s.to_dict() for s in result.summaries],
442
  }
443
 
@@ -450,8 +428,7 @@ class FlowOptimizer:
450
  print(" OPTIMIZATION RESULTS")
451
  print("=" * 70)
452
 
453
- # Rankings table
454
- print(f"\n{'Config':<30} | {'Score':>8} | {'Tokens':>10} | {'Pareto':>8}")
455
  print("-" * 65)
456
 
457
  for summary in sorted(result.summaries, key=lambda s: s.avg_score, reverse=True):
@@ -465,62 +442,19 @@ class FlowOptimizer:
465
  print(f"Pareto frontier: {result.pareto_frontier}")
466
  print(f"Best by score: {result.rank_by_score[0] if result.rank_by_score else 'N/A'}")
467
  print(f"Best by efficiency: {result.rank_by_efficiency[0] if result.rank_by_efficiency else 'N/A'}")
468
- print("\nExported configs:")
469
- for name, path in result.exported_configs.items():
470
  print(f" {name}: {path}")
471
  print(f"\nResults saved to: {result.output_dir}")
472
 
473
 
474
- def generate_grid_configs(
475
- base_name: str,
476
- variations: dict[str, list[Any]],
477
- ) -> list[AblationConfig]:
478
- """Generate configs from a variation grid.
479
-
480
- Args:
481
- base_name: Base name for generated configs
482
- variations: Dict of param_name -> list of values
483
-
484
- Returns:
485
- List of AblationConfig for each combination
486
-
487
- Example:
488
- configs = generate_grid_configs("grid", {
489
- "enable_message_compaction": [True, False],
490
- "compaction_head_size": [5, 10, 20],
491
- })
492
- """
493
- if not variations:
494
- return [AblationConfig(name=base_name)]
495
-
496
- param_names = list(variations.keys())
497
- param_values = list(variations.values())
498
-
499
- configs = []
500
- for values in product(*param_values):
501
- kwargs = dict(zip(param_names, values, strict=True))
502
- name = f"{base_name}_" + "_".join(f"{k}={v}" for k, v in kwargs.items())
503
- configs.append(AblationConfig(name=name, **kwargs))
504
-
505
- return configs
506
-
507
-
508
  def load_tasks_from_jsonl(path: Path) -> list[Task]:
509
  """Load tasks from a JSONL file.
510
 
511
- Each line should be a JSON object with:
512
- - name: Task name
513
- - prompt: Task prompt
514
- - criteria: Optional list of evaluation criteria
515
- - category: Optional category string
516
- - metadata: Optional additional metadata dict
517
-
518
  Args:
519
  path: Path to JSONL file
520
 
521
  Returns:
522
  List of Task objects
523
  """
524
- from flow.experiments.types import _load_tasks_from_jsonl
525
-
526
- return _load_tasks_from_jsonl(path)
 
3
  """Optimizer service for finding best agent configurations.
4
 
5
  Runs experiments in parallel, evaluates with LLM-as-Judge,
6
+ ranks via Pareto analysis, and exports reusable agent configs.
7
  """
8
 
9
  from __future__ import annotations
 
15
  from collections.abc import Callable
16
  from dataclasses import asdict, dataclass, field
17
  from datetime import datetime
 
18
  from pathlib import Path
19
  from typing import Any
20
 
21
  from openai import AsyncAzureOpenAI
22
 
23
  from .ablation import (
 
24
  compute_pareto_frontier,
25
+ create_harness_from_agent,
26
  )
 
27
  from .evaluators import LLMEvaluator
28
  from .metrics import TraceMetrics, extract_metrics
29
+ from .models import (
30
+ Candidate,
31
+ export_optimization_results,
32
+ )
33
  from .runner import FlowExperimentRunner, setup_tracing
34
+ from .types import RunResult, Task, load_tasks_from_jsonl as _load_tasks_impl
35
 
36
  logger = logging.getLogger(__name__)
37
 
38
 
39
  @dataclass
40
  class TaskResult:
41
+ """Result for a single candidate-task pair."""
42
 
43
+ candidate_name: str
44
  task_name: str
45
  run_result: RunResult
46
  metrics: TraceMetrics
 
50
 
51
 
52
  @dataclass
53
+ class CandidateSummary:
54
+ """Aggregated summary for a candidate across all tasks."""
55
 
56
  name: str
57
+ candidate: Candidate
58
+ task_results: list[TaskResult] = field(default_factory=lambda: [])
59
 
60
  # Aggregated metrics
61
  avg_score: float = 0.0
 
73
  """Convert to dictionary for serialization."""
74
  return {
75
  "name": self.name,
76
+ "agent": asdict(self.candidate.agent),
77
+ "mutations": self.candidate.mutations,
78
+ "rationale": self.candidate.rationale,
79
  "avg_score": self.avg_score,
80
  "avg_tokens": self.avg_tokens,
81
  "avg_duration": self.avg_duration,
 
93
 
94
  timestamp: str
95
  output_dir: Path
96
+ summaries: list[CandidateSummary]
97
  pareto_frontier: list[str]
98
+ exported_agents: dict[str, Path]
99
 
100
  # Rankings
101
+ rank_by_score: list[str] = field(default_factory=lambda: [])
102
+ rank_by_tokens: list[str] = field(default_factory=lambda: [])
103
+ rank_by_efficiency: list[str] = field(default_factory=lambda: [])
104
 
105
  # Stats
106
  total_experiments: int = 0
107
  total_duration_seconds: float = 0.0
108
 
109
+ def get_best_candidate(self, criterion: str = "score") -> CandidateSummary | None:
110
+ """Get the best candidate by a criterion."""
111
  if criterion == "score":
112
  names = self.rank_by_score
113
  elif criterion == "tokens":
 
129
  class FlowOptimizer:
130
  """Optimizer for finding best agent configurations.
131
 
132
+ Takes a base Agent and a CandidateStrategy, generates candidates,
133
+ runs experiments in parallel, evaluates results, performs Pareto
134
+ analysis, and exports winning agent configs.
135
 
136
  Example:
137
+ strategy = GridSearchStrategy(variations={
138
+ "enable_memory": [True, False],
139
+ })
140
  optimizer = FlowOptimizer(parallel=4)
141
+ base = Agent(name="my_agent")
142
+ candidates = strategy.generate(base, budget=10)
143
+ result = await optimizer.optimize(candidates, tasks)
 
 
 
144
  print(f"Best: {result.rank_by_score[0]}")
145
  """
146
 
 
150
  use_llm_evaluator: bool = True,
151
  output_dir: Path | None = None,
152
  ) -> None:
 
 
 
 
 
 
 
153
  self.parallel = parallel
154
  self.use_llm_evaluator = use_llm_evaluator
155
  self.output_dir = output_dir or Path.home() / ".flow" / "optimizations"
156
 
157
  async def optimize(
158
  self,
159
+ candidates: list[Candidate],
160
  tasks: list[Task],
161
  progress_callback: Callable[[int, int, str, str], None] | None = None,
162
  ) -> OptimizationResult:
163
+ """Run optimization across all candidates and tasks.
164
 
165
  Args:
166
+ candidates: Candidates to test
167
+ tasks: Tasks to run each candidate on
168
+ progress_callback: Optional callback(completed, total, candidate_name, task_name)
169
 
170
  Returns:
171
+ OptimizationResult with rankings and exported agents
172
  """
173
  start_time = datetime.now()
174
  timestamp = start_time.strftime("%Y%m%d_%H%M%S")
175
  run_dir = self.output_dir / timestamp
176
  run_dir.mkdir(parents=True, exist_ok=True)
177
 
 
178
  setup_tracing("flow-optimizer")
179
+ self._save_config(candidates, tasks, run_dir)
180
 
181
  print("=" * 70)
182
  print(" FLOW OPTIMIZER")
183
  print("=" * 70)
184
+ print(f" Candidates: {len(candidates)}")
185
+ print(f" Tasks: {len(tasks)}")
186
+ print(f" Total: {len(candidates) * len(tasks)} experiments")
187
+ print(f" Parallel: {self.parallel}")
188
+ print(f" Output: {run_dir}")
189
  print("=" * 70)
190
 
 
191
  evaluator = None
192
  if self.use_llm_evaluator:
193
  evaluator = self._create_evaluator()
194
 
 
195
  task_results = await self._run_parallel(
196
+ candidates, tasks, run_dir, evaluator, progress_callback
197
  )
198
 
199
+ summaries = self._aggregate_results(task_results, candidates)
 
 
 
200
  pareto_names = self._compute_pareto(summaries)
201
 
 
202
  rank_by_score = sorted(summaries, key=lambda s: s.avg_score, reverse=True)
203
  rank_by_tokens = sorted(summaries, key=lambda s: s.avg_tokens)
204
  rank_by_efficiency = sorted(
 
207
  reverse=True,
208
  )
209
 
 
210
  summary_dicts = [s.to_dict() for s in summaries]
211
+ exported = export_optimization_results(
212
  summary_dicts, pareto_names, run_dir, timestamp
213
  )
214
 
 
219
  output_dir=run_dir,
220
  summaries=summaries,
221
  pareto_frontier=pareto_names,
222
+ exported_agents=exported,
223
  rank_by_score=[s.name for s in rank_by_score],
224
  rank_by_tokens=[s.name for s in rank_by_tokens],
225
  rank_by_efficiency=[s.name for s in rank_by_efficiency],
 
227
  total_duration_seconds=(end_time - start_time).total_seconds(),
228
  )
229
 
 
230
  self._save_results(result, run_dir)
 
 
231
  self._print_summary(result)
232
 
233
  return result
234
 
235
  async def _run_parallel(
236
  self,
237
+ candidates: list[Candidate],
238
  tasks: list[Task],
239
  run_dir: Path,
240
  evaluator: LLMEvaluator | None,
241
  progress_callback: Callable[[int, int, str, str], None] | None,
242
  ) -> list[TaskResult]:
243
+ """Run all candidate-task pairs in parallel with semaphore control."""
244
  semaphore = asyncio.Semaphore(self.parallel)
245
+ total = len(candidates) * len(tasks)
246
  completed = 0
247
  lock = asyncio.Lock()
248
 
249
+ async def run_one(candidate: Candidate, task: Task) -> TaskResult:
250
  nonlocal completed
251
  async with semaphore:
252
+ workspace = run_dir / "workspaces" / candidate.agent.name / task.name
253
  workspace.mkdir(parents=True, exist_ok=True)
254
 
255
+ result = await self._run_single(candidate, task, workspace, evaluator)
256
 
257
  async with lock:
258
  completed += 1
259
  status = "✓" if result.eval_passed else "✗"
260
  print(
261
+ f" [{completed}/{total}] {candidate.agent.name}/{task.name}: "
262
  f"{status} score={result.eval_score:.2f} "
263
  f"tokens={result.metrics.total_tokens:,}"
264
  )
265
  if progress_callback:
266
+ progress_callback(completed, total, candidate.agent.name, task.name)
267
 
268
  return result
269
 
270
+ coroutines = [run_one(c, t) for c in candidates for t in tasks]
 
 
 
271
  gather_results = await asyncio.gather(*coroutines, return_exceptions=True)
272
 
 
273
  valid_results: list[TaskResult] = []
274
  for r in gather_results:
275
  if isinstance(r, BaseException):
 
281
 
282
  async def _run_single(
283
  self,
284
+ candidate: Candidate,
285
  task: Task,
286
  workspace: Path,
287
  evaluator: LLMEvaluator | None,
288
  ) -> TaskResult:
289
+ """Run a single candidate-task experiment."""
290
+ harness = create_harness_from_agent(candidate.agent, workspace)
291
 
292
  try:
293
  runner = FlowExperimentRunner(keep_workspace=True)
294
  run_result = await runner.run(harness, task, workspace=workspace)
295
  metrics = extract_metrics(run_result.trace)
296
 
 
297
  if evaluator:
298
  eval_result = await evaluator.evaluate(run_result)
299
  eval_score = eval_result.score
300
  eval_passed = eval_result.passed
301
  eval_reasoning = eval_result.reasoning
302
  else:
 
303
  eval_score = 1.0 if run_result.success else 0.0
304
  eval_passed = run_result.success
305
  eval_reasoning = "Success" if run_result.success else run_result.error or "Failed"
306
 
307
  return TaskResult(
308
+ candidate_name=candidate.agent.name,
309
  task_name=task.name,
310
  run_result=run_result,
311
  metrics=metrics,
 
319
  def _aggregate_results(
320
  self,
321
  task_results: list[TaskResult],
322
+ candidates: list[Candidate],
323
+ ) -> list[CandidateSummary]:
324
+ """Aggregate task results into candidate summaries."""
325
+ candidate_map = {c.agent.name: c for c in candidates}
326
+ results_by_name: dict[str, list[TaskResult]] = {c.agent.name: [] for c in candidates}
327
 
328
  for result in task_results:
329
+ if result.candidate_name in results_by_name:
330
+ results_by_name[result.candidate_name].append(result)
331
 
332
  summaries = []
333
+ for name, results in results_by_name.items():
334
  if not results:
335
  continue
336
 
337
+ candidate = candidate_map[name]
338
+ summary = CandidateSummary(
339
  name=name,
340
+ candidate=candidate,
341
  task_results=results,
342
  avg_score=sum(r.eval_score for r in results) / len(results),
343
  avg_tokens=sum(r.metrics.total_tokens for r in results) / len(results),
 
350
 
351
  return summaries
352
 
353
+ def _compute_pareto(self, summaries: list[CandidateSummary]) -> list[str]:
354
  """Compute Pareto frontier (maximize score, minimize tokens)."""
 
355
  pareto_names = compute_pareto_frontier(summaries)
356
 
 
357
  for summary in summaries:
358
  if summary.name in pareto_names:
359
  summary.is_pareto_optimal = True
360
  summary.pareto_rank = 0
361
  else:
362
  summary.is_pareto_optimal = False
363
+ summary.pareto_rank = 1
364
 
365
  return pareto_names
366
 
 
388
 
389
  def _save_config(
390
  self,
391
+ candidates: list[Candidate],
392
  tasks: list[Task],
393
  run_dir: Path,
394
  ) -> None:
 
396
  with open(run_dir / "optimization_config.json", "w") as f:
397
  json.dump(
398
  {
399
+ "candidates": [asdict(c) for c in candidates],
400
  "tasks": [{"name": t.name, "prompt": t.prompt} for t in tasks],
401
  "parallel": self.parallel,
402
  "use_llm_evaluator": self.use_llm_evaluator,
 
415
  "rank_by_score": result.rank_by_score,
416
  "rank_by_tokens": result.rank_by_tokens,
417
  "rank_by_efficiency": result.rank_by_efficiency,
418
+ "exported_agents": {k: str(v) for k, v in result.exported_agents.items()},
419
  "summaries": [s.to_dict() for s in result.summaries],
420
  }
421
 
 
428
  print(" OPTIMIZATION RESULTS")
429
  print("=" * 70)
430
 
431
+ print(f"\n{'Candidate':<30} | {'Score':>8} | {'Tokens':>10} | {'Pareto':>8}")
 
432
  print("-" * 65)
433
 
434
  for summary in sorted(result.summaries, key=lambda s: s.avg_score, reverse=True):
 
442
  print(f"Pareto frontier: {result.pareto_frontier}")
443
  print(f"Best by score: {result.rank_by_score[0] if result.rank_by_score else 'N/A'}")
444
  print(f"Best by efficiency: {result.rank_by_efficiency[0] if result.rank_by_efficiency else 'N/A'}")
445
+ print("\nExported agents:")
446
+ for name, path in result.exported_agents.items():
447
  print(f" {name}: {path}")
448
  print(f"\nResults saved to: {result.output_dir}")
449
 
450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  def load_tasks_from_jsonl(path: Path) -> list[Task]:
452
  """Load tasks from a JSONL file.
453
 
 
 
 
 
 
 
 
454
  Args:
455
  path: Path to JSONL file
456
 
457
  Returns:
458
  List of Task objects
459
  """
460
+ return _load_tasks_impl(path)
 
 
src/flow/experiments/types.py CHANGED
@@ -109,7 +109,7 @@ class EvalResult:
109
  _DATA_DIR = Path(__file__).parent / "data" / "tasks"
110
 
111
 
112
- def _load_tasks_from_jsonl(path: Path) -> list[Task]:
113
  """Load tasks from a JSONL file.
114
 
115
  Each line should be a JSON object with:
@@ -186,4 +186,4 @@ def get_task_suite(suite_name: str) -> list[Task]:
186
  if not path.exists():
187
  available = ", ".join(get_available_suites())
188
  raise ValueError(f"Unknown suite '{suite_name}'. Available: {available}")
189
- return _load_tasks_from_jsonl(path)
 
109
  _DATA_DIR = Path(__file__).parent / "data" / "tasks"
110
 
111
 
112
+ def load_tasks_from_jsonl(path: Path) -> list[Task]:
113
  """Load tasks from a JSONL file.
114
 
115
  Each line should be a JSON object with:
 
186
  if not path.exists():
187
  available = ", ".join(get_available_suites())
188
  raise ValueError(f"Unknown suite '{suite_name}'. Available: {available}")
189
+ return load_tasks_from_jsonl(path)
src/flow/harness/maf/agent.py CHANGED
@@ -9,9 +9,10 @@ from collections.abc import Callable, Coroutine, Sequence
9
  from pathlib import Path
10
  from typing import TYPE_CHECKING, Any
11
 
 
12
  from flow.harness.maf.message_store import HeadTailCompactingChatMessageStore
13
- from flow.prompts import FLOW_AGENT_INSTRUCTIONS
14
- from flow.tools import create_all_tools
15
 
16
  if TYPE_CHECKING:
17
  from agent_framework import ChatAgent
@@ -37,10 +38,7 @@ def create_agent(
37
  workspace: Path | None = None,
38
  memory_path: Path | None = None,
39
  # Tool configuration
40
- tools: Sequence[Callable[..., Coroutine[Any, Any, str]]] | None = None,
41
- enable_memory_tool: bool = True,
42
- enable_sub_agent: bool = False,
43
- bash_timeout: int = 120,
44
  # Context engineering
45
  enable_compaction: bool = True,
46
  compaction_head_size: int = 10,
@@ -52,8 +50,6 @@ def create_agent(
52
  - Azure OpenAI as the backend
53
  - Flow's standard tools (coding, execution, memory)
54
  - Optional message compaction for long conversations
55
- - Optional agent-managed memory tool
56
- - Optional sub-agent for isolated research
57
 
58
  Args:
59
  endpoint: Azure OpenAI endpoint URL. Defaults to AZURE_OPENAI_ENDPOINT env var.
@@ -64,10 +60,11 @@ def create_agent(
64
  instructions: Agent instructions. Defaults to FLOW_AGENT_INSTRUCTIONS.
65
  workspace: Directory for file operations. Defaults to ~/.flow/workspace.
66
  memory_path: Directory for persistent memory. Defaults to ~/.flow/memory.
67
- tools: Custom tools to use. If None, creates standard Flow tools.
68
- enable_memory_tool: Whether to include the memory tool (default: True).
69
- enable_sub_agent: Whether to include the sub-agent tool (default: False).
70
- bash_timeout: Timeout for bash commands in seconds.
 
71
  enable_compaction: Whether to enable head+tail message compaction.
72
  compaction_head_size: Number of initial messages to keep.
73
  compaction_tail_size: Number of recent messages to keep.
@@ -81,9 +78,12 @@ def create_agent(
81
 
82
  Example:
83
  >>> from flow.harness.maf import create_agent
84
- >>> agent = create_agent()
85
- >>> thread = agent.get_new_thread()
86
- >>> response = await agent.run("Create a hello world script", thread=thread)
 
 
 
87
  """
88
  try:
89
  from agent_framework import ChatAgent, ai_function
@@ -123,19 +123,18 @@ def create_agent(
123
  workspace.mkdir(parents=True, exist_ok=True)
124
  memory_path.mkdir(parents=True, exist_ok=True)
125
 
126
- # Create or use provided tools
127
- if tools is None:
128
- tools = create_all_tools(
129
- workspace=workspace,
130
- memory_path=memory_path,
131
- bash_timeout=bash_timeout,
132
- enable_memory_tool=enable_memory_tool,
133
- enable_sub_agent=enable_sub_agent,
134
- )
135
 
136
  # Wrap tools with ai_function decorator for Agent Framework
137
  converted_tools = []
138
- for tool_func in tools:
139
  tool_name = getattr(tool_func, "_tool_name", tool_func.__name__)
140
  tool_description = getattr(tool_func, "_tool_description", tool_func.__doc__ or "")
141
  wrapped = ai_function(name=tool_name, description=tool_description)(tool_func)
@@ -163,11 +162,22 @@ def create_agent(
163
  f"Message compaction enabled: head={compaction_head_size}, tail={compaction_tail_size}"
164
  )
165
 
 
 
 
 
 
 
 
 
 
166
  # Create the agent
167
  agent = ChatAgent(
168
  name=name,
169
  description="Autonomous coding agent",
170
- instructions=instructions or FLOW_AGENT_INSTRUCTIONS,
 
 
171
  chat_client=client,
172
  tools=converted_tools,
173
  chat_message_store_factory=message_store_factory,
 
9
  from pathlib import Path
10
  from typing import TYPE_CHECKING, Any
11
 
12
+ from flow.experiments.models import TOOL_PRESETS, resolve_tools
13
  from flow.harness.maf.message_store import HeadTailCompactingChatMessageStore
14
+ from flow.harness.maf.tools import build_tools
15
+ from flow.prompts import build_instructions
16
 
17
  if TYPE_CHECKING:
18
  from agent_framework import ChatAgent
 
38
  workspace: Path | None = None,
39
  memory_path: Path | None = None,
40
  # Tool configuration
41
+ tools: str | list[str] | dict[str, dict[str, Any]] | Sequence[Callable[..., Coroutine[Any, Any, str]]] = "standard",
 
 
 
42
  # Context engineering
43
  enable_compaction: bool = True,
44
  compaction_head_size: int = 10,
 
50
  - Azure OpenAI as the backend
51
  - Flow's standard tools (coding, execution, memory)
52
  - Optional message compaction for long conversations
 
 
53
 
54
  Args:
55
  endpoint: Azure OpenAI endpoint URL. Defaults to AZURE_OPENAI_ENDPOINT env var.
 
60
  instructions: Agent instructions. Defaults to FLOW_AGENT_INSTRUCTIONS.
61
  workspace: Directory for file operations. Defaults to ~/.flow/workspace.
62
  memory_path: Directory for persistent memory. Defaults to ~/.flow/memory.
63
+ tools: Tool configuration - can be:
64
+ - str: Preset name ("standard", "minimal", "full", "readonly")
65
+ - list[str]: List of tool names
66
+ - dict[str, dict]: Full specification with per-tool configs
67
+ - Sequence[Callable]: Pre-built tool functions (advanced)
68
  enable_compaction: Whether to enable head+tail message compaction.
69
  compaction_head_size: Number of initial messages to keep.
70
  compaction_tail_size: Number of recent messages to keep.
 
78
 
79
  Example:
80
  >>> from flow.harness.maf import create_agent
81
+ >>> # Using preset
82
+ >>> agent = create_agent(tools="standard")
83
+ >>> # Using explicit list
84
+ >>> agent = create_agent(tools=["read_file", "write_file", "bash_execute"])
85
+ >>> # Using full config
86
+ >>> agent = create_agent(tools={"bash_execute": {"timeout": 60}, "memory": {}})
87
  """
88
  try:
89
  from agent_framework import ChatAgent, ai_function
 
123
  workspace.mkdir(parents=True, exist_ok=True)
124
  memory_path.mkdir(parents=True, exist_ok=True)
125
 
126
+ # Create tools from specification or use provided functions
127
+ if isinstance(tools, (str, list, dict)):
128
+ # Resolve to dict form and build tools
129
+ tools_spec = resolve_tools(tools)
130
+ tool_functions = build_tools(tools_spec, workspace, memory_path)
131
+ else:
132
+ # Already a sequence of callable tools
133
+ tool_functions = tools
 
134
 
135
  # Wrap tools with ai_function decorator for Agent Framework
136
  converted_tools = []
137
+ for tool_func in tool_functions:
138
  tool_name = getattr(tool_func, "_tool_name", tool_func.__name__)
139
  tool_description = getattr(tool_func, "_tool_description", tool_func.__doc__ or "")
140
  wrapped = ai_function(name=tool_name, description=tool_description)(tool_func)
 
162
  f"Message compaction enabled: head={compaction_head_size}, tail={compaction_tail_size}"
163
  )
164
 
165
+ # Determine if memory is enabled for instructions
166
+ enable_memory = False
167
+ if isinstance(tools, str):
168
+ enable_memory = "memory" in TOOL_PRESETS.get(tools, {})
169
+ elif isinstance(tools, list):
170
+ enable_memory = "memory" in tools
171
+ elif isinstance(tools, dict):
172
+ enable_memory = "memory" in tools
173
+
174
  # Create the agent
175
  agent = ChatAgent(
176
  name=name,
177
  description="Autonomous coding agent",
178
+ instructions=instructions or build_instructions(
179
+ enable_memory=enable_memory,
180
+ ),
181
  chat_client=client,
182
  tools=converted_tools,
183
  chat_message_store_factory=message_store_factory,
src/flow/harness/maf/tools/__init__.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """MAF-specific tools for the Flow agent.
2
+
3
+ This module provides tools that work with the Microsoft Agent Framework harness.
4
+ Tools are created based on a specification dict that maps tool names to their configs.
5
+
6
+ Available tools:
7
+ - read_file: Read file contents
8
+ - write_file: Write/edit file content
9
+ - list_directory: List directory contents
10
+ - grep_search: Search for text patterns
11
+ - bash_execute: Execute bash commands (config: timeout)
12
+ - check_processes: Manage background processes
13
+ - python_repl: Execute Python code
14
+ - think: Explicit reasoning tool
15
+ - task_done: Task completion marker
16
+ - memory: Persistent memory storage
17
+ - sub_agent: Isolated research sub-agent (config: model)
18
+ """
19
+
20
+ from collections.abc import Callable, Coroutine, Sequence
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ from flow.harness.maf.tools.coding import (
25
+ create_grep_search_tool,
26
+ create_list_directory_tool,
27
+ create_read_file_tool,
28
+ create_write_file_tool,
29
+ )
30
+ from flow.harness.maf.tools.core import task_done, think
31
+ from flow.harness.maf.tools.execution import (
32
+ create_bash_execute_tool,
33
+ create_check_processes_tool,
34
+ create_python_repl_tool,
35
+ )
36
+ from flow.harness.maf.tools.memory import create_memory_tool
37
+ from flow.harness.maf.tools.sub_agent import create_sub_agent_tool
38
+
39
+ __all__ = [
40
+ "build_tools",
41
+ "create_bash_execute_tool",
42
+ "create_check_processes_tool",
43
+ "create_grep_search_tool",
44
+ "create_list_directory_tool",
45
+ "create_memory_tool",
46
+ "create_python_repl_tool",
47
+ "create_read_file_tool",
48
+ "create_sub_agent_tool",
49
+ "create_write_file_tool",
50
+ "task_done",
51
+ "think",
52
+ ]
53
+
54
+
55
+ # Registry of tool factories that don't require config
56
+ # Maps tool name -> factory function(workspace, memory_path) -> tool
57
+ _SIMPLE_TOOL_FACTORIES: dict[str, Callable[..., Any]] = {}
58
+
59
+ # Registry of tools that are standalone (no factory needed)
60
+ _STANDALONE_TOOLS: dict[str, Callable[..., Coroutine[Any, Any, str]]] = {
61
+ "think": think,
62
+ "task_done": task_done,
63
+ }
64
+
65
+
66
+ def build_tools(
67
+ tools_spec: dict[str, dict[str, Any]],
68
+ workspace: Path,
69
+ memory_path: Path,
70
+ ) -> Sequence[Callable[..., Coroutine[Any, Any, str]]]:
71
+ """Build tool functions from a specification dict.
72
+
73
+ This is the main entry point for creating tools based on a resolved
74
+ tool specification (from resolve_tools()).
75
+
76
+ Args:
77
+ tools_spec: Dict mapping tool names to their config dicts.
78
+ e.g., {"bash_execute": {"timeout": 60}, "read_file": {}}
79
+ workspace: Root directory for file operations
80
+ memory_path: Directory for persistent memory
81
+
82
+ Returns:
83
+ List of tool functions ready to use with MAF
84
+
85
+ Example:
86
+ >>> from flow.experiments.models import resolve_tools
87
+ >>> tools_spec = resolve_tools("standard")
88
+ >>> tools = build_tools(tools_spec, workspace, memory_path)
89
+ """
90
+ workspace = Path(workspace).resolve()
91
+ memory_path = Path(memory_path).resolve()
92
+
93
+ tools: list[Callable[..., Coroutine[Any, Any, str]]] = []
94
+
95
+ for tool_name, config in tools_spec.items():
96
+ tool = _create_tool(tool_name, config, workspace, memory_path)
97
+ if tool is not None:
98
+ tools.append(tool)
99
+
100
+ return tools
101
+
102
+
103
+ def _create_tool(
104
+ name: str,
105
+ config: dict[str, Any],
106
+ workspace: Path,
107
+ memory_path: Path,
108
+ ) -> Callable[..., Coroutine[Any, Any, str]] | None:
109
+ """Create a single tool by name with the given config.
110
+
111
+ Args:
112
+ name: Tool name (e.g., "read_file", "bash_execute")
113
+ config: Tool-specific configuration dict
114
+ workspace: Root directory for file operations
115
+ memory_path: Directory for persistent memory
116
+
117
+ Returns:
118
+ Tool function or None if unknown tool name
119
+ """
120
+ # Standalone tools (no config needed)
121
+ if name in _STANDALONE_TOOLS:
122
+ return _STANDALONE_TOOLS[name]
123
+
124
+ # Coding tools
125
+ if name == "read_file":
126
+ return create_read_file_tool(workspace)
127
+ if name == "write_file":
128
+ return create_write_file_tool(workspace)
129
+ if name == "list_directory":
130
+ return create_list_directory_tool(workspace)
131
+ if name == "grep_search":
132
+ return create_grep_search_tool(workspace)
133
+
134
+ # Execution tools
135
+ if name == "bash_execute":
136
+ timeout = config.get("timeout", 120)
137
+ return create_bash_execute_tool(workspace, memory_path, timeout)
138
+ if name == "check_processes":
139
+ return create_check_processes_tool(workspace, memory_path)
140
+ if name == "python_repl":
141
+ return create_python_repl_tool(workspace)
142
+
143
+ # Memory tool
144
+ if name == "memory":
145
+ return create_memory_tool(memory_path)
146
+
147
+ # Sub-agent tool
148
+ if name == "sub_agent":
149
+ model = config.get("model", "gpt-4o-mini")
150
+ return create_sub_agent_tool(workspace, model=model)
151
+
152
+ # Unknown tool - log warning and skip
153
+ import logging
154
+
155
+ logger = logging.getLogger(__name__)
156
+ logger.warning(f"Unknown tool name: {name}. Skipping.")
157
+ return None
src/flow/{tools → harness/maf/tools}/coding.py RENAMED
File without changes
src/flow/{tools → harness/maf/tools}/core.py RENAMED
File without changes
src/flow/{tools → harness/maf/tools}/execution.py RENAMED
File without changes
src/flow/{tools → harness/maf/tools}/memory.py RENAMED
File without changes
src/flow/{tools → harness/maf/tools}/sub_agent.py RENAMED
@@ -100,12 +100,20 @@ def create_sub_agent_tool(
100
 
101
  # Create basic tools for the sub-agent
102
  # Keep it minimal - just what's needed for research
103
- from flow.tools.coding import create_coding_tools
104
- from flow.tools.core import create_core_tools
105
-
106
- sub_tools: list[Callable[..., Any]] = []
107
- sub_tools.extend(create_coding_tools(workspace))
108
- sub_tools.extend(create_core_tools())
 
 
 
 
 
 
 
 
109
 
110
  # Convert tools to agent_framework format
111
  from agent_framework import ai_function
 
100
 
101
  # Create basic tools for the sub-agent
102
  # Keep it minimal - just what's needed for research
103
+ from flow.harness.maf.tools.coding import (
104
+ create_grep_search_tool,
105
+ create_list_directory_tool,
106
+ create_read_file_tool,
107
+ )
108
+ from flow.harness.maf.tools.core import task_done, think
109
+
110
+ sub_tools: list[Callable[..., Any]] = [
111
+ create_read_file_tool(workspace),
112
+ create_list_directory_tool(workspace),
113
+ create_grep_search_tool(workspace),
114
+ think,
115
+ task_done,
116
+ ]
117
 
118
  # Convert tools to agent_framework format
119
  from agent_framework import ai_function
src/flow/prompts.py CHANGED
@@ -1,9 +1,14 @@
1
  """System prompts for the Flow agent.
2
 
3
  Defines the structured workflow for software engineering tasks.
 
4
  """
5
 
6
- FLOW_AGENT_INSTRUCTIONS = """
 
 
 
 
7
  You are an expert autonomous agent. You solve problems end-to-end by composing your available tools.
8
 
9
  ## CORE PRINCIPLE: BE AUTONOMOUS
@@ -22,7 +27,9 @@ When asked to solve a task:
22
  **Example - GOOD (autonomous):**
23
  > *writes code* → *executes code* → *sees output* → *fixes any errors*
24
  > → "Done! The script ran successfully and output X."
 
25
 
 
26
  ---
27
 
28
  ## YOUR CAPABILITIES
@@ -41,35 +48,23 @@ When asked to solve a task:
41
  - `web_search`: Search the web using Google (requires GOOGLE_API_KEY and GOOGLE_CSE_ID)
42
  - `web_fetch`: Fetch and read content from URLs
43
 
44
- **Memory Tools:**
45
- - `memory`: Persistent storage that survives across conversations
46
- - view: See directory or file contents
47
- - create: Create new files
48
- - str_replace: Edit existing files
49
- - append: Add to files
50
- - search: Find text across memory
51
- - delete: Remove files
52
-
53
  **Thinking Tools:**
54
  - `think`: Pause to reason through complex problems
55
  - `task_done`: Report when task is complete or blocked
 
56
 
57
- **Skills Tool (if available):**
58
- - `skills`: Discover and load domain-specific expertise
59
- - `skills(action='list')`: See available skills with descriptions
60
- - `skills(action='load', name='skill-name')`: Load full skill content
61
-
62
  ---
63
 
64
  ## WORKFLOW
65
 
66
  ### 1. UNDERSTAND
67
  - Read the user's request carefully
68
- - **If the `skills` tool is available**, call `skills(action='list')` to discover relevant expertise
69
  - Use `list_directory` to understand the workspace structure
70
  - Use `grep_search` to find relevant existing code
71
- - Check memory for relevant patterns: `memory(command="view", path="/memory")`
72
 
 
73
  ### 2. PLAN
74
  - Use `think` tool to plan your approach for complex tasks
75
  - Break down into small, testable steps
@@ -120,7 +115,9 @@ bash_execute("cd project && npm run build") # Production build must succeed
120
  - Clean up any background processes you started
121
  - Call `task_done` with status and summary
122
  - Include files created and suggested next steps
 
123
 
 
124
  ---
125
 
126
  ## WORKSPACE
@@ -139,50 +136,9 @@ Your workspace is at `~/.flow/workspace/`
139
  - Each `bash_execute` runs from workspace root in a fresh shell
140
  - Use `cd project && command` for commands in subdirectories
141
  - Multiple commands: `cd project && cmd1 && cmd2`
 
142
 
143
- ---
144
-
145
- ## MEMORY
146
-
147
- Your memory persists at `~/.flow/memory/`
148
-
149
- **Recommended structure:**
150
- - `/memory/patterns/` - Reusable solutions and code patterns
151
- - `/memory/projects/` - Per-project context and notes
152
- - `/memory/decisions/` - Why you made certain choices
153
-
154
- **Best practices:**
155
- When storing information, include context:
156
- - **Date**: When was this created/learned?
157
- - **Project**: What project did this come from?
158
- - **Context**: Why was this approach chosen?
159
-
160
- **Example pattern file** (`/memory/patterns/fastapi_cors.md`):
161
- ```markdown
162
- # FastAPI CORS Setup
163
- Created: 2025-01-15
164
- Source: sleep_tracker project
165
-
166
- ## Pattern
167
- from fastapi.middleware.cors import CORSMiddleware
168
- app.add_middleware(
169
- CORSMiddleware,
170
- allow_origins=["*"],
171
- allow_methods=["*"],
172
- allow_headers=["*"],
173
- )
174
-
175
- ## When to use
176
- - Full-stack apps with separate frontend/backend
177
- - Frontend on different port than backend
178
-
179
- ## Notes
180
- - Must add before routes
181
- - Restrict origins in production
182
- ```
183
-
184
- **Check memory first** - you may have solved similar problems before!
185
-
186
  ---
187
 
188
  ## CLI TOOLS
@@ -210,7 +166,9 @@ npm install @shadcn/ui
210
  npx shadcn@latest init --defaults --yes
211
  npx shadcn@latest add button card --yes
212
  ```
 
213
 
 
214
  ---
215
 
216
  ## FULL-STACK APPS
@@ -235,7 +193,9 @@ app.add_middleware(
235
  cd backend && python -c "from main import app; print('Backend OK')"
236
  cd frontend && npm run build && echo "Frontend OK"
237
  ```
 
238
 
 
239
  ---
240
 
241
  ## BACKGROUND PROCESSES
@@ -257,9 +217,6 @@ check_processes(action="list")
257
  check_processes(action="kill", pid=12345)
258
  ```
259
 
260
- **Process registry** is at `/memory/processes.md` - view it with:
261
- `memory(command='view', path='/memory/processes.md')`
262
-
263
  **IMPORTANT:**
264
  - NEVER start servers without `background=True` - they will timeout after 120s
265
  - ALWAYS clean up background processes when done testing
@@ -276,48 +233,19 @@ check_processes(action="cleanup") # Kill all when done
276
  # Bad - will timeout!
277
  bash_execute("uvicorn main:app --port 8000") # Blocks forever
278
  ```
 
279
 
 
280
  ---
281
 
282
  ## ERROR HANDLING
283
 
284
  - If a command fails, analyze the error and try alternatives
285
- - Log failures and solutions to memory for future reference
286
  - Don't give up after first failure - iterate
287
  - If truly blocked, call `task_done` with status="incomplete" and explain why
 
288
 
289
- ---
290
-
291
- ## SKILLS
292
-
293
- **If the `skills` tool is available**, use it to access domain-specific expertise:
294
-
295
- ```python
296
- # At the start of complex tasks, discover what expertise is available
297
- skills(action='list')
298
-
299
- # Output shows available skills with descriptions:
300
- # - fastapi-patterns: Build REST APIs with FastAPI...
301
- # - react-components: Build React components with hooks...
302
- # - testing-strategies: Write comprehensive tests...
303
-
304
- # Load relevant skills before implementation
305
- skills(action='load', name='fastapi-patterns')
306
- ```
307
-
308
- **Skills provide:**
309
- - Domain-specific patterns and best practices
310
- - Code examples and templates
311
- - Common pitfalls to avoid
312
-
313
- **When to load skills:**
314
- - Before starting a new project type (API, frontend, CLI)
315
- - When working with unfamiliar frameworks
316
- - For complex tasks requiring specialized knowledge
317
-
318
- **Skills location:** `~/.flow/skills/`
319
- Each skill is a folder with a `SKILL.md` file following the Anthropic Skills standard.
320
-
321
  ---
322
 
323
  ## COMPOSING TOOLS FOR COMPLEX TASKS
@@ -358,7 +286,9 @@ Each skill is a folder with a `SKILL.md` file following the Anthropic Skills sta
358
  4. bash_execute("curl localhost:8000/endpoint") → Reproduce the error
359
  5. Analyze error → Fix code → Test again → Iterate until fixed
360
  ```
 
361
 
 
362
  ---
363
 
364
  ## RESEARCH WORKFLOW
@@ -388,7 +318,9 @@ async def fetch_data(url):
388
  # 4. Test it
389
  python_repl("import httpx; print(httpx.__version__)")
390
  ```
 
391
 
 
392
  ---
393
 
394
  ## REMEMBER
@@ -401,7 +333,212 @@ python_repl("import httpx; print(httpx.__version__)")
401
  6. **TEST EVERYTHING** - Never assume code works
402
  7. **USE NON-INTERACTIVE FLAGS** - Avoid hanging commands
403
  8. **CLEAN UP** - Kill background processes when done
404
- 9. **STORE LEARNINGS** - Save patterns to memory for future use
405
 
406
  **Your goal is to deliver RESULTS, not instructions.**
407
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """System prompts for the Flow agent.
2
 
3
  Defines the structured workflow for software engineering tasks.
4
+ Instructions are composed dynamically based on which tools are enabled.
5
  """
6
 
7
+ # =============================================================================
8
+ # Core instructions - always included
9
+ # =============================================================================
10
+
11
+ _CORE_INTRO = """
12
  You are an expert autonomous agent. You solve problems end-to-end by composing your available tools.
13
 
14
  ## CORE PRINCIPLE: BE AUTONOMOUS
 
27
  **Example - GOOD (autonomous):**
28
  > *writes code* → *executes code* → *sees output* → *fixes any errors*
29
  > → "Done! The script ran successfully and output X."
30
+ """
31
 
32
+ _CORE_CAPABILITIES = """
33
  ---
34
 
35
  ## YOUR CAPABILITIES
 
48
  - `web_search`: Search the web using Google (requires GOOGLE_API_KEY and GOOGLE_CSE_ID)
49
  - `web_fetch`: Fetch and read content from URLs
50
 
 
 
 
 
 
 
 
 
 
51
  **Thinking Tools:**
52
  - `think`: Pause to reason through complex problems
53
  - `task_done`: Report when task is complete or blocked
54
+ """
55
 
56
+ _CORE_WORKFLOW_UNDERSTAND = """
 
 
 
 
57
  ---
58
 
59
  ## WORKFLOW
60
 
61
  ### 1. UNDERSTAND
62
  - Read the user's request carefully
 
63
  - Use `list_directory` to understand the workspace structure
64
  - Use `grep_search` to find relevant existing code
65
+ """
66
 
67
+ _CORE_WORKFLOW_PLAN_EXECUTE_VERIFY = """
68
  ### 2. PLAN
69
  - Use `think` tool to plan your approach for complex tasks
70
  - Break down into small, testable steps
 
115
  - Clean up any background processes you started
116
  - Call `task_done` with status and summary
117
  - Include files created and suggested next steps
118
+ """
119
 
120
+ _CORE_WORKSPACE = """
121
  ---
122
 
123
  ## WORKSPACE
 
136
  - Each `bash_execute` runs from workspace root in a fresh shell
137
  - Use `cd project && command` for commands in subdirectories
138
  - Multiple commands: `cd project && cmd1 && cmd2`
139
+ """
140
 
141
+ _CORE_CLI_TOOLS = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  ---
143
 
144
  ## CLI TOOLS
 
166
  npx shadcn@latest init --defaults --yes
167
  npx shadcn@latest add button card --yes
168
  ```
169
+ """
170
 
171
+ _CORE_FULLSTACK = """
172
  ---
173
 
174
  ## FULL-STACK APPS
 
193
  cd backend && python -c "from main import app; print('Backend OK')"
194
  cd frontend && npm run build && echo "Frontend OK"
195
  ```
196
+ """
197
 
198
+ _CORE_BACKGROUND = """
199
  ---
200
 
201
  ## BACKGROUND PROCESSES
 
217
  check_processes(action="kill", pid=12345)
218
  ```
219
 
 
 
 
220
  **IMPORTANT:**
221
  - NEVER start servers without `background=True` - they will timeout after 120s
222
  - ALWAYS clean up background processes when done testing
 
233
  # Bad - will timeout!
234
  bash_execute("uvicorn main:app --port 8000") # Blocks forever
235
  ```
236
+ """
237
 
238
+ _CORE_ERROR_HANDLING = """
239
  ---
240
 
241
  ## ERROR HANDLING
242
 
243
  - If a command fails, analyze the error and try alternatives
 
244
  - Don't give up after first failure - iterate
245
  - If truly blocked, call `task_done` with status="incomplete" and explain why
246
+ """
247
 
248
+ _CORE_EXAMPLES = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  ---
250
 
251
  ## COMPOSING TOOLS FOR COMPLEX TASKS
 
286
  4. bash_execute("curl localhost:8000/endpoint") → Reproduce the error
287
  5. Analyze error → Fix code → Test again → Iterate until fixed
288
  ```
289
+ """
290
 
291
+ _CORE_RESEARCH = """
292
  ---
293
 
294
  ## RESEARCH WORKFLOW
 
318
  # 4. Test it
319
  python_repl("import httpx; print(httpx.__version__)")
320
  ```
321
+ """
322
 
323
+ _CORE_REMEMBER = """
324
  ---
325
 
326
  ## REMEMBER
 
333
  6. **TEST EVERYTHING** - Never assume code works
334
  7. **USE NON-INTERACTIVE FLAGS** - Avoid hanging commands
335
  8. **CLEAN UP** - Kill background processes when done
 
336
 
337
  **Your goal is to deliver RESULTS, not instructions.**
338
  """
339
+
340
+ # =============================================================================
341
+ # Optional sections - included only when corresponding tools are enabled
342
+ # =============================================================================
343
+
344
+ _MEMORY_CAPABILITIES = """
345
+ **Memory Tools:**
346
+ - `memory`: Persistent storage that survives across conversations
347
+ - view: See directory or file contents
348
+ - create: Create new files
349
+ - str_replace: Edit existing files
350
+ - append: Add to files
351
+ - search: Find text across memory
352
+ - delete: Remove files
353
+ """
354
+
355
+ _MEMORY_WORKFLOW_UNDERSTAND = """- Check memory for relevant patterns: `memory(command="view", path="/memory")`
356
+ """
357
+
358
+ _MEMORY_SECTION = """
359
+ ---
360
+
361
+ ## MEMORY
362
+
363
+ Your memory persists at `~/.flow/memory/`
364
+
365
+ **Recommended structure:**
366
+ - `/memory/patterns/` - Reusable solutions and code patterns
367
+ - `/memory/projects/` - Per-project context and notes
368
+ - `/memory/decisions/` - Why you made certain choices
369
+
370
+ **Best practices:**
371
+ When storing information, include context:
372
+ - **Date**: When was this created/learned?
373
+ - **Project**: What project did this come from?
374
+ - **Context**: Why was this approach chosen?
375
+
376
+ **Example pattern file** (`/memory/patterns/fastapi_cors.md`):
377
+ ```markdown
378
+ # FastAPI CORS Setup
379
+ Created: 2025-01-15
380
+ Source: sleep_tracker project
381
+
382
+ ## Pattern
383
+ from fastapi.middleware.cors import CORSMiddleware
384
+ app.add_middleware(
385
+ CORSMiddleware,
386
+ allow_origins=["*"],
387
+ allow_methods=["*"],
388
+ allow_headers=["*"],
389
+ )
390
+
391
+ ## When to use
392
+ - Full-stack apps with separate frontend/backend
393
+ - Frontend on different port than backend
394
+
395
+ ## Notes
396
+ - Must add before routes
397
+ - Restrict origins in production
398
+ ```
399
+
400
+ **Check memory first** - you may have solved similar problems before!
401
+ """
402
+
403
+ _MEMORY_ERROR_HANDLING = """- Log failures and solutions to memory for future reference
404
+ """
405
+
406
+ _MEMORY_REMEMBER = """9. **STORE LEARNINGS** - Save patterns to memory for future use
407
+ """
408
+
409
+ _MEMORY_BACKGROUND_PROCESS_REGISTRY = """
410
+ **Process registry** is at `/memory/processes.md` - view it with:
411
+ `memory(command='view', path='/memory/processes.md')`
412
+ """
413
+
414
+ _SKILLS_CAPABILITIES = """
415
+ **Skills Tool (if available):**
416
+ - `skills`: Discover and load domain-specific expertise
417
+ - `skills(action='list')`: See available skills with descriptions
418
+ - `skills(action='load', name='skill-name')`: Load full skill content
419
+ """
420
+
421
+ _SKILLS_WORKFLOW_UNDERSTAND = """- **If the `skills` tool is available**, call `skills(action='list')` to discover relevant expertise
422
+ """
423
+
424
+ _SKILLS_SECTION = """
425
+ ---
426
+
427
+ ## SKILLS
428
+
429
+ **If the `skills` tool is available**, use it to access domain-specific expertise:
430
+
431
+ ```python
432
+ # At the start of complex tasks, discover what expertise is available
433
+ skills(action='list')
434
+
435
+ # Output shows available skills with descriptions:
436
+ # - fastapi-patterns: Build REST APIs with FastAPI...
437
+ # - react-components: Build React components with hooks...
438
+ # - testing-strategies: Write comprehensive tests...
439
+
440
+ # Load relevant skills before implementation
441
+ skills(action='load', name='fastapi-patterns')
442
+ ```
443
+
444
+ **Skills provide:**
445
+ - Domain-specific patterns and best practices
446
+ - Code examples and templates
447
+ - Common pitfalls to avoid
448
+
449
+ **When to load skills:**
450
+ - Before starting a new project type (API, frontend, CLI)
451
+ - When working with unfamiliar frameworks
452
+ - For complex tasks requiring specialized knowledge
453
+
454
+ **Skills location:** `~/.flow/skills/`
455
+ Each skill is a folder with a `SKILL.md` file following the Anthropic Skills standard.
456
+ """
457
+
458
+
459
+ # =============================================================================
460
+ # Instruction builder
461
+ # =============================================================================
462
+
463
+
464
+ def build_instructions(
465
+ *,
466
+ enable_memory: bool = True,
467
+ enable_skills: bool = False,
468
+ ) -> str:
469
+ """Build agent instructions dynamically based on enabled tools.
470
+
471
+ Composes the instruction prompt from core sections plus optional sections
472
+ for memory and skills, so the agent only sees documentation for tools
473
+ it actually has.
474
+
475
+ Args:
476
+ enable_memory: Include memory tool documentation.
477
+ enable_skills: Include skills tool documentation.
478
+
479
+ Returns:
480
+ Complete instruction string.
481
+ """
482
+ # -- Capabilities section --
483
+ capabilities = _CORE_CAPABILITIES
484
+ if enable_memory:
485
+ capabilities += "\n" + _MEMORY_CAPABILITIES
486
+ if enable_skills:
487
+ capabilities += "\n" + _SKILLS_CAPABILITIES
488
+
489
+ # -- Workflow > Understand section --
490
+ understand = _CORE_WORKFLOW_UNDERSTAND
491
+ if enable_skills:
492
+ understand += _SKILLS_WORKFLOW_UNDERSTAND
493
+ if enable_memory:
494
+ understand += _MEMORY_WORKFLOW_UNDERSTAND
495
+
496
+ # -- Error handling section --
497
+ error_handling = _CORE_ERROR_HANDLING
498
+ if enable_memory:
499
+ error_handling += _MEMORY_ERROR_HANDLING
500
+
501
+ # -- Background processes section --
502
+ background = _CORE_BACKGROUND
503
+ if enable_memory:
504
+ background += _MEMORY_BACKGROUND_PROCESS_REGISTRY
505
+
506
+ # -- Remember section --
507
+ remember = _CORE_REMEMBER
508
+ if enable_memory:
509
+ remember += _MEMORY_REMEMBER
510
+
511
+ # -- Assemble --
512
+ sections = [
513
+ _CORE_INTRO,
514
+ capabilities,
515
+ understand,
516
+ _CORE_WORKFLOW_PLAN_EXECUTE_VERIFY,
517
+ _CORE_WORKSPACE,
518
+ ]
519
+
520
+ if enable_memory:
521
+ sections.append(_MEMORY_SECTION)
522
+
523
+ sections.extend([
524
+ _CORE_CLI_TOOLS,
525
+ _CORE_FULLSTACK,
526
+ background,
527
+ error_handling,
528
+ ])
529
+
530
+ if enable_skills:
531
+ sections.append(_SKILLS_SECTION)
532
+
533
+ sections.extend([
534
+ _CORE_EXAMPLES,
535
+ _CORE_RESEARCH,
536
+ remember,
537
+ ])
538
+
539
+ return "\n".join(sections)
540
+
541
+
542
+ # Legacy constant for backwards compatibility.
543
+ # Equivalent to build_instructions(enable_memory=True, enable_skills=True).
544
+ FLOW_AGENT_INSTRUCTIONS = build_instructions(enable_memory=True, enable_skills=True)
src/flow/tools/__init__.py DELETED
@@ -1,172 +0,0 @@
1
- """Flow agent tools.
2
-
3
- Provides coding, execution, memory, and core tools for software engineering tasks.
4
- Tools are harness-agnostic - they return plain data that harnesses adapt.
5
- """
6
-
7
- import inspect
8
- from collections.abc import Callable, Sequence
9
- from functools import wraps
10
- from pathlib import Path
11
- from typing import Any, get_type_hints
12
-
13
- from flow.tools.coding import create_coding_tools
14
- from flow.tools.core import create_core_tools
15
- from flow.tools.execution import create_execution_tools
16
- from flow.tools.memory import create_memory_tool
17
- from flow.tools.sub_agent import create_sub_agent_tool
18
-
19
- __all__ = [
20
- "create_all_tools",
21
- "create_coding_tools",
22
- "create_core_tools",
23
- "create_execution_tools",
24
- "create_memory_tool",
25
- "create_sub_agent_tool",
26
- "get_tool_schema",
27
- "tool",
28
- ]
29
-
30
-
31
- def tool(
32
- name: str | None = None,
33
- description: str | None = None,
34
- ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
35
- """Decorator to mark a function as an agent tool.
36
-
37
- This decorator adds metadata to functions that allows harnesses
38
- to discover and use them as agent tools.
39
-
40
- Args:
41
- name: Tool name (defaults to function name)
42
- description: Tool description (defaults to docstring)
43
-
44
- Returns:
45
- Decorated function with tool metadata
46
-
47
- Example:
48
- @tool(name="read_file", description="Read file contents")
49
- async def read_file(path: str) -> str:
50
- ...
51
- """
52
-
53
- def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
54
- @wraps(func)
55
- def wrapper(*args: Any, **kwargs: Any) -> Any:
56
- return func(*args, **kwargs)
57
-
58
- # Store tool metadata
59
- wrapper._tool_name = name or func.__name__ # type: ignore[attr-defined]
60
- wrapper._tool_description = description or func.__doc__ or "" # type: ignore[attr-defined]
61
- wrapper._is_tool = True # type: ignore[attr-defined]
62
-
63
- return wrapper
64
-
65
- return decorator
66
-
67
-
68
- def get_tool_schema(func: Callable[..., Any]) -> dict[str, Any]:
69
- """Extract JSON schema from a tool function.
70
-
71
- Uses type hints and Annotated metadata to build the schema.
72
-
73
- Args:
74
- func: Tool function to extract schema from
75
-
76
- Returns:
77
- JSON schema dict for the tool's parameters
78
- """
79
- hints = get_type_hints(func, include_extras=True)
80
- sig = inspect.signature(func)
81
-
82
- properties: dict[str, Any] = {}
83
- required: list[str] = []
84
-
85
- for param_name, param in sig.parameters.items():
86
- if param_name in ("self", "cls"):
87
- continue
88
-
89
- param_schema: dict[str, Any] = {}
90
- hint = hints.get(param_name, Any)
91
-
92
- # Handle Annotated types
93
- origin = getattr(hint, "__origin__", None)
94
- if origin is not None:
95
- # Check if it's Annotated
96
- if hasattr(hint, "__metadata__"):
97
- # Extract description from Annotated metadata
98
- for meta in hint.__metadata__:
99
- if isinstance(meta, str):
100
- param_schema["description"] = meta
101
- break
102
- # Get the actual type
103
- hint = hint.__args__[0]
104
- origin = getattr(hint, "__origin__", None)
105
-
106
- # Map Python types to JSON schema types
107
- if hint is str:
108
- param_schema["type"] = "string"
109
- elif hint is int:
110
- param_schema["type"] = "integer"
111
- elif hint is float:
112
- param_schema["type"] = "number"
113
- elif hint is bool:
114
- param_schema["type"] = "boolean"
115
- elif origin is list:
116
- param_schema["type"] = "array"
117
- elif origin is dict:
118
- param_schema["type"] = "object"
119
- else:
120
- param_schema["type"] = "string" # Default fallback
121
-
122
- properties[param_name] = param_schema
123
-
124
- # Check if parameter is required (no default value)
125
- if param.default is inspect.Parameter.empty:
126
- required.append(param_name)
127
-
128
- return {
129
- "type": "object",
130
- "properties": properties,
131
- "required": required,
132
- }
133
-
134
-
135
- def create_all_tools(
136
- workspace: Path,
137
- memory_path: Path,
138
- bash_timeout: int = 120,
139
- *,
140
- enable_memory_tool: bool = True,
141
- enable_sub_agent: bool = False,
142
- sub_agent_model: str = "gpt-4o-mini",
143
- ) -> Sequence[Callable[..., Any]]:
144
- """Create all standard tools for the Flow agent.
145
-
146
- Args:
147
- workspace: Root directory for file operations
148
- memory_path: Directory for persistent memory
149
- bash_timeout: Timeout for bash commands in seconds
150
- enable_memory_tool: Whether to include the memory tool
151
- enable_sub_agent: Whether to include the sub-agent research tool
152
- sub_agent_model: Model to use for sub-agent (default: gpt-4o-mini)
153
-
154
- Returns:
155
- List of all tool functions
156
- """
157
- tools: list[Callable[..., Any]] = []
158
-
159
- # Core tools always included
160
- tools.extend(create_coding_tools(workspace))
161
- tools.extend(create_execution_tools(workspace, memory_path, bash_timeout))
162
- tools.extend(create_core_tools())
163
-
164
- # Optional: Agent-managed memory tool
165
- if enable_memory_tool:
166
- tools.append(create_memory_tool(memory_path))
167
-
168
- # Optional: Sub-agent for isolated research
169
- if enable_sub_agent:
170
- tools.append(create_sub_agent_tool(workspace, model=sub_agent_model))
171
-
172
- return tools
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/flow/ui/api/configs.py CHANGED
@@ -1,7 +1,6 @@
1
  # Copyright (c) Microsoft. All rights reserved.
2
- """Config API routes."""
3
 
4
- from itertools import product
5
  from uuid import UUID
6
 
7
  from fastapi import APIRouter, Depends, HTTPException
@@ -9,32 +8,37 @@ from pydantic import BaseModel
9
  from sqlalchemy.ext.asyncio import AsyncSession
10
  from sqlmodel import select, desc
11
 
 
 
12
  from ..database import get_session
13
  from ..models.config import AgentConfig
14
- from ..schemas import ConfigCreate, ConfigUpdate, ConfigResponse
15
 
16
  router = APIRouter(prefix="/configs", tags=["configs"])
17
 
18
 
19
- class VariationRequest(BaseModel):
20
- """Request schema for generating config variations."""
21
 
22
  base_name: str = "experiment"
23
 
24
- # Which features to vary (on/off)
25
  vary_compaction: bool = False
26
- vary_memory: bool = False
27
- vary_sub_agent: bool = False
28
-
29
- # Which numeric parameters to vary
30
  vary_compaction_head: bool = False
31
  vary_compaction_tail: bool = False
32
 
33
- # Values to use for numeric variations
 
 
 
34
  compaction_head_values: list[int] = [5, 10, 20]
35
  compaction_tail_values: list[int] = [20, 40, 60]
36
 
37
- # Optional job ID to associate configs with
 
 
 
38
  job_id: str | None = None
39
 
40
 
@@ -46,17 +50,12 @@ def parse_uuid(id_str: str) -> UUID:
46
  raise HTTPException(status_code=400, detail=f"Invalid UUID: {id_str}") from e
47
 
48
 
49
- @router.get("", response_model=list[ConfigResponse])
50
  async def list_configs(
51
  include_auto_generated: bool = False,
52
  session: AsyncSession = Depends(get_session),
53
  ) -> list[AgentConfig]:
54
- """List agent configurations.
55
-
56
- Args:
57
- include_auto_generated: If False (default), only show user-created configs.
58
- If True, include auto-generated configs from jobs.
59
- """
60
  query = select(AgentConfig)
61
  if not include_auto_generated:
62
  query = query.where(AgentConfig.is_auto_generated == False) # noqa: E712
@@ -65,9 +64,9 @@ async def list_configs(
65
  return list(result.scalars().all())
66
 
67
 
68
- @router.post("", response_model=ConfigResponse, status_code=201)
69
  async def create_config(
70
- data: ConfigCreate,
71
  session: AsyncSession = Depends(get_session),
72
  ) -> AgentConfig:
73
  """Create a new agent configuration."""
@@ -82,7 +81,7 @@ async def create_config(
82
  return config
83
 
84
 
85
- @router.get("/{config_id}", response_model=ConfigResponse)
86
  async def get_config(
87
  config_id: str,
88
  session: AsyncSession = Depends(get_session),
@@ -96,10 +95,10 @@ async def get_config(
96
  return config
97
 
98
 
99
- @router.put("/{config_id}", response_model=ConfigResponse)
100
  async def update_config(
101
  config_id: str,
102
- data: ConfigUpdate,
103
  session: AsyncSession = Depends(get_session),
104
  ) -> AgentConfig:
105
  """Update an agent configuration."""
@@ -109,25 +108,23 @@ async def update_config(
109
  if not config:
110
  raise HTTPException(status_code=404, detail="Config not found")
111
 
112
- # Update fields that were provided
113
  update_data = data.model_dump(exclude_unset=True)
114
 
115
- # Handle config_json fields separately
116
  config_fields = [
117
- "enable_message_compaction",
118
- "enable_memory_tool",
119
- "enable_sub_agent",
120
- "compaction_head_size",
121
- "compaction_tail_size",
122
- "bash_timeout",
123
  ]
124
 
125
  config_json = dict(config.config_json)
126
- for field in config_fields:
127
- if field in update_data:
128
- config_json[field] = update_data.pop(field)
 
 
 
129
 
130
- # Update top-level fields
131
  for key, value in update_data.items():
132
  setattr(config, key, value)
133
 
@@ -157,57 +154,44 @@ async def delete_config(
157
  await session.commit()
158
 
159
 
160
- @router.post("/generate-variations", response_model=list[ConfigResponse], status_code=201)
161
- async def generate_variations(
162
- data: VariationRequest,
163
  session: AsyncSession = Depends(get_session),
164
  ) -> list[AgentConfig]:
165
- """Generate config variations for ablation testing.
166
 
167
- This creates multiple configs by combining variation options.
168
- Each variation is named based on the features enabled.
169
  """
170
-
171
- # Build variation dimensions
172
- dimensions: list[list[tuple[str, str, bool | int]]] = []
173
- dimension_names: list[str] = []
174
 
175
  if data.vary_compaction:
176
- dimensions.append([
177
- ("compaction", "enable_message_compaction", True),
178
- ("no_compact", "enable_message_compaction", False),
179
- ])
180
- dimension_names.append("compaction")
181
-
182
- if data.vary_memory:
183
- dimensions.append([
184
- ("memory", "enable_memory_tool", True),
185
- ("no_mem", "enable_memory_tool", False),
186
- ])
187
- dimension_names.append("memory")
188
-
189
- if data.vary_sub_agent:
190
- dimensions.append([
191
- ("subagent", "enable_sub_agent", True),
192
- ("no_sub", "enable_sub_agent", False),
193
- ])
194
- dimension_names.append("sub_agent")
195
 
196
  if data.vary_compaction_head:
197
- dimensions.append([
198
- (f"head{size}", "compaction_head_size", size)
199
- for size in data.compaction_head_values
200
- ])
201
- dimension_names.append("head_size")
202
 
203
  if data.vary_compaction_tail:
204
- dimensions.append([
205
- (f"tail{size}", "compaction_tail_size", size)
206
- for size in data.compaction_tail_values
207
- ])
208
- dimension_names.append("tail_size")
 
 
 
 
 
209
 
210
- # Parse job_id if provided
211
  job_uuid = None
212
  if data.job_id:
213
  try:
@@ -215,19 +199,16 @@ async def generate_variations(
215
  except ValueError:
216
  pass
217
 
218
- # If no variations selected, create a single baseline config
219
- if not dimensions:
 
220
  config = AgentConfig(
221
  name=f"{data.base_name}_baseline",
222
- description=f"Baseline config from {data.base_name}",
223
  config_json={
224
  "name": f"{data.base_name}_baseline",
225
- "enable_message_compaction": True,
226
- "enable_memory_tool": True,
227
- "enable_sub_agent": False,
228
- "compaction_head_size": 10,
229
- "compaction_tail_size": 40,
230
- "bash_timeout": 120,
231
  },
232
  is_auto_generated=True,
233
  job_id=job_uuid,
@@ -237,41 +218,30 @@ async def generate_variations(
237
  await session.refresh(config)
238
  return [config]
239
 
240
- # Generate all combinations
 
 
241
  configs = []
242
- for combo in product(*dimensions):
243
- # Build name from variation labels
244
- name_parts = [label for label, _, _ in combo]
245
- config_name = f"{data.base_name}_{'_'.join(name_parts)}"
246
-
247
- # Build config JSON from defaults + variations
248
- config_json = {
249
- "name": config_name,
250
- "enable_message_compaction": True,
251
- "enable_memory_tool": True,
252
- "enable_sub_agent": False,
253
- "compaction_head_size": 10,
254
- "compaction_tail_size": 40,
255
- "bash_timeout": 120,
256
- }
257
-
258
- # Apply variations
259
- for _, key, value in combo:
260
- config_json[key] = value
261
-
262
- # Check if config with this name already exists
263
  existing = await session.execute(
264
- select(AgentConfig).where(AgentConfig.name == config_name).limit(1)
265
  )
266
  existing_config = existing.scalar_one_or_none()
267
 
268
  if existing_config:
269
  configs.append(existing_config)
270
  else:
 
271
  config = AgentConfig(
272
- name=config_name,
273
- description=f"Auto-generated variation: {', '.join(name_parts)}",
274
- config_json=config_json,
 
 
 
 
275
  is_auto_generated=True,
276
  job_id=job_uuid,
277
  )
 
1
  # Copyright (c) Microsoft. All rights reserved.
2
+ """Agent config API routes."""
3
 
 
4
  from uuid import UUID
5
 
6
  from fastapi import APIRouter, Depends, HTTPException
 
8
  from sqlalchemy.ext.asyncio import AsyncSession
9
  from sqlmodel import select, desc
10
 
11
+ from flow.experiments.models import Agent, CompactionConfig, GridSearchStrategy
12
+
13
  from ..database import get_session
14
  from ..models.config import AgentConfig
15
+ from ..schemas import AgentCreate, AgentUpdate, AgentResponse
16
 
17
  router = APIRouter(prefix="/configs", tags=["configs"])
18
 
19
 
20
+ class CandidateRequest(BaseModel):
21
+ """Request schema for generating candidate agents."""
22
 
23
  base_name: str = "experiment"
24
 
25
+ # Which dimensions to vary
26
  vary_compaction: bool = False
27
+ vary_tools: bool = False
 
 
 
28
  vary_compaction_head: bool = False
29
  vary_compaction_tail: bool = False
30
 
31
+ # Values for tool variations (preset names)
32
+ tool_presets: list[str] = ["standard", "minimal", "full"]
33
+
34
+ # Values for numeric variations
35
  compaction_head_values: list[int] = [5, 10, 20]
36
  compaction_tail_values: list[int] = [20, 40, 60]
37
 
38
+ # Budget limit
39
+ budget: int = 100
40
+
41
+ # Optional job ID to associate candidates with
42
  job_id: str | None = None
43
 
44
 
 
50
  raise HTTPException(status_code=400, detail=f"Invalid UUID: {id_str}") from e
51
 
52
 
53
+ @router.get("", response_model=list[AgentResponse])
54
  async def list_configs(
55
  include_auto_generated: bool = False,
56
  session: AsyncSession = Depends(get_session),
57
  ) -> list[AgentConfig]:
58
+ """List agent configurations."""
 
 
 
 
 
59
  query = select(AgentConfig)
60
  if not include_auto_generated:
61
  query = query.where(AgentConfig.is_auto_generated == False) # noqa: E712
 
64
  return list(result.scalars().all())
65
 
66
 
67
+ @router.post("", response_model=AgentResponse, status_code=201)
68
  async def create_config(
69
+ data: AgentCreate,
70
  session: AsyncSession = Depends(get_session),
71
  ) -> AgentConfig:
72
  """Create a new agent configuration."""
 
81
  return config
82
 
83
 
84
+ @router.get("/{config_id}", response_model=AgentResponse)
85
  async def get_config(
86
  config_id: str,
87
  session: AsyncSession = Depends(get_session),
 
95
  return config
96
 
97
 
98
+ @router.put("/{config_id}", response_model=AgentResponse)
99
  async def update_config(
100
  config_id: str,
101
+ data: AgentUpdate,
102
  session: AsyncSession = Depends(get_session),
103
  ) -> AgentConfig:
104
  """Update an agent configuration."""
 
108
  if not config:
109
  raise HTTPException(status_code=404, detail="Config not found")
110
 
 
111
  update_data = data.model_dump(exclude_unset=True)
112
 
 
113
  config_fields = [
114
+ "instructions",
115
+ "model",
116
+ "compaction",
117
+ "tools",
 
 
118
  ]
119
 
120
  config_json = dict(config.config_json)
121
+ for field_name in config_fields:
122
+ if field_name in update_data:
123
+ value = update_data.pop(field_name)
124
+ if field_name == "compaction" and hasattr(value, "model_dump"):
125
+ value = value.model_dump()
126
+ config_json[field_name] = value
127
 
 
128
  for key, value in update_data.items():
129
  setattr(config, key, value)
130
 
 
154
  await session.commit()
155
 
156
 
157
+ @router.post("/generate-candidates", response_model=list[AgentResponse], status_code=201)
158
+ async def generate_candidates(
159
+ data: CandidateRequest,
160
  session: AsyncSession = Depends(get_session),
161
  ) -> list[AgentConfig]:
162
+ """Generate candidate agents for optimization.
163
 
164
+ Uses GridSearchStrategy to generate candidate variants from a base agent.
165
+ Each candidate is stored as an AgentConfig in the database.
166
  """
167
+ variations: dict[str, list] = {}
 
 
 
168
 
169
  if data.vary_compaction:
170
+ variations["compaction"] = [
171
+ CompactionConfig.head_tail(10, 40),
172
+ CompactionConfig.none(),
173
+ ]
174
+
175
+ if data.vary_tools:
176
+ variations["tools"] = data.tool_presets
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  if data.vary_compaction_head:
179
+ variations["compaction"] = [
180
+ CompactionConfig.head_tail(h, 40) for h in data.compaction_head_values
181
+ ]
 
 
182
 
183
  if data.vary_compaction_tail:
184
+ if data.vary_compaction_head:
185
+ variations["compaction"] = [
186
+ CompactionConfig.head_tail(h, t)
187
+ for h in data.compaction_head_values
188
+ for t in data.compaction_tail_values
189
+ ]
190
+ else:
191
+ variations["compaction"] = [
192
+ CompactionConfig.head_tail(10, t) for t in data.compaction_tail_values
193
+ ]
194
 
 
195
  job_uuid = None
196
  if data.job_id:
197
  try:
 
199
  except ValueError:
200
  pass
201
 
202
+ base = Agent(name=data.base_name)
203
+
204
+ if not variations:
205
  config = AgentConfig(
206
  name=f"{data.base_name}_baseline",
207
+ description=f"Baseline agent from {data.base_name}",
208
  config_json={
209
  "name": f"{data.base_name}_baseline",
210
+ "compaction": {"strategy": "head_tail", "params": {"head_size": 10, "tail_size": 40}},
211
+ "tools": "standard",
 
 
 
 
212
  },
213
  is_auto_generated=True,
214
  job_id=job_uuid,
 
218
  await session.refresh(config)
219
  return [config]
220
 
221
+ strategy = GridSearchStrategy(variations)
222
+ candidates = strategy.generate(base, data.budget)
223
+
224
  configs = []
225
+ for candidate in candidates:
226
+ candidate_name = candidate.agent.name
227
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  existing = await session.execute(
229
+ select(AgentConfig).where(AgentConfig.name == candidate_name).limit(1)
230
  )
231
  existing_config = existing.scalar_one_or_none()
232
 
233
  if existing_config:
234
  configs.append(existing_config)
235
  else:
236
+ from dataclasses import asdict
237
  config = AgentConfig(
238
+ name=candidate_name,
239
+ description=candidate.rationale,
240
+ config_json={
241
+ "name": candidate_name,
242
+ "compaction": asdict(candidate.agent.compaction),
243
+ "tools": candidate.agent.tools,
244
+ },
245
  is_auto_generated=True,
246
  job_id=job_uuid,
247
  )
src/flow/ui/api/jobs.py CHANGED
@@ -53,12 +53,12 @@ async def create_job(
53
  session: AsyncSession = Depends(get_session),
54
  ) -> OptimizationJob:
55
  """Create a new optimization job."""
56
- # Validate config_ids exist
57
- for config_id in data.config_ids:
58
- uuid_id = parse_uuid(config_id)
59
  result = await session.execute(select(AgentConfig).where(AgentConfig.id == uuid_id))
60
  if not result.scalar_one_or_none():
61
- raise HTTPException(status_code=400, detail=f"Config {config_id} not found")
62
 
63
  # Validate task_ids exist
64
  for task_id in data.task_ids:
@@ -69,11 +69,11 @@ async def create_job(
69
 
70
  job = OptimizationJob(
71
  name=data.name,
72
- config_ids=data.config_ids,
73
  task_ids=data.task_ids,
74
  parallel=data.parallel,
75
  use_llm_eval=data.use_llm_eval,
76
- total_experiments=len(data.config_ids) * len(data.task_ids),
77
  )
78
  session.add(job)
79
  await session.commit()
 
53
  session: AsyncSession = Depends(get_session),
54
  ) -> OptimizationJob:
55
  """Create a new optimization job."""
56
+ # Validate candidate_ids exist
57
+ for candidate_id in data.candidate_ids:
58
+ uuid_id = parse_uuid(candidate_id)
59
  result = await session.execute(select(AgentConfig).where(AgentConfig.id == uuid_id))
60
  if not result.scalar_one_or_none():
61
+ raise HTTPException(status_code=400, detail=f"Candidate {candidate_id} not found")
62
 
63
  # Validate task_ids exist
64
  for task_id in data.task_ids:
 
69
 
70
  job = OptimizationJob(
71
  name=data.name,
72
+ candidate_ids=data.candidate_ids,
73
  task_ids=data.task_ids,
74
  parallel=data.parallel,
75
  use_llm_eval=data.use_llm_eval,
76
+ total_experiments=len(data.candidate_ids) * len(data.task_ids),
77
  )
78
  session.add(job)
79
  await session.commit()
src/flow/ui/api/runs.py CHANGED
@@ -26,7 +26,7 @@ def parse_uuid(id_str: str) -> UUID:
26
  @router.get("", response_model=list[RunResponse])
27
  async def list_runs(
28
  job_id: str | None = None,
29
- config_name: str | None = None,
30
  task_name: str | None = None,
31
  is_pareto: bool | None = None,
32
  session: AsyncSession = Depends(get_session),
@@ -37,8 +37,8 @@ async def list_runs(
37
  if job_id:
38
  uuid_id = parse_uuid(job_id)
39
  query = query.where(ExperimentRun.job_id == uuid_id)
40
- if config_name:
41
- query = query.where(ExperimentRun.config_name == config_name)
42
  if task_name:
43
  query = query.where(ExperimentRun.task_name == task_name)
44
  if is_pareto is not None:
@@ -75,7 +75,7 @@ async def get_run(
75
  return {
76
  "id": str(run.id),
77
  "job_id": str(run.job_id),
78
- "config_name": run.config_name,
79
  "task_name": run.task_name,
80
  "status": run.status,
81
  "tokens_total": run.tokens_total,
@@ -111,11 +111,11 @@ async def get_job_summary(
111
  raise HTTPException(status_code=404, detail="No runs found for job")
112
 
113
  # Aggregate by config
114
- config_summaries: dict[str, dict[str, Any]] = {}
115
  for run in runs:
116
- if run.config_name not in config_summaries:
117
- config_summaries[run.config_name] = {
118
- "config_name": run.config_name,
119
  "total_runs": 0,
120
  "passed_runs": 0,
121
  "avg_score": 0.0,
@@ -125,7 +125,7 @@ async def get_job_summary(
125
  "pareto_rank": 999,
126
  }
127
 
128
- summary = config_summaries[run.config_name]
129
  summary["total_runs"] += 1
130
  if run.passed:
131
  summary["passed_runs"] += 1
@@ -137,7 +137,7 @@ async def get_job_summary(
137
  summary["pareto_rank"] = min(summary["pareto_rank"], run.pareto_rank)
138
 
139
  # Calculate averages
140
- for summary in config_summaries.values():
141
  n = summary["total_runs"]
142
  summary["avg_score"] /= n
143
  summary["avg_tokens"] /= n
@@ -145,13 +145,13 @@ async def get_job_summary(
145
 
146
  # Sort by score descending
147
  sorted_summaries = sorted(
148
- config_summaries.values(),
149
  key=lambda x: (-x["avg_score"], x["avg_tokens"]),
150
  )
151
 
152
  return {
153
  "job_id": job_id,
154
  "total_runs": len(runs),
155
- "config_summaries": sorted_summaries,
156
- "pareto_configs": [s["config_name"] for s in sorted_summaries if s["is_pareto"]],
157
  }
 
26
  @router.get("", response_model=list[RunResponse])
27
  async def list_runs(
28
  job_id: str | None = None,
29
+ candidate_name: str | None = None,
30
  task_name: str | None = None,
31
  is_pareto: bool | None = None,
32
  session: AsyncSession = Depends(get_session),
 
37
  if job_id:
38
  uuid_id = parse_uuid(job_id)
39
  query = query.where(ExperimentRun.job_id == uuid_id)
40
+ if candidate_name:
41
+ query = query.where(ExperimentRun.candidate_name == candidate_name)
42
  if task_name:
43
  query = query.where(ExperimentRun.task_name == task_name)
44
  if is_pareto is not None:
 
75
  return {
76
  "id": str(run.id),
77
  "job_id": str(run.job_id),
78
+ "candidate_name": run.candidate_name,
79
  "task_name": run.task_name,
80
  "status": run.status,
81
  "tokens_total": run.tokens_total,
 
111
  raise HTTPException(status_code=404, detail="No runs found for job")
112
 
113
  # Aggregate by config
114
+ candidate_summaries: dict[str, dict[str, Any]] = {}
115
  for run in runs:
116
+ if run.candidate_name not in candidate_summaries:
117
+ candidate_summaries[run.candidate_name] = {
118
+ "candidate_name": run.candidate_name,
119
  "total_runs": 0,
120
  "passed_runs": 0,
121
  "avg_score": 0.0,
 
125
  "pareto_rank": 999,
126
  }
127
 
128
+ summary = candidate_summaries[run.candidate_name]
129
  summary["total_runs"] += 1
130
  if run.passed:
131
  summary["passed_runs"] += 1
 
137
  summary["pareto_rank"] = min(summary["pareto_rank"], run.pareto_rank)
138
 
139
  # Calculate averages
140
+ for summary in candidate_summaries.values():
141
  n = summary["total_runs"]
142
  summary["avg_score"] /= n
143
  summary["avg_tokens"] /= n
 
145
 
146
  # Sort by score descending
147
  sorted_summaries = sorted(
148
+ candidate_summaries.values(),
149
  key=lambda x: (-x["avg_score"], x["avg_tokens"]),
150
  )
151
 
152
  return {
153
  "job_id": job_id,
154
  "total_runs": len(runs),
155
+ "candidate_summaries": sorted_summaries,
156
+ "pareto_candidates": [s["candidate_name"] for s in sorted_summaries if s["is_pareto"]],
157
  }
src/flow/ui/database.py CHANGED
@@ -21,70 +21,14 @@ engine = create_async_engine(DATABASE_URL, echo=False, future=True)
21
  async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
22
 
23
 
24
- async def _migrate_schema(conn) -> None:
25
- """Apply schema migrations for new columns.
26
-
27
- SQLModel's create_all only creates missing tables, not columns.
28
- This adds any missing columns to existing tables.
29
- """
30
- from sqlalchemy import text, inspect
31
-
32
- def _sync_migrate(sync_conn):
33
- inspector = inspect(sync_conn)
34
-
35
- # Check agent_configs table
36
- if inspector.has_table("agent_configs"):
37
- columns = {c["name"] for c in inspector.get_columns("agent_configs")}
38
-
39
- # Add is_auto_generated column if missing
40
- if "is_auto_generated" not in columns:
41
- logger.info("Adding is_auto_generated column to agent_configs")
42
- sync_conn.execute(
43
- text("ALTER TABLE agent_configs ADD COLUMN is_auto_generated BOOLEAN DEFAULT 0")
44
- )
45
-
46
- # Add job_id column if missing
47
- if "job_id" not in columns:
48
- logger.info("Adding job_id column to agent_configs")
49
- sync_conn.execute(
50
- text("ALTER TABLE agent_configs ADD COLUMN job_id VARCHAR(36)")
51
- )
52
-
53
- # Retroactively mark configs with "Auto-generated variation:" in description
54
- logger.info("Marking auto-generated configs based on description pattern")
55
- sync_conn.execute(
56
- text(
57
- "UPDATE agent_configs SET is_auto_generated = 1 "
58
- "WHERE description LIKE 'Auto-generated variation:%' "
59
- "AND (is_auto_generated IS NULL OR is_auto_generated = 0)"
60
- )
61
- )
62
-
63
- await conn.run_sync(_sync_migrate)
64
-
65
-
66
  async def init_db() -> None:
67
- """Initialize database tables.
68
-
69
- With multiple uvicorn workers, each worker calls this on startup.
70
- SQLite + create_all can race: worker A checks table doesn't exist,
71
- worker B creates it, worker A tries to create and fails.
72
-
73
- Solution: Catch the OperationalError and continue - if the table
74
- already exists, that's fine.
75
-
76
- See: https://github.com/sqlalchemy/sqlalchemy/issues/4936
77
- """
78
- # Import models to ensure they're registered with SQLModel.metadata
79
  from flow.ui.models import AgentConfig, TaskModel, OptimizationJob, ExperimentRun # noqa: F401
80
 
81
  try:
82
  async with engine.begin() as conn:
83
  await conn.run_sync(SQLModel.metadata.create_all)
84
- # Apply migrations for new columns
85
- await _migrate_schema(conn)
86
  except Exception as e:
87
- # Handle race condition: "table already exists" is fine
88
  if "already exists" in str(e).lower():
89
  logger.debug("Tables already exist (race condition handled)")
90
  else:
 
21
  async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  async def init_db() -> None:
25
+ """Initialize database tables."""
 
 
 
 
 
 
 
 
 
 
 
26
  from flow.ui.models import AgentConfig, TaskModel, OptimizationJob, ExperimentRun # noqa: F401
27
 
28
  try:
29
  async with engine.begin() as conn:
30
  await conn.run_sync(SQLModel.metadata.create_all)
 
 
31
  except Exception as e:
 
32
  if "already exists" in str(e).lower():
33
  logger.debug("Tables already exist (race condition handled)")
34
  else:
src/flow/ui/models/config.py CHANGED
@@ -17,12 +17,12 @@ class AgentConfig(SQLModel, table=True):
17
  name: str = Field(index=True)
18
  description: str = ""
19
 
20
- # Store AblationConfig as JSON
21
  config_json: dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON))
22
 
23
- # Track auto-generated configs (created by variation endpoint)
24
  is_auto_generated: bool = Field(default=False, index=True)
25
- # Link to the job that created this config (if auto-generated)
26
  job_id: UUID | None = Field(default=None, index=True)
27
 
28
  created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
@@ -30,5 +30,5 @@ class AgentConfig(SQLModel, table=True):
30
 
31
  @property
32
  def config(self) -> dict[str, Any]:
33
- """Alias for config_json for API compatibility."""
34
  return self.config_json
 
17
  name: str = Field(index=True)
18
  description: str = ""
19
 
20
+ # Store Agent config as JSON
21
  config_json: dict[str, Any] = Field(default_factory=dict, sa_column=Column(JSON))
22
 
23
+ # Track auto-generated candidates (created by generate-candidates endpoint)
24
  is_auto_generated: bool = Field(default=False, index=True)
25
+ # Link to the job that created this candidate (if auto-generated)
26
  job_id: UUID | None = Field(default=None, index=True)
27
 
28
  created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
 
30
 
31
  @property
32
  def config(self) -> dict[str, Any]:
33
+ """Alias for config_json used by API response serialization."""
34
  return self.config_json
src/flow/ui/models/job.py CHANGED
@@ -33,7 +33,7 @@ class OptimizationJob(SQLModel, table=True):
33
  use_llm_eval: bool = Field(default=False)
34
 
35
  # Selected configs and tasks (stored as IDs)
36
- config_ids: list[str] = Field(default_factory=list, sa_column=Column(JSON))
37
  task_ids: list[str] = Field(default_factory=list, sa_column=Column(JSON))
38
 
39
  # Results
 
33
  use_llm_eval: bool = Field(default=False)
34
 
35
  # Selected configs and tasks (stored as IDs)
36
+ candidate_ids: list[str] = Field(default_factory=list, sa_column=Column(JSON))
37
  task_ids: list[str] = Field(default_factory=list, sa_column=Column(JSON))
38
 
39
  # Results
src/flow/ui/models/run.py CHANGED
@@ -16,7 +16,7 @@ class ExperimentRun(SQLModel, table=True):
16
  id: UUID = Field(default_factory=uuid4, primary_key=True)
17
  job_id: UUID = Field(foreign_key="optimization_jobs.id", index=True)
18
 
19
- config_name: str
20
  task_name: str
21
 
22
  # Status
 
16
  id: UUID = Field(default_factory=uuid4, primary_key=True)
17
  job_id: UUID = Field(foreign_key="optimization_jobs.id", index=True)
18
 
19
+ candidate_name: str
20
  task_name: str
21
 
22
  # Status
src/flow/ui/models/task.py CHANGED
@@ -28,5 +28,5 @@ class TaskModel(SQLModel, table=True):
28
 
29
  @property
30
  def criteria(self) -> list[dict[str, Any]]:
31
- """Alias for criteria_json for API compatibility."""
32
  return self.criteria_json
 
28
 
29
  @property
30
  def criteria(self) -> list[dict[str, Any]]:
31
+ """Alias for criteria_json used by API response serialization."""
32
  return self.criteria_json
src/flow/ui/schemas/__init__.py CHANGED
@@ -1,15 +1,15 @@
1
  # Copyright (c) Microsoft. All rights reserved.
2
  """Pydantic schemas for API requests/responses."""
3
 
4
- from .config import ConfigCreate, ConfigUpdate, ConfigResponse
5
  from .task import TaskCreate, TaskResponse, CriterionSchema
6
  from .job import JobCreate, JobResponse, JobProgress
7
  from .run import RunResponse, RunDetailResponse, CriterionResultSchema
8
 
9
  __all__ = [
10
- "ConfigCreate",
11
- "ConfigUpdate",
12
- "ConfigResponse",
13
  "TaskCreate",
14
  "TaskResponse",
15
  "CriterionSchema",
 
1
  # Copyright (c) Microsoft. All rights reserved.
2
  """Pydantic schemas for API requests/responses."""
3
 
4
+ from .config import AgentCreate, AgentUpdate, AgentResponse
5
  from .task import TaskCreate, TaskResponse, CriterionSchema
6
  from .job import JobCreate, JobResponse, JobProgress
7
  from .run import RunResponse, RunDetailResponse, CriterionResultSchema
8
 
9
  __all__ = [
10
+ "AgentCreate",
11
+ "AgentUpdate",
12
+ "AgentResponse",
13
  "TaskCreate",
14
  "TaskResponse",
15
  "CriterionSchema",
src/flow/ui/schemas/config.py CHANGED
@@ -1,5 +1,5 @@
1
  # Copyright (c) Microsoft. All rights reserved.
2
- """Config schemas."""
3
 
4
  from datetime import datetime
5
  from typing import Any
@@ -8,46 +8,52 @@ from uuid import UUID
8
  from pydantic import BaseModel, ConfigDict, field_validator
9
 
10
 
11
- class ConfigCreate(BaseModel):
12
- """Request schema for creating a config."""
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  name: str
15
  description: str = ""
16
- enable_message_compaction: bool = True
17
- enable_memory_tool: bool = True
18
- enable_sub_agent: bool = False
19
- compaction_head_size: int = 10
20
- compaction_tail_size: int = 40
21
- bash_timeout: int = 120
22
 
23
  def to_config_json(self) -> dict[str, Any]:
24
- """Convert to config JSON for storage."""
25
  return {
26
- "name": self.name,
27
- "enable_message_compaction": self.enable_message_compaction,
28
- "enable_memory_tool": self.enable_memory_tool,
29
- "enable_sub_agent": self.enable_sub_agent,
30
- "compaction_head_size": self.compaction_head_size,
31
- "compaction_tail_size": self.compaction_tail_size,
32
- "bash_timeout": self.bash_timeout,
33
  }
34
 
35
 
36
- class ConfigUpdate(BaseModel):
37
- """Request schema for updating a config."""
38
 
39
  name: str | None = None
40
  description: str | None = None
41
- enable_message_compaction: bool | None = None
42
- enable_memory_tool: bool | None = None
43
- enable_sub_agent: bool | None = None
44
- compaction_head_size: int | None = None
45
- compaction_tail_size: int | None = None
46
- bash_timeout: int | None = None
47
 
48
 
49
- class ConfigResponse(BaseModel):
50
- """Response schema for a config."""
51
 
52
  model_config = ConfigDict(from_attributes=True)
53
 
@@ -63,7 +69,6 @@ class ConfigResponse(BaseModel):
63
  @field_validator("id", mode="before")
64
  @classmethod
65
  def convert_uuid(cls, v: UUID | str) -> str:
66
- """Convert UUID to string."""
67
  if isinstance(v, UUID):
68
  return str(v)
69
  return v
@@ -71,7 +76,6 @@ class ConfigResponse(BaseModel):
71
  @field_validator("job_id", mode="before")
72
  @classmethod
73
  def convert_job_uuid(cls, v: UUID | str | None) -> str | None:
74
- """Convert job UUID to string."""
75
  if v is None:
76
  return None
77
  if isinstance(v, UUID):
 
1
  # Copyright (c) Microsoft. All rights reserved.
2
+ """Agent config schemas."""
3
 
4
  from datetime import datetime
5
  from typing import Any
 
8
  from pydantic import BaseModel, ConfigDict, field_validator
9
 
10
 
11
+ class CompactionConfigSchema(BaseModel):
12
+ """Compaction strategy configuration."""
13
+
14
+ strategy: str = "head_tail"
15
+ params: dict[str, Any] = {"head_size": 10, "tail_size": 40}
16
+
17
+
18
+ class AgentCreate(BaseModel):
19
+ """Request schema for creating an agent.
20
+
21
+ Tools can be specified as:
22
+ - str: Preset name ("standard", "minimal", "full", "readonly")
23
+ - list[str]: List of tool names
24
+ - dict[str, dict]: Full specification with per-tool configs
25
+ """
26
 
27
  name: str
28
  description: str = ""
29
+ instructions: str | None = None
30
+ model: str | None = None
31
+ compaction: CompactionConfigSchema = CompactionConfigSchema()
32
+ tools: str | list[str] | dict[str, dict[str, Any]] = "standard"
 
 
33
 
34
  def to_config_json(self) -> dict[str, Any]:
35
+ """Convert to config JSON for storage (runtime settings only)."""
36
  return {
37
+ "instructions": self.instructions,
38
+ "model": self.model,
39
+ "compaction": self.compaction.model_dump(),
40
+ "tools": self.tools,
 
 
 
41
  }
42
 
43
 
44
+ class AgentUpdate(BaseModel):
45
+ """Request schema for updating an agent."""
46
 
47
  name: str | None = None
48
  description: str | None = None
49
+ instructions: str | None = None
50
+ model: str | None = None
51
+ compaction: CompactionConfigSchema | None = None
52
+ tools: str | list[str] | dict[str, dict[str, Any]] | None = None
 
 
53
 
54
 
55
+ class AgentResponse(BaseModel):
56
+ """Response schema for an agent."""
57
 
58
  model_config = ConfigDict(from_attributes=True)
59
 
 
69
  @field_validator("id", mode="before")
70
  @classmethod
71
  def convert_uuid(cls, v: UUID | str) -> str:
 
72
  if isinstance(v, UUID):
73
  return str(v)
74
  return v
 
76
  @field_validator("job_id", mode="before")
77
  @classmethod
78
  def convert_job_uuid(cls, v: UUID | str | None) -> str | None:
 
79
  if v is None:
80
  return None
81
  if isinstance(v, UUID):
src/flow/ui/schemas/job.py CHANGED
@@ -13,7 +13,7 @@ class JobCreate(BaseModel):
13
  """Request schema for creating a job."""
14
 
15
  name: str = ""
16
- config_ids: list[str]
17
  task_ids: list[str]
18
  parallel: int = 4
19
  use_llm_eval: bool = False
@@ -29,7 +29,7 @@ class JobResponse(BaseModel):
29
  status: JobStatus
30
  parallel: int
31
  use_llm_eval: bool
32
- config_ids: list[str]
33
  task_ids: list[str]
34
  pareto_frontier: list[str]
35
  output_dir: str | None
@@ -56,6 +56,6 @@ class JobProgress(BaseModel):
56
  job_id: str
57
  completed: int = 0
58
  total: int = 0
59
- current_config: str = ""
60
  current_task: str = ""
61
  message: str = ""
 
13
  """Request schema for creating a job."""
14
 
15
  name: str = ""
16
+ candidate_ids: list[str]
17
  task_ids: list[str]
18
  parallel: int = 4
19
  use_llm_eval: bool = False
 
29
  status: JobStatus
30
  parallel: int
31
  use_llm_eval: bool
32
+ candidate_ids: list[str]
33
  task_ids: list[str]
34
  pareto_frontier: list[str]
35
  output_dir: str | None
 
56
  job_id: str
57
  completed: int = 0
58
  total: int = 0
59
+ current_candidate: str = ""
60
  current_task: str = ""
61
  message: str = ""
src/flow/ui/schemas/run.py CHANGED
@@ -15,7 +15,7 @@ class RunResponse(BaseModel):
15
 
16
  id: str
17
  job_id: str
18
- config_name: str
19
  task_name: str
20
  status: str
21
  tokens_total: int
@@ -51,7 +51,7 @@ class RunDetailResponse(BaseModel):
51
 
52
  id: str
53
  job_id: str
54
- config_name: str
55
  task_name: str
56
  status: str
57
 
 
15
 
16
  id: str
17
  job_id: str
18
+ candidate_name: str
19
  task_name: str
20
  status: str
21
  tokens_total: int
 
51
 
52
  id: str
53
  job_id: str
54
+ candidate_name: str
55
  task_name: str
56
  status: str
57
 
src/flow/ui/services/optimizer_service.py CHANGED
@@ -9,7 +9,7 @@ from uuid import UUID
9
  from sqlalchemy.ext.asyncio import AsyncSession
10
  from sqlmodel import select
11
 
12
- from flow.experiments.ablation import AblationConfig
13
  from flow.experiments.optimizer import FlowOptimizer
14
  from flow.experiments.types import EvalCriterion, Task
15
 
@@ -26,12 +26,10 @@ class OptimizerService:
26
 
27
  async def run_job(self, job_id: str | UUID) -> AsyncGenerator[JobProgress, None]:
28
  """Run an optimization job and yield progress updates."""
29
- # Convert to UUID if string
30
  if isinstance(job_id, str):
31
  job_id = UUID(job_id)
32
 
33
  async with async_session() as session:
34
- # Load job
35
  result = await session.execute(
36
  select(OptimizationJob).where(OptimizationJob.id == job_id)
37
  )
@@ -44,7 +42,6 @@ class OptimizerService:
44
  )
45
  return
46
 
47
- # Update job status
48
  job.status = JobStatus.RUNNING
49
  job.started_at = datetime.now(timezone.utc)
50
  await session.commit()
@@ -58,48 +55,39 @@ class OptimizerService:
58
  )
59
 
60
  try:
61
- # Load configs
62
- configs = await self._load_configs(session, job.config_ids)
63
- if not configs:
64
- raise ValueError("No valid configs found")
65
 
66
- # Load tasks
67
  tasks = await self._load_tasks(session, job.task_ids)
68
  if not tasks:
69
  raise ValueError("No valid tasks found")
70
 
71
- # Create optimizer
72
  optimizer = FlowOptimizer(
73
  parallel=job.parallel,
74
  use_llm_evaluator=job.use_llm_eval,
75
  )
76
 
77
- # Track progress via callback
78
  progress_queue: asyncio.Queue[tuple[int, int, str, str]] = asyncio.Queue()
79
 
80
  def progress_callback(completed: int, total: int, config: str, task: str) -> None:
81
- """Callback invoked by FlowOptimizer on each completion."""
82
  try:
83
  progress_queue.put_nowait((completed, total, config, task))
84
  except asyncio.QueueFull:
85
  pass
86
 
87
- # Run optimization in background task
88
  async def run_optimization():
89
  return await optimizer.optimize(
90
- configs=configs,
91
  tasks=tasks,
92
  progress_callback=progress_callback,
93
  )
94
 
95
- # Start optimization
96
  opt_task = asyncio.create_task(run_optimization())
97
 
98
- # Yield progress updates while optimization runs
99
  while not opt_task.done():
100
  try:
101
- # Wait for progress with timeout
102
- completed, total, config_name, task_name = await asyncio.wait_for(
103
  progress_queue.get(),
104
  timeout=1.0,
105
  )
@@ -108,32 +96,26 @@ class OptimizerService:
108
  job_id=str(job_id),
109
  completed=completed,
110
  total=total,
111
- current_config=config_name,
112
  current_task=task_name,
113
- message=f"Running {config_name}/{task_name}...",
114
  )
115
 
116
- # Update job progress in DB
117
  job.completed_experiments = completed
118
  await session.commit()
119
 
120
  except asyncio.TimeoutError:
121
- # No progress update, check if task failed
122
  if opt_task.done():
123
- # Check for exception before breaking
124
  exc = opt_task.exception()
125
  if exc:
126
  raise exc
127
  continue
128
 
129
- # Get final result - this will re-raise any exception from the task
130
  opt_result = await opt_task
131
 
132
- # Check if all experiments failed
133
  if opt_result.total_experiments == 0 or len(opt_result.summaries) == 0:
134
- # No successful experiments - this is a failure
135
  job.status = JobStatus.FAILED
136
- job.error = "All experiments failed. Check server logs for details. Common causes: missing API keys (AZURE_OPENAI_ENDPOINT, OPENAI_API_KEY), invalid configuration."
137
  job.completed_at = datetime.now(timezone.utc)
138
  await session.commit()
139
 
@@ -144,12 +126,11 @@ class OptimizerService:
144
  )
145
  return
146
 
147
- # Save runs to database
148
  for summary in opt_result.summaries:
149
  for task_result in summary.task_results:
150
  run = ExperimentRun(
151
  job_id=job.id,
152
- config_name=task_result.config_name,
153
  task_name=task_result.task_name,
154
  status="completed",
155
  tokens_total=task_result.metrics.total_tokens,
@@ -171,7 +152,6 @@ class OptimizerService:
171
  )
172
  session.add(run)
173
 
174
- # Update job
175
  job.status = JobStatus.COMPLETED
176
  job.completed_experiments = opt_result.total_experiments
177
  job.pareto_frontier = opt_result.pareto_frontier
@@ -184,7 +164,7 @@ class OptimizerService:
184
  job_id=str(job_id),
185
  completed=opt_result.total_experiments,
186
  total=job.total_experiments,
187
- message=f"Optimization complete. Pareto configs: {', '.join(opt_result.pareto_frontier)}",
188
  )
189
 
190
  except Exception as e:
@@ -199,37 +179,47 @@ class OptimizerService:
199
  message=f"Optimization failed: {e}",
200
  )
201
 
202
- async def _load_configs(
203
  self,
204
  session: AsyncSession,
205
- config_ids: list[str],
206
- ) -> list[AblationConfig]:
207
- """Load configs from database and convert to AblationConfig."""
208
- configs = []
209
- for config_id in config_ids:
210
  result = await session.execute(
211
- select(AgentConfig).where(AgentConfig.id == UUID(config_id))
212
  )
213
  db_config = result.scalar_one_or_none()
214
  if db_config:
215
  cfg = db_config.config_json
216
- configs.append(AblationConfig(
 
 
 
 
 
 
 
 
 
 
 
217
  name=db_config.name,
218
- enable_message_compaction=cfg.get("enable_message_compaction", True),
219
- enable_memory_tool=cfg.get("enable_memory_tool", True),
220
- enable_sub_agent=cfg.get("enable_sub_agent", False),
221
- compaction_head_size=cfg.get("compaction_head_size", 10),
222
- compaction_tail_size=cfg.get("compaction_tail_size", 40),
223
- bash_timeout=cfg.get("bash_timeout", 120),
224
- ))
225
- return configs
226
 
227
  async def _load_tasks(
228
  self,
229
  session: AsyncSession,
230
  task_ids: list[str],
231
  ) -> list[Task]:
232
- """Load tasks from database and convert to Task."""
233
  tasks = []
234
  for task_id in task_ids:
235
  result = await session.execute(
 
9
  from sqlalchemy.ext.asyncio import AsyncSession
10
  from sqlmodel import select
11
 
12
+ from flow.experiments.models import Agent, Candidate, CompactionConfig
13
  from flow.experiments.optimizer import FlowOptimizer
14
  from flow.experiments.types import EvalCriterion, Task
15
 
 
26
 
27
  async def run_job(self, job_id: str | UUID) -> AsyncGenerator[JobProgress, None]:
28
  """Run an optimization job and yield progress updates."""
 
29
  if isinstance(job_id, str):
30
  job_id = UUID(job_id)
31
 
32
  async with async_session() as session:
 
33
  result = await session.execute(
34
  select(OptimizationJob).where(OptimizationJob.id == job_id)
35
  )
 
42
  )
43
  return
44
 
 
45
  job.status = JobStatus.RUNNING
46
  job.started_at = datetime.now(timezone.utc)
47
  await session.commit()
 
55
  )
56
 
57
  try:
58
+ candidates = await self._load_candidates(session, job.candidate_ids)
59
+ if not candidates:
60
+ raise ValueError("No valid candidates found")
 
61
 
 
62
  tasks = await self._load_tasks(session, job.task_ids)
63
  if not tasks:
64
  raise ValueError("No valid tasks found")
65
 
 
66
  optimizer = FlowOptimizer(
67
  parallel=job.parallel,
68
  use_llm_evaluator=job.use_llm_eval,
69
  )
70
 
 
71
  progress_queue: asyncio.Queue[tuple[int, int, str, str]] = asyncio.Queue()
72
 
73
  def progress_callback(completed: int, total: int, config: str, task: str) -> None:
 
74
  try:
75
  progress_queue.put_nowait((completed, total, config, task))
76
  except asyncio.QueueFull:
77
  pass
78
 
 
79
  async def run_optimization():
80
  return await optimizer.optimize(
81
+ candidates=candidates,
82
  tasks=tasks,
83
  progress_callback=progress_callback,
84
  )
85
 
 
86
  opt_task = asyncio.create_task(run_optimization())
87
 
 
88
  while not opt_task.done():
89
  try:
90
+ completed, total, candidate_name, task_name = await asyncio.wait_for(
 
91
  progress_queue.get(),
92
  timeout=1.0,
93
  )
 
96
  job_id=str(job_id),
97
  completed=completed,
98
  total=total,
99
+ current_candidate=candidate_name,
100
  current_task=task_name,
101
+ message=f"Running {candidate_name}/{task_name}...",
102
  )
103
 
 
104
  job.completed_experiments = completed
105
  await session.commit()
106
 
107
  except asyncio.TimeoutError:
 
108
  if opt_task.done():
 
109
  exc = opt_task.exception()
110
  if exc:
111
  raise exc
112
  continue
113
 
 
114
  opt_result = await opt_task
115
 
 
116
  if opt_result.total_experiments == 0 or len(opt_result.summaries) == 0:
 
117
  job.status = JobStatus.FAILED
118
+ job.error = "All experiments failed. Check server logs for details."
119
  job.completed_at = datetime.now(timezone.utc)
120
  await session.commit()
121
 
 
126
  )
127
  return
128
 
 
129
  for summary in opt_result.summaries:
130
  for task_result in summary.task_results:
131
  run = ExperimentRun(
132
  job_id=job.id,
133
+ candidate_name=task_result.candidate_name,
134
  task_name=task_result.task_name,
135
  status="completed",
136
  tokens_total=task_result.metrics.total_tokens,
 
152
  )
153
  session.add(run)
154
 
 
155
  job.status = JobStatus.COMPLETED
156
  job.completed_experiments = opt_result.total_experiments
157
  job.pareto_frontier = opt_result.pareto_frontier
 
164
  job_id=str(job_id),
165
  completed=opt_result.total_experiments,
166
  total=job.total_experiments,
167
+ message=f"Optimization complete. Pareto candidates: {', '.join(opt_result.pareto_frontier)}",
168
  )
169
 
170
  except Exception as e:
 
179
  message=f"Optimization failed: {e}",
180
  )
181
 
182
+ async def _load_candidates(
183
  self,
184
  session: AsyncSession,
185
+ candidate_ids: list[str],
186
+ ) -> list[Candidate]:
187
+ """Load configs from database and convert to Candidate objects."""
188
+ candidates = []
189
+ for candidate_id in candidate_ids:
190
  result = await session.execute(
191
+ select(AgentConfig).where(AgentConfig.id == UUID(candidate_id))
192
  )
193
  db_config = result.scalar_one_or_none()
194
  if db_config:
195
  cfg = db_config.config_json
196
+
197
+ # Build CompactionConfig from stored JSON
198
+ compaction_data = cfg.get("compaction", {})
199
+ compaction = CompactionConfig(
200
+ strategy=compaction_data.get("strategy", "head_tail"),
201
+ params=compaction_data.get("params", {"head_size": 10, "tail_size": 40}),
202
+ )
203
+
204
+ # Get tools configuration (can be str, list, or dict)
205
+ tools = cfg.get("tools", "standard")
206
+
207
+ agent = Agent(
208
  name=db_config.name,
209
+ instructions=cfg.get("instructions"),
210
+ model=cfg.get("model"),
211
+ compaction=compaction,
212
+ tools=tools,
213
+ )
214
+ candidates.append(Candidate(agent=agent))
215
+ return candidates
 
216
 
217
  async def _load_tasks(
218
  self,
219
  session: AsyncSession,
220
  task_ids: list[str],
221
  ) -> list[Task]:
222
+ """Load tasks from database and convert to Task objects."""
223
  tasks = []
224
  for task_id in task_ids:
225
  result = await session.execute(
src/flow/ui/tests/test_e2e_user_journey.py CHANGED
@@ -138,7 +138,7 @@ class TestE2EUserJourney:
138
 
139
  job_data = {
140
  "name": "E2E Test Optimization",
141
- "config_ids": created_agent_ids,
142
  "task_ids": created_task_ids[:2], # Use first 2 tasks
143
  "parallel": 2,
144
  "use_llm_eval": False,
@@ -150,7 +150,7 @@ class TestE2EUserJourney:
150
  print(f" ✓ Created job: {job['name']} (id: {job['id'][:8]}...)")
151
  print(f" - Status: {job['status']}")
152
  print(f" - Total experiments: {job['total_experiments']}")
153
- print(f" - Configs: {len(job['config_ids'])}, Tasks: {len(job['task_ids'])}")
154
 
155
  # ========================================
156
  # STEP 5: Get Job Details
@@ -284,7 +284,7 @@ class TestE2EUserJourney:
284
  # Test creating job with non-existent config
285
  job_data = {
286
  "name": "Invalid Job",
287
- "config_ids": ["00000000-0000-0000-0000-000000000000"],
288
  "task_ids": ["00000000-0000-0000-0000-000000000001"],
289
  }
290
  resp = await client.post("/api/jobs", json=job_data)
@@ -403,7 +403,7 @@ class TestAPIEndpoints:
403
  "/api/jobs",
404
  json={
405
  "name": "test-job",
406
- "config_ids": [config["id"]],
407
  "task_ids": [task["id"]],
408
  },
409
  )
@@ -481,7 +481,7 @@ class TestAPIEndpoints:
481
  "/api/jobs",
482
  json={
483
  "name": "start-test-job",
484
- "config_ids": [config["id"]],
485
  "task_ids": [task["id"]],
486
  "parallel": 1,
487
  },
@@ -593,7 +593,7 @@ class TestAPIEndpoints:
593
  "/api/jobs",
594
  json={
595
  "name": "reset-test-job",
596
- "config_ids": [config["id"]],
597
  "task_ids": [task["id"]],
598
  },
599
  )
 
138
 
139
  job_data = {
140
  "name": "E2E Test Optimization",
141
+ "candidate_ids": created_agent_ids,
142
  "task_ids": created_task_ids[:2], # Use first 2 tasks
143
  "parallel": 2,
144
  "use_llm_eval": False,
 
150
  print(f" ✓ Created job: {job['name']} (id: {job['id'][:8]}...)")
151
  print(f" - Status: {job['status']}")
152
  print(f" - Total experiments: {job['total_experiments']}")
153
+ print(f" - Candidates: {len(job['candidate_ids'])}, Tasks: {len(job['task_ids'])}")
154
 
155
  # ========================================
156
  # STEP 5: Get Job Details
 
284
  # Test creating job with non-existent config
285
  job_data = {
286
  "name": "Invalid Job",
287
+ "candidate_ids": ["00000000-0000-0000-0000-000000000000"],
288
  "task_ids": ["00000000-0000-0000-0000-000000000001"],
289
  }
290
  resp = await client.post("/api/jobs", json=job_data)
 
403
  "/api/jobs",
404
  json={
405
  "name": "test-job",
406
+ "candidate_ids": [config["id"]],
407
  "task_ids": [task["id"]],
408
  },
409
  )
 
481
  "/api/jobs",
482
  json={
483
  "name": "start-test-job",
484
+ "candidate_ids": [config["id"]],
485
  "task_ids": [task["id"]],
486
  "parallel": 1,
487
  },
 
593
  "/api/jobs",
594
  json={
595
  "name": "reset-test-job",
596
+ "candidate_ids": [config["id"]],
597
  "task_ids": [task["id"]],
598
  },
599
  )
src/flow/ui/ui/assets/index-2zMAgGgo.js ADDED
The diff for this file is too large to render. See raw diff
 
src/flow/ui/ui/assets/index-BG9n9RHB.js ADDED
The diff for this file is too large to render. See raw diff
 
src/flow/ui/ui/assets/index-BHAF8mLj.css ADDED
@@ -0,0 +1 @@
 
 
1
+ *,:before,:after{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }::backdrop{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: 
;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }*,:before,:after{box-sizing:border-box;border-width:0;border-style:solid;border-color:#e5e7eb}:before,:after{--tw-content: ""}html,:host{line-height:1.5;-webkit-text-size-adjust:100%;-moz-tab-size:4;-o-tab-size:4;tab-size:4;font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-feature-settings:normal;font-variation-settings:normal;-webkit-tap-highlight-color:transparent}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:JetBrains 
Mono,ui-monospace,monospace;font-feature-settings:normal;font-variation-settings:normal;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;letter-spacing:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dl,dd,h1,h2,h3,h4,h5,h6,hr,figure,p,pre{margin:0}fieldset{margin:0;padding:0}legend{padding:0}ol,ul,menu{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{opacity:1;color:#9ca3af}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button,[role=button]{cursor:pointer}:disabled{cursor:default}img,svg,video,canvas,audio,iframe,embed,object{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]:where(:not([hidden=until-found])){display:none}.pointer-events-none{pointer-events:none}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.sticky{position:sticky}.inset-0{top:0;right:0;bottom:0;left:0}.bottom-0{bottom:0}.left-0{left:0}.left-3{left:.75rem}.top-0{top:0}.top-1\/2{top:50%}.z-10{z-index:10}.z-50{z-index:50}.mx-0\.5{margin-left:.125rem;m
argin-right:.125rem}.mx-4{margin-left:1rem;margin-right:1rem}.mx-auto{margin-left:auto;margin-right:auto}.-mt-1{margin-top:-.25rem}.mb-1{margin-bottom:.25rem}.mb-2{margin-bottom:.5rem}.mb-3{margin-bottom:.75rem}.mb-4{margin-bottom:1rem}.mb-6{margin-bottom:1.5rem}.mb-8{margin-bottom:2rem}.ml-2{margin-left:.5rem}.ml-4{margin-left:1rem}.ml-6{margin-left:1.5rem}.mr-1{margin-right:.25rem}.mt-0\.5{margin-top:.125rem}.mt-1{margin-top:.25rem}.mt-2{margin-top:.5rem}.mt-3{margin-top:.75rem}.mt-4{margin-top:1rem}.mt-8{margin-top:2rem}.mt-auto{margin-top:auto}.line-clamp-2{overflow:hidden;display:-webkit-box;-webkit-box-orient:vertical;-webkit-line-clamp:2}.line-clamp-3{overflow:hidden;display:-webkit-box;-webkit-box-orient:vertical;-webkit-line-clamp:3}.block{display:block}.inline-block{display:inline-block}.flex{display:flex}.inline-flex{display:inline-flex}.table{display:table}.grid{display:grid}.hidden{display:none}.h-1\.5{height:.375rem}.h-12{height:3rem}.h-2{height:.5rem}.h-3{height:.75rem}.h-32{height:8rem}.h-4{height:1rem}.h-5{height:1.25rem}.h-6{height:1.5rem}.h-8{height:2rem}.h-full{height:100%}.max-h-32{max-height:8rem}.max-h-40{max-height:10rem}.max-h-48{max-height:12rem}.max-h-96{max-height:24rem}.max-h-\[80vh\]{max-height:80vh}.min-h-\[100px\]{min-height:100px}.min-h-screen{min-height:100vh}.w-12{width:3rem}.w-2{width:.5rem}.w-20{width:5rem}.w-24{width:6rem}.w-3{width:.75rem}.w-32{width:8rem}.w-4{width:1rem}.w-5{width:1.25rem}.w-6{width:1.5rem}.w-8{width:2rem}.w-full{width:100%}.min-w-0{min-width:0px}.min-w-\[90px\]{min-width:90px}.max-w-7xl{max-width:80rem}.max-w-lg{max-width:32rem}.max-w-md{max-width:28rem}.flex-1{flex:1 1 0%}.flex-shrink-0{flex-shrink:0}.-translate-y-1\/2{--tw-translate-y: -50%;transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.rotate-180{--tw-rotate: 
180deg;transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.transform{transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}@keyframes pulse{50%{opacity:.5}}.animate-pulse{animation:pulse 2s cubic-bezier(.4,0,.6,1) infinite}@keyframes spin{to{transform:rotate(360deg)}}.animate-spin{animation:spin 1s linear infinite}.cursor-pointer{cursor:pointer}.select-none{-webkit-user-select:none;-moz-user-select:none;user-select:none}.resize-y{resize:vertical}.resize{resize:both}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-end{align-items:flex-end}.items-center{align-items:center}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-1{gap:.25rem}.gap-1\.5{gap:.375rem}.gap-2{gap:.5rem}.gap-3{gap:.75rem}.gap-4{gap:1rem}.gap-6{gap:1.5rem}.gap-8{gap:2rem}.gap-x-4{-moz-column-gap:1rem;column-gap:1rem}.gap-y-1{row-gap:.25rem}.space-y-1>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.25rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.25rem * var(--tw-space-y-reverse))}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.5rem * var(--tw-space-y-reverse))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1rem * 
var(--tw-space-y-reverse))}.space-y-6>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1.5rem * var(--tw-space-y-reverse))}.overflow-auto{overflow:auto}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.break-all{word-break:break-all}.rounded{border-radius:.25rem}.rounded-full{border-radius:9999px}.rounded-lg{border-radius:.5rem}.rounded-md{border-radius:.375rem}.border{border-width:1px}.border-b{border-bottom-width:1px}.border-l-2{border-left-width:2px}.border-t{border-top-width:1px}.border-dashed{border-style:dashed}.border-\[var\(--accent\)\]{border-color:var(--accent)}.border-\[var\(--border\)\]{border-color:var(--border)}.border-blue-500\/30{border-color:#3b82f64d}.border-green-500\/30{border-color:#22c55e4d}.border-red-500\/30{border-color:#ef44444d}.border-red-500\/50{border-color:#ef444480}.bg-\[var\(--accent\)\]{background-color:var(--accent)}.bg-\[var\(--bg-primary\)\]{background-color:var(--bg-primary)}.bg-\[var\(--bg-secondary\)\]{background-color:var(--bg-secondary)}.bg-\[var\(--bg-tertiary\)\]{background-color:var(--bg-tertiary)}.bg-\[var\(--error\)\]{background-color:var(--error)}.bg-black\/80{background-color:#000c}.bg-blue-100{--tw-bg-opacity: 1;background-color:rgb(219 234 254 / var(--tw-bg-opacity, 1))}.bg-blue-400{--tw-bg-opacity: 1;background-color:rgb(96 165 250 / var(--tw-bg-opacity, 1))}.bg-blue-500{--tw-bg-opacity: 1;background-color:rgb(59 130 246 / var(--tw-bg-opacity, 1))}.bg-blue-500\/10{background-color:#3b82f61a}.bg-blue-600{--tw-bg-opacity: 1;background-color:rgb(37 99 235 / var(--tw-bg-opacity, 1))}.bg-emerald-500{--tw-bg-opacity: 1;background-color:rgb(16 185 129 / var(--tw-bg-opacity, 1))}.bg-green-100{--tw-bg-opacity: 1;background-color:rgb(220 252 231 / var(--tw-bg-opacity, 
1))}.bg-green-400{--tw-bg-opacity: 1;background-color:rgb(74 222 128 / var(--tw-bg-opacity, 1))}.bg-green-500{--tw-bg-opacity: 1;background-color:rgb(34 197 94 / var(--tw-bg-opacity, 1))}.bg-green-500\/10{background-color:#22c55e1a}.bg-green-500\/20{background-color:#22c55e33}.bg-green-600{--tw-bg-opacity: 1;background-color:rgb(22 163 74 / var(--tw-bg-opacity, 1))}.bg-orange-100{--tw-bg-opacity: 1;background-color:rgb(255 237 213 / var(--tw-bg-opacity, 1))}.bg-purple-100{--tw-bg-opacity: 1;background-color:rgb(243 232 255 / var(--tw-bg-opacity, 1))}.bg-red-100{--tw-bg-opacity: 1;background-color:rgb(254 226 226 / var(--tw-bg-opacity, 1))}.bg-red-500{--tw-bg-opacity: 1;background-color:rgb(239 68 68 / var(--tw-bg-opacity, 1))}.bg-red-500\/10{background-color:#ef44441a}.bg-red-600{--tw-bg-opacity: 1;background-color:rgb(220 38 38 / var(--tw-bg-opacity, 1))}.bg-yellow-500{--tw-bg-opacity: 1;background-color:rgb(234 179 8 / var(--tw-bg-opacity, 1))}.p-1{padding:.25rem}.p-2{padding:.5rem}.p-3{padding:.75rem}.p-4{padding:1rem}.p-6{padding:1.5rem}.px-1{padding-left:.25rem;padding-right:.25rem}.px-1\.5{padding-left:.375rem;padding-right:.375rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-4{padding-left:1rem;padding-right:1rem}.py-0\.5{padding-top:.125rem;padding-bottom:.125rem}.py-1{padding-top:.25rem;padding-bottom:.25rem}.py-1\.5{padding-top:.375rem;padding-bottom:.375rem}.py-12{padding-top:3rem;padding-bottom:3rem}.py-16{padding-top:4rem;padding-bottom:4rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.py-3{padding-top:.75rem;padding-bottom:.75rem}.py-4{padding-top:1rem;padding-bottom:1rem}.py-8{padding-top:2rem;padding-bottom:2rem}.pb-1{padding-bottom:.25rem}.pb-2{padding-bottom:.5rem}.pl-10{padding-left:2.5rem}.pr-3{padding-right:.75rem}.pr-4{padding-right:1rem}.pt-2{padding-top:.5rem}.pt-3{padding-top:.75rem}.pt-4{padding-top:1rem}.text-left{text-align:left}.text-center{text-align:center}.text-right{text-align:right
}.font-mono{font-family:JetBrains Mono,ui-monospace,monospace}.text-2xl{font-size:1.5rem;line-height:2rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xl{font-size:1.25rem;line-height:1.75rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-medium{font-weight:500}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.tracking-wide{letter-spacing:.025em}.tracking-wider{letter-spacing:.05em}.text-\[var\(--accent\)\]{color:var(--accent)}.text-\[var\(--error\)\]{color:var(--error)}.text-\[var\(--text-primary\)\]{color:var(--text-primary)}.text-\[var\(--text-secondary\)\]{color:var(--text-secondary)}.text-\[var\(--text-tertiary\)\]{color:var(--text-tertiary)}.text-black{--tw-text-opacity: 1;color:rgb(0 0 0 / var(--tw-text-opacity, 1))}.text-blue-400{--tw-text-opacity: 1;color:rgb(96 165 250 / var(--tw-text-opacity, 1))}.text-blue-800{--tw-text-opacity: 1;color:rgb(30 64 175 / var(--tw-text-opacity, 1))}.text-emerald-400{--tw-text-opacity: 1;color:rgb(52 211 153 / var(--tw-text-opacity, 1))}.text-green-400{--tw-text-opacity: 1;color:rgb(74 222 128 / var(--tw-text-opacity, 1))}.text-green-500{--tw-text-opacity: 1;color:rgb(34 197 94 / var(--tw-text-opacity, 1))}.text-green-800{--tw-text-opacity: 1;color:rgb(22 101 52 / var(--tw-text-opacity, 1))}.text-orange-800{--tw-text-opacity: 1;color:rgb(154 52 18 / var(--tw-text-opacity, 1))}.text-purple-400{--tw-text-opacity: 1;color:rgb(192 132 252 / var(--tw-text-opacity, 1))}.text-purple-800{--tw-text-opacity: 1;color:rgb(107 33 168 / var(--tw-text-opacity, 1))}.text-red-400{--tw-text-opacity: 1;color:rgb(248 113 113 / var(--tw-text-opacity, 1))}.text-red-800{--tw-text-opacity: 1;color:rgb(153 27 27 / var(--tw-text-opacity, 1))}.text-white{--tw-text-opacity: 1;color:rgb(255 255 255 / var(--tw-text-opacity, 1))}.accent-\[var\(--accent\)\]{accent-color:var(--accent)}.shadow-lg{--tw-shadow: 0 10px 15px -3px rgb(0 0 0 / .1), 0 4px 6px 
-4px rgb(0 0 0 / .1);--tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.filter{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.transition-all{transition-property:all;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-colors{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.duration-300{transition-duration:.3s}:root{--bg-primary: #0a0a0a;--bg-secondary: #141414;--bg-tertiary: #1a1a1a;--text-primary: #f5f5f5;--text-secondary: #a3a3a3;--accent: #22c55e;--accent-dim: #166534;--border: #262626;--error: #ef4444}[data-theme=light]{--bg-primary: #ffffff;--bg-secondary: #f7f8f9;--bg-tertiary: #eef0f2;--text-primary: #1a1a1a;--text-secondary: #4a4a4a;--accent: #16a34a;--accent-dim: #dcfce7;--border: #d1d5db;--error: #dc2626}*{box-sizing:border-box}body{margin:0;background-color:var(--bg-primary);color:var(--text-primary);font-family:JetBrains Mono,ui-monospace,monospace;font-size:14px;line-height:1.6}::-webkit-scrollbar{width:8px;height:8px}::-webkit-scrollbar-track{background:var(--bg-secondary)}::-webkit-scrollbar-thumb{background:var(--border);border-radius:4px}::-webkit-scrollbar-thumb:hover{background:#404040}[data-theme=light] ::-webkit-scrollbar-thumb:hover{background:silver}.last\:border-0:last-child{border-width:0px}.hover\:border-\[var\(--accent-dim\)\]:hover{border-color:var(--accent-dim)}.hover\:bg-\[\#16a34a\]:hover{--tw-bg-opacity: 1;background-color:rgb(22 163 74 / var(--tw-bg-opacity, 
1))}.hover\:bg-\[var\(--bg-primary\)\]:hover{background-color:var(--bg-primary)}.hover\:bg-\[var\(--bg-tertiary\)\]:hover{background-color:var(--bg-tertiary)}.hover\:bg-\[var\(--border\)\]:hover{background-color:var(--border)}.hover\:bg-red-600:hover{--tw-bg-opacity: 1;background-color:rgb(220 38 38 / var(--tw-bg-opacity, 1))}.hover\:text-\[var\(--accent\)\]:hover{color:var(--accent)}.hover\:text-\[var\(--text-primary\)\]:hover{color:var(--text-primary)}.hover\:opacity-80:hover{opacity:.8}.focus\:border-\[var\(--accent\)\]:focus{border-color:var(--accent)}.focus\:outline-none:focus{outline:2px solid transparent;outline-offset:2px}.focus\:ring-2:focus{--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(2px + var(--tw-ring-offset-width)) var(--tw-ring-color);box-shadow:var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow, 0 0 #0000)}.focus\:ring-\[var\(--accent\)\]:focus{--tw-ring-color: var(--accent)}.disabled\:cursor-not-allowed:disabled{cursor:not-allowed}.disabled\:opacity-50:disabled{opacity:.5}@media (min-width: 768px){.md\:grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}}@media (min-width: 1024px){.lg\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}}@media (min-width: 1280px){.xl\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}}@media (prefers-color-scheme: dark){.dark\:bg-blue-900{--tw-bg-opacity: 1;background-color:rgb(30 58 138 / var(--tw-bg-opacity, 1))}.dark\:bg-green-900{--tw-bg-opacity: 1;background-color:rgb(20 83 45 / var(--tw-bg-opacity, 1))}.dark\:bg-orange-900{--tw-bg-opacity: 1;background-color:rgb(124 45 18 / var(--tw-bg-opacity, 1))}.dark\:bg-purple-900{--tw-bg-opacity: 1;background-color:rgb(88 28 135 / var(--tw-bg-opacity, 1))}.dark\:bg-red-900{--tw-bg-opacity: 1;background-color:rgb(127 29 29 / var(--tw-bg-opacity, 1))}.dark\:text-blue-200{--tw-text-opacity: 1;color:rgb(191 219 254 / 
var(--tw-text-opacity, 1))}.dark\:text-green-200{--tw-text-opacity: 1;color:rgb(187 247 208 / var(--tw-text-opacity, 1))}.dark\:text-orange-200{--tw-text-opacity: 1;color:rgb(254 215 170 / var(--tw-text-opacity, 1))}.dark\:text-purple-200{--tw-text-opacity: 1;color:rgb(233 213 255 / var(--tw-text-opacity, 1))}.dark\:text-red-200{--tw-text-opacity: 1;color:rgb(254 202 202 / var(--tw-text-opacity, 1))}}
src/flow/ui/ui/assets/index-Bx-_JS_6.js ADDED
The diff for this file is too large to render. See raw diff
 
src/flow/ui/ui/assets/index-VFZIS3uv.js ADDED
The diff for this file is too large to render. See raw diff
 
src/flow/ui/ui/assets/index-_IRgS-wR.css ADDED
@@ -0,0 +1 @@
 
 
1
+ *,:before,:after{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }::backdrop{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: 
;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }*,:before,:after{box-sizing:border-box;border-width:0;border-style:solid;border-color:#e5e7eb}:before,:after{--tw-content: ""}html,:host{line-height:1.5;-webkit-text-size-adjust:100%;-moz-tab-size:4;-o-tab-size:4;tab-size:4;font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-feature-settings:normal;font-variation-settings:normal;-webkit-tap-highlight-color:transparent}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:JetBrains 
Mono,ui-monospace,monospace;font-feature-settings:normal;font-variation-settings:normal;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;letter-spacing:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dl,dd,h1,h2,h3,h4,h5,h6,hr,figure,p,pre{margin:0}fieldset{margin:0;padding:0}legend{padding:0}ol,ul,menu{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{opacity:1;color:#9ca3af}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button,[role=button]{cursor:pointer}:disabled{cursor:default}img,svg,video,canvas,audio,iframe,embed,object{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]:where(:not([hidden=until-found])){display:none}.pointer-events-none{pointer-events:none}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.sticky{position:sticky}.inset-0{top:0;right:0;bottom:0;left:0}.bottom-0{bottom:0}.left-0{left:0}.left-3{left:.75rem}.top-0{top:0}.top-1\/2{top:50%}.z-10{z-index:10}.z-50{z-index:50}.mx-0\.5{margin-left:.125rem;m
argin-right:.125rem}.mx-4{margin-left:1rem;margin-right:1rem}.mx-auto{margin-left:auto;margin-right:auto}.-mt-1{margin-top:-.25rem}.mb-1{margin-bottom:.25rem}.mb-2{margin-bottom:.5rem}.mb-3{margin-bottom:.75rem}.mb-4{margin-bottom:1rem}.mb-6{margin-bottom:1.5rem}.mb-8{margin-bottom:2rem}.ml-2{margin-left:.5rem}.ml-4{margin-left:1rem}.ml-6{margin-left:1.5rem}.mr-1{margin-right:.25rem}.mt-0\.5{margin-top:.125rem}.mt-1{margin-top:.25rem}.mt-2{margin-top:.5rem}.mt-3{margin-top:.75rem}.mt-4{margin-top:1rem}.mt-8{margin-top:2rem}.mt-auto{margin-top:auto}.line-clamp-2{overflow:hidden;display:-webkit-box;-webkit-box-orient:vertical;-webkit-line-clamp:2}.line-clamp-3{overflow:hidden;display:-webkit-box;-webkit-box-orient:vertical;-webkit-line-clamp:3}.block{display:block}.inline-block{display:inline-block}.flex{display:flex}.inline-flex{display:inline-flex}.table{display:table}.grid{display:grid}.hidden{display:none}.h-1\.5{height:.375rem}.h-12{height:3rem}.h-2{height:.5rem}.h-3{height:.75rem}.h-32{height:8rem}.h-4{height:1rem}.h-5{height:1.25rem}.h-6{height:1.5rem}.h-8{height:2rem}.h-full{height:100%}.max-h-32{max-height:8rem}.max-h-40{max-height:10rem}.max-h-48{max-height:12rem}.max-h-96{max-height:24rem}.max-h-\[80vh\]{max-height:80vh}.min-h-\[100px\]{min-height:100px}.min-h-screen{min-height:100vh}.w-12{width:3rem}.w-2{width:.5rem}.w-20{width:5rem}.w-24{width:6rem}.w-3{width:.75rem}.w-32{width:8rem}.w-4{width:1rem}.w-5{width:1.25rem}.w-6{width:1.5rem}.w-8{width:2rem}.w-full{width:100%}.min-w-0{min-width:0px}.min-w-\[90px\]{min-width:90px}.max-w-7xl{max-width:80rem}.max-w-lg{max-width:32rem}.max-w-md{max-width:28rem}.flex-1{flex:1 1 0%}.flex-shrink-0{flex-shrink:0}.-translate-y-1\/2{--tw-translate-y: -50%;transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.rotate-180{--tw-rotate: 
180deg;transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.transform{transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}@keyframes pulse{50%{opacity:.5}}.animate-pulse{animation:pulse 2s cubic-bezier(.4,0,.6,1) infinite}@keyframes spin{to{transform:rotate(360deg)}}.animate-spin{animation:spin 1s linear infinite}.cursor-pointer{cursor:pointer}.select-none{-webkit-user-select:none;-moz-user-select:none;user-select:none}.resize-y{resize:vertical}.resize{resize:both}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-end{align-items:flex-end}.items-center{align-items:center}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-1{gap:.25rem}.gap-1\.5{gap:.375rem}.gap-2{gap:.5rem}.gap-3{gap:.75rem}.gap-4{gap:1rem}.gap-6{gap:1.5rem}.gap-8{gap:2rem}.gap-x-4{-moz-column-gap:1rem;column-gap:1rem}.gap-y-1{row-gap:.25rem}.space-y-1>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.25rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.25rem * var(--tw-space-y-reverse))}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.5rem * var(--tw-space-y-reverse))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1rem * 
var(--tw-space-y-reverse))}.space-y-6>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1.5rem * var(--tw-space-y-reverse))}.overflow-auto{overflow:auto}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.break-all{word-break:break-all}.rounded{border-radius:.25rem}.rounded-full{border-radius:9999px}.rounded-lg{border-radius:.5rem}.rounded-md{border-radius:.375rem}.border{border-width:1px}.border-b{border-bottom-width:1px}.border-l-2{border-left-width:2px}.border-t{border-top-width:1px}.border-dashed{border-style:dashed}.border-\[var\(--accent\)\]{border-color:var(--accent)}.border-\[var\(--border\)\]{border-color:var(--border)}.border-blue-500\/30{border-color:#3b82f64d}.border-green-500\/30{border-color:#22c55e4d}.border-red-500\/30{border-color:#ef44444d}.border-red-500\/50{border-color:#ef444480}.bg-\[var\(--accent\)\]{background-color:var(--accent)}.bg-\[var\(--bg-primary\)\]{background-color:var(--bg-primary)}.bg-\[var\(--bg-secondary\)\]{background-color:var(--bg-secondary)}.bg-\[var\(--bg-tertiary\)\]{background-color:var(--bg-tertiary)}.bg-\[var\(--error\)\]{background-color:var(--error)}.bg-black\/80{background-color:#000c}.bg-blue-100{--tw-bg-opacity: 1;background-color:rgb(219 234 254 / var(--tw-bg-opacity, 1))}.bg-blue-400{--tw-bg-opacity: 1;background-color:rgb(96 165 250 / var(--tw-bg-opacity, 1))}.bg-blue-500{--tw-bg-opacity: 1;background-color:rgb(59 130 246 / var(--tw-bg-opacity, 1))}.bg-blue-500\/10{background-color:#3b82f61a}.bg-blue-600{--tw-bg-opacity: 1;background-color:rgb(37 99 235 / var(--tw-bg-opacity, 1))}.bg-emerald-500{--tw-bg-opacity: 1;background-color:rgb(16 185 129 / var(--tw-bg-opacity, 1))}.bg-green-100{--tw-bg-opacity: 1;background-color:rgb(220 252 231 / var(--tw-bg-opacity, 
1))}.bg-green-400{--tw-bg-opacity: 1;background-color:rgb(74 222 128 / var(--tw-bg-opacity, 1))}.bg-green-500{--tw-bg-opacity: 1;background-color:rgb(34 197 94 / var(--tw-bg-opacity, 1))}.bg-green-500\/10{background-color:#22c55e1a}.bg-green-500\/20{background-color:#22c55e33}.bg-green-600{--tw-bg-opacity: 1;background-color:rgb(22 163 74 / var(--tw-bg-opacity, 1))}.bg-orange-100{--tw-bg-opacity: 1;background-color:rgb(255 237 213 / var(--tw-bg-opacity, 1))}.bg-purple-100{--tw-bg-opacity: 1;background-color:rgb(243 232 255 / var(--tw-bg-opacity, 1))}.bg-red-100{--tw-bg-opacity: 1;background-color:rgb(254 226 226 / var(--tw-bg-opacity, 1))}.bg-red-500{--tw-bg-opacity: 1;background-color:rgb(239 68 68 / var(--tw-bg-opacity, 1))}.bg-red-500\/10{background-color:#ef44441a}.bg-red-600{--tw-bg-opacity: 1;background-color:rgb(220 38 38 / var(--tw-bg-opacity, 1))}.bg-yellow-500{--tw-bg-opacity: 1;background-color:rgb(234 179 8 / var(--tw-bg-opacity, 1))}.p-2{padding:.5rem}.p-3{padding:.75rem}.p-4{padding:1rem}.p-6{padding:1.5rem}.px-1{padding-left:.25rem;padding-right:.25rem}.px-1\.5{padding-left:.375rem;padding-right:.375rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-4{padding-left:1rem;padding-right:1rem}.py-0\.5{padding-top:.125rem;padding-bottom:.125rem}.py-1{padding-top:.25rem;padding-bottom:.25rem}.py-1\.5{padding-top:.375rem;padding-bottom:.375rem}.py-12{padding-top:3rem;padding-bottom:3rem}.py-16{padding-top:4rem;padding-bottom:4rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.py-3{padding-top:.75rem;padding-bottom:.75rem}.py-4{padding-top:1rem;padding-bottom:1rem}.py-8{padding-top:2rem;padding-bottom:2rem}.pb-1{padding-bottom:.25rem}.pb-2{padding-bottom:.5rem}.pl-10{padding-left:2.5rem}.pr-3{padding-right:.75rem}.pr-4{padding-right:1rem}.pt-2{padding-top:.5rem}.pt-3{padding-top:.75rem}.pt-4{padding-top:1rem}.text-left{text-align:left}.text-center{text-align:center}.text-right{text-align:right}.font-mono{font-fam
ily:JetBrains Mono,ui-monospace,monospace}.text-2xl{font-size:1.5rem;line-height:2rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xl{font-size:1.25rem;line-height:1.75rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-medium{font-weight:500}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.tracking-wide{letter-spacing:.025em}.tracking-wider{letter-spacing:.05em}.text-\[var\(--accent\)\]{color:var(--accent)}.text-\[var\(--error\)\]{color:var(--error)}.text-\[var\(--text-primary\)\]{color:var(--text-primary)}.text-\[var\(--text-secondary\)\]{color:var(--text-secondary)}.text-\[var\(--text-tertiary\)\]{color:var(--text-tertiary)}.text-black{--tw-text-opacity: 1;color:rgb(0 0 0 / var(--tw-text-opacity, 1))}.text-blue-400{--tw-text-opacity: 1;color:rgb(96 165 250 / var(--tw-text-opacity, 1))}.text-blue-800{--tw-text-opacity: 1;color:rgb(30 64 175 / var(--tw-text-opacity, 1))}.text-emerald-400{--tw-text-opacity: 1;color:rgb(52 211 153 / var(--tw-text-opacity, 1))}.text-green-400{--tw-text-opacity: 1;color:rgb(74 222 128 / var(--tw-text-opacity, 1))}.text-green-500{--tw-text-opacity: 1;color:rgb(34 197 94 / var(--tw-text-opacity, 1))}.text-green-800{--tw-text-opacity: 1;color:rgb(22 101 52 / var(--tw-text-opacity, 1))}.text-orange-800{--tw-text-opacity: 1;color:rgb(154 52 18 / var(--tw-text-opacity, 1))}.text-purple-400{--tw-text-opacity: 1;color:rgb(192 132 252 / var(--tw-text-opacity, 1))}.text-purple-800{--tw-text-opacity: 1;color:rgb(107 33 168 / var(--tw-text-opacity, 1))}.text-red-400{--tw-text-opacity: 1;color:rgb(248 113 113 / var(--tw-text-opacity, 1))}.text-red-800{--tw-text-opacity: 1;color:rgb(153 27 27 / var(--tw-text-opacity, 1))}.text-white{--tw-text-opacity: 1;color:rgb(255 255 255 / var(--tw-text-opacity, 1))}.accent-\[var\(--accent\)\]{accent-color:var(--accent)}.shadow-lg{--tw-shadow: 0 10px 15px -3px rgb(0 0 0 / .1), 0 4px 6px -4px rgb(0 0 0 / 
.1);--tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.filter{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.transition-all{transition-property:all;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-colors{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.duration-300{transition-duration:.3s}:root{--bg-primary: #0a0a0a;--bg-secondary: #141414;--bg-tertiary: #1a1a1a;--text-primary: #f5f5f5;--text-secondary: #a3a3a3;--accent: #22c55e;--accent-dim: #166534;--border: #262626;--error: #ef4444}[data-theme=light]{--bg-primary: #ffffff;--bg-secondary: #f7f8f9;--bg-tertiary: #eef0f2;--text-primary: #1a1a1a;--text-secondary: #4a4a4a;--accent: #16a34a;--accent-dim: #dcfce7;--border: #d1d5db;--error: #dc2626}*{box-sizing:border-box}body{margin:0;background-color:var(--bg-primary);color:var(--text-primary);font-family:JetBrains Mono,ui-monospace,monospace;font-size:14px;line-height:1.6}::-webkit-scrollbar{width:8px;height:8px}::-webkit-scrollbar-track{background:var(--bg-secondary)}::-webkit-scrollbar-thumb{background:var(--border);border-radius:4px}::-webkit-scrollbar-thumb:hover{background:#404040}[data-theme=light] ::-webkit-scrollbar-thumb:hover{background:silver}.last\:border-0:last-child{border-width:0px}.hover\:border-\[var\(--accent-dim\)\]:hover{border-color:var(--accent-dim)}.hover\:bg-\[\#16a34a\]:hover{--tw-bg-opacity: 1;background-color:rgb(22 163 74 / var(--tw-bg-opacity, 
1))}.hover\:bg-\[var\(--bg-primary\)\]:hover{background-color:var(--bg-primary)}.hover\:bg-\[var\(--bg-tertiary\)\]:hover{background-color:var(--bg-tertiary)}.hover\:bg-\[var\(--border\)\]:hover{background-color:var(--border)}.hover\:bg-red-600:hover{--tw-bg-opacity: 1;background-color:rgb(220 38 38 / var(--tw-bg-opacity, 1))}.hover\:text-\[var\(--accent\)\]:hover{color:var(--accent)}.hover\:text-\[var\(--text-primary\)\]:hover{color:var(--text-primary)}.hover\:opacity-80:hover{opacity:.8}.focus\:border-\[var\(--accent\)\]:focus{border-color:var(--accent)}.focus\:outline-none:focus{outline:2px solid transparent;outline-offset:2px}.focus\:ring-2:focus{--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(2px + var(--tw-ring-offset-width)) var(--tw-ring-color);box-shadow:var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow, 0 0 #0000)}.focus\:ring-\[var\(--accent\)\]:focus{--tw-ring-color: var(--accent)}.disabled\:cursor-not-allowed:disabled{cursor:not-allowed}.disabled\:opacity-50:disabled{opacity:.5}@media (min-width: 768px){.md\:grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}}@media (min-width: 1024px){.lg\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}}@media (min-width: 1280px){.xl\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}}@media (prefers-color-scheme: dark){.dark\:bg-blue-900{--tw-bg-opacity: 1;background-color:rgb(30 58 138 / var(--tw-bg-opacity, 1))}.dark\:bg-green-900{--tw-bg-opacity: 1;background-color:rgb(20 83 45 / var(--tw-bg-opacity, 1))}.dark\:bg-orange-900{--tw-bg-opacity: 1;background-color:rgb(124 45 18 / var(--tw-bg-opacity, 1))}.dark\:bg-purple-900{--tw-bg-opacity: 1;background-color:rgb(88 28 135 / var(--tw-bg-opacity, 1))}.dark\:bg-red-900{--tw-bg-opacity: 1;background-color:rgb(127 29 29 / var(--tw-bg-opacity, 1))}.dark\:text-blue-200{--tw-text-opacity: 1;color:rgb(191 219 254 / 
var(--tw-text-opacity, 1))}.dark\:text-green-200{--tw-text-opacity: 1;color:rgb(187 247 208 / var(--tw-text-opacity, 1))}.dark\:text-orange-200{--tw-text-opacity: 1;color:rgb(254 215 170 / var(--tw-text-opacity, 1))}.dark\:text-purple-200{--tw-text-opacity: 1;color:rgb(233 213 255 / var(--tw-text-opacity, 1))}.dark\:text-red-200{--tw-text-opacity: 1;color:rgb(254 202 202 / var(--tw-text-opacity, 1))}}
src/flow/ui/ui/index.html CHANGED
@@ -8,8 +8,8 @@
8
  <link rel="preconnect" href="https://fonts.googleapis.com">
9
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&display=swap" rel="stylesheet">
11
- <script type="module" crossorigin src="/assets/index-BFk_2IKX.js"></script>
12
- <link rel="stylesheet" crossorigin href="/assets/index-DlCyCyh_.css">
13
  </head>
14
  <body>
15
  <div id="root"></div>
 
8
  <link rel="preconnect" href="https://fonts.googleapis.com">
9
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&display=swap" rel="stylesheet">
11
+ <script type="module" crossorigin src="/assets/index-2zMAgGgo.js"></script>
12
+ <link rel="stylesheet" crossorigin href="/assets/index-BHAF8mLj.css">
13
  </head>
14
  <body>
15
  <div id="root"></div>