Spaces:

tm23hgf
/

rust_algo_reasoning

Running

File size: 6,017 Bytes

c987dd2

#!/usr/bin/env python3
"""
Phase 4: Assembly, Compilation, and Execution Pipeline

Combines boilerplate + solution code + test harnesses into compilable Rust code,
then compiles and executes to verify correctness.

Usage:
    python phase4_executor.py [--num-problems 50] [--output-dir complexity_reasoning_data]
"""

import argparse
import json
from pathlib import Path
from typing import Dict, Set

from pipeline.problem import Problem
from pipeline.category_resolver import resolve_category
from pipeline.boilerplate.registry import get_registry
from pipeline.executor import execute_problem
from pipeline.logger import ExecutionLogger


def load_test_harnesses(harness_path: str) -> Dict[int, str]:
    """
    Load test harnesses from test_harness.jsonl.

    Args:
        harness_path: Path to test_harness.jsonl

    Returns:
        Dict mapping problem_id → Rust test harness code
    """
    harnesses = {}

    if not Path(harness_path).exists():
        print(f"Warning: Test harness file not found: {harness_path}")
        return harnesses

    with open(harness_path) as f:
        for line in f:
            data = json.loads(line)
            problem_id = data.get("problem_id")
            harness = data.get("harness")
            if problem_id and harness:
                harnesses[problem_id] = harness

    return harnesses


def load_problems_from_dataset(dataset_path: str, num_problems: int = 50) -> list:
    """
    Load problems from dataset.jsonl.

    Args:
        dataset_path: Path to dataset.jsonl
        num_problems: Number of problems to load

    Returns:
        List of problem dictionaries
    """
    problems = []

    with open(dataset_path) as f:
        for i, line in enumerate(f):
            if i >= num_problems:
                break
            problems.append(json.loads(line))

    return problems


def build_problem_object(
    dataset_entry: dict, harnesses: Dict[int, str], registry
) -> Problem:
    """
    Build a Problem object from dataset entry, harness, and boilerplate.

    Args:
        dataset_entry: Problem data from dataset.jsonl
        harnesses: Dict of problem_id → harness code
        registry: Boilerplate registry

    Returns:
        Problem instance ready for execution
    """
    problem_id = dataset_entry["problem_id"]

    # Resolve categories from tags
    categories = resolve_category(dataset_entry["tags"])

    # Load boilerplate for categories
    boilerplate = registry.merge_boilerplate(categories)

    # Get test harness (or empty string if not available)
    test_harness = harnesses.get(problem_id, "")

    # Create Problem object
    problem = Problem.from_dataset_entry(
        dataset_entry=dataset_entry,
        categories=categories,
        boilerplate=boilerplate,
        test_harness=test_harness,
        solution_code="    // TODO: Implement solution\n",
    )

    return problem


def main():
    parser = argparse.ArgumentParser(
        description="Phase 4: Compile and execute Rust problems"
    )
    parser.add_argument(
        "--num-problems",
        type=int,
        default=50,
        help="Number of problems to process (default: 50)",
    )
    parser.add_argument(
        "--dataset-path",
        type=str,
        default="complexity_reasoning_data/dataset.jsonl",
        help="Path to dataset.jsonl",
    )
    parser.add_argument(
        "--harness-path",
        type=str,
        default="complexity_reasoning_data/test_harness.jsonl",
        help="Path to test_harness.jsonl",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="complexity_reasoning_data",
        help="Output directory for results",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=10,
        help="Execution timeout in seconds (default: 10)",
    )

    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print("Phase 4: Assembly, Compilation, and Execution Pipeline")
    print("=" * 70)
    print(f"Num problems: {args.num_problems}")
    print(f"Dataset path: {args.dataset_path}")
    print(f"Harness path: {args.harness_path}")
    print(f"Output directory: {output_dir}")
    print(f"Timeout: {args.timeout}s")
    print()

    # Load data
    print("Loading dataset...")
    problems_data = load_problems_from_dataset(args.dataset_path, args.num_problems)
    print(f"✅ Loaded {len(problems_data)} problems")

    print("Loading test harnesses...")
    harnesses = load_test_harnesses(args.harness_path)
    print(f"✅ Loaded {len(harnesses)} test harnesses")

    print("Initializing boilerplate registry...")
    registry = get_registry()
    print(f"✅ Registry ready with {len(registry.get_all_categories())} categories")

    # Initialize logger
    results_path = output_dir / "execution_results.jsonl"
    logger = ExecutionLogger(str(results_path))

    # Process problems
    print()
    print(f"Processing {len(problems_data)} problems...")
    print("-" * 70)

    for i, problem_data in enumerate(problems_data, 1):
        problem_id = problem_data["problem_id"]

        # Build problem object
        problem = build_problem_object(problem_data, harnesses, registry)

        # Execute
        print(
            f"[{i:3d}/{len(problems_data)}] Problem #{problem_id:4d}... ",
            end="",
            flush=True,
        )
        problem = execute_problem(problem, args.timeout)

        # Log result
        logger.log_problem(problem)

        # Print status
        if problem.compilation_success and problem.execution_success:
            print("✅ Success")
        elif problem.compilation_success:
            print(f"⚠️  Compiled but execution failed")
        else:
            print(f"❌ Compilation failed")

    print()
    print(f"✅ All problems processed!")
    print(f"Results saved to: {results_path}")

    # Print summary
    logger.print_summary()


if __name__ == "__main__":
    main()