#!/usr/bin/env python3
"""
Phase 4: Assembly, Compilation, and Execution Pipeline

Combines boilerplate + solution code + test harnesses into compilable Rust code,
then compiles and executes to verify correctness.

Usage:
    python phase4_executor.py [--num-problems 50] [--output-dir complexity_reasoning_data]
"""

import argparse
import json
from pathlib import Path
from typing import Dict, Set

from pipeline.problem import Problem
from pipeline.category_resolver import resolve_category
from pipeline.boilerplate.registry import get_registry
from pipeline.executor import execute_problem
from pipeline.logger import ExecutionLogger


def load_test_harnesses(harness_path: str) -> Dict[int, str]:
    """
    Load test harnesses from test_harness.jsonl.

    Each non-blank line is parsed as a JSON object; its "problem_id" and
    "harness" fields are read. Records with a missing problem id or a
    missing/empty harness are skipped. If the file does not exist, a warning
    is printed and an empty dict is returned.

    Args:
        harness_path: Path to test_harness.jsonl

    Returns:
        Dict mapping problem_id → Rust test harness code
    """
    harnesses = {}

    if not Path(harness_path).exists():
        print(f"Warning: Test harness file not found: {harness_path}")
        return harnesses

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(harness_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank/trailing lines are common in JSONL files and would
                # otherwise make json.loads raise.
                continue
            data = json.loads(line)
            problem_id = data.get("problem_id")
            harness = data.get("harness")
            # Compare against None explicitly so a problem id of 0 is kept.
            if problem_id is not None and harness:
                harnesses[problem_id] = harness

    return harnesses


def load_problems_from_dataset(dataset_path: str, num_problems: int = 50) -> list:
    """
    Load problems from dataset.jsonl.

    Reads the file line by line, skipping blank lines, and stops as soon as
    `num_problems` entries have been collected.

    Args:
        dataset_path: Path to dataset.jsonl
        num_problems: Maximum number of problems to load; a value <= 0
            yields an empty list

    Returns:
        List of problem dictionaries
    """
    problems = []

    with open(dataset_path, encoding="utf-8") as f:
        for line in f:
            if len(problems) >= num_problems:
                break
            line = line.strip()
            if not line:
                # Skip blank lines instead of crashing in json.loads; they do
                # not count towards the num_problems limit.
                continue
            problems.append(json.loads(line))

    return problems


def build_problem_object(
    dataset_entry: dict, harnesses: Dict[int, str], registry
) -> Problem:
    """
    Build a Problem object from dataset entry, harness, and boilerplate.

    Args:
        dataset_entry: Problem data from dataset.jsonl; its "problem_id" and
            "tags" keys are read here
        harnesses: Dict of problem_id → harness code
        registry: Boilerplate registry (must provide merge_boilerplate)

    Returns:
        Problem instance ready for execution

    Raises:
        KeyError: If dataset_entry lacks "problem_id" or "tags"
    """
    problem_id = dataset_entry["problem_id"]

    # Resolve categories from tags
    categories = resolve_category(dataset_entry["tags"])

    # Load boilerplate for categories
    boilerplate = registry.merge_boilerplate(categories)

    # Get test harness (or empty string if not available)
    test_harness = harnesses.get(problem_id, "")

    # Create Problem object; the solution body is a placeholder at this stage
    problem = Problem.from_dataset_entry(
        dataset_entry=dataset_entry,
        categories=categories,
        boilerplate=boilerplate,
        test_harness=test_harness,
        solution_code=" // TODO: Implement solution\n",
    )

    return problem


def main():
    """
    Command-line entry point.

    Parses arguments, creates the output directory, loads the dataset and
    test harnesses, then builds, executes, and logs each problem in turn,
    printing a one-line status per problem followed by the logger's summary.
    Results are logged to execution_results.jsonl in the output directory.
    """
    parser = argparse.ArgumentParser(
        description="Phase 4: Compile and execute Rust problems"
    )
    parser.add_argument(
        "--num-problems",
        type=int,
        default=50,
        help="Number of problems to process (default: 50)",
    )
    parser.add_argument(
        "--dataset-path",
        type=str,
        default="complexity_reasoning_data/dataset.jsonl",
        help="Path to dataset.jsonl",
    )
    parser.add_argument(
        "--harness-path",
        type=str,
        default="complexity_reasoning_data/test_harness.jsonl",
        help="Path to test_harness.jsonl",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="complexity_reasoning_data",
        help="Output directory for results",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=10,
        help="Execution timeout in seconds (default: 10)",
    )

    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print("Phase 4: Assembly, Compilation, and Execution Pipeline")
    print("=" * 70)
    print(f"Num problems: {args.num_problems}")
    print(f"Dataset path: {args.dataset_path}")
    print(f"Harness path: {args.harness_path}")
    print(f"Output directory: {output_dir}")
    print(f"Timeout: {args.timeout}s")
    print()

    # Load data
    print("Loading dataset...")
    problems_data = load_problems_from_dataset(args.dataset_path, args.num_problems)
    print(f"✅ Loaded {len(problems_data)} problems")

    print("Loading test harnesses...")
    harnesses = load_test_harnesses(args.harness_path)
    print(f"✅ Loaded {len(harnesses)} test harnesses")

    print("Initializing boilerplate registry...")
    registry = get_registry()
    print(f"✅ Registry ready with {len(registry.get_all_categories())} categories")

    # Initialize logger
    results_path = output_dir / "execution_results.jsonl"
    logger = ExecutionLogger(str(results_path))

    # Process problems
    total = len(problems_data)
    print()
    print(f"Processing {total} problems...")
    print("-" * 70)

    for i, problem_data in enumerate(problems_data, 1):
        problem_id = problem_data["problem_id"]

        # Build problem object
        problem = build_problem_object(problem_data, harnesses, registry)

        # Execute; the progress prefix is flushed first so it is visible
        # while compilation/execution is still running.
        print(
            f"[{i:3d}/{total}] Problem #{problem_id:4d}... ",
            end="",
            flush=True,
        )
        problem = execute_problem(problem, args.timeout)

        # Log result
        logger.log_problem(problem)

        # Print status
        if problem.compilation_success and problem.execution_success:
            print("✅ Success")
        elif problem.compilation_success:
            print("⚠️ Compiled but execution failed")
        else:
            print("❌ Compilation failed")

    print()
    print("✅ All problems processed!")
    print(f"Results saved to: {results_path}")

    # Print summary
    logger.print_summary()


if __name__ == "__main__":
    main()