# rust_algo_reasoning / phase4_executor.py
# TM23-sanji
# initial clean commit
# c987dd2
#!/usr/bin/env python3
"""
Phase 4: Assembly, Compilation, and Execution Pipeline
Combines boilerplate + solution code + test harnesses into compilable Rust code,
then compiles and executes to verify correctness.
Usage:
python phase4_executor.py [--num-problems 50] [--output-dir complexity_reasoning_data]
"""
import argparse
import json
from pathlib import Path
from typing import Dict, Set
from pipeline.problem import Problem
from pipeline.category_resolver import resolve_category
from pipeline.boilerplate.registry import get_registry
from pipeline.executor import execute_problem
from pipeline.logger import ExecutionLogger
def load_test_harnesses(harness_path: str) -> Dict[int, str]:
    """
    Load test harnesses from test_harness.jsonl.

    Missing files are tolerated (a warning is printed and an empty dict
    returned) so the pipeline can still run without harnesses.

    Args:
        harness_path: Path to test_harness.jsonl

    Returns:
        Dict mapping problem_id → Rust test harness code
    """
    harnesses: Dict[int, str] = {}
    if not Path(harness_path).exists():
        print(f"Warning: Test harness file not found: {harness_path}")
        return harnesses
    with open(harness_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data = json.loads(line)
            problem_id = data.get("problem_id")
            harness = data.get("harness")
            # Compare against None rather than truthiness: problem_id 0 is a
            # valid id and must not be dropped. Empty harnesses are skipped.
            if problem_id is not None and harness:
                harnesses[problem_id] = harness
    return harnesses
def load_problems_from_dataset(dataset_path: str, num_problems: int = 50) -> list:
    """
    Load up to num_problems problems from dataset.jsonl.

    Args:
        dataset_path: Path to dataset.jsonl
        num_problems: Maximum number of problems to load

    Returns:
        List of problem dictionaries (fewer than num_problems if the
        file is shorter)
    """
    problems: list = []
    with open(dataset_path, encoding="utf-8") as f:
        for line in f:
            if len(problems) >= num_problems:
                break
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing newline at EOF) instead
                # of crashing in json.loads.
                continue
            problems.append(json.loads(line))
    return problems
def build_problem_object(
    dataset_entry: dict, harnesses: Dict[int, str], registry
) -> Problem:
    """
    Assemble a Problem instance from a dataset entry, its test harness,
    and the boilerplate matching its categories.

    Args:
        dataset_entry: Problem data from dataset.jsonl
        harnesses: Dict of problem_id → harness code
        registry: Boilerplate registry

    Returns:
        Problem instance ready for execution
    """
    # Categories are derived from the entry's tags, then used to pull the
    # matching merged boilerplate out of the registry.
    resolved = resolve_category(dataset_entry["tags"])
    merged = registry.merge_boilerplate(resolved)
    return Problem.from_dataset_entry(
        dataset_entry=dataset_entry,
        categories=resolved,
        boilerplate=merged,
        # Fall back to an empty harness when none was generated for this id.
        test_harness=harnesses.get(dataset_entry["problem_id"], ""),
        solution_code=" // TODO: Implement solution\n",
    )
def main():
    """
    Drive Phase 4: parse CLI options, load the dataset and harnesses,
    execute each problem, log results, and print a summary.
    """
    parser = argparse.ArgumentParser(
        description="Phase 4: Compile and execute Rust problems"
    )
    # Declarative option table keeps the CLI surface easy to scan.
    option_specs = [
        (
            "--num-problems",
            dict(type=int, default=50, help="Number of problems to process (default: 50)"),
        ),
        (
            "--dataset-path",
            dict(
                type=str,
                default="complexity_reasoning_data/dataset.jsonl",
                help="Path to dataset.jsonl",
            ),
        ),
        (
            "--harness-path",
            dict(
                type=str,
                default="complexity_reasoning_data/test_harness.jsonl",
                help="Path to test_harness.jsonl",
            ),
        ),
        (
            "--output-dir",
            dict(
                type=str,
                default="complexity_reasoning_data",
                help="Output directory for results",
            ),
        ),
        (
            "--timeout",
            dict(type=int, default=10, help="Execution timeout in seconds (default: 10)"),
        ),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    print("Phase 4: Assembly, Compilation, and Execution Pipeline")
    print("=" * 70)
    print("Num problems: {}".format(args.num_problems))
    print("Dataset path: {}".format(args.dataset_path))
    print("Harness path: {}".format(args.harness_path))
    print("Output directory: {}".format(out_dir))
    print("Timeout: {}s".format(args.timeout))
    print()

    # Load all inputs up front so the per-problem loop is pure
    # assembly + execution.
    print("Loading dataset...")
    entries = load_problems_from_dataset(args.dataset_path, args.num_problems)
    print("✅ Loaded {} problems".format(len(entries)))
    print("Loading test harnesses...")
    harness_map = load_test_harnesses(args.harness_path)
    print("✅ Loaded {} test harnesses".format(len(harness_map)))
    print("Initializing boilerplate registry...")
    registry = get_registry()
    print("✅ Registry ready with {} categories".format(len(registry.get_all_categories())))

    # Results are appended to a JSONL log as we go.
    results_path = out_dir / "execution_results.jsonl"
    logger = ExecutionLogger(str(results_path))

    print()
    print("Processing {} problems...".format(len(entries)))
    print("-" * 70)
    total = len(entries)
    for idx, entry in enumerate(entries, 1):
        pid = entry["problem_id"]
        problem = build_problem_object(entry, harness_map, registry)
        print(
            "[{:3d}/{}] Problem #{:4d}... ".format(idx, total, pid),
            end="",
            flush=True,
        )
        problem = execute_problem(problem, args.timeout)
        logger.log_problem(problem)
        # Three-way status: both stages passed, compiled only, or neither.
        if problem.compilation_success and problem.execution_success:
            print("✅ Success")
        elif problem.compilation_success:
            print("⚠️ Compiled but execution failed")
        else:
            print("❌ Compilation failed")

    print()
    print("✅ All problems processed!")
    print("Results saved to: {}".format(results_path))
    logger.print_summary()
# Script entry point: run the pipeline only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()