Spaces:
Sleeping
Sleeping
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
"""
Data models for the Compiler Pass Ordering RL Environment.

This environment simulates compiler optimization — a real task performed by
compilers like GCC and LLVM. An agent must select a sequence of optimization
passes to apply to a program's Intermediate Representation (IR) to minimize
estimated runtime cost.

Three tasks of increasing difficulty:
    Task 1 (easy): Single-chain unlock. One prerequisite pass unlocks one target pass.
    Task 2 (medium): Two-chain unlock. Agent must discover two independent synergy chains.
    Task 3 (hard): Full optimization. Agent must sequence all passes optimally across
        a complex program with many interacting synergy gates.
"""
| from typing import List, Optional | |
| from openenv.core.env_server.types import Action, Observation | |
| from pydantic import Field | |
# ---------------------------------------------------------------------------
# Pass registry
# ---------------------------------------------------------------------------

# Maps each integer pass ID (the value an action carries in ``pass_id``) to the
# name of the optimization pass it selects. IDs are contiguous, starting at 0.
PASS_NAMES = {
    0: "dead_code_elimination",
    1: "constant_folding",
    2: "loop_unrolling",
    3: "function_inlining",
    4: "vectorization",
    5: "loop_invariant_motion",
    6: "strength_reduction",
    7: "common_subexpr_elimination",
    8: "tail_call_optimization",
    9: "branch_prediction_hints",
    10: "register_allocation",
    11: "instruction_scheduling",
    12: "memory_coalescing",
    13: "alias_analysis",
    14: "interprocedural_analysis",
}

# Derived from the registry so the pass count cannot drift from PASS_NAMES.
NUM_PASSES = len(PASS_NAMES)

# Default per-episode step budget.
# NOTE(review): this is smaller than NUM_PASSES, so with one pass selected per
# step not every pass can be applied within a single episode — confirm intended.
MAX_STEPS = 10

# Task IDs
TASK_EASY = 1
TASK_MEDIUM = 2
TASK_HARD = 3
# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------
class CompilerOptAction(Action):
    """
    Select which optimization pass to apply next.

    pass_id: integer in [0, NUM_PASSES - 1] (currently 0–14). See PASS_NAMES
        for the full mapping.
    task_id: task difficulty, one of TASK_EASY / TASK_MEDIUM / TASK_HARD
        (1 / 2 / 3). Defaults to TASK_HARD.

    Applying a pass that has already been applied this episode incurs a penalty.
    Applying a pass whose prerequisites have not been met applies it at reduced
    effectiveness (0.3x) — the agent must discover correct ordering.
    """

    # Upper bound is derived from the registry so validation follows PASS_NAMES
    # instead of a hard-coded 14.
    pass_id: int = Field(
        ...,
        ge=0,
        le=NUM_PASSES - 1,
        description=f"ID of the optimization pass to apply (0–{NUM_PASSES - 1})",
    )
    # Bounds use the task-ID constants so they stay in sync with the task list.
    task_id: int = Field(
        default=TASK_HARD,
        ge=TASK_EASY,
        le=TASK_HARD,
        description="Task difficulty: 1=easy, 2=medium, 3=hard",
    )
# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------
class CompilerOptObservation(Observation):
    """
    Full observable state of the simulated compiler IR after each step.

    The agent uses this to decide which pass to apply next. Key signals:
    - estimated_cost / baseline_cost: how much optimization has been achieved
    - passes_applied: history of applied passes (order matters for synergy)
    - synergy_state: current effectiveness multiplier for each pass
    - passes_available: which passes have not yet been applied
    - improvement_pct: total % cost reduction from baseline so far

    Every field has a default, so an instance can be built with no arguments.
    """

    # Cost tracking
    estimated_cost: float = Field(default=0.0, description="Current estimated runtime cost")
    baseline_cost: float = Field(default=0.0, description="Cost before any optimization")

    # IR structural features (static for the episode, describe program type)
    num_instructions: int = Field(default=0, description="Total instruction count in the IR")
    num_loops: int = Field(default=0, description="Number of loop structures")
    num_branches: int = Field(default=0, description="Number of branch instructions")
    num_functions: int = Field(default=0, description="Number of functions")
    loop_depth: int = Field(default=0, description="Maximum loop nesting depth")
    program_type: str = Field(default="", description="Human-readable program category")

    # Episode progress
    # default_factory gives each instance its own list rather than a shared one.
    passes_applied: List[int] = Field(default_factory=list, description="Ordered list of pass IDs applied so far")
    passes_available: List[int] = Field(default_factory=list, description="Pass IDs not yet applied this episode")
    step_count: int = Field(default=0, description="Number of steps taken this episode")
    max_steps: int = Field(default=MAX_STEPS, description="Maximum steps allowed per episode")

    # Synergy state: current effectiveness multiplier for each pass given history.
    # Defaults to a neutral 1.0 for each of the NUM_PASSES passes.
    # NOTE(review): presumably indexed by pass ID — confirm against the environment.
    synergy_state: List[float] = Field(
        default_factory=lambda: [1.0] * NUM_PASSES,
        description="Per-pass effectiveness multiplier. >1 = boosted by prior passes, <1 = suppressed.",
    )

    # Task info
    # NOTE(review): unlike CompilerOptAction.task_id, no ge/le bounds are enforced here.
    task_id: int = Field(default=TASK_HARD, description="Current task difficulty (1/2/3)")
    task_description: str = Field(default="", description="Human-readable task goal")

    # Terminal / result fields
    # NOTE(review): the Observation base class may already declare done/reward;
    # if so these declarations override it — confirm that is intended.
    done: bool = Field(default=False, description="Whether this episode has ended")
    reward: float = Field(default=0.0, description="Reward received for the last action")
    improvement_pct: float = Field(default=0.0, description="Total % cost reduction from baseline")
    last_pass_name: Optional[str] = Field(default=None, description="Name of the last pass applied")

    # Grader score (populated on done=True)
    # NOTE(review): the 0.0–1.0 range is documented but not validated by the model.
    grader_score: Optional[float] = Field(
        default=None,
        description="Final task score 0.0–1.0, populated when done=True",
    )