Meta-Hackathon-main / models.py
Parth3841's picture
Upload folder using huggingface_hub
7c2f148 verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for the Compiler Pass Ordering RL Environment.
This environment simulates compiler optimization — a real task performed by
compilers like GCC and LLVM. An agent must select a sequence of optimization
passes to apply to a program's Intermediate Representation (IR) to minimize
estimated runtime cost.
Three tasks of increasing difficulty:
Task 1 (easy): Single-chain unlock. One prerequisite pass unlocks one target pass.
Task 2 (medium): Two-chain unlock. Agent must discover two independent synergy chains.
Task 3 (hard): Full optimization. Agent must sequence all passes optimally across
a complex program with many interacting synergy gates.
"""
from typing import List, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field
# ---------------------------------------------------------------------------
# Pass registry
# ---------------------------------------------------------------------------
# Registry of optimization passes, keyed by pass ID. The ID of each pass is
# its position in the tuple below, so the order must not be changed.
PASS_NAMES = dict(
    enumerate(
        (
            "dead_code_elimination",
            "constant_folding",
            "loop_unrolling",
            "function_inlining",
            "vectorization",
            "loop_invariant_motion",
            "strength_reduction",
            "common_subexpr_elimination",
            "tail_call_optimization",
            "branch_prediction_hints",
            "register_allocation",
            "instruction_scheduling",
            "memory_coalescing",
            "alias_analysis",
            "interprocedural_analysis",
        )
    )
)

# Number of registered passes.
NUM_PASSES = len(PASS_NAMES)

# Maximum number of steps allowed per episode.
MAX_STEPS = 10

# Task IDs, in order of increasing difficulty.
TASK_EASY, TASK_MEDIUM, TASK_HARD = 1, 2, 3
# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------
class CompilerOptAction(Action):
    """
    Select which optimization pass to apply next.

    Fields:
        pass_id: integer in [0, NUM_PASSES - 1] (currently 0–14). See
            PASS_NAMES for the full mapping.
        task_id: task difficulty, from TASK_EASY (1) to TASK_HARD (3).
            Defaults to TASK_HARD.

    Applying a pass that has already been applied this episode incurs a penalty.
    Applying a pass whose prerequisites have not been met applies it at reduced
    effectiveness (0.3x) — the agent must discover correct ordering.
    """

    # The validation bounds are derived from the pass registry and the task ID
    # constants rather than hard-coded, so extending PASS_NAMES (or the task
    # range) cannot leave the validator silently out of date.
    pass_id: int = Field(
        ...,
        ge=0,
        le=NUM_PASSES - 1,
        description=f"ID of the optimization pass to apply (0–{NUM_PASSES - 1})",
    )
    task_id: int = Field(
        default=TASK_HARD,
        ge=TASK_EASY,
        le=TASK_HARD,
        description="Task difficulty: 1=easy, 2=medium, 3=hard",
    )
# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------
class CompilerOptObservation(Observation):
    """
    Snapshot of the simulated compiler IR returned to the agent after a step.

    The fields fall into five groups:
    - cost tracking: estimated_cost, baseline_cost, improvement_pct
    - IR structure: instruction/loop/branch/function counts, loop depth and
      program category (static for the episode)
    - episode progress: passes_applied (ordered history), passes_available,
      step_count, max_steps
    - synergy_state: one effectiveness multiplier per pass, given the history
    - task and result info: task_id, task_description, done, reward,
      last_pass_name, grader_score (populated when done=True)
    """

    # --- Cost tracking ------------------------------------------------------
    estimated_cost: float = Field(
        default=0.0,
        description="Current estimated runtime cost",
    )
    baseline_cost: float = Field(
        default=0.0,
        description="Cost before any optimization",
    )

    # --- IR structural features (static for the episode) --------------------
    num_instructions: int = Field(
        default=0,
        description="Total instruction count in the IR",
    )
    num_loops: int = Field(
        default=0,
        description="Number of loop structures",
    )
    num_branches: int = Field(
        default=0,
        description="Number of branch instructions",
    )
    num_functions: int = Field(
        default=0,
        description="Number of functions",
    )
    loop_depth: int = Field(
        default=0,
        description="Maximum loop nesting depth",
    )
    program_type: str = Field(
        default="",
        description="Human-readable program category",
    )

    # --- Episode progress ---------------------------------------------------
    passes_applied: List[int] = Field(
        default_factory=list,
        description="Ordered list of pass IDs applied so far",
    )
    passes_available: List[int] = Field(
        default_factory=list,
        description="Pass IDs not yet applied this episode",
    )
    step_count: int = Field(
        default=0,
        description="Number of steps taken this episode",
    )
    max_steps: int = Field(
        default=MAX_STEPS,
        description="Maximum steps allowed per episode",
    )

    # --- Synergy state ------------------------------------------------------
    # One multiplier per registered pass; a fresh list of neutral (1.0)
    # multipliers is built for every instance.
    synergy_state: List[float] = Field(
        default_factory=lambda: [1.0 for _ in range(NUM_PASSES)],
        description="Per-pass effectiveness multiplier. >1 = boosted by prior passes, <1 = suppressed.",
    )

    # --- Task info ----------------------------------------------------------
    task_id: int = Field(
        default=TASK_HARD,
        description="Current task difficulty (1/2/3)",
    )
    task_description: str = Field(
        default="",
        description="Human-readable task goal",
    )

    # --- Terminal / result fields -------------------------------------------
    done: bool = Field(
        default=False,
        description="Whether this episode has ended",
    )
    reward: float = Field(
        default=0.0,
        description="Reward received for the last action",
    )
    improvement_pct: float = Field(
        default=0.0,
        description="Total % cost reduction from baseline",
    )
    last_pass_name: Optional[str] = Field(
        default=None,
        description="Name of the last pass applied",
    )

    # --- Grader score (populated on done=True) ------------------------------
    grader_score: Optional[float] = Field(
        default=None,
        description="Final task score 0.0–1.0, populated when done=True",
    )