Spaces:

Parth3841
/

compiler_opt_env

Sleeping

App Files Files Community

compiler_opt_env / models.py

Parth3841

Upload folder using huggingface_hub

1047077 verified 8 days ago

raw

history blame contribute delete

5.73 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	Data models for the Compiler Pass Ordering RL Environment.

	This environment simulates compiler optimization — a real task performed by
	compilers like GCC and LLVM. An agent must select a sequence of optimization
	passes to apply to a program's Intermediate Representation (IR) to minimize
	estimated runtime cost.

	Three tasks of increasing difficulty:
	Task 1 (easy): Single-chain unlock. One prerequisite pass unlocks one target pass.
	Task 2 (medium): Two-chain unlock. Agent must discover two independent synergy chains.
	Task 3 (hard): Full optimization. Agent must sequence all passes optimally across
	a complex program with many interacting synergy gates.
	"""

	from typing import List, Optional
	from openenv.core.env_server.types import Action, Observation
	from pydantic import Field


	# ---------------------------------------------------------------------------
	# Pass registry
	# ---------------------------------------------------------------------------
	# Registry of optimization passes, keyed by integer pass ID. IDs are assigned
	# by position in the tuple below, so the order of the names is significant.
	PASS_NAMES = dict(
	    enumerate(
	        (
	            "dead_code_elimination",
	            "constant_folding",
	            "loop_unrolling",
	            "function_inlining",
	            "vectorization",
	            "loop_invariant_motion",
	            "strength_reduction",
	            "common_subexpr_elimination",
	            "tail_call_optimization",
	            "branch_prediction_hints",
	            "register_allocation",
	            "instruction_scheduling",
	            "memory_coalescing",
	            "alias_analysis",
	            "interprocedural_analysis",
	        )
	    )
	)

	# Number of registered passes (size of the registry above).
	NUM_PASSES = len(PASS_NAMES)
	# Default value for the observation's `max_steps` field.
	MAX_STEPS = 10

	# Task IDs, in order of increasing difficulty (easy, medium, hard).
	TASK_EASY, TASK_MEDIUM, TASK_HARD = 1, 2, 3


	# ---------------------------------------------------------------------------
	# Action
	# ---------------------------------------------------------------------------
	class CompilerOptAction(Action):
	    """
	    Select which optimization pass to apply next.

	    Fields:
	        pass_id: Key into PASS_NAMES. Validated to lie in
	            [0, NUM_PASSES - 1] (currently 0-14). Required.
	        task_id: Task difficulty. Validated to lie in
	            [TASK_EASY, TASK_HARD]; defaults to TASK_HARD.

	    Intended environment semantics (implemented outside this module):
	    applying a pass that has already been applied this episode incurs a
	    penalty, and applying a pass whose prerequisites have not been met
	    applies it at reduced effectiveness (0.3x) — the agent must discover
	    the correct ordering.
	    """

	    # Bounds come from the registry and task constants rather than literal
	    # numbers, so adding a pass or a task cannot leave validation stale.
	    # This relies on PASS_NAMES keys being the contiguous range
	    # 0..NUM_PASSES-1, which holds for the registry defined in this module.
	    pass_id: int = Field(
	        ...,
	        ge=0,
	        le=NUM_PASSES - 1,
	        description=f"ID of the optimization pass to apply (0–{NUM_PASSES - 1})",
	    )
	    task_id: int = Field(
	        default=TASK_HARD,
	        ge=TASK_EASY,
	        le=TASK_HARD,
	        description="Task difficulty: 1=easy, 2=medium, 3=hard",
	    )


	# ---------------------------------------------------------------------------
	# Observation
	# ---------------------------------------------------------------------------
	class CompilerOptObservation(Observation):
	    """
	    Snapshot of the simulated compiler IR that is returned after each step.

	    Fields are grouped as follows:
	    - Cost tracking: estimated_cost and baseline_cost.
	    - IR structure: instruction/loop/branch/function counts, loop depth and
	      a human-readable program category.
	    - Episode progress: passes_applied (ordered history), passes_available
	      (IDs not yet applied), step_count and max_steps.
	    - synergy_state: one effectiveness multiplier per pass; every entry
	      defaults to 1.0.
	    - Task info: task_id and task_description.
	    - Result fields: done, reward, improvement_pct, last_pass_name, and
	      grader_score (described as populated once done is True).
	    """

	    # --- Cost tracking -----------------------------------------------------
	    estimated_cost: float = Field(
	        default=0.0,
	        description="Current estimated runtime cost",
	    )
	    baseline_cost: float = Field(
	        default=0.0,
	        description="Cost before any optimization",
	    )

	    # --- IR structural features (described as static for the episode) ------
	    num_instructions: int = Field(
	        default=0,
	        description="Total instruction count in the IR",
	    )
	    num_loops: int = Field(
	        default=0,
	        description="Number of loop structures",
	    )
	    num_branches: int = Field(
	        default=0,
	        description="Number of branch instructions",
	    )
	    num_functions: int = Field(
	        default=0,
	        description="Number of functions",
	    )
	    loop_depth: int = Field(
	        default=0,
	        description="Maximum loop nesting depth",
	    )
	    program_type: str = Field(
	        default="",
	        description="Human-readable program category",
	    )

	    # --- Episode progress --------------------------------------------------
	    passes_applied: List[int] = Field(
	        default_factory=list,
	        description="Ordered list of pass IDs applied so far",
	    )
	    passes_available: List[int] = Field(
	        default_factory=list,
	        description="Pass IDs not yet applied this episode",
	    )
	    step_count: int = Field(
	        default=0,
	        description="Number of steps taken this episode",
	    )
	    max_steps: int = Field(
	        default=MAX_STEPS,
	        description="Maximum steps allowed per episode",
	    )

	    # --- Synergy state -----------------------------------------------------
	    # A fresh list is built per instance; it holds NUM_PASSES entries of 1.0.
	    synergy_state: List[float] = Field(
	        default_factory=lambda: [1.0 for _ in range(NUM_PASSES)],
	        description="Per-pass effectiveness multiplier. >1 = boosted by prior passes, <1 = suppressed.",
	    )

	    # --- Task info ---------------------------------------------------------
	    task_id: int = Field(
	        default=TASK_HARD,
	        description="Current task difficulty (1/2/3)",
	    )
	    task_description: str = Field(
	        default="",
	        description="Human-readable task goal",
	    )

	    # --- Terminal / result fields ------------------------------------------
	    done: bool = Field(
	        default=False,
	        description="Whether this episode has ended",
	    )
	    reward: float = Field(
	        default=0.0,
	        description="Reward received for the last action",
	    )
	    improvement_pct: float = Field(
	        default=0.0,
	        description="Total % cost reduction from baseline",
	    )
	    last_pass_name: Optional[str] = Field(
	        default=None,
	        description="Name of the last pass applied",
	    )

	    # --- Grader score (None until the episode is finished) -----------------
	    grader_score: Optional[float] = Field(
	        default=None,
	        description="Final task score 0.0–1.0, populated when done=True",
	    )