# Provenance: uploaded by samrat-rm via huggingface_hub (commit 45c94ac, verified).
"""
Grader for the CI/CD Doctor environment.

Reward shape (see the constants below):
    fixes_applied_fraction * 0.20   proportional credit for each answer_key fix
                                    that is present in the filesystem (emitted
                                    incrementally as each fix lands, not
                                    all-or-nothing -- on a 2-fix task, each fix
                                    is worth +0.10)
    correct_file_located    +0.01   token credit for locating the right file
    pipeline_passed         +0.50   pipeline_status == "passed" (terminal)

Total positive: 0.71 from grade() + shaped bonuses from balance_score().

The remaining investigation milestones (investigated, logs_read) are still
tracked in state.milestones for the balance_score() logic but give no
reward -- reading a file is not progress, fixing it is.

balance_score() applies per-step shaped adjustments on top of the tier delta:
    +0.05  first read of each answer-key file (exploration bonus, max 2 files)
    +0.05  each step taken at or under the task's ideal step count
           ("optimal_step" bonus)
    -0.05  cat on a file already read this episode (redundant read penalty)
    -0.10  pipeline run with no filesystem changes since last run (idle run)
    -0.01 * overage  each step taken beyond the task's ideal step count
           (efficiency penalty scales linearly with how far past ideal -- at
           ideal+1 it's -0.01, at ideal+5 it's -0.05; cumulative cost on a
           9-step overage tops out around -0.45)
    -0.08  agent has read the correct file but runs pipeline again with no edit
           (exploitation trap -- knows the problem, not acting on it)
"""
from dataclasses import dataclass, field
from models import PipelineState
# Maximum credit available from the fix-fraction term in grade(); each
# answer-key fix contributes an equal share of this total as it lands.
CORRECT_FILE_EDITED_TOTAL = 0.2

# Per-milestone rewards. The investigation milestones are tracked but pay
# nothing; only locating the correct file (token credit) and passing the
# pipeline are rewarded. "optimal_step" is the per-step bonus granted by
# balance_score() while the agent stays within the ideal step budget.
TIER_REWARDS: dict[str, float] = {
    "investigated": 0.0,
    "logs_read": 0.0,
    "correct_file_located": 0.01,
    "pipeline_passed": 0.50,
    "optimal_step": 0.05,
}

# Shaped per-step penalties applied by balance_score().
PENALTIES: dict[str, float] = {
    "idle_pipeline_run": -0.10,
    "redundant_read": -0.05,
    "over_ideal_step": -0.01,
    "exploitation_trap": -0.08,
}
@dataclass
class StepContext:
    """Per-step snapshot of the agent's command plus episode bookkeeping,
    consumed by balance_score() to compute shaped reward adjustments."""

    # Command the agent issued this step (balance_score branches on
    # "cat" and "pipeline_run").
    cmd_type: str
    # Target of the command when it takes one (e.g. the file passed to cat).
    filename: str | None = None
    # Files already read earlier this episode (redundant-read detection).
    files_read: set[str] = field(default_factory=set)
    # Whether the filesystem changed since the last pipeline run; True by
    # default so the very first run is never flagged as idle.
    fs_changed_since_last_run: bool = True
    # Steps taken so far this episode.
    step_count: int = 0
    # Episode step cap — not read by balance_score(); presumably enforced
    # by the environment loop (TODO confirm).
    max_steps: int = 15
    # Ideal step count for the task; exceeding it draws efficiency penalties.
    ideal_steps: int = 6
    # Pipeline runs since the agent last edited a file (exploitation-trap check).
    pipeline_runs_since_last_edit: int = 0
def _fixes_applied_fraction(state: PipelineState) -> float:
    """
    Fraction of answer_key fixes currently present in the filesystem.

    Returns a value in [0.0, 1.0]. A fix counts as applied the moment its
    fragment appears anywhere in its target file, so a multi-fix task earns
    credit for each correct edit as it lands rather than only when all of
    them are done. An empty answer key yields 0.0.
    """
    required = state.answer_key.get("fixes", {})
    if not required:
        return 0.0
    applied = 0
    for target, fragment in required.items():
        # Missing files read as "" so an absent target simply doesn't count.
        if fragment in state.filesystem.get(target, ""):
            applied += 1
    return applied / len(required)
def grade(state: PipelineState) -> float:
    """
    Compute the total earned grade from state.

    Fractional credit for answer-key fixes present in the filesystem, plus
    per-tier rewards for unlocked milestones; the terminal pipeline_passed
    bonus is granted straight from pipeline_status. Investigation-only
    milestones carry 0 reward — reading a file is not progress, fixing it is.
    """
    earned = CORRECT_FILE_EDITED_TOTAL * _fixes_applied_fraction(state)
    achieved = set(state.milestones)
    if state.pipeline_status == "passed":
        achieved.add("pipeline_passed")
    for tier in achieved:
        # Milestones without an entry in TIER_REWARDS contribute nothing.
        earned += TIER_REWARDS.get(tier, 0.0)
    return round(earned, 2)
def balance_score(state: PipelineState, ctx: StepContext) -> float:
    """
    Per-step shaped reward adjustment on top of the raw grade delta.

    Returns a float (may be negative), rounded to 2 decimals. The caller
    adds this to the grade delta to produce the final step reward.

    The two goals:
      - Encourage exploration: small bonus the first time the agent reads a
        file that needs fixing (up to 2 files per episode).
      - Discourage waste: penalties for re-reading, idle pipeline runs,
        burning the step budget, and knowing the fix but not applying it.
    """
    adjustment = 0.0
    # Files named in the answer key — the ones worth reading and fixing.
    fix_files = set(state.answer_key.get("fixes", {}).keys())
    if ctx.cmd_type == "cat" and ctx.filename:
        if ctx.filename in fix_files and ctx.filename not in ctx.files_read:
            # First read of a file that needs fixing — exploration bonus.
            # Cap at 2 files total to avoid rewarding excessive exploration.
            already_explored = sum(1 for f in ctx.files_read if f in fix_files)
            if already_explored < 2:
                adjustment += 0.05
        elif ctx.filename in ctx.files_read:
            # Already read this file — wasted step.
            adjustment += PENALTIES["redundant_read"]
    if ctx.cmd_type == "pipeline_run":
        if not ctx.fs_changed_since_last_run:
            # Nothing changed since the last run — this reveals no new info.
            adjustment += PENALTIES["idle_pipeline_run"]
        if (
            "correct_file_located" in state.milestones
            and ctx.pipeline_runs_since_last_edit >= 1
        ):
            # Agent has already read the right file and run the pipeline at
            # least once since its last edit — it knows what to fix but is
            # stalling instead of applying the fix.
            adjustment += PENALTIES["exploitation_trap"]
    # Step-budget shaping: linear penalty past the ideal step count, small
    # "optimal_step" bonus while at or under it.
    # NOTE(review): this branch applies on every step regardless of command
    # type — confirm the within-budget bonus is meant to accrue per step.
    if ctx.step_count > ctx.ideal_steps:
        overage = ctx.step_count - ctx.ideal_steps
        adjustment += PENALTIES["over_ideal_step"] * overage
    else:
        adjustment += TIER_REWARDS["optimal_step"]
    return round(adjustment, 2)