# Pasted VCS metadata preserved as a comment (was breaking the module syntax):
# omkarrr88 — "minor changes" (commit 206438f)
"""Rule-based heuristic baseline for the /baseline endpoint.
Extracted from app.py to keep route definitions clean.
"""
from __future__ import annotations
from ml_training_debugger.models import MLTrainingAction
from server.environment import MLTrainingEnvironment
# The seven debugging tasks the baseline is evaluated against
# (task_001 through task_007, zero-padded to three digits).
ALL_TASK_IDS = [f"task_{i:03d}" for i in range(1, 8)]
def run_baseline_all_tasks() -> dict[str, float]:
    """Run the rule-based heuristic baseline on every known task.

    Each task gets a fresh environment reset with a fixed seed so the
    baseline is deterministic across invocations.

    Returns:
        Mapping of task_id to grader score, rounded to 4 decimal places.
    """
    results: dict[str, float] = {}
    for tid in ALL_TASK_IDS:
        environment = MLTrainingEnvironment()
        # Fixed seed keeps baseline scores reproducible run-to-run.
        environment.reset(seed=42, episode_id=f"baseline_{tid}", task_id=tid)
        results[tid] = round(_run_heuristic_episode(environment), 4)
    return results
def _run_heuristic_episode(
    env: MLTrainingEnvironment, task_id: str = "",
) -> float:
    """Run one heuristic baseline episode. Returns grader score.

    Walks a fixed decision tree of inspections; the first failure mode that
    matches gets its fix applied, the run restarted, and a diagnosis marked,
    after which the grader score is returned. If nothing matches, the episode
    falls back to diagnosing "overfitting" without applying any fix.

    NOTE(review): task_id is accepted but never read in this body —
    presumably reserved for per-task specialization; confirm before removing.
    """
    # Step 1: inspect_gradients
    obs = env.step(MLTrainingAction(action_type="inspect_gradients"))
    if obs.gradient_stats:
        if any(g.is_exploding for g in obs.gradient_stats):
            # Exploding gradients -> lower the learning rate and rerun.
            env.step(MLTrainingAction(
                action_type="modify_config", target="learning_rate", value=0.001,
            ))
            env.step(MLTrainingAction(action_type="restart_run"))
            env.step(MLTrainingAction(
                action_type="mark_diagnosed", diagnosis="lr_too_high",
            ))
            return _get_score(env)
        if any(g.is_vanishing for g in obs.gradient_stats):
            # Vanishing gradients -> raise the learning rate and rerun.
            env.step(MLTrainingAction(
                action_type="modify_config", target="learning_rate", value=0.01,
            ))
            env.step(MLTrainingAction(action_type="restart_run"))
            env.step(MLTrainingAction(
                action_type="mark_diagnosed", diagnosis="vanishing_gradients",
            ))
            return _get_score(env)
    # Step 2: inspect_data_batch
    obs = env.step(MLTrainingAction(action_type="inspect_data_batch"))
    if obs.data_batch_stats and obs.data_batch_stats.class_overlap_score > 0.5:
        # Heavy class overlap in the batch -> treat as data leakage.
        env.step(MLTrainingAction(action_type="patch_data_loader"))
        env.step(MLTrainingAction(action_type="restart_run"))
        env.step(MLTrainingAction(
            action_type="mark_diagnosed", diagnosis="data_leakage",
        ))
        return _get_score(env)
    # Detect overfitting pattern now, while obs still carries data_batch_stats
    # (which _detect_overfitting reads); act on it only after later checks miss.
    looks_like_overfitting = _detect_overfitting(obs)
    # Step 3: inspect_model_modes
    obs = env.step(MLTrainingAction(action_type="inspect_model_modes"))
    if obs.model_mode_info:
        if any(v == "eval" for v in obs.model_mode_info.values()):
            # Some module reports "eval" mode during training -> fix it.
            env.step(MLTrainingAction(action_type="fix_model_mode"))
            env.step(MLTrainingAction(action_type="restart_run"))
            env.step(MLTrainingAction(
                action_type="mark_diagnosed", diagnosis="batchnorm_eval_mode",
            ))
            return _get_score(env)
    # Step 4: inspect_code (for Task 6)
    obs = env.step(MLTrainingAction(action_type="inspect_code"))
    if obs.code_snippet:
        code = obs.code_snippet.code
        _try_code_fix(env, code)
        # Only diagnose a code bug if _try_code_fix actually applied an edit
        # (session state records whether a fix action was taken).
        session = env._get_session()
        if session and session.state.fix_action_taken:
            env.step(MLTrainingAction(action_type="restart_run"))
            env.step(MLTrainingAction(
                action_type="mark_diagnosed", diagnosis="code_bug",
            ))
            return _get_score(env)
    # Step 5: scheduler issue (loss stagnates after initial progress)
    if _detect_scheduler_issue(obs):
        env.step(MLTrainingAction(
            action_type="modify_config", target="learning_rate", value=0.001,
        ))
        env.step(MLTrainingAction(action_type="restart_run"))
        env.step(MLTrainingAction(
            action_type="mark_diagnosed", diagnosis="scheduler_misconfigured",
        ))
        return _get_score(env)
    # Overfitting fallback: add regularization before rerunning.
    if looks_like_overfitting:
        env.step(MLTrainingAction(
            action_type="modify_config", target="weight_decay", value=0.01,
        ))
        env.step(MLTrainingAction(action_type="restart_run"))
        env.step(MLTrainingAction(
            action_type="mark_diagnosed", diagnosis="overfitting",
        ))
        return _get_score(env)
    # Final fallback: best-guess diagnosis with no fix applied.
    env.step(MLTrainingAction(
        action_type="mark_diagnosed", diagnosis="overfitting",
    ))
    return _get_score(env)
def _try_code_fix(env: MLTrainingEnvironment, code: str) -> None:
    """Apply the first matching rule-based fix for a known code-bug pattern.

    At most one fix_code action is issued; if no rule matches, the
    environment is left untouched.
    """
    # Ordered (matched, line, replacement) rules — first hit wins, mirroring
    # an if/elif chain. Predicates are cheap substring tests on the snippet.
    fix_rules = [
        (
            "model.eval()" in code and "model.train()" not in code,
            5,
            "model.train()",
        ),
        (
            ".detach()" in code,
            14,
            "        loss = criterion(output, batch_y)",
        ),
        (
            "inplace=True" in code,
            15,
            "        output = F.relu(output)",
        ),
        (
            "optimizer.zero_grad()" not in code and "optimizer.step()" in code,
            11,
            "        optimizer.zero_grad()",
        ),
    ]
    for matched, line_no, replacement in fix_rules:
        if matched:
            env.step(MLTrainingAction(
                action_type="fix_code", line=line_no, replacement=replacement,
            ))
            return
def _detect_overfitting(obs: object) -> bool:
"""Detect overfitting pattern from observation."""
if not (obs.val_loss_history and obs.training_loss_history
and len(obs.val_loss_history) >= 10):
return False
early_train = sum(obs.training_loss_history[:5]) / 5
late_train = sum(obs.training_loss_history[-5:]) / 5
early_val = sum(obs.val_loss_history[:5]) / 5
late_val = sum(obs.val_loss_history[-5:]) / 5
train_dropped = late_train < early_train * 0.5
train_loss_low = late_train < 0.15
val_not_improving = late_val >= early_val * 0.95
gap_widening = (late_val - late_train) > (early_val - early_train)
return (
(train_dropped or train_loss_low)
and (val_not_improving or gap_widening)
and obs.data_batch_stats
and obs.data_batch_stats.class_overlap_score < 0.3
)
def _detect_scheduler_issue(obs: object) -> bool:
"""Detect scheduler misconfiguration from loss history."""
if not (obs.training_loss_history and len(obs.training_loss_history) >= 10):
return False
early_loss = sum(obs.training_loss_history[:3]) / 3
mid_loss = sum(obs.training_loss_history[5:8]) / 3
finite_late = [v for v in obs.training_loss_history[-3:] if v != float("inf")]
late_loss = sum(finite_late) / max(len(finite_late), 1)
return early_loss > mid_loss and abs(late_loss - mid_loss) < 0.3
def _get_score(env: MLTrainingEnvironment) -> float:
"""Extract the grader score from the environment."""
session = env._get_session()
if session and session.last_score is not None:
return session.last_score
return 0.0