Spaces:

coffeine16
/

fitscript-env

Sleeping

App Files Files Community

fitscript-env / server /FitScript_environment.py

coffeine16

clean initial commit

0446283 about 1 month ago

raw

history blame contribute delete

25.9 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	FitScript Environment Implementation.

	Simulates a real-world fitness prescription task: generating, evaluating,
	and refining personalized workout plans. Supports three tasks of increasing
	difficulty with deterministic graders.
	"""

	import json
	from uuid import uuid4
	from typing import Dict, Any, Tuple

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	try:
	from ..models import FitscriptAction, FitscriptObservation
	except ImportError:
	from models import FitscriptAction, FitscriptObservation


	# ---------------------------------------------------------------------------
	# Grader base class
	# ---------------------------------------------------------------------------

	class BaseTask:
	    """Common interface shared by every FitScript task.

	    Concrete tasks override ``client_profile`` and ``max_steps`` and
	    provide their own :meth:`grade` implementation.
	    """

	    # Client description for the task; the base class ships an empty one.
	    client_profile: dict = {}
	    # Step budget for the task; the base default is 5.
	    max_steps: int = 5

	    def grade(
	        self,
	        action: FitscriptAction,
	        step: int,
	    ) -> Tuple[float, Dict[str, float], str]:
	        """Score a submitted action.

	        Args:
	            action: The submission to evaluate.
	            step: The step number at which the submission was made.

	        Returns:
	            ``(score, breakdown, feedback)``: score is a float in [0, 1],
	            breakdown maps criterion name to its partial score, and
	            feedback is a human-readable explanation.

	        Raises:
	            NotImplementedError: Always; every concrete task must override
	                this method.
	        """
	        raise NotImplementedError()


	# ---------------------------------------------------------------------------
	# Task 1 - EASY: Basic Plan Generation
	# ---------------------------------------------------------------------------

	class BasicPlanTask(BaseTask):
	    """
	    Easy task: basic plan generation.

	    Scenario: 35-year-old beginner, no injuries, 3 days/week, home, no equipment.
	    Grader: 4 criteria worth 0.25 each.
	    Step budget: 3 (``max_steps``).
	    """

	    client_profile = {
	        "age": 35,
	        "fitness_level": "beginner",
	        "goal": "general fitness",
	        "equipment": [],
	        "injuries": [],
	        "days_per_week": 3,
	    }
	    max_steps = 3

	    # Equipment keywords; any substring match in the raw plan text fails
	    # the bodyweight-only criterion.
	    EQUIPMENT_EXERCISES = {
	        "barbell", "dumbbell", "kettlebell", "cable", "machine",
	        "bench press", "squat rack", "pull-up bar", "resistance band",
	        "treadmill", "stationary bike",
	    }

	    # Advanced movements not appropriate for beginners (substring match).
	    ADVANCED_MOVEMENTS = {
	        "muscle-up", "muscle up", "handstand push-up", "handstand pushup",
	        "pistol squat", "one-arm push-up", "planche", "front lever",
	        "back lever", "dragon flag",
	    }

	    @staticmethod
	    def _day_is_structured(day: Any) -> bool:
	        """Return True if *day* has 4-8 exercises, each with truthy sets and reps."""
	        if not isinstance(day, dict):
	            return False
	        exercises = day.get("exercises", [])
	        if not isinstance(exercises, list) or not 4 <= len(exercises) <= 8:
	            return False
	        return all(
	            isinstance(ex, dict) and ex.get("sets") and ex.get("reps")
	            for ex in exercises
	        )

	    def grade(self, action: FitscriptAction, step: int) -> Tuple[float, Dict[str, float], str]:
	        """Grade a beginner bodyweight plan against four 0.25-point criteria.

	        Args:
	            action: Submission whose ``plan`` attribute is a JSON string.
	            step: Current step number (not used by this grader).

	        Returns:
	            ``(score, breakdown, feedback)`` where ``score`` is the sum of the
	            per-criterion values in ``breakdown``.
	        """
	        scores: Dict[str, float] = {}
	        feedback_parts = []

	        plan_text = action.plan or ""
	        try:
	            plan = json.loads(plan_text) if plan_text else {}
	        except json.JSONDecodeError:
	            plan = {}
	        if not isinstance(plan, dict):
	            # Valid JSON that is not an object (list, string, number) carries
	            # no plan structure; grade it like an empty plan instead of
	            # crashing on ``.get``.
	            plan = {}

	        # Criterion 1: Plan contains exactly 3 workout days
	        days = plan.get("days", plan.get("workout_days", []))
	        if isinstance(days, list) and len(days) == 3:
	            scores["three_days"] = 0.25
	            feedback_parts.append("✓ Plan has exactly 3 workout days.")
	        else:
	            scores["three_days"] = 0.0
	            found = len(days) if isinstance(days, list) else "unknown"
	            feedback_parts.append(f"✗ Expected 3 workout days, found {found}.")

	        # Criterion 2: All exercises are bodyweight-only.
	        # sorted() keeps the feedback text stable; plain set iteration order
	        # for strings can differ between interpreter runs.
	        plan_text_lower = plan_text.lower()
	        equipment_found = sorted(
	            e for e in self.EQUIPMENT_EXERCISES if e in plan_text_lower
	        )
	        if not equipment_found:
	            scores["bodyweight_only"] = 0.25
	            feedback_parts.append("✓ No equipment required --- all bodyweight exercises.")
	        else:
	            scores["bodyweight_only"] = 0.0
	            feedback_parts.append(f"✗ Equipment-dependent exercises found: {equipment_found[:3]}.")

	        # Criterion 3: Each day has 4-8 exercises with sets and reps defined
	        if isinstance(days, list) and days:
	            days_ok = sum(1 for day in days if self._day_is_structured(day))
	            if days_ok == len(days):
	                scores["exercise_structure"] = 0.25
	                feedback_parts.append("✓ Each day has 4-8 exercises with sets and reps defined.")
	            else:
	                scores["exercise_structure"] = 0.0
	                feedback_parts.append(
	                    f"✗ {days_ok}/{len(days)} days have 4-8 exercises with sets+reps. "
	                    "Ensure every exercise has 'sets' and 'reps' fields."
	                )
	        else:
	            scores["exercise_structure"] = 0.0
	            feedback_parts.append("✗ Cannot evaluate exercise structure: no days found.")

	        # Criterion 4: Beginner-appropriate (reps <= 15, no advanced movements)
	        advanced_found = sorted(
	            m for m in self.ADVANCED_MOVEMENTS if m in plan_text_lower
	        )
	        reps_too_high = _check_reps_exceed(plan, max_reps=15)
	        if not advanced_found and not reps_too_high:
	            scores["beginner_appropriate"] = 0.25
	            feedback_parts.append("✓ Plan is beginner-appropriate (no advanced movements, reps ≤ 15).")
	        else:
	            scores["beginner_appropriate"] = 0.0
	            if advanced_found:
	                feedback_parts.append(f"✗ Advanced movements not suitable for beginners: {advanced_found}.")
	            if reps_too_high:
	                feedback_parts.append("✗ Some exercises have reps > 15 --- too high for a beginner.")

	        score = sum(scores.values())
	        feedback = " ".join(feedback_parts)
	        return score, scores, feedback


	# ---------------------------------------------------------------------------
	# Task 2 - MEDIUM: Injury-Safe Plan Modification
	# ---------------------------------------------------------------------------

	class InjurySafeTask(BaseTask):
	    """
	    Medium task: injury-safe plan modification.

	    Scenario: Intermediate client with lower-back injury. Pre-generated plan
	    contains back squats, deadlifts, and bent-over rows. Agent must modify safely.
	    Grader: 4 criteria worth 0.25 each, evaluated by keyword matching on the
	    lower-cased plan text.
	    Step budget: 5 (``max_steps``).
	    """

	    client_profile = {
	        "age": 30,
	        "fitness_level": "intermediate",
	        "goal": "strength maintenance",
	        "equipment": ["barbell", "dumbbells", "cables", "machines"],
	        "injuries": ["lower back"],
	        "days_per_week": 4,
	        "initial_plan": {
	            "days": [
	                {
	                    "name": "Day 1 - Lower Body",
	                    "exercises": [
	                        {"name": "Back Squat", "sets": 4, "reps": 8},
	                        {"name": "Deadlift", "sets": 3, "reps": 5},
	                        {"name": "Leg Press", "sets": 3, "reps": 10},
	                        {"name": "Calf Raises", "sets": 4, "reps": 15},
	                    ],
	                },
	                {
	                    "name": "Day 2 - Upper Body",
	                    "exercises": [
	                        {"name": "Bench Press", "sets": 4, "reps": 8},
	                        {"name": "Bent-Over Row", "sets": 4, "reps": 8},
	                        {"name": "Overhead Press", "sets": 3, "reps": 10},
	                        {"name": "Pull-Up", "sets": 3, "reps": "max"},
	                    ],
	                },
	            ]
	        },
	    }
	    max_steps = 5

	    # Accepted substitutes for the conventional deadlift. Entries that
	    # themselves contain the word "deadlift" are masked out before the
	    # grader looks for a remaining conventional deadlift.
	    DEADLIFT_REPLACEMENTS = {
	        "romanian deadlift", "rdl", "leg press", "leg curl",
	        "hip thrust", "glute bridge", "trap bar deadlift",
	    }
	    SQUAT_REPLACEMENTS = {
	        "goblet squat", "wall sit", "wall squat", "leg press",
	        "box squat", "safety bar squat", "hack squat",
	    }
	    ROW_REPLACEMENTS = {
	        "seated cable row", "seated row", "machine row",
	        "chest-supported row", "chest supported row",
	        "t-bar row", "seal row",
	    }
	    ORIGINAL_MUSCLE_GROUPS = {"quads", "hamstrings", "glutes", "back", "chest", "shoulders"}

	    def grade(self, action: FitscriptAction, step: int) -> Tuple[float, Dict[str, float], str]:
	        """Grade a modified plan for a client with a lower-back injury.

	        Args:
	            action: Submission whose ``plan`` attribute holds the plan text.
	            step: Current step number (not used by this grader).

	        Returns:
	            ``(score, breakdown, feedback)`` where ``score`` is the sum of the
	            per-criterion values in ``breakdown``.
	        """
	        scores: Dict[str, float] = {}
	        feedback_parts = []
	        plan_text_lower = (action.plan or "").lower()

	        # Criterion 1: Deadlifts removed or replaced with safe alternatives.
	        # Mask the accepted variants first so that "Romanian Deadlift" does
	        # not count as a conventional deadlift, and so that listing a safe
	        # variant cannot hide a conventional deadlift left elsewhere in the
	        # plan.
	        residual_text = plan_text_lower
	        for safe_variant in self.DEADLIFT_REPLACEMENTS:
	            if "deadlift" in safe_variant:
	                residual_text = residual_text.replace(safe_variant, " ")
	        raw_deadlift = "deadlift" in residual_text
	        if not raw_deadlift:
	            scores["deadlift_removed"] = 0.25
	            feedback_parts.append("✓ Conventional deadlift removed or replaced safely.")
	        else:
	            scores["deadlift_removed"] = 0.0
	            feedback_parts.append(
	                "✗ Conventional deadlift still present. Replace with Romanian deadlift, leg press, or hip thrust."
	            )

	        # Criterion 2: Back squats replaced with safe alternatives
	        has_back_squat = "back squat" in plan_text_lower
	        if not has_back_squat:
	            scores["squat_replaced"] = 0.25
	            feedback_parts.append("✓ Back squat removed or replaced safely.")
	        else:
	            scores["squat_replaced"] = 0.0
	            feedback_parts.append(
	                "✗ Back squat still present. Replace with goblet squat, wall sit, or leg press."
	            )

	        # Criterion 3: Bent-over rows replaced with seated/machine variants
	        has_bent_over_row = "bent-over row" in plan_text_lower or "bent over row" in plan_text_lower
	        if not has_bent_over_row:
	            scores["rows_replaced"] = 0.25
	            feedback_parts.append("✓ Bent-over rows removed or replaced with spine-neutral variant.")
	        else:
	            scores["rows_replaced"] = 0.0
	            feedback_parts.append(
	                "✗ Bent-over rows still present. Replace with seated cable rows or machine rows."
	            )

	        # Criterion 4: Plan retains same muscle group targets
	        # Proxy: check that back/leg work still appears in the plan
	        back_work = any(
	            t in plan_text_lower
	            for t in ["row", "pull", "lat", "back", "rhomboid"]
	        )
	        leg_work = any(
	            t in plan_text_lower
	            for t in ["squat", "press", "lunge", "hip", "glute", "quad", "hamstring", "leg"]
	        )
	        if back_work and leg_work:
	            scores["muscle_targets_retained"] = 0.25
	            feedback_parts.append("✓ Original muscle groups (back, legs) still targeted despite modifications.")
	        else:
	            scores["muscle_targets_retained"] = 0.0
	            missing = []
	            if not back_work:
	                missing.append("back")
	            if not leg_work:
	                missing.append("legs")
	            feedback_parts.append(
	                f"✗ Missing muscle group coverage: {missing}. Ensure modifications keep the same target areas."
	            )

	        score = sum(scores.values())
	        feedback = " ".join(feedback_parts)
	        return score, scores, feedback


	# ---------------------------------------------------------------------------
	# Task 3 - HARD: Periodized 4-Week Program
	# ---------------------------------------------------------------------------

	class PeriodizedProgramTask(BaseTask):
	    """
	    Hard task: periodized 4-week program.

	    Scenario: Advanced powerlifter, 5 days/week, full gym, competition in 5 weeks.
	    Needs 4-week block with deload in week 4.
	    Grader: 5 criteria worth up to 0.2 each (some award 0.1 partial credit).
	    Step budget: 8 (``max_steps``).
	    """

	    client_profile = {
	        "age": 27,
	        "fitness_level": "advanced",
	        "goal": "powerlifting competition prep",
	        "equipment": ["full gym", "barbell", "squat rack", "bench", "deadlift platform"],
	        "injuries": [],
	        "days_per_week": 5,
	        "competition_weeks_out": 5,
	        "weak_points": ["upper back", "lockout strength"],
	        "current_maxes": {"squat": 180, "bench": 120, "deadlift": 220},
	    }
	    max_steps = 8

	    COMPETITION_LIFTS = {"squat", "bench", "bench press", "deadlift"}

	    @staticmethod
	    def _as_number(value: Any):
	        """Return *value* as a float, or None when it is not numeric.

	        Accepts ints, floats and numeric strings with an optional trailing
	        percent sign (e.g. ``"75%"``). Booleans are rejected.
	        """
	        if isinstance(value, bool):
	            return None
	        if isinstance(value, (int, float)):
	            return float(value)
	        if isinstance(value, str):
	            try:
	                return float(value.strip().rstrip("%").strip())
	            except ValueError:
	                return None
	        return None

	    @staticmethod
	    def _week_days(week: Any) -> list:
	        """Return the list of training days of *week*, or [] if unavailable."""
	        if not isinstance(week, dict):
	            return []
	        days = week.get("days", week.get("training_days", []))
	        return days if isinstance(days, list) else []

	    @classmethod
	    def _week_intensity(cls, week: Any):
	        """Return a numeric intensity for *week*, or None if none can be found.

	        The first truthy value among the ``intensity``, ``avg_rpe`` and
	        ``percentage`` fields is used when it is numeric. Otherwise the
	        intensity is inferred from keywords in the week's text:
	        heavy/high -> 85, moderate/medium -> 75.
	        """
	        explicit = None
	        if isinstance(week, dict):
	            explicit = week.get("intensity") or week.get("avg_rpe") or week.get("percentage")
	        intensity = cls._as_number(explicit)
	        if intensity is None:
	            desc = str(week).lower()
	            if "heavy" in desc or "high" in desc:
	                intensity = 85.0
	            elif "moderate" in desc or "medium" in desc:
	                intensity = 75.0
	        return intensity

	    def grade(self, action: FitscriptAction, step: int) -> Tuple[float, Dict[str, float], str]:
	        """Grade a 4-week powerlifting block.

	        Args:
	            action: Submission whose ``plan`` attribute is a JSON string.
	            step: Current step number (not used by this grader).

	        Returns:
	            ``(score, breakdown, feedback)`` where ``score`` is the sum of the
	            per-criterion values in ``breakdown``, capped at 1.0.
	        """
	        scores: Dict[str, float] = {}
	        feedback_parts = []

	        plan_text = action.plan or ""
	        try:
	            plan = json.loads(plan_text) if plan_text else {}
	        except json.JSONDecodeError:
	            plan = {}
	        if not isinstance(plan, dict):
	            # Valid JSON that is not an object has no "weeks"; grade it as an
	            # empty plan instead of crashing on ``.get``.
	            plan = {}

	        weeks = plan.get("weeks", [])

	        # Criterion 1: 4 distinct weeks, each with 5 training days
	        if isinstance(weeks, list) and len(weeks) == 4:
	            all_five_days = all(len(self._week_days(w)) == 5 for w in weeks)
	            if all_five_days:
	                scores["week_structure"] = 0.2
	                feedback_parts.append("✓ 4 weeks present, each with 5 training days.")
	            else:
	                scores["week_structure"] = 0.1
	                feedback_parts.append(
	                    "~ 4 weeks present but not all weeks have exactly 5 training days."
	                )
	        else:
	            scores["week_structure"] = 0.0
	            found_weeks = len(weeks) if isinstance(weeks, list) else "unknown"
	            feedback_parts.append(
	                f"✗ Expected 4 weeks with 5 days each. Found {found_weeks} weeks."
	            )

	        # Criterion 2: Weeks 1-3 show progressive overload.
	        # Intensities are normalised to floats so that mixed inputs such as
	        # "75%" and 80 compare numerically rather than raising TypeError.
	        if isinstance(weeks, list) and len(weeks) >= 3:
	            intensities = [self._week_intensity(w) for w in weeks[:3]]
	            all_known = all(i is not None for i in intensities)

	            if all_known and intensities[0] < intensities[1] < intensities[2]:
	                scores["progressive_overload"] = 0.2
	                feedback_parts.append("✓ Weeks 1-3 show clear progressive overload (increasing intensity).")
	            elif all_known:
	                scores["progressive_overload"] = 0.1
	                feedback_parts.append(
	                    "~ Intensity values present but progressive overload pattern not clearly ascending across weeks 1-3."
	                )
	            else:
	                scores["progressive_overload"] = 0.0
	                feedback_parts.append(
	                    "✗ Cannot verify progressive overload. Add 'intensity', 'avg_rpe', or 'percentage' fields to each week."
	                )
	        else:
	            scores["progressive_overload"] = 0.0
	            feedback_parts.append("✗ Fewer than 3 weeks present; cannot verify progressive overload.")

	        # Criterion 3: Week 4 is a deload (volume reduced >= 40% vs week 3)
	        if isinstance(weeks, list) and len(weeks) == 4:
	            w3 = weeks[2]
	            w4 = weeks[3]
	            # Non-dict week entries carry no volume data.
	            w3_vol = _estimate_volume(w3) if isinstance(w3, dict) else 0.0
	            w4_vol = _estimate_volume(w4) if isinstance(w4, dict) else 0.0
	            is_deload_label = "deload" in str(w4).lower()
	            if w3_vol > 0 and w4_vol > 0:
	                reduction = (w3_vol - w4_vol) / w3_vol
	                if reduction >= 0.40:
	                    scores["deload_week"] = 0.2
	                    feedback_parts.append(
	                        f"✓ Week 4 deload: volume reduced by {reduction*100:.0f}% vs week 3."
	                    )
	                elif is_deload_label:
	                    scores["deload_week"] = 0.1
	                    feedback_parts.append(
	                        "~ Week 4 labeled as deload but volume reduction < 40%. Reduce total sets/volume further."
	                    )
	                else:
	                    scores["deload_week"] = 0.0
	                    feedback_parts.append(
	                        f"✗ Week 4 volume only reduced by {reduction*100:.0f}%. Deload requires >= 40% reduction."
	                    )
	            elif is_deload_label:
	                scores["deload_week"] = 0.1
	                feedback_parts.append(
	                    "~ Week 4 labeled as deload but no volume data to verify the 40% reduction threshold."
	                )
	            else:
	                scores["deload_week"] = 0.0
	                feedback_parts.append(
	                    "✗ Week 4 not identified as a deload and volume data insufficient to verify."
	                )
	        else:
	            scores["deload_week"] = 0.0
	            feedback_parts.append("✗ Fewer than 4 weeks present; cannot evaluate deload week.")

	        # Criterion 4: Competition lifts are present.
	        # This is a keyword check on the whole plan text; it does not verify
	        # which day a lift appears on or its position within the day.
	        plan_text_lower = plan_text.lower()
	        squat_present = "squat" in plan_text_lower
	        bench_present = "bench" in plan_text_lower
	        deadlift_present = "deadlift" in plan_text_lower
	        if squat_present and bench_present and deadlift_present:
	            scores["competition_lifts"] = 0.2
	            feedback_parts.append("✓ All three competition lifts (squat, bench, deadlift) present as primary movements.")
	        else:
	            missing = []
	            if not squat_present:
	                missing.append("squat")
	            if not bench_present:
	                missing.append("bench press")
	            if not deadlift_present:
	                missing.append("deadlift")
	            scores["competition_lifts"] = 0.0
	            feedback_parts.append(f"✗ Missing competition lifts: {missing}.")

	        # Criterion 5: Accessory work targets weak points (upper back, lockout).
	        # Counts how many distinct keywords occur in the plan text.
	        weak_point_keywords = ["face pull", "upper back", "row", "rdl", "pause", "lockout", "band pull apart", "rear delt"]
	        accessory_bonus = sum(1 for kw in weak_point_keywords if kw in plan_text_lower)
	        if accessory_bonus >= 3:
	            scores["accessory_weak_points"] = 0.2
	            feedback_parts.append("✓ Accessory work targets weak points (upper back, lockout strength).")
	        elif accessory_bonus >= 1:
	            scores["accessory_weak_points"] = 0.1
	            feedback_parts.append("~ Some accessory work present but weak points (upper back, lockout) not fully addressed.")
	        else:
	            scores["accessory_weak_points"] = 0.0
	            feedback_parts.append("✗ No accessory work targeting weak points (upper back, lockout strength).")

	        score = min(1.0, sum(scores.values()))
	        feedback = " ".join(feedback_parts)
	        return score, scores, feedback


	# ---------------------------------------------------------------------------
	# Helper utilities
	# ---------------------------------------------------------------------------

	def _extract_exercises(plan: dict) -> list:
	"""Flatten all exercises from all days in a plan."""
	exercises = []
	for day in plan.get("days", plan.get("workout_days", [])):
	if isinstance(day, dict):
	exercises.extend(day.get("exercises", []))
	return exercises


	def _check_reps_exceed(plan: dict, max_reps: int) -> bool:
	    """Return True if any exercise in the plan has reps > max_reps.

	    Numeric reps are compared directly. String reps are honoured when they
	    are a plain number (``"20"``) or a dash-separated range (``"12-20"``),
	    in which case the largest value is compared; other strings such as
	    ``"max"`` or ``"30 seconds"`` are ignored. Non-dict exercise entries are
	    skipped.

	    Args:
	        plan: Parsed plan object.
	        max_reps: Highest rep count that is still acceptable.
	    """
	    for ex in _extract_exercises(plan):
	        if not isinstance(ex, dict):
	            continue
	        reps = ex.get("reps")
	        if isinstance(reps, bool):
	            continue
	        if isinstance(reps, (int, float)):
	            if reps > max_reps:
	                return True
	        elif isinstance(reps, str):
	            # Without this branch a plan could dodge the cap simply by
	            # quoting its rep counts.
	            try:
	                values = [float(part) for part in reps.replace("–", "-").split("-")]
	            except ValueError:
	                continue
	            if max(values) > max_reps:
	                return True
	    return False


	def _estimate_volume(week: dict) -> float:
	"""Estimate total volume (sets × reps) across all days in a week."""
	total = 0
	for day in week.get("days", week.get("training_days", [])):
	if isinstance(day, dict):
	for ex in day.get("exercises", []):
	sets = ex.get("sets", 0)
	reps = ex.get("reps", 0)
	if isinstance(sets, (int, float)) and isinstance(reps, (int, float)):
	total += sets * reps
	# Also accept a flat 'total_sets' key on the week
	if total == 0:
	total = week.get("total_sets", 0) * 8 # assume ~8 reps avg if only sets given
	return float(total)


	# ---------------------------------------------------------------------------
	# Task registry
	# ---------------------------------------------------------------------------

	# Registry of task id -> grader instance. One instance of each task is
	# created at import time and looked up by id.
	TASKS: Dict[str, BaseTask] = dict(
	    basic_plan=BasicPlanTask(),
	    injury_safe_modification=InjurySafeTask(),
	    periodized_program=PeriodizedProgramTask(),
	)


	# ---------------------------------------------------------------------------
	# Main environment class
	# ---------------------------------------------------------------------------

	class FitscriptEnvironment(Environment):
	    """
	    FitScript fitness prescription environment.

	    Three tasks of increasing difficulty:
	    - basic_plan (easy): generate a 3-day bodyweight beginner plan
	    - injury_safe_modification (medium): modify a plan for a lower-back-injured client
	    - periodized_program (hard): design a 4-week periodized powerlifting block

	    Rewards are always in [0.0, 1.0]. Episodes terminate on task completion
	    (score >= 0.99) or when max_steps is reached.
	    """

	    SUPPORTS_CONCURRENT_SESSIONS: bool = True

	    # Exercise keywords (substring match on the lower-cased plan) that draw
	    # a safety penalty for a given injury.
	    _CONTRAINDICATED: Dict[str, Tuple[str, ...]] = {
	        "lower back": ("deadlift", "back squat", "good morning", "bent-over row", "bent over row"),
	        "knee": ("lunge", "leg press", "deep squat", "box jump"),
	        "shoulder": ("overhead press", "upright row", "behind neck"),
	    }

	    # Variants that contain a contraindicated keyword but that the
	    # injury-safe grader itself accepts as replacements. They are masked out
	    # before the keyword scan so that following the grader's advice is not
	    # penalised.
	    _SAFE_VARIANTS: Dict[str, Tuple[str, ...]] = {
	        "lower back": ("romanian deadlift", "trap bar deadlift"),
	    }

	    def __init__(self, task_id: str = "basic_plan"):
	        """
	        Initialize the FitScript environment.

	        Args:
	            task_id: One of 'basic_plan', 'injury_safe_modification', 'periodized_program'.

	        Raises:
	            ValueError: If ``task_id`` is not a key of ``TASKS``.
	        """
	        if task_id not in TASKS:
	            raise ValueError(
	                f"Unknown task_id '{task_id}'. Valid options: {list(TASKS.keys())}"
	            )
	        self._task_id = task_id
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        # Last plan that was actually graded; used to reject resubmissions.
	        self._last_plan: str = ""

	    def reset(self) -> FitscriptObservation:
	        """
	        Reset the environment for the current task.

	        Starts a new episode (fresh episode id, step count 0) and clears the
	        remembered last plan.

	        Returns:
	            FitscriptObservation with the client profile and welcome message.
	        """
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._last_plan = ""

	        task = TASKS[self._task_id]

	        return FitscriptObservation(
	            client_profile=task.client_profile,
	            feedback="Welcome! Review the client profile and generate a plan.",
	            score_breakdown={},
	            task_id=self._task_id,
	            step_count=0,
	            done=False,
	            reward=0.0,
	        )

	    @classmethod
	    def _has_contraindicated_exercise(cls, plan_text: str, injuries) -> bool:
	        """Return True if the plan names an exercise banned for any listed injury."""
	        plan_lower = plan_text.lower()
	        for injury in injuries:
	            scan_text = plan_lower
	            for safe_variant in cls._SAFE_VARIANTS.get(injury, ()):
	                scan_text = scan_text.replace(safe_variant, " ")
	            if any(banned in scan_text for banned in cls._CONTRAINDICATED.get(injury, ())):
	                return True
	        return False

	    def _rejection(self, task: BaseTask, feedback: str) -> FitscriptObservation:
	        """Build the zero-reward observation used when a submission is not graded."""
	        return FitscriptObservation(
	            client_profile=task.client_profile,
	            feedback=feedback,
	            score_breakdown={},
	            task_id=self._task_id,
	            step_count=self._state.step_count,
	            done=self._state.step_count >= task.max_steps,
	            reward=0.0,
	        )

	    def step(self, action: FitscriptAction) -> FitscriptObservation:  # type: ignore[override]
	        """
	        Execute a step: grade the submitted plan and return feedback.

	        Empty plans and plans identical to the previously graded one are
	        rejected with reward 0.0 but still consume a step. Otherwise the
	        task grader's score is reduced by 0.3 (once) if the plan contains an
	        exercise contraindicated for one of the client's injuries, then
	        clamped to [0.0, 1.0].

	        Args:
	            action: FitscriptAction with action_type, plan JSON string, and optional reasoning.

	        Returns:
	            FitscriptObservation with score breakdown and feedback.
	        """
	        self._state.step_count += 1
	        task = TASKS[self._task_id]

	        # Penalty: empty or null plan
	        if not action.plan or action.plan.strip() in ("", "null", "{}"):
	            return self._rejection(
	                task,
	                "✗ Empty or null plan submitted. Please provide a structured workout plan.",
	            )

	        # Penalty: identical plan submitted twice in a row
	        if action.plan == self._last_plan:
	            return self._rejection(
	                task,
	                "✗ Identical plan submitted twice. Please revise based on the previous feedback.",
	            )

	        self._last_plan = action.plan

	        # Grade the plan
	        score, breakdown, feedback = task.grade(action, self._state.step_count)

	        # Safety penalty: contraindicated exercises for injured clients
	        injuries = task.client_profile.get("injuries", [])
	        if injuries and self._has_contraindicated_exercise(action.plan, injuries):
	            score = max(0.0, score - 0.3)
	            feedback += " ⚠️ Safety penalty applied: plan contains exercises contraindicated for the client's injury."

	        # Clamp to [0.0, 1.0]
	        score = max(0.0, min(1.0, score))

	        done = score >= 0.99 or self._state.step_count >= task.max_steps

	        return FitscriptObservation(
	            client_profile=task.client_profile,
	            feedback=feedback,
	            score_breakdown=breakdown,
	            task_id=self._task_id,
	            step_count=self._state.step_count,
	            done=done,
	            reward=score,
	        )

	    @property
	    def state(self) -> State:
	        """Get the current environment state."""
	        return self._state