class TaskDefinition:
    """Base class for all task graders.

    Subclasses implement :meth:`evaluate`. The reward constants and the
    ``_scaled_reward`` helper are shared so every grader floors and rounds
    its non-success rewards the same way.
    """

    # Reward returned on full success; partial rewards are scaled to stay
    # below this value so 0.99 is reserved for true success.
    SUCCESS_REWARD = 0.99
    # Floor applied to every non-success reward.
    MIN_REWARD = 0.01
    # Number of decimal places non-success rewards are rounded to.
    REWARD_PRECISION = 3

    def evaluate(self, state, env_done: bool):
        """Grade the current state of the environment.

        Args:
            state: Environment state object; each subclass reads the
                attributes it needs from it.
            env_done: True when the environment itself has ended the episode
                (the subclasses treat this as bankruptcy).

        Returns:
            A ``(reward: float, done: bool, message: str)`` tuple. The reward
            must be a partial signal at every step, not just at terminal
            states.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def _scaled_reward(self, progress, scale):
        """Return ``progress * scale`` rounded, never below ``MIN_REWARD``."""
        return max(
            self.MIN_REWARD,
            round(progress * scale, self.REWARD_PRECISION),
        )


class EasyTask(TaskDefinition):
    """Goal: reduce tech debt from 0.8 down to <= 0.4 before going bankrupt.

    Starting cash: Rs.20,000 | Starting debt: 80%

    Partial reward: how much debt has been cleared toward the target.
    Success reward (0.99) when debt reaches the target or below.
    """

    INITIAL_DEBT = 0.8
    TARGET_DEBT = 0.4

    def __init__(self):
        self.level = "easy"

    def _progress(self, state):
        """Return the fraction of the debt gap closed, floored at 0.0."""
        cleared = self.INITIAL_DEBT - state.tech_debt
        return max(0.0, cleared / (self.INITIAL_DEBT - self.TARGET_DEBT))

    def evaluate(self, state, env_done: bool):
        """Grade progress on paying down tech debt.

        Args:
            state: Object exposing ``tech_debt`` as a fraction (0.8 == 80%).
            env_done: True when the environment has ended the episode.

        Returns:
            ``(reward, done, message)``: 0.99 on success, up to 0.5 when the
            episode ended without success, up to 0.8 while still in progress.
        """
        # Derived from the constant so the message cannot drift from the
        # threshold that is actually being checked.
        target_pct = f"{self.TARGET_DEBT * 100:.0f}"

        # --- Success ---
        if state.tech_debt <= self.TARGET_DEBT:
            return self.SUCCESS_REWARD, True, (
                f"Task Success: Tech debt cleaned up to {target_pct}%. "
                f"Codebase is healthy!"
            )

        progress = self._progress(state)
        debt_pct = f"{state.tech_debt * 100:.0f}"
        progress_pct = f"{progress * 100:.0f}"

        # --- Bankruptcy ---
        if env_done:
            # Still give partial credit (at most 0.5) for how far they got.
            return self._scaled_reward(progress, 0.5), True, (
                f"Task Failed: Bankrupt with debt still at {debt_pct}%. "
                f"Partial progress: {progress_pct}%."
            )

        # --- Partial progress signal (every step) ---
        # Capped at 0.8 so 0.99 is reserved for true success.
        return self._scaled_reward(progress, 0.8), False, (
            f"In progress: debt at {debt_pct}% "
            f"(target ≤ {target_pct}%). Progress: {progress_pct}%."
        )


class MediumTask(TaskDefinition):
    """Goal: reach Rs.10,000 Monthly Recurring Revenue starting from Rs.0.

    Starting cash: Rs.50,000 | Starting debt: 10%

    Partial reward: proportional to how close MRR is to the target.
    """

    TARGET_REVENUE = 10_000.0

    def __init__(self):
        self.level = "medium"

    def _progress(self, state):
        """Return current MRR as a fraction of the target, capped at 1.0."""
        return min(1.0, state.monthly_revenue / self.TARGET_REVENUE)

    def evaluate(self, state, env_done: bool):
        """Grade progress toward the monthly-revenue target.

        Args:
            state: Object exposing ``monthly_revenue``.
            env_done: True when the environment has ended the episode.

        Returns:
            ``(reward, done, message)``: 0.99 on success, up to 0.5 when the
            episode ended without success, up to 0.8 while still in progress.
        """
        revenue_label = f"Rs.{state.monthly_revenue:,.0f}"

        # --- Success ---
        if state.monthly_revenue >= self.TARGET_REVENUE:
            return self.SUCCESS_REWARD, True, (
                f"Task Success: Reached {revenue_label} MRR. "
                f"Product-market fit achieved!"
            )

        progress = self._progress(state)
        # Derived from the constant so the message tracks the real target.
        target_label = f"Rs.{self.TARGET_REVENUE:,.0f}"
        summary = f"({progress * 100:.0f}% of {target_label} target)."

        # --- Bankruptcy ---
        if env_done:
            return self._scaled_reward(progress, 0.5), True, (
                f"Task Failed: Bankrupt at {revenue_label} MRR {summary}"
            )

        # --- Partial progress signal (every step) ---
        return self._scaled_reward(progress, 0.8), False, (
            f"In progress: {revenue_label} MRR {summary}"
        )


class HardTask(TaskDefinition):
    """Goal: survive 12 months AND complete at least 12 features (the 'pivot').

    Starting cash: Rs.30,000 | Devs: 3 | Starting debt: 40% |
    Seed revenue: Rs.2,000

    Partial reward combines two dimensions:
      - Time survival (how many of the target months completed)
      - Feature pivot (how many of the target features shipped)
    Both are weighted equally.
    """

    TARGET_MONTHS = 12
    TARGET_FEATURES = 12  # Was 5 originally; openenv.yaml says 12.

    def __init__(self):
        self.level = "hard"

    def evaluate(self, state, env_done: bool):
        """Grade survival time and feature delivery together.

        Args:
            state: Object exposing ``current_month``, ``features_completed``
                and ``cash``.
            env_done: True when the environment has ended the episode.

        Returns:
            ``(reward, done, message)``: 0.99 when both targets are met with
            cash remaining; up to 0.5 when the months target is met without
            enough features; up to 0.5 when the episode ended early; up to
            0.8 while still in progress.
        """
        months = self.TARGET_MONTHS
        features = self.TARGET_FEATURES
        month_progress = min(1.0, state.current_month / months)
        feature_progress = min(1.0, state.features_completed / features)
        shipped = f"{state.features_completed}/{features}"

        # --- Reached the final month with cash left ---
        if state.current_month >= months and state.cash > 0:
            if state.features_completed >= features:
                return self.SUCCESS_REWARD, True, (
                    f"Task Success: Survived {months} months and shipped "
                    f"{state.features_completed} pivot features. "
                    f"The startup lives!"
                )
            # Survived but didn't pivot enough: only features earn credit.
            partial = self._scaled_reward(feature_progress, 0.5)
            return partial, True, (
                f"Task Failed: Survived {months} months but only shipped "
                f"{shipped} pivot features. "
                f"Partial score: {partial}."
            )

        combined = month_progress + feature_progress

        # --- Bankruptcy ---
        if env_done:
            # 0.25 per dimension, so the two together top out at 0.5.
            partial = self._scaled_reward(combined, 0.25)
            return partial, True, (
                f"Task Failed: Bankrupt at month "
                f"{state.current_month}/{months} "
                f"with {shipped} features shipped. "
                f"Partial score: {partial}."
            )

        # --- Partial progress signal (every step) ---
        # 0.4 per dimension, so the two together top out at 0.8.
        return self._scaled_reward(combined, 0.4), False, (
            f"In progress: Month {state.current_month}/{months} | "
            f"Features {shipped} | "
            f"Cash Rs.{state.cash:,.0f}"
        )