Spaces:
Sleeping
Sleeping
class TaskDefinition:
    """Common interface that every task grader implements."""

    def evaluate(self, state, env_done: bool):
        """
        Grade the current environment state.

        Implementations return a ``(reward, done, message)`` tuple holding a
        float reward, a bool that marks the task as finished, and a
        human-readable status string. The reward is meant to carry a partial
        signal on every step rather than only at terminal states.

        Raises:
            NotImplementedError: always, until a subclass overrides it.
        """
        raise NotImplementedError()
class EasyTask(TaskDefinition):
    """
    Goal: Reduce tech debt from 0.8 down to <= 0.4 before going bankrupt.

    Starting cash: Rs.20,000 | Starting debt: 80%

    Reward schedule (``progress`` is the share of the INITIAL_DEBT ->
    TARGET_DEBT gap cleared so far, floored at 0.0):
      - Success (debt at or below TARGET_DEBT): 0.99, episode done.
      - Episode ended without success: progress * 0.5, floored at 0.01.
      - Still running: progress * 0.8, floored at 0.01.
    """

    INITIAL_DEBT = 0.8
    TARGET_DEBT = 0.4

    def __init__(self):
        self.level = "easy"

    def _progress(self, state) -> float:
        """Return the cleared fraction of the debt gap, never below 0.0."""
        gap = self.INITIAL_DEBT - self.TARGET_DEBT
        return max(0.0, (self.INITIAL_DEBT - state.tech_debt) / gap)

    def evaluate(self, state, env_done: bool):
        """
        Grade ``state`` against the tech-debt target.

        Args:
            state: object exposing a numeric ``tech_debt`` attribute
                (a fraction, e.g. 0.8 for 80%).
            env_done: True when the environment has ended the episode.

        Returns:
            ``(reward, done, message)`` as described in the class docstring.
        """
        # Derived from the constant so a subclass that overrides TARGET_DEBT
        # gets matching messages; renders as "40" for the default target.
        target_pct = f"{self.TARGET_DEBT * 100:.0f}"

        # --- Success (checked first, so it wins even when env_done is set) ---
        if state.tech_debt <= self.TARGET_DEBT:
            return 0.99, True, (
                f"Task Success: Tech debt cleaned up to {target_pct}%. "
                "Codebase is healthy!"
            )

        progress = self._progress(state)

        # --- Episode ended without reaching the target ---
        if env_done:
            # Partial credit for how far the agent got, at most 0.5.
            reward = max(0.01, round(progress * 0.5, 3))
            return reward, True, (
                f"Task Failed: Bankrupt with debt still at {state.tech_debt * 100:.0f}%. "
                f"Partial progress: {progress * 100:.0f}%."
            )

        # --- Partial progress signal (every step) ---
        # The 0.8 factor keeps in-flight rewards below the 0.99 reserved for
        # true success; the 0.01 floor keeps the reward strictly positive.
        partial_reward = max(0.01, round(progress * 0.8, 3))
        return partial_reward, False, (
            f"In progress: debt at {state.tech_debt * 100:.0f}% "
            f"(target ≤ {target_pct}%). Progress: {progress * 100:.0f}%."
        )
class MediumTask(TaskDefinition):
    """
    Goal: Reach Rs.10,000 Monthly Recurring Revenue starting from Rs.0.

    Starting cash: Rs.50,000 | Starting debt: 10%

    Reward schedule (``progress`` is monthly_revenue / TARGET_REVENUE,
    clamped to the range 0.0 to 1.0):
      - Success (revenue at or above TARGET_REVENUE): 0.99, episode done.
      - Episode ended without success: progress * 0.5, floored at 0.01.
      - Still running: progress * 0.8, floored at 0.01.
    """

    TARGET_REVENUE = 10_000.0

    def __init__(self):
        self.level = "medium"

    def _progress(self, state) -> float:
        """Return revenue as a fraction of the target, clamped to 0.0-1.0."""
        # The lower clamp keeps a negative revenue figure from being reported
        # as a negative percentage; the reward itself is floored separately.
        return min(1.0, max(0.0, state.monthly_revenue / self.TARGET_REVENUE))

    def evaluate(self, state, env_done: bool):
        """
        Grade ``state`` against the revenue target.

        Args:
            state: object exposing a numeric ``monthly_revenue`` attribute.
            env_done: True when the environment has ended the episode.

        Returns:
            ``(reward, done, message)`` as described in the class docstring.
        """
        revenue = state.monthly_revenue

        # --- Success (checked first, so it wins even when env_done is set) ---
        if revenue >= self.TARGET_REVENUE:
            return 0.99, True, (
                f"Task Success: Reached Rs.{revenue:,.0f} MRR. "
                "Product-market fit achieved!"
            )

        progress = self._progress(state)
        # Built from the constant so the text follows an overridden target;
        # renders as "(NN% of Rs.10,000 target)." for the default.
        status = f"({progress * 100:.0f}% of Rs.{self.TARGET_REVENUE:,.0f} target)."

        # --- Episode ended without reaching the target ---
        if env_done:
            reward = max(0.01, round(progress * 0.5, 3))
            return reward, True, (
                f"Task Failed: Bankrupt at Rs.{revenue:,.0f} MRR {status}"
            )

        # --- Partial progress signal (every step) ---
        partial_reward = max(0.01, round(progress * 0.8, 3))
        return partial_reward, False, (
            f"In progress: Rs.{revenue:,.0f} MRR {status}"
        )
class HardTask(TaskDefinition):
    """
    Goal: Survive 12 months AND complete at least 12 features (the 'pivot').

    Starting cash: Rs.30,000 | Devs: 3 | Starting debt: 40% | Seed revenue: Rs.2,000

    Partial reward combines two dimensions, weighted equally:
      - Time survival (current_month / TARGET_MONTHS, capped at 1.0)
      - Feature pivot (features_completed / TARGET_FEATURES, capped at 1.0)

    Reward schedule:
      - Survived with cash left and enough features: 0.99, episode done.
      - Any terminal failure (survived but under-shipped, or env_done before
        surviving): (months + features) * 0.25, floored at 0.01.
      - Still running: (months + features) * 0.4, floored at 0.01.
    """

    TARGET_MONTHS = 12
    # Kept at 12 to agree with openenv.yaml; an earlier revision used 5.
    TARGET_FEATURES = 12

    def __init__(self):
        self.level = "hard"

    def evaluate(self, state, env_done: bool):
        """
        Grade ``state`` against the survival and feature targets.

        Args:
            state: object exposing numeric ``current_month``,
                ``features_completed`` and ``cash`` attributes.
            env_done: True when the environment has ended the episode.

        Returns:
            ``(reward, done, message)`` as described in the class docstring.
        """
        month_progress = min(1.0, state.current_month / self.TARGET_MONTHS)
        feature_progress = min(1.0, state.features_completed / self.TARGET_FEATURES)
        combined = month_progress + feature_progress

        survived = state.current_month >= self.TARGET_MONTHS and state.cash > 0
        if survived:
            # --- Full success: survived AND shipped enough features ---
            if state.features_completed >= self.TARGET_FEATURES:
                return 0.99, True, (
                    f"Task Success: Survived {self.TARGET_MONTHS} months and shipped "
                    f"{state.features_completed} pivot features. The startup lives!"
                )

            # --- Survived but did not pivot enough ---
            # Uses the same equal-weight formula as the bankruptcy branch.
            # Scoring features alone would rank an agent that survived the
            # whole run below one that went bankrupt earlier with the same
            # number of features shipped.
            partial = max(0.01, round(combined * 0.25, 3))
            return partial, True, (
                f"Task Failed: Survived {self.TARGET_MONTHS} months but only shipped "
                f"{state.features_completed}/{self.TARGET_FEATURES} pivot features. "
                f"Partial score: {partial}."
            )

        # --- Episode ended before the survival condition was met ---
        if env_done:
            partial = max(0.01, round(combined * 0.25, 3))
            return partial, True, (
                f"Task Failed: Bankrupt at month {state.current_month}/{self.TARGET_MONTHS} "
                f"with {state.features_completed}/{self.TARGET_FEATURES} features shipped. "
                f"Partial score: {partial}."
            )

        # --- Partial progress signal (every step) ---
        # combined is at most 2.0, so the in-flight reward tops out at 0.8.
        partial_reward = max(0.01, round(combined * 0.4, 3))
        return partial_reward, False, (
            f"In progress: Month {state.current_month}/{self.TARGET_MONTHS} | "
            f"Features {state.features_completed}/{self.TARGET_FEATURES} | "
            f"Cash Rs.{state.cash:,.0f}"
        )