import copy
import math
from typing import Any, Dict, List, Optional, Tuple

from models.models import Task, Observation, Action
from tasks.tasks import EASY_TASK, MEDIUM_TASK, HARD_TASK, TaskConfig


class MetaLearningPriorityPanicEnv:
    """Step-based task-prioritisation environment.

    The agent holds an energy budget and a list of tasks, each with a
    priority, a deadline (expressed as a step index) and an energy cost.
    On every step it submits one action (``complete_task``, ``skip`` or
    ``noop``) and receives a reward clamped to ``[0.01, 0.99]``.

    ``reset()`` loads one of three difficulty presets (``EASY_TASK``,
    ``MEDIUM_TASK``, ``HARD_TASK``) and must normally be called before
    ``step()``; ``step()`` performs the reset itself if it was skipped.
    """

    # Action types accepted by step(); anything else is penalised.
    _VALID_ACTIONS = ("complete_task", "skip", "noop")
    # Only this many ids of a complete_task action are processed.
    _MAX_TASKS_PER_ACTION = 2
    # Reward bonus per priority; unlisted priorities get the default.
    _PRIORITY_BONUS = {"high": 0.4, "medium": 0.2}
    _DEFAULT_PRIORITY_BONUS = 0.1
    # Bounds of the reward returned to the caller.
    _MIN_REWARD = 0.01
    _MAX_REWARD = 0.99
    # Tasks appended mid-episode, keyed by the step at which they appear.
    # A step is only honoured when the active config also lists it under
    # "task_injection_steps".
    _INJECTED_TASKS = {
        3: {
            "id": 98,
            "description": "Urgent CEO request",
            "priority": "high",
            "energy_cost": 3,
        },
        6: {
            "id": 99,
            "description": "Resolve production bug",
            "priority": "medium",
            "energy_cost": 2,
        },
    }
    # Injected tasks are due this many steps after they appear.
    _INJECTED_DEADLINE_OFFSET = 4

    def __init__(self, task_id: str = "easy", **kwargs):
        """Create an environment for the given difficulty preset.

        Args:
            task_id: ``"easy"``, ``"medium"`` or ``"hard"``; any other
                value selects the easy preset when ``reset()`` runs.
            **kwargs: Accepted and ignored.
        """
        self.task_id = task_id
        self.tasks: List[Task] = []
        self.energy: int = 10
        self.step_count: int = 0
        self.social_debt: float = 0.0
        self.streak: int = 0
        self.last_action_result: str = "Environment initialized"
        self.last_action_str: str = ""
        self.max_steps = 10
        # Stays None until reset() picks a preset.
        self.config: TaskConfig = None

    def reset(self, task_id: Optional[str] = None, **kwargs) -> Dict[str, Any]:
        """Start a new episode and return the initial transition dict.

        Args:
            task_id: Optional preset to switch to. A falsy value keeps
                the current ``self.task_id``.
            **kwargs: Accepted and ignored.

        Returns:
            A dict with keys ``observation`` (dumped ``Observation``),
            ``reward`` (always 0.01), ``done`` (always False) and
            ``info`` (empty dict).
        """
        if task_id:
            self.task_id = task_id

        # Unknown ids deliberately fall back to the easy preset.
        presets = {"hard": HARD_TASK, "medium": MEDIUM_TASK}
        self.config = presets.get(self.task_id, EASY_TASK)

        self.energy = self.config["initial_energy"]
        self.max_steps = self.config["max_steps"]
        self.step_count = 0
        self.social_debt = 0.0
        self.streak = 0
        self.last_action_result = "Environment initialized"
        self.last_action_str = ""

        # Deep copy so completing tasks never mutates the shared preset.
        self.tasks = copy.deepcopy(self.config["initial_tasks"])

        return {
            "observation": self._get_observation().model_dump(),
            "reward": 0.01,
            "done": False,
            "info": {},
        }

    def _get_observation(self) -> "Observation":
        """Build an ``Observation`` snapshot of the current state.

        Tasks are deep-copied so the returned object does not alias the
        environment's own task list.
        """
        return Observation(
            tasks=copy.deepcopy(self.tasks),
            energy=self.energy,
            step_count=self.step_count,
            social_debt=self.social_debt,
            streak=self.streak,
            last_action_result=self.last_action_result,
        )

    def step(self, action: "Action") -> Dict[str, Any]:
        """Apply one action and return the resulting transition dict.

        Args:
            action: Object exposing ``action_type`` and ``task_ids``.

        Returns:
            A dict with keys ``observation``, ``reward`` (float within
            ``[0.01, 0.99]``), ``done`` (True once ``step_count`` reaches
            ``max_steps``) and ``info`` (empty dict).
        """
        if self.config is None:
            # step() before reset() previously crashed on the missing
            # config; start an episode with the current task_id instead.
            self.reset()

        raw_score = self._advance(action)
        reward = self._normalize_reward(raw_score)

        return {
            "observation": self._get_observation().model_dump(),
            "reward": reward,
            "done": self.step_count >= self.max_steps,
            "info": {},
        }

    def _advance(self, action: "Action") -> float:
        """Mutate the environment for one action and return the raw score.

        The raw score is unclamped; ``step()`` normalises it. The order of
        the stages below matters: deadlines and bonuses are evaluated
        against the pre-increment ``step_count``, and injected tasks are
        appended only after this step's penalties were computed.
        """
        raw_score = 0.0
        worked = False
        self.last_action_result = "Action processed."

        # Penalise submitting exactly the same action twice in a row.
        action_key = (
            f"{action.action_type}:"
            f"{','.join(map(str, sorted(action.task_ids)))}"
        )
        if action_key == self.last_action_str and self.step_count > 0:
            raw_score -= 0.2
            self.last_action_result = "Penalty: Repeated action."
        self.last_action_str = action_key

        # Snapshot the pending high-priority ids BEFORE the action runs.
        # Computing this afterwards would drop a task the agent has just
        # completed and wrongly report it as having ignored high-priority
        # work whenever another high task is still open.
        pending_high_ids = {
            t.id for t in self.tasks
            if t.priority == "high" and not t.completed
        }
        attempted_ids = set()

        if action.action_type not in self._VALID_ACTIONS:
            raw_score -= 0.2
            self.last_action_result = "Penalty: Invalid action."
        elif action.action_type == "complete_task":
            attempted = action.task_ids[:self._MAX_TASKS_PER_ACTION]
            # Only ids that are actually processed count as "addressed".
            attempted_ids = set(attempted)
            delta, worked = self._complete_tasks(attempted)
            raw_score += delta
            # Small bonus for batching more than one id in a useful action.
            if worked and len(action.task_ids) > 1:
                raw_score += 0.05
        elif action.action_type == "skip":
            self.last_action_result = "Skipped."
        else:  # "noop"
            raw_score -= 0.1
            self.last_action_result = "No-op."

        # Missed deadlines: 0.1 per overdue open task, capped at 0.2.
        missed = sum(
            1 for t in self.tasks
            if not t.completed and self.step_count >= t.deadline
        )
        raw_score -= min(0.2, 0.1 * missed)

        # Social debt: accrues when pending high-priority work existed
        # and the action addressed none of it; any outstanding debt keeps
        # costing 0.1 per step. Both only apply if the config enables it.
        if self.config.get("social_debt_active", False):
            ignored_high = bool(pending_high_ids) and not (
                pending_high_ids & attempted_ids
            )
            if ignored_high:
                self.social_debt += 1.0
                raw_score -= 0.2
            if self.social_debt > 0:
                raw_score -= 0.1

        # Bonus while every existing high-priority task is completed.
        high_all = [t for t in self.tasks if t.priority == "high"]
        if high_all and all(t.completed for t in high_all):
            raw_score += 0.1

        self._inject_tasks()

        # Streak of consecutive steps with at least one completion.
        if action.action_type == "complete_task" and worked:
            self.streak += 1
            raw_score += 0.1 * self.streak
        else:
            self.streak = 0

        self.step_count += 1
        return raw_score

    def _complete_tasks(self, task_ids) -> Tuple[float, bool]:
        """Try to complete each id; return ``(score_delta, any_completed)``.

        Unknown or already-completed ids cost 0.2 each; an id whose task
        costs more energy than is left costs 0.05 and is left open.
        """
        delta = 0.0
        worked = False

        for t_id in task_ids:
            task = next((t for t in self.tasks if t.id == t_id), None)

            if task is None or task.completed:
                delta -= 0.2
                continue

            if self.energy < task.energy_cost:
                delta -= 0.05
                continue

            self.energy -= task.energy_cost
            task.completed = True
            worked = True

            base = 0.3
            bonus = self._PRIORITY_BONUS.get(
                task.priority, self._DEFAULT_PRIORITY_BONUS
            )
            raw_gain = (base + bonus) * 0.8
            delta += raw_gain

            self.last_action_result = f"Task {t_id} completed."

        return delta, worked

    def _inject_tasks(self) -> None:
        """Append the scripted task for this step, if the config asks for it."""
        if self.step_count not in self.config.get("task_injection_steps", []):
            return

        spec = self._INJECTED_TASKS.get(self.step_count)
        if spec is None:
            return

        self.tasks.append(
            Task(
                id=spec["id"],
                description=spec["description"],
                priority=spec["priority"],
                deadline=self.step_count + self._INJECTED_DEADLINE_OFFSET,
                energy_cost=spec["energy_cost"],
                completed=False,
            )
        )

    @classmethod
    def _normalize_reward(cls, raw_score) -> float:
        """Clamp a raw score into ``[0.01, 0.99]``.

        Values that cannot be converted to float, NaN and +/-infinity all
        map to the minimum reward.
        """
        try:
            value = float(raw_score)
        except (TypeError, ValueError, OverflowError):
            return cls._MIN_REWARD

        if not math.isfinite(value):
            return cls._MIN_REWARD

        return max(cls._MIN_REWARD, min(value, cls._MAX_REWARD))