Spaces:

Eshit
/

Wildfire-Containment-Simulator

Sleeping

App Files Community

Eshit committed 29 days ago

Commit

3e8e5dd

1 Parent(s): 5d6ff6f

Add GRPO training results: 150 steps, promoted easy→medium→hard

Browse files

Files changed (3) hide show

training/grpo_eval_results.json +90 -0
training/grpo_v2_colab.ipynb +61 -52
training/training_stats.json +152 -0

training/grpo_eval_results.json ADDED Viewed

	@@ -0,0 +1,90 @@

+{
+  "trained": {
+    "easy": {
+      "mean": 5.126686666666666,
+      "std": 3.900705914893981,
+      "pop_saved_pct": 86.66666666666667,
+      "json_success_rate": 98.52941176470588
+    },
+    "medium": {
+      "mean": 5.7353000000000005,
+      "std": 3.070706015886249,
+      "pop_saved_pct": 97.14285714285715,
+      "json_success_rate": 99.77900552486187
+    },
+    "hard": {
+      "mean": 2.144273333333333,
+      "std": 2.867108504973066,
+      "pop_saved_pct": 92.48062015503875,
+      "json_success_rate": 99.17695473251028
+    }
+  },
+  "baselines": {
+    "random": {
+      "easy": {
+        "scores": [7.7749, 7.7751, 7.775, 7.775, 0.04],
+        "mean": 6.228,
+        "std": 3.094,
+        "mean_containment_pct": 1.0,
+        "mean_pop_saved_pct": 0.92,
+        "mean_steps": 25.8,
+        "crew_casualty_rate": 0.0,
+        "mean_time_s": 0.067
+      },
+      "medium": {
+        "scores": [-1.7044, -1.0029, 1.0762, 0.7527, 7.4403],
+        "mean": 1.3124,
+        "std": 3.2367,
+        "mean_containment_pct": 1.0,
+        "mean_pop_saved_pct": 0.7365,
+        "mean_steps": 72.0,
+        "crew_casualty_rate": 0.0,
+        "mean_time_s": 0.676
+      },
+      "hard": {
+        "scores": [7.8668, 1.3602, -0.7466, 1.0443, 1.2813],
+        "mean": 2.1612,
+        "std": 2.9554,
+        "mean_containment_pct": 1.0,
+        "mean_pop_saved_pct": 0.9023,
+        "mean_steps": 84.6,
+        "crew_casualty_rate": 0.0,
+        "mean_time_s": 1.301
+      }
+    },
+    "heuristic": {
+      "easy": {
+        "scores": [7.6749, 7.575, 7.475, 7.475, 7.4749],
+        "mean": 7.535,
+        "std": 0.08,
+        "mean_containment_pct": 1.0,
+        "mean_pop_saved_pct": 1.0,
+        "mean_steps": 26.6,
+        "crew_casualty_rate": 0.0,
+        "mean_time_s": 0.118
+      },
+      "medium": {
+        "scores": [7.6001, 7.7001, 7.8, 7.7, 0.7683],
+        "mean": 6.3137,
+        "std": 2.7734,
+        "mean_containment_pct": 1.0,
+        "mean_pop_saved_pct": 0.9746,
+        "mean_steps": 46.2,
+        "crew_casualty_rate": 0.0,
+        "mean_time_s": 0.48
+      },
+      "hard": {
+        "scores": [7.8668, 7.867, 0.9443, 7.6667, -0.6696],
+        "mean": 4.735,
+        "std": 3.7892,
+        "mean_containment_pct": 1.0,
+        "mean_pop_saved_pct": 0.9279,
+        "mean_steps": 83.2,
+        "crew_casualty_rate": 0.0,
+        "mean_time_s": 1.487
+      }
+    }
+  },
+  "eval_seeds": [42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56],
+  "model": "Eshit/wildfire-grpo-7b"
+}

training/grpo_v2_colab.ipynb CHANGED Viewed

@@ -15,7 +15,14 @@
         "4. GRPO loop too slow - consequence of fix 3\n",
         "5. parse_action(text, None) crash - standalone check_json_format() for format reward\n",
         "\n",
-        "**Hardware:** A100 40GB on Colab"
       ]
     },
     {
@@ -27,26 +34,27 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
-        "!pip install trl==0.15.2 datasets==3.4.1 wandb"
-      ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "import torch\n",
         "assert torch.cuda.is_available(), \"GPU not available - switch to a GPU runtime\"\n",
         "gpu_name = torch.cuda.get_device_name(0)\n",
-        "gpu_mem = torch.cuda.get_device_properties(0).total_mem / 1e9\n",
         "print(f\"GPU: {gpu_name}  |  VRAM: {gpu_mem:.1f} GB\")"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -57,9 +65,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "from unsloth import FastLanguageModel\n",
         "\n",
@@ -80,7 +86,9 @@
         "\n",
         "print(f\"Loaded SFT checkpoint: {SFT_MODEL}\")\n",
         "model.print_trainable_parameters()"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -91,9 +99,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "import os, random, json, sys\n",
         "import torch\n",
@@ -132,7 +138,9 @@
         "print(f\"Start tier: {controller.get_tier()}\")\n",
         "print(f\"Seed pool: {len(SEED_POOL)} seeds\")\n",
         "print(\"Env imports OK\")"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -145,9 +153,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "import json as _json\n",
         "import re as _re\n",
@@ -194,7 +200,9 @@
         "assert check_json_format('{\"action_type\": \"bogus\"}') == 'regex_fallback'\n",
         "assert check_json_format('no json here') == 'safe_idle'\n",
         "print('check_json_format OK')"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -209,9 +217,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "def reward_fn_outcome(completions, prompts, tier=None, seed=None, **kwargs):\n",
         "    \"\"\"\n",
@@ -290,7 +296,9 @@
         "\n",
         "\n",
         "print('Reward functions defined.')"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -304,9 +312,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "def build_prompt_dataset(n=200):\n",
         "    \"\"\"\n",
@@ -338,7 +344,9 @@
         "print(f\"Tier: {_test_ds[0]['tier']}, Seed: {_test_ds[0]['seed']}\")\n",
         "print(f\"Prompt roles: {[m['role'] for m in _test_ds[0]['prompt']]}\")\n",
         "del _test_ds"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -351,9 +359,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "from transformers import TrainerCallback\n",
         "\n",
@@ -373,7 +379,9 @@
         "\n",
         "\n",
         "print('CurriculumDatasetCallback defined.')"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -384,9 +392,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "from trl import GRPOTrainer, GRPOConfig\n",
         "\n",
@@ -418,7 +424,9 @@
         "trainer.add_callback(CurriculumDatasetCallback(trainer))\n",
         "\n",
         "print('GRPOTrainer ready.')"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -429,9 +437,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "import wandb\n",
         "wandb.init(project='wildfire-grpo', name='qwen7b-v2')\n",
@@ -448,7 +454,9 @@
         "with open('./training_stats.json', 'w') as f:\n",
         "    json.dump(stats, f, indent=2)\n",
         "print('Stats saved -> training_stats.json')"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -461,9 +469,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "class LLMAgent:\n",
         "    \"\"\"Wraps the trained model for evaluation. Must be re-instantiated per episode.\"\"\"\n",
@@ -510,13 +516,13 @@
         "\n",
         "\n",
         "print('LLMAgent class defined.')"
-      ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "import numpy as np\n",
         "\n",
@@ -613,7 +619,9 @@
         "print('\\nPASS: At least one tier within 1.0 of heuristic baseline.')\n",
         "\n",
         "FastLanguageModel.for_training(model)"
-      ]
     },
     {
       "cell_type": "markdown",
@@ -624,38 +632,39 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "model.save_pretrained('./grpo_final')\n",
         "tokenizer.save_pretrained('./grpo_final')\n",
         "print('Saved to ./grpo_final')"
-      ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "HF_USERNAME = 'Eshit'  # <-- CHANGE THIS\n",
         "model.push_to_hub(f'{HF_USERNAME}/wildfire-grpo-7b')\n",
         "tokenizer.push_to_hub(f'{HF_USERNAME}/wildfire-grpo-7b')\n",
         "print(f'Pushed to hub: {HF_USERNAME}/wildfire-grpo-7b')"
-      ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
       "metadata": {},
-      "outputs": [],
       "source": [
         "!zip -r grpo_final.zip ./grpo_final\n",
-        "from google.colab import files\n",
-        "files.download('grpo_final.zip')\n",
-        "print('Download started.')"
-      ]
     }
   ],
   "metadata": {
@@ -675,4 +684,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 5
-}

         "4. GRPO loop too slow - consequence of fix 3\n",
         "5. parse_action(text, None) crash - standalone check_json_format() for format reward\n",
         "\n",
+        "**Hardware:** A10G Large 24GB (HuggingFace Space JupyterLab)\n",
+        "\n",
+        "**Before running:** In a terminal, authenticate:\n",
+        "```\n",
+        "huggingface-cli login   # HF token with write access (to load SFT model + push result)\n",
+        "wandb login             # wandb API key (Section 9 logs to wandb)\n",
+        "```\n",
+        "Also ensure the repo is cloned and this notebook is opened from inside the repo root (so `REPO_ROOT = \".\"` resolves correctly)."
       ]
     },
     {
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
+        "!pip install trl==0.15.2 datasets==3.4.1 wandb\n",
+        "!pip install torchvision --extra-index-url https://download.pytorch.org/whl/cu121"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "import torch\n",
         "assert torch.cuda.is_available(), \"GPU not available - switch to a GPU runtime\"\n",
         "gpu_name = torch.cuda.get_device_name(0)\n",
+        "gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9\n",
         "print(f\"GPU: {gpu_name}  |  VRAM: {gpu_mem:.1f} GB\")"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "from unsloth import FastLanguageModel\n",
         "\n",
         "\n",
         "print(f\"Loaded SFT checkpoint: {SFT_MODEL}\")\n",
         "model.print_trainable_parameters()"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "import os, random, json, sys\n",
         "import torch\n",
         "print(f\"Start tier: {controller.get_tier()}\")\n",
         "print(f\"Seed pool: {len(SEED_POOL)} seeds\")\n",
         "print(\"Env imports OK\")"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "import json as _json\n",
         "import re as _re\n",
         "assert check_json_format('{\"action_type\": \"bogus\"}') == 'regex_fallback'\n",
         "assert check_json_format('no json here') == 'safe_idle'\n",
         "print('check_json_format OK')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "def reward_fn_outcome(completions, prompts, tier=None, seed=None, **kwargs):\n",
         "    \"\"\"\n",
         "\n",
         "\n",
         "print('Reward functions defined.')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "def build_prompt_dataset(n=200):\n",
         "    \"\"\"\n",
         "print(f\"Tier: {_test_ds[0]['tier']}, Seed: {_test_ds[0]['seed']}\")\n",
         "print(f\"Prompt roles: {[m['role'] for m in _test_ds[0]['prompt']]}\")\n",
         "del _test_ds"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "from transformers import TrainerCallback\n",
         "\n",
         "\n",
         "\n",
         "print('CurriculumDatasetCallback defined.')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "from trl import GRPOTrainer, GRPOConfig\n",
         "\n",
         "trainer.add_callback(CurriculumDatasetCallback(trainer))\n",
         "\n",
         "print('GRPOTrainer ready.')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "import wandb\n",
         "wandb.init(project='wildfire-grpo', name='qwen7b-v2')\n",
         "with open('./training_stats.json', 'w') as f:\n",
         "    json.dump(stats, f, indent=2)\n",
         "print('Stats saved -> training_stats.json')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "class LLMAgent:\n",
         "    \"\"\"Wraps the trained model for evaluation. Must be re-instantiated per episode.\"\"\"\n",
         "\n",
         "\n",
         "print('LLMAgent class defined.')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "import numpy as np\n",
         "\n",
         "print('\\nPASS: At least one tier within 1.0 of heuristic baseline.')\n",
         "\n",
         "FastLanguageModel.for_training(model)"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "model.save_pretrained('./grpo_final')\n",
         "tokenizer.save_pretrained('./grpo_final')\n",
         "print('Saved to ./grpo_final')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "HF_USERNAME = 'Eshit'  # <-- CHANGE THIS\n",
         "model.push_to_hub(f'{HF_USERNAME}/wildfire-grpo-7b')\n",
         "tokenizer.push_to_hub(f'{HF_USERNAME}/wildfire-grpo-7b')\n",
         "print(f'Pushed to hub: {HF_USERNAME}/wildfire-grpo-7b')"
+      ],
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "!zip -r grpo_final.zip ./grpo_final\n",
+        "print('Zipped to grpo_final.zip')\n",
+        "# On HF JupyterLab: right-click grpo_final.zip in the file browser and choose Download.\n",
+        "# On Google Colab only (not needed here):\n",
+        "# from google.colab import files; files.download('grpo_final.zip')"
+      ],
+      "execution_count": null,
+      "outputs": []
     }
   ],
   "metadata": {
   },
   "nbformat": 4,
   "nbformat_minor": 5
+}

training/training_stats.json ADDED Viewed

	@@ -0,0 +1,152 @@

+[
+  {"step": 0, "tier": "easy", "mean_reward": 4.22450625},
+  {"step": 1, "tier": "easy", "mean_reward": 7.235850000000001},
+  {"step": 2, "tier": "easy", "mean_reward": 5.956550000000004},
+  {"step": 3, "tier": "easy", "mean_reward": 3.8100750000000003},
+  {"step": 4, "tier": "easy", "mean_reward": 5.760793749999998},
+  {"step": 5, "tier": "easy", "mean_reward": 7.463293749999999},
+  {"step": 6, "tier": "easy", "mean_reward": 7.546843750000001},
+  {"step": 7, "tier": "easy", "mean_reward": 5.279537499999998},
+  {"step": 8, "tier": "easy", "mean_reward": 5.774396875000001},
+  {"step": 9, "tier": "easy", "mean_reward": 5.672221875000001},
+  {"step": 10, "tier": "easy", "mean_reward": 7.486034375000001},
+  {"step": 11, "tier": "easy", "mean_reward": 3.8812187500000004},
+  {"step": 12, "tier": "easy", "mean_reward": 6.099375},
+  {"step": 13, "tier": "easy", "mean_reward": 4.054215625},
+  {"step": 14, "tier": "easy", "mean_reward": 2.3378656249999996},
+  {"step": 15, "tier": "easy", "mean_reward": 7.213131249999999},
+  {"step": 16, "tier": "easy", "mean_reward": 7.514025000000003},
+  {"step": 17, "tier": "easy", "mean_reward": 5.553949999999997},
+  {"step": 18, "tier": "easy", "mean_reward": 5.864062499999999},
+  {"step": 19, "tier": "easy", "mean_reward": 7.496884374999997},
+  {"step": 20, "tier": "easy", "mean_reward": 5.562199999999999},
+  {"step": 21, "tier": "easy", "mean_reward": 5.7229656250000005},
+  {"step": 22, "tier": "easy", "mean_reward": 7.3982468750000026},
+  {"step": 23, "tier": "easy", "mean_reward": 4.385203124999996},
+  {"step": 24, "tier": "easy", "mean_reward": 7.101512500000001},
+  {"step": 25, "tier": "easy", "mean_reward": 7.246253125},
+  {"step": 26, "tier": "easy", "mean_reward": 5.618318749999997},
+  {"step": 27, "tier": "easy", "mean_reward": 3.7970281249999998},
+  {"step": 28, "tier": "easy", "mean_reward": 5.964250000000002},
+  {"step": 29, "tier": "easy", "mean_reward": 7.492940624999996},
+  {"step": 30, "tier": "easy", "mean_reward": 6.027812499999999},
+  {"step": 31, "tier": "easy", "mean_reward": 5.941168749999999},
+  {"step": 32, "tier": "easy", "mean_reward": 6.864665624999995},
+  {"step": 33, "tier": "easy", "mean_reward": 5.611512500000002},
+  {"step": 34, "tier": "easy", "mean_reward": 5.644321875},
+  {"step": 35, "tier": "easy", "mean_reward": 6.196540625},
+  {"step": 36, "tier": "easy", "mean_reward": 7.3195125},
+  {"step": 37, "tier": "easy", "mean_reward": 6.589524999999998},
+  {"step": 38, "tier": "easy", "mean_reward": 6.493584374999999},
+  {"step": 39, "tier": "easy", "mean_reward": 4.5787531249999995},
+  {"step": 40, "tier": "easy", "mean_reward": 7.1647374999999975},
+  {"step": 41, "tier": "easy", "mean_reward": 6.307021875},
+  {"step": 42, "tier": "easy", "mean_reward": 5.6441625},
+  {"step": 43, "tier": "easy", "mean_reward": 6.051987499999996},
+  {"step": 44, "tier": "easy", "mean_reward": 6.970406250000004},
+  {"step": 45, "tier": "easy", "mean_reward": 7.375721874999999},
+  {"step": 46, "tier": "easy", "mean_reward": 6.082374999999997},
+  {"step": 47, "tier": "easy", "mean_reward": 6.735612500000002},
+  {"step": 48, "tier": "easy", "mean_reward": 6.820753125000001},
+  {"step": 49, "tier": "easy", "mean_reward": 5.743384375000001},
+  {"step": 50, "tier": "easy", "mean_reward": 6.935793750000004},
+  {"step": 51, "tier": "easy", "mean_reward": 6.389853125},
+  {"step": 52, "tier": "easy", "mean_reward": 6.366893750000002},
+  {"step": 53, "tier": "medium", "mean_reward": 6.685290624999997},
+  {"step": 54, "tier": "medium", "mean_reward": 5.949612500000001},
+  {"step": 55, "tier": "medium", "mean_reward": 2.770065624999999},
+  {"step": 56, "tier": "medium", "mean_reward": 7.203259374999998},
+  {"step": 57, "tier": "medium", "mean_reward": 4.506112500000001},
+  {"step": 58, "tier": "medium", "mean_reward": 7.0263187500000015},
+  {"step": 59, "tier": "medium", "mean_reward": 5.168934375000002},
+  {"step": 60, "tier": "medium", "mean_reward": 7.033081250000002},
+  {"step": 61, "tier": "medium", "mean_reward": 6.253359374999997},
+  {"step": 62, "tier": "medium", "mean_reward": 6.959756249999999},
+  {"step": 63, "tier": "hard", "mean_reward": 6.969309374999998},
+  {"step": 64, "tier": "hard", "mean_reward": 5.3616906250000005},
+  {"step": 65, "tier": "hard", "mean_reward": 6.252678124999999},
+  {"step": 66, "tier": "hard", "mean_reward": 2.5560937500000005},
+  {"step": 67, "tier": "hard", "mean_reward": 5.578853125},
+  {"step": 68, "tier": "hard", "mean_reward": 7.466365625000002},
+  {"step": 69, "tier": "hard", "mean_reward": 7.713275000000002},
+  {"step": 70, "tier": "hard", "mean_reward": 7.621018749999998},
+  {"step": 71, "tier": "hard", "mean_reward": 6.264199999999996},
+  {"step": 72, "tier": "hard", "mean_reward": 4.712021874999998},
+  {"step": 73, "tier": "hard", "mean_reward": 3.8931437500000015},
+  {"step": 74, "tier": "hard", "mean_reward": 7.114093750000004},
+  {"step": 75, "tier": "hard", "mean_reward": 6.6951906249999995},
+  {"step": 76, "tier": "hard", "mean_reward": 2.933387499999999},
+  {"step": 77, "tier": "hard", "mean_reward": 6.704121874999999},
+  {"step": 78, "tier": "hard", "mean_reward": 5.275803125},
+  {"step": 79, "tier": "hard", "mean_reward": 5.645184375000001},
+  {"step": 80, "tier": "hard", "mean_reward": 7.5555062500000005},
+  {"step": 81, "tier": "hard", "mean_reward": 5.178903125000001},
+  {"step": 82, "tier": "hard", "mean_reward": 5.782215625},
+  {"step": 83, "tier": "hard", "mean_reward": 7.4922562500000005},
+  {"step": 84, "tier": "hard", "mean_reward": 5.397803125000002},
+  {"step": 85, "tier": "hard", "mean_reward": 5.785240625},
+  {"step": 86, "tier": "hard", "mean_reward": 6.006559375000001},
+  {"step": 87, "tier": "hard", "mean_reward": 5.064365625000001},
+  {"step": 88, "tier": "hard", "mean_reward": 6.120146874999998},
+  {"step": 89, "tier": "hard", "mean_reward": 7.3549874999999965},
+  {"step": 90, "tier": "hard", "mean_reward": 5.017793749999999},
+  {"step": 91, "tier": "hard", "mean_reward": 7.611765625000001},
+  {"step": 92, "tier": "hard", "mean_reward": 7.58835},
+  {"step": 93, "tier": "hard", "mean_reward": 4.282640625000003},
+  {"step": 94, "tier": "hard", "mean_reward": 7.624143749999999},
+  {"step": 95, "tier": "hard", "mean_reward": 7.467125},
+  {"step": 96, "tier": "hard", "mean_reward": 7.492253125000001},
+  {"step": 97, "tier": "hard", "mean_reward": 3.8446718750000026},
+  {"step": 98, "tier": "hard", "mean_reward": 6.381118750000002},
+  {"step": 99, "tier": "hard", "mean_reward": 5.9315812500000025},
+  {"step": 100, "tier": "hard", "mean_reward": 5.303253125000001},
+  {"step": 101, "tier": "hard", "mean_reward": 5.379359374999997},
+  {"step": 102, "tier": "hard", "mean_reward": 6.105550000000001},
+  {"step": 103, "tier": "hard", "mean_reward": 4.132209375000002},
+  {"step": 104, "tier": "hard", "mean_reward": 5.99065},
+  {"step": 105, "tier": "hard", "mean_reward": 6.396168749999998},
+  {"step": 106, "tier": "hard", "mean_reward": 6.190524999999998},
+  {"step": 107, "tier": "hard", "mean_reward": 7.378921874999999},
+  {"step": 108, "tier": "hard", "mean_reward": 5.527831249999997},
+  {"step": 109, "tier": "hard", "mean_reward": 5.664981250000001},
+  {"step": 110, "tier": "hard", "mean_reward": 6.596590625000001},
+  {"step": 111, "tier": "hard", "mean_reward": 5.718784375000003},
+  {"step": 112, "tier": "hard", "mean_reward": 5.454768749999999},
+  {"step": 113, "tier": "hard", "mean_reward": 5.661271875},
+  {"step": 114, "tier": "hard", "mean_reward": 4.344675},
+  {"step": 115, "tier": "hard", "mean_reward": 4.810181250000001},
+  {"step": 116, "tier": "hard", "mean_reward": 5.746131249999998},
+  {"step": 117, "tier": "hard", "mean_reward": 5.718934375},
+  {"step": 118, "tier": "hard", "mean_reward": 7.343309375},
+  {"step": 119, "tier": "hard", "mean_reward": 5.728325},
+  {"step": 120, "tier": "hard", "mean_reward": 4.915784375},
+  {"step": 121, "tier": "hard", "mean_reward": 5.746521875},
+  {"step": 122, "tier": "hard", "mean_reward": 6.815368750000003},
+  {"step": 123, "tier": "hard", "mean_reward": 6.415571874999999},
+  {"step": 124, "tier": "hard", "mean_reward": 6.616740625000003},
+  {"step": 125, "tier": "hard", "mean_reward": 7.136087499999999},
+  {"step": 126, "tier": "hard", "mean_reward": 6.3915187499999995},
+  {"step": 127, "tier": "hard", "mean_reward": 6.998762500000002},
+  {"step": 128, "tier": "hard", "mean_reward": 6.718474999999998},
+  {"step": 129, "tier": "hard", "mean_reward": 6.675468750000001},
+  {"step": 130, "tier": "hard", "mean_reward": 6.832443750000001},
+  {"step": 131, "tier": "hard", "mean_reward": 7.4953281249999995},
+  {"step": 132, "tier": "hard", "mean_reward": 6.984856249999997},
+  {"step": 133, "tier": "hard", "mean_reward": 4.969693749999999},
+  {"step": 134, "tier": "hard", "mean_reward": 6.62208125},
+  {"step": 135, "tier": "hard", "mean_reward": 5.769275000000002},
+  {"step": 136, "tier": "hard", "mean_reward": 5.799609374999999},
+  {"step": 137, "tier": "hard", "mean_reward": 5.565890624999998},
+  {"step": 138, "tier": "hard", "mean_reward": 3.290540625},
+  {"step": 139, "tier": "hard", "mean_reward": 7.368412500000004},
+  {"step": 140, "tier": "hard", "mean_reward": 7.106300000000002},
+  {"step": 141, "tier": "hard", "mean_reward": 5.6757718750000015},
+  {"step": 142, "tier": "hard", "mean_reward": 5.496281250000001},
+  {"step": 143, "tier": "hard", "mean_reward": 5.8853125},
+  {"step": 144, "tier": "hard", "mean_reward": 7.661725},
+  {"step": 145, "tier": "hard", "mean_reward": 5.6637625},
+  {"step": 146, "tier": "hard", "mean_reward": 6.095750000000003},
+  {"step": 147, "tier": "hard", "mean_reward": 7.636731250000005},
+  {"step": 148, "tier": "hard", "mean_reward": 6.188656249999999},
+  {"step": 149, "tier": "hard", "mean_reward": 6.59115}
+]