Upload unit3.ipynb
unit3.ipynb +823 -0
unit3.ipynb
ADDED
|
@@ -0,0 +1,823 @@
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {
|
| 6 |
+
"id": "k7xBVPzoXxOg"
|
| 7 |
+
},
|
| 8 |
+
"source": [
|
| 9 |
+
"# Unit 3: Deep Q-Learning with Atari Games 👾 using RL Baselines3 Zoo\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit4/thumbnail.jpg\" alt=\"Unit 3 Thumbnail\">\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"In this notebook, **you'll train a Deep Q-Learning agent** playing Space Invaders using [RL Baselines3 Zoo](https://github.com/DLR-RM/rl-baselines3-zoo), a training framework based on [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/) that provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"We're using the [RL-Baselines-3 Zoo integration, a vanilla version of Deep Q-Learning](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) with no extensions such as Double-DQN, Dueling-DQN, and Prioritized Experience Replay.\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"⬇️ Here is an example of what **you will achieve** ⬇️"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "code",
|
| 22 |
+
"execution_count": null,
|
| 23 |
+
"metadata": {
|
| 24 |
+
"id": "J9S713biXntc"
|
| 25 |
+
},
|
| 26 |
+
"outputs": [],
|
| 27 |
+
"source": [
|
| 28 |
+
"%%html\n",
|
| 29 |
+
"<video controls autoplay><source src=\"https://huggingface.co/ThomasSimonini/ppo-SpaceInvadersNoFrameskip-v4/resolve/main/replay.mp4\" type=\"video/mp4\"></video>"
|
| 30 |
+
]
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"cell_type": "markdown",
|
| 34 |
+
"source": [
|
| 35 |
+
"### 🎮 Environments:\n",
|
| 36 |
+
"\n",
|
| 37 |
+
"- [SpacesInvadersNoFrameskip-v4](https://gymnasium.farama.org/environments/atari/space_invaders/)\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"You can see the difference between Space Invaders versions here 👉 https://gymnasium.farama.org/environments/atari/space_invaders/#variants\n",
|
| 40 |
+
"\n",
|
| 41 |
+
"### 📚 RL-Library:\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"- [RL-Baselines3-Zoo](https://github.com/DLR-RM/rl-baselines3-zoo)"
|
| 44 |
+
],
|
| 45 |
+
"metadata": {
|
| 46 |
+
"id": "ykJiGevCMVc5"
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "markdown",
|
| 51 |
+
"metadata": {
|
| 52 |
+
"id": "wciHGjrFYz9m"
|
| 53 |
+
},
|
| 54 |
+
"source": [
|
| 55 |
+
"## Objectives of this notebook 🏆\n",
|
| 56 |
+
"At the end of the notebook, you will:\n",
|
| 57 |
+
"- Be able to understand deeper **how RL Baselines3 Zoo works**.\n",
|
| 58 |
+
"- Be able to **push your trained agent and the code to the Hub** with a nice video replay and an evaluation score 🔥.\n",
|
| 59 |
+
"\n",
|
| 60 |
+
"\n"
|
| 61 |
+
]
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"cell_type": "markdown",
|
| 65 |
+
"source": [
|
| 66 |
+
"## This notebook is from Deep Reinforcement Learning Course\n",
|
| 67 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/deep-rl-course-illustration.jpg\" alt=\"Deep RL Course illustration\"/>"
|
| 68 |
+
],
|
| 69 |
+
"metadata": {
|
| 70 |
+
"id": "TsnP0rjxMn1e"
|
| 71 |
+
}
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"cell_type": "markdown",
|
| 75 |
+
"metadata": {
|
| 76 |
+
"id": "nw6fJHIAZd-J"
|
| 77 |
+
},
|
| 78 |
+
"source": [
|
| 79 |
+
"In this free course, you will:\n",
|
| 80 |
+
"\n",
|
| 81 |
+
"- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n",
|
| 82 |
+
"- 🧑💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n",
|
| 83 |
+
"- 🤖 Train **agents in unique environments**\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"And more check 📚 the syllabus 👉 https://simoninithomas.github.io/deep-rl-course\n",
|
| 86 |
+
"\n",
|
| 87 |
+
"Don’t forget to **<a href=\"http://eepurl.com/ic5ZUD\">sign up to the course</a>** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"The best way to keep in touch is to join our discord server to exchange with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"
|
| 91 |
+
]
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"cell_type": "markdown",
|
| 95 |
+
"metadata": {
|
| 96 |
+
"id": "0vgANIBBZg1p"
|
| 97 |
+
},
|
| 98 |
+
"source": [
|
| 99 |
+
"## Prerequisites 🏗️\n",
|
| 100 |
+
"Before diving into the notebook, you need to:\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"🔲 📚 **[Study Deep Q-Learning by reading Unit 3](https://huggingface.co/deep-rl-course/unit3/introduction)** 🤗"
|
| 103 |
+
]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"cell_type": "markdown",
|
| 107 |
+
"source": [
|
| 108 |
+
"We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the Github Repo](https://github.com/huggingface/deep-rl-class/issues)."
|
| 109 |
+
],
|
| 110 |
+
"metadata": {
|
| 111 |
+
"id": "7kszpGFaRVhq"
|
| 112 |
+
}
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"cell_type": "markdown",
|
| 116 |
+
"metadata": {
|
| 117 |
+
"id": "QR0jZtYreSI5"
|
| 118 |
+
},
|
| 119 |
+
"source": [
|
| 120 |
+
"# Let's train a Deep Q-Learning agent playing Atari' Space Invaders 👾 and upload it to the Hub.\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"We strongly recommend students **to use Google Colab for the hands-on exercises instead of running them on their personal computers**.\n",
|
| 123 |
+
"\n",
|
| 124 |
+
"By using Google Colab, **you can focus on learning and experimenting without worrying about the technical aspects of setting up your environments**.\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"To validate this hands-on for the certification process, you need to push your trained model to the Hub and **get a result of >= 200**.\n",
|
| 127 |
+
"\n",
|
| 128 |
+
"To find your result, go to the leaderboard and find your model, **the result = mean_reward - std of reward**\n",
|
| 129 |
+
"\n",
|
| 130 |
+
"For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"
|
| 131 |
+
]
|
| 132 |
+
},
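{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to check that score yourself before pushing, here is a minimal sketch (not part of the official workflow) that computes `mean_reward - std_reward` with Stable-Baselines3's `evaluate_policy`. The model path below assumes RL-Baselines3-Zoo's default save layout; adapt it to wherever your trained agent actually is.\n",
"\n",
"```python\n",
"from stable_baselines3 import DQN\n",
"from stable_baselines3.common.env_util import make_atari_env\n",
"from stable_baselines3.common.vec_env import VecFrameStack\n",
"from stable_baselines3.common.evaluation import evaluate_policy\n",
"\n",
"# Placeholder path: adapt it to where the zoo saved your model\n",
"model = DQN.load(\"logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4.zip\")\n",
"\n",
"# Recreate the training preprocessing: Atari wrappers + 4 stacked frames\n",
"eval_env = make_atari_env(\"SpaceInvadersNoFrameskip-v4\", n_envs=1)\n",
"eval_env = VecFrameStack(eval_env, n_stack=4)\n",
"\n",
"mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n",
"print(f\"result = {mean_reward - std_reward:.2f} (mean: {mean_reward:.2f} +/- {std_reward:.2f})\")\n",
"```"
]
},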
|
| 133 |
+
{
|
| 134 |
+
"cell_type": "markdown",
|
| 135 |
+
"source": [
|
| 136 |
+
"## An advice 💡\n",
|
| 137 |
+
"It's better to run this colab in a copy on your Google Drive, so that **if it timeouts** you still have the saved notebook on your Google Drive and do not need to fill everything from scratch.\n",
|
| 138 |
+
"\n",
|
| 139 |
+
"To do that you can either do `Ctrl + S` or `File > Save a copy in Google Drive.`\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"Also, we're going to **train it for 90 minutes with 1M timesteps**. By typing `!nvidia-smi` will tell you what GPU you're using.\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"And if you want to train more such 10 million steps, this will take about 9 hours, potentially resulting in Colab timing out. In that case, I recommend running this on your local computer (or somewhere else). Just click on: `File>Download`."
|
| 144 |
+
],
|
| 145 |
+
"metadata": {
|
| 146 |
+
"id": "Nc8BnyVEc3Ys"
|
| 147 |
+
}
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"cell_type": "markdown",
|
| 151 |
+
"source": [
|
| 152 |
+
"## Set the GPU 💪\n",
|
| 153 |
+
"- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"
|
| 156 |
+
],
|
| 157 |
+
"metadata": {
|
| 158 |
+
"id": "PU4FVzaoM6fC"
|
| 159 |
+
}
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"cell_type": "markdown",
|
| 163 |
+
"source": [
|
| 164 |
+
"- `Hardware Accelerator > GPU`\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"
|
| 167 |
+
],
|
| 168 |
+
"metadata": {
|
| 169 |
+
"id": "KV0NyFdQM9ZG"
|
| 170 |
+
}
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"cell_type": "markdown",
|
| 174 |
+
"source": [
|
| 175 |
+
"# Install RL-Baselines3 Zoo and its dependencies 📚\n",
|
| 176 |
+
"\n",
|
| 177 |
+
"If you see `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed.` **this is normal and it's not a critical error** there's a conflict of version. But the packages we need are installed."
|
| 178 |
+
],
|
| 179 |
+
"metadata": {
|
| 180 |
+
"id": "wS_cVefO-aYg"
|
| 181 |
+
}
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"cell_type": "code",
|
| 185 |
+
"source": [
|
| 186 |
+
"!pip install git+https://github.com/DLR-RM/rl-baselines3-zoo"
|
| 187 |
+
],
|
| 188 |
+
"metadata": {
|
| 189 |
+
"id": "S1A_E4z3awa_"
|
| 190 |
+
},
|
| 191 |
+
"execution_count": null,
|
| 192 |
+
"outputs": []
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "code",
|
| 196 |
+
"source": [
|
| 197 |
+
"!apt-get install swig cmake ffmpeg"
|
| 198 |
+
],
|
| 199 |
+
"metadata": {
|
| 200 |
+
"id": "8_MllY6Om1eI"
|
| 201 |
+
},
|
| 202 |
+
"execution_count": null,
|
| 203 |
+
"outputs": []
|
| 204 |
+
},
|
| 205 |
+
{
|
| 206 |
+
"cell_type": "markdown",
|
| 207 |
+
"metadata": {
|
| 208 |
+
"id": "4S9mJiKg6SqC"
|
| 209 |
+
},
|
| 210 |
+
"source": [
|
| 211 |
+
"To be able to use Atari games in Gymnasium we need to install atari package. And accept-rom-license to download the rom files (games files)."
|
| 212 |
+
]
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"cell_type": "code",
|
| 216 |
+
"source": [
|
| 217 |
+
"!pip install gymnasium[atari]\n",
|
| 218 |
+
"!pip install gymnasium[accept-rom-license]"
|
| 219 |
+
],
|
| 220 |
+
"metadata": {
|
| 221 |
+
"id": "NsRP-lX1_2fC"
|
| 222 |
+
},
|
| 223 |
+
"execution_count": null,
|
| 224 |
+
"outputs": []
|
| 225 |
+
},
|
| 226 |
+
{
|
| 227 |
+
"cell_type": "markdown",
|
| 228 |
+
"source": [
|
| 229 |
+
"## Create a virtual display 🔽\n",
|
| 230 |
+
"\n",
|
| 231 |
+
"During the notebook, we'll need to generate a replay video. To do so, with colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n",
|
| 232 |
+
"\n",
|
| 233 |
+
"Hence the following cell will install the librairies and create and run a virtual screen 🖥"
|
| 234 |
+
],
|
| 235 |
+
"metadata": {
|
| 236 |
+
"id": "bTpYcVZVMzUI"
|
| 237 |
+
}
|
| 238 |
+
},
|
| 239 |
+
{
|
| 240 |
+
"cell_type": "code",
|
| 241 |
+
"execution_count": null,
|
| 242 |
+
"metadata": {
|
| 243 |
+
"id": "jV6wjQ7Be7p5"
|
| 244 |
+
},
|
| 245 |
+
"outputs": [],
|
| 246 |
+
"source": [
|
| 247 |
+
"%%capture\n",
|
| 248 |
+
"!apt install python-opengl\n",
|
| 249 |
+
"!apt install xvfb\n",
|
| 250 |
+
"!pip3 install pyvirtualdisplay"
|
| 251 |
+
]
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"cell_type": "code",
|
| 255 |
+
"source": [
|
| 256 |
+
"# Virtual display\n",
|
| 257 |
+
"from pyvirtualdisplay import Display\n",
|
| 258 |
+
"\n",
|
| 259 |
+
"virtual_display = Display(visible=0, size=(1400, 900))\n",
|
| 260 |
+
"virtual_display.start()"
|
| 261 |
+
],
|
| 262 |
+
"metadata": {
|
| 263 |
+
"id": "BE5JWP5rQIKf"
|
| 264 |
+
},
|
| 265 |
+
"execution_count": null,
|
| 266 |
+
"outputs": []
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"cell_type": "markdown",
|
| 270 |
+
"metadata": {
|
| 271 |
+
"id": "5iPgzluo9z-u"
|
| 272 |
+
},
|
| 273 |
+
"source": [
|
| 274 |
+
"## Train our Deep Q-Learning Agent to Play Space Invaders 👾\n",
|
| 275 |
+
"\n",
|
| 276 |
+
"To train an agent with RL-Baselines3-Zoo, we just need to do two things:\n",
|
| 277 |
+
"\n",
|
| 278 |
+
"1. Create a hyperparameter config file that will contain our training hyperparameters called `dqn.yml`.\n",
|
| 279 |
+
"\n",
|
| 280 |
+
"This is a template example:\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"```\n",
|
| 283 |
+
"SpaceInvadersNoFrameskip-v4:\n",
|
| 284 |
+
" env_wrapper:\n",
|
| 285 |
+
" - stable_baselines3.common.atari_wrappers.AtariWrapper\n",
|
| 286 |
+
" frame_stack: 4\n",
|
| 287 |
+
" policy: 'CnnPolicy'\n",
|
| 288 |
+
" n_timesteps: !!float 1e6\n",
|
| 289 |
+
" buffer_size: 100000\n",
|
| 290 |
+
" learning_rate: !!float 1e-4\n",
|
| 291 |
+
" batch_size: 32\n",
|
| 292 |
+
" learning_starts: 100000\n",
|
| 293 |
+
" target_update_interval: 1000\n",
|
| 294 |
+
" train_freq: 4\n",
|
| 295 |
+
" gradient_steps: 1\n",
|
| 296 |
+
" exploration_fraction: 0.1\n",
|
| 297 |
+
" exploration_final_eps: 0.01\n",
|
| 298 |
+
" # If True, you need to deactivate handle_timeout_termination\n",
|
| 299 |
+
" # in the replay_buffer_kwargs\n",
|
| 300 |
+
" optimize_memory_usage: False\n",
|
| 301 |
+
"```"
|
| 302 |
+
]
|
| 303 |
+
},
|
| 304 |
+
{
|
| 305 |
+
"cell_type": "markdown",
|
| 306 |
+
"metadata": {
|
| 307 |
+
"id": "_VjblFSVDQOj"
|
| 308 |
+
},
|
| 309 |
+
"source": [
|
| 310 |
+
"Here we see that:\n",
|
| 311 |
+
"- We use the `Atari Wrapper` that preprocess the input (Frame reduction ,grayscale, stack 4 frames)\n",
|
| 312 |
+
"- We use `CnnPolicy`, since we use Convolutional layers to process the frames\n",
|
| 313 |
+
"- We train it for 10 million `n_timesteps`\n",
|
| 314 |
+
"- Memory (Experience Replay) size is 100000, aka the amount of experience steps you saved to train again your agent with.\n",
|
| 315 |
+
"\n",
|
| 316 |
+
"💡 My advice is to **reduce the training timesteps to 1M,** which will take about 90 minutes on a P100. `!nvidia-smi` will tell you what GPU you're using. At 10 million steps, this will take about 9 hours, which could likely result in Colab timing out. I recommend running this on your local computer (or somewhere else). Just click on: `File>Download`."
|
| 317 |
+
]
|
| 318 |
+
},
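{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the YAML concrete, here is a rough sketch of what RL-Baselines3-Zoo roughly does with this config, written as a plain Stable-Baselines3 call. It is illustrative only (the zoo adds logging, checkpointing and more), and the values simply mirror the template above.\n",
"\n",
"```python\n",
"from stable_baselines3 import DQN\n",
"from stable_baselines3.common.env_util import make_atari_env\n",
"from stable_baselines3.common.vec_env import VecFrameStack\n",
"\n",
"# AtariWrapper preprocessing (resize, grayscale) + 4-frame stacking, as in env_wrapper / frame_stack\n",
"env = make_atari_env(\"SpaceInvadersNoFrameskip-v4\", n_envs=1)\n",
"env = VecFrameStack(env, n_stack=4)\n",
"\n",
"model = DQN(\n",
"    \"CnnPolicy\",              # convolutional policy, since observations are images\n",
"    env,\n",
"    buffer_size=100_000,      # replay memory size\n",
"    learning_rate=1e-4,\n",
"    batch_size=32,\n",
"    learning_starts=100_000,  # random steps collected before learning starts\n",
"    target_update_interval=1_000,\n",
"    train_freq=4,\n",
"    gradient_steps=1,\n",
"    exploration_fraction=0.1,\n",
"    exploration_final_eps=0.01,\n",
"    optimize_memory_usage=False,\n",
"    verbose=1,\n",
")\n",
"model.learn(total_timesteps=1_000_000)  # 1M keeps Colab runtimes reasonable; the full course run uses 10M\n",
"```"
]
},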
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "markdown",
|
| 321 |
+
"metadata": {
|
| 322 |
+
"id": "5qTkbWrkECOJ"
|
| 323 |
+
},
|
| 324 |
+
"source": [
|
| 325 |
+
"In terms of hyperparameters optimization, my advice is to focus on these 3 hyperparameters:\n",
|
| 326 |
+
"- `learning_rate`\n",
|
| 327 |
+
"- `buffer_size (Experience Memory size)`\n",
|
| 328 |
+
"- `batch_size`\n",
|
| 329 |
+
"\n",
|
| 330 |
+
"As a good practice, you need to **check the documentation to understand what each hyperparameters does**: https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html#parameters\n",
|
| 331 |
+
"\n"
|
| 332 |
+
]
|
| 333 |
+
},
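{
"cell_type": "markdown",
"metadata": {},
"source": [
"The notebook doesn't prescribe how you create `dqn.yml`; one convenient way in Colab is the `%%writefile` cell magic, as in the sketch below. The values copy the template above, except that `n_timesteps` is set to 1M following the advice above (the full run uses 10M).\n",
"\n",
"```\n",
"%%writefile dqn.yml\n",
"SpaceInvadersNoFrameskip-v4:\n",
"  env_wrapper:\n",
"    - stable_baselines3.common.atari_wrappers.AtariWrapper\n",
"  frame_stack: 4\n",
"  policy: 'CnnPolicy'\n",
"  n_timesteps: !!float 1e6\n",
"  buffer_size: 100000\n",
"  learning_rate: !!float 1e-4\n",
"  batch_size: 32\n",
"  learning_starts: 100000\n",
"  target_update_interval: 1000\n",
"  train_freq: 4\n",
"  gradient_steps: 1\n",
"  exploration_fraction: 0.1\n",
"  exploration_final_eps: 0.01\n",
"  optimize_memory_usage: False\n",
"```"
]
},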
|
| 334 |
+
{
|
| 335 |
+
"cell_type": "markdown",
|
| 336 |
+
"metadata": {
|
| 337 |
+
"id": "Hn8bRTHvERRL"
|
| 338 |
+
},
|
| 339 |
+
"source": [
|
| 340 |
+
"2. We start the training and save the models on `logs` folder 📁\n",
|
| 341 |
+
"\n",
|
| 342 |
+
"- Define the algorithm after `--algo`, where we save the model after `-f` and where the hyperparameter config is after `-c`."
|
| 343 |
+
]
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"cell_type": "code",
|
| 347 |
+
"execution_count": null,
|
| 348 |
+
"metadata": {
|
| 349 |
+
"id": "Xr1TVW4xfbz3"
|
| 350 |
+
},
|
| 351 |
+
"outputs": [],
|
| 352 |
+
"source": [
|
| 353 |
+
"!python -m rl_zoo3.train --algo ________ --env SpaceInvadersNoFrameskip-v4 -f _________ -c _________"
|
| 354 |
+
]
|
| 355 |
+
},
|
| 356 |
+
{
|
| 357 |
+
"cell_type": "markdown",
|
| 358 |
+
"metadata": {
|
| 359 |
+
"id": "SeChoX-3SZfP"
|
| 360 |
+
},
|
| 361 |
+
"source": [
|
| 362 |
+
"#### Solution"
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"cell_type": "code",
|
| 367 |
+
"execution_count": null,
|
| 368 |
+
"metadata": {
|
| 369 |
+
"id": "PuocgdokSab9"
|
| 370 |
+
},
|
| 371 |
+
"outputs": [],
|
| 372 |
+
"source": [
|
| 373 |
+
"!python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -c dqn.yml"
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"cell_type": "markdown",
|
| 378 |
+
"metadata": {
|
| 379 |
+
"id": "_dLomIiMKQaf"
|
| 380 |
+
},
|
| 381 |
+
"source": [
|
| 382 |
+
"## Let's evaluate our agent 👀\n",
|
| 383 |
+
"- RL-Baselines3-Zoo provides `enjoy.py`, a python script to evaluate our agent. In most RL libraries, we call the evaluation script `enjoy.py`.\n",
|
| 384 |
+
"- Let's evaluate it for 5000 timesteps 🔥"
|
| 385 |
+
]
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "code",
|
| 389 |
+
"execution_count": null,
|
| 390 |
+
"metadata": {
|
| 391 |
+
"id": "co5um_KeKbBJ"
|
| 392 |
+
},
|
| 393 |
+
"outputs": [],
|
| 394 |
+
"source": [
|
| 395 |
+
"!python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 --no-render --n-timesteps _________ --folder logs/"
|
| 396 |
+
]
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"cell_type": "markdown",
|
| 400 |
+
"metadata": {
|
| 401 |
+
"id": "Q24K1tyWSj7t"
|
| 402 |
+
},
|
| 403 |
+
"source": [
|
| 404 |
+
"#### Solution"
|
| 405 |
+
]
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"cell_type": "code",
|
| 409 |
+
"execution_count": null,
|
| 410 |
+
"metadata": {
|
| 411 |
+
"id": "P_uSmwGRSk0z"
|
| 412 |
+
},
|
| 413 |
+
"outputs": [],
|
| 414 |
+
"source": [
|
| 415 |
+
"!python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 --no-render --n-timesteps 5000 --folder logs/"
|
| 416 |
+
]
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"cell_type": "markdown",
|
| 420 |
+
"metadata": {
|
| 421 |
+
"id": "liBeTltiHJtr"
|
| 422 |
+
},
|
| 423 |
+
"source": [
|
| 424 |
+
"## Publish our trained model on the Hub 🚀\n",
|
| 425 |
+
"Now that we saw we got good results after the training, we can publish our trained model on the hub 🤗 with one line of code.\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit3/space-invaders-model.gif\" alt=\"Space Invaders model\">"
|
| 428 |
+
]
|
| 429 |
+
},
|
| 430 |
+
{
|
| 431 |
+
"cell_type": "markdown",
|
| 432 |
+
"metadata": {
|
| 433 |
+
"id": "ezbHS1q3HYVV"
|
| 434 |
+
},
|
| 435 |
+
"source": [
|
| 436 |
+
"By using `rl_zoo3.push_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the hub**.\n",
|
| 437 |
+
"\n",
|
| 438 |
+
"This way:\n",
|
| 439 |
+
"- You can **showcase our work** 🔥\n",
|
| 440 |
+
"- You can **visualize your agent playing** 👀\n",
|
| 441 |
+
"- You can **share with the community an agent that others can use** 💾\n",
|
| 442 |
+
"- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard"
|
| 443 |
+
]
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"cell_type": "markdown",
|
| 447 |
+
"metadata": {
|
| 448 |
+
"id": "XMSeZRBiHk6X"
|
| 449 |
+
},
|
| 450 |
+
"source": [
|
| 451 |
+
"To be able to share your model with the community there are three more steps to follow:\n",
|
| 452 |
+
"\n",
|
| 453 |
+
"1️⃣ (If it's not already done) create an account to HF ➡ https://huggingface.co/join\n",
|
| 454 |
+
"\n",
|
| 455 |
+
"2️⃣ Sign in and then, you need to store your authentication token from the Hugging Face website.\n",
|
| 456 |
+
"- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n",
|
| 457 |
+
"\n",
|
| 458 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF Token\">"
|
| 459 |
+
]
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"cell_type": "markdown",
|
| 463 |
+
"metadata": {
|
| 464 |
+
"id": "9O6FI0F8HnzE"
|
| 465 |
+
},
|
| 466 |
+
"source": [
|
| 467 |
+
"- Copy the token\n",
|
| 468 |
+
"- Run the cell below and past the token"
|
| 469 |
+
]
|
| 470 |
+
},
|
| 471 |
+
{
|
| 472 |
+
"cell_type": "code",
|
| 473 |
+
"execution_count": null,
|
| 474 |
+
"metadata": {
|
| 475 |
+
"id": "Ppu9yePwHrZX"
|
| 476 |
+
},
|
| 477 |
+
"outputs": [],
|
| 478 |
+
"source": [
|
| 479 |
+
"from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.\n",
|
| 480 |
+
"notebook_login()\n",
|
| 481 |
+
"!git config --global credential.helper store"
|
| 482 |
+
]
|
| 483 |
+
},
|
| 484 |
+
{
|
| 485 |
+
"cell_type": "markdown",
|
| 486 |
+
"metadata": {
|
| 487 |
+
"id": "2RVEdunPHs8B"
|
| 488 |
+
},
|
| 489 |
+
"source": [
|
| 490 |
+
"If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"
|
| 491 |
+
]
|
| 492 |
+
},
|
| 493 |
+
{
|
| 494 |
+
"cell_type": "markdown",
|
| 495 |
+
"metadata": {
|
| 496 |
+
"id": "dSLwdmvhHvjw"
|
| 497 |
+
},
|
| 498 |
+
"source": [
|
| 499 |
+
"3️⃣ We're now ready to push our trained agent to the 🤗 Hub 🔥"
|
| 500 |
+
]
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"cell_type": "markdown",
|
| 504 |
+
"metadata": {
|
| 505 |
+
"id": "PW436XnhHw1H"
|
| 506 |
+
},
|
| 507 |
+
"source": [
|
| 508 |
+
"Let's run push_to_hub.py file to upload our trained agent to the Hub.\n",
|
| 509 |
+
"\n",
|
| 510 |
+
"`--repo-name `: The name of the repo\n",
|
| 511 |
+
"\n",
|
| 512 |
+
"`-orga`: Your Hugging Face username\n",
|
| 513 |
+
"\n",
|
| 514 |
+
"`-f`: Where the trained model folder is (in our case `logs`)\n",
|
| 515 |
+
"\n",
|
| 516 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit3/select-id.png\" alt=\"Select Id\">"
|
| 517 |
+
]
|
| 518 |
+
},
|
| 519 |
+
{
|
| 520 |
+
"cell_type": "code",
|
| 521 |
+
"execution_count": null,
|
| 522 |
+
"metadata": {
|
| 523 |
+
"id": "Ygk2sEktTDEw"
|
| 524 |
+
},
|
| 525 |
+
"outputs": [],
|
| 526 |
+
"source": [
|
| 527 |
+
"!python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 --repo-name _____________________ -orga _____________________ -f logs/"
|
| 528 |
+
]
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"cell_type": "markdown",
|
| 532 |
+
"metadata": {
|
| 533 |
+
"id": "otgpa0rhS9wR"
|
| 534 |
+
},
|
| 535 |
+
"source": [
|
| 536 |
+
"#### Solution"
|
| 537 |
+
]
|
| 538 |
+
},
|
| 539 |
+
{
|
| 540 |
+
"cell_type": "code",
|
| 541 |
+
"execution_count": null,
|
| 542 |
+
"metadata": {
|
| 543 |
+
"id": "_HQNlAXuEhci"
|
| 544 |
+
},
|
| 545 |
+
"outputs": [],
|
| 546 |
+
"source": [
|
| 547 |
+
"!python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 --repo-name dqn-SpaceInvadersNoFrameskip-v4 -orga Anish13 -f logs/"
|
| 548 |
+
]
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"cell_type": "code",
|
| 552 |
+
"source": [
|
| 553 |
+
"import os\n",
|
| 554 |
+
"print(os.path.exists(\"/tmp/tmpddei0m0p/-step-0-to-step-1000.mp4\"))\n"
|
| 555 |
+
],
|
| 556 |
+
"metadata": {
|
| 557 |
+
"id": "EJVz6PtP3OKG"
|
| 558 |
+
},
|
| 559 |
+
"execution_count": null,
|
| 560 |
+
"outputs": []
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"cell_type": "code",
|
| 564 |
+
"source": [
|
| 565 |
+
"!python -m rl_zoo3.record_video --env SpaceInvadersNoFrameskip-v4 --algo dqn --folder logs/ --n-envs 1"
|
| 566 |
+
],
|
| 567 |
+
"metadata": {
|
| 568 |
+
"id": "CjGlE-x55V6H"
|
| 569 |
+
},
|
| 570 |
+
"execution_count": null,
|
| 571 |
+
"outputs": []
|
| 572 |
+
},
|
| 573 |
+
{
|
| 574 |
+
"cell_type": "markdown",
|
| 575 |
+
"metadata": {
|
| 576 |
+
"id": "0D4F5zsTTJ-L"
|
| 577 |
+
},
|
| 578 |
+
"source": [
|
| 579 |
+
"###."
|
| 580 |
+
]
|
| 581 |
+
},
|
| 582 |
+
{
|
| 583 |
+
"cell_type": "markdown",
|
| 584 |
+
"metadata": {
|
| 585 |
+
"id": "ff89kd2HL1_s"
|
| 586 |
+
},
|
| 587 |
+
"source": [
|
| 588 |
+
"Congrats 🥳 you've just trained and uploaded your first Deep Q-Learning agent using RL-Baselines-3 Zoo. The script above should have displayed a link to a model repository such as https://huggingface.co/ThomasSimonini/dqn-SpaceInvadersNoFrameskip-v4. When you go to this link, you can:\n",
|
| 589 |
+
"\n",
|
| 590 |
+
"- See a **video preview of your agent** at the right.\n",
|
| 591 |
+
"- Click \"Files and versions\" to see all the files in the repository.\n",
|
| 592 |
+
"- Click \"Use in stable-baselines3\" to get a code snippet that shows how to load the model.\n",
|
| 593 |
+
"- A model card (`README.md` file) which gives a description of the model and the hyperparameters you used.\n",
|
| 594 |
+
"\n",
|
| 595 |
+
"Under the hood, the Hub uses git-based repositories (don't worry if you don't know what git is), which means you can update the model with new versions as you experiment and improve your agent.\n",
|
| 596 |
+
"\n",
|
| 597 |
+
"**Compare the results of your agents with your classmates** using the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) 🏆"
|
| 598 |
+
]
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"cell_type": "markdown",
|
| 602 |
+
"metadata": {
|
| 603 |
+
"id": "fyRKcCYY-dIo"
|
| 604 |
+
},
|
| 605 |
+
"source": [
|
| 606 |
+
"## Load a powerful trained model 🔥\n",
|
| 607 |
+
"- The Stable-Baselines3 team uploaded **more than 150 trained Deep Reinforcement Learning agents on the Hub**.\n",
|
| 608 |
+
"\n",
|
| 609 |
+
"You can find them here: 👉 https://huggingface.co/sb3\n",
|
| 610 |
+
"\n",
|
| 611 |
+
"Some examples:\n",
|
| 612 |
+
"- Asteroids: https://huggingface.co/sb3/dqn-AsteroidsNoFrameskip-v4\n",
|
| 613 |
+
"- Beam Rider: https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4\n",
|
| 614 |
+
"- Breakout: https://huggingface.co/sb3/dqn-BreakoutNoFrameskip-v4\n",
|
| 615 |
+
"- Road Runner: https://huggingface.co/sb3/dqn-RoadRunnerNoFrameskip-v4\n",
|
| 616 |
+
"\n",
|
| 617 |
+
"Let's load an agent playing Beam Rider: https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4"
|
| 618 |
+
]
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"cell_type": "code",
|
| 622 |
+
"execution_count": null,
|
| 623 |
+
"metadata": {
|
| 624 |
+
"id": "B-9QVFIROI5Y"
|
| 625 |
+
},
|
| 626 |
+
"outputs": [],
|
| 627 |
+
"source": [
|
| 628 |
+
"%%html\n",
|
| 629 |
+
"<video controls autoplay><source src=\"https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4/resolve/main/replay.mp4\" type=\"video/mp4\"></video>"
|
| 630 |
+
]
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"cell_type": "markdown",
|
| 634 |
+
"metadata": {
|
| 635 |
+
"id": "7ZQNY_r6NJtC"
|
| 636 |
+
},
|
| 637 |
+
"source": [
|
| 638 |
+
"1. We download the model using `rl_zoo3.load_from_hub`, and place it in a new folder that we can call `rl_trained`"
|
| 639 |
+
]
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"cell_type": "code",
|
| 643 |
+
"execution_count": null,
|
| 644 |
+
"metadata": {
|
| 645 |
+
"id": "OdBNZHy0NGTR"
|
| 646 |
+
},
|
| 647 |
+
"outputs": [],
|
| 648 |
+
"source": [
|
| 649 |
+
"# Download model and save it into the logs/ folder\n",
|
| 650 |
+
"!python -m rl_zoo3.load_from_hub --algo dqn --env BeamRiderNoFrameskip-v4 -orga sb3 -f rl_trained/"
|
| 651 |
+
]
|
| 652 |
+
},
|
| 653 |
+
{
|
| 654 |
+
"cell_type": "markdown",
|
| 655 |
+
"metadata": {
|
| 656 |
+
"id": "LFt6hmWsNdBo"
|
| 657 |
+
},
|
| 658 |
+
"source": [
|
| 659 |
+
"2. Let's evaluate if for 5000 timesteps"
|
| 660 |
+
]
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"cell_type": "code",
|
| 664 |
+
"execution_count": null,
|
| 665 |
+
"metadata": {
|
| 666 |
+
"id": "aOxs0rNuN0uS"
|
| 667 |
+
},
|
| 668 |
+
"outputs": [],
|
| 669 |
+
"source": [
|
| 670 |
+
"!python -m rl_zoo3.enjoy --algo dqn --env BeamRiderNoFrameskip-v4 -n 5000 -f rl_trained/ --no-render"
|
| 671 |
+
]
|
| 672 |
+
},
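{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you'd rather poke at the downloaded agent directly in Python instead of going through `enjoy`, here is a minimal sketch. The zip path follows the zoo's usual folder convention; double-check the exact path printed by `load_from_hub`.\n",
"\n",
"```python\n",
"from stable_baselines3 import DQN\n",
"from stable_baselines3.common.env_util import make_atari_env\n",
"from stable_baselines3.common.vec_env import VecFrameStack\n",
"\n",
"# Assumed path: check what load_from_hub actually printed\n",
"model = DQN.load(\"rl_trained/dqn/BeamRiderNoFrameskip-v4_1/BeamRiderNoFrameskip-v4.zip\")\n",
"\n",
"env = make_atari_env(\"BeamRiderNoFrameskip-v4\", n_envs=1)\n",
"env = VecFrameStack(env, n_stack=4)\n",
"\n",
"obs = env.reset()\n",
"for _ in range(1000):\n",
"    action, _ = model.predict(obs, deterministic=True)\n",
"    obs, rewards, dones, infos = env.step(action)\n",
"```"
]
},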
|
| 673 |
+
{
|
| 674 |
+
"cell_type": "markdown",
|
| 675 |
+
"metadata": {
|
| 676 |
+
"id": "kxMDuDfPON57"
|
| 677 |
+
},
|
| 678 |
+
"source": [
|
| 679 |
+
"Why not trying to train your own **Deep Q-Learning Agent playing BeamRiderNoFrameskip-v4? 🏆.**\n",
|
| 680 |
+
"\n",
|
| 681 |
+
"If you want to try, check https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4#hyperparameters **in the model card, you have the hyperparameters of the trained agent.**"
|
| 682 |
+
]
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"cell_type": "markdown",
|
| 686 |
+
"metadata": {
|
| 687 |
+
"id": "xL_ZtUgpOuY6"
|
| 688 |
+
},
|
| 689 |
+
"source": [
|
| 690 |
+
"But finding hyperparameters can be a daunting task. Fortunately, we'll see in the next Unit, how we can **use Optuna for optimizing the Hyperparameters 🔥.**\n"
|
| 691 |
+
]
|
| 692 |
+
},
|
| 693 |
+
{
|
| 694 |
+
"cell_type": "markdown",
|
| 695 |
+
"metadata": {
|
| 696 |
+
"id": "-pqaco8W-huW"
|
| 697 |
+
},
|
| 698 |
+
"source": [
|
| 699 |
+
"## Some additional challenges 🏆\n",
|
| 700 |
+
"The best way to learn **is to try things by your own**!\n",
|
| 701 |
+
"\n",
|
| 702 |
+
"In the [Leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) you will find your agents. Can you get to the top?\n",
|
| 703 |
+
"\n",
|
| 704 |
+
"Here's a list of environments you can try to train your agent with:\n",
|
| 705 |
+
"- BeamRiderNoFrameskip-v4\n",
|
| 706 |
+
"- BreakoutNoFrameskip-v4\n",
|
| 707 |
+
"- EnduroNoFrameskip-v4\n",
|
| 708 |
+
"- PongNoFrameskip-v4\n",
|
| 709 |
+
"\n",
|
| 710 |
+
"Also, **if you want to learn to implement Deep Q-Learning by yourself**, you definitely should look at CleanRL implementation: https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/dqn_atari.py\n",
|
| 711 |
+
"\n",
|
| 712 |
+
"<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit4/atari-envs.gif\" alt=\"Environments\"/>"
|
| 713 |
+
]
|
| 714 |
+
},
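{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you take on one of these challenges, a simple starting point is to add an entry for the new environment to your `dqn.yml` (mirroring the Space Invaders one), then reuse the same train and evaluate commands, for example:\n",
"\n",
"```\n",
"!python -m rl_zoo3.train --algo dqn --env BreakoutNoFrameskip-v4 -f logs/ -c dqn.yml\n",
"!python -m rl_zoo3.enjoy --algo dqn --env BreakoutNoFrameskip-v4 --no-render --n-timesteps 5000 --folder logs/\n",
"```"
]
},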
|
| 715 |
+
{
|
| 716 |
+
"cell_type": "markdown",
|
| 717 |
+
"metadata": {
|
| 718 |
+
"id": "paS-XKo4-kmu"
|
| 719 |
+
},
|
| 720 |
+
"source": [
|
| 721 |
+
"________________________________________________________________________\n",
|
| 722 |
+
"Congrats on finishing this chapter!\n",
|
| 723 |
+
"\n",
|
| 724 |
+
"If you’re still feel confused with all these elements...it's totally normal! **This was the same for me and for all people who studied RL.**\n",
|
| 725 |
+
"\n",
|
| 726 |
+
"Take time to really **grasp the material before continuing and try the additional challenges**. It’s important to master these elements and having a solid foundations.\n",
|
| 727 |
+
"\n",
|
| 728 |
+
"In the next unit, **we’re going to learn about [Optuna](https://optuna.org/)**. One of the most critical task in Deep Reinforcement Learning is to find a good set of training hyperparameters. And Optuna is a library that helps you to automate the search.\n",
|
| 729 |
+
"\n",
|
| 730 |
+
"\n"
|
| 731 |
+
]
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"cell_type": "markdown",
|
| 735 |
+
"metadata": {
|
| 736 |
+
"id": "5WRx7tO7-mvC"
|
| 737 |
+
},
|
| 738 |
+
"source": [
|
| 739 |
+
"\n",
|
| 740 |
+
"\n",
|
| 741 |
+
"### This is a course built with you 👷🏿♀️\n",
|
| 742 |
+
"\n",
|
| 743 |
+
"Finally, we want to improve and update the course iteratively with your feedback. If you have some, please fill this form 👉 https://forms.gle/3HgA7bEHwAmmLfwh9\n",
|
| 744 |
+
"\n",
|
| 745 |
+
"We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the Github Repo](https://github.com/huggingface/deep-rl-class/issues)."
|
| 746 |
+
]
|
| 747 |
+
},
|
| 748 |
+
{
|
| 749 |
+
"cell_type": "markdown",
|
| 750 |
+
"source": [
|
| 751 |
+
"See you on Bonus unit 2! 🔥"
|
| 752 |
+
],
|
| 753 |
+
"metadata": {
|
| 754 |
+
"id": "Kc3udPT-RcXc"
|
| 755 |
+
}
|
| 756 |
+
},
|
| 757 |
+
{
|
| 758 |
+
"cell_type": "markdown",
|
| 759 |
+
"metadata": {
|
| 760 |
+
"id": "fS3Xerx0fIMV"
|
| 761 |
+
},
|
| 762 |
+
"source": [
|
| 763 |
+
"### Keep Learning, Stay Awesome 🤗"
|
| 764 |
+
]
|
| 765 |
+
}
|
| 766 |
+
],
|
| 767 |
+
"metadata": {
|
| 768 |
+
"colab": {
|
| 769 |
+
"private_outputs": true,
|
| 770 |
+
"provenance": []
|
| 771 |
+
},
|
| 772 |
+
"kernelspec": {
|
| 773 |
+
"display_name": "Python 3 (ipykernel)",
|
| 774 |
+
"language": "python",
|
| 775 |
+
"name": "python3"
|
| 776 |
+
},
|
| 777 |
+
"language_info": {
|
| 778 |
+
"codemirror_mode": {
|
| 779 |
+
"name": "ipython",
|
| 780 |
+
"version": 3
|
| 781 |
+
},
|
| 782 |
+
"file_extension": ".py",
|
| 783 |
+
"mimetype": "text/x-python",
|
| 784 |
+
"name": "python",
|
| 785 |
+
"nbconvert_exporter": "python",
|
| 786 |
+
"pygments_lexer": "ipython3",
|
| 787 |
+
"version": "3.10.6"
|
| 788 |
+
},
|
| 789 |
+
"varInspector": {
|
| 790 |
+
"cols": {
|
| 791 |
+
"lenName": 16,
|
| 792 |
+
"lenType": 16,
|
| 793 |
+
"lenVar": 40
|
| 794 |
+
},
|
| 795 |
+
"kernels_config": {
|
| 796 |
+
"python": {
|
| 797 |
+
"delete_cmd_postfix": "",
|
| 798 |
+
"delete_cmd_prefix": "del ",
|
| 799 |
+
"library": "var_list.py",
|
| 800 |
+
"varRefreshCmd": "print(var_dic_list())"
|
| 801 |
+
},
|
| 802 |
+
"r": {
|
| 803 |
+
"delete_cmd_postfix": ") ",
|
| 804 |
+
"delete_cmd_prefix": "rm(",
|
| 805 |
+
"library": "var_list.r",
|
| 806 |
+
"varRefreshCmd": "cat(var_dic_list()) "
|
| 807 |
+
}
|
| 808 |
+
},
|
| 809 |
+
"types_to_exclude": [
|
| 810 |
+
"module",
|
| 811 |
+
"function",
|
| 812 |
+
"builtin_function_or_method",
|
| 813 |
+
"instance",
|
| 814 |
+
"_Feature"
|
| 815 |
+
],
|
| 816 |
+
"window_display": false
|
| 817 |
+
},
|
| 818 |
+
"accelerator": "GPU",
|
| 819 |
+
"gpuClass": "standard"
|
| 820 |
+
},
|
| 821 |
+
"nbformat": 4,
|
| 822 |
+
"nbformat_minor": 0
|
| 823 |
+
}
|