Buckets:

hf-doc-build
/

doc

Files

xet

hf-doc-build/doc / simulate /v0.0.2 /en /tutorials /rl_examples.html

rtrm

about 1 month ago

download

raw

15.2 kB

	<meta charset="utf-8" /><meta http-equiv="content-security-policy" content=""><meta name="hf:doc:metadata" content="{&quot;local&quot;:&quot;using-simulate-to-learn-agent-behaviors-with-stablebaselines3&quot;,&quot;sections&quot;:[{&quot;local&quot;:&quot;learning-to-navigate-in-a-simple-tmaze&quot;,&quot;title&quot;:&quot;Learning to navigate in a simple T-Maze&quot;},{&quot;local&quot;:&quot;collecting-objects&quot;,&quot;title&quot;:&quot;Collecting objects&quot;},{&quot;local&quot;:&quot;navigating-in-procedurally-generated-mazes&quot;,&quot;title&quot;:&quot;Navigating in procedurally generated mazes&quot;},{&quot;local&quot;:&quot;physical-interaction-with-movable-objects&quot;,&quot;title&quot;:&quot;Physical interaction with movable objects&quot;},{&quot;local&quot;:&quot;reward-functions-based-on-line-of-sight-observation-of-objects&quot;,&quot;title&quot;:&quot;Reward functions based on line of sight observation of objects.&quot;}],&quot;title&quot;:&quot;Using 🤗 Simulate to learn Agent behaviors with Stable-Baselines3&quot;}" data-svelte="svelte-1phssyn">
	<link rel="modulepreload" href="/docs/simulate/v0.0.2/en/_app/assets/pages/__layout.svelte-hf-doc-builder.css">
	<link rel="modulepreload" href="/docs/simulate/v0.0.2/en/_app/start-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/simulate/v0.0.2/en/_app/chunks/vendor-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/simulate/v0.0.2/en/_app/chunks/paths-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/simulate/v0.0.2/en/_app/pages/__layout.svelte-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/simulate/v0.0.2/en/_app/pages/tutorials/rl_examples.mdx-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/simulate/v0.0.2/en/_app/chunks/IconCopyLink-hf-doc-builder.js">






	<h1 class="relative group"><a id="using-simulate-to-learn-agent-behaviors-with-stablebaselines3" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-simulate-to-learn-agent-behaviors-with-stablebaselines3"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Using 🤗 Simulate to learn Agent behaviors with Stable-Baselines3
	</span></h1>

	<p>We provide several example RL integrations with the <a href="https://stable-baselines3.readthedocs.io/" rel="nofollow">Stable-Baselines3</a> library. To install this dependency use <code>pip install simulate[sb3]</code>.</p>
	<p>Including:</p>
	<ul><li>Learning to navigate in a simple T-Maze</li>
	<li>Collecting objects</li>
	<li>Navigating in procedurally generated mazes</li>
	<li>Physical interaction with movable objects</li>
	<li>Reward functions based on line of sight observation of objects.</li></ul>
	<h2 class="relative group"><a id="learning-to-navigate-in-a-simple-tmaze" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#learning-to-navigate-in-a-simple-tmaze"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Learning to navigate in a simple T-Maze
	</span></h2>

	<img class="!m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[600px]" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/simulate/simulate_sb3_basic_maze.png" alt="Simple T-Maze navigation environment">
	<p>Example: <a href="https://github.com/huggingface/simulate/blob/main/examples/rl/sb3_basic_maze.py" rel="nofollow"><code>sb3_basic_maze.py</code></a></p>
	<p>Objective: Navigate to a spherical object in a simple T-Maze. Upon object collection, the environment resets.</p>
	<p>Actors: An EgoCentric Camera Actor (LINK) equipped with a monocular camera.</p>
	<p>Observation space: </p>
	<ul><li>An RGB camera of shape (3, 40, 40) (C, H, W) in uint8 format.</li></ul>
	<p>Action space:</p>
	<ul><li>A discrete action space with 3 possible actions</li>
	<li>Turn left 10 degrees</li>
	<li>Turn right 10 degrees</li>
	<li>Move forward</li></ul>
	<p>Reward function:</p>
	<ul><li>A dense reward based on improvement in best Euclidean distance to the object</li>
	<li>A sparse reward of +1 when the object is collected</li>
	<li>A timeout penalty of -1 if the agent does not reach the object in 200 time-steps</li></ul>
	<p>Parallel: 4 independent instances of the same environment configuration. </p>
	<h2 class="relative group"><a id="collecting-objects" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#collecting-objects"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Collecting objects
	</span></h2>

	<img class="!m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[600px]" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/simulate/simulate_sb3_collectables.png" alt="Object collection environment">
	<p>Example: <a href="https://github.com/huggingface/simulate/blob/main/examples/rl/sb3_collectables.py" rel="nofollow"><code>sb3_collectables.py</code></a></p>
	<p>Objective: Collect all 20 objects in a large square room.</p>
	<p>Actors: An EgoCentric Camera Actor (LINK) equipped with a monocular camera.</p>
	<p>Observation space: </p>
	<ul><li>An RGB camera of shape (3, 40, 40) (C, H, W) in uint8 format.</li></ul>
	<p>Action space:</p>
	<ul><li>A discrete action space with 3 possible actions</li>
	<li>Turn left 10 degrees</li>
	<li>Turn right 10 degrees</li>
	<li>Move forward</li></ul>
	<p>Reward function:</p>
	<ul><li>A sparse reward of +1 when an object is collected</li>
	<li>A timeout penalty of -1 if the agent does not reach the object in 500 time-steps</li></ul>
	<p>Parallel: 4 independent instances of the same environment configuration. </p>
	<h2 class="relative group"><a id="navigating-in-procedurally-generated-mazes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#navigating-in-procedurally-generated-mazes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Navigating in procedurally generated mazes
	</span></h2>

	<img class="!m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[600px]" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/simulate/simulate_sb3_procgen.png" alt="Procedurally generated maze environment">
	<p>Example: <a href="https://github.com/huggingface/simulate/blob/main/examples/rl/sb3_procgen.py" rel="nofollow"><code>sb3_procgen.py</code></a></p>
	<p>Objective: Navigate to an object in a 3D maze. When the object is collected, the environment resets.</p>
	<p>Actors: An EgoCentric Camera Actor (LINK) equipped with a monocular camera</p>
	<p>Observation space: </p>
	<ul><li>An RGB camera of shape (3, 40, 40) (C, H, W) in uint8 format.</li></ul>
	<p>Action space:</p>
	<ul><li>A discrete action space with 3 possible actions</li>
	<li>Turn left 10 degrees</li>
	<li>Turn right 10 degrees</li>
	<li>Move forward</li></ul>
	<p>Reward function:</p>
	<ul><li>A sparse reward of +1 when the object is reached</li>
	<li>A timeout penalty of -1 if the agent does not reach the object in 500 time-steps</li></ul>
	<p>Parallel: 4 independent instances of randomly generated environment configurations.</p>
	<h2 class="relative group"><a id="physical-interaction-with-movable-objects" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#physical-interaction-with-movable-objects"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Physical interaction with movable objects
	</span></h2>

	<img class="!m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[600px]" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/simulate/simulate_sb3_move_boxes.png" alt="Movable boxes environment">
	<p>Example: <a href="https://github.com/huggingface/simulate/blob/main/examples/rl/sb3_move_boxes.py" rel="nofollow"><code>sb3_move_boxes.py</code></a></p>
	<p>Objective: Push boxes in a room near to each other.</p>
	<p>Actors: An EgoCentric Camera Actor (LINK) equipped with a monocular camera</p>
	<p>Observation space: </p>
	<ul><li>An RGB camera of shape (3, 40, 40) (C, H, W) in uint8 format.</li></ul>
	<p>Action space:</p>
	<ul><li>A discrete action space with 3 possible actions</li>
	<li>Turn left 10 degrees</li>
	<li>Turn right 10 degrees</li>
	<li>Move forward</li></ul>
	<p>Reward function:</p>
	<ul><li>A reward for moving the red and yellow boxes close to each other</li>
	<li>A reward for moving the green and white boxes close to each other</li>
	<li>A timeout penalty of -1 if the agent does not reach the object in 100 time-steps</li></ul>
	<p>Parallel: 16 independent instances of the same environment configuration.</p>
	<h2 class="relative group"><a id="reward-functions-based-on-line-of-sight-observation-of-objects" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#reward-functions-based-on-line-of-sight-observation-of-objects"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Reward functions based on line of sight observation of objects.
	</span></h2>

	<img class="!m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[600px]" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/simulate/simulate_sb3_see_reward.png" alt="Line-of-sight reward environment">
	<p>Example: <a href="https://github.com/huggingface/simulate/blob/main/examples/rl/sb3_visual_reward.py" rel="nofollow"><code>sb3_visual_reward.py</code></a></p>
	<p>Objective: Move the agent so the box is within the agent's field of view.</p>
	<p>Actors: An EgoCentric Camera Actor (LINK) equipped with a monocular camera</p>
	<p>Observation space: </p>
	<ul><li>An RGB camera of shape (3, 40, 40) (C, H, W) in uint8 format.</li></ul>
	<p>Action space:</p>
	<ul><li>A discrete action space with 3 possible actions</li>
	<li>Turn left 10 degrees</li>
	<li>Turn right 10 degrees</li>
	<li>Move forward</li></ul>
	<p>Reward function:</p>
	<ul><li>A sparse reward for moving the box within a 60-degree field-of-view (FOV) cone in front of the agent.</li>
	<li>A timeout penalty of -1 if the agent does not reach the object in 100 time-steps</li></ul>
	<p>Parallel: 4 independent instances of the same environment configuration.</p>


	<script type="module" data-hydrate="1xwd4ja">
	// Hydration bootstrap: imports `start` from the doc-builder bundle and
	// invokes it once with the configuration assembled below.
	import { start } from "/docs/simulate/v0.0.2/en/_app/start-hf-doc-builder.js";

	// Passed to start() as `target`: the parent node of this script element,
	// located through the script's own data-hydrate marker.
	const hydrateTarget = document.querySelector('[data-hydrate="1xwd4ja"]').parentNode;

	// Dynamic imports handed to start() as `hydrate.nodes`:
	// the shared layout module first, then this page's module.
	const hydrateNodes = [
		import("/docs/simulate/v0.0.2/en/_app/pages/__layout.svelte-hf-doc-builder.js"),
		import("/docs/simulate/v0.0.2/en/_app/pages/tutorials/rl_examples.mdx-hf-doc-builder.js")
	];

	start({
		target: hydrateTarget,
		paths: {"base":"/docs/simulate/v0.0.2/en","assets":"/docs/simulate/v0.0.2/en"},
		session: {},
		route: false,
		spa: false,
		trailing_slash: "never",
		hydrate: {
			status: 200,
			error: null,
			nodes: hydrateNodes,
			params: {}
		}
	});
	</script>

Xet Storage Details

Size: 15.2 kB
Xet hash: c499a47d631ef2b6468d8552c35cb35b701d2cdb7d799b5bb25e07b31dc213f1

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.