Buckets:

hf-doc-build/doc-dev / openenv /pr_749 /en /tutorials /openenv-tutorial.html
download
raw
179 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;OpenEnv: Production RL Made Simple&quot;,&quot;local&quot;:&quot;openenv-production-rl-made-simple&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;From “Hello World” to RL Training in 5 Minutes ✨&quot;,&quot;local&quot;:&quot;from-hello-world-to-rl-training-in-5-minutes-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Why OpenEnv?&quot;,&quot;local&quot;:&quot;why-openenv&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;📋 What You’ll Learn&quot;,&quot;local&quot;:&quot;-what-youll-learn&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;📑 Table of Contents&quot;,&quot;local&quot;:&quot;-table-of-contents&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Foundation&quot;,&quot;local&quot;:&quot;foundation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Architecture&quot;,&quot;local&quot;:&quot;architecture&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Hands-On Demo&quot;,&quot;local&quot;:&quot;hands-on-demo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Advanced&quot;,&quot;local&quot;:&quot;advanced&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Wrap Up&quot;,&quot;local&quot;:&quot;wrap-up&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 1: RL in 60 Seconds ⏱️&quot;,&quot;local&quot;:&quot;part-1-rl-in-60-seconds-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 2: The Problem with Traditional RL 😤&quot;,&quot;local&quot;:&quot;part-2-the-problem-with-traditional-rl-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;🤔 Why Can’t We Just Use OpenAI Gym?&quot;,&quot;local&quot;:&quot;-why-cant-we-just-use-openai-gym&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;💡 
The OpenEnv Philosophy&quot;,&quot;local&quot;:&quot;-the-openenv-philosophy&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;The Architecture&quot;,&quot;local&quot;:&quot;the-architecture&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 4: The OpenEnv Pattern 🏗️&quot;,&quot;local&quot;:&quot;part-4-the-openenv-pattern-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Every OpenEnv Environment Has 3 Components:&quot;,&quot;local&quot;:&quot;every-openenv-environment-has-3-components&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 5: Example Integration - OpenSpiel 🎮&quot;,&quot;local&quot;:&quot;part-5-example-integration---openspiel-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What is OpenSpiel?&quot;,&quot;local&quot;:&quot;what-is-openspiel&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;OpenEnv’s Integration&quot;,&quot;local&quot;:&quot;openenvs-integration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Type-Safe Models&quot;,&quot;local&quot;:&quot;type-safe-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How the Client Works&quot;,&quot;local&quot;:&quot;how-the-client-works&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 6: Using Real OpenSpiel 🎮&quot;,&quot;local&quot;:&quot;part-6-using-real-openspiel-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Now let’s USE a production environment!&quot;,&quot;local&quot;:&quot;now-lets-use-a-production-environment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;The Game: Catch 🔴🏓&quot;,&quot;local&quot;:&quot;the-game-catch-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 7: Four Policies 
🤖&quot;,&quot;local&quot;:&quot;part-7-four-policies-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 8: Policy Competition! 🏆&quot;,&quot;local&quot;:&quot;part-8-policy-competition-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 9: Switching to Other Games 🎮&quot;,&quot;local&quot;:&quot;part-9-switching-to-other-games-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What We Just Used: Real OpenSpiel! 🎉&quot;,&quot;local&quot;:&quot;what-we-just-used-real-openspiel-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🎮 6 Games Available - Same Interface!&quot;,&quot;local&quot;:&quot;-6-games-available---same-interface&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Try Another Game (Optional):&quot;,&quot;local&quot;:&quot;try-another-game-optional&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 10: Create Your Own Integration 🛠️&quot;,&quot;local&quot;:&quot;part-10-create-your-own-integration-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The 5-Step Pattern&quot;,&quot;local&quot;:&quot;the-5-step-pattern&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 1: Define Types ( models.py )&quot;,&quot;local&quot;:&quot;step-1-define-types--modelspy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 2: Implement Environment ( server/environment.py )&quot;,&quot;local&quot;:&quot;step-2-implement-environment--serverenvironmentpy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 3: Create Client ( client.py )&quot;,&quot;local&quot;:&quot;step-3-create-client--clientpy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 4: Create Server ( server/app.py 
)&quot;,&quot;local&quot;:&quot;step-4-create-server--serverapppy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 5: Dockerize ( server/Dockerfile )&quot;,&quot;local&quot;:&quot;step-5-dockerize--serverdockerfile-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🎓 Examples to Study&quot;,&quot;local&quot;:&quot;-examples-to-study&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;🎓 Summary: Your Journey&quot;,&quot;local&quot;:&quot;-summary-your-journey&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What You Learned&quot;,&quot;local&quot;:&quot;what-you-learned&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;📚 Concepts&quot;,&quot;local&quot;:&quot;-concepts&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🛠️ Skills&quot;,&quot;local&quot;:&quot;-skills&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;OpenEnv vs Traditional RL&quot;,&quot;local&quot;:&quot;openenv-vs-traditional-rl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;📚 Resources&quot;,&quot;local&quot;:&quot;-resources&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;🔗 Essential Links&quot;,&quot;local&quot;:&quot;-essential-links&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;📖 Documentation Deep Dives&quot;,&quot;local&quot;:&quot;-documentation-deep-dives&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🎓 Community &amp; Support&quot;,&quot;local&quot;:&quot;-community--support&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🌈 What’s Next?&quot;,&quot;local&quot;:&quot;-whats-next&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/openenv/pr_749/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/entry/start.85477f45.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/scheduler.2b22cead.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/singletons.63566282.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/paths.dd876c7b.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/entry/app.51835dc5.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/preload-helper.0820fbc7.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/index.1a0e8013.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/nodes/0.167255c0.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/nodes/62.e8898b85.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/Heading.c0d3f116.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.21bcf336.js">
<link rel="modulepreload" href="/docs/openenv/pr_749/en/_app/immutable/chunks/CodeBlock.c8d73295.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;OpenEnv: Production RL Made Simple&quot;,&quot;local&quot;:&quot;openenv-production-rl-made-simple&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;From “Hello World” to RL Training in 5 Minutes ✨&quot;,&quot;local&quot;:&quot;from-hello-world-to-rl-training-in-5-minutes-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Why OpenEnv?&quot;,&quot;local&quot;:&quot;why-openenv&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;📋 What You’ll Learn&quot;,&quot;local&quot;:&quot;-what-youll-learn&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;📑 Table of Contents&quot;,&quot;local&quot;:&quot;-table-of-contents&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Foundation&quot;,&quot;local&quot;:&quot;foundation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Architecture&quot;,&quot;local&quot;:&quot;architecture&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Hands-On Demo&quot;,&quot;local&quot;:&quot;hands-on-demo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Advanced&quot;,&quot;local&quot;:&quot;advanced&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Wrap Up&quot;,&quot;local&quot;:&quot;wrap-up&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 1: RL in 60 Seconds ⏱️&quot;,&quot;local&quot;:&quot;part-1-rl-in-60-seconds-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 2: The Problem with Traditional RL 😤&quot;,&quot;local&quot;:&quot;part-2-the-problem-with-traditional-rl-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;🤔 Why Can’t We Just Use OpenAI 
Gym?&quot;,&quot;local&quot;:&quot;-why-cant-we-just-use-openai-gym&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;💡 The OpenEnv Philosophy&quot;,&quot;local&quot;:&quot;-the-openenv-philosophy&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;The Architecture&quot;,&quot;local&quot;:&quot;the-architecture&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 4: The OpenEnv Pattern 🏗️&quot;,&quot;local&quot;:&quot;part-4-the-openenv-pattern-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Every OpenEnv Environment Has 3 Components:&quot;,&quot;local&quot;:&quot;every-openenv-environment-has-3-components&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 5: Example Integration - OpenSpiel 🎮&quot;,&quot;local&quot;:&quot;part-5-example-integration---openspiel-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What is OpenSpiel?&quot;,&quot;local&quot;:&quot;what-is-openspiel&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;OpenEnv’s Integration&quot;,&quot;local&quot;:&quot;openenvs-integration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Type-Safe Models&quot;,&quot;local&quot;:&quot;type-safe-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;How the Client Works&quot;,&quot;local&quot;:&quot;how-the-client-works&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 6: Using Real OpenSpiel 🎮&quot;,&quot;local&quot;:&quot;part-6-using-real-openspiel-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Now let’s USE a production environment!&quot;,&quot;local&quot;:&quot;now-lets-use-a-production-environment&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;The Game: Catch 
🔴🏓&quot;,&quot;local&quot;:&quot;the-game-catch-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 7: Four Policies 🤖&quot;,&quot;local&quot;:&quot;part-7-four-policies-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 8: Policy Competition! 🏆&quot;,&quot;local&quot;:&quot;part-8-policy-competition-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 9: Switching to Other Games 🎮&quot;,&quot;local&quot;:&quot;part-9-switching-to-other-games-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What We Just Used: Real OpenSpiel! 🎉&quot;,&quot;local&quot;:&quot;what-we-just-used-real-openspiel-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🎮 6 Games Available - Same Interface!&quot;,&quot;local&quot;:&quot;-6-games-available---same-interface&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Try Another Game (Optional):&quot;,&quot;local&quot;:&quot;try-another-game-optional&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Part 10: Create Your Own Integration 🛠️&quot;,&quot;local&quot;:&quot;part-10-create-your-own-integration-&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;The 5-Step Pattern&quot;,&quot;local&quot;:&quot;the-5-step-pattern&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 1: Define Types ( models.py )&quot;,&quot;local&quot;:&quot;step-1-define-types--modelspy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 2: Implement Environment ( server/environment.py )&quot;,&quot;local&quot;:&quot;step-2-implement-environment--serverenvironmentpy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 3: Create Client ( client.py 
)&quot;,&quot;local&quot;:&quot;step-3-create-client--clientpy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 4: Create Server ( server/app.py )&quot;,&quot;local&quot;:&quot;step-4-create-server--serverapppy-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 5: Dockerize ( server/Dockerfile )&quot;,&quot;local&quot;:&quot;step-5-dockerize--serverdockerfile-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🎓 Examples to Study&quot;,&quot;local&quot;:&quot;-examples-to-study&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;🎓 Summary: Your Journey&quot;,&quot;local&quot;:&quot;-summary-your-journey&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What You Learned&quot;,&quot;local&quot;:&quot;what-you-learned&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;📚 Concepts&quot;,&quot;local&quot;:&quot;-concepts&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🛠️ Skills&quot;,&quot;local&quot;:&quot;-skills&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;OpenEnv vs Traditional RL&quot;,&quot;local&quot;:&quot;openenv-vs-traditional-rl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;📚 Resources&quot;,&quot;local&quot;:&quot;-resources&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;🔗 Essential Links&quot;,&quot;local&quot;:&quot;-essential-links&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;📖 Documentation Deep Dives&quot;,&quot;local&quot;:&quot;-documentation-deep-dives&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🎓 Community &amp; Support&quot;,&quot;local&quot;:&quot;-community--support&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🌈 What’s 
Next?&quot;,&quot;local&quot;:&quot;-whats-next&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" 
xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="openenv-production-rl-made-simple" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#openenv-production-rl-made-simple"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>OpenEnv: Production RL Made Simple</span></h1> <div align="center"><img src="https://upload.wikimedia.org/wikipedia/commons/1/10/PyTorch_logo_icon.svg" width="200" alt="PyTorch"> <h2 class="relative group"><a id="from-hello-world-to-rl-training-in-5-minutes-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-hello-world-to-rl-training-in-5-minutes-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 
11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From “Hello World” to RL Training in 5 Minutes ✨</span></h2> <p data-svelte-h="svelte-dqwr3"><strong>What if RL environments were as easy to use as REST APIs?</strong></p> <p data-svelte-h="svelte-1dgvh9j">That’s OpenEnv. Type-safe. Isolated. Production-ready. 🎯</p> <p data-svelte-h="svelte-10npbdt"><a href="https://colab.research.google.com/github/huggingface/OpenEnv/blob/main/examples/OpenEnv_Tutorial.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <a href="https://github.com/huggingface/OpenEnv" rel="nofollow"><img src="https://img.shields.io/badge/GitHub-huggingface%2FOpenEnv-blue?logo=github" alt="GitHub"></a> <a href="https://opensource.org/licenses/BSD-3-Clause" rel="nofollow"><img src="https://img.shields.io/badge/License-BSD%203--Clause-green.svg" alt="License"></a> <a href="https://pytorch.org/" rel="nofollow"><img src="https://img.shields.io/badge/PyTorch-EE4C2C?logo=pytorch&logoColor=white" alt="PyTorch"></a></p> <p data-svelte-h="svelte-wo7gof">Author: Sanyam Bhutani</p></div> <h2 class="relative group"><a id="why-openenv" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-openenv"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 
0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why OpenEnv?</span></h2> <p data-svelte-h="svelte-147i8a1">Let’s take a trip down memory lane:</p> <p data-svelte-h="svelte-1mep8fs">It’s 2016, RL is popular. You read some papers, it looks promising.</p> <p data-svelte-h="svelte-3h5zwk">But in the real world: Cartpole is the best you can run on a gaming GPU.</p> <p data-svelte-h="svelte-1pwvial">What do you do beyond Cartpole?</p> <p data-svelte-h="svelte-u3ba3d">Fast-forward to 2025, GRPO is awesome and this time it’s not JUST in theory, it works well in practice and is really here!</p> <p data-svelte-h="svelte-13dejvb">The problem still remains: how do you take these RL algorithms and take them beyond Cartpole?</p> <p data-svelte-h="svelte-a6sebp">A huge part of RL is giving your algorithms access to environments to learn from.</p> <p data-svelte-h="svelte-1t9nmmv">We are excited to introduce an Environment Spec for adding Open Environments for RL Training. 
This will allow you to focus on your experiments and allow everyone to bring their environments.</p> <p data-svelte-h="svelte-1wh8n1a">Focus on experiments, use OpenEnvironments, and build agents that go beyond Cartpole on a single spec.</p> <hr> <h2 class="relative group"><a id="-what-youll-learn" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-what-youll-learn"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>📋 What You’ll Learn</span></h2> <table data-svelte-h="svelte-1ujwz7q"><tr><td width="50%"><p><strong>🎯 Part 1-2: The Fundamentals</strong></p> <ul><li>⚡ RL in 60 seconds</li> <li>🤔 Why existing solutions fall short</li> <li>💡 The OpenEnv solution</li></ul></td> <td width="50%"><p><strong>🏗️ Part 3-5: The Architecture</strong></p> <ul><li>🔧 How OpenEnv works</li> <li>🔍 Exploring real code</li> <li>🎮 OpenSpiel integration example</li></ul></td></tr> <tr><td width="50%"><p><strong>🎮 Part 6-8: Hands-On Demo</strong></p> <ul><li>🔌 Use existing OpenSpiel environment</li> <li>🤖 Test 4 different policies</li> <li>👀 Watch learning happen live</li></ul></td> <td width="50%"><p><strong>🔧 Part 9-10: Going Further</strong></p> 
<ul><li>🎮 Switch to other OpenSpiel games</li> <li>✨ Build your own integration</li> <li>🌐 Deploy to production</li></ul></td></tr></table> <p data-svelte-h="svelte-1wlgfct"><strong>Pro Tip:</strong>
This notebook is designed to run top-to-bottom in Google Colab with zero setup!</p> <p data-svelte-h="svelte-xqa9wu">⏱️ <strong>Time</strong>: ~5 minutes | 📊 <strong>Difficulty</strong>: Beginner-friendly | 🎯 <strong>Outcome</strong>: Production-ready RL knowledge</p> <hr> <h2 class="relative group"><a id="-table-of-contents" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-table-of-contents"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>📑 Table of Contents</span></h2> <h3 class="relative group"><a id="foundation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#foundation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 
0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Foundation</span></h3> <ul data-svelte-h="svelte-16zxjx5"><li><a href="#part-1-rl-in-60-seconds-">Part 1: RL in 60 Seconds ⏱️</a></li> <li><a href="#part-2-the-problem-with-traditional-rl-">Part 2: The Problem with Traditional RL 😤</a></li> <li><a href="#part-3-setup">Part 3: Setup 🛠️</a></li></ul> <h3 class="relative group"><a id="architecture" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#architecture"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Architecture</span></h3> <ul data-svelte-h="svelte-1phxpcb"><li><a href="#part-4-the-openenv-pattern-">Part 4: The OpenEnv Pattern 🏗️</a></li> <li><a href="#part-5-example-integration---openspiel-">Part 5: Example Integration - OpenSpiel 🎮</a></li></ul> <h3 class="relative group"><a id="hands-on-demo" class="header-link block pr-1.5 
text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#hands-on-demo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Hands-On Demo</span></h3> <ul data-svelte-h="svelte-412jsu"><li><a href="#part-6-using-real-openspiel-">Part 6: Using Real OpenSpiel 🎮</a></li> <li><a href="#part-7-four-policies-">Part 7: Four Policies 🤖</a></li> <li><a href="#part-8-policy-competition-">Part 8: Policy Competition! 
🏆</a></li></ul> <h3 class="relative group"><a id="advanced" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced</span></h3> <ul data-svelte-h="svelte-whbwpr"><li><a href="#part-9-switching-to-other-games-">Part 9: Switching to Other Games 🎮</a></li> <li><a href="#part-10-create-your-own-integration-">Part 10: Create Your Own Integration 🛠️</a></li></ul> <h3 class="relative group"><a id="wrap-up" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wrap-up"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 
1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Wrap Up</span></h3> <ul data-svelte-h="svelte-1rdx1k6"><li><a href="#summary-your-journey">Summary: Your Journey 🎓</a></li> <li><a href="#resources">Resources 📚</a></li></ul> <hr> <p data-svelte-h="svelte-1ytaih1">(part-1-rl-in-60-seconds)=</p> <h2 class="relative group"><a id="part-1-rl-in-60-seconds-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-1-rl-in-60-seconds-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 1: RL in 60 Seconds ⏱️</span></h2> <p data-svelte-h="svelte-1sra4qo"><strong>Reinforcement Learning is simpler than you think.</strong></p> <p data-svelte-h="svelte-1hq0szu">It’s just a loop:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 
text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">while</span> <span class="hljs-keyword">not</span> done:
observation = environment.observe()
action = policy.choose(observation)
reward = environment.step(action)
policy.learn(reward)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12623bd">That’s it. That’s RL.</p> <p data-svelte-h="svelte-se3du">Let’s see it in action:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> random
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🎲 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">58</span> + <span class="hljs-string">&quot; 🎲&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; Number Guessing Game - The Simplest RL Example&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🎲 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">58</span> + <span class="hljs-string">&quot; 🎲&quot;</span>)
<span class="hljs-comment"># Environment setup</span>
target = random.randint(<span class="hljs-number">1</span>, <span class="hljs-number">10</span>)
guesses_left = <span class="hljs-number">3</span>
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n🎯 I&#x27;m thinking of a number between 1 and 10...&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;💭 You have <span class="hljs-subst">{guesses_left}</span> guesses. Let&#x27;s see how random guessing works!\n&quot;</span>)
<span class="hljs-comment"># The RL Loop - Pure random policy (no learning!)</span>
<span class="hljs-keyword">while</span> guesses_left &gt; <span class="hljs-number">0</span>:
    <span class="hljs-comment"># Policy: Random guessing (no learning yet!)</span>
    guess = random.randint(<span class="hljs-number">1</span>, <span class="hljs-number">10</span>)
    guesses_left -= <span class="hljs-number">1</span>
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;💭 Guess #<span class="hljs-subst">{<span class="hljs-number">3</span>-guesses_left}</span>: <span class="hljs-subst">{guess}</span>&quot;</span>, end=<span class="hljs-string">&quot; → &quot;</span>)
    <span class="hljs-comment"># Reward signal (but we&#x27;re not using it!)</span>
    <span class="hljs-keyword">if</span> guess == target:
        <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🎉 Correct! +10 points&quot;</span>)
        <span class="hljs-keyword">break</span>
    <span class="hljs-keyword">elif</span> <span class="hljs-built_in">abs</span>(guess - target) &lt;= <span class="hljs-number">2</span>:
        <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🔥 Warm! (close)&quot;</span>)
    <span class="hljs-keyword">else</span>:
        <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;❄️ Cold! (far)&quot;</span>)
<span class="hljs-keyword">else</span>:
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n💔 Out of guesses. The number was <span class="hljs-subst">{target}</span>.&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n&quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">62</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;💡 This is RL: Observe → Act → Reward → Repeat&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; But this policy is terrible! It doesn&#x27;t learn from rewards.&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">62</span> + <span class="hljs-string">&quot;\n&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mvdyro"><strong>Output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->🎲 ========================================================== 🎲
<span class="hljs-code"> Number Guessing Game - The Simplest RL Example</span>
🎲 ========================================================== 🎲
🎯 I&#x27;m thinking of a number between 1 and 10...
💭 You have 3 guesses. Let&#x27;s see how random guessing works!
💭 Guess #1: 2 → ❄️ Cold! (far)
💭 Guess #2: 10 → 🎉 Correct! +10 points
==============================================================
💡 This is RL: Observe → Act → Reward → Repeat
But this policy is terrible! It doesn&#x27;t learn from rewards.
==============================================================<!-- HTML_TAG_END --></pre></div> <hr> <p data-svelte-h="svelte-5oozyk">(part-2-the-problem-with-traditional-rl)=</p> <h2 class="relative group"><a id="part-2-the-problem-with-traditional-rl-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-2-the-problem-with-traditional-rl-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 2: The Problem with Traditional RL 😤</span></h2> <h3 class="relative group"><a id="-why-cant-we-just-use-openai-gym" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-why-cant-we-just-use-openai-gym"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 
8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🤔 Why Can’t We Just Use OpenAI Gym?</span></h3> <p data-svelte-h="svelte-i67p0s">Good question! Gym is great for research, but production needs more…</p> <table data-svelte-h="svelte-18a95on"><thead><tr><th>Challenge</th> <th>Traditional Approach</th> <th>OpenEnv Solution</th></tr></thead> <tbody><tr><td><strong>Type Safety</strong></td> <td><code>obs[0][3]</code> - what is this?</td> <td><code>obs.info_state</code> - IDE knows!</td></tr> <tr><td><strong>Isolation</strong></td> <td>❌ Same process (can crash your training)</td> <td>✅ Docker containers (fully isolated)</td></tr> <tr><td><strong>Deployment</strong></td> <td>❌ “Works on my machine” 🤷</td> <td>✅ Same container everywhere 🐳</td></tr> <tr><td><strong>Scaling</strong></td> <td>❌ Hard to distribute</td> <td>✅ Deploy to Kubernetes ☸️</td></tr> <tr><td><strong>Language</strong></td> <td>❌ Python only</td> <td>✅ Any language (HTTP API) 🌐</td></tr> <tr><td><strong>Debugging</strong></td> <td>❌ Cryptic numpy errors</td> <td>✅ Clear type errors 🐛</td></tr></tbody></table> <h3 class="relative group"><a id="-the-openenv-philosophy" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-the-openenv-philosophy"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 
11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>💡 The OpenEnv Philosophy</span></h3> <p data-svelte-h="svelte-5fc4nf"><strong>“RL environments should be like microservices”</strong></p> <p data-svelte-h="svelte-q35fwk">Think of it like this: You don’t run your database in the same process as your web server, right? Same principle!</p> <ul data-svelte-h="svelte-r5r65e"><li>🔒 <strong>Isolated</strong>: Run in containers (security + stability)</li> <li>🌐 <strong>Standard</strong>: HTTP API, works everywhere</li> <li>📦 <strong>Versioned</strong>: Docker images (reproducibility!)</li> <li>🚀 <strong>Scalable</strong>: Deploy to cloud with one command</li> <li>🛡️ <strong>Type-safe</strong>: Catch bugs before they happen</li> <li>🔄 <strong>Portable</strong>: Works on Mac, Linux, Windows, Cloud</li></ul> <h3 class="relative group"><a id="the-architecture" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#the-architecture"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 
0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>The Architecture</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->┌────────────────────────────────────────────────────────────┐
│ YOUR TRAINING CODE │
│ │
│ env = OpenSpielEnv(...) ← Import the client │
│ result = env.reset() ← Type-safe! │
│ result = env.step(action) ← Type-safe! │
│ │
└─────────────────┬──────────────────────────────────────────┘
│ HTTP/JSON (Language-Agnostic)
│ POST /reset, POST /step, GET /<span class="hljs-keyword">state</span>
┌─────────────────▼──────────────────────────────────────────┐
│ DOCKER CONTAINER │
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ FastAPI Server │ │
│ │ └─ Environment (reset, step, <span class="hljs-keyword">state</span>) │ │
│ │ └─ Your Game/Simulation Logic │ │
│ └──────────────────────────────────────────────┘ │
│ │
│ Isolated • Reproducible • Secure │
└────────────────────────────────────────────────────────────┘<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ryzf6q"><strong>Key Insight:</strong>
You never see HTTP details - just clean Python methods!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->env.reset() <span class="hljs-comment"># Under the hood: HTTP POST to /reset</span>
env.step(...) <span class="hljs-comment"># Under the hood: HTTP POST to /step</span>
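env.state() <span class="hljs-comment"># Under the hood: HTTP GET to /state</span><!-- HTML_TAG_END --></pre></div>
<p>The magic? OpenEnv handles all the plumbing. You focus on RL! ✨</p>
<hr>
<a id="part-3-setup"></a>
<h2 class="relative group"><a id="part-3-setup-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-3-setup-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 3: Setup 🛠️</span></h2>
<p><strong>Running in Colab?</strong> This cell will clone OpenEnv and install dependencies automatically.</p>
<p><strong>Running locally?</strong> Make sure you’re in the OpenEnv directory.</p>
<div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-comment"># Detect environment</span>
<span class="hljs-keyword">try</span>:
    <span class="hljs-keyword">import</span> google.colab
    IN_COLAB = <span class="hljs-literal">True</span>
    <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🌐 Running in Google Colab - Perfect!&quot;</span>)
<span class="hljs-keyword">except</span> ImportError:
    IN_COLAB = <span class="hljs-literal">False</span>
    <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;💻 Running locally - Nice!&quot;</span>)
<span class="hljs-keyword">if</span> IN_COLAB:
    <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n📦 Cloning OpenEnv repository...&quot;</span>)
    !git clone https://github.com/huggingface/OpenEnv.git &gt; /dev/null 2&gt;&amp;1
    %cd OpenEnv
    <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;📚 Installing dependencies (this takes ~10 seconds)...&quot;</span>)
    !pip install -q fastapi uvicorn requests
    <span class="hljs-keyword">import</span> sys
    sys.path.insert(<span class="hljs-number">0</span>, <span class="hljs-string">&#x27;./src&#x27;</span>)
    <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n✅ Setup complete! Everything is ready to go! 🎉&quot;</span>)
<span class="hljs-keyword">else</span>:
    <span class="hljs-keyword">import</span> sys
    <span class="hljs-keyword">from</span> pathlib <span class="hljs-keyword">import</span> Path
    sys.path.insert(<span class="hljs-number">0</span>, <span class="hljs-built_in">str</span>(Path.cwd().parent / <span class="hljs-string">&#x27;src&#x27;</span>))
    <span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;✅ Using local OpenEnv installation&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n🚀 Ready to explore OpenEnv and build amazing things!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;💡 Tip: Run cells top-to-bottom for the best experience.\n&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mvdyro"><strong>Output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->💻 Running locally - Nice!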
✅ Using local OpenEnv <span class="hljs-keyword">installation
</span>
🚀 Ready to explore OpenEnv <span class="hljs-keyword">and </span><span class="hljs-keyword">build </span>amazing things!
💡 Tip: Run cells top-to-<span class="hljs-keyword">bottom </span>for the <span class="hljs-keyword">best </span>experience.<!-- HTML_TAG_END --></pre></div> <hr> <p data-svelte-h="svelte-5vdevt">(part-4-the-openenv-pattern)=</p> <h2 class="relative group"><a id="part-4-the-openenv-pattern-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-4-the-openenv-pattern-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 4: The OpenEnv Pattern 🏗️</span></h2> <h3 class="relative group"><a id="every-openenv-environment-has-3-components" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#every-openenv-environment-has-3-components"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 
0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Every OpenEnv Environment Has 3 Components:</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START --><span class="hljs-attribute">src</span>/envs/your_env/
├── 📝 models<span class="hljs-selector-class">.py</span> ← Type-safe contracts
│ (Action, Observation, State)
├── 📱 client<span class="hljs-selector-class">.py</span> ← What YOU import
│ (HTTPEnvClient implementation)
└── 🖥️ server/
├── environment<span class="hljs-selector-class">.py</span> ← Game/simulation logic
├── app<span class="hljs-selector-class">.py</span> ← FastAPI server
└── Dockerfile ← Container definition<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13qkwtj">Let’s explore the actual OpenEnv code to see how this works:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-comment"># Import OpenEnv&#x27;s core abstractions</span>
<span class="hljs-keyword">from</span> core.env_server <span class="hljs-keyword">import</span> Environment, Action, Observation, State
<span class="hljs-keyword">from</span> core.http_env_client <span class="hljs-keyword">import</span> HTTPEnvClient
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 🧩 OPENENV CORE ABSTRACTIONS&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;&quot;&quot;
🖥️ SERVER SIDE (runs in Docker):
class Environment(ABC):
&#x27;&#x27;&#x27;Base class for all environment implementations&#x27;&#x27;&#x27;
@abstractmethod
def reset(self) -&gt; Observation:
&#x27;&#x27;&#x27;Start new episode&#x27;&#x27;&#x27;
@abstractmethod
def step(self, action: Action) -&gt; Observation:
&#x27;&#x27;&#x27;Execute action, return observation&#x27;&#x27;&#x27;
@property
def state(self) -&gt; State:
&#x27;&#x27;&#x27;Get episode metadata&#x27;&#x27;&#x27;
📱 CLIENT SIDE (your training code):
class HTTPEnvClient(ABC):
&#x27;&#x27;&#x27;Base class for HTTP clients&#x27;&#x27;&#x27;
def reset(self) -&gt; StepResult:
# HTTP POST /reset
def step(self, action) -&gt; StepResult:
# HTTP POST /step
def state(self) -&gt; State:
# HTTP GET /state
&quot;&quot;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n✨ Same interface on both sides - communication via HTTP!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🎯 You focus on RL, OpenEnv handles the infrastructure.\n&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mvdyro"><strong>Output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->======================================================================
🧩 OPENENV CORE ABSTRACTIONS
======================================================================
🖥️ SERVER SIDE (runs <span class="hljs-keyword">in</span> Docker):
<span class="hljs-keyword">class</span> <span class="hljs-title class_">Environment</span>(<span class="hljs-title class_ inherited__">ABC</span>):
<span class="hljs-string">&#x27;&#x27;&#x27;Base class for all environment implementations&#x27;&#x27;&#x27;</span>
<span class="hljs-meta"> @abstractmethod</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">reset</span>(<span class="hljs-params">self</span>) -&gt; Observation:
<span class="hljs-string">&#x27;&#x27;&#x27;Start new episode&#x27;&#x27;&#x27;</span>
<span class="hljs-meta"> @abstractmethod</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">step</span>(<span class="hljs-params">self, action: Action</span>) -&gt; Observation:
<span class="hljs-string">&#x27;&#x27;&#x27;Execute action, return observation&#x27;&#x27;&#x27;</span>
<span class="hljs-meta"> @property</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">state</span>(<span class="hljs-params">self</span>) -&gt; State:
<span class="hljs-string">&#x27;&#x27;&#x27;Get episode metadata&#x27;&#x27;&#x27;</span>
📱 CLIENT SIDE (your training code):
<span class="hljs-keyword">class</span> <span class="hljs-title class_">HTTPEnvClient</span>(<span class="hljs-title class_ inherited__">ABC</span>):
<span class="hljs-string">&#x27;&#x27;&#x27;Base class for HTTP clients&#x27;&#x27;&#x27;</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">reset</span>(<span class="hljs-params">self</span>) -&gt; StepResult:
<span class="hljs-comment"># HTTP POST /reset</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">step</span>(<span class="hljs-params">self, action</span>) -&gt; StepResult:
<span class="hljs-comment"># HTTP POST /step</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">state</span>(<span class="hljs-params">self</span>) -&gt; State:
<span class="hljs-comment"># HTTP GET /state</span>
======================================================================
✨ Same interface on both sides - communication via HTTP!
🎯 You focus on RL, OpenEnv handles the infrastructure.<!-- HTML_TAG_END --></pre></div> <hr> <span id="part-5-example-integration---openspiel"></span> <h2 class="relative group"><a id="part-5-example-integration---openspiel-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-5-example-integration---openspiel-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 5: Example Integration - OpenSpiel 🎮</span></h2> <h3 class="relative group"><a id="what-is-openspiel" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-is-openspiel"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 
28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What is OpenSpiel?</span></h3> <p data-svelte-h="svelte-1ulvbfl"><strong>OpenSpiel</strong> is a library from DeepMind with <strong>70+ game environments</strong> for RL research.</p> <h3 class="relative group"><a id="openenvs-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#openenvs-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>OpenEnv’s Integration</span></h3> <p data-svelte-h="svelte-11ajvx">We’ve wrapped <strong>6 OpenSpiel games</strong> following the OpenEnv pattern:</p> <table data-svelte-h="svelte-10odg88"><thead><tr><th><strong>🎯 Single-Player</strong></th> <th><strong>👥 Multi-Player</strong></th></tr></thead> <tbody><tr><td>1. <strong>Catch</strong> - Catch falling ball</td> <td>5. <strong>Tic-Tac-Toe</strong> - Classic 3×3</td></tr> <tr><td>2. 
<strong>Cliff Walking</strong> - Navigate grid</td> <td>6. <strong>Kuhn Poker</strong> - Imperfect info poker</td></tr> <tr><td>3. <strong>2048</strong> - Tile puzzle</td> <td></td></tr> <tr><td>4. <strong>Blackjack</strong> - Card game</td> <td></td></tr></tbody></table> <p data-svelte-h="svelte-1qjt3rd">This shows how OpenEnv can wrap <strong>any</strong> existing RL library!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> envs.openspiel_env.client <span class="hljs-keyword">import</span> OpenSpielEnv
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 🔌 HOW OPENENV WRAPS OPENSPIEL&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;&quot;&quot;
class OpenSpielEnv(HTTPEnvClient[OpenSpielAction, OpenSpielObservation]):
def _step_payload(self, action: OpenSpielAction) -&gt; dict:
&#x27;&#x27;&#x27;Convert typed action to JSON for HTTP&#x27;&#x27;&#x27;
return {
&quot;action_id&quot;: action.action_id,
&quot;game_name&quot;: action.game_name,
}
def _parse_result(self, payload: dict) -&gt; StepResult:
&#x27;&#x27;&#x27;Parse HTTP JSON response into typed observation&#x27;&#x27;&#x27;
return StepResult(
observation=OpenSpielObservation(...),
reward=payload[&#x27;reward&#x27;],
done=payload[&#x27;done&#x27;]
)
&quot;&quot;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;─&quot;</span> * <span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n✨ Usage (works for ALL OpenEnv environments):&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;&quot;&quot;
env = OpenSpielEnv(base_url=&quot;http://localhost:8000&quot;)
result = env.reset()
# Returns StepResult[OpenSpielObservation] - Type safe!
result = env.step(OpenSpielAction(action_id=2, game_name=&quot;catch&quot;))
# Type checker knows this is valid!
state = env.state()
# Returns OpenSpielState
&quot;&quot;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;─&quot;</span> * <span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n🎯 This pattern works for ANY environment you want to wrap!\n&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mvdyro"><strong>Output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->======================================================================
🔌 HOW OPENENV WRAPS OPENSPIEL
======================================================================
<span class="hljs-keyword">class</span> <span class="hljs-title class_">OpenSpielEnv</span>(HTTPEnvClient[OpenSpielAction, OpenSpielObservation]):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_step_payload</span>(<span class="hljs-params">self, action: OpenSpielAction</span>) -&gt; <span class="hljs-built_in">dict</span>:
<span class="hljs-string">&#x27;&#x27;&#x27;Convert typed action to JSON for HTTP&#x27;&#x27;&#x27;</span>
<span class="hljs-keyword">return</span> {
<span class="hljs-string">&quot;action_id&quot;</span>: action.action_id,
<span class="hljs-string">&quot;game_name&quot;</span>: action.game_name,
}
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_parse_result</span>(<span class="hljs-params">self, payload: <span class="hljs-built_in">dict</span></span>) -&gt; StepResult:
<span class="hljs-string">&#x27;&#x27;&#x27;Parse HTTP JSON response into typed observation&#x27;&#x27;&#x27;</span>
<span class="hljs-keyword">return</span> StepResult(
observation=OpenSpielObservation(...),
reward=payload[<span class="hljs-string">&#x27;reward&#x27;</span>],
done=payload[<span class="hljs-string">&#x27;done&#x27;</span>]
)
──────────────────────────────────────────────────────────────────────
✨ Usage (works <span class="hljs-keyword">for</span> ALL OpenEnv environments):
env = OpenSpielEnv(base_url=<span class="hljs-string">&quot;http://localhost:8000&quot;</span>)
result = env.reset()
<span class="hljs-comment"># Returns StepResult[OpenSpielObservation] - Type safe!</span>
result = env.step(OpenSpielAction(action_id=<span class="hljs-number">2</span>, game_name=<span class="hljs-string">&quot;catch&quot;</span>))
<span class="hljs-comment"># Type checker knows this is valid!</span>
state = env.state()
<span class="hljs-comment"># Returns OpenSpielState</span>
──────────────────────────────────────────────────────────────────────
🎯 This pattern works <span class="hljs-keyword">for</span> ANY environment you want to wrap!<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="type-safe-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#type-safe-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Type-Safe Models</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black 
text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-comment"># Import OpenSpiel integration models</span>
<span class="hljs-keyword">from</span> envs.openspiel_env.models <span class="hljs-keyword">import</span> (
OpenSpielAction,
OpenSpielObservation,
OpenSpielState
)
<span class="hljs-keyword">from</span> dataclasses <span class="hljs-keyword">import</span> fields
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 🎮 OPENSPIEL INTEGRATION - TYPE-SAFE MODELS&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n📤 OpenSpielAction (what you send):&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; &quot;</span> + <span class="hljs-string">&quot;─&quot;</span> * <span class="hljs-number">64</span>)
<span class="hljs-keyword">for</span> field <span class="hljs-keyword">in</span> fields(OpenSpielAction):
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot; • <span class="hljs-subst">{field.name:20s}</span> : <span class="hljs-subst">{field.<span class="hljs-built_in">type</span>}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n📥 OpenSpielObservation (what you receive):&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; &quot;</span> + <span class="hljs-string">&quot;─&quot;</span> * <span class="hljs-number">64</span>)
<span class="hljs-keyword">for</span> field <span class="hljs-keyword">in</span> fields(OpenSpielObservation):
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot; • <span class="hljs-subst">{field.name:20s}</span> : <span class="hljs-subst">{field.<span class="hljs-built_in">type</span>}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n📊 OpenSpielState (episode metadata):&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; &quot;</span> + <span class="hljs-string">&quot;─&quot;</span> * <span class="hljs-number">64</span>)
<span class="hljs-keyword">for</span> field <span class="hljs-keyword">in</span> fields(OpenSpielState):
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot; • <span class="hljs-subst">{field.name:20s}</span> : <span class="hljs-subst">{field.<span class="hljs-built_in">type</span>}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n&quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n💡 Type safety means:&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; ✅ Your IDE autocompletes these fields&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; ✅ Typos are caught before running&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; ✅ Refactoring is safe&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; ✅ Self-documenting code\n&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mvdyro"><strong>Output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->======================================================================
🎮 OPENSPIEL INTEGRATION - TYPE-SAFE MODELS
======================================================================
📤 OpenSpielAction (what you send):
────────────────────────────────────────────────────────────────
• metadata : typing.<span class="hljs-type">Dict</span>[<span class="hljs-built_in">str</span>, typing.<span class="hljs-type">Any</span>]
• action_id : <span class="hljs-built_in">int</span>
• game_name : <span class="hljs-built_in">str</span>
• game_params : <span class="hljs-type">Dict</span>[<span class="hljs-built_in">str</span>, <span class="hljs-type">Any</span>]
📥 OpenSpielObservation (what you receive):
────────────────────────────────────────────────────────────────
• done : &lt;<span class="hljs-keyword">class</span> <span class="hljs-string">&#x27;bool&#x27;</span>&gt;
• reward : typing.<span class="hljs-type">Union</span>[<span class="hljs-built_in">bool</span>, <span class="hljs-built_in">int</span>, <span class="hljs-built_in">float</span>, NoneType]
• metadata : typing.<span class="hljs-type">Dict</span>[<span class="hljs-built_in">str</span>, typing.<span class="hljs-type">Any</span>]
• info_state : <span class="hljs-type">List</span>[<span class="hljs-built_in">float</span>]
• legal_actions : <span class="hljs-type">List</span>[<span class="hljs-built_in">int</span>]
• game_phase : <span class="hljs-built_in">str</span>
• current_player_id : <span class="hljs-built_in">int</span>
• opponent_last_action : <span class="hljs-type">Optional</span>[<span class="hljs-built_in">int</span>]
📊 OpenSpielState (episode metadata):
────────────────────────────────────────────────────────────────
• episode_id : typing.<span class="hljs-type">Optional</span>[<span class="hljs-built_in">str</span>]
• step_count : &lt;<span class="hljs-keyword">class</span> <span class="hljs-string">&#x27;int&#x27;</span>&gt;
• game_name : <span class="hljs-built_in">str</span>
• agent_player : <span class="hljs-built_in">int</span>
• opponent_policy : <span class="hljs-built_in">str</span>
• game_params : <span class="hljs-type">Dict</span>[<span class="hljs-built_in">str</span>, <span class="hljs-type">Any</span>]
• num_players : <span class="hljs-built_in">int</span>
======================================================================
💡 <span class="hljs-type">Type</span> safety means:
✅ Your IDE autocompletes these fields
✅ Typos are caught before running
✅ Refactoring <span class="hljs-keyword">is</span> safe
✅ Self-documenting code<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="how-the-client-works" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-the-client-works"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How the Client Works</span></h3> <p data-svelte-h="svelte-13byqjp">The client <strong>inherits from HTTPEnvClient</strong> and implements 3 methods:</p> <ol data-svelte-h="svelte-1e9pr2e"><li><code>_step_payload()</code> - Convert action → JSON</li> <li><code>_parse_result()</code> - Parse JSON → typed observation</li> <li><code>_parse_state()</code> - Parse JSON → state</li></ol> <p data-svelte-h="svelte-t9q2et">That’s it! 
The base class handles all HTTP communication.</p> <hr> <span id="part-6-using-real-openspiel"></span> <h2 class="relative group"><a id="part-6-using-real-openspiel-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-6-using-real-openspiel-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 6: Using Real OpenSpiel 🎮</span></h2> <div style="text-align: center; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 30px; border-radius: 15px; margin: 30px 0;"> <h3 class="relative group"><a id="now-lets-use-a-production-environment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#now-lets-use-a-production-environment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 
1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Now let’s USE a production environment!</span></h3> <p data-svelte-h="svelte-cx2mlx">We’ll play <strong>Catch</strong> using OpenEnv’s <strong>OpenSpiel integration</strong> 🎯</p> <p data-svelte-h="svelte-mrbvl4">This is a REAL environment running in production at companies!</p> <p data-svelte-h="svelte-9gqed3"><strong>Get ready for:</strong></p> <ul data-svelte-h="svelte-1syb72m"><li>🔌 Using existing environments (not building)</li> <li>🤖 Testing policies against real games</li> <li>📊 Live gameplay visualization</li> <li>🎯 Production-ready patterns</li></ul></div> <h3 class="relative group"><a id="the-game-catch-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#the-game-catch-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>The Game: Catch 🔴🏓</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->⬜ ⬜ 🔴 ⬜ ⬜
⬜ ⬜ ⬜ ⬜ ⬜
⬜ ⬜ ⬜ ⬜ ⬜ <span class="hljs-built_in">Ball</span>
⬜ ⬜ ⬜ ⬜ ⬜
⬜ ⬜ ⬜ ⬜ ⬜ <span class="hljs-variable">falls</span>
⬜ ⬜ ⬜ ⬜ ⬜
⬜ ⬜ ⬜ ⬜ ⬜ <span class="hljs-variable">down</span>
⬜ ⬜ ⬜ ⬜ ⬜
⬜ ⬜ ⬜ ⬜ ⬜
⬜ ⬜ 🏓 ⬜ ⬜
<span class="hljs-variable">Paddle</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1sb7zhg"><strong>Rules:</strong></p> <ul data-svelte-h="svelte-168m2ui"><li>10×5 grid</li> <li>Ball falls from random column</li> <li>Move paddle left/right to catch it</li></ul> <p data-svelte-h="svelte-1nta3bs"><strong>Actions:</strong></p> <ul data-svelte-h="svelte-104dyvo"><li><code>0</code> = Move LEFT ⬅️</li> <li><code>1</code> = STAY 🛑</li> <li><code>2</code> = Move RIGHT ➡️</li></ul> <p data-svelte-h="svelte-94aj12"><strong>Reward:</strong></p> <ul data-svelte-h="svelte-1pwfkd5"><li><code>+1</code> if caught 🎉</li> <li><code>0</code> if missed 😢</li></ul> <p data-svelte-h="svelte-voieho"><strong>Note: Why Catch?</strong></p> <ul data-svelte-h="svelte-cs3wc8"><li><p>Simple rules (easy to understand)</p></li> <li><p>Fast episodes (~5 steps)</p></li> <li><p>Clear success/failure</p></li> <li><p>Part of OpenSpiel’s 70+ games!</p> <p><strong>💡 The Big Idea:</strong>
Instead of building this from scratch, we’ll USE OpenEnv’s existing OpenSpiel integration. Same interface, but production-ready!</p></li></ul> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> envs.openspiel_env <span class="hljs-keyword">import</span> OpenSpielEnv
<span class="hljs-keyword">from</span> envs.openspiel_env.models <span class="hljs-keyword">import</span> (
OpenSpielAction,
OpenSpielObservation,
OpenSpielState
)
<span class="hljs-keyword">from</span> dataclasses <span class="hljs-keyword">import</span> fields
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🎮 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">64</span> + <span class="hljs-string">&quot; 🎮&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; ✅ Importing Real OpenSpiel Environment!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🎮 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">64</span> + <span class="hljs-string">&quot; 🎮\n&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;📦 What we just imported:&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • OpenSpielEnv - HTTP client for OpenSpiel games&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • OpenSpielAction - Type-safe actions&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • OpenSpielObservation - Type-safe observations&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • OpenSpielState - Episode metadata\n&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;📋 OpenSpielObservation fields:&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; &quot;</span> + <span class="hljs-string">&quot;─&quot;</span> * <span class="hljs-number">60</span>)
<span class="hljs-keyword">for</span> field <span class="hljs-keyword">in</span> fields(OpenSpielObservation):
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot; • <span class="hljs-subst">{field.name:25s}</span> : <span class="hljs-subst">{field.<span class="hljs-built_in">type</span>}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n&quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n💡 This is REAL OpenEnv code - used in production!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Wraps 6 OpenSpiel games (Catch, Tic-Tac-Toe, Poker, etc.)&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Type-safe actions and observations&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Works via HTTP (we&#x27;ll see that next!)\n&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mvdyro"><strong>Output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->🎮 ================================================================ 🎮
✅ Importing Real OpenSpiel Environment!
🎮 ================================================================ 🎮
📦 What we just imported:
• OpenSpielEnv - HTTP client <span class="hljs-keyword">for</span> OpenSpiel games
• OpenSpielAction - <span class="hljs-type">Type</span>-safe actions
• OpenSpielObservation - <span class="hljs-type">Type</span>-safe observations
• OpenSpielState - Episode metadata
📋 OpenSpielObservation fields:
────────────────────────────────────────────────────────────
• done : &lt;<span class="hljs-keyword">class</span> <span class="hljs-string">&#x27;bool&#x27;</span>&gt;
• reward : typing.<span class="hljs-type">Union</span>[<span class="hljs-built_in">bool</span>, <span class="hljs-built_in">int</span>, <span class="hljs-built_in">float</span>, NoneType]
• metadata : typing.<span class="hljs-type">Dict</span>[<span class="hljs-built_in">str</span>, typing.<span class="hljs-type">Any</span>]
• info_state : <span class="hljs-type">List</span>[<span class="hljs-built_in">float</span>]
• legal_actions : <span class="hljs-type">List</span>[<span class="hljs-built_in">int</span>]
• game_phase : <span class="hljs-built_in">str</span>
• current_player_id : <span class="hljs-built_in">int</span>
• opponent_last_action : <span class="hljs-type">Optional</span>[<span class="hljs-built_in">int</span>]
======================================================================
💡 This <span class="hljs-keyword">is</span> REAL OpenEnv code - used <span class="hljs-keyword">in</span> production!
• Wraps <span class="hljs-number">6</span> OpenSpiel games (Catch, Tic-Tac-Toe, Poker, etc.)
• <span class="hljs-type">Type</span>-safe actions <span class="hljs-keyword">and</span> observations
• Works via HTTP (we<span class="hljs-string">&#x27;ll see that next!)</span><!-- HTML_TAG_END --></pre></div> <hr> <p data-svelte-h="svelte-f80av3">(part-7-four-policies)=</p> <h2 class="relative group"><a id="part-7-four-policies-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-7-four-policies-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 7: Four Policies 🤖</span></h2> <p data-svelte-h="svelte-13ty1hx">Let’s test 4 different AI strategies:</p> <table data-svelte-h="svelte-thn5hf"><thead><tr><th>Policy</th> <th>Strategy</th> <th>Expected Performance</th></tr></thead> <tbody><tr><td><strong>🎲 Random</strong></td> <td>Pick random action every step</td> <td>~20% (pure luck)</td></tr> <tr><td><strong>🛑 Always Stay</strong></td> <td>Never move, hope ball lands in center</td> <td>~20% (terrible!)</td></tr> <tr><td><strong>🧠 Smart</strong></td> <td>Move paddle toward ball</td> <td>100% (optimal!)</td></tr> <tr><td><strong>📈 Learning</strong></td> <td>Start random, learn smart strategy</td> <td>~85% (improves over time)</td></tr></tbody></table> <p data-svelte-h="svelte-3ez0lz"><strong>💡 These policies work with ANY 
OpenSpiel game!</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> random
<span class="hljs-comment"># ============================================================================</span>
<span class="hljs-comment"># POLICIES - Different AI strategies (adapted for OpenSpiel)</span>
<span class="hljs-comment"># ============================================================================</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">RandomPolicy</span>:
<span class="hljs-string">&quot;&quot;&quot;Baseline: Pure random guessing.&quot;&quot;&quot;</span>
name = <span class="hljs-string">&quot;🎲 Random Guesser&quot;</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">select_action</span>(<span class="hljs-params">self, obs: OpenSpielObservation</span>) -&gt; <span class="hljs-built_in">int</span>:
<span class="hljs-keyword">return</span> random.choice(obs.legal_actions)
<span class="hljs-keyword">class</span> <span class="hljs-title class_">AlwaysStayPolicy</span>:
<span class="hljs-string">&quot;&quot;&quot;Bad strategy: Never moves.&quot;&quot;&quot;</span>
name = <span class="hljs-string">&quot;🛑 Always Stay&quot;</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">select_action</span>(<span class="hljs-params">self, obs: OpenSpielObservation</span>) -&gt; <span class="hljs-built_in">int</span>:
<span class="hljs-keyword">return</span> <span class="hljs-number">1</span> <span class="hljs-comment"># STAY</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">SmartPolicy</span>:
<span class="hljs-string">&quot;&quot;&quot;Optimal: Move paddle toward ball.&quot;&quot;&quot;</span>
name = <span class="hljs-string">&quot;🧠 Smart Heuristic&quot;</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">select_action</span>(<span class="hljs-params">self, obs: OpenSpielObservation</span>) -&gt; <span class="hljs-built_in">int</span>:
<span class="hljs-comment"># Parse OpenSpiel observation</span>
<span class="hljs-comment"># For Catch: info_state is a flattened 10x5 grid</span>
<span class="hljs-comment"># Ball position and paddle position encoded in the vector</span>
info_state = obs.info_state
<span class="hljs-comment"># Find ball and paddle positions from info_state</span>
<span class="hljs-comment"># Catch uses a 10x5 grid, so 50 values</span>
grid_size = <span class="hljs-number">5</span>
<span class="hljs-comment"># Find positions (ball = 1.0 in the flattened grid, paddle = 1.0 in the last row of the flattened grid)</span>
ball_col = <span class="hljs-literal">None</span>
paddle_col = <span class="hljs-literal">None</span>
<span class="hljs-keyword">for</span> idx, val <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(info_state):
<span class="hljs-keyword">if</span> <span class="hljs-built_in">abs</span>(val - <span class="hljs-number">1.0</span>) &lt; <span class="hljs-number">0.01</span>: <span class="hljs-comment"># Ball</span>
ball_col = idx % grid_size
<span class="hljs-keyword">break</span>
last_row = info_state[-grid_size:]
paddle_col = last_row.index(<span class="hljs-number">1.0</span>) <span class="hljs-comment"># Paddle</span>
<span class="hljs-keyword">if</span> ball_col <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span> <span class="hljs-keyword">and</span> paddle_col <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span>:
<span class="hljs-keyword">if</span> paddle_col &lt; ball_col:
<span class="hljs-keyword">return</span> <span class="hljs-number">2</span> <span class="hljs-comment"># Move RIGHT</span>
<span class="hljs-keyword">elif</span> paddle_col &gt; ball_col:
<span class="hljs-keyword">return</span> <span class="hljs-number">0</span> <span class="hljs-comment"># Move LEFT</span>
<span class="hljs-keyword">return</span> <span class="hljs-number">1</span> <span class="hljs-comment"># STAY (fallback)</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">LearningPolicy</span>:
<span class="hljs-string">&quot;&quot;&quot;Simulated RL: Epsilon-greedy exploration.&quot;&quot;&quot;</span>
name = <span class="hljs-string">&quot;📈 Learning Agent&quot;</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
self.steps = <span class="hljs-number">0</span>
self.smart_policy = SmartPolicy()
<span class="hljs-keyword">def</span> <span class="hljs-title function_">select_action</span>(<span class="hljs-params">self, obs: OpenSpielObservation</span>) -&gt; <span class="hljs-built_in">int</span>:
self.steps += <span class="hljs-number">1</span>
<span class="hljs-comment"># Decay exploration rate over time</span>
epsilon = <span class="hljs-built_in">max</span>(<span class="hljs-number">0.1</span>, <span class="hljs-number">1.0</span> - (self.steps / <span class="hljs-number">100</span>))
<span class="hljs-keyword">if</span> random.random() &lt; epsilon:
<span class="hljs-comment"># Explore: random action</span>
<span class="hljs-keyword">return</span> random.choice(obs.legal_actions)
<span class="hljs-keyword">else</span>:
<span class="hljs-comment"># Exploit: use smart strategy</span>
<span class="hljs-keyword">return</span> self.smart_policy.select_action(obs)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🤖 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">64</span> + <span class="hljs-string">&quot; 🤖&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; ✅ 4 Policies Created (Adapted for OpenSpiel)!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🤖 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">64</span> + <span class="hljs-string">&quot; 🤖\n&quot;</span>)
policies = [RandomPolicy(), AlwaysStayPolicy(), SmartPolicy(), LearningPolicy()]
<span class="hljs-keyword">for</span> i, policy <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(policies, <span class="hljs-number">1</span>):
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot; <span class="hljs-subst">{i}</span>. <span class="hljs-subst">{policy.name}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n💡 These policies work with OpenSpielObservation!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Read info_state (flattened grid)&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Use legal_actions&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Work with ANY OpenSpiel game that exposes these!\n&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mvdyro"><strong>Output:</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=" "><!-- HTML_TAG_START -->🤖 ================================================================ 🤖
✅ <span class="hljs-number">4</span> Policies Created (Adapted <span class="hljs-keyword">for</span> OpenSpiel)!
🤖 ================================================================ 🤖
<span class="hljs-number">1.</span> 🎲 Random Guesser
<span class="hljs-number">2.</span> 🛑 <span class="hljs-keyword">Always</span> Stay
<span class="hljs-number">3.</span> 🧠 Smart Heuristic
<span class="hljs-number">4.</span> 📈 Learning Agent
💡 These policies <span class="hljs-keyword">work</span> <span class="hljs-keyword">with</span> OpenSpielObservation!
• <span class="hljs-keyword">Read</span> info_state (flattened grid)
• Use legal_actions
• <span class="hljs-keyword">Work</span> <span class="hljs-keyword">with</span> <span class="hljs-keyword">ANY</span> OpenSpiel game that exposes these!<!-- HTML_TAG_END --></pre></div> <hr> <p data-svelte-h="svelte-1qpu4mr">(part-8-policy-competition)=</p> <h2 class="relative group"><a id="part-8-policy-competition-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-8-policy-competition-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 8: Policy Competition! 
🏆</span></h2> <p data-svelte-h="svelte-kempwn">Let’s run <strong>50 episodes</strong> for each policy against <strong>REAL OpenSpiel</strong> and see who wins!</p> <p data-svelte-h="svelte-m5903s">This is production code - every action is an HTTP call to the OpenSpiel server!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">evaluate_policies</span>(<span class="hljs-params">env, num_episodes=<span class="hljs-number">50</span></span>):
<span class="hljs-string">&quot;&quot;&quot;Compare all policies over many episodes using real OpenSpiel.&quot;&quot;&quot;</span>
policies = [
RandomPolicy(),
AlwaysStayPolicy(),
SmartPolicy(),
LearningPolicy(),
]
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n🏆 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">66</span> + <span class="hljs-string">&quot; 🏆&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot; POLICY SHOWDOWN - <span class="hljs-subst">{num_episodes}</span> Episodes Each&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot; Playing against REAL OpenSpiel Catch!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🏆 &quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">66</span> + <span class="hljs-string">&quot; 🏆\n&quot;</span>)
results = []
<span class="hljs-keyword">for</span> policy <span class="hljs-keyword">in</span> policies:
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;⚡ Testing <span class="hljs-subst">{policy.name}</span>...&quot;</span>, end=<span class="hljs-string">&quot; &quot;</span>)
successes = <span class="hljs-built_in">sum</span>(run_episode(env, policy, visualize=<span class="hljs-literal">False</span>)
<span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_episodes))
success_rate = (successes / num_episodes) * <span class="hljs-number">100</span>
results.append((policy.name, success_rate, successes))
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;✓ Done!&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n&quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 📊 FINAL RESULTS&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span> + <span class="hljs-string">&quot;\n&quot;</span>)
<span class="hljs-comment"># Sort by success rate (descending)</span>
results.sort(key=<span class="hljs-keyword">lambda</span> x: x[<span class="hljs-number">1</span>], reverse=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># Award medals to top 3</span>
medals = [<span class="hljs-string">&quot;🥇&quot;</span>, <span class="hljs-string">&quot;🥈&quot;</span>, <span class="hljs-string">&quot;🥉&quot;</span>, <span class="hljs-string">&quot; &quot;</span>]
<span class="hljs-keyword">for</span> i, (name, rate, successes) <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(results):
medal = medals[i]
bar = <span class="hljs-string">&quot;█&quot;</span> * <span class="hljs-built_in">int</span>(rate / <span class="hljs-number">2</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;<span class="hljs-subst">{medal}</span> <span class="hljs-subst">{name:25s}</span> [<span class="hljs-subst">{bar:&lt;<span class="hljs-number">50</span>}</span>] <span class="hljs-subst">{rate:<span class="hljs-number">5.1</span>f}</span>% (<span class="hljs-subst">{successes}</span>/<span class="hljs-subst">{num_episodes}</span>)&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n&quot;</span> + <span class="hljs-string">&quot;=&quot;</span>*<span class="hljs-number">70</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n✨ Key Insights:&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Random (~20%): Baseline - pure luck 🎲&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Always Stay (~20%): Bad strategy - stays center 🛑&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Smart (100%): Optimal - perfect play! 🧠&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; • Learning (~85%): Improves over time 📈&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;\n🎓 This is Reinforcement Learning + OpenEnv in action:&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 1. We USED existing OpenSpiel environment (didn&#x27;t build it)&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 2. Type-safe communication over HTTP&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 3. Same code works for ANY OpenSpiel game&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot; 4. Production-ready architecture\n&quot;</span>)
<span class="hljs-comment"># Run the epic competition!</span>
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;🎮 Starting the showdown against REAL OpenSpiel...\n&quot;</span>)
evaluate_policies(client, num_episodes=<span class="hljs-number">50</span>)<!-- HTML_TAG_END --></pre></div> <hr> <p data-svelte-h="svelte-1o5fq89">(part-9-switching-to-other-games)=</p> <h2 class="relative group"><a id="part-9-switching-to-other-games-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-9-switching-to-other-games-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 9: Switching to Other Games 🎮</span></h2> <h3 class="relative group"><a id="what-we-just-used-real-openspiel-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-we-just-used-real-openspiel-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 
0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What We Just Used: Real OpenSpiel! 🎉</span></h3> <p data-svelte-h="svelte-1ov8p0r">In Parts 6-8, we <strong>USED</strong> the existing OpenSpiel Catch environment:</p> <table data-svelte-h="svelte-rbd6y9"><thead><tr><th>What We Did</th> <th>How It Works</th></tr></thead> <tbody><tr><td><strong>Imported</strong></td> <td>OpenSpielEnv client (pre-built)</td></tr> <tr><td><strong>Started</strong></td> <td>OpenSpiel server via uvicorn</td></tr> <tr><td><strong>Connected</strong></td> <td>HTTP client to server</td></tr> <tr><td><strong>Played</strong></td> <td>Real OpenSpiel Catch game</td></tr></tbody></table> <p data-svelte-h="svelte-1gas7wo"><strong>🎯 This is production code!</strong> Every action was an HTTP call to a real OpenSpiel environment.</p> <h3 class="relative group"><a id="-6-games-available---same-interface" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-6-games-available---same-interface"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 
11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🎮 6 Games Available - Same Interface!</span></h3> <p data-svelte-h="svelte-gvky0b">The beauty of OpenEnv? <strong>Same code, different games!</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-comment"># We just used Catch</span>
env = OpenSpielEnv(base_url=<span class="hljs-string">&quot;http://localhost:8000&quot;</span>)
<span class="hljs-comment"># game_name=&quot;catch&quot; was set via environment variable</span>
<span class="hljs-comment"># Want Tic-Tac-Toe instead? Just change the game!</span>
<span class="hljs-comment"># Start server with: OPENSPIEL_GAME=tic_tac_toe uvicorn ...</span>
<span class="hljs-comment"># Same client code works!</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bf95mj"><strong>🎮 All 6 Games:</strong></p> <ol data-svelte-h="svelte-pz67ff"><li><strong><code>catch</code></strong> - What we just used!</li> <li><strong><code>tic_tac_toe</code></strong> - Classic 3×3</li> <li><strong><code>kuhn_poker</code></strong> - Imperfect information poker</li> <li><strong><code>cliff_walking</code></strong> - Grid navigation</li> <li><strong><code>2048</code></strong> - Tile puzzle</li> <li><strong><code>blackjack</code></strong> - Card game</li></ol> <p data-svelte-h="svelte-g15ej3"><strong>All use the exact same OpenSpielEnv client!</strong></p> <h3 class="relative group"><a id="try-another-game-optional" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#try-another-game-optional"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Try Another Game (Optional):</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 
ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-comment"># Stop the current server (kill the server_process)</span>
<span class="hljs-comment"># Then start a new game:</span>
server_process = subprocess.Popen(
[sys.executable, <span class="hljs-string">&quot;-m&quot;</span>, <span class="hljs-string">&quot;uvicorn&quot;</span>,
<span class="hljs-string">&quot;envs.openspiel_env.server.app:app&quot;</span>,
<span class="hljs-string">&quot;--host&quot;</span>, <span class="hljs-string">&quot;0.0.0.0&quot;</span>,
<span class="hljs-string">&quot;--port&quot;</span>, <span class="hljs-string">&quot;8000&quot;</span>],
env={**os.environ,
<span class="hljs-string">&quot;PYTHONPATH&quot;</span>: <span class="hljs-string">f&quot;<span class="hljs-subst">{work_dir}</span>/src&quot;</span>,
<span class="hljs-string">&quot;OPENSPIEL_GAME&quot;</span>: <span class="hljs-string">&quot;tic_tac_toe&quot;</span>, <span class="hljs-comment"># Changed!</span>
<span class="hljs-string">&quot;OPENSPIEL_AGENT_PLAYER&quot;</span>: <span class="hljs-string">&quot;0&quot;</span>,
<span class="hljs-string">&quot;OPENSPIEL_OPPONENT_POLICY&quot;</span>: <span class="hljs-string">&quot;random&quot;</span>},
<span class="hljs-comment"># ... rest of config</span>
)
<span class="hljs-comment"># Same client works!</span>
client = OpenSpielEnv(base_url=<span class="hljs-string">&quot;http://localhost:8000&quot;</span>)
result = client.reset() <span class="hljs-comment"># Now playing Tic-Tac-Toe!</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1hfxpdg"><strong>💡 Key Insight</strong>: You don’t rebuild anything - you just USE different games with the same client!</p> <hr> <p data-svelte-h="svelte-1lky4i8"><span id="part-10-create-your-own-integration"></span></p> <h2 class="relative group"><a id="part-10-create-your-own-integration-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#part-10-create-your-own-integration-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Part 10: Create Your Own Integration 🛠️</span></h2> <h3 class="relative group"><a id="the-5-step-pattern" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#the-5-step-pattern"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 
0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>The 5-Step Pattern</span></h3> <p data-svelte-h="svelte-qjq0u2">Want to wrap your own environment in OpenEnv? Here’s how:</p> <h3 class="relative group"><a id="step-1-define-types--modelspy-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-1-define-types--modelspy-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 1: Define Types ( models.py )</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" 
type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> dataclasses <span class="hljs-keyword">import</span> dataclass
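<span class="hljs-keyword">from</span> typing <span class="hljs-keyword">import</span> <span class="hljs-type">List</span>
<span class="hljs-keyword">from</span> core.env_server <span class="hljs-keyword">import</span> Action, Observation, State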
<span class="hljs-meta">@dataclass</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">YourAction</span>(<span class="hljs-title class_ inherited__">Action</span>):
action_value: <span class="hljs-built_in">int</span>
<span class="hljs-comment"># Add your action fields</span>
<span class="hljs-meta">@dataclass</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">YourObservation</span>(<span class="hljs-title class_ inherited__">Observation</span>):
state_data: <span class="hljs-type">List</span>[<span class="hljs-built_in">float</span>]
done: <span class="hljs-built_in">bool</span>
reward: <span class="hljs-built_in">float</span>
<span class="hljs-comment"># Add your observation fields</span>
<span class="hljs-meta">@dataclass</span>
<span class="hljs-keyword">class</span> <span class="hljs-title class_">YourState</span>(<span class="hljs-title class_ inherited__">State</span>):
episode_id: <span class="hljs-built_in">str</span>
step_count: <span class="hljs-built_in">int</span>
<span class="hljs-comment"># Add your state fields</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="step-2-implement-environment--serverenvironmentpy-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-2-implement-environment--serverenvironmentpy-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 2: Implement Environment ( server/environment.py )</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> 
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> core.env_server <span class="hljs-keyword">import</span> Action, Environment, Observation, State
<span class="hljs-keyword">from</span> ..models <span class="hljs-keyword">import</span> YourObservation
<span class="hljs-keyword">class</span> <span class="hljs-title class_">YourEnvironment</span>(<span class="hljs-title class_ inherited__">Environment</span>):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">reset</span>(<span class="hljs-params">self</span>) -&gt; Observation:
<span class="hljs-comment"># Initialize your game/simulation</span>
<span class="hljs-keyword">return</span> YourObservation(...)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">step</span>(<span class="hljs-params">self, action: Action</span>) -&gt; Observation:
<span class="hljs-comment"># Execute action, update state</span>
<span class="hljs-keyword">return</span> YourObservation(...)
<span class="hljs-meta"> @property</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">state</span>(<span class="hljs-params">self</span>) -&gt; State:
<span class="hljs-keyword">return</span> self._state<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="step-3-create-client--clientpy-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-3-create-client--clientpy-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 3: Create Client ( client.py )</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity 
bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> core.http_env_client <span class="hljs-keyword">import</span> HTTPEnvClient
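<span class="hljs-keyword">from</span> core.types <span class="hljs-keyword">import</span> StepResult
<span class="hljs-keyword">from</span> .models <span class="hljs-keyword">import</span> YourAction, YourObservation, YourState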
<span class="hljs-keyword">class</span> <span class="hljs-title class_">YourEnv</span>(HTTPEnvClient[YourAction, YourObservation]):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_step_payload</span>(<span class="hljs-params">self, action: YourAction</span>) -&gt; <span class="hljs-built_in">dict</span>:
<span class="hljs-string">&quot;&quot;&quot;Convert action to JSON&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> {<span class="hljs-string">&quot;action_value&quot;</span>: action.action_value}
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_parse_result</span>(<span class="hljs-params">self, payload: <span class="hljs-built_in">dict</span></span>) -&gt; StepResult:
<span class="hljs-string">&quot;&quot;&quot;Parse JSON to observation&quot;&quot;&quot;</span>
<span class="hljs-keyword">return</span> StepResult(
observation=YourObservation(...),
reward=payload[<span class="hljs-string">&#x27;reward&#x27;</span>],
done=payload[<span class="hljs-string">&#x27;done&#x27;</span>]
)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_parse_state</span>(<span class="hljs-params">self, payload: <span class="hljs-built_in">dict</span></span>) -&gt; YourState:
<span class="hljs-keyword">return</span> YourState(...)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="step-4-create-server--serverapppy-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-4-create-server--serverapppy-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 4: Create Server ( server/app.py )</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none 
transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> core.env_server <span class="hljs-keyword">import</span> create_fastapi_app
<span class="hljs-keyword">from</span> .environment <span class="hljs-keyword">import</span> YourEnvironment
env = YourEnvironment()
app = create_fastapi_app(env)
<span class="hljs-comment"># That&#x27;s it! OpenEnv creates all endpoints for you.</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="step-5-dockerize--serverdockerfile-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-5-dockerize--serverdockerfile-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 5: Dockerize ( server/Dockerfile )</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div 
class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-dockerfile "><!-- HTML_TAG_START --><span class="hljs-keyword">FROM</span> python:<span class="hljs-number">3.11</span>-slim
<span class="hljs-keyword">WORKDIR</span><span class="language-bash"> /app</span>
<span class="hljs-keyword">COPY</span><span class="language-bash"> requirements.txt .</span>
<span class="hljs-keyword">RUN</span><span class="language-bash"> pip install --no-cache-dir -r requirements.txt</span>
<span class="hljs-keyword">COPY</span><span class="language-bash"> . .</span>
<span class="hljs-keyword">CMD</span><span class="language-bash"> [<span class="hljs-string">&quot;uvicorn&quot;</span>, <span class="hljs-string">&quot;app:app&quot;</span>, <span class="hljs-string">&quot;--host&quot;</span>, <span class="hljs-string">&quot;0.0.0.0&quot;</span>, <span class="hljs-string">&quot;--port&quot;</span>, <span class="hljs-string">&quot;8000&quot;</span>]</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="-examples-to-study" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-examples-to-study"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🎓 Examples to Study</span></h3> <p data-svelte-h="svelte-1o1vvxr">OpenEnv includes 3 complete examples:</p> <ol data-svelte-h="svelte-vr8qzd"><li><p><strong><code>src/envs/echo_env/</code></strong></p> <ul><li>Simplest possible environment</li> <li>Great for testing and learning</li></ul></li> <li><p><strong><code>src/envs/openspiel_env/</code></strong></p> <ul><li>Wraps external library (OpenSpiel)</li> <li>Shows integration pattern</li> <li>6 games in one integration</li></ul></li> 
<li><p><strong><code>src/envs/coding_env/</code></strong></p> <ul><li>Python code execution environment</li> <li>Shows complex use case</li> <li>Security considerations</li></ul></li></ol> <p data-svelte-h="svelte-xpj7m9"><strong>💡 Study these to understand the patterns!</strong></p> <hr> <p data-svelte-h="svelte-1o8m9nz"><span id="summary-your-journey"></span></p> <h2 class="relative group"><a id="-summary-your-journey" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-summary-your-journey"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🎓 Summary: Your Journey</span></h2> <h3 class="relative group"><a id="what-you-learned" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-you-learned"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 
0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What You Learned</span></h3> <table><tr><td width="50%" style="vertical-align: top;"> <h3 class="relative group"><a id="-concepts" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-concepts"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>📚 Concepts</span></h3> <p data-svelte-h="svelte-1vr33uo"><strong>RL Fundamentals</strong></p> <ul data-svelte-h="svelte-m84lyj"><li>The observe-act-reward loop</li> <li>What makes good policies</li> <li>Exploration vs exploitation</li></ul> <p data-svelte-h="svelte-1x4v1d4"><strong>OpenEnv Architecture</strong></p> <ul data-svelte-h="svelte-v91uca"><li>Client-server separation</li> <li>Type-safe contracts</li> <li>HTTP communication layer</li></ul> <p 
data-svelte-h="svelte-91807s"><strong>Production Patterns</strong></p> <ul data-svelte-h="svelte-yf1s25"><li>Docker isolation</li> <li>API design</li> <li>Reproducible deployments</li></ul></td> <td width="50%" style="vertical-align: top;"> <h3 class="relative group"><a id="-skills" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-skills"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🛠️ Skills</span></h3> <p data-svelte-h="svelte-l2hxiw"><strong>Using Environments</strong></p> <ul data-svelte-h="svelte-1sedtxv"><li>Import OpenEnv clients</li> <li>Call reset/step/state</li> <li>Work with typed observations</li></ul> <p data-svelte-h="svelte-ktnw4c"><strong>Building Environments</strong></p> <ul data-svelte-h="svelte-b67dck"><li>Define type-safe models</li> <li>Implement Environment class</li> <li>Create HTTPEnvClient</li></ul> <p data-svelte-h="svelte-s2ejng"><strong>Testing &amp; Debugging</strong></p> <ul data-svelte-h="svelte-1198i2y"><li>Compare policies</li> <li>Visualize episodes</li> <li>Measure performance</li></ul></td></tr></table> <h3 class="relative group"><a id="openenv-vs-traditional-rl" class="header-link 
block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#openenv-vs-traditional-rl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>OpenEnv vs Traditional RL</span></h3> <table data-svelte-h="svelte-1qfha29"><thead><tr><th>Feature</th> <th>Traditional (Gym)</th> <th>OpenEnv</th> <th>Winner</th></tr></thead> <tbody><tr><td><strong>Type Safety</strong></td> <td>❌ Arrays, dicts</td> <td>✅ Dataclasses</td> <td>🏆 OpenEnv</td></tr> <tr><td><strong>Isolation</strong></td> <td>❌ Same process</td> <td>✅ Docker</td> <td>🏆 OpenEnv</td></tr> <tr><td><strong>Deployment</strong></td> <td>❌ Manual setup</td> <td>✅ K8s-ready</td> <td>🏆 OpenEnv</td></tr> <tr><td><strong>Language</strong></td> <td>❌ Python only</td> <td>✅ Any (HTTP)</td> <td>🏆 OpenEnv</td></tr> <tr><td><strong>Reproducibility</strong></td> <td>❌ “Works on my machine”</td> <td>✅ Same everywhere</td> <td>🏆 OpenEnv</td></tr> <tr><td><strong>Community</strong></td> <td>✅ Large ecosystem</td> <td>🟡 Growing</td> <td>🤝 Both!</td></tr></tbody></table> <p data-svelte-h="svelte-rdtjgl"><strong>✅ The Bottom Line:</strong>
OpenEnv brings <strong>production engineering</strong> to RL:</p> <ul data-svelte-h="svelte-j28w3j"><li><p>Same environments work locally and in production</p></li> <li><p>Type safety catches bugs early</p></li> <li><p>Docker isolation prevents conflicts</p></li> <li><p>HTTP API works with any language</p> <p><strong>It’s RL for 2024 and beyond.</strong></p></li></ul> <hr> <h2 class="relative group"><a id="-resources" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-resources"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>📚 Resources</span></h2> <h3 class="relative group"><a id="-essential-links" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-essential-links"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🔗 Essential Links</span></h3> <ul data-svelte-h="svelte-152slk3"><li><strong>🏠 OpenEnv GitHub</strong>: <a href="https://github.com/huggingface/OpenEnv" rel="nofollow">https://github.com/huggingface/OpenEnv</a></li> <li><strong>🎮 OpenSpiel</strong>: <a href="https://github.com/google-deepmind/open_spiel" rel="nofollow">https://github.com/google-deepmind/open_spiel</a></li> <li><strong>⚡ FastAPI Docs</strong>: <a href="https://fastapi.tiangolo.com/" rel="nofollow">https://fastapi.tiangolo.com/</a></li> <li><strong>🐳 Docker Guide</strong>: <a href="https://docs.docker.com/get-started/" rel="nofollow">https://docs.docker.com/get-started/</a></li> <li><strong>🔥 PyTorch</strong>: <a href="https://pytorch.org/" rel="nofollow">https://pytorch.org/</a></li></ul> <h3 class="relative group"><a id="-documentation-deep-dives" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-documentation-deep-dives"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>📖 Documentation Deep Dives</span></h3> <ul data-svelte-h="svelte-120rcor"><li><strong>Environment Creation Guide</strong>: <code>src/envs/README.md</code></li> <li><strong>OpenSpiel Integration</strong>: <code>src/envs/openspiel_env/README.md</code></li> <li><strong>Example Scripts</strong>: <code>examples/</code></li> <li><strong>RFC 001</strong>: <a href="https://github.com/huggingface/OpenEnv/pull/26" rel="nofollow">Baseline API Specs</a></li></ul> <h3 class="relative group"><a id="-community--support" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-community--support"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🎓 Community &amp; Support</span></h3> <p data-svelte-h="svelte-s800wd"><strong>Openly governed by a technical committee including:</strong></p> <ul data-svelte-h="svelte-t3e2bl"><li>🤗 Hugging Face</li> <li>⚡ 
Unsloth</li> <li>🌟 Reflection</li> <li>🔥 Meta PyTorch</li></ul> <p data-svelte-h="svelte-18dmhca"><strong>Supported by amazing organizations and contributors.</strong></p> <ul data-svelte-h="svelte-xg09wq"><li>🚀 And many more!</li></ul> <p data-svelte-h="svelte-15hfh7e">Technical direction, RFCs, and release planning are coordinated in public through the OpenEnv repository.</p> <p data-svelte-h="svelte-hxsby3"><strong>License</strong>: BSD 3-Clause (very permissive!)</p> <p data-svelte-h="svelte-1d4miao"><strong>Contributions</strong>: Always welcome! Check out the issues tab.</p> <hr> <h3 class="relative group"><a id="-whats-next" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-whats-next"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🌈 What’s Next?</span></h3> <ol data-svelte-h="svelte-2e177m"><li><strong>Star the repo</strong> to show support and stay updated</li> <li>🔄 <strong>Try modifying</strong> the Catch game (make it harder? 
bigger grid?)</li> <li>🎮 <strong>Explore</strong> other OpenSpiel games</li> <li>🛠️ <strong>Build</strong> your own environment integration</li> <li>💬 <strong>Share</strong> what you build with the community!</li></ol> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/openenv/blob/main/docs/source/tutorials/openenv-tutorial.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Client bootstrap for this server-rendered page: publish the path settings
// as a global, then load the two entry modules and start the app.
// NOTE(review): the global's name looks build-generated and is presumably
// read by the imported modules — do not rename without confirming.
__sveltekit_1qwoa43 = {
assets: "/docs/openenv/pr_749/en",
base: "/docs/openenv/pr_749/en",
env: {}
};
// Must be read synchronously: document.currentScript is only set while this
// script is executing, so it would be null inside the .then() callback below.
const element = document.currentScript.parentElement;
// Passed to kit.start() as `data`.
// NOTE(review): two null slots, presumably one per entry in node_ids — confirm.
const data = [null,null];
// Fetch both entry modules in parallel; start only after both have loaded.
Promise.all([
import("/docs/openenv/pr_749/en/_app/immutable/entry/start.85477f45.js"),
import("/docs/openenv/pr_749/en/_app/immutable/entry/app.51835dc5.js")
]).then(([kit, app]) => {
// Start the app against the element that contains this <script>.
kit.start(app, element, {
node_ids: [0, 62],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
179 kB
·
Xet hash:
79914301419fabb84f7eefab4ddf08649d05dcb798a1221489b1f0993cb206e2

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.