{"data": [{"hoverinfo": "skip", "mode": "markers", "x": [null], "y": [null], "type": "scatter"}], "layout": {"template": {"data": {"histogram2dcontour": [{"type": "histogram2dcontour", "colorbar": {"outlinewidth": 0, "ticks": ""}, "colorscale": [[0.0, "#0d0887"], [0.1111111111111111, "#46039f"], [0.2222222222222222, "#7201a8"], [0.3333333333333333, "#9c179e"], [0.4444444444444444, "#bd3786"], [0.5555555555555556, "#d8576b"], [0.6666666666666666, "#ed7953"], [0.7777777777777778, "#fb9f3a"], [0.8888888888888888, "#fdca26"], [1.0, "#f0f921"]]}], "choropleth": [{"type": "choropleth", "colorbar": {"outlinewidth": 0, "ticks": ""}}], "histogram2d": [{"type": "histogram2d", "colorbar": {"outlinewidth": 0, "ticks": ""}, "colorscale": [[0.0, "#0d0887"], [0.1111111111111111, "#46039f"], [0.2222222222222222, "#7201a8"], [0.3333333333333333, "#9c179e"], [0.4444444444444444, "#bd3786"], [0.5555555555555556, "#d8576b"], [0.6666666666666666, "#ed7953"], [0.7777777777777778, "#fb9f3a"], [0.8888888888888888, "#fdca26"], [1.0, "#f0f921"]]}], "heatmap": [{"type": "heatmap", "colorbar": {"outlinewidth": 0, "ticks": ""}, "colorscale": [[0.0, "#0d0887"], [0.1111111111111111, "#46039f"], [0.2222222222222222, "#7201a8"], [0.3333333333333333, "#9c179e"], [0.4444444444444444, "#bd3786"], [0.5555555555555556, "#d8576b"], [0.6666666666666666, "#ed7953"], [0.7777777777777778, "#fb9f3a"], [0.8888888888888888, "#fdca26"], [1.0, "#f0f921"]]}], "contourcarpet": [{"type": "contourcarpet", "colorbar": {"outlinewidth": 0, "ticks": ""}}], "contour": [{"type": "contour", "colorbar": {"outlinewidth": 0, "ticks": ""}, "colorscale": [[0.0, "#0d0887"], [0.1111111111111111, "#46039f"], [0.2222222222222222, "#7201a8"], [0.3333333333333333, "#9c179e"], [0.4444444444444444, "#bd3786"], [0.5555555555555556, "#d8576b"], [0.6666666666666666, "#ed7953"], [0.7777777777777778, "#fb9f3a"], [0.8888888888888888, "#fdca26"], [1.0, "#f0f921"]]}], "surface": [{"type": "surface", "colorbar": {"outlinewidth": 0, "ticks": 
""}, "colorscale": [[0.0, "#0d0887"], [0.1111111111111111, "#46039f"], [0.2222222222222222, "#7201a8"], [0.3333333333333333, "#9c179e"], [0.4444444444444444, "#bd3786"], [0.5555555555555556, "#d8576b"], [0.6666666666666666, "#ed7953"], [0.7777777777777778, "#fb9f3a"], [0.8888888888888888, "#fdca26"], [1.0, "#f0f921"]]}], "mesh3d": [{"type": "mesh3d", "colorbar": {"outlinewidth": 0, "ticks": ""}}], "scatter": [{"fillpattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}, "type": "scatter"}], "parcoords": [{"type": "parcoords", "line": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "scatterpolargl": [{"type": "scatterpolargl", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "bar": [{"error_x": {"color": "#2a3f5f"}, "error_y": {"color": "#2a3f5f"}, "marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "bar"}], "scattergeo": [{"type": "scattergeo", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "scatterpolar": [{"type": "scatterpolar", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "histogram": [{"marker": {"pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "histogram"}], "scattergl": [{"type": "scattergl", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "scatter3d": [{"type": "scatter3d", "line": {"colorbar": {"outlinewidth": 0, "ticks": ""}}, "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "scattermap": [{"type": "scattermap", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "scattermapbox": [{"type": "scattermapbox", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "scatterternary": [{"type": "scatterternary", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "scattercarpet": [{"type": "scattercarpet", "marker": {"colorbar": {"outlinewidth": 0, "ticks": ""}}}], "carpet": [{"aaxis": {"endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", 
"minorgridcolor": "white", "startlinecolor": "#2a3f5f"}, "baxis": {"endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f"}, "type": "carpet"}], "table": [{"cells": {"fill": {"color": "#EBF0F8"}, "line": {"color": "white"}}, "header": {"fill": {"color": "#C8D4E3"}, "line": {"color": "white"}}, "type": "table"}], "barpolar": [{"marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "barpolar"}], "pie": [{"automargin": true, "type": "pie"}]}, "layout": {"autotypenumbers": "strict", "colorway": ["#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52"], "font": {"color": "#2a3f5f"}, "hovermode": "closest", "hoverlabel": {"align": "left"}, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": {"bgcolor": "#E5ECF6", "angularaxis": {"gridcolor": "white", "linecolor": "white", "ticks": ""}, "radialaxis": {"gridcolor": "white", "linecolor": "white", "ticks": ""}}, "ternary": {"bgcolor": "#E5ECF6", "aaxis": {"gridcolor": "white", "linecolor": "white", "ticks": ""}, "baxis": {"gridcolor": "white", "linecolor": "white", "ticks": ""}, "caxis": {"gridcolor": "white", "linecolor": "white", "ticks": ""}}, "coloraxis": {"colorbar": {"outlinewidth": 0, "ticks": ""}}, "colorscale": {"sequential": [[0.0, "#0d0887"], [0.1111111111111111, "#46039f"], [0.2222222222222222, "#7201a8"], [0.3333333333333333, "#9c179e"], [0.4444444444444444, "#bd3786"], [0.5555555555555556, "#d8576b"], [0.6666666666666666, "#ed7953"], [0.7777777777777778, "#fb9f3a"], [0.8888888888888888, "#fdca26"], [1.0, "#f0f921"]], "sequentialminus": [[0.0, "#0d0887"], [0.1111111111111111, "#46039f"], [0.2222222222222222, "#7201a8"], [0.3333333333333333, "#9c179e"], [0.4444444444444444, "#bd3786"], [0.5555555555555556, "#d8576b"], [0.6666666666666666, "#ed7953"], [0.7777777777777778, "#fb9f3a"], [0.8888888888888888, 
"#fdca26"], [1.0, "#f0f921"]], "diverging": [[0, "#8e0152"], [0.1, "#c51b7d"], [0.2, "#de77ae"], [0.3, "#f1b6da"], [0.4, "#fde0ef"], [0.5, "#f7f7f7"], [0.6, "#e6f5d0"], [0.7, "#b8e186"], [0.8, "#7fbc41"], [0.9, "#4d9221"], [1, "#276419"]]}, "xaxis": {"gridcolor": "white", "linecolor": "white", "ticks": "", "title": {"standoff": 15}, "zerolinecolor": "white", "automargin": true, "zerolinewidth": 2}, "yaxis": {"gridcolor": "white", "linecolor": "white", "ticks": "", "title": {"standoff": 15}, "zerolinecolor": "white", "automargin": true, "zerolinewidth": 2}, "scene": {"xaxis": {"backgroundcolor": "#E5ECF6", "gridcolor": "white", "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white", "gridwidth": 2}, "yaxis": {"backgroundcolor": "#E5ECF6", "gridcolor": "white", "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white", "gridwidth": 2}, "zaxis": {"backgroundcolor": "#E5ECF6", "gridcolor": "white", "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white", "gridwidth": 2}}, "shapedefaults": {"line": {"color": "#2a3f5f"}}, "annotationdefaults": {"arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1}, "geo": {"bgcolor": "white", "landcolor": "#E5ECF6", "subunitcolor": "white", "showland": true, "showlakes": true, "lakecolor": "white"}, "title": {"x": 0.05}, "mapbox": {"style": "light"}}}, "title": {"font": {"size": 16, "color": "#212121"}, "text": "HASAC: Maximum Entropy RL with Auto Temperature", "x": 0.5}, "xaxis": {"range": [-0.5, 10.5], "visible": false, "fixedrange": true}, "yaxis": {"range": [-0.5, 8], "visible": false, "fixedrange": true, "scaleanchor": "x"}, "margin": {"l": 20, "r": 20, "t": 50, "b": 20}, "width": 900, "height": 560, "plot_bgcolor": "#FFFFFF", "paper_bgcolor": "#FFFFFF", "showlegend": false, "shapes": [{"fillcolor": "#26A69A", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 0, "x1": 2, "y0": 6.5, "y1": 7.4}, 
{"fillcolor": "#FFEE58", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 0, "x1": 2.5, "y0": 4.5, "y1": 5.7}, {"fillcolor": "#42A5F5", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 3.5, "x1": 6.3, "y0": 6, "y1": 7.5}, {"fillcolor": "#EF5350", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 7, "x1": 9.5, "y0": 6, "y1": 7.5}, {"fillcolor": "#AB47BC", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 7, "x1": 9.5, "y0": 4, "y1": 5.2}, {"fillcolor": "#BDBDBD", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 7, "x1": 9.5, "y0": 2.2, "y1": 3.4000000000000004}, {"line": {"color": "#BDBDBD", "dash": "dot", "width": 1.5}, "type": "line", "x0": 8.25, "x1": 8.25, "y0": 6.0, "y1": 3.4}, {"fillcolor": "#FFA726", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 3, "x1": 6, "y0": 3.5, "y1": 4.7}, {"fillcolor": "#FFA726", "layer": "below", "line": {"color": "#78909C", "width": 1.5}, "opacity": 1.0, "type": "rect", "x0": 3, "x1": 6, "y0": 1.5, "y1": 2.7}], "annotations": [{"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Environment", "x": 1.0, "y": 6.95}, {"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Replay Buffer<br>Off-policy transitions<br>(s, a, r, s', done)<br>", "x": 1.25, "y": 5.1}, {"arrowcolor": "#546E7A", "arrowhead": 3, "arrowsize": 1.2, "arrowwidth": 1.8, "ax": 1, "axref": "x", "ay": 6.5, "ayref": "y", "showarrow": true, "standoff": 4, "startstandoff": 4, "x": 1, "xref": "x", "y": 5.7, "yref": "y"}, {"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Actor \u03c0<br>SquashedGaussianPolicy<br>\u03bc, \u03c3 = f(o)<br>a = tanh(\u03bc + \u03c3\u00b7\u03b5)<br>\u03b5 ~ N(0,1)<br>", "x": 4.9, "y": 6.75}, {"arrowcolor": "#546E7A", "arrowhead": 3, "arrowsize": 1.2, "arrowwidth": 1.8, "ax": 2, "axref": "x", "ay": 6.95, "ayref": "y", "showarrow": true, "standoff": 4, "startstandoff": 4, "x": 3.5, "xref": "x", "y": 6.75, "yref": "y"}, {"bgcolor": "rgba(255,255,255,0.85)", "borderpad": 2, "font": {"color": "#666", "size": 9}, "showarrow": false, "text": "obs", "x": 2.75, "y": 6.85}, {"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Twin Q Critics<br>SoftTwinContinuousQ<br>Q\u2081(s,a), Q\u2082(s,a)<br>use min(Q\u2081, Q\u2082)<br>", "x": 8.25, "y": 6.75}, {"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Auto \u03b1 (Temperature)<br>\u03b1 = exp(log_alpha)<br>Target entropy = \u2212dim(A)<br>L_\u03b1 = \u2212\u03b1\u00b7(log \u03c0 + H\u0304)<br>", "x": 8.25, "y": 4.6}, {"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Target Twin Q'<br>Soft update \u03c4", "x": 8.25, "y": 2.8000000000000003}, {"bgcolor": "rgba(255,255,255,0.85)", "borderpad": 2, "font": {"color": "#999", "size": 9}, "showarrow": false, "text": "soft update \u03c4", "x": 8.25, "y": 4.7}, {"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Actor Loss<br>L_\u03c0 = \u03b1\u00b7log \u03c0(a|s) \u2212 min(Q\u2081,Q\u2082)<br>Maximize entropy + Q-value<br>", "x": 4.5, "y": 4.1}, {"arrowcolor": "#546E7A", "arrowhead": 3, "arrowsize": 1.2, "arrowwidth": 1.8, "ax": 4.75, "axref": "x", "ay": 6.0, "ayref": "y", "showarrow": true, "standoff": 4, "startstandoff": 4, "x": 4.5, "xref": "x", "y": 4.7, "yref": "y"}, {"bgcolor": "rgba(255,255,255,0.85)", "borderpad": 2, "font": {"color": "#666", "size": 9}, "showarrow": false, "text": "a, log \u03c0", "x": 4.625, "y": 5.35}, {"arrowcolor": "#546E7A", "arrowhead": 3, "arrowsize": 1.2, "arrowwidth": 1.8, "ax": 7, "axref": "x", "ay": 6.75, "ayref": "y", "showarrow": true, "standoff": 4, "startstandoff": 4, "x": 6.3, "xref": "x", "y": 6.75, "yref": "y"}, {"align": "center", "font": {"color": "#212121", "size": 12}, "showarrow": false, "text": "Critic Loss<br>L_Q = (Q\u1d62 \u2212 y)\u00b2<br>y = r + \u03b3\u00b7(min Q' \u2212 \u03b1\u00b7log \u03c0')<br>", "x": 4.5, "y": 2.1}, {"arrowcolor": "#546E7A", "arrowhead": 3, "arrowsize": 1.2, "arrowwidth": 1.8, "ax": 4.5, "axref": "x", "ay": 3.5, "ayref": "y", "showarrow": true, "standoff": 4, "startstandoff": 4, "x": 4.5, "xref": "x", "y": 2.7, "yref": "y"}, {"bgcolor": "rgba(255,255,255,0.85)", "borderpad": 2, "font": {"color": "#666", "size": 9}, "showarrow": false, "text": "Q values", "x": 4.5, "y": 3.1}, {"align": "left", "bgcolor": "rgba(255,255,255,0.9)", "bordercolor": "#ddd", "borderpad": 6, "borderwidth": 1, "font": {"size": 10}, "showarrow": false, "text": "Key: entropy bonus<br>encourages exploration,<br>\u03b1 auto-tuned to target H\u0304", "x": 1, "y": 1.5}]}}