using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using UnityEngine;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;
using UnityEngine.Serialization;

namespace Unity.MLAgents
{
    /// <summary>
    /// Struct that contains all the information for an Agent, including its
    /// observations, actions and current status.
    /// </summary>
    public struct AgentInfo
    {
        /// <summary>
        /// Keeps track of the last actions taken by the Brain.
        /// </summary>
        public ActionBuffers storedActions;

        /// <summary>
        /// For discrete control, specifies the actions that the agent cannot take.
        /// An element of the mask array is <c>true</c> if the action is prohibited.
        /// </summary>
        public bool[] discreteActionMasks;

        /// <summary>
        /// The current agent reward.
        /// </summary>
        public float reward;

        /// <summary>
        /// The current group reward received by the agent.
        /// </summary>
        public float groupReward;

        /// <summary>
        /// Whether the agent is done or not.
        /// </summary>
        public bool done;

        /// <summary>
        /// Whether the agent has reached its max step count for this episode.
        /// </summary>
        public bool maxStepReached;

        /// <summary>
        /// Episode identifier each agent receives at every reset. It is used
        /// to distinguish between different agents in the environment.
        /// </summary>
        public int episodeId;

        /// <summary>
        /// MultiAgentGroup identifier.
        /// </summary>
        public int groupId;
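
        /// <summary>
        /// Resets the stored continuous and discrete actions to their default (zero) values.
        /// </summary>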
        public void ClearActions()
        {
            storedActions.Clear();
        }
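
        /// <summary>
        /// Copies the continuous and discrete actions from <paramref name="actionBuffers"/>
        /// into the stored action buffers.
        /// </summary>
        /// <param name="actionBuffers">The source buffers to copy the actions from.</param>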
        public void CopyActions(ActionBuffers actionBuffers)
        {
            var continuousActions = storedActions.ContinuousActions;
            for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
            {
                continuousActions[i] = actionBuffers.ContinuousActions[i];
            }
            var discreteActions = storedActions.DiscreteActions;
            for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
            {
                discreteActions[i] = actionBuffers.DiscreteActions[i];
            }
        }
    }

    /// <summary>
    /// Simple wrapper around VectorActuator that overrides GetBuiltInActuatorType
    /// so that it can be distinguished from a standard VectorActuator.
    /// </summary>
    internal class AgentVectorActuator : VectorActuator
    {
        public AgentVectorActuator(IActionReceiver actionReceiver,
            IHeuristicProvider heuristicProvider,
            ActionSpec actionSpec,
            string name = "VectorActuator"
        ) : base(actionReceiver, heuristicProvider, actionSpec, name)
        { }

        public override BuiltInActuatorType GetBuiltInActuatorType()
        {
            return BuiltInActuatorType.AgentVectorActuator;
        }
    }

    /// <summary>
    /// An agent is an actor that can observe its environment, decide on the
    /// best course of action using those observations, and execute those actions
    /// within the environment.
    /// </summary>
    /// <remarks>
    /// Use the Agent class as the subclass for implementing your own agents. Add
    /// your Agent implementation to a [GameObject] in the [Unity scene] that serves
    /// as the agent's environment.
    ///
    /// Agents in an environment operate in *steps*. At each step, an agent collects observations,
    /// passes them to its decision-making policy, and receives an action vector in response.
    ///
    /// Agents make observations using <see cref="ISensor"/> implementations. The ML-Agents
    /// API provides implementations for visual observations (<see cref="CameraSensor"/>),
    /// raycast observations (<see cref="RayPerceptionSensor"/>), and arbitrary
    /// data observations (<see cref="VectorSensor"/>). You can add the
    /// <see cref="CameraSensorComponent"/> and <see cref="RayPerceptionSensorComponent2D"/> or
    /// <see cref="RayPerceptionSensorComponent3D"/> components to an agent's [GameObject] to use
    /// those sensor types. You can implement the <see cref="CollectObservations(VectorSensor)"/>
    /// function in your Agent subclass to use a vector observation. The Agent class calls this
    /// function before it uses the observation vector to make a decision. (If you only use
    /// visual or raycast observations, you do not need to implement
    /// <see cref="CollectObservations"/>.)
    ///
    /// Assign a decision making policy to an agent using a <see cref="BehaviorParameters"/>
    /// component attached to the agent's [GameObject]. The <see cref="BehaviorType"/> setting
    /// determines how decisions are made:
    ///
    /// * <see cref="BehaviorType.Default"/>: decisions are made by the external process,
    /// when connected. Otherwise, decisions are made using inference. If no inference model
    /// is specified in the BehaviorParameters component, then heuristic decision
    /// making is used.
    /// * <see cref="BehaviorType.InferenceOnly"/>: decisions are always made using the trained
    /// model specified in the <see cref="BehaviorParameters"/> component.
    /// * <see cref="BehaviorType.HeuristicOnly"/>: when a decision is needed, the agent's
    /// <see cref="Heuristic(in ActionBuffers)"/> function is called. Your implementation is responsible for
    /// providing the appropriate action.
    ///
    /// To trigger an agent decision automatically, you can attach a <see cref="DecisionRequester"/>
    /// component to the Agent game object. You can also call the agent's <see cref="RequestDecision"/>
    /// function manually. You only need to call <see cref="RequestDecision"/> when the agent is
    /// in a position to act upon the decision. In many cases, this will be every [FixedUpdate]
    /// callback, but could be less frequent. For example, an agent that hops around its environment
    /// can only take an action when it touches the ground, so several frames might elapse between
    /// one decision and the need for the next.
    ///
    /// Use the <see cref="OnActionReceived(ActionBuffers)"/> function to implement the actions your agent can take,
    /// such as moving to reach a goal or interacting with its environment.
    ///
    /// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="MaxStep"/> count,
    /// its current episode ends. You can reset the agent -- or remove it from the
    /// environment -- by implementing the <see cref="OnEpisodeBegin"/> function. An agent also
    /// becomes done when the <see cref="Academy"/> resets the environment, which only happens when
    /// the <see cref="Academy"/> receives a reset signal from an external process via the
    /// <see cref="Academy.Communicator"/>.
    ///
    /// The Agent class extends the Unity [MonoBehaviour] class. You can implement the
    /// standard [MonoBehaviour] functions as needed for your agent. Since an agent's
    /// observations and actions typically take place during the [FixedUpdate] phase, you should
    /// only use the [MonoBehaviour.Update] function for cosmetic purposes. If you override the [MonoBehaviour]
    /// methods, [OnEnable()] or [OnDisable()], always call the base Agent class implementations.
    ///
    /// You can implement the <see cref="Heuristic(in ActionBuffers)"/> function to specify agent actions using
    /// your own heuristic algorithm. Implementing a heuristic function can be useful
    /// for debugging. For example, you can use keyboard input to select agent actions in
    /// order to manually control an agent's behavior.
    ///
    /// Note that you can change the inference model assigned to an agent at any step
    /// by calling <see cref="SetModel"/>.
    ///
    /// See [Agents] and [Reinforcement Learning in Unity] in the [Unity ML-Agents Toolkit manual] for
    /// more information on creating and training agents.
    ///
    /// For sample implementations of agent behavior, see the examples available in the
    /// [Unity ML-Agents Toolkit] on Github.
    ///
    /// [MonoBehaviour]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.html
    /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
    /// [Unity scene]: https://docs.unity3d.com/Manual/CreatingScenes.html
    /// [FixedUpdate]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.FixedUpdate.html
    /// [MonoBehaviour.Update]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.Update.html
    /// [OnEnable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnEnable.html
    /// [OnDisable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnDisable.html
    /// [OnBeforeSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnBeforeSerialize.html
    /// [OnAfterSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnAfterSerialize.html
    /// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md
    /// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design.md
    /// [Unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
    /// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Readme.md
    ///
    /// </remarks>
    [HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/" +
        "docs/Learning-Environment-Design-Agents.md")]
    [Serializable]
    [RequireComponent(typeof(BehaviorParameters))]
    [DefaultExecutionOrder(-50)]
    public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver, IHeuristicProvider
    {
        IPolicy m_Brain;
        BehaviorParameters m_PolicyFactory;

        /// This code is here to make the upgrade path for users using MaxStep
        /// easier. We will hook into the Serialization code and make sure that
        /// agentParameters.maxStep and this.maxStep are in sync.
        [Serializable]
        internal struct AgentParameters
        {
            public int maxStep;
        }

        [SerializeField]
        [HideInInspector]
        internal AgentParameters agentParameters;

        [SerializeField]
        [HideInInspector]
        internal bool hasUpgradedFromAgentParameters;

        /// <summary>
        /// The maximum number of steps the agent takes before being done.
        /// </summary>
        /// <value>The maximum steps for an agent to take before it resets; or 0 for
        /// unlimited steps.</value>
        /// <remarks>
        /// The max step value determines the maximum length of an agent's episodes.
        /// Set to a positive integer to limit the episode length to that many steps.
        /// Set to 0 for unlimited episode length.
        ///
        /// When an episode ends and a new one begins, the Agent object's
        /// <seealso cref="OnEpisodeBegin"/> function is called. You can implement
        /// <see cref="OnEpisodeBegin"/> to reset the agent or remove it from the
        /// environment. An agent's episode can also end if you call its <seealso cref="EndEpisode"/>
        /// method or an external process resets the environment through the <see cref="Academy"/>.
        ///
        /// Consider limiting the number of steps in an episode to avoid wasting time during
        /// training. If you set the max step value to a reasonable estimate of the time it should
        /// take to complete a task, then agents that haven’t succeeded in that time frame will
        /// reset and start a new training episode rather than continue to fail.
        /// </remarks>
        /// <example>
        /// To use a step limit when training while allowing agents to run without resetting
        /// outside of training, you can set the max step to 0 in <see cref="Initialize"/>
        /// if the <see cref="Academy"/> is not connected to an external process.
        /// <code>
        /// using Unity.MLAgents;
        ///
        /// public class MyAgent : Agent
        /// {
        ///     public override void Initialize()
        ///     {
        ///         if (!Academy.Instance.IsCommunicatorOn)
        ///         {
        ///             this.MaxStep = 0;
        ///         }
        ///     }
        /// }
        /// </code>
        /// **Note:** in general, you should limit the differences between the code you execute
        /// during training and the code you run during inference.
        /// </example>
        [FormerlySerializedAs("maxStep")]
        [HideInInspector] public int MaxStep;

        /// Current Agent information (message sent to Brain).
        AgentInfo m_Info;

        /// Represents the reward the agent accumulated during the current step.
        /// It is reset to 0 at the beginning of every step.
        /// Should be set to a positive value when the agent performs a "good"
        /// action that we wish to reinforce/reward, and set to a negative value
        /// when the agent performs a "bad" action that we wish to punish/deter.
        /// Additionally, the magnitude of the reward should not exceed 1.0.
        float m_Reward;

        /// Represents the group reward the agent accumulated during the current step.
        float m_GroupReward;

        /// Keeps track of the cumulative reward in this episode.
        float m_CumulativeReward;

        /// Whether or not the agent requests an action.
        bool m_RequestAction;

        /// Whether or not the agent requests a decision.
        bool m_RequestDecision;

        /// Keeps track of the number of steps taken by the agent in this episode.
        /// Note that this value is different for each agent, and may not overlap
        /// with the step counter in the Academy, since agents reset based on
        /// their own experience.
        int m_StepCount;

        /// Number of times the Agent has completed an episode.
        int m_CompletedEpisodes;

        /// Episode identifier each agent receives. It is used
        /// to distinguish between different agents in the environment.
        /// This Id will be changed every time the Agent resets.
        int m_EpisodeId;

        /// Whether or not the Agent has been initialized already.
        bool m_Initialized;

        /// <summary>
        /// Set of DemonstrationWriters that the Agent will write its step information to.
        /// If you use a DemonstrationRecorder component, this will automatically register its DemonstrationWriter.
        /// You can also add your own DemonstrationWriter by calling
        /// DemonstrationRecorder.AddDemonstrationWriterToAgent()
        /// </summary>
        internal ISet<DemonstrationWriter> DemonstrationWriters = new HashSet<DemonstrationWriter>();

        /// <summary>
        /// List of sensors used to generate observations.
        /// Currently generated from attached SensorComponents, and a legacy VectorSensor.
        /// </summary>
        internal List<ISensor> sensors;

        /// <summary>
        /// VectorSensor which is written to by <see cref="CollectObservations(VectorSensor)"/>.
        /// </summary>
        internal VectorSensor collectObservationsSensor;

        /// <summary>
        /// StackingSensor which stacks the observations written by <see cref="CollectObservations(VectorSensor)"/>.
        /// </summary>
        internal StackingSensor stackedCollectObservationsSensor;

        private RecursionChecker m_CollectObservationsChecker = new RecursionChecker("CollectObservations");
        private RecursionChecker m_OnEpisodeBeginChecker = new RecursionChecker("OnEpisodeBegin");

        /// <summary>
        /// List of IActuators that this Agent will delegate actions to if any exist.
        /// </summary>
        ActuatorManager m_ActuatorManager;

        /// <summary>
        /// VectorActuator which is used by default if no other actuators exist on this Agent. This VectorActuator will
        /// delegate its actions to <see cref="OnActionReceived(ActionBuffers)"/> by default in order to keep backward compatibility
        /// with the current behavior of Agent.
        /// </summary>
        IActuator m_VectorActuator;

        /// Current MultiAgentGroup ID. Defaults to 0 (meaning no group).
        int m_GroupId;

        /// Delegate for the agent to unregister itself from the MultiAgentGroup without a cyclic reference
        /// between the agent and the group.
        internal event Action<Agent> OnAgentDisabled;

        /// <summary>
        /// Called when the Agent is being loaded (before OnEnable()).
        /// </summary>
        /// <remarks>
        /// This function registers the RpcCommunicator delegate if no delegate has been registered with CommunicatorFactory.
        /// Always call the base Agent class version of this function if you implement `Awake()` in your
        /// own Agent subclasses.
        /// </remarks>
        /// <example>
        /// <code>
        /// protected override void Awake()
        /// {
        ///     base.Awake();
        ///     // additional Awake logic...
        /// }
        /// </code>
        /// </example>
        protected internal virtual void Awake()
        {
            if (!CommunicatorFactory.CommunicatorRegistered)
            {
                Debug.Log("Registered Communicator in Agent.");
                CommunicatorFactory.Register<ICommunicator>(RpcCommunicator.Create);
            }
        }

        /// <summary>
        /// Called when the attached [GameObject] becomes enabled and active.
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </summary>
        /// <remarks>
        /// This function initializes the Agent instance, if it hasn't been initialized yet.
        /// Always call the base Agent class version of this function if you implement `OnEnable()`
        /// in your own Agent subclasses.
        /// </remarks>
        /// <example>
        /// <code>
        /// protected override void OnEnable()
        /// {
        ///     base.OnEnable();
        ///     // additional OnEnable logic...
        /// }
        /// </code>
        /// </example>
        protected virtual void OnEnable()
        {
            LazyInitialize();
        }

        /// <summary>
        /// Called by Unity immediately before serializing this object.
        /// </summary>
        /// <remarks>
        /// The Agent class uses OnBeforeSerialize() for internal housekeeping. Call the
        /// base class implementation if you need your own custom serialization logic.
        ///
        /// See [OnBeforeSerialize] for more information.
        ///
        /// [OnBeforeSerialize]: https://docs.unity3d.com/ScriptReference/ISerializationCallbackReceiver.OnBeforeSerialize.html
        /// </remarks>
        /// <example>
        /// <code>
        /// public new void OnBeforeSerialize()
        /// {
        ///     base.OnBeforeSerialize();
        ///     // additional serialization logic...
        /// }
        /// </code>
        /// </example>
        public void OnBeforeSerialize()
        {
            // Manages a serialization upgrade issue from v0.13 to v0.14 where MaxStep moved
            // from AgentParameters (since removed) to Agent
            if (MaxStep == 0 && MaxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
            {
                MaxStep = agentParameters.maxStep;
            }
            hasUpgradedFromAgentParameters = true;
        }

        /// <summary>
        /// Called by Unity immediately after deserializing this object.
        /// </summary>
        /// <remarks>
        /// The Agent class uses OnAfterDeserialize() for internal housekeeping. Call the
        /// base class implementation if you need your own custom deserialization logic.
        ///
        /// See [OnAfterDeserialize] for more information.
        ///
        /// [OnAfterDeserialize]: https://docs.unity3d.com/ScriptReference/ISerializationCallbackReceiver.OnAfterDeserialize.html
        /// </remarks>
        /// <example>
        /// <code>
        /// public new void OnAfterDeserialize()
        /// {
        ///     base.OnAfterDeserialize();
        ///     // additional deserialization logic...
        /// }
        /// </code>
        /// </example>
        public void OnAfterDeserialize()
        {
            // Manages a serialization upgrade issue from v0.13 to v0.14 where MaxStep moved
            // from AgentParameters (since removed) to Agent
            if (MaxStep == 0 && MaxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
            {
                MaxStep = agentParameters.maxStep;
            }
            hasUpgradedFromAgentParameters = true;
        }

        /// <summary>
        /// Initializes the agent. Can be safely called multiple times.
        /// </summary>
        /// <remarks>
        /// This function calls your <seealso cref="Initialize"/> implementation, if one exists.
        /// </remarks>
        public void LazyInitialize()
        {
            if (m_Initialized)
            {
                return;
            }
            m_Initialized = true;

            // Grab the "static" properties for the Agent.
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
            m_PolicyFactory = GetComponent<BehaviorParameters>();

            m_Info = new AgentInfo();
            sensors = new List<ISensor>();

            Academy.Instance.AgentIncrementStep += AgentIncrementStep;
            Academy.Instance.AgentSendState += SendInfo;
            Academy.Instance.DecideAction += DecideAction;
            Academy.Instance.AgentAct += AgentStep;
            Academy.Instance.AgentForceReset += _AgentReset;

            using (TimerStack.Instance.Scoped("InitializeActuators"))
            {
                InitializeActuators();
            }
            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
            ResetData();
            Initialize();
            using (TimerStack.Instance.Scoped("InitializeSensors"))
            {
                InitializeSensors();
            }
            m_Info.storedActions = new ActionBuffers(
                new float[m_ActuatorManager.NumContinuousActions],
                new int[m_ActuatorManager.NumDiscreteActions]
            );
            m_Info.groupId = m_GroupId;

            // The first time the Academy resets, all Agents in the scene will be
            // forced to reset through the <see cref="AgentForceReset"/> event.
            // To avoid the Agent resetting twice, the Agents will not begin their
            // episode when initializing until after the Academy has had its first reset.
            if (Academy.Instance.TotalStepCount != 0)
            {
                using (m_OnEpisodeBeginChecker.Start())
                {
                    OnEpisodeBegin();
                }
            }
        }

        /// <summary>
        /// The reason that the Agent has been set to "done".
        /// </summary>
        enum DoneReason
        {
            /// <summary>
            /// The episode was ended manually by calling <see cref="EndEpisode"/>.
            /// </summary>
            DoneCalled,

            /// <summary>
            /// The max steps for the Agent were reached.
            /// </summary>
            MaxStepReached,

            /// <summary>
            /// The Agent was disabled.
            /// </summary>
            Disabled,
        }

        /// <summary>
        /// Called when the attached [GameObject] becomes disabled and inactive.
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </summary>
        /// <remarks>
        /// Always call the base Agent class version of this function if you implement `OnDisable()`
        /// in your own Agent subclasses.
        /// </remarks>
        /// <example>
        /// <code>
        /// protected override void OnDisable()
        /// {
        ///     base.OnDisable();
        ///     // additional OnDisable logic...
        /// }
        /// </code>
        /// </example>
        /// <seealso cref="OnEnable"/>
        protected virtual void OnDisable()
        {
            DemonstrationWriters.Clear();

            // If Academy.Dispose has already been called, we don't need to unregister with it.
            // We don't want to even try, because this will lazily create a new Academy!
            if (Academy.IsInitialized)
            {
                Academy.Instance.AgentIncrementStep -= AgentIncrementStep;
                Academy.Instance.AgentSendState -= SendInfo;
                Academy.Instance.DecideAction -= DecideAction;
                Academy.Instance.AgentAct -= AgentStep;
                Academy.Instance.AgentForceReset -= _AgentReset;
                NotifyAgentDone(DoneReason.Disabled);
            }
            CleanupSensors();
            m_Brain?.Dispose();
            OnAgentDisabled?.Invoke(this);
            m_Initialized = false;
        }

        void NotifyAgentDone(DoneReason doneReason)
        {
            if (m_Info.done)
            {
                // The Agent was already marked as Done and should not be notified again.
                return;
            }
            m_Info.episodeId = m_EpisodeId;
            m_Info.reward = m_Reward;
            m_Info.groupReward = m_GroupReward;
            m_Info.done = true;
            m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
            m_Info.groupId = m_GroupId;
            UpdateSensors();
            // Make sure the latest observations are being passed to training.
            using (m_CollectObservationsChecker.Start())
            {
                CollectObservations(collectObservationsSensor);
            }
            // Request the last decision with no callbacks.
            // We request a decision so Python knows the Agent is done immediately.
            m_Brain?.RequestDecision(m_Info, sensors);

            // We also have to write to any DemonstrationWriters so that they get the "done" flag.
            if (DemonstrationWriters.Count != 0)
            {
                foreach (var demoWriter in DemonstrationWriters)
                {
                    demoWriter.Record(m_Info, sensors);
                }
            }

            ResetSensors();

            if (doneReason != DoneReason.Disabled)
            {
                // We don't want to update the reward stats when the Agent is disabled, because this will make
                // the rewards look lower than they actually are during shutdown.
                m_CompletedEpisodes++;
                UpdateRewardStats();
            }

            m_Reward = 0f;
            m_GroupReward = 0f;
            m_CumulativeReward = 0f;
            m_RequestAction = false;
            m_RequestDecision = false;
            m_Info.storedActions.Clear();
        }

        /// <summary>
        /// Updates the Model assigned to this Agent instance.
        /// </summary>
        /// <remarks>
        /// If the agent already has an assigned model, that model is replaced with the
        /// provided one. However, if you call this function with arguments that are
        /// identical to the current parameters of the agent, then no changes are made.
        ///
        /// **Note:** the <paramref name="behaviorName"/> parameter is ignored when not training.
        /// The <paramref name="model"/> and <paramref name="inferenceDevice"/> parameters
        /// are ignored when not using inference.
        /// </remarks>
        /// <param name="behaviorName">The identifier of the behavior. This
        /// will categorize the agent when training.
        /// </param>
        /// <param name="model">The model to use for inference.</param>
        /// <param name="inferenceDevice">Define the device on which the model
        /// will be run.</param>
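        /// <example>
        /// A minimal sketch of swapping models at runtime, assuming a hypothetical serialized
        /// <c>pushBlockModel</c> asset and a behavior named "PushBlock":
        /// <code>
        /// public NNModel pushBlockModel; // hypothetical field, assigned in the Inspector
        ///
        /// void SwitchToPushBlock()
        /// {
        ///     // Ends the current episode and reloads the policy with the new model.
        ///     SetModel("PushBlock", pushBlockModel);
        /// }
        /// </code>
        /// </example>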
        public void SetModel(
            string behaviorName,
            NNModel model,
            InferenceDevice inferenceDevice = InferenceDevice.Default)
        {
            if (behaviorName == m_PolicyFactory.BehaviorName &&
                model == m_PolicyFactory.Model &&
                inferenceDevice == m_PolicyFactory.InferenceDevice)
            {
                // If everything is the same, don't make any changes.
                return;
            }
            NotifyAgentDone(DoneReason.Disabled);
            m_PolicyFactory.Model = model;
            m_PolicyFactory.InferenceDevice = inferenceDevice;
            m_PolicyFactory.BehaviorName = behaviorName;
            ReloadPolicy();
        }

        internal void ReloadPolicy()
        {
            if (!m_Initialized)
            {
                // If we haven't initialized yet, no need to make any changes now; they'll
                // happen in LazyInitialize later.
                return;
            }
            m_Brain?.Dispose();
            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
        }

        /// <summary>
        /// Returns the current step counter (within the current episode).
        /// </summary>
        /// <returns>
        /// Current step count.
        /// </returns>
        public int StepCount
        {
            get { return m_StepCount; }
        }

        /// <summary>
        /// Returns the number of episodes that the Agent has completed (either <see cref="Agent.EndEpisode()"/>
        /// was called, or <see cref="MaxStep"/> was reached).
        /// </summary>
        /// <returns>
        /// Current episode count.
        /// </returns>
        public int CompletedEpisodes
        {
            get { return m_CompletedEpisodes; }
        }

        /// <summary>
        /// Overrides the current step reward of the agent and updates the episode
        /// reward accordingly.
        /// </summary>
        /// <remarks>
        /// This function replaces any rewards given to the agent during the current step.
        /// Use <see cref="AddReward(float)"/> to incrementally change the reward rather than
        /// overriding it.
        ///
        /// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(ActionBuffers)"/>
        /// implementation after carrying out the received action and evaluating its success.
        ///
        /// Rewards are used during reinforcement learning; they are ignored during inference.
        ///
        /// See [Agents - Rewards] for general advice on implementing rewards and [Reward Signals]
        /// for information about mixing reward signals from curiosity and Generative Adversarial
        /// Imitation Learning (GAIL) with rewards supplied through this method.
        ///
        /// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md#rewards
        /// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
        /// </remarks>
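        /// <example>
        /// A minimal sketch, assuming a hypothetical <c>reachedGoal</c> flag set elsewhere
        /// in the agent:
        /// <code>
        /// public override void OnActionReceived(ActionBuffers actions)
        /// {
        ///     // ... carry out the action ...
        ///     if (reachedGoal)
        ///     {
        ///         // Overwrite whatever reward was assigned earlier this step.
        ///         SetReward(1.0f);
        ///         EndEpisode();
        ///     }
        /// }
        /// </code>
        /// </example>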
| /// <param name="reward">The new value of the reward.</param> | |
| public void SetReward(float reward) | |
| { | |
| Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetReward)); | |
| m_CumulativeReward += (reward - m_Reward); | |
| m_Reward = reward; | |
| } | |
        /// <summary>
        /// Increments the step and episode rewards by the provided value.
        /// </summary>
        /// <remarks>Use a positive reward to reinforce desired behavior. You can use a
        /// negative reward to penalize mistakes. Use <seealso cref="SetReward(float)"/> to
        /// set the reward assigned to the current step with a specific value rather than
        /// increasing or decreasing it.
        ///
        /// Typically, you assign rewards in the Agent subclass's <see cref="IActionReceiver.OnActionReceived"/>
        /// implementation after carrying out the received action and evaluating its success.
        ///
        /// Rewards are used during reinforcement learning; they are ignored during inference.
        ///
        /// See [Agents - Rewards] for general advice on implementing rewards and [Reward Signals]
        /// for information about mixing reward signals from curiosity and Generative Adversarial
        /// Imitation Learning (GAIL) with rewards supplied through this method.
        ///
        /// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md#rewards
        /// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
        /// </remarks>
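        /// <example>
        /// A minimal sketch that applies a small per-step penalty to encourage finishing
        /// quickly; the -1/MaxStep magnitude is a common convention, not a requirement
        /// (assumes <see cref="MaxStep"/> is positive):
        /// <code>
        /// public override void OnActionReceived(ActionBuffers actions)
        /// {
        ///     // ... carry out the action ...
        ///     AddReward(-1.0f / MaxStep); // assumes MaxStep > 0
        /// }
        /// </code>
        /// </example>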
| /// <param name="increment">Incremental reward value.</param> | |
| public void AddReward(float increment) | |
| { | |
| Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddReward)); | |
| m_Reward += increment; | |
| m_CumulativeReward += increment; | |
| } | |
| internal void SetGroupReward(float reward) | |
| { | |
| Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetGroupReward)); | |
| m_GroupReward = reward; | |
| } | |
| internal void AddGroupReward(float increment) | |
| { | |
| Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddGroupReward)); | |
| m_GroupReward += increment; | |
| } | |
| /// <summary> | |
| /// Retrieves the episode reward for the Agent. | |
| /// </summary> | |
| /// <returns>The episode reward.</returns> | |
| public float GetCumulativeReward() | |
| { | |
| return m_CumulativeReward; | |
| } | |
| void UpdateRewardStats() | |
| { | |
| var gaugeName = $"{m_PolicyFactory.BehaviorName}.CumulativeReward"; | |
| TimerStack.Instance.SetGauge(gaugeName, GetCumulativeReward()); | |
| } | |
        /// <summary>
        /// Sets the done flag to true and resets the agent.
        /// </summary>
        /// <remarks>
        /// This should be used when the episode can no longer continue, such as when the Agent
        /// reaches the goal or fails at the task.
        /// </remarks>
        /// <seealso cref="OnEpisodeBegin"/>
        /// <seealso cref="EpisodeInterrupted"/>
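        /// <example>
        /// A minimal sketch, assuming hypothetical <c>reachedGoal</c> and <c>fellOffPlatform</c>
        /// conditions computed elsewhere in the agent:
        /// <code>
        /// if (reachedGoal || fellOffPlatform)
        /// {
        ///     EndEpisode();
        /// }
        /// </code>
        /// </example>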
        public void EndEpisode()
        {
            EndEpisodeAndReset(DoneReason.DoneCalled);
        }

        /// <summary>
        /// Indicate that the episode is over but not due to the "fault" of the Agent.
        /// This has the same end result as calling <see cref="EndEpisode"/>, but has a
        /// slightly different effect on training.
        /// </summary>
        /// <remarks>
        /// This should be used when the episode could continue, but has gone on for
        /// a sufficient number of steps.
        /// </remarks>
        /// <seealso cref="OnEpisodeBegin"/>
        /// <seealso cref="EndEpisode"/>
        public void EpisodeInterrupted()
        {
            EndEpisodeAndReset(DoneReason.MaxStepReached);
        }

        /// <summary>
        /// Internal method to end the episode and reset the Agent.
        /// </summary>
        /// <param name="reason">The reason the episode ended.</param>
        void EndEpisodeAndReset(DoneReason reason)
        {
            NotifyAgentDone(reason);
            _AgentReset();
        }

        /// <summary>
        /// Requests a new decision for this agent.
        /// </summary>
        /// <remarks>
        /// Call `RequestDecision()` whenever an agent needs a decision. You often
        /// want to request a decision every environment step. However, if an agent
        /// cannot use the decision every step, then you can request a decision less
        /// frequently.
        ///
        /// You can add a <seealso cref="DecisionRequester"/> component to the agent's
        /// [GameObject] to drive the agent's decision making. When you use this component,
        /// do not call `RequestDecision()` separately.
        ///
        /// Note that this function calls <seealso cref="RequestAction"/>; you do not need to
        /// call both functions at the same time.
        ///
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </remarks>
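        /// <example>
        /// A minimal sketch of manual decision requests, assuming a hypothetical
        /// <c>IsGrounded()</c> helper on your agent:
        /// <code>
        /// void FixedUpdate()
        /// {
        ///     // Only request a decision when the agent is able to act on it.
        ///     if (IsGrounded())
        ///     {
        ///         RequestDecision();
        ///     }
        /// }
        /// </code>
        /// </example>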
        public void RequestDecision()
        {
            m_RequestDecision = true;
            RequestAction();
        }

        /// <summary>
        /// Requests an action for this agent.
        /// </summary>
        /// <remarks>
        /// Call `RequestAction()` to repeat the previous action returned by the agent's
        /// most recent decision. A new decision is not requested. When you call this function,
        /// the Agent instance invokes <seealso cref="IActionReceiver.OnActionReceived"/> with the
        /// existing action vector.
        ///
        /// You can use `RequestAction()` in situations where an agent must take an action
        /// every update, but doesn't need to make a decision as often. For example, an
        /// agent that moves through its environment might need to apply an action to keep
        /// moving, but only needs to make a decision to change course or speed occasionally.
        ///
        /// You can add a <seealso cref="DecisionRequester"/> component to the agent's
        /// [GameObject] to drive the agent's decision making and action frequency. When you
        /// use this component, do not call `RequestAction()` separately.
        ///
        /// Note that <seealso cref="RequestDecision"/> calls `RequestAction()`; you do not need to
        /// call both functions at the same time.
        ///
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </remarks>
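        /// <example>
        /// A minimal sketch that requests a new decision every fifth physics step and
        /// repeats the previous action in between; the <see cref="DecisionRequester"/>
        /// component implements this same pattern for you:
        /// <code>
        /// int m_StepsSinceDecision; // hypothetical counter field
        ///
        /// void FixedUpdate()
        /// {
        ///     if (m_StepsSinceDecision >= 5)
        ///     {
        ///         m_StepsSinceDecision = 0;
        ///         RequestDecision(); // also calls RequestAction()
        ///     }
        ///     else
        ///     {
        ///         m_StepsSinceDecision++;
        ///         RequestAction();
        ///     }
        /// }
        /// </code>
        /// </example>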
        public void RequestAction()
        {
            m_RequestAction = true;
        }

        /// Helper function that resets all the data structures associated with
        /// the agent. Typically used when the agent is being initialized or reset
        /// at the end of an episode.
        void ResetData()
        {
            m_ActuatorManager?.ResetData();
        }

        /// <summary>
        /// Implement `Initialize()` to perform one-time initialization or set up of the
        /// Agent instance.
        /// </summary>
        /// <remarks>
        /// `Initialize()` is called once when the agent is first enabled. If, for example,
        /// the Agent object needs references to other [GameObjects] in the scene, you
        /// can collect and store those references here.
        ///
        /// Note that <seealso cref="OnEpisodeBegin"/> is called at the start of each of
        /// the agent's "episodes". You can use that function for items that need to be reset
        /// for each episode.
        ///
        /// [GameObjects]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </remarks>
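        /// <example>
        /// A minimal sketch that caches a component reference once per agent lifetime,
        /// assuming the agent's GameObject has a Rigidbody and a hypothetical
        /// <c>m_Rigidbody</c> field:
        /// <code>
        /// Rigidbody m_Rigidbody;
        ///
        /// public override void Initialize()
        /// {
        ///     m_Rigidbody = GetComponent&lt;Rigidbody&gt;();
        /// }
        /// </code>
        /// </example>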
        public virtual void Initialize() { }

        /// <summary>
        /// Implement <see cref="Heuristic"/> to choose an action for this agent using a custom heuristic.
        /// </summary>
        /// <remarks>
        /// Implement this function to provide custom decision making logic or to support manual
        /// control of an agent using keyboard, mouse, game controller input, or a script.
        ///
        /// Your heuristic implementation can use any decision making logic you specify. Assign decision
        /// values to the <see cref="ActionBuffers.ContinuousActions"/> and <see cref="ActionBuffers.DiscreteActions"/>
        /// arrays, passed to your function as a parameter.
        /// The same buffers will be reused between steps. It is up to the user to initialize
        /// the values on each call, for example by calling `actionsOut.Clear()`.
        /// Add values to the array at the same indexes as they are used in your
        /// <seealso cref="IActionReceiver.OnActionReceived"/> function, which receives this array and
        /// implements the corresponding agent behavior. See [Actions] for more information
        /// about agent actions.
        /// Note: Do not create a new array of actions in the `Heuristic()` method,
        /// as this will prevent writing values to the original action buffers.
        ///
        /// An agent calls this `Heuristic()` function to make a decision when you set its behavior
        /// type to <see cref="BehaviorType.HeuristicOnly"/>. The agent also calls this function if
        /// you set its behavior type to <see cref="BehaviorType.Default"/> when the
        /// <see cref="Academy"/> is not connected to an external training process and you do not
        /// assign a trained model to the agent.
        ///
        /// To perform imitation learning, implement manual control of the agent in the `Heuristic()`
        /// function so that you can record the demonstrations required for the imitation learning
        /// algorithms. (Attach a [Demonstration Recorder] component to the agent's [GameObject] to
        /// record the demonstration session to a file.)
        ///
        /// Even when you don’t plan to use heuristic decisions for an agent or imitation learning,
        /// implementing a simple heuristic function can aid in debugging agent actions and interactions
        /// with its environment.
        ///
        /// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
        /// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md#actions
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </remarks>
        /// <example>
        /// The following example illustrates a `Heuristic()` function that provides WASD-style
        /// keyboard control for an agent that can move in two dimensions as well as jump. See
        /// [Input Manager] for more information about the built-in Unity input functions.
        /// You can also use the [Input System package], which provides a more flexible and
        /// configurable input system.
        /// <code>
        /// public override void Heuristic(in ActionBuffers actionsOut)
        /// {
        ///     var continuousActionsOut = actionsOut.ContinuousActions;
        ///     continuousActionsOut[0] = Input.GetAxis("Horizontal");
        ///     continuousActionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
        ///     continuousActionsOut[2] = Input.GetAxis("Vertical");
        /// }
        /// </code>
        /// [Input Manager]: https://docs.unity3d.com/Manual/class-InputManager.html
        /// [Input System package]: https://docs.unity3d.com/Packages/com.unity.inputsystem@1.0/manual/index.html
        /// </example>
        /// <param name="actionsOut">The <see cref="ActionBuffers"/> which contain the continuous and
        /// discrete action buffers to write to.</param>
        /// <seealso cref="IActionReceiver.OnActionReceived"/>
        public virtual void Heuristic(in ActionBuffers actionsOut)
        {
            Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
        }

        /// <summary>
        /// Set up the list of ISensors on the Agent. By default, this will select any
        /// SensorComponents attached to the Agent.
        /// </summary>
        internal void InitializeSensors()
        {
            if (m_PolicyFactory == null)
            {
                m_PolicyFactory = GetComponent<BehaviorParameters>();
            }
            if (m_PolicyFactory.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
            {
                var excludeInherited =
                    m_PolicyFactory.ObservableAttributeHandling == ObservableAttributeOptions.ExcludeInherited;
                using (TimerStack.Instance.Scoped("CreateObservableSensors"))
                {
                    var observableSensors = ObservableAttribute.CreateObservableSensors(this, excludeInherited);
                    sensors.AddRange(observableSensors);
                }
            }

            // Get all attached sensor components
            SensorComponent[] attachedSensorComponents;
            if (m_PolicyFactory.UseChildSensors)
            {
                attachedSensorComponents = GetComponentsInChildren<SensorComponent>();
            }
            else
            {
                attachedSensorComponents = GetComponents<SensorComponent>();
            }

            sensors.Capacity += attachedSensorComponents.Length;
            foreach (var component in attachedSensorComponents)
            {
                sensors.AddRange(component.CreateSensors());
            }

            // Support legacy CollectObservations
            var param = m_PolicyFactory.BrainParameters;
            if (param.VectorObservationSize > 0)
            {
                collectObservationsSensor = new VectorSensor(param.VectorObservationSize);
                if (param.NumStackedVectorObservations > 1)
                {
                    stackedCollectObservationsSensor = new StackingSensor(
                        collectObservationsSensor, param.NumStackedVectorObservations);
                    sensors.Add(stackedCollectObservationsSensor);
                }
                else
                {
                    sensors.Add(collectObservationsSensor);
                }
            }

            // Sort the Sensors by name to ensure determinism
            SensorUtils.SortSensors(sensors);

            // Make sure the names are actually unique
            for (var i = 0; i < sensors.Count - 1; i++)
            {
                Debug.Assert(
                    !sensors[i].GetName().Equals(sensors[i + 1].GetName()),
                    "Sensor names must be unique.");
            }
        }

        void CleanupSensors()
        {
            // Dispose of all attached sensors.
            for (var i = 0; i < sensors.Count; i++)
            {
                var sensor = sensors[i];
                if (sensor is IDisposable disposableSensor)
                {
                    disposableSensor.Dispose();
                }
            }
        }

        void InitializeActuators()
        {
            ActuatorComponent[] attachedActuators;
            if (m_PolicyFactory.UseChildActuators)
            {
                attachedActuators = GetComponentsInChildren<ActuatorComponent>();
            }
            else
            {
                attachedActuators = GetComponents<ActuatorComponent>();
            }

            // Support legacy OnActionReceived
            // TODO don't set this up if the sizes are 0?
            var param = m_PolicyFactory.BrainParameters;
            m_VectorActuator = new AgentVectorActuator(this, this, param.ActionSpec);
            m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
            m_ActuatorManager.Add(m_VectorActuator);

            foreach (var actuatorComponent in attachedActuators)
            {
                m_ActuatorManager.AddActuators(actuatorComponent.CreateActuators());
            }
        }

        /// <summary>
        /// Sends the Agent info to the linked Brain.
        /// </summary>
        void SendInfoToBrain()
        {
            if (!m_Initialized)
            {
                throw new UnityAgentsException("Call to SendInfoToBrain when Agent hasn't been initialized. " +
                    "Please ensure that you are calling 'base.OnEnable()' if you have overridden OnEnable.");
            }

            if (m_Brain == null)
            {
                return;
            }

            if (m_Info.done)
            {
                m_Info.ClearActions();
            }
            else
            {
                m_Info.CopyActions(m_ActuatorManager.StoredActions);
            }

            UpdateSensors();
            using (TimerStack.Instance.Scoped("CollectObservations"))
            {
                using (m_CollectObservationsChecker.Start())
                {
                    CollectObservations(collectObservationsSensor);
                }
            }
            using (TimerStack.Instance.Scoped("WriteActionMask"))
            {
                m_ActuatorManager.WriteActionMask();
            }

            m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
            m_Info.reward = m_Reward;
            m_Info.groupReward = m_GroupReward;
            m_Info.done = false;
            m_Info.maxStepReached = false;
            m_Info.episodeId = m_EpisodeId;
            m_Info.groupId = m_GroupId;

            using (TimerStack.Instance.Scoped("RequestDecision"))
            {
                m_Brain.RequestDecision(m_Info, sensors);
            }

            // If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
            if (DemonstrationWriters.Count != 0)
            {
                foreach (var demoWriter in DemonstrationWriters)
                {
                    demoWriter.Record(m_Info, sensors);
                }
            }
        }

        void UpdateSensors()
        {
            foreach (var sensor in sensors)
            {
                sensor.Update();
            }
        }

        void ResetSensors()
        {
            foreach (var sensor in sensors)
            {
                sensor.Reset();
            }
        }

        /// <summary>
        /// Implement `CollectObservations()` to collect the vector observations of
        /// the agent for the step. The agent observation describes the current
        /// environment from the perspective of the agent.
        /// </summary>
        /// <param name="sensor">
        /// The vector observations for the agent.
        /// </param>
        /// <remarks>
        /// An agent's observation is any environment information that helps
        /// the agent achieve its goal. For example, for a fighting agent, its
        /// observation could include distances to friends or enemies, or the
        /// current level of ammunition at its disposal.
        ///
        /// You can use a combination of vector, visual, and raycast observations for an
        /// agent. If you only use visual or raycast observations, you do not need to
        /// implement a `CollectObservations()` function.
        ///
        /// Add vector observations to the <paramref name="sensor"/> parameter passed to
        /// this method by calling the <seealso cref="VectorSensor"/> helper methods:
        /// - <see cref="VectorSensor.AddObservation(int)"/>
        /// - <see cref="VectorSensor.AddObservation(float)"/>
        /// - <see cref="VectorSensor.AddObservation(Vector3)"/>
        /// - <see cref="VectorSensor.AddObservation(Vector2)"/>
        /// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
        /// - <see cref="VectorSensor.AddObservation(bool)"/>
        /// - <see cref="VectorSensor.AddObservation(IList{float})"/>
        /// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
        ///
        /// You can use any combination of these helper functions to build the agent's
        /// vector of observations. You must build the vector in the same order
        /// each time `CollectObservations()` is called and the length of the vector
        /// must always be the same. In addition, the length of the observation must
        /// match the <see cref="BrainParameters.VectorObservationSize"/>
        /// attribute of the linked Brain, which is set in the Editor on the
        /// **Behavior Parameters** component attached to the agent's [GameObject].
        ///
        /// For more information about observations, see [Observations and Sensors].
        ///
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
        /// </remarks>
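        /// <example>
        /// A minimal sketch, assuming hypothetical <c>m_Rigidbody</c>, <c>target</c>, and
        /// <c>m_IsGrounded</c> fields on the agent. It writes 7 floats, so the vector
        /// observation size on the Behavior Parameters component would need to be 7:
        /// <code>
        /// public override void CollectObservations(VectorSensor sensor)
        /// {
        ///     // 3 floats: the agent's velocity.
        ///     sensor.AddObservation(m_Rigidbody.velocity);
        ///     // 3 floats: the offset from the agent to the target.
        ///     sensor.AddObservation(target.position - transform.position);
        ///     // 1 float: whether the agent is on the ground (bools are encoded as 0 or 1).
        ///     sensor.AddObservation(m_IsGrounded);
        /// }
        /// </code>
        /// </example>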
        public virtual void CollectObservations(VectorSensor sensor)
        {
        }

        /// <summary>
        /// Returns a read-only view of the observations that were generated in
        /// <see cref="CollectObservations(VectorSensor)"/>. This is mainly useful inside of a
        /// <see cref="Heuristic(in ActionBuffers)"/> method to avoid recomputing the observations.
        /// </summary>
        /// <returns>A read-only view of the observations list.</returns>
        public ReadOnlyCollection<float> GetObservations()
        {
            return collectObservationsSensor.GetObservations();
        }

        /// <summary>
        /// Returns a read-only view of the stacked observations that were generated in
        /// <see cref="CollectObservations(VectorSensor)"/>. This is mainly useful inside of a
        /// <see cref="Heuristic(in ActionBuffers)"/> method to avoid recomputing the observations.
        /// </summary>
        /// <returns>A read-only view of the stacked observations list.</returns>
        public ReadOnlyCollection<float> GetStackedObservations()
        {
            return stackedCollectObservationsSensor.GetStackedObservations();
        }

        /// <summary>
        /// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
        /// actions. When using discrete actions, the agent will not perform the masked
        /// action.
        /// </summary>
        /// <param name="actionMask">
        /// The action mask for the agent.
        /// </param>
        /// <remarks>
        /// When using Discrete Control, you can prevent the Agent from using a certain
        /// action by masking it with <see cref="IDiscreteActionMask.SetActionEnabled"/>.
        ///
        /// See [Agents - Actions] for more information on masking actions.
        ///
        /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md#actions
        /// </remarks>
        /// <seealso cref="IActionReceiver.OnActionReceived"/>
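        /// <example>
        /// A minimal sketch that disables action index 1 ("move left", say) on branch 0
        /// while a hypothetical <c>m_AtLeftWall</c> condition holds:
        /// <code>
        /// public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
        /// {
        ///     if (m_AtLeftWall)
        ///     {
        ///         actionMask.SetActionEnabled(0, 1, false);
        ///     }
        /// }
        /// </code>
        /// </example>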
        public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask) { }

        /// <summary>
        /// Implement `OnActionReceived()` to specify agent behavior at every step, based
        /// on the provided action.
        /// </summary>
        /// <remarks>
        /// An action is passed to this function in the form of an <seealso cref="ActionBuffers"/>.
        /// Your implementation must use the array to direct the agent's behavior for the
        /// current step.
        ///
        /// You decide how many elements you need in the ActionBuffers to control your
        /// agent and what each element means. For example, if you want to apply a
        /// force to move an agent around the environment, you can arbitrarily pick
        /// three values in the ActionBuffers.ContinuousActions array to use as the force components.
        /// During training, the agent's policy learns to set those particular elements of
        /// the array to maximize the training rewards the agent receives. (Of course,
        /// if you implement a <seealso cref="Agent.Heuristic(in ActionBuffers)"/> function, it must use the same
        /// elements of the action array for the same purpose since there is no learning
        /// involved.)
        ///
        /// An Agent can use continuous and/or discrete actions. Configure this, along with the size
        /// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
        /// <see cref="BehaviorParameters"/> component.
        ///
        /// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
        /// array are floating point numbers. You should clamp the values to the range
        /// [-1, 1] to increase numerical stability during training.
        ///
        /// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
        /// are integers that each represent a specific, discrete action. For example,
        /// you could define a set of discrete actions such as:
        ///
        /// <code>
        /// 0 = Do nothing
        /// 1 = Move one space left
        /// 2 = Move one space right
        /// 3 = Move one space up
        /// 4 = Move one space down
        /// </code>
        ///
        /// When making a decision, the agent picks one of the five actions and puts the
        /// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
        /// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
        /// a single element with the value 1.
        ///
        /// You can define multiple sets, or branches, of discrete actions to allow an
        /// agent to perform simultaneous, independent actions. For example, you could
        /// use one branch for movement and another branch for throwing a ball left, right,
        /// up, or down, to allow the agent to do both in the same step.
        ///
        /// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
        /// element for each branch. The value of each element is the integer representing the
        /// chosen action for that branch. The agent always chooses one action for each branch.
        ///
        /// When you use the discrete actions, you can prevent the training process
        /// or the neural network model from choosing specific actions in a step by
        /// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
        /// method. For example, if your agent is next to a wall, you could mask out any
        /// actions that would result in the agent trying to move into the wall.
        ///
        /// For more information about implementing agent actions see [Agents - Actions].
        ///
        /// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Learning-Environment-Design-Agents.md#actions
        /// </remarks>
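        /// <example>
        /// A minimal sketch that interprets two continuous actions as force components,
        /// assuming hypothetical <c>m_Rigidbody</c> and <c>moveForce</c> fields:
        /// <code>
        /// public override void OnActionReceived(ActionBuffers actions)
        /// {
        ///     // Clamp to [-1, 1] for numerical stability, then apply as a force.
        ///     var moveX = Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f);
        ///     var moveZ = Mathf.Clamp(actions.ContinuousActions[1], -1f, 1f);
        ///     m_Rigidbody.AddForce(new Vector3(moveX, 0f, moveZ) * moveForce);
        /// }
        /// </code>
        /// </example>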
| /// <param name="actions"> | |
| /// Struct containing the buffers of actions to be executed at this step. | |
| /// </param> | |
| public virtual void OnActionReceived(ActionBuffers actions) { } | |
        /// <summary>
        /// Implement `OnEpisodeBegin()` to set up an Agent instance at the beginning
        /// of an episode.
        /// </summary>
        /// <seealso cref="Initialize"/>
        /// <seealso cref="EndEpisode"/>
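        /// <example>
        /// A minimal sketch that respawns the agent at a random position at the start of
        /// each episode, assuming a hypothetical <c>m_Rigidbody</c> field:
        /// <code>
        /// public override void OnEpisodeBegin()
        /// {
        ///     m_Rigidbody.velocity = Vector3.zero;
        ///     transform.localPosition = new Vector3(Random.Range(-4f, 4f), 0.5f, Random.Range(-4f, 4f));
        /// }
        /// </code>
        /// </example>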
        public virtual void OnEpisodeBegin() { }

        /// <summary>
        /// Gets the most recent ActionBuffer for this agent.
        /// </summary>
        /// <returns>The most recent ActionBuffer for this agent.</returns>
        public ActionBuffers GetStoredActionBuffers()
        {
            return m_ActuatorManager.StoredActions;
        }

        /// <summary>
        /// An internal reset method that updates internal data structures in
        /// addition to calling <see cref="OnEpisodeBegin"/>.
        /// </summary>
        void _AgentReset()
        {
            ResetData();
            m_StepCount = 0;
            using (m_OnEpisodeBeginChecker.Start())
            {
                OnEpisodeBegin();
            }
        }

        /// <summary>
        /// Scales a continuous action from [-1, 1] to an arbitrary range.
        /// </summary>
        /// <param name="rawAction">The input action value.</param>
        /// <param name="min">The minimum output value.</param>
        /// <param name="max">The maximum output value.</param>
        /// <returns>The <paramref name="rawAction"/> scaled from [-1, 1] to
        /// [<paramref name="min"/>, <paramref name="max"/>].</returns>
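        /// <example>
        /// For example, <c>ScaleAction(0.5f, 0f, 10f)</c> returns 7.5: the midpoint of
        /// [0, 10] is 5, the half-range is 5, and 0.5 * 5 + 5 = 7.5.
        /// </example>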
        protected static float ScaleAction(float rawAction, float min, float max)
        {
            var middle = (min + max) / 2;
            var range = (max - min) / 2;
            return rawAction * range + middle;
        }

        /// <summary>
        /// Signals the agent that it must send its decision to the brain.
        /// </summary>
        void SendInfo()
        {
            // If the Agent is done, it has just reset and thus requires a new decision.
            if (m_RequestDecision)
            {
                SendInfoToBrain();
                m_Reward = 0f;
                m_GroupReward = 0f;
                m_RequestDecision = false;
            }
        }

        void AgentIncrementStep()
        {
            m_StepCount += 1;
        }

        /// Used by the brain to make the agent perform a step.
        void AgentStep()
        {
            if ((m_RequestAction) && (m_Brain != null))
            {
                m_RequestAction = false;
                m_ActuatorManager.ExecuteActions();
            }

            if ((m_StepCount >= MaxStep) && (MaxStep > 0))
            {
                NotifyAgentDone(DoneReason.MaxStepReached);
                _AgentReset();
            }
        }

        void DecideAction()
        {
            if (m_ActuatorManager.StoredActions.ContinuousActions.Array == null)
            {
                ResetData();
            }
            var actions = m_Brain?.DecideAction() ?? new ActionBuffers();
            m_Info.CopyActions(actions);
            m_ActuatorManager.UpdateActions(actions);
        }

        internal void SetMultiAgentGroup(IMultiAgentGroup multiAgentGroup)
        {
            if (multiAgentGroup == null)
            {
                m_GroupId = 0;
            }
            else
            {
                var newGroupId = multiAgentGroup.GetId();
                if (m_GroupId == 0 || m_GroupId == newGroupId)
                {
                    m_GroupId = newGroupId;
                }
                else
                {
                    throw new UnityAgentsException("Agent is already registered with a group. Unregister it first.");
                }
            }
        }
    }
}