| | using System; |
| | using UnityEngine; |
| | using Unity.MLAgents; |
| | using Unity.MLAgents.Actuators; |
| | using Unity.MLAgentsExamples; |
| | using Unity.MLAgents.Sensors; |
| | using BodyPart = Unity.MLAgentsExamples.BodyPart; |
| | using Random = UnityEngine.Random; |
| |
|
/// <summary>
/// ML-Agents agent that drives a 16-part humanoid ragdoll through a
/// <c>JointDriveController</c>. Every physics step it is rewarded for how closely the
/// average body velocity matches a goal velocity (orientation-cube forward times the
/// target walking speed), scaled by how well the head faces that same direction.
/// </summary>
public class WalkerAgent : Agent
{
    // Bounds shared by the inspector slider, the property clamp and the per-episode
    // randomization, so the three can never drift apart.
    const float m_minWalkingSpeed = 0.1f;
    const float m_maxWalkingSpeed = 10;

    [Header("Walk Speed")]
    [Range(m_minWalkingSpeed, m_maxWalkingSpeed)]
    [SerializeField]
    // Speed the agent tries to match; exposed (clamped) through MTargetWalkingSpeed.
    private float m_TargetWalkingSpeed = m_maxWalkingSpeed;

    /// <summary>
    /// Target walking speed. Writes are clamped to
    /// [<c>m_minWalkingSpeed</c>, <c>m_maxWalkingSpeed</c>].
    /// </summary>
    public float MTargetWalkingSpeed
    {
        get { return m_TargetWalkingSpeed; }
        set { m_TargetWalkingSpeed = Mathf.Clamp(value, m_minWalkingSpeed, m_maxWalkingSpeed); }
    }

    // When true, a new random target speed is drawn at the start of every episode.
    public bool randomizeWalkSpeedEachEpisode;

    // World-space vector from hips to target, refreshed by UpdateOrientationObjects.
    // It is written but not read anywhere in this class.
    private Vector3 m_WorldDirToWalk = Vector3.right;

    [Header("Target To Walk Towards")] public Transform target;

    [Header("Body Parts")] public Transform hips;
    public Transform chest;
    public Transform spine;
    public Transform head;
    public Transform thighL;
    public Transform shinL;
    public Transform footL;
    public Transform thighR;
    public Transform shinR;
    public Transform footR;
    public Transform armL;
    public Transform forearmL;
    public Transform handL;
    public Transform armR;
    public Transform forearmR;
    public Transform handR;

    // Reference frame for observations and for the goal direction. Presumably kept
    // pointing from the hips toward the target by UpdateOrientation -- confirm in
    // OrientationCubeController.
    OrientationCubeController m_OrientationCube;

    // Optional child component; only used when present (see UpdateOrientationObjects).
    DirectionIndicator m_DirectionIndicator;
    JointDriveController m_JdController;
    EnvironmentParameters m_ResetParams;

    /// <summary>
    /// Caches the helper components and registers every body part with the joint
    /// drive controller.
    /// </summary>
    public override void Initialize()
    {
        m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
        m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();

        // Register the body parts; the order here determines the iteration order of
        // bodyPartsList only if the controller appends in call order (not visible here).
        m_JdController = GetComponent<JointDriveController>();
        m_JdController.SetupBodyPart(hips);
        m_JdController.SetupBodyPart(chest);
        m_JdController.SetupBodyPart(spine);
        m_JdController.SetupBodyPart(head);
        m_JdController.SetupBodyPart(thighL);
        m_JdController.SetupBodyPart(shinL);
        m_JdController.SetupBodyPart(footL);
        m_JdController.SetupBodyPart(thighR);
        m_JdController.SetupBodyPart(shinR);
        m_JdController.SetupBodyPart(footR);
        m_JdController.SetupBodyPart(armL);
        m_JdController.SetupBodyPart(forearmL);
        m_JdController.SetupBodyPart(handL);
        m_JdController.SetupBodyPart(armR);
        m_JdController.SetupBodyPart(forearmR);
        m_JdController.SetupBodyPart(handR);

        m_ResetParams = Academy.Instance.EnvironmentParameters;
    }

    /// <summary>
    /// Resets every body part, gives the hips a random yaw, refreshes the orientation
    /// helpers and (optionally) draws a new target walking speed.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        // Reset all of the body parts (presumably back to their starting pose --
        // see BodyPart.Reset).
        foreach (var bodyPart in m_JdController.bodyPartsDict.Values)
        {
            bodyPart.Reset(bodyPart);
        }

        // Random start rotation about the vertical axis.
        hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);

        UpdateOrientationObjects();

        // Either draw a fresh speed or re-assign the current one; the re-assignment
        // still goes through the setter, so the stored value is re-clamped.
        MTargetWalkingSpeed = randomizeWalkSpeedEachEpisode
            ? Random.Range(m_minWalkingSpeed, m_maxWalkingSpeed)
            : MTargetWalkingSpeed;
    }

    /// <summary>
    /// Adds the observations for a single body part to <paramref name="sensor"/>.
    /// </summary>
    /// <param name="bp">Body part to observe.</param>
    /// <param name="sensor">Sensor receiving the observations.</param>
    public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
    {
        // Ground contact flag.
        sensor.AddObservation(bp.groundContact.touchingGround);

        // Linear and angular velocity expressed in the orientation cube's local space.
        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));

        // Position relative to the hips, also in the orientation cube's local space.
        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));

        // Hips and hands are skipped here; every other part also reports its local
        // rotation and its current strength normalised by the controller's limit.
        if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
        {
            sensor.AddObservation(bp.rb.transform.localRotation);
            sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
        }
    }

    /// <summary>
    /// Collects the agent-level observations followed by the per-body-part ones.
    /// </summary>
    /// <param name="sensor">Sensor receiving the observations.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        var cubeForward = m_OrientationCube.transform.forward;

        // Velocity we want to match, and the velocity we currently have.
        var velGoal = cubeForward * MTargetWalkingSpeed;
        var avgVel = GetAvgVelocity();

        // Magnitude of the gap between current and goal velocity.
        sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
        // Average body velocity relative to the orientation cube.
        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
        // Goal velocity relative to the orientation cube.
        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));

        // Rotation that would turn the hips / head forward vector onto cube forward.
        sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
        sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));

        // Target position in the orientation cube's local space.
        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.position));

        foreach (var bodyPart in m_JdController.bodyPartsList)
        {
            CollectObservationBodyPart(bodyPart, sensor);
        }
    }

    /// <summary>
    /// Applies the continuous actions to the joints. The values are consumed strictly
    /// in order: 26 joint target rotation components first, then 13 joint strengths
    /// (39 in total), so the statement order below is part of the action layout.
    /// </summary>
    /// <param name="actionBuffers">Actions chosen for this step.</param>
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        var bpDict = m_JdController.bodyPartsDict;
        // Pre-incremented before each read, so the first action used is index 0.
        var i = -1;

        var continuousActions = actionBuffers.ContinuousActions;
        bpDict[chest].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
        bpDict[spine].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);

        bpDict[thighL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
        bpDict[thighR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
        bpDict[shinL].SetJointTargetRotation(continuousActions[++i], 0, 0);
        bpDict[shinR].SetJointTargetRotation(continuousActions[++i], 0, 0);
        bpDict[footR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
        bpDict[footL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);

        bpDict[armL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
        bpDict[armR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
        bpDict[forearmL].SetJointTargetRotation(continuousActions[++i], 0, 0);
        bpDict[forearmR].SetJointTargetRotation(continuousActions[++i], 0, 0);
        bpDict[head].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);

        // Joint strengths.
        bpDict[chest].SetJointStrength(continuousActions[++i]);
        bpDict[spine].SetJointStrength(continuousActions[++i]);
        bpDict[head].SetJointStrength(continuousActions[++i]);
        bpDict[thighL].SetJointStrength(continuousActions[++i]);
        bpDict[shinL].SetJointStrength(continuousActions[++i]);
        bpDict[footL].SetJointStrength(continuousActions[++i]);
        bpDict[thighR].SetJointStrength(continuousActions[++i]);
        bpDict[shinR].SetJointStrength(continuousActions[++i]);
        bpDict[footR].SetJointStrength(continuousActions[++i]);
        bpDict[armL].SetJointStrength(continuousActions[++i]);
        bpDict[forearmL].SetJointStrength(continuousActions[++i]);
        bpDict[armR].SetJointStrength(continuousActions[++i]);
        bpDict[forearmR].SetJointStrength(continuousActions[++i]);
    }

    /// <summary>
    /// Refreshes the hips-to-target vector, the orientation cube and, when one exists,
    /// the direction indicator.
    /// </summary>
    void UpdateOrientationObjects()
    {
        m_WorldDirToWalk = target.position - hips.position;
        m_OrientationCube.UpdateOrientation(hips, target);
        if (m_DirectionIndicator)
        {
            m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
        }
    }

    /// <summary>
    /// Per-physics-step reward: (velocity match) x (head facing), each factor in [0, 1]
    /// for finite inputs.
    /// </summary>
    /// <exception cref="ArgumentException">Either reward factor evaluated to NaN.</exception>
    void FixedUpdate()
    {
        UpdateOrientationObjects();

        var cubeForward = m_OrientationCube.transform.forward;

        // a. Match the goal velocity: 1 on a perfect match, falling to 0 once the
        //    velocity gap reaches the target speed.
        var avgVel = GetAvgVelocity();
        var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, avgVel);

        // Fail loudly rather than feed NaN to the trainer. The message reports the
        // values that actually enter the computation.
        if (float.IsNaN(matchSpeedReward))
        {
            throw new ArgumentException(
                "NaN in matchSpeedReward.\n" +
                $" cubeForward: {cubeForward}\n" +
                $" avgVelocity: {avgVel}\n" +
                $" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
                $" targetWalkingSpeed: {MTargetWalkingSpeed}\n" +
                $" maximumWalkingSpeed: {m_maxWalkingSpeed}"
            );
        }

        // b. Head facing: the dot product is remapped from [-1, 1] to [0, 1]. The
        //    vertical component is dropped without re-normalising, so the horizontal
        //    vector gets shorter (and the dot product smaller in magnitude) as the
        //    head pitches up or down.
        var headForward = head.forward;
        headForward.y = 0;
        var lookAtTargetReward = (Vector3.Dot(cubeForward, headForward) + 1) * .5F;

        if (float.IsNaN(lookAtTargetReward))
        {
            throw new ArgumentException(
                "NaN in lookAtTargetReward.\n" +
                $" cubeForward: {cubeForward}\n" +
                $" head.forward: {head.forward}"
            );
        }

        AddReward(matchSpeedReward * lookAtTargetReward);
    }

    /// <summary>
    /// Mean rigidbody velocity over every entry in the controller's body part list.
    /// </summary>
    /// <returns>
    /// The average velocity. With an empty list the division is 0/0 and the result
    /// is NaN in every component.
    /// </returns>
    Vector3 GetAvgVelocity()
    {
        Vector3 velSum = Vector3.zero;

        // Counted manually so this only relies on the list being enumerable.
        int numOfRb = 0;
        foreach (var item in m_JdController.bodyPartsList)
        {
            numOfRb++;
            velSum += item.rb.velocity;
        }

        var avgVel = velSum / numOfRb;
        return avgVel;
    }

    /// <summary>
    /// Maps the gap between two velocities to a reward in [0, 1].
    /// </summary>
    /// <param name="velocityGoal">Velocity the agent should have.</param>
    /// <param name="actualVelocity">Velocity the agent currently has.</param>
    /// <returns>
    /// <c>(1 - (d / s)^2)^2</c>, where <c>d</c> is the distance between the two
    /// velocities clamped to [0, s] and <c>s</c> is <see cref="MTargetWalkingSpeed"/>.
    /// </returns>
    public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
    {
        // Gap between goal and actual velocity, capped at the target speed so the
        // ratio below stays in [0, 1].
        var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);

        // Smooth falloff: 1 when the velocities match, 0 when the gap reaches the
        // target speed.
        return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
    }

    /// <summary>
    /// Grants a +1 reward. Presumably invoked by the target when the agent reaches
    /// it -- the caller is not visible in this file.
    /// </summary>
    public void TouchedTarget()
    {
        AddReward(1f);
    }
}
| |
|