| | using System.Collections.Generic; |
| | using System.Linq; |
| | using Unity.MLAgents.Inference.Utils; |
| | using Unity.MLAgents.Actuators; |
| | using Unity.Barracuda; |
| | using UnityEngine; |
| |
|
| | namespace Unity.MLAgents.Inference |
| | { |
| | |
| | |
| | |
| | |
/// <summary>
/// Applier that copies the values of an output tensor into the continuous
/// action segment of each agent's <see cref="ActionBuffers"/>.
/// </summary>
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Spec used to allocate a new <see cref="ActionBuffers"/>
    /// whenever an agent's stored buffer reports itself as empty.</param>
    public ContinuousActionOutputApplier(ActionSpec actionSpec)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes one row of <paramref name="tensorProxy"/> per agent into that agent's
    /// continuous actions.
    /// </summary>
    /// <param name="tensorProxy">Tensor holding the actions; its last dimension is used
    /// as the number of values copied per agent.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the
    /// row of the tensor that is read for it.</param>
    /// <param name="lastActions">Per-agent action buffers. Agents whose id is not
    /// present in this dictionary are skipped.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var agentIndex = 0; agentIndex < actionIds.Count; agentIndex++)
        {
            var agentId = actionIds[agentIndex];

            // Single lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                // Replace the empty placeholder and store it back so the
                // dictionary refers to the buffer that is filled below.
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var continuousBuffer = actionBuffer.ContinuousActions;
            for (var j = 0; j < actionSize; j++)
            {
                continuousBuffer[j] = tensorProxy.data[agentIndex, j];
            }
        }
    }
}
| |
|
| | |
| | |
| | |
/// <summary>
/// Applier that copies the values of an output tensor, truncated to integers,
/// into the discrete action segment of each agent's <see cref="ActionBuffers"/>.
/// </summary>
internal class DiscreteActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Spec used to allocate a new <see cref="ActionBuffers"/>
    /// whenever an agent's stored buffer reports itself as empty.</param>
    /// <param name="seed">Not used by this applier; kept so the signature stays
    /// unchanged for existing callers.</param>
    /// <param name="allocator">Not used by this applier; kept so the signature stays
    /// unchanged for existing callers.</param>
    public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes one row of <paramref name="tensorProxy"/> per agent into that agent's
    /// discrete actions, casting every value to <c>int</c>.
    /// </summary>
    /// <param name="tensorProxy">Tensor holding the actions; its last dimension is used
    /// as the number of values copied per agent.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the
    /// row of the tensor that is read for it.</param>
    /// <param name="lastActions">Per-agent action buffers. Agents whose id is not
    /// present in this dictionary are skipped.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        for (var agentIndex = 0; agentIndex < actionIds.Count; agentIndex++)
        {
            var agentId = actionIds[agentIndex];

            // Single lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                // Replace the empty placeholder and store it back so the
                // dictionary refers to the buffer that is filled below.
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var discreteBuffer = actionBuffer.DiscreteActions;
            for (var j = 0; j < actionSize; j++)
            {
                discreteBuffer[j] = (int)tensorProxy.data[agentIndex, j];
            }
        }
    }
}
| |
|
| |
|
| | |
| | |
| | |
| | |
/// <summary>
/// Applier that samples one discrete action per branch from the values of an
/// output tensor and stores the result in each agent's <see cref="ActionBuffers"/>.
/// For every branch a cumulative distribution is built with
/// <see cref="ComputeCdf"/> and then passed to a <see cref="Multinomial"/> sampler.
/// </summary>
internal class LegacyDiscreteActionOutputApplier : TensorApplier.IApplier
{
    readonly int[] m_ActionSize;
    readonly Multinomial m_Multinomial;
    readonly ActionSpec m_ActionSpec;
    readonly int[] m_StartActionIndices;
    readonly float[] m_CdfBuffer;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Spec providing the branch sizes; also used to allocate
    /// a new <see cref="ActionBuffers"/> whenever an agent's stored buffer reports
    /// itself as empty.</param>
    /// <param name="seed">Seed passed to the <see cref="Multinomial"/> sampler.</param>
    /// <param name="allocator">Not used by this applier; kept so the signature stays
    /// unchanged for existing callers.</param>
    public LegacyDiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSize = actionSpec.BranchSizes;
        m_Multinomial = new Multinomial(seed);
        m_ActionSpec = actionSpec;
        m_StartActionIndices = Utilities.CumSum(m_ActionSize);

        // One scratch buffer sized for the largest branch is reused for every
        // branch and every agent, so Apply does not allocate per call.
        var largestBranch = Mathf.Max(m_ActionSize);
        m_CdfBuffer = new float[largestBranch];
    }

    /// <summary>
    /// Samples an action for every branch of every agent and writes it into that
    /// agent's discrete actions.
    /// </summary>
    /// <param name="tensorProxy">Tensor whose rows are passed to
    /// <see cref="ComputeCdf"/>, one branch at a time.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the
    /// row of the tensor that is read for it.</param>
    /// <param name="lastActions">Per-agent action buffers. Agents whose id is not
    /// present in this dictionary are skipped.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        for (var agentIndex = 0; agentIndex < actionIds.Count; agentIndex++)
        {
            var agentId = actionIds[agentIndex];

            // Single lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                // Replace the empty placeholder and store it back so the
                // dictionary refers to the buffer that is filled below.
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var discreteBuffer = actionBuffer.DiscreteActions;
            for (var j = 0; j < m_ActionSize.Length; j++)
            {
                // ComputeCdf fills the first m_ActionSize[j] entries of m_CdfBuffer.
                ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
                discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
            }
        }
    }

    /// <summary>
    /// Fills the first <paramref name="branchSize"/> entries of the internal CDF
    /// buffer with the running sum of <c>exp(value - max)</c> over one branch of
    /// <paramref name="logProbs"/>. The result is not normalized: the last written
    /// entry is the total sum rather than 1.
    /// </summary>
    /// <param name="logProbs">Tensor to read from; the values are treated as
    /// log-probabilities, as the parameter name indicates.</param>
    /// <param name="batch">Row of the tensor to read.</param>
    /// <param name="channelOffset">Index within the row at which the branch starts.</param>
    /// <param name="branchSize">Number of consecutive entries belonging to the branch.</param>
    internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
    {
        // Find the largest value of the branch so it can be subtracted before
        // exponentiating, which keeps every exponent at or below zero.
        var maxProb = float.NegativeInfinity;
        for (var cls = 0; cls < branchSize; ++cls)
        {
            maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
        }

        // Accumulate the shifted exponentials into a running (unnormalized) sum.
        var sumProb = 0.0f;
        for (var cls = 0; cls < branchSize; ++cls)
        {
            sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
            m_CdfBuffer[cls] = sumProb;
        }
    }
}
| |
|
| | |
| | |
| | |
| | |
/// <summary>
/// Applier that copies the values of an output tensor into a per-agent list of
/// floats held in a shared memory dictionary.
/// </summary>
internal class MemoryOutputApplier : TensorApplier.IApplier
{
    Dictionary<int, List<float>> m_Memories;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="memories">Dictionary from agent id to memory values; it is
    /// updated in place by <see cref="Apply"/>.</param>
    public MemoryOutputApplier(Dictionary<int, List<float>> memories)
    {
        m_Memories = memories;
    }

    /// <summary>
    /// Stores, for every agent id, the memory values read from the matching row of
    /// <paramref name="tensorProxy"/>.
    /// </summary>
    /// <param name="tensorProxy">Tensor holding the memories; the width of its data
    /// is used as the number of values copied per agent.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the
    /// row of the tensor that is read for it.</param>
    /// <param name="lastActions">Not read by this applier.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var width = tensorProxy.data.width;
        for (var row = 0; row < actionIds.Count; row++)
        {
            var id = actionIds[row];

            // Reuse the stored list only when it exists and can hold a full row;
            // otherwise start from a zero-filled list of the required length.
            List<float> stored;
            var reusable = m_Memories.TryGetValue(id, out stored) && stored.Count >= width;
            if (!reusable)
            {
                stored = new List<float>();
                stored.AddRange(Enumerable.Repeat(0f, width));
            }

            for (var col = 0; col < width; col++)
            {
                stored[col] = tensorProxy.data[row, 0, col, 0];
            }

            m_Memories[id] = stored;
        }
    }
}
| | } |
| |
|