Spaces:
Sleeping
Sleeping
| import cupy as cp | |
class CategoricalCrossEntropyLoss:
    """Softmax combined with categorical cross-entropy loss.

    ``forward`` converts raw logits to softmax probabilities, caches them
    together with the labels, and returns the cross-entropy summed over all
    entries and divided by the batch size. ``backward`` reads that cache and
    returns ``(P - Y) / batch_size``.
    """

    def __init__(self):
        # Filled by forward() with 'P' (softmax probabilities) and
        # 'Y' (labels); read back by backward().
        self.cache = {}

    def forward(self, Z, Y):
        """
        Compute the mean cross-entropy between softmax(Z) and Y.

        Z: Raw logits from the final Linear layer (batch_size, num_classes)
        Y: True labels, one-hot encoded (batch_size, num_classes)

        Returns the loss as the scalar produced by ``cp.sum`` divided by
        the batch size.

        Raises ValueError if Z and Y do not have the same shape.
        """
        # Fail before touching the cache so a bad call cannot leave a stale
        # or inconsistent (P, Y) pair behind for backward().
        if Z.shape != Y.shape:
            raise ValueError(
                f"Z and Y must have the same shape, got {Z.shape} and {Y.shape}"
            )

        # Subtracting the row-wise max keeps exp() from overflowing and does
        # not change the softmax result.
        Z_shifted = Z - cp.max(Z, axis=1, keepdims=True)
        exp_Z = cp.exp(Z_shifted)
        sum_exp = cp.sum(exp_Z, axis=1, keepdims=True)

        self.cache['P'] = exp_Z / sum_exp
        self.cache['Y'] = Y

        # log(softmax) computed directly from the shifted logits. Every row
        # of sum_exp is >= 1 (the max entry contributes exp(0)), so the log
        # is finite and no probability clipping is needed; clipping would
        # cap the per-sample loss at -log(clip_min) and under-report
        # confidently wrong predictions.
        log_probabilities = Z_shifted - cp.log(sum_exp)

        batch_size = Z.shape[0]
        loss = -cp.sum(Y * log_probabilities) / batch_size
        return loss

    def backward(self):
        """
        Return the gradient of the loss with respect to the logits Z.

        Uses the probabilities and labels cached by the most recent
        forward() call; raises KeyError if forward() has not been called.
        The expression (P - Y) / batch_size is the exact gradient when each
        row of Y sums to 1 (e.g. one-hot labels).
        """
        P = self.cache['P']
        Y = self.cache['Y']
        batch_size = P.shape[0]
        dZ = (P - Y) / batch_size
        return dZ