File size: 937 Bytes
ab81f90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import cupy as cp

class CategoricalCrossEntropyLoss:
    """Softmax activation fused with categorical cross-entropy loss.

    ``forward`` converts raw logits to softmax probabilities, caches them
    together with the targets, and returns the batch-averaged cross-entropy.
    ``backward`` reads that cache and returns the gradient of the loss with
    respect to the logits.
    """

    def __init__(self):
        # Populated by forward(): 'P' -> softmax probabilities, 'Y' -> targets.
        self.cache = {}

    def forward(self, Z, Y):
        """Return the batch-averaged cross-entropy between softmax(Z) and Y.

        The loss is computed from the log-softmax instead of the log of
        clipped probabilities. Clipping at 1e-8 capped every sample's loss at
        about 18.42 and made the reported value disagree with the gradient
        returned by ``backward``; the log-softmax form is exact.

        Args:
            Z: Raw logits from the final Linear layer (batch_size, num_classes).
            Y: True labels, one-hot encoded (batch_size, num_classes).

        Returns:
            The cross-entropy summed over every entry and divided by
            batch_size (``Z.shape[0]``).
        """
        # Softmax is invariant to a per-row shift; subtracting the row maximum
        # keeps every exponent <= 0 so exp() cannot overflow.
        shifted = Z - cp.max(Z, axis=1, keepdims=True)
        exp_shifted = cp.exp(shifted)
        row_sums = cp.sum(exp_shifted, axis=1, keepdims=True)

        self.cache['P'] = exp_shifted / row_sums
        self.cache['Y'] = Y

        # log-softmax. For finite logits the largest shifted entry in each row
        # is 0, so row_sums >= 1 and its log is finite.
        log_probs = shifted - cp.log(row_sums)

        # Entries with a zero target contribute nothing (0 * log 0 := 0).
        # Masking them first keeps a -inf logit on a non-target class from
        # turning the sum into NaN via 0 * -inf.
        contributing = cp.where(Y != 0, log_probs, 0.0)

        batch_size = Z.shape[0]
        return -cp.sum(Y * contributing) / batch_size

    def backward(self):
        """Return the gradient of the batch-averaged loss w.r.t. the logits.

        Uses the probabilities and targets cached by the most recent
        ``forward`` call.

        Returns:
            ``(P - Y) / batch_size``, with the same shape as the cached
            probabilities.

        Raises:
            KeyError: If ``forward`` has not been called yet, so the cache
                is still empty.
        """
        P = self.cache['P']
        Y = self.cache['Y']
        batch_size = P.shape[0]

        # Softmax + cross-entropy collapse to this closed form; the division
        # matches the batch averaging done in forward().
        dZ = (P - Y) / batch_size

        return dZ