File size: 4,901 Bytes
766250f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | """
Module contenant la couche de neurones SitiNEUR
Syntaxe inspirée de PyTorch mais simplifiée
"""
import numpy as np
class SitiNEUR:
    """Simplified fully connected neuron layer for Sitiai.

    The layer computes ``activation(x @ weights + bias)`` in ``forward`` and,
    in ``backward``, both propagates the gradient to the previous layer and
    applies a plain gradient-descent step to its own parameters.

    Args:
        input_size: Number of input features.
        output_size: Number of output units.
        activation: Name of the activation function: 'relu', 'sigmoid',
            'tanh' or 'linear'. The name is not checked at construction
            time; an unknown name raises ``ValueError`` the first time the
            activation is applied.

    Example:
        >>> from sitiai import SitiNEUR
        >>> layer = SitiNEUR(input_size=10, output_size=5, activation='relu')
        >>> output = layer.forward(input_data)
    """

    # Elementwise bound applied to the incoming and pre-activation gradients.
    _GRAD_CLIP = 10
    # Elementwise bound applied to the weight and bias gradients.
    _PARAM_GRAD_CLIP = 1

    def __init__(self, input_size: int, output_size: int, activation: str = 'relu'):
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation

        # He initialisation (variance 2 / input_size) for ReLU, and a
        # Xavier-style variance of 1 / input_size for every other activation.
        variance = (2.0 if activation == 'relu' else 1.0) / input_size
        self.weights = np.random.randn(input_size, output_size) * np.sqrt(variance)
        self.bias = np.zeros(output_size)

        # Values cached by forward() and consumed by backward().
        self.last_input = None
        self.last_output = None
        self.last_z = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Run the forward pass and cache the intermediates for ``backward``.

        Args:
            x: Input data of shape ``[batch_size, input_size]``. Array-likes
                are converted with ``np.asarray``.

        Returns:
            The activated output of shape ``[batch_size, output_size]``.

        Raises:
            ValueError: If the layer's activation name is unknown.
        """
        # Store an ndarray so backward() can rely on array operations.
        x = np.asarray(x)
        self.last_input = x
        # Affine transform: z = x @ weights + bias.
        self.last_z = np.dot(x, self.weights) + self.bias
        self.last_output = self._apply_activation(self.last_z)
        return self.last_output

    def backward(self, grad_output: np.ndarray, learning_rate: float = 0.01) -> np.ndarray:
        """Back-propagate a gradient and update the layer parameters in place.

        ``forward`` must have been called first, because this method reads
        the input and output cached by the last forward pass.

        Args:
            grad_output: Gradient of the loss with respect to this layer's
                output, with the same shape as the last forward output.
            learning_rate: Step size of the gradient-descent update.

        Returns:
            Gradient of the loss with respect to the layer input, computed
            with the weights that were used in the forward pass.
        """
        bound = self._GRAD_CLIP
        param_bound = self._PARAM_GRAD_CLIP

        # Clip the incoming gradient to limit gradient explosion.
        grad_output = np.clip(grad_output, -bound, bound)

        # Chain rule through the activation. For ReLU the derivative taken
        # from the output equals the one taken from z, since output > 0
        # exactly when z > 0.
        grad_z = grad_output * self._activation_gradient(self.last_output)
        grad_z = np.clip(grad_z, -bound, bound)

        # Gradient for the previous layer. This must use the weights from
        # the forward pass, so it is computed BEFORE the update below.
        grad_input = np.dot(grad_z, self.weights.T)

        # Promote a single unbatched sample to a batch of one so that the
        # outer product and the batch sum below are well defined. For 2-D
        # batches these calls leave the arrays unchanged.
        batch_input = np.atleast_2d(self.last_input)
        batch_grad_z = np.atleast_2d(grad_z)

        grad_weights = np.dot(batch_input.T, batch_grad_z)
        grad_bias = np.sum(batch_grad_z, axis=0)

        # Clip the parameter gradients to bound the size of a single step.
        grad_weights = np.clip(grad_weights, -param_bound, param_bound)
        grad_bias = np.clip(grad_bias, -param_bound, param_bound)

        # Gradient-descent update, applied in place.
        self.weights -= learning_rate * grad_weights
        self.bias -= learning_rate * grad_bias

        return grad_input

    def _apply_activation(self, z: np.ndarray) -> np.ndarray:
        """Apply the configured activation function elementwise to ``z``.

        Raises:
            ValueError: If the activation name is unknown.
        """
        name = self.activation
        if name == 'relu':
            return np.maximum(0, z)
        if name == 'sigmoid':
            # Clip z so that np.exp cannot overflow for large magnitudes.
            return 1 / (1 + np.exp(-np.clip(z, -500, 500)))
        if name == 'tanh':
            return np.tanh(z)
        if name == 'linear':
            return z
        raise ValueError(f"Activation inconnue: {self.activation}")

    def _activation_gradient(self, output: np.ndarray) -> np.ndarray:
        """Return the activation derivative expressed from the activated output.

        Args:
            output: The activation output (not the pre-activation ``z``).

        Raises:
            ValueError: If the activation name is unknown.
        """
        name = self.activation
        if name == 'relu':
            return (output > 0).astype(float)
        if name == 'sigmoid':
            return output * (1 - output)
        if name == 'tanh':
            return 1 - output ** 2
        if name == 'linear':
            return np.ones_like(output)
        raise ValueError(f"Activation inconnue: {self.activation}")

    def get_weights(self) -> dict:
        """Return copies of the layer parameters under 'weights' and 'bias'."""
        return {
            'weights': self.weights.copy(),
            'bias': self.bias.copy(),
        }

    def set_weights(self, weights_dict: dict) -> None:
        """Replace the layer parameters with copies of the given arrays.

        Args:
            weights_dict: Mapping with the keys 'weights' and 'bias', in the
                format returned by ``get_weights``.
        """
        self.weights = weights_dict['weights'].copy()
        self.bias = weights_dict['bias'].copy()

    def __repr__(self) -> str:
        return f"SitiNEUR(input_size={self.input_size}, output_size={self.output_size}, activation='{self.activation}')"
|