Yuchan committed on
Commit
e1bb994
·
verified ·
1 Parent(s): 8d881dd

Update Model.py

Browse files
Files changed (1) hide show
  1. Model.py +3 -3
Model.py CHANGED
@@ -125,7 +125,7 @@ class Lo(layers.Layer):
125
  super().__init__()
126
  # 내부 계산은 float32로 유지
127
  self.proj = layers.Dense(d_model, use_bias=True, dtype='float32')
128
- self.p = layers.Dense(128, use_bias=True, dtype='float32')
129
  self._out_dtype = 'float32'
130
 
131
  def call(self, x):
@@ -154,8 +154,8 @@ class LoSoU(layers.Layer):
154
  self.eps = float(eps)
155
 
156
  # projection / gating layers in float32
157
- self.Q = layers.Dense(128, dtype='float32')
158
- self.K = layers.Dense(128, dtype='float32')
159
  # V produces d_model so keep it float32 internally
160
  self.V = Lo(d_model) # Lo already handles casting to model dtype; we'll cast back to float32
161
  self.proj = layers.Dense(d_model, use_bias=True, dtype='float32')
 
125
  super().__init__()
126
  # 내부 계산은 float32로 유지
127
  self.proj = layers.Dense(d_model, use_bias=True, dtype='float32')
128
+ self.p = layers.Dense(96, use_bias=True, dtype='float32')
129
  self._out_dtype = 'float32'
130
 
131
  def call(self, x):
 
154
  self.eps = float(eps)
155
 
156
  # projection / gating layers in float32
157
+ self.Q = layers.Dense(96, dtype='float32')
158
+ self.K = layers.Dense(96, dtype='float32')
159
  # V produces d_model so keep it float32 internally
160
  self.V = Lo(d_model) # Lo already handles casting to model dtype; we'll cast back to float32
161
  self.proj = layers.Dense(d_model, use_bias=True, dtype='float32')