Spaces:
Sleeping
Sleeping
gary-boon
Claude
committed on
Commit
·
3c774b5
1
Parent(s):
4b03268
Fix: Use scaling approach instead of skipping layers
Browse files
- Changed strategy: scale down layer output by 99.9% instead of skipping
- This maintains exact format compatibility
- Avoids tuple/tensor mismatch issues entirely
- Layer still runs but contributes only 0.1% to output
This simpler approach should work reliably with any transformer version
since we're not trying to bypass the normal data flow.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
- backend/model_service.py +15 -18
backend/model_service.py
CHANGED
|
@@ -298,27 +298,24 @@ class ModelManager:
|
|
| 298 |
|
| 299 |
def create_layer_hook():
|
| 300 |
def hook(module, input, output):
|
| 301 |
-
#
|
| 302 |
-
#
|
| 303 |
-
#
|
| 304 |
|
| 305 |
-
#
|
| 306 |
-
#
|
| 307 |
-
#
|
| 308 |
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
elif len(output) == 1:
|
| 317 |
-
# Single element tuple - preserve as single element tuple
|
| 318 |
-
return (input_hidden_states,)
|
| 319 |
else:
|
| 320 |
-
#
|
| 321 |
-
return
|
| 322 |
return hook
|
| 323 |
|
| 324 |
# Apply hooks and log what's being disabled
|
|
|
|
| 298 |
|
| 299 |
def create_layer_hook():
|
| 300 |
def hook(module, input, output):
|
| 301 |
+
# Alternative approach: drastically reduce layer's contribution
|
| 302 |
+
# instead of trying to skip it entirely
|
| 303 |
+
# This avoids format mismatch issues
|
| 304 |
|
| 305 |
+
# Scale down the output by 99.9% to effectively disable it
|
| 306 |
+
# while maintaining the exact format
|
| 307 |
+
scale_factor = 0.001 # Keep 0.1% of the layer's contribution
|
| 308 |
|
| 309 |
+
if isinstance(output, tuple):
|
| 310 |
+
# Scale the hidden states (first element) but keep structure
|
| 311 |
+
scaled_hidden = output[0] * scale_factor
|
| 312 |
+
if len(output) > 1:
|
| 313 |
+
return (scaled_hidden,) + output[1:]
|
| 314 |
+
else:
|
| 315 |
+
return (scaled_hidden,)
|
|
|
|
|
|
|
|
|
|
| 316 |
else:
|
| 317 |
+
# Single tensor output
|
| 318 |
+
return output * scale_factor
|
| 319 |
return hook
|
| 320 |
|
| 321 |
# Apply hooks and log what's being disabled
|