gary-boon and Claude committed · cd300ee
1 Parent(s): 7dd568f
Fix ablation study for Code Llama compatibility
Fixed AttributeError in ablation generation when using Code Llama:
- CodeGen uses: n_layer, n_head
- Llama/Code Llama uses: num_hidden_layers, num_attention_heads
Changes:
- Added config attribute compatibility variables at start of ablation method
- Replaced hardcoded config.n_layer and config.n_head with compatibility variables
- Now handles both CodeGen and Llama model architectures
This fixes the 500 error in the Ablation Study when using Code Llama 7B (the fallback pattern is sketched below, after the file list).
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- backend/model_service.py +11 -4
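For readers skimming the diff below: the heart of the fix is an attribute fallback, since Hugging Face model configs expose layer and head counts under different names depending on the architecture. Here is a minimal, dependency-free sketch of that pattern; the `SimpleNamespace` objects are illustrative stand-ins, not the real config classes.

```python
from types import SimpleNamespace

def resolve_layout(config):
    """Return (num_layers, num_heads) for either config naming scheme.

    CodeGen-style configs expose n_layer / n_head, while Llama-style
    configs expose num_hidden_layers / num_attention_heads. Falling
    back through getattr avoids the AttributeError this commit fixes.
    """
    num_layers = getattr(config, 'num_hidden_layers', getattr(config, 'n_layer', 0))
    num_heads = getattr(config, 'num_attention_heads', getattr(config, 'n_head', 0))
    return num_layers, num_heads

# Illustrative stand-ins for the two config styles (not the real classes).
codegen_like = SimpleNamespace(n_layer=20, n_head=16)
llama_like = SimpleNamespace(num_hidden_layers=32, num_attention_heads=32)

print(resolve_layout(codegen_like))  # (20, 16)
print(resolve_layout(llama_like))    # (32, 32)
```

Defaulting to 0 when neither attribute is present makes `range(num_layers)` an empty loop rather than an error.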
backend/model_service.py
CHANGED
@@ -270,18 +270,25 @@ class ModelManager:
         disabled_attention = {int(k) if isinstance(k, str) else k: v for k, v in disabled_attention_raw.items()}
         disabled_ffn = set(disabled_components.get('ffn_layers', [])) if disabled_components else set()

+        # Get config attributes with compatibility for different model architectures
+        # CodeGen uses: n_layer, n_head
+        # Llama/Code Llama uses: num_hidden_layers, num_attention_heads
+        config = self.model.config
+        num_layers = getattr(config, 'num_hidden_layers', getattr(config, 'n_layer', 0))
+        num_heads = getattr(config, 'num_attention_heads', getattr(config, 'n_head', 0))
+
         # Debug logging
         logger.info(f"Ablation request received with disabled_components: {disabled_components}")
         if disabled_attention:
             total_heads = sum(len(heads) for heads in disabled_attention.values())
             logger.info(f"Total attention heads to disable: {total_heads}")
-
+
         # Tokenize input
         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
         generated_tokens = []
         token_probs = []
         token_strings = []
-
+
         # Create hooks for ablation
         handles = []

@@ -337,7 +344,7 @@ class ModelManager:

         # Apply hooks and log what's being disabled
         total_attention_disabled = 0
-        for layer_idx in range(config.n_layer):
+        for layer_idx in range(num_layers):
             if layer_idx in disabled_layers:
                 # Disable entire layer
                 handle = self.model.transformer.h[layer_idx].register_forward_hook(create_layer_hook())
@@ -362,7 +369,7 @@ class ModelManager:

         # Log summary
         if total_attention_disabled > 0:
-            logger.info(f"Total attention heads disabled: {total_attention_disabled} / {config.n_layer * config.n_head}")
+            logger.info(f"Total attention heads disabled: {total_attention_disabled} / {num_layers * num_heads}")

         # Generation loop - wrapped in try-finally to ensure hooks are removed
         try:
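The `create_layer_hook()` factory used above is defined elsewhere in `model_service.py` and is not part of this diff. As a rough illustration of the mechanism, a PyTorch forward hook can ablate a block by replacing its output with zeros; the toy module and hook body below are assumptions for demonstration, not the Space's actual implementation.

```python
import torch
import torch.nn as nn

def create_layer_hook():
    """Hypothetical layer-ablation hook: replace a block's output with zeros."""
    def hook(module, inputs, output):
        # Transformer blocks often return tuples (hidden_states, ...);
        # zero only the hidden states and pass the rest through untouched.
        if isinstance(output, tuple):
            return (torch.zeros_like(output[0]),) + output[1:]
        return torch.zeros_like(output)
    return hook

# Toy stand-in for a transformer block, just to show the mechanics.
layer = nn.Linear(8, 8)
handle = layer.register_forward_hook(create_layer_hook())

out = layer(torch.randn(2, 8))
print(out.abs().sum().item())  # 0.0 -- the layer's contribution is ablated

handle.remove()  # hooks must be removed after generation
```

Returning a value from a forward hook replaces the module's output, which is why the generation loop must remove the handles afterwards (hence the try/finally noted in the diff).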