gary-boon and Claude committed · Commit 7dd568f · Parent(s): ed40a9a
Fix model info endpoint for Code Llama compatibility
Fixed AttributeError when accessing model config attributes:
- CodeGen uses: n_layer, n_head, n_embd, n_positions
- Llama/Code Llama uses: num_hidden_layers, num_attention_heads, hidden_size, max_position_embeddings
Changes:
- Use getattr() with fallbacks to handle both config schemas
- Return dynamic model name from manager.model_name instead of hardcoded value
- Handle different activation function and layer norm attribute names
This fixes the 500 Internal Server Error on the GPU backend when loading Code Llama 7B.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
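
The fix normalizes the two config schemas before building the response. Below is a minimal, self-contained sketch of the same getattr() fallback pattern; the `normalized_dims` helper is illustrative and not part of the service code, and it assumes a transformers version recent enough to ship both config classes:

```python
# Minimal sketch of the getattr() fallback pattern from this commit.
# normalized_dims is a hypothetical helper, not taken from the repo.
from transformers import CodeGenConfig, LlamaConfig

def normalized_dims(config):
    """Read core model dimensions regardless of which config schema is loaded."""
    return {
        # Try the Llama-style name first, fall back to the CodeGen-style name,
        # then to 0 so a missing attribute can never raise AttributeError.
        "layers": getattr(config, "num_hidden_layers", getattr(config, "n_layer", 0)),
        "heads": getattr(config, "num_attention_heads", getattr(config, "n_head", 0)),
        "hiddenSize": getattr(config, "hidden_size", getattr(config, "n_embd", 0)),
        "maxPositions": getattr(config, "max_position_embeddings",
                                getattr(config, "n_positions", 0)),
    }

print(normalized_dims(CodeGenConfig()))  # CodeGen-schema config
print(normalized_dims(LlamaConfig()))    # Llama-schema config; config.n_layer here would raise
```

Note that recent transformers releases also alias the Llama-style names onto CodeGenConfig via its attribute_map, so the first getattr often succeeds for CodeGen as well; the inner fallback covers configs without that alias.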
- backend/model_service.py +20 -12
backend/model_service.py (CHANGED)

```diff
@@ -855,30 +855,38 @@ async def model_info(authenticated: bool = Depends(verify_api_key)):
     """Get detailed information about the loaded model"""
     if not manager.model:
         raise HTTPException(status_code=503, detail="Model not loaded")
-
+
     config = manager.model.config
-
+
     # Calculate total parameters
     total_params = sum(p.numel() for p in manager.model.parameters())
     trainable_params = sum(p.numel() for p in manager.model.parameters() if p.requires_grad)
-
+
+    # Handle different config attribute names across model architectures
+    # CodeGen uses: n_layer, n_head, n_embd, n_positions
+    # Llama/Code Llama uses: num_hidden_layers, num_attention_heads, hidden_size, max_position_embeddings
+    num_layers = getattr(config, 'num_hidden_layers', getattr(config, 'n_layer', 0))
+    num_heads = getattr(config, 'num_attention_heads', getattr(config, 'n_head', 0))
+    hidden_size = getattr(config, 'hidden_size', getattr(config, 'n_embd', 0))
+    max_positions = getattr(config, 'max_position_embeddings', getattr(config, 'n_positions', 0))
+
     return {
-        "name":
+        "name": manager.model_name,
         "type": config.model_type,
         "totalParams": total_params,
        "trainableParams": trainable_params,
-        "layers": config.n_layer,
-        "heads": config.n_head,
-        "hiddenSize": config.n_embd,
+        "layers": num_layers,
+        "heads": num_heads,
+        "hiddenSize": hidden_size,
         "vocabSize": config.vocab_size,
-        "maxPositions": config.n_positions,
+        "maxPositions": max_positions,
         "architecture": manager.model.__class__.__name__,
         "device": str(manager.device),
         "dtype": str(next(manager.model.parameters()).dtype),
         "accessible": [
             f"Token probabilities (all {config.vocab_size})",
-            f"Attention weights ({config.n_layer} layers × {config.n_head} heads = {config.n_layer * config.n_head} patterns)",
-            f"Hidden states (all {config.n_layer} layers)",
+            f"Attention weights ({num_layers} layers × {num_heads} heads = {num_layers * num_heads} patterns)",
+            f"Hidden states (all {num_layers} layers)",
             "Logits before softmax",
             "Token embeddings",
             "Position embeddings (RoPE)",
@@ -888,8 +896,8 @@ async def model_info(authenticated: bool = Depends(verify_api_key)):
             "Activation functions (GELU)"
         ],
         "config": {
-            "activation_function": config.activation_function,
-            "layer_norm_epsilon": config.layer_norm_epsilon,
+            "activation_function": getattr(config, 'activation_function', getattr(config, 'hidden_act', 'unknown')),
+            "layer_norm_epsilon": getattr(config, 'layer_norm_epsilon', getattr(config, 'rms_norm_eps', 1e-5)),
             "tie_word_embeddings": config.tie_word_embeddings,
             "rotary_dim": config.rotary_dim if hasattr(config, 'rotary_dim') else None,
             "use_cache": config.use_cache
```
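
As a sanity check, the endpoint can be exercised from a client once a Code Llama model is loaded. The sketch below is hedged: the /model/info route, the X-API-Key header, and the base URL are all assumptions, since neither the route decorator nor the body of verify_api_key appears in this hunk.

```python
# Hypothetical smoke test for the model-info endpoint. The route path,
# auth header name, and base URL are assumptions; adjust to the real service.
import requests

def check_model_info(base_url: str, api_key: str) -> None:
    resp = requests.get(
        f"{base_url}/model/info",        # assumed route for model_info()
        headers={"X-API-Key": api_key},  # assumed scheme for verify_api_key
        timeout=10,
    )
    resp.raise_for_status()  # pre-fix, Code Llama loads surfaced a 500 here
    info = resp.json()
    # All four keys are populated for both CodeGen and Llama-style configs
    print(info["name"], info["layers"], info["heads"], info["maxPositions"])

check_model_info("http://localhost:8000", "dev-key")
```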