gary-boon Claude Opus 4.5 committed on
Commit
62525b2
·
1 Parent(s): 9080f28

Add recommended_dtype to model configs

Browse files

Each model now specifies its recommended dtype:
- codegen-350m: fp16
- code-llama-7b: fp16
- devstral-small: bf16 (required for Mistral models)

Model loader now uses recommended_dtype from config when
TORCH_DTYPE env var is not explicitly set. This ensures
Devstral automatically loads with bf16 without requiring
manual configuration.

Priority: TORCH_DTYPE env > model config > device default

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

backend/model_config.py CHANGED
@@ -22,6 +22,7 @@ class ModelConfig(TypedDict):
22
  requires_gpu: bool
23
  min_vram_gb: float
24
  min_ram_gb: float
 
25
 
26
 
27
  # Supported models registry
@@ -39,7 +40,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
39
  "attention_type": "multi_head",
40
  "requires_gpu": False,
41
  "min_vram_gb": 2.0,
42
- "min_ram_gb": 4.0
 
43
  },
44
  "code-llama-7b": {
45
  "hf_path": "codellama/CodeLlama-7b-hf",
@@ -54,7 +56,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
54
  "attention_type": "grouped_query",
55
  "requires_gpu": True, # Strongly recommended for usable performance
56
  "min_vram_gb": 14.0, # FP16 requires ~14GB VRAM
57
- "min_ram_gb": 18.0 # FP16 requires ~18GB RAM for CPU fallback
 
58
  },
59
  "devstral-small": {
60
  "hf_path": "mistralai/Devstral-Small-2507",
@@ -69,7 +72,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
69
  "attention_type": "grouped_query",
70
  "requires_gpu": True, # BF16 required, GPU strongly recommended
71
  "min_vram_gb": 48.0, # BF16 requires ~48GB VRAM
72
- "min_ram_gb": 96.0 # BF16 requires ~96GB RAM for CPU fallback
 
73
  }
74
  }
75
 
 
22
  requires_gpu: bool
23
  min_vram_gb: float
24
  min_ram_gb: float
25
+ recommended_dtype: str # "fp16", "bf16", or "fp32"
26
 
27
 
28
  # Supported models registry
 
40
  "attention_type": "multi_head",
41
  "requires_gpu": False,
42
  "min_vram_gb": 2.0,
43
+ "min_ram_gb": 4.0,
44
+ "recommended_dtype": "fp16" # fp16 for GPU, fp32 for CPU
45
  },
46
  "code-llama-7b": {
47
  "hf_path": "codellama/CodeLlama-7b-hf",
 
56
  "attention_type": "grouped_query",
57
  "requires_gpu": True, # Strongly recommended for usable performance
58
  "min_vram_gb": 14.0, # FP16 requires ~14GB VRAM
59
+ "min_ram_gb": 18.0, # FP16 requires ~18GB RAM for CPU fallback
60
+ "recommended_dtype": "fp16"
61
  },
62
  "devstral-small": {
63
  "hf_path": "mistralai/Devstral-Small-2507",
 
72
  "attention_type": "grouped_query",
73
  "requires_gpu": True, # BF16 required, GPU strongly recommended
74
  "min_vram_gb": 48.0, # BF16 requires ~48GB VRAM
75
+ "min_ram_gb": 96.0, # BF16 requires ~96GB RAM for CPU fallback
76
+ "recommended_dtype": "bf16" # Devstral requires bfloat16
77
  }
78
  }
79
 
backend/model_service.py CHANGED
@@ -154,8 +154,18 @@ class ModelManager:
154
  self.device = torch.device("cpu")
155
  device_name = "CPU"
156
 
157
- # Determine dtype from environment or defaults
158
  dtype_str = os.environ.get("TORCH_DTYPE", "").lower()
 
 
 
 
 
 
 
 
 
 
159
  if dtype_str == "bf16" or dtype_str == "bfloat16":
160
  self.dtype = torch.bfloat16
161
  dtype_name = "bfloat16"
@@ -166,7 +176,7 @@ class ModelManager:
166
  self.dtype = torch.float32
167
  dtype_name = "float32"
168
  elif self.device.type == "cpu":
169
- # Default to float32 for CPU
170
  self.dtype = torch.float32
171
  dtype_name = "float32 (CPU default)"
172
  else:
 
154
  self.device = torch.device("cpu")
155
  device_name = "CPU"
156
 
157
+ # Determine dtype from environment, model config, or defaults
158
  dtype_str = os.environ.get("TORCH_DTYPE", "").lower()
159
+
160
+ # If not set in env, use model's recommended dtype
161
+ if not dtype_str:
162
+ from .model_config import get_model_config
163
+ model_config = get_model_config(self.model_id)
164
+ if model_config and "recommended_dtype" in model_config:
165
+ dtype_str = model_config["recommended_dtype"]
166
+ logger.info(f"Using model's recommended dtype: {dtype_str}")
167
+
168
+ # Parse dtype string to torch dtype
169
  if dtype_str == "bf16" or dtype_str == "bfloat16":
170
  self.dtype = torch.bfloat16
171
  dtype_name = "bfloat16"
 
176
  self.dtype = torch.float32
177
  dtype_name = "float32"
178
  elif self.device.type == "cpu":
179
+ # Default to float32 for CPU (safest)
180
  self.dtype = torch.float32
181
  dtype_name = "float32 (CPU default)"
182
  else: