cgoodmaker Claude Opus 4.6 committed on
Commit
1a97904
·
1 Parent(s): 5157ba3

Force MCP tool models to CPU to avoid GPU VRAM contention with MedGemma

Browse files

- MCP subprocess passes SKINPRO_TOOL_DEVICE=cpu env var
- MONET and ConvNeXt respect this override to stay on CPU
- Prevents OOM/hang on T4 (16GB) where MedGemma already uses ~8GB
- Fix max_length warning: set both max_length=None and max_new_tokens=400 on generation_config

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

models/convnext_classifier.py CHANGED
@@ -3,6 +3,7 @@ ConvNeXt Classifier Tool - Skin lesion classification using ConvNeXt + MONET fea
3
  Loads seed42_fold0.pt checkpoint and performs classification.
4
  """
5
 
 
6
  import torch
7
  import torch.nn as nn
8
  import numpy as np
@@ -173,8 +174,11 @@ class ConvNeXtClassifier:
173
  if self.loaded:
174
  return
175
 
176
- # Determine device
177
- if self.device is None:
 
 
 
178
  if torch.cuda.is_available():
179
  self.device = "cuda"
180
  elif torch.backends.mps.is_available():
 
3
  Loads seed42_fold0.pt checkpoint and performs classification.
4
  """
5
 
6
+ import os
7
  import torch
8
  import torch.nn as nn
9
  import numpy as np
 
174
  if self.loaded:
175
  return
176
 
177
+ # Determine device (respect SKINPRO_TOOL_DEVICE override for GPU sharing)
178
+ forced = os.environ.get("SKINPRO_TOOL_DEVICE")
179
+ if forced:
180
+ self.device = forced
181
+ elif self.device is None:
182
  if torch.cuda.is_available():
183
  self.device = "cuda"
184
  elif torch.backends.mps.is_available():
models/medgemma_agent.py CHANGED
@@ -76,6 +76,10 @@ class MCPClient:
76
  """Spawn the MCP server subprocess and complete the handshake."""
77
  root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
78
  server_script = os.path.join(root, "mcp_server", "server.py")
 
 
 
 
79
  self._process = subprocess.Popen(
80
  [sys.executable, server_script], # use same venv Python (has all ML packages)
81
  stdin=subprocess.PIPE,
@@ -83,6 +87,7 @@ class MCPClient:
83
  stderr=subprocess.PIPE,
84
  text=True,
85
  bufsize=1, # line-buffered
 
86
  )
87
  self._initialize()
88
 
@@ -246,10 +251,12 @@ class MedGemmaAgent:
246
  tokenizer=processor.tokenizer,
247
  )
248
 
249
- # Clear default max_length from generation_config to avoid conflict
250
  # with max_new_tokens passed at inference time
251
  if hasattr(self.pipe.model, "generation_config"):
252
- self.pipe.model.generation_config.max_length = None
 
 
253
 
254
  self._print(f"Model loaded in {time.time() - start:.1f}s")
255
  self.loaded = True
 
76
  """Spawn the MCP server subprocess and complete the handshake."""
77
  root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
78
  server_script = os.path.join(root, "mcp_server", "server.py")
79
+ # Force MCP tool models (MONET, ConvNeXt) onto CPU so they don't
80
+ # compete with MedGemma for GPU VRAM (T4 has only 16 GB).
81
+ env = os.environ.copy()
82
+ env["SKINPRO_TOOL_DEVICE"] = "cpu"
83
  self._process = subprocess.Popen(
84
  [sys.executable, server_script], # use same venv Python (has all ML packages)
85
  stdin=subprocess.PIPE,
 
87
  stderr=subprocess.PIPE,
88
  text=True,
89
  bufsize=1, # line-buffered
90
+ env=env,
91
  )
92
  self._initialize()
93
 
 
251
  tokenizer=processor.tokenizer,
252
  )
253
 
254
+ # Clear default max_length (20) from generation_config to avoid conflict
255
  # with max_new_tokens passed at inference time
256
  if hasattr(self.pipe.model, "generation_config"):
257
+ gc = self.pipe.model.generation_config
258
+ gc.max_length = None
259
+ gc.max_new_tokens = 400
260
 
261
  self._print(f"Model loaded in {time.time() - start:.1f}s")
262
  self.loaded = True
models/monet_tool.py CHANGED
@@ -3,6 +3,7 @@ MONET Tool - Skin lesion feature extraction using MONET model
3
  Correct implementation based on MONET tutorial: automatic_concept_annotation.ipynb
4
  """
5
 
 
6
  import torch
7
  import torch.nn.functional as F
8
  import numpy as np
@@ -90,8 +91,11 @@ class MonetTool:
90
  if self.loaded:
91
  return
92
 
93
- # Determine device
94
- if self.device is None:
 
 
 
95
  if torch.cuda.is_available():
96
  self.device = "cuda:0"
97
  elif torch.backends.mps.is_available():
 
3
  Correct implementation based on MONET tutorial: automatic_concept_annotation.ipynb
4
  """
5
 
6
+ import os
7
  import torch
8
  import torch.nn.functional as F
9
  import numpy as np
 
91
  if self.loaded:
92
  return
93
 
94
+ # Determine device (respect SKINPRO_TOOL_DEVICE override for GPU sharing)
95
+ forced = os.environ.get("SKINPRO_TOOL_DEVICE")
96
+ if forced:
97
+ self.device = forced
98
+ elif self.device is None:
99
  if torch.cuda.is_available():
100
  self.device = "cuda:0"
101
  elif torch.backends.mps.is_available():