cgoodmaker Claude Opus 4.6 committed on
Commit
da343a7
·
1 Parent(s): c376e14

Fix MCP subprocess deadlock: use stderr=None instead of PIPE

Browse files

When the MCP subprocess downloads models (MONET, sentence-transformers)
on first call, progress output fills the 64KB stderr pipe buffer. The
subprocess blocks trying to write stderr while the main process blocks
reading stdout — classic pipe deadlock. Fix by inheriting parent stderr
so download progress prints directly to the container logs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. models/medgemma_agent.py +7 -18
models/medgemma_agent.py CHANGED
@@ -38,24 +38,12 @@ class MCPClient:
38
  self._process.stdin.write(line)
39
  self._process.stdin.flush()
40
 
41
- def _drain_stderr(self):
42
- """Read any available stderr from the subprocess and print it."""
43
- if self._process and self._process.stderr:
44
- import select
45
- while select.select([self._process.stderr], [], [], 0)[0]:
46
- line = self._process.stderr.readline()
47
- if line:
48
- print(f"[MCP stderr] {line.strip()}", flush=True)
49
- else:
50
- break
51
-
52
  def _recv(self, timeout: int = 300) -> dict:
53
  import select
54
  deadline = time.time() + timeout
55
  while True:
56
  remaining = deadline - time.time()
57
  if remaining <= 0:
58
- self._drain_stderr()
59
  raise RuntimeError(
60
  f"MCP server did not respond within {timeout}s"
61
  )
@@ -65,14 +53,12 @@ class MCPClient:
65
  if not ready:
66
  # Check if subprocess died
67
  if self._process.poll() is not None:
68
- self._drain_stderr()
69
  raise RuntimeError(
70
  f"MCP server exited with code {self._process.returncode}"
71
  )
72
  continue
73
  line = self._process.stdout.readline()
74
  if not line:
75
- self._drain_stderr()
76
  raise RuntimeError("MCP server closed connection unexpectedly")
77
  line = line.strip()
78
  if not line:
@@ -111,13 +97,17 @@ class MCPClient:
111
  # compete with MedGemma for GPU VRAM (T4 has only 16 GB).
112
  env = os.environ.copy()
113
  env["SKINPRO_TOOL_DEVICE"] = "cpu"
 
 
 
 
114
  self._process = subprocess.Popen(
115
- [sys.executable, server_script], # use same venv Python (has all ML packages)
116
  stdin=subprocess.PIPE,
117
  stdout=subprocess.PIPE,
118
- stderr=subprocess.PIPE,
119
  text=True,
120
- bufsize=1, # line-buffered
121
  env=env,
122
  )
123
  self._initialize()
@@ -321,7 +311,6 @@ class MedGemmaAgent:
321
  self.mcp_client = MCPClient()
322
  self.mcp_client.start()
323
  self._print("MCP server started successfully")
324
- self.mcp_client._drain_stderr()
325
  self.tools_loaded = True
326
 
327
  def _multi_pass_visual_exam(self, image, question: Optional[str] = None) -> Generator[str, None, Dict[str, str]]:
 
38
  self._process.stdin.write(line)
39
  self._process.stdin.flush()
40
 
 
 
 
 
 
 
 
 
 
 
 
41
  def _recv(self, timeout: int = 300) -> dict:
42
  import select
43
  deadline = time.time() + timeout
44
  while True:
45
  remaining = deadline - time.time()
46
  if remaining <= 0:
 
47
  raise RuntimeError(
48
  f"MCP server did not respond within {timeout}s"
49
  )
 
53
  if not ready:
54
  # Check if subprocess died
55
  if self._process.poll() is not None:
 
56
  raise RuntimeError(
57
  f"MCP server exited with code {self._process.returncode}"
58
  )
59
  continue
60
  line = self._process.stdout.readline()
61
  if not line:
 
62
  raise RuntimeError("MCP server closed connection unexpectedly")
63
  line = line.strip()
64
  if not line:
 
97
  # compete with MedGemma for GPU VRAM (T4 has only 16 GB).
98
  env = os.environ.copy()
99
  env["SKINPRO_TOOL_DEVICE"] = "cpu"
100
+ # stderr inherits parent's stderr (not PIPE) to avoid deadlock:
101
+ # when the subprocess downloads models, progress output fills the
102
+ # 64KB pipe buffer and blocks the subprocess while the main process
103
+ # is blocked waiting on stdout — classic subprocess deadlock.
104
  self._process = subprocess.Popen(
105
+ [sys.executable, server_script],
106
  stdin=subprocess.PIPE,
107
  stdout=subprocess.PIPE,
108
+ stderr=None, # inherit parent stderr
109
  text=True,
110
+ bufsize=1,
111
  env=env,
112
  )
113
  self._initialize()
 
311
  self.mcp_client = MCPClient()
312
  self.mcp_client.start()
313
  self._print("MCP server started successfully")
 
314
  self.tools_loaded = True
315
 
316
  def _multi_pass_visual_exam(self, image, question: Optional[str] = None) -> Generator[str, None, Dict[str, str]]: