Commit ·
da343a7
1
Parent(s): c376e14
Fix MCP subprocess deadlock: use stderr=None instead of PIPE
Browse files
When the MCP subprocess downloads models (MONET, sentence-transformers)
on first call, progress output fills the 64KB stderr pipe buffer. The
subprocess blocks trying to write stderr while the main process blocks
reading stdout — classic pipe deadlock. Fix by inheriting parent stderr
so download progress prints directly to the container logs.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- models/medgemma_agent.py +7 -18
models/medgemma_agent.py
CHANGED
|
@@ -38,24 +38,12 @@ class MCPClient:
|
|
| 38 |
self._process.stdin.write(line)
|
| 39 |
self._process.stdin.flush()
|
| 40 |
|
| 41 |
-
def _drain_stderr(self):
|
| 42 |
-
"""Read any available stderr from the subprocess and print it."""
|
| 43 |
-
if self._process and self._process.stderr:
|
| 44 |
-
import select
|
| 45 |
-
while select.select([self._process.stderr], [], [], 0)[0]:
|
| 46 |
-
line = self._process.stderr.readline()
|
| 47 |
-
if line:
|
| 48 |
-
print(f"[MCP stderr] {line.strip()}", flush=True)
|
| 49 |
-
else:
|
| 50 |
-
break
|
| 51 |
-
|
| 52 |
def _recv(self, timeout: int = 300) -> dict:
|
| 53 |
import select
|
| 54 |
deadline = time.time() + timeout
|
| 55 |
while True:
|
| 56 |
remaining = deadline - time.time()
|
| 57 |
if remaining <= 0:
|
| 58 |
-
self._drain_stderr()
|
| 59 |
raise RuntimeError(
|
| 60 |
f"MCP server did not respond within {timeout}s"
|
| 61 |
)
|
|
@@ -65,14 +53,12 @@ class MCPClient:
|
|
| 65 |
if not ready:
|
| 66 |
# Check if subprocess died
|
| 67 |
if self._process.poll() is not None:
|
| 68 |
-
self._drain_stderr()
|
| 69 |
raise RuntimeError(
|
| 70 |
f"MCP server exited with code {self._process.returncode}"
|
| 71 |
)
|
| 72 |
continue
|
| 73 |
line = self._process.stdout.readline()
|
| 74 |
if not line:
|
| 75 |
-
self._drain_stderr()
|
| 76 |
raise RuntimeError("MCP server closed connection unexpectedly")
|
| 77 |
line = line.strip()
|
| 78 |
if not line:
|
|
@@ -111,13 +97,17 @@ class MCPClient:
|
|
| 111 |
# compete with MedGemma for GPU VRAM (T4 has only 16 GB).
|
| 112 |
env = os.environ.copy()
|
| 113 |
env["SKINPRO_TOOL_DEVICE"] = "cpu"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
self._process = subprocess.Popen(
|
| 115 |
-
[sys.executable, server_script],
|
| 116 |
stdin=subprocess.PIPE,
|
| 117 |
stdout=subprocess.PIPE,
|
| 118 |
-
stderr=
|
| 119 |
text=True,
|
| 120 |
-
bufsize=1,
|
| 121 |
env=env,
|
| 122 |
)
|
| 123 |
self._initialize()
|
|
@@ -321,7 +311,6 @@ class MedGemmaAgent:
|
|
| 321 |
self.mcp_client = MCPClient()
|
| 322 |
self.mcp_client.start()
|
| 323 |
self._print("MCP server started successfully")
|
| 324 |
-
self.mcp_client._drain_stderr()
|
| 325 |
self.tools_loaded = True
|
| 326 |
|
| 327 |
def _multi_pass_visual_exam(self, image, question: Optional[str] = None) -> Generator[str, None, Dict[str, str]]:
|
|
|
|
| 38 |
self._process.stdin.write(line)
|
| 39 |
self._process.stdin.flush()
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def _recv(self, timeout: int = 300) -> dict:
|
| 42 |
import select
|
| 43 |
deadline = time.time() + timeout
|
| 44 |
while True:
|
| 45 |
remaining = deadline - time.time()
|
| 46 |
if remaining <= 0:
|
|
|
|
| 47 |
raise RuntimeError(
|
| 48 |
f"MCP server did not respond within {timeout}s"
|
| 49 |
)
|
|
|
|
| 53 |
if not ready:
|
| 54 |
# Check if subprocess died
|
| 55 |
if self._process.poll() is not None:
|
|
|
|
| 56 |
raise RuntimeError(
|
| 57 |
f"MCP server exited with code {self._process.returncode}"
|
| 58 |
)
|
| 59 |
continue
|
| 60 |
line = self._process.stdout.readline()
|
| 61 |
if not line:
|
|
|
|
| 62 |
raise RuntimeError("MCP server closed connection unexpectedly")
|
| 63 |
line = line.strip()
|
| 64 |
if not line:
|
|
|
|
| 97 |
# compete with MedGemma for GPU VRAM (T4 has only 16 GB).
|
| 98 |
env = os.environ.copy()
|
| 99 |
env["SKINPRO_TOOL_DEVICE"] = "cpu"
|
| 100 |
+
# stderr inherits parent's stderr (not PIPE) to avoid deadlock:
|
| 101 |
+
# when the subprocess downloads models, progress output fills the
|
| 102 |
+
# 64KB pipe buffer and blocks the subprocess while the main process
|
| 103 |
+
# is blocked waiting on stdout — classic subprocess deadlock.
|
| 104 |
self._process = subprocess.Popen(
|
| 105 |
+
[sys.executable, server_script],
|
| 106 |
stdin=subprocess.PIPE,
|
| 107 |
stdout=subprocess.PIPE,
|
| 108 |
+
stderr=None, # inherit parent stderr
|
| 109 |
text=True,
|
| 110 |
+
bufsize=1,
|
| 111 |
env=env,
|
| 112 |
)
|
| 113 |
self._initialize()
|
|
|
|
| 311 |
self.mcp_client = MCPClient()
|
| 312 |
self.mcp_client.start()
|
| 313 |
self._print("MCP server started successfully")
|
|
|
|
| 314 |
self.tools_loaded = True
|
| 315 |
|
| 316 |
def _multi_pass_visual_exam(self, image, question: Optional[str] = None) -> Generator[str, None, Dict[str, str]]:
|