1ForrestW1
/

gta1-endpoint

Model card Files Files and versions

Forrest Wargo committed on Oct 20, 2025

Commit

eccbc24

·

1 Parent(s): 222ced7

fixing vllms

Files changed (1) hide show

handler.py +4 -0

handler.py CHANGED Viewed

@@ -59,6 +59,8 @@ class EndpointHandler:
                 pass
         # Auto-detect tensor parallel size from visible devices
         visible = os.environ.get("CUDA_VISIBLE_DEVICES")
         if visible and visible.strip():
             try:
@@ -80,6 +82,8 @@ class EndpointHandler:
             pipeline_parallel_size=1,
             gpu_memory_utilization=0.95,
             dtype="auto",
             trust_remote_code=True,
         )
         self.processor = AutoProcessor.from_pretrained(

                 pass
         # Auto-detect tensor parallel size from visible devices
+        # Some server environments require 'fork' for worker processes
+        os.environ.setdefault("VLLM_WORKER_MULTIPROC_METHOD", "fork")
         visible = os.environ.get("CUDA_VISIBLE_DEVICES")
         if visible and visible.strip():
             try:
             pipeline_parallel_size=1,
             gpu_memory_utilization=0.95,
             dtype="auto",
+            distributed_executor_backend="mp",
+            enforce_eager=True,
             trust_remote_code=True,
         )
         self.processor = AutoProcessor.from_pretrained(