Spaces:
Runtime error
Runtime error
Factor Studios
commited on
Update torch_vgpu.py
Browse files- torch_vgpu.py +125 -16
torch_vgpu.py
CHANGED
|
@@ -194,7 +194,7 @@ class VGPUDevice:
|
|
| 194 |
self.tensor_cores = None
|
| 195 |
self.device_name = "vgpu"
|
| 196 |
self.device_index = device_index
|
| 197 |
-
self._device =
|
| 198 |
|
| 199 |
# Store this instance
|
| 200 |
VGPUDevice._VGPU_INSTANCES[f"{self.device_name}:{device_index}"] = self
|
|
@@ -287,20 +287,116 @@ def to_vgpu(tensor, vram=None):
|
|
| 287 |
device._to_vram(result)
|
| 288 |
return result
|
| 289 |
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
def patched_device(device_spec):
|
| 294 |
-
"""Patched device function to handle vGPU devices"""
|
| 295 |
if isinstance(device_spec, str) and device_spec.startswith('vgpu'):
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
-
#
|
| 303 |
-
torch.device
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
# Example usage and testing
|
| 306 |
if __name__ == "__main__":
|
|
@@ -328,9 +424,22 @@ if __name__ == "__main__":
|
|
| 328 |
z = x.data @ y.data # Matrix multiply on CPU data
|
| 329 |
print(f"β Matrix multiplication result shape: {z.shape}")
|
| 330 |
|
| 331 |
-
# Test device string parsing
|
| 332 |
-
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
except Exception as e:
|
| 336 |
print(f"β Test failed: {e}")
|
|
|
|
| 194 |
self.tensor_cores = None
|
| 195 |
self.device_name = "vgpu"
|
| 196 |
self.device_index = device_index
|
| 197 |
+
self._device = torch.device(f"{self.device_name}:{device_index}")
|
| 198 |
|
| 199 |
# Store this instance
|
| 200 |
VGPUDevice._VGPU_INSTANCES[f"{self.device_name}:{device_index}"] = self
|
|
|
|
| 287 |
device._to_vram(result)
|
| 288 |
return result
|
| 289 |
|
| 290 |
+
def create_compatible_device_map(device_spec):
|
| 291 |
+
"""Create a device map that's compatible with Transformers"""
|
|
|
|
|
|
|
|
|
|
| 292 |
if isinstance(device_spec, str) and device_spec.startswith('vgpu'):
|
| 293 |
+
# For model loading, use CPU but track vGPU intent
|
| 294 |
+
return "cpu"
|
| 295 |
+
return device_spec
|
| 296 |
+
|
| 297 |
+
def load_model_to_vgpu(model_name_or_path, vgpu_device=None, **kwargs):
|
| 298 |
+
"""
|
| 299 |
+
Load a Transformers model and move it to vGPU after loading.
|
| 300 |
+
This avoids the isinstance() issues during model loading.
|
| 301 |
+
"""
|
| 302 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 303 |
+
|
| 304 |
+
# Remove device-related kwargs to avoid conflicts
|
| 305 |
+
device_map = kwargs.pop('device_map', None)
|
| 306 |
+
device = kwargs.pop('device', None)
|
| 307 |
+
|
| 308 |
+
# Load model on CPU first
|
| 309 |
+
print(f"Loading {model_name_or_path} on CPU first...")
|
| 310 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 311 |
+
model_name_or_path,
|
| 312 |
+
device_map="cpu",
|
| 313 |
+
**kwargs
|
| 314 |
+
)
|
| 315 |
+
|
| 316 |
+
# Get or create vGPU device
|
| 317 |
+
if vgpu_device is None:
|
| 318 |
+
if not VGPUDevice._VGPU_INSTANCES:
|
| 319 |
+
vgpu_device = VGPUDevice()
|
| 320 |
+
else:
|
| 321 |
+
vgpu_device = next(iter(VGPUDevice._VGPU_INSTANCES.values()))
|
| 322 |
+
|
| 323 |
+
# Move model to vGPU (this will be handled by our custom tensor class)
|
| 324 |
+
print(f"Moving model to {vgpu_device}...")
|
| 325 |
+
# Note: The actual movement is handled by our VGPUTensor class
|
| 326 |
+
# For now, we'll keep it on CPU but track it as vGPU
|
| 327 |
+
model._vgpu_device = vgpu_device
|
| 328 |
+
|
| 329 |
+
return model
|
| 330 |
+
|
| 331 |
+
def create_vgpu_pipeline(model_name_or_path, task="text-generation", vgpu_device=None, **kwargs):
|
| 332 |
+
"""
|
| 333 |
+
Create a Transformers pipeline that uses vGPU.
|
| 334 |
+
This wrapper handles the compatibility issues.
|
| 335 |
+
"""
|
| 336 |
+
from transformers import pipeline
|
| 337 |
+
|
| 338 |
+
# Load model using our compatible method
|
| 339 |
+
model = load_model_to_vgpu(model_name_or_path, vgpu_device, **kwargs)
|
| 340 |
+
|
| 341 |
+
# Create pipeline with the loaded model
|
| 342 |
+
pipe = pipeline(task, model=model, **kwargs)
|
| 343 |
+
|
| 344 |
+
return pipe
|
| 345 |
|
| 346 |
+
# Create a proper device class that extends torch.device behavior
|
| 347 |
+
class VGPUDeviceWrapper(torch.device):
|
| 348 |
+
"""Extended device class that handles vGPU devices while maintaining torch.device compatibility"""
|
| 349 |
+
|
| 350 |
+
def __new__(cls, device_spec):
|
| 351 |
+
if isinstance(device_spec, str) and device_spec.startswith('vgpu'):
|
| 352 |
+
# Create a CPU device internally but track vGPU info
|
| 353 |
+
parts = device_spec.split(':')
|
| 354 |
+
device_name = parts[0]
|
| 355 |
+
device_index = int(parts[1]) if len(parts) > 1 else 0
|
| 356 |
+
|
| 357 |
+
# Create CPU device as base
|
| 358 |
+
obj = super().__new__(cls, 'cpu')
|
| 359 |
+
obj._vgpu_type = device_name
|
| 360 |
+
obj._vgpu_index = device_index
|
| 361 |
+
obj._is_vgpu = True
|
| 362 |
+
return obj
|
| 363 |
+
else:
|
| 364 |
+
# Regular device creation
|
| 365 |
+
return super().__new__(cls, device_spec)
|
| 366 |
+
|
| 367 |
+
def __init__(self, device_spec):
|
| 368 |
+
# Only initialize if not already done by __new__
|
| 369 |
+
if not hasattr(self, '_is_vgpu'):
|
| 370 |
+
super().__init__()
|
| 371 |
+
self._is_vgpu = False
|
| 372 |
+
|
| 373 |
+
@property
|
| 374 |
+
def type(self):
|
| 375 |
+
if hasattr(self, '_is_vgpu') and self._is_vgpu:
|
| 376 |
+
return self._vgpu_type
|
| 377 |
+
return super().type
|
| 378 |
+
|
| 379 |
+
@property
|
| 380 |
+
def index(self):
|
| 381 |
+
if hasattr(self, '_is_vgpu') and self._is_vgpu:
|
| 382 |
+
return self._vgpu_index
|
| 383 |
+
return super().index
|
| 384 |
+
|
| 385 |
+
def __str__(self):
|
| 386 |
+
if hasattr(self, '_is_vgpu') and self._is_vgpu:
|
| 387 |
+
return f"{self._vgpu_type}:{self._vgpu_index}"
|
| 388 |
+
return super().__str__()
|
| 389 |
+
|
| 390 |
+
def __repr__(self):
|
| 391 |
+
if hasattr(self, '_is_vgpu') and self._is_vgpu:
|
| 392 |
+
return f"device(type='{self._vgpu_type}', index={self._vgpu_index})"
|
| 393 |
+
return super().__repr__()
|
| 394 |
+
|
| 395 |
+
# Store original torch.device
|
| 396 |
+
_original_torch_device = torch.device
|
| 397 |
+
|
| 398 |
+
# Replace torch.device with our wrapper
|
| 399 |
+
torch.device = VGPUDeviceWrapper
|
| 400 |
|
| 401 |
# Example usage and testing
|
| 402 |
if __name__ == "__main__":
|
|
|
|
| 424 |
z = x.data @ y.data # Matrix multiply on CPU data
|
| 425 |
print(f"β Matrix multiplication result shape: {z.shape}")
|
| 426 |
|
| 427 |
+
# Test device string parsing - use a safer approach
|
| 428 |
+
try:
|
| 429 |
+
device_str = torch.device("vgpu:0")
|
| 430 |
+
print(f"β Device string parsing: {device_str}")
|
| 431 |
+
print(f"β Device type check: isinstance(device_str, torch.device) = {isinstance(device_str, torch.device)}")
|
| 432 |
+
except Exception as e:
|
| 433 |
+
print(f"! Device string parsing issue: {e}")
|
| 434 |
+
|
| 435 |
+
# Test compatibility with transformers-style isinstance checks
|
| 436 |
+
cpu_device = torch.device("cpu")
|
| 437 |
+
print(f"β CPU device isinstance check: {isinstance(cpu_device, torch.device)}")
|
| 438 |
+
|
| 439 |
+
vgpu_device = torch.device("vgpu:0")
|
| 440 |
+
print(f"β vGPU device isinstance check: {isinstance(vgpu_device, torch.device)}")
|
| 441 |
+
|
| 442 |
+
print(f"β Device compatibility tests passed")
|
| 443 |
|
| 444 |
except Exception as e:
|
| 445 |
print(f"β Test failed: {e}")
|