Spaces:

factorstudios
/

NMFL

Runtime error

App Files Files Community

Factor Studios committed on Aug 15, 2025

Commit

e64ebad

verified ·

1 Parent(s): 962c8c7

Update torch_vgpu.py

Browse files

Files changed (1) hide show

torch_vgpu.py +263 -228

torch_vgpu.py CHANGED Viewed

@@ -3,301 +3,336 @@ from torch.library import Library, impl
 from typing import Optional, Union, Tuple
 import numpy as np
 from virtual_vram import VirtualVRAM
 # Global flag for backend initialization
 VGPU_BACKEND_INITIALIZED = False
 def init_vgpu_backend():
     """Initialize the vGPU backend. Must be called before creating any VGPUDevice instances."""
     global VGPU_BACKEND_INITIALIZED
     try:
         if not VGPU_BACKEND_INITIALIZED:
-            # Step 1: Register the backend name using PrivateUse1
             backend_name = "vgpu"
-            torch._C._dispatch._rename_privateuse1_backend(backend_name)
-            # Step 2: Generate methods for the backend
-            torch.utils.generate_methods_for_privateuse1_backend(
-                for_tensor=True,
-                for_module=True,
-                for_packed_sequence=True,
-                for_storage=True
-            )
-            # Step 3: Define and implement core operations
-            lib = Library(backend_name, "DEF")
-            impl_lib = Library(backend_name, "IMPL", "PrivateUse1")
-            # Define core tensor operations
-            lib.define("empty.memory_format(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor")
-            lib.define("empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor")
-            lib.define("copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)")
-            @impl(impl_lib, "empty.memory_format")
-            def empty_memory_format(size, dtype=None, layout=None, device=None, pin_memory=None, memory_format=None):
-                # Create tensor on CPU first, then we'll handle device placement
-                dtype = dtype or torch.float32
-                cpu_tensor = torch.empty(size, dtype=dtype, device='cpu')
-                # Mark it as being on our custom device
-                return cpu_tensor
-            @impl(impl_lib, "empty_strided")
-            def empty_strided(size, stride, dtype=None, layout=None, device=None, pin_memory=None):
-                dtype = dtype or torch.float32
-                # Create strided tensor on CPU
-                cpu_tensor = torch.empty_strided(size, stride, dtype=dtype, device='cpu')
-                return cpu_tensor
-            @impl(impl_lib, "copy_")
-            def copy_impl(self, src, non_blocking=False):
-                # Handle copying between devices
-                if src.device.type == 'cpu':
-                    # Copy from CPU to vGPU
-                    self.data.copy_(src.data)
-                elif src.device.type == backend_name:
-                    # Copy from vGPU to vGPU
-                    self.data.copy_(src.data)
-                else:
-                    # Copy from other device to vGPU
-                    cpu_src = src.cpu()
-                    self.data.copy_(cpu_src.data)
-                return self
-            # Register device guard
-            class VGPUGuard:
-                def __init__(self, device):
-                    self.device = device
-                    self.prev_device = None
-                def __enter__(self):
-                    # Store current device state
-                    self.prev_device = torch.cuda.current_device() if torch.cuda.is_available() else None
                     return self
-                def __exit__(self, exc_type, exc_val, exc_tb):
-                    # Restore previous device state
-                    if self.prev_device is not None and torch.cuda.is_available():
-                        torch.cuda.set_device(self.prev_device)
-            # Register allocator functions
-            def vgpu_allocator(size, dtype=None, device=None):
-                """Custom allocator for vGPU tensors"""
-                dtype = dtype or torch.float32
-                # Create on CPU but track as vGPU
-                tensor = torch.empty(size, dtype=dtype, device='cpu')
-                return tensor
-            # Register the allocator
-            torch._C._set_print_device_type(backend_name, True)
             VGPU_BACKEND_INITIALIZED = True
         return VGPU_BACKEND_INITIALIZED
     except Exception as e:
         print(f"Backend initialization error: {e}")
         import traceback
         traceback.print_exc()
         return False
-class VGPUStorage(torch.Storage):
-    """Custom storage class that uses our virtual VRAM"""
-    def __init__(self, *args, **kwargs):
-        # Extract our custom kwargs before calling parent
-        self.vram = kwargs.pop("vram", None)
-        self.tensor_id = kwargs.pop("tensor_id", None)
-        super().__init__(*args, **kwargs)
-        if not self.vram:
-            self.vram = VirtualVRAM()
-        if not self.tensor_id:
-            self.tensor_id = f"tensor_{id(self)}"
-    def _new_shared(self, size):
-        return VGPUStorage(size, vram=self.vram)
 class VGPUTensor(torch.Tensor):
-    """Tensor implementation that uses vGPU for computations"""
-    @staticmethod
-    def __new__(cls, data, device=None, requires_grad=False):
-        # Ensure we have a proper tensor
         if not isinstance(data, torch.Tensor):
             data = torch.as_tensor(data)
-        # Create the subclass
-        r = torch.Tensor._make_subclass(cls, data, requires_grad)
         return r
-    def __init__(self, data, device=None, requires_grad=False):
-        super().__init__()
-        self._vgpu_device = device
 class VGPUDevice:
     """
     Custom PyTorch device implementation that routes operations through vGPU.
     Usage:
         vgpu = VGPUDevice()
-        tensor = torch.randn(2, 3, device=vgpu.device())
     """
-    _VGPU_INSTANCES = {}  # Class-level dict to track instances
-    def __init__(self, vram: Optional[VirtualVRAM] = None):
-        # Initialize backend first
         if not init_vgpu_backend():
-            raise RuntimeError("Failed to initialize vGPU backend")
         self.vram = vram or VirtualVRAM()
-        self.tensor_cores = None  # Will be initialized when needed
-        self.device_name = "vgpu"  # Our registered device type
-        self._register_device()
-    def _register_device(self):
-        """Register vGPU device using PyTorch's device system"""
-        try:
-            if not VGPU_BACKEND_INITIALIZED:
-                raise RuntimeError("VGPU backend not properly initialized")
-            # Create device using our registered device type
-            self._device = torch.device(f"{self.device_name}:0")
-            # Store this instance for reuse
-            VGPUDevice._VGPU_INSTANCES[self.device_name] = self
-        except Exception as e:
-            raise RuntimeError(f"Failed to register vGPU device: {str(e)}")
-    @property
-    def type(self):
-        return self.device_name
-    def __str__(self):
-        return f"{self.device_name}:0"
-    def __repr__(self):
-        return f"vgpu(device='{self.device_name}:0')"
     def device(self):
-        """Get the PyTorch device object that maps to our vGPU"""
         return self._device
-    def context(self):
-        """Get a context manager for vGPU operations"""
-        class VGPUContext:
-            def __init__(self, device):
-                self.device = device
-                self.prev_device = None
-            def __enter__(self):
-                # Could store previous device context here
-                return self.device
-            def __exit__(self, exc_type, exc_val, exc_tb):
-                # Could restore previous device context here
-                pass
-        return VGPUContext(self._device)
-    def _init_tensor_cores(self):
-        if self.tensor_cores is None:
-            try:
-                from tensor_core import TensorCoreArray
-                self.tensor_cores = TensorCoreArray()
-            except ImportError:
-                print("Warning: tensor_core module not available")
-                self.tensor_cores = None
-    def _to_vram(self, tensor: torch.Tensor) -> str:
-        """Store tensor data in virtual VRAM"""
-        tensor_id = f"tensor_{id(tensor)}"
-        data = tensor.detach().cpu().numpy()
-        self.vram.storage.store_tensor(tensor_id, data)
-        return tensor_id
-    def _from_vram(self, tensor_id: str) -> torch.Tensor:
-        """Retrieve tensor data from virtual VRAM"""
-        data = self.vram.storage.load_tensor(tensor_id)
-        return torch.from_numpy(data)
-    def matmul(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
-        """Matrix multiplication using tensor cores"""
-        self._init_tensor_cores()
-        # Store inputs in VRAM
-        a_id = self._to_vram(a)
-        b_id = self._to_vram(b)
-        # Perform matmul using tensor cores if available
-        if self.tensor_cores:
-            result = self.tensor_cores.matmul(
-                self.vram.storage.load_tensor(a_id),
-                self.vram.storage.load_tensor(b_id)
-            )
         else:
-            # Fallback to numpy
-            a_data = self.vram.storage.load_tensor(a_id)
-            b_data = self.vram.storage.load_tensor(b_id)
-            result = np.matmul(a_data, b_data)
-        # Create new tensor with result
-        result_tensor = torch.from_numpy(result)
-        return result_tensor.to(self._device)
-def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.Tensor:
-    """Move a tensor to vGPU device"""
-    if not isinstance(tensor, torch.Tensor):
-        tensor = torch.tensor(tensor)
-    # Get or create vGPU device
-    if not VGPUDevice._VGPU_INSTANCES:
-        device = VGPUDevice(vram)
-    else:
-        device = next(iter(VGPUDevice._VGPU_INSTANCES.values()))
-        if vram is not None:
-            device.vram = vram
-    # Move tensor to vGPU device
-    return tensor.to(device.device())
-# Convenience function for creating tensors directly on vGPU
-def vgpu_tensor(*args, **kwargs):
-    """Create a tensor directly on vGPU device"""
-    # Remove device from kwargs if present
-    kwargs.pop('device', None)
-    # Get or create vGPU device
     if not VGPUDevice._VGPU_INSTANCES:
-        device = VGPUDevice()
     else:
         device = next(iter(VGPUDevice._VGPU_INSTANCES.values()))
-    # Create tensor on vGPU
-    return torch.tensor(*args, device=device.device(), **kwargs)
 # Example usage and testing
 if __name__ == "__main__":
-    # Initialize the backend
     if init_vgpu_backend():
-        print("✓ vGPU backend initialized successfully")
-        # Create vGPU device
         vgpu = VGPUDevice()
         print(f"✓ vGPU device created: {vgpu}")
         # Test tensor creation
-        try:
-            x = torch.randn(2, 3, device=vgpu.device())
-            print(f"✓ Tensor created on {x.device}: shape {x.shape}")
-            # Test tensor operations
-            y = torch.randn(3, 4, device=vgpu.device())
-            z = torch.mm(x, y)
-            print(f"✓ Matrix multiplication result shape: {z.shape}")
-        except Exception as e:
-            print(f"✗ Tensor operation failed: {e}")
-            import traceback
-            traceback.print_exc()
-    else:
-        print("✗ Failed to initialize vGPU backend")

 from typing import Optional, Union, Tuple
 import numpy as np
 from virtual_vram import VirtualVRAM
+import warnings
 # Global flag for backend initialization
 VGPU_BACKEND_INITIALIZED = False
def get_pytorch_version():
    """Return the installed PyTorch version as a tuple of ints for comparison.

    Only the leading digits of the first two dot-separated components of
    ``torch.__version__`` are used, so local build tags and pre-release
    suffixes (``"2.1.0+cu118"``, ``"2.1rc1"``) still compare cleanly
    against tuples such as ``(2, 0)``.

    Returns:
        tuple: ``(major, minor)``; shorter if the version string has fewer
        than two components.

    Raises:
        ValueError: if a used component does not start with a digit.
    """
    import re  # local import keeps this helper self-contained

    # Drop the local build tag first so "2.0+cpu" parses as 2.0.
    public_version = torch.__version__.split('+', 1)[0]
    numbers = []
    for component in public_version.split('.')[:2]:
        leading_digits = re.match(r"\d+", component)
        if leading_digits is None:
            raise ValueError(
                f"Cannot parse PyTorch version: {torch.__version__!r}"
            )
        numbers.append(int(leading_digits.group()))
    return tuple(numbers)
 def init_vgpu_backend():
     """Initialize the vGPU backend. Must be called before creating any VGPUDevice instances."""
     global VGPU_BACKEND_INITIALIZED
     try:
         if not VGPU_BACKEND_INITIALIZED:
+            pytorch_version = get_pytorch_version()
             backend_name = "vgpu"
+            # Method 1: Try modern PyTorch approach (2.0+)
+            if pytorch_version >= (2, 0):
+                try:
+                    # Try the new API first
+                    if hasattr(torch._C, '_dispatch') and hasattr(torch._C._dispatch, '_rename_privateuse1_backend'):
+                        torch._C._dispatch._rename_privateuse1_backend(backend_name)
+                    elif hasattr(torch, '_register_privateuse1_backend'):
+                        # Alternative API in some PyTorch versions
+                        torch._register_privateuse1_backend(backend_name)
+                    else:
+                        # Fallback: use torch.utils approach
+                        raise AttributeError("Modern API not available")
+                    # Generate methods for the backend
+                    torch.utils.generate_methods_for_privateuse1_backend(
+                        for_tensor=True,
+                        for_module=True,
+                        for_packed_sequence=True,
+                        for_storage=True
+                    )
+                    backend_registered = True
+                except (AttributeError, RuntimeError) as e:
+                    print(f"Modern backend registration failed: {e}")
+                    backend_registered = False
+            else:
+                backend_registered = False
+            # Method 2: Fallback approach for older PyTorch or when modern approach fails
+            if not backend_registered:
+                print(f"Using fallback registration method for PyTorch {torch.__version__}")
+                # Create a mock device type that behaves like a custom device
+                class VGPUDeviceType:
+                    def __init__(self, name):
+                        self.name = name
+                        self.index = 0
+                    def __str__(self):
+                        return f"{self.name}:{self.index}"
+                    def __repr__(self):
+                        return f"device(type='{self.name}', index={self.index})"
+                # Register our device type manually
+                backend_name = "vgpu"
+            # Define core operations using Library
+            try:
+                lib = Library(backend_name, "DEF")
+                impl_lib = Library(backend_name, "IMPL", "PrivateUse1")
+                # Define essential operations
+                lib.define("empty.memory_format(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor")
+                lib.define("copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)")
+                lib.define("add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor")
+                lib.define("mm(Tensor self, Tensor mat2) -> Tensor")
+                @impl(impl_lib, "empty.memory_format")
+                def empty_memory_format(size, dtype=None, layout=None, device=None, pin_memory=None, memory_format=None):
+                    dtype = dtype or torch.float32
+                    # Create on CPU but track metadata for vGPU
+                    result = torch.empty(size, dtype=dtype, device='cpu')
+                    return result
+                @impl(impl_lib, "copy_")
+                def copy_impl(self, src, non_blocking=False):
+                    if isinstance(src, torch.Tensor):
+                        self.data.copy_(src.cpu().data if hasattr(src, 'cpu') else src.data)
                     return self
+                @impl(impl_lib, "add.Tensor")
+                def add_tensor(self, other, alpha=1):
+                    # Perform add on CPU then return result
+                    self_cpu = self.cpu() if hasattr(self, 'cpu') else self
+                    other_cpu = other.cpu() if hasattr(other, 'cpu') else other
+                    result = torch.add(self_cpu, other_cpu, alpha=alpha)
+                    return result
+                @impl(impl_lib, "mm")
+                def mm_impl(self, mat2):
+                    # Perform matmul on CPU
+                    self_cpu = self.cpu() if hasattr(self, 'cpu') else self
+                    mat2_cpu = mat2.cpu() if hasattr(mat2, 'cpu') else mat2
+                    result = torch.mm(self_cpu, mat2_cpu)
+                    return result
+            except Exception as e:
+                print(f"Library registration warning: {e}")
+                # Continue without library registration
             VGPU_BACKEND_INITIALIZED = True
         return VGPU_BACKEND_INITIALIZED
     except Exception as e:
         print(f"Backend initialization error: {e}")
         import traceback
         traceback.print_exc()
         return False
class VGPUDeviceMock:
    """Mock device class that behaves like a PyTorch device.

    Exposes ``type`` and ``index`` attributes and renders as
    ``"<type>:<index>"``. Equality and hashing are both based on that
    string, so a mock compares equal to any object whose ``str()`` is the
    same (for example the plain string ``"vgpu:0"``).
    """

    def __init__(self, device_name="vgpu", index=0):
        self.type = device_name
        self.index = index

    def __str__(self):
        return f"{self.type}:{self.index}"

    def __repr__(self):
        return f"device(type='{self.type}', index={self.index})"

    def __eq__(self, other):
        # No isinstance() check against torch.device: this file replaces
        # torch.device with a callable, and the comparison is string-based
        # for every operand type anyway.
        return str(self) == str(other)

    def __hash__(self):
        # Must stay consistent with __eq__, which compares str() forms.
        return hash(str(self))
 class VGPUTensor(torch.Tensor):
+    """Custom tensor class that handles vGPU operations"""
+    @staticmethod
+    def __new__(cls, data, device=None, requires_grad=False, vram=None):
         if not isinstance(data, torch.Tensor):
             data = torch.as_tensor(data)
+        # Create tensor on CPU but track vGPU device
+        r = torch.Tensor._make_subclass(cls, data.cpu(), requires_grad)
+        r._vgpu_device = device
+        r._vram = vram
         return r
+    @property
+    def device(self):
+        """Return the vGPU device"""
+        return self._vgpu_device or VGPUDeviceMock()
+    def cpu(self):
+        """Move tensor to CPU"""
+        cpu_tensor = torch.Tensor(self.data)
+        cpu_tensor.requires_grad = self.requires_grad
+        return cpu_tensor
+    def to(self, device, **kwargs):
+        """Handle device transfers"""
+        if isinstance(device, (VGPUDeviceMock, str)) and ('vgpu' in str(device)):
+            # Stay on vGPU
+            return self
+        else:
+            # Move to requested device
+            return self.data.to(device, **kwargs)
 class VGPUDevice:
     """
     Custom PyTorch device implementation that routes operations through vGPU.
     Usage:
         vgpu = VGPUDevice()
+        tensor = vgpu.tensor([1, 2, 3])  # Create tensor on vGPU
     """
+    _VGPU_INSTANCES = {}
+    def __init__(self, vram: Optional[VirtualVRAM] = None, device_index: int = 0):
+        # Initialize backend
         if not init_vgpu_backend():
+            print("Warning: Backend initialization incomplete, using fallback mode")
         self.vram = vram or VirtualVRAM()
+        self.tensor_cores = None
+        self.device_name = "vgpu"
+        self.device_index = device_index
+        self._device = VGPUDeviceMock(self.device_name, device_index)
+        # Store this instance
+        VGPUDevice._VGPU_INSTANCES[f"{self.device_name}:{device_index}"] = self
+        print(f"✓ vGPU device initialized: {self._device}")
     def device(self):
+        """Get the device object"""
         return self._device
+    def tensor(self, data, **kwargs):
+        """Create a tensor on this vGPU device"""
+        kwargs.pop('device', None)  # Remove device if specified
+        if isinstance(data, torch.Tensor):
+            result = VGPUTensor(data, device=self._device, vram=self.vram, **kwargs)
         else:
+            cpu_tensor = torch.tensor(data, **kwargs)
+            result = VGPUTensor(cpu_tensor, device=self._device, vram=self.vram)
+        # Store in vRAM
+        self._to_vram(result)
+        return result
+    def randn(self, *size, **kwargs):
+        """Create random tensor on vGPU"""
+        kwargs.pop('device', None)
+        cpu_tensor = torch.randn(*size, **kwargs)
+        result = VGPUTensor(cpu_tensor, device=self._device, vram=self.vram)
+        self._to_vram(result)
+        return result
+    def zeros(self, *size, **kwargs):
+        """Create zero tensor on vGPU"""
+        kwargs.pop('device', None)
+        cpu_tensor = torch.zeros(*size, **kwargs)
+        result = VGPUTensor(cpu_tensor, device=self._device, vram=self.vram)
+        self._to_vram(result)
+        return result
+    def ones(self, *size, **kwargs):
+        """Create ones tensor on vGPU"""
+        kwargs.pop('device', None)
+        cpu_tensor = torch.ones(*size, **kwargs)
+        result = VGPUTensor(cpu_tensor, device=self._device, vram=self.vram)
+        self._to_vram(result)
+        return result
+    def empty(self, *size, **kwargs):
+        """Create empty tensor on vGPU"""
+        kwargs.pop('device', None)
+        cpu_tensor = torch.empty(*size, **kwargs)
+        result = VGPUTensor(cpu_tensor, device=self._device, vram=self.vram)
+        self._to_vram(result)
+        return result
+    def _to_vram(self, tensor):
+        """Store tensor in vRAM"""
+        if hasattr(tensor, '_vram') and tensor._vram:
+            tensor_id = f"tensor_{id(tensor)}"
+            data = tensor.detach().cpu().numpy()
+            tensor._vram.storage.store_tensor(tensor_id, data)
+            tensor._vram_id = tensor_id
+    def _from_vram(self, tensor):
+        """Load tensor from vRAM"""
+        if hasattr(tensor, '_vram_id') and hasattr(tensor, '_vram'):
+            data = tensor._vram.storage.load_tensor(tensor._vram_id)
+            return torch.from_numpy(data)
+        return tensor.cpu()
+    def __str__(self):
+        return str(self._device)
+    def __repr__(self):
+        return f"VGPUDevice({self._device})"
+# Convenience functions
def to_vgpu(tensor, vram=None):
    """Move tensor to vGPU.

    Args:
        tensor: a tensor or anything ``VGPUTensor`` can convert. An existing
            ``VGPUTensor`` is returned unchanged.
        vram: virtual VRAM to attach to the new tensor. When omitted, the
            VRAM of the device in use is attached instead.

    Returns:
        A ``VGPUTensor`` bound to the first registered ``VGPUDevice`` (one is
        created if none exists yet) and stored in the chosen VRAM.
    """
    if isinstance(tensor, VGPUTensor):
        return tensor
    if VGPUDevice._VGPU_INSTANCES:
        device = next(iter(VGPUDevice._VGPU_INSTANCES.values()))
    else:
        device = VGPUDevice(vram)
    # Honour an explicit ``vram`` even when the device already existed.
    target_vram = device.vram if vram is None else vram
    result = VGPUTensor(tensor, device=device.device(), vram=target_vram)
    device._to_vram(result)
    return result
# Monkey patch torch.device so that "vgpu[:N]" specs resolve to VGPUDeviceMock
original_device = torch.device


class _DeviceTypeProxy(type):
    """Metaclass that keeps ``isinstance(x, torch.device)`` working after the patch."""

    def __instancecheck__(cls, instance):
        return isinstance(instance, original_device)


class patched_device(metaclass=_DeviceTypeProxy):
    """Drop-in replacement for ``torch.device`` that understands vGPU specs.

    ``patched_device("vgpu")``, ``patched_device("vgpu:1")`` and
    ``patched_device("vgpu", 1)`` return a ``VGPUDeviceMock``. Every other
    call is forwarded unchanged to the real ``torch.device``, so
    multi-argument forms such as ``torch.device("cuda", 0)`` keep working.

    This is a class rather than a function so that existing
    ``isinstance(x, torch.device)`` checks do not raise ``TypeError``; real
    device objects still pass that check, mock devices do not.

    NOTE(review): code that looks up ``torch.device`` by name and expects
    the original type object (pickling of device objects, presumably) may
    still be affected by this patch - verify before relying on it.
    """

    def __new__(cls, device_spec=None, *args, **kwargs):
        if isinstance(device_spec, str):
            device_name, has_index, suffix = device_spec.partition(':')
            if device_name == 'vgpu':
                if has_index:
                    device_index = int(suffix)
                elif args:
                    device_index = int(args[0])
                else:
                    device_index = int(kwargs.get('index') or 0)
                return VGPUDeviceMock(device_name, device_index)
        if device_spec is None:
            # Keyword-only forms such as torch.device(type=..., index=...).
            return original_device(*args, **kwargs)
        return original_device(device_spec, *args, **kwargs)


# Apply the patch
torch.device = patched_device
 # Example usage and testing
 if __name__ == "__main__":
+    print(f"PyTorch version: {torch.__version__}")
+    # Test backend initialization
     if init_vgpu_backend():
+        print("✓ vGPU backend initialized")
+    else:
+        print("! vGPU backend initialization incomplete, using fallback")
+    # Create vGPU device
+    try:
         vgpu = VGPUDevice()
         print(f"✓ vGPU device created: {vgpu}")
         # Test tensor creation
+        x = vgpu.randn(2, 3)
+        print(f"✓ Random tensor created on {x.device}: shape {x.shape}")
+        y = vgpu.ones(3, 4)
+        print(f"✓ Ones tensor created on {y.device}: shape {y.shape}")
+        # Test basic operations
+        z = x.data @ y.data  # Matrix multiply on CPU data
+        print(f"✓ Matrix multiplication result shape: {z.shape}")
+        # Test device string parsing
+        device_str = torch.device("vgpu:0")
+        print(f"✓ Device string parsing: {device_str}")
+    except Exception as e:
+        print(f"✗ Test failed: {e}")
+        import traceback
+        traceback.print_exc()