Spaces:

factorstudios
/

NMFL

Runtime error

App Files Community

Factor Studios committed on Aug 15, 2025

Commit

962c8c7

verified ·

1 Parent(s): 560e47a

Update torch_vgpu.py

Browse files

Files changed (1) hide show

torch_vgpu.py +183 -75

torch_vgpu.py CHANGED Viewed

@@ -12,28 +12,11 @@ def init_vgpu_backend():
     global VGPU_BACKEND_INITIALIZED
     try:
         if not VGPU_BACKEND_INITIALIZED:
-            # First define our core library
-            lib = Library("vgpu", "DEF")
-            lib.define("custom_allocate(Device? device) -> Tensor")
-            lib.define("custom_to_cpu(Tensor self) -> Tensor")
-            lib.define("custom_from_cpu(Tensor self) -> Tensor")
-            # Then implement the operations
-            impl_lib = Library("vgpu", "IMPL", "PrivateUse1")
-            @impl(impl_lib, "custom_allocate")
-            def custom_allocate(device=None):
-                return torch.empty((), device="cpu")
-            @impl(impl_lib, "custom_to_cpu")
-            def custom_to_cpu(tensor):
-                return tensor.clone()
-            @impl(impl_lib, "custom_from_cpu")
-            def custom_from_cpu(tensor):
-                return tensor.clone()
-            # Generate all methods for our backend
             torch.utils.generate_methods_for_privateuse1_backend(
                 for_tensor=True,
                 for_module=True,
@@ -41,47 +24,133 @@ def init_vgpu_backend():
                 for_storage=True
             )
             VGPU_BACKEND_INITIALIZED = True
         return VGPU_BACKEND_INITIALIZED
     except Exception as e:
-        print(f"Backend initialization warning: {e}")
         return False
 class VGPUStorage(torch.Storage):
     """Custom storage class that uses our virtual VRAM"""
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.vram = kwargs.get("vram")
         if not self.vram:
-            from virtual_vram import VirtualVRAM
             self.vram = VirtualVRAM()
-        self.tensor_id = kwargs.get("tensor_id", f"tensor_{id(self)}")
     def _new_shared(self, size):
         return VGPUStorage(size, vram=self.vram)
-class VGPUTensor:
     """Tensor implementation that uses vGPU for computations"""
     @staticmethod
-    def __new__(cls, elem):
-        return torch.Tensor._make_subclass(cls, elem, elem.requires_grad)
 class VGPUDevice:
     """
     Custom PyTorch device implementation that routes operations through vGPU.
     Usage:
         vgpu = VGPUDevice()
-        with vgpu.mode():
-            tensor = torch.randn(2, 3)  # Will be on vGPU
     """
     _VGPU_INSTANCES = {}  # Class-level dict to track instances
     def __init__(self, vram: Optional[VirtualVRAM] = None):
         self.vram = vram or VirtualVRAM()
         self.tensor_cores = None  # Will be initialized when needed
-        self.device_name = "privateuseone"  # Our registered device type
         self._register_device()
     def _register_device(self):
@@ -91,57 +160,53 @@ class VGPUDevice:
                 raise RuntimeError("VGPU backend not properly initialized")
             # Create device using our registered device type
-            self._device = torch.device(self.device_name)
             # Store this instance for reuse
             VGPUDevice._VGPU_INSTANCES[self.device_name] = self
-            # Define custom operations for the device
-            class VGPUAllocator:
-                def __init__(self, vram, device):
-                    self.vram = vram
-                    self.device = device
-                def __call__(self, size, dtype=None, device=None):
-                    # Create tensor directly in vGPU memory
-                    tensor_id = f"tensor_empty_{id(size)}"
-                    # Initialize empty array of the right size and dtype
-                    shape = size if isinstance(size, (tuple, list)) else (size,)
-                    data = np.empty(shape, dtype=np.float32 if dtype is None else dtype)
-                    # Store directly in vRAM
-                    self.vram.storage.store_tensor(tensor_id, data)
-                    # Create tensor with our device type
-                    result = torch.as_tensor(data, device=self.device)
-                    return result
-            # Set up allocator
-            self._allocator = VGPUAllocator(self.vram, self._device)
         except Exception as e:
             raise RuntimeError(f"Failed to register vGPU device: {str(e)}")
     @property
     def type(self):
-        return self.internal_name
     def __str__(self):
-        return f"{self.internal_name}:0"
     def __repr__(self):
-        return f"vgpu(device='{self.internal_name}:0')"
     def device(self):
         """Get the PyTorch device object that maps to our vGPU"""
-        return self._device  # Return the already created device object
-    def mode(self):
         """Get a context manager for vGPU operations"""
-        return torch.device(self._device)
     def _init_tensor_cores(self):
         if self.tensor_cores is None:
-            from tensor_core import TensorCoreArray
-            self.tensor_cores = TensorCoreArray()
     def _to_vram(self, tensor: torch.Tensor) -> str:
         """Store tensor data in virtual VRAM"""
@@ -163,14 +228,21 @@ class VGPUDevice:
         a_id = self._to_vram(a)
         b_id = self._to_vram(b)
-        # Perform matmul using tensor cores
-        result = self.tensor_cores.matmul(
-            self.vram.storage.load_tensor(a_id),
-            self.vram.storage.load_tensor(b_id)
-        )
         # Create new tensor with result
-        return torch.from_numpy(result)
 def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.Tensor:
     """Move a tensor to vGPU device"""
@@ -185,11 +257,47 @@ def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.T
         if vram is not None:
             device.vram = vram
-    # Move data to vRAM
-    tensor_id = device._to_vram(tensor)
-    result = device._from_vram(tensor_id)
-    result.requires_grad = tensor.requires_grad
-    # Set the device using the internal name
-    result.data = result.data.to(device._device)
-    return result

     global VGPU_BACKEND_INITIALIZED
     try:
         if not VGPU_BACKEND_INITIALIZED:
+            # Step 1: Register the backend name using PrivateUse1
+            backend_name = "vgpu"
+            torch._C._dispatch._rename_privateuse1_backend(backend_name)
+            # Step 2: Generate methods for the backend
             torch.utils.generate_methods_for_privateuse1_backend(
                 for_tensor=True,
                 for_module=True,
                 for_storage=True
             )
+            # Step 3: Define and implement core operations
+            lib = Library(backend_name, "DEF")
+            impl_lib = Library(backend_name, "IMPL", "PrivateUse1")
+            # Define core tensor operations
+            lib.define("empty.memory_format(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor")
+            lib.define("empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor")
+            lib.define("copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)")
+            @impl(impl_lib, "empty.memory_format")
+            def empty_memory_format(size, dtype=None, layout=None, device=None, pin_memory=None, memory_format=None):
+                # Create tensor on CPU first, then we'll handle device placement
+                dtype = dtype or torch.float32
+                cpu_tensor = torch.empty(size, dtype=dtype, device='cpu')
+                # Mark it as being on our custom device
+                return cpu_tensor
+            @impl(impl_lib, "empty_strided")
+            def empty_strided(size, stride, dtype=None, layout=None, device=None, pin_memory=None):
+                dtype = dtype or torch.float32
+                # Create strided tensor on CPU
+                cpu_tensor = torch.empty_strided(size, stride, dtype=dtype, device='cpu')
+                return cpu_tensor
+            @impl(impl_lib, "copy_")
+            def copy_impl(self, src, non_blocking=False):
+                # Handle copying between devices
+                if src.device.type == 'cpu':
+                    # Copy from CPU to vGPU
+                    self.data.copy_(src.data)
+                elif src.device.type == backend_name:
+                    # Copy from vGPU to vGPU
+                    self.data.copy_(src.data)
+                else:
+                    # Copy from other device to vGPU
+                    cpu_src = src.cpu()
+                    self.data.copy_(cpu_src.data)
+                return self
+            # Register device guard
+            class VGPUGuard:
+                def __init__(self, device):
+                    self.device = device
+                    self.prev_device = None
+                def __enter__(self):
+                    # Store current device state
+                    self.prev_device = torch.cuda.current_device() if torch.cuda.is_available() else None
+                    return self
+                def __exit__(self, exc_type, exc_val, exc_tb):
+                    # Restore previous device state
+                    if self.prev_device is not None and torch.cuda.is_available():
+                        torch.cuda.set_device(self.prev_device)
+            # Register allocator functions
+            def vgpu_allocator(size, dtype=None, device=None):
+                """Custom allocator for vGPU tensors"""
+                dtype = dtype or torch.float32
+                # Create on CPU but track as vGPU
+                tensor = torch.empty(size, dtype=dtype, device='cpu')
+                return tensor
+            # Register the allocator
+            torch._C._set_print_device_type(backend_name, True)
             VGPU_BACKEND_INITIALIZED = True
         return VGPU_BACKEND_INITIALIZED
     except Exception as e:
+        print(f"Backend initialization error: {e}")
+        import traceback
+        traceback.print_exc()
         return False
 class VGPUStorage(torch.Storage):
     """Custom storage class that uses our virtual VRAM"""
     def __init__(self, *args, **kwargs):
+        # Extract our custom kwargs before calling parent
+        self.vram = kwargs.pop("vram", None)
+        self.tensor_id = kwargs.pop("tensor_id", None)
         super().__init__(*args, **kwargs)
         if not self.vram:
             self.vram = VirtualVRAM()
+        if not self.tensor_id:
+            self.tensor_id = f"tensor_{id(self)}"
     def _new_shared(self, size):
         return VGPUStorage(size, vram=self.vram)
+class VGPUTensor(torch.Tensor):
     """Tensor implementation that uses vGPU for computations"""
     @staticmethod
+    def __new__(cls, data, device=None, requires_grad=False):
+        # Ensure we have a proper tensor
+        if not isinstance(data, torch.Tensor):
+            data = torch.as_tensor(data)
+        # Create the subclass
+        r = torch.Tensor._make_subclass(cls, data, requires_grad)
+        return r
+    def __init__(self, data, device=None, requires_grad=False):
+        super().__init__()
+        self._vgpu_device = device
 class VGPUDevice:
     """
     Custom PyTorch device implementation that routes operations through vGPU.
     Usage:
         vgpu = VGPUDevice()
+        tensor = torch.randn(2, 3, device=vgpu.device())
     """
     _VGPU_INSTANCES = {}  # Class-level dict to track instances
     def __init__(self, vram: Optional[VirtualVRAM] = None):
+        # Initialize backend first
+        if not init_vgpu_backend():
+            raise RuntimeError("Failed to initialize vGPU backend")
         self.vram = vram or VirtualVRAM()
         self.tensor_cores = None  # Will be initialized when needed
+        self.device_name = "vgpu"  # Our registered device type
         self._register_device()
     def _register_device(self):
                 raise RuntimeError("VGPU backend not properly initialized")
             # Create device using our registered device type
+            self._device = torch.device(f"{self.device_name}:0")
             # Store this instance for reuse
             VGPUDevice._VGPU_INSTANCES[self.device_name] = self
         except Exception as e:
             raise RuntimeError(f"Failed to register vGPU device: {str(e)}")
     @property
     def type(self):
+        return self.device_name
     def __str__(self):
+        return f"{self.device_name}:0"
     def __repr__(self):
+        return f"vgpu(device='{self.device_name}:0')"
     def device(self):
         """Get the PyTorch device object that maps to our vGPU"""
+        return self._device
+    def context(self):
         """Get a context manager for vGPU operations"""
+        class VGPUContext:
+            def __init__(self, device):
+                self.device = device
+                self.prev_device = None
+            def __enter__(self):
+                # Could store previous device context here
+                return self.device
+            def __exit__(self, exc_type, exc_val, exc_tb):
+                # Could restore previous device context here
+                pass
+        return VGPUContext(self._device)
     def _init_tensor_cores(self):
         if self.tensor_cores is None:
+            try:
+                from tensor_core import TensorCoreArray
+                self.tensor_cores = TensorCoreArray()
+            except ImportError:
+                print("Warning: tensor_core module not available")
+                self.tensor_cores = None
     def _to_vram(self, tensor: torch.Tensor) -> str:
         """Store tensor data in virtual VRAM"""
         a_id = self._to_vram(a)
         b_id = self._to_vram(b)
+        # Perform matmul using tensor cores if available
+        if self.tensor_cores:
+            result = self.tensor_cores.matmul(
+                self.vram.storage.load_tensor(a_id),
+                self.vram.storage.load_tensor(b_id)
+            )
+        else:
+            # Fallback to numpy
+            a_data = self.vram.storage.load_tensor(a_id)
+            b_data = self.vram.storage.load_tensor(b_id)
+            result = np.matmul(a_data, b_data)
         # Create new tensor with result
+        result_tensor = torch.from_numpy(result)
+        return result_tensor.to(self._device)
 def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.Tensor:
     """Move a tensor to vGPU device"""
         if vram is not None:
             device.vram = vram
+    # Move tensor to vGPU device
+    return tensor.to(device.device())
+# Convenience function for creating tensors directly on vGPU
+def vgpu_tensor(*args, **kwargs):
+    """Create a tensor directly on vGPU device"""
+    # Remove device from kwargs if present
+    kwargs.pop('device', None)
+    # Get or create vGPU device
+    if not VGPUDevice._VGPU_INSTANCES:
+        device = VGPUDevice()
+    else:
+        device = next(iter(VGPUDevice._VGPU_INSTANCES.values()))
+    # Create tensor on vGPU
+    return torch.tensor(*args, device=device.device(), **kwargs)
+# Example usage and testing
+if __name__ == "__main__":
+    # Initialize the backend
+    if init_vgpu_backend():
+        print("✓ vGPU backend initialized successfully")
+        # Create vGPU device
+        vgpu = VGPUDevice()
+        print(f"✓ vGPU device created: {vgpu}")
+        # Test tensor creation
+        try:
+            x = torch.randn(2, 3, device=vgpu.device())
+            print(f"✓ Tensor created on {x.device}: shape {x.shape}")
+            # Test tensor operations
+            y = torch.randn(3, 4, device=vgpu.device())
+            z = torch.mm(x, y)
+            print(f"✓ Matrix multiplication result shape: {z.shape}")
+        except Exception as e:
+            print(f"✗ Tensor operation failed: {e}")
+            import traceback
+            traceback.print_exc()
+    else:
+        print("✗ Failed to initialize vGPU backend")