Spaces:

factorstudios
/

NMFL

Runtime error

App Files Community

Factor Studios committed on Aug 15, 2025

Commit

319082b

verified ·

1 Parent(s): 8aea612

Update torch_vgpu.py

Browse files

Files changed (1) hide show

torch_vgpu.py +82 -93

torch_vgpu.py CHANGED Viewed

@@ -4,64 +4,43 @@ from typing import Optional, Union, Tuple
 import numpy as np
 from virtual_vram import VirtualVRAM
-# Global variables for backend state
 VGPU_BACKEND_INITIALIZED = False
-CURRENT_VRAM = None  # Global reference to current vRAM manager
-def set_current_vram(vram):
-    """Set the current vRAM manager globally"""
-    global CURRENT_VRAM
-    CURRENT_VRAM = vram
-def get_current_vram():
-    """Get the current vRAM manager"""
-    return CURRENT_VRAM
-def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.Tensor:
-    """Move a tensor to vGPU memory"""
-    if vram is None:
-        vram = get_current_vram()
-        if vram is None:
-            raise RuntimeError("No vRAM manager available. Initialize VGPUDevice first.")
-    # Get data and store in vRAM
-    cpu_data = tensor.detach().cpu().numpy()
-    tensor_id = f"tensor_{id(tensor)}"
-    vram.store(tensor_id, cpu_data)
-    # Create vGPU tensor
-    device = torch.device("privateuseone")
-    vgpu_storage = VGPUStorage(
-        cpu_data.size,
-        vram=vram,
-        tensor_id=tensor_id
-    )
-    vgpu_tensor = torch.tensor(
-        [],
-        device=device,
-        requires_grad=tensor.requires_grad
-    )
-    vgpu_tensor.set_(vgpu_storage)
-    return vgpu_tensor
 def init_vgpu_backend():
     """Initialize the vGPU backend. Must be called before creating any VGPUDevice instances."""
     global VGPU_BACKEND_INITIALIZED
     try:
         if not VGPU_BACKEND_INITIALIZED:
-            # Create library for custom ops
             lib = Library("vgpu", "DEF")
-            lib.define("custom_from_cpu(Tensor x) -> Tensor")
-            impl_lib = Library("vgpu", "IMPL")
             @impl(impl_lib, "custom_from_cpu")
-            def custom_from_cpu(x):
-                """Copy tensor to our vGPU memory"""
-                return x.clone()
-            # Set initialization flag
             VGPU_BACKEND_INITIALIZED = True
         return VGPU_BACKEND_INITIALIZED
@@ -89,17 +68,6 @@ class VGPUTensor:
     def __new__(cls, elem):
         return torch.Tensor._make_subclass(cls, elem, elem.requires_grad)
-from contextlib import contextmanager
-# Custom allocator for vGPU tensors
-class VGPUAllocator:
-    def __init__(self, vram):
-        self.vram = vram
-    def __call__(self, size, dtype=None, device=None):
-        cpu_tensor = torch.empty(size, dtype=dtype, device='cpu')
-        return to_vgpu(cpu_tensor, self.vram)
 class VGPUDevice:
     """
     Custom PyTorch device implementation that routes operations through vGPU.
@@ -111,49 +79,69 @@ class VGPUDevice:
     _VGPU_INSTANCES = {}  # Class-level dict to track instances
     def __init__(self, vram: Optional[VirtualVRAM] = None):
-        """Initialize a vGPU device with optional vRAM manager"""
         self.vram = vram or VirtualVRAM()
-        self.device_name = "privateuseone"  # Our device type
-        self._init_device()
-    def _init_device(self):
-        """Initialize the device backend and settings"""
-        if not VGPU_BACKEND_INITIALIZED:
-            raise RuntimeError("VGPU backend not properly initialized")
-        # Setup device and global vRAM
-        self._device = torch.device(self.device_name)
-        set_current_vram(self.vram)
-        # Register instance
-        VGPUDevice._VGPU_INSTANCES[self.device_name] = self
-        # Setup allocator
-        self._allocator = VGPUAllocator(self.vram)
-    def device(self) -> torch.device:
-        """Get the PyTorch device object for this vGPU"""
-        return self._device
-    @contextmanager
-    def mode(self):
-        """Context manager for using this device as the default"""
-        prev_device = torch.device("cpu")
-        try:
-            prev_device = torch.cuda.current_device() if torch.cuda.is_available() else prev_device
-            torch.set_device(self._device)
-            yield
-        finally:
-            torch.set_device(prev_device)
     def __str__(self):
-        """String representation of the device"""
-        return f"{self.device_name}:0"
     def __repr__(self):
-        """Detailed string representation"""
-        return f"vgpu(device='{self.device_name}:0')"
         return tensor_id
     def _from_vram(self, tensor_id: str) -> torch.Tensor:
@@ -199,3 +187,4 @@ def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.T
     # Set the device using the internal name
     result.data = result.data.to(device._device)
     return result

 import numpy as np
 from virtual_vram import VirtualVRAM
+# Global flag for backend initialization
 VGPU_BACKEND_INITIALIZED = False
 def init_vgpu_backend():
     """Initialize the vGPU backend. Must be called before creating any VGPUDevice instances."""
     global VGPU_BACKEND_INITIALIZED
     try:
         if not VGPU_BACKEND_INITIALIZED:
+            # First define our core library
             lib = Library("vgpu", "DEF")
+            lib.define("custom_allocate(Device? device) -> Tensor")
+            lib.define("custom_to_cpu(Tensor self) -> Tensor")
+            lib.define("custom_from_cpu(Tensor self) -> Tensor")
+            # Then implement the operations
+            impl_lib = Library("vgpu", "IMPL", "PrivateUse1")
+            @impl(impl_lib, "custom_allocate")
+            def custom_allocate(device=None):
+                return torch.empty((), device="cpu")
+            @impl(impl_lib, "custom_to_cpu")
+            def custom_to_cpu(tensor):
+                return tensor.clone()
             @impl(impl_lib, "custom_from_cpu")
+            def custom_from_cpu(tensor):
+                return tensor.clone()
+            # Generate all methods for our backend
+            torch.utils.generate_methods_for_privateuse1_backend(
+                for_tensor=True,
+                for_module=True,
+                for_packed_sequence=True,
+                for_storage=True
+            )
             VGPU_BACKEND_INITIALIZED = True
         return VGPU_BACKEND_INITIALIZED
     def __new__(cls, elem):
         return torch.Tensor._make_subclass(cls, elem, elem.requires_grad)
 class VGPUDevice:
     """
     Custom PyTorch device implementation that routes operations through vGPU.
     _VGPU_INSTANCES = {}  # Class-level dict to track instances
     def __init__(self, vram: Optional[VirtualVRAM] = None):
         self.vram = vram or VirtualVRAM()
+        self.tensor_cores = None  # Will be initialized when needed
+        self.device_name = "privateuseone"  # Our registered device type
+        self._register_device()
+    def _register_device(self):
+        """Register vGPU device using PyTorch's device system"""
+        try:
+            if not VGPU_BACKEND_INITIALIZED:
+                raise RuntimeError("VGPU backend not properly initialized")
+            # Create device using our registered device type
+            self._device = torch.device(self.device_name)
+            # Store this instance for reuse
+            VGPUDevice._VGPU_INSTANCES[self.device_name] = self
+            # Define custom operations for the device
+            class VGPUAllocator:
+                def __init__(self, vram, device):
+                    self.vram = vram
+                    self.device = device
+                def __call__(self, size, dtype=None, device=None):
+                    # Create tensor on CPU first
+                    cpu_tensor = torch.empty(size, dtype=dtype, device='cpu')
+                    # Move to vGPU storage
+                    return to_vgpu(cpu_tensor, self.vram)
+            # Set up allocator
+            self._allocator = VGPUAllocator(self.vram, self._device)
+        except Exception as e:
+            raise RuntimeError(f"Failed to register vGPU device: {str(e)}")
+    @property
+    def type(self):
+        return self.internal_name
     def __str__(self):
+        return f"{self.internal_name}:0"
     def __repr__(self):
+        return f"vgpu(device='{self.internal_name}:0')"
+    def device(self):
+        """Get the PyTorch device object that maps to our vGPU"""
+        return self._device  # Return the already created device object
+    def mode(self):
+        """Get a context manager for vGPU operations"""
+        return torch.device(self._device)
+    def _init_tensor_cores(self):
+        if self.tensor_cores is None:
+            from tensor_core import TensorCoreArray
+            self.tensor_cores = TensorCoreArray()
+    def _to_vram(self, tensor: torch.Tensor) -> str:
+        """Store tensor data in virtual VRAM"""
+        tensor_id = f"tensor_{id(tensor)}"
+        data = tensor.detach().cpu().numpy()
+        self.vram.storage.store_tensor(tensor_id, data)
         return tensor_id
     def _from_vram(self, tensor_id: str) -> torch.Tensor:
     # Set the device using the internal name
     result.data = result.data.to(device._device)
     return result