Spaces:

factorstudios
/

NMFL

Runtime error

App Files Files Community

Factor Studios committed on Aug 15, 2025

Commit

5cdc76b

verified ·

1 Parent(s): e4541c8

Update torch_vgpu.py

Browse files

Files changed (1) hide show

torch_vgpu.py +201 -200

torch_vgpu.py CHANGED Viewed

@@ -1,200 +1,201 @@
-import torch
-from torch.library import Library, impl
-from typing import Optional, Union, Tuple
-import numpy as np
-from virtual_vram import VirtualVRAM
-# Global variables for backend state
-VGPU_BACKEND_INITIALIZED = False
-CURRENT_VRAM = None  # Global reference to current vRAM manager
-def set_current_vram(vram):
-    """Set the current vRAM manager globally"""
-    global CURRENT_VRAM
-    CURRENT_VRAM = vram
-def get_current_vram():
-    """Get the current vRAM manager"""
-    return CURRENT_VRAM
-def init_vgpu_backend():
-    """Initialize the vGPU backend. Must be called before creating any VGPUDevice instances."""
-    global VGPU_BACKEND_INITIALIZED
-    try:
-        if not VGPU_BACKEND_INITIALIZED:
-            # First define our core library
-            lib = Library("vgpu", "DEF")
-            lib.define("custom_allocate(Device? device) -> Tensor")
-            lib.define("custom_to_cpu(Tensor self) -> Tensor")
-            lib.define("custom_from_cpu(Tensor self) -> Tensor")
-            # Then implement the operations
-            impl_lib = Library("vgpu", "IMPL", "PrivateUse1")
-            @impl(impl_lib, "custom_allocate")
-            def custom_allocate(device=None):
-                return torch.empty((), device="cpu")
-            @impl(impl_lib, "custom_to_cpu")
-            def custom_to_cpu(tensor):
-                return tensor.clone()
-            @impl(impl_lib, "custom_from_cpu")
-            def custom_from_cpu(tensor):
-                return tensor.clone()
-            # Generate all methods for our backend
-            torch.utils.generate_methods_for_privateuse1_backend(
-                for_tensor=True,
-                for_module=True,
-                for_packed_sequence=True,
-                for_storage=True
-            )
-            VGPU_BACKEND_INITIALIZED = True
-        return VGPU_BACKEND_INITIALIZED
-    except Exception as e:
-        print(f"Backend initialization warning: {e}")
-        return False
-class VGPUStorage(torch.Storage):
-    """Custom storage class that uses our virtual VRAM"""
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.vram = kwargs.get("vram")
-        if not self.vram:
-            from virtual_vram import VirtualVRAM
-            self.vram = VirtualVRAM()
-        self.tensor_id = kwargs.get("tensor_id", f"tensor_{id(self)}")
-    def _new_shared(self, size):
-        return VGPUStorage(size, vram=self.vram)
-class VGPUTensor:
-    """Tensor implementation that uses vGPU for computations"""
-    @staticmethod
-    def __new__(cls, elem):
-        return torch.Tensor._make_subclass(cls, elem, elem.requires_grad)
-class VGPUDevice:
-    """
-    Custom PyTorch device implementation that routes operations through vGPU.
-    Usage:
-        vgpu = VGPUDevice()
-        with vgpu.mode():
-            tensor = torch.randn(2, 3)  # Will be on vGPU
-    """
-    _VGPU_INSTANCES = {}  # Class-level dict to track instances
-    def __init__(self, vram: Optional[VirtualVRAM] = None):
-        self.vram = vram or VirtualVRAM()
-        self.tensor_cores = None  # Will be initialized when needed
-        self.device_name = "privateuseone"  # Our registered device type
-        set_current_vram(self.vram)  # Set up global vRAM reference
-        self._register_device()
-    def _register_device(self):
-        """Register vGPU device using PyTorch's device system"""
-        try:
-            if not VGPU_BACKEND_INITIALIZED:
-                raise RuntimeError("VGPU backend not properly initialized")
-            # Create device using our registered device type
-            self._device = torch.device(self.device_name)
-            # Store this instance for reuse
-            VGPUDevice._VGPU_INSTANCES[self.device_name] = self
-            # Define custom operations for the device
-            class VGPUAllocator:
-                def __init__(self, vram, device):
-                    self.vram = vram
-                    self.device = device
-                def __call__(self, size, dtype=None, device=None):
-                    # Create tensor on CPU first
-                    cpu_tensor = torch.empty(size, dtype=dtype, device='cpu')
-                    # Move to vGPU storage
-                    return to_vgpu(cpu_tensor, self.vram)
-            # Set up allocator
-            self._allocator = VGPUAllocator(self.vram, self._device)
-        except Exception as e:
-            raise RuntimeError(f"Failed to register vGPU device: {str(e)}")
-    @property
-    def type(self):
-        return self.internal_name
-    def __str__(self):
-        return f"{self.internal_name}:0"
-    def __repr__(self):
-        return f"vgpu(device='{self.internal_name}:0')"
-    def device(self):
-        """Get the PyTorch device object that maps to our vGPU"""
-        return self._device  # Return the already created device object
-    def mode(self):
-        """Get a context manager for vGPU operations"""
-        return torch.device(self._device)
-    def _init_tensor_cores(self):
-        if self.tensor_cores is None:
-            from tensor_core import TensorCoreArray
-            self.tensor_cores = TensorCoreArray()
-    def _to_vram(self, tensor: torch.Tensor) -> str:
-        """Store tensor data in virtual VRAM"""
-        tensor_id = f"tensor_{id(tensor)}"
-        data = tensor.detach().cpu().numpy()
-        self.vram.storage.store_tensor(tensor_id, data)
-        return tensor_id
-    def _from_vram(self, tensor_id: str) -> torch.Tensor:
-        """Retrieve tensor data from virtual VRAM"""
-        data = self.vram.storage.load_tensor(tensor_id)
-        return torch.from_numpy(data)
-    def matmul(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
-        """Matrix multiplication using tensor cores"""
-        self._init_tensor_cores()
-        # Store inputs in VRAM
-        a_id = self._to_vram(a)
-        b_id = self._to_vram(b)
-        # Perform matmul using tensor cores
-        result = self.tensor_cores.matmul(
-            self.vram.storage.load_tensor(a_id),
-            self.vram.storage.load_tensor(b_id)
-        )
-        # Create new tensor with result
-        return torch.from_numpy(result)
-def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.Tensor:
-    """Move a tensor to vGPU device"""
-    if not isinstance(tensor, torch.Tensor):
-        tensor = torch.tensor(tensor)
-    # Get or create vGPU device
-    if not VGPUDevice._VGPU_INSTANCES:
-        device = VGPUDevice(vram)
-    else:
-        device = next(iter(VGPUDevice._VGPU_INSTANCES.values()))
-        if vram is not None:
-            device.vram = vram
-    # Move data to vRAM
-    tensor_id = device._to_vram(tensor)
-    result = device._from_vram(tensor_id)
-    result.requires_grad = tensor.requires_grad
-    # Set the device using the internal name
-    result.data = result.data.to(device._device)
-    return result

+import torch
+from torch.library import Library, impl
+from typing import Optional, Union, Tuple
+import numpy as np
+from virtual_vram import VirtualVRAM
+# Global variables for backend state
+VGPU_BACKEND_INITIALIZED = False
+CURRENT_VRAM = None  # Global reference to current vRAM manager
+def set_current_vram(vram):
+    """Set the current vRAM manager globally"""
+    global CURRENT_VRAM
+    CURRENT_VRAM = vram
+def get_current_vram():
+    """Get the current vRAM manager"""
+    return CURRENT_VRAM
+def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.Tensor:
+    """Move a tensor to vGPU memory"""
+    if vram is None:
+        vram = get_current_vram()
+        if vram is None:
+            raise RuntimeError("No vRAM manager available. Initialize VGPUDevice first.")
+    # Get data and store in vRAM
+    cpu_data = tensor.detach().cpu().numpy()
+    tensor_id = f"tensor_{id(tensor)}"
+    vram.store(tensor_id, cpu_data)
+    # Create vGPU tensor
+    device = torch.device("privateuseone")
+    vgpu_storage = VGPUStorage(
+        cpu_data.size,
+        vram=vram,
+        tensor_id=tensor_id
+    )
+    vgpu_tensor = torch.tensor(
+        [],
+        device=device,
+        requires_grad=tensor.requires_grad
+    )
+    vgpu_tensor.set_(vgpu_storage)
+    return vgpu_tensor
+def init_vgpu_backend():
+    """Initialize the vGPU backend. Must be called before creating any VGPUDevice instances."""
+    global VGPU_BACKEND_INITIALIZED
+    try:
+        if not VGPU_BACKEND_INITIALIZED:
+            # Create library for custom ops
+            lib = Library("vgpu", "DEF")
+            lib.define("custom_from_cpu(Tensor x) -> Tensor")
+            impl_lib = Library("vgpu", "IMPL")
+            @impl(impl_lib, "custom_from_cpu")
+            def custom_from_cpu(x):
+                """Copy tensor to our vGPU memory"""
+                return x.clone()
+            # Set initialization flag
+            VGPU_BACKEND_INITIALIZED = True
+        return VGPU_BACKEND_INITIALIZED
+    except Exception as e:
+        print(f"Backend initialization warning: {e}")
+        return False
+class VGPUStorage(torch.Storage):
+    """Custom storage class that uses our virtual VRAM"""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.vram = kwargs.get("vram")
+        if not self.vram:
+            from virtual_vram import VirtualVRAM
+            self.vram = VirtualVRAM()
+        self.tensor_id = kwargs.get("tensor_id", f"tensor_{id(self)}")
+    def _new_shared(self, size):
+        return VGPUStorage(size, vram=self.vram)
+class VGPUTensor:
+    """Tensor implementation that uses vGPU for computations"""
+    @staticmethod
+    def __new__(cls, elem):
+        return torch.Tensor._make_subclass(cls, elem, elem.requires_grad)
+from contextlib import contextmanager
+# Custom allocator for vGPU tensors
+class VGPUAllocator:
+    def __init__(self, vram):
+        self.vram = vram
+    def __call__(self, size, dtype=None, device=None):
+        cpu_tensor = torch.empty(size, dtype=dtype, device='cpu')
+        return to_vgpu(cpu_tensor, self.vram)
+class VGPUDevice:
+    """
+    Custom PyTorch device implementation that routes operations through vGPU.
+    Usage:
+        vgpu = VGPUDevice()
+        with vgpu.mode():
+            tensor = torch.randn(2, 3)  # Will be on vGPU
+    """
+    _VGPU_INSTANCES = {}  # Class-level dict to track instances
+    def __init__(self, vram: Optional[VirtualVRAM] = None):
+        """Initialize a vGPU device with optional vRAM manager"""
+        self.vram = vram or VirtualVRAM()
+        self.device_name = "privateuseone"  # Our device type
+        self._init_device()
+    def _init_device(self):
+        """Initialize the device backend and settings"""
+        if not VGPU_BACKEND_INITIALIZED:
+            raise RuntimeError("VGPU backend not properly initialized")
+        # Setup device and global vRAM
+        self._device = torch.device(self.device_name)
+        set_current_vram(self.vram)
+        # Register instance
+        VGPUDevice._VGPU_INSTANCES[self.device_name] = self
+        # Setup allocator
+        self._allocator = VGPUAllocator(self.vram)
+    def device(self) -> torch.device:
+        """Get the PyTorch device object for this vGPU"""
+        return self._device
+    @contextmanager
+    def mode(self):
+        """Context manager for using this device as the default"""
+        prev_device = torch.device("cpu")
+        try:
+            prev_device = torch.cuda.current_device() if torch.cuda.is_available() else prev_device
+            torch.set_device(self._device)
+            yield
+        finally:
+            torch.set_device(prev_device)
+    def __str__(self):
+        """String representation of the device"""
+        return f"{self.device_name}:0"
+    def __repr__(self):
+        """Detailed string representation"""
+        return f"vgpu(device='{self.device_name}:0')"
+        return tensor_id
+    def _from_vram(self, tensor_id: str) -> torch.Tensor:
+        """Retrieve tensor data from virtual VRAM"""
+        data = self.vram.storage.load_tensor(tensor_id)
+        return torch.from_numpy(data)
+    def matmul(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+        """Matrix multiplication using tensor cores"""
+        self._init_tensor_cores()
+        # Store inputs in VRAM
+        a_id = self._to_vram(a)
+        b_id = self._to_vram(b)
+        # Perform matmul using tensor cores
+        result = self.tensor_cores.matmul(
+            self.vram.storage.load_tensor(a_id),
+            self.vram.storage.load_tensor(b_id)
+        )
+        # Create new tensor with result
+        return torch.from_numpy(result)
+def to_vgpu(tensor: torch.Tensor, vram: Optional[VirtualVRAM] = None) -> torch.Tensor:
+    """Move a tensor to vGPU device"""
+    if not isinstance(tensor, torch.Tensor):
+        tensor = torch.tensor(tensor)
+    # Get or create vGPU device
+    if not VGPUDevice._VGPU_INSTANCES:
+        device = VGPUDevice(vram)
+    else:
+        device = next(iter(VGPUDevice._VGPU_INSTANCES.values()))
+        if vram is not None:
+            device.vram = vram
+    # Move data to vRAM
+    tensor_id = device._to_vram(tensor)
+    result = device._from_vram(tensor_id)
+    result.requires_grad = tensor.requires_grad
+    # Set the device using the internal name
+    result.data = result.data.to(device._device)
+    return result