| | #include "ggml-remoting.h"
|
| |
|
| | static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| |
|
| | return gpu->cached_device_info.name;
|
| | }
|
| |
|
| | static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) {
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| |
|
| | return gpu->cached_device_info.description;
|
| | }
|
| |
|
| | static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) {
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| | return (enum ggml_backend_dev_type) gpu->cached_device_info.type;
|
| | }
|
| |
|
| | static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| | *free = gpu->cached_device_info.memory_free;
|
| | *total = gpu->cached_device_info.memory_total;
|
| | }
|
| |
|
| | static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
| | #if USE_ALWAYS_TRUE_SUPPORTS_OP == 1
|
| |
|
| |
|
| | UNUSED(dev);
|
| | UNUSED(op);
|
| |
|
| | return true;
|
| | #else
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| | return apir_device_supports_op(gpu, op);
|
| | #endif
|
| | }
|
| |
|
| | static bool ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
| | bool supported = buft->device == dev;
|
| |
|
| | return supported;
|
| | }
|
| |
|
| | static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
| | UNUSED(dev);
|
| | UNUSED(op);
|
| |
|
| | return false;
|
| | }
|
| |
|
| | static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
| | props->name = ggml_backend_remoting_device_get_name(dev);
|
| | props->description = ggml_backend_remoting_device_get_description(dev);
|
| | props->type = ggml_backend_remoting_device_get_type(dev);
|
| | ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
| |
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| | apir_device_get_props(gpu, &props->caps.async, &props->caps.host_buffer, &props->caps.buffer_from_host_ptr,
|
| | &props->caps.events);
|
| |
|
| | props->caps.buffer_from_host_ptr = false;
|
| | props->caps.async = false;
|
| | props->caps.events = false;
|
| | }
|
| |
|
| | ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| | static std::atomic<bool> initialized = false;
|
| | static ggml_backend_buffer_type buft;
|
| |
|
| | if (!initialized) {
|
| | static std::mutex mutex;
|
| | std::lock_guard<std::mutex> lock(mutex);
|
| |
|
| | if (!initialized) {
|
| | buft = {
|
| | ggml_backend_remoting_buffer_type_interface,
|
| | dev,
|
| | (void *) gpu->cached_buffer_type.host_handle,
|
| | };
|
| | initialized = true;
|
| | }
|
| | }
|
| |
|
| | return &buft;
|
| | }
|
| |
|
| | static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| | static std::atomic<bool> initialized = false;
|
| | static ggml_backend_buffer_type buft;
|
| |
|
| | if (!initialized) {
|
| | static std::mutex mutex;
|
| | std::lock_guard<std::mutex> lock(mutex);
|
| |
|
| | if (!initialized) {
|
| | buft = {
|
| | ggml_backend_remoting_buffer_from_ptr_type_interface,
|
| | dev,
|
| | (void *) gpu->cached_buffer_type.host_handle,
|
| | };
|
| | initialized = true;
|
| | }
|
| | }
|
| |
|
| | return &buft;
|
| | }
|
| |
|
| | static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev,
|
| | void * ptr,
|
| | size_t size,
|
| | size_t max_tensor_size) {
|
| | virtgpu * gpu = DEV_TO_GPU(dev);
|
| |
|
| | ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
|
| | if (!context) {
|
| | GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__);
|
| | }
|
| |
|
| | context->gpu = gpu;
|
| | context->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size);
|
| | context->base = ptr;
|
| | context->is_from_ptr = true;
|
| |
|
| | ggml_backend_buffer_t buffer =
|
| | ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev),
|
| | ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size);
|
| |
|
| | return buffer;
|
| | }
|
| |
|
| | const ggml_backend_device_i ggml_backend_remoting_device_interface = {
|
| | ggml_backend_remoting_device_get_name,
|
| | ggml_backend_remoting_device_get_description,
|
| | ggml_backend_remoting_device_get_memory,
|
| | ggml_backend_remoting_device_get_type,
|
| | ggml_backend_remoting_device_get_props,
|
| | ggml_backend_remoting_device_init,
|
| | ggml_backend_remoting_device_get_buffer_type,
|
| | NULL,
|
| | ggml_backend_remoting_device_buffer_from_ptr,
|
| | ggml_backend_remoting_device_supports_op,
|
| | ggml_backend_remoting_device_supports_buft,
|
| | ggml_backend_remoting_device_offload_op,
|
| | NULL,
|
| | NULL,
|
| | NULL,
|
| | };
|
| |
|