| | #include "ggml-backend.h" |
| | #include "ggml-alloc.h" |
| |
|
| | #include <assert.h> |
| | #include <stdarg.h> |
| | #include <stdio.h> |
| | #include <stdlib.h> |
| | #include <string.h> |
| |
|
| | #define UNUSED GGML_UNUSED |
| |
|
| | #define MAX(a, b) ((a) > (b) ? (a) : (b)) |
| |
|
| | |
| |
|
| | ggml_backend_buffer_t ggml_backend_buffer_init( |
| | struct ggml_backend * backend, |
| | struct ggml_backend_buffer_i iface, |
| | ggml_backend_buffer_context_t context, |
| | size_t size) { |
| | ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer)); |
| |
|
| | GGML_ASSERT(iface.get_base != NULL); |
| |
|
| | (*buffer) = (struct ggml_backend_buffer) { |
| | iface, |
| | backend, |
| | context, |
| | size, |
| | }; |
| |
|
| | return buffer; |
| | } |
| |
|
| | void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) { |
| | if (buffer->iface.free_buffer != NULL) { |
| | buffer->iface.free_buffer(buffer); |
| | } |
| | free(buffer); |
| | } |
| |
|
| | size_t ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) { |
| | return ggml_backend_get_alignment(buffer->backend); |
| | } |
| |
|
| | void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) { |
| | return buffer->iface.get_base(buffer); |
| | } |
| |
|
| | size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) { |
| | return buffer->size; |
| | } |
| |
|
| | size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { |
| | if (buffer->iface.get_alloc_size) { |
| | return buffer->iface.get_alloc_size(buffer, tensor); |
| | } |
| | return ggml_nbytes(tensor); |
| | } |
| |
|
| | void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { |
| | if (buffer->iface.init_tensor) { |
| | buffer->iface.init_tensor(buffer, tensor); |
| | } |
| | } |
| |
|
| | void ggml_backend_buffer_free_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { |
| | if (buffer->iface.free_tensor) { |
| | buffer->iface.free_tensor(buffer, tensor); |
| | } |
| | } |
| |
|
| | |
| |
|
| | ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor) { |
| | return tensor->buffer->backend; |
| | } |
| |
|
| | const char * ggml_backend_name(ggml_backend_t backend) { |
| | return backend->iface.get_name(backend); |
| | } |
| |
|
| | void ggml_backend_free(ggml_backend_t backend) { |
| | backend->iface.free(backend); |
| | } |
| |
|
| | ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) { |
| | return backend->iface.alloc_buffer(backend, size); |
| | } |
| |
|
| | size_t ggml_backend_get_alignment(ggml_backend_t backend) { |
| | return backend->iface.get_alignment(backend); |
| | } |
| |
|
| | void ggml_backend_tensor_set_async(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { |
| | ggml_get_backend(tensor)->iface.set_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size); |
| | } |
| |
|
| | void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { |
| | ggml_get_backend(tensor)->iface.get_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size); |
| | } |
| |
|
| | void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { |
| | ggml_get_backend(tensor)->iface.set_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size); |
| | ggml_get_backend(tensor)->iface.synchronize(ggml_get_backend(tensor)); |
| | } |
| |
|
| | void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { |
| | ggml_get_backend(tensor)->iface.get_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size); |
| | ggml_get_backend(tensor)->iface.synchronize(ggml_get_backend(tensor)); |
| | } |
| |
|
| | void ggml_backend_synchronize(ggml_backend_t backend) { |
| | backend->iface.synchronize(backend); |
| | } |
| |
|
| | ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) { |
| | return backend->iface.graph_plan_create(backend, cgraph); |
| | } |
| |
|
| | void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { |
| | backend->iface.graph_plan_free(backend, plan); |
| | } |
| |
|
| | void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { |
| | backend->iface.graph_plan_compute(backend, plan); |
| | } |
| |
|
| | void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { |
| | backend->iface.graph_compute(backend, cgraph); |
| | } |
| |
|
| | bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { |
| | return backend->iface.supports_op(backend, op); |
| | } |
| |
|
| | |
| |
|
| | static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) { |
| | if (a->type != b->type) { |
| | return false; |
| | } |
| | for (int i = 0; i < GGML_MAX_DIMS; i++) { |
| | if (a->ne[i] != b->ne[i]) { |
| | return false; |
| | } |
| | if (a->nb[i] != b->nb[i]) { |
| | return false; |
| | } |
| | } |
| | return true; |
| | } |
| |
|
| | void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) { |
| | |
| | |
| | GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts"); |
| |
|
| | |
| |
|
| | if (src == dst) { |
| | return; |
| | } |
| |
|
| | |
| |
|
| | if (ggml_get_backend(dst)->iface.cpy_tensor_from != NULL) { |
| | ggml_get_backend(dst)->iface.cpy_tensor_from(ggml_get_backend(dst)->context, src, dst); |
| | } else if (ggml_get_backend(src)->iface.cpy_tensor_to != NULL) { |
| | ggml_get_backend(src)->iface.cpy_tensor_to(ggml_get_backend(src)->context, src, dst); |
| | } else { |
| | |
| | #ifndef NDEBUG |
| | fprintf(stderr, "ggml_backend_tensor_copy: neither cpy_tensor_from nor cpy_tensor_to are implemented for backends %s and %s, falling back to get/set\n", ggml_backend_name(src->buffer->backend), ggml_backend_name(dst->buffer->backend)); |
| | #endif |
| | size_t nbytes = ggml_nbytes(src); |
| | void * data = malloc(nbytes); |
| | ggml_backend_tensor_get(src, data, 0, nbytes); |
| | ggml_backend_tensor_set(dst, data, 0, nbytes); |
| | free(data); |
| | } |
| | } |
| |
|
| | |
| |
|
// State for the CPU backend: the thread count used for graph planning plus a
// lazily grown scratch buffer shared across ggml_backend_cpu_graph_compute calls.
struct ggml_backend_cpu_context {
    int n_threads;    // passed to ggml_graph_plan; default GGML_DEFAULT_N_THREADS
    void * work_data; // scratch buffer for graph computation (owned; grown on demand)
    size_t work_size; // current capacity of work_data in bytes
};
| |
|
| | static const char * ggml_backend_cpu_name(ggml_backend_t backend) { |
| | return "CPU"; |
| |
|
| | UNUSED(backend); |
| | } |
| |
|
| | static void ggml_backend_cpu_free(ggml_backend_t backend) { |
| | struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context; |
| | free(cpu_ctx->work_data); |
| | free(cpu_ctx); |
| | free(backend); |
| | } |
| |
|
// For CPU buffers the context IS the data pointer, so the base address is just
// the context itself.
static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
    return (void *)buffer->context;
}
| |
|
| | static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { |
| | free(buffer->context); |
| | UNUSED(buffer); |
| | } |
| |
|
// Buffer interface for buffers whose memory the backend owns (malloc'd in
// ggml_backend_cpu_alloc_buffer). NULL entries fall back to default behavior
// in the ggml_backend_buffer_* wrappers above.
// NOTE(review): the field-name comments below assume the declaration order in
// struct ggml_backend_buffer_i matches this file's usage — confirm against the header.
static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
    /* .free_buffer    = */ ggml_backend_cpu_buffer_free_buffer, // frees the malloc'd data
    /* .get_base       = */ ggml_backend_cpu_buffer_get_base,
    /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
    /* .init_tensor    = */ NULL, // no per-tensor setup needed
    /* .free_tensor    = */ NULL, // no per-tensor cleanup needed
};
| |
|
| | |
// Buffer interface for buffers wrapping caller-owned memory
// (ggml_backend_cpu_buffer_from_ptr): identical to cpu_backend_buffer_i except
// free_buffer is NULL, so the wrapped pointer is never freed by the backend.
// NOTE(review): field-name comments assume the header's declaration order — confirm.
static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
    /* .free_buffer    = */ NULL, // ptr is not owned by the buffer
    /* .get_base       = */ ggml_backend_cpu_buffer_get_base,
    /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
    /* .init_tensor    = */ NULL, // no per-tensor setup needed
    /* .free_tensor    = */ NULL, // no per-tensor cleanup needed
};
| |
|
// Byte alignment reported by the CPU backend for tensor data; buffers are
// over-allocated by this amount so callers can align the base pointer up.
static const size_t TENSOR_ALIGNMENT = 64;
| |
|
| | static ggml_backend_buffer_t ggml_backend_cpu_alloc_buffer(ggml_backend_t backend, size_t size) { |
| | size += TENSOR_ALIGNMENT; |
| | void * data = malloc(size); |
| |
|
| | return ggml_backend_buffer_init(backend, cpu_backend_buffer_i, data, size); |
| | } |
| |
|
| | static size_t ggml_backend_cpu_get_alignment(ggml_backend_t backend) { |
| | return TENSOR_ALIGNMENT; |
| | UNUSED(backend); |
| | } |
| |
|
| | static void ggml_backend_cpu_set_tensor_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { |
| | GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds"); |
| | GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); |
| |
|
| | memcpy((char *)tensor->data + offset, data, size); |
| |
|
| | UNUSED(backend); |
| | } |
| |
|
| | static void ggml_backend_cpu_get_tensor_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { |
| | GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds"); |
| | GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); |
| |
|
| | memcpy(data, (const char *)tensor->data + offset, size); |
| |
|
| | UNUSED(backend); |
| | } |
| |
|
// No-op: every operation in this backend finishes before its call returns,
// so there is never queued work to wait on.
static void ggml_backend_cpu_synchronize(ggml_backend_t backend) {
    UNUSED(backend);
}
| |
|
| | static void ggml_backend_cpu_cpy_tensor_from(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) { |
| | ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src)); |
| |
|
| | UNUSED(backend); |
| | } |
| |
|
| | static void ggml_backend_cpu_cpy_tensor_to(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) { |
| | |
| | ggml_backend_tensor_set_async(dst, src->data, 0, ggml_nbytes(src)); |
| |
|
| | UNUSED(backend); |
| | } |
| |
|
// A prepared CPU execution plan: the cplan (thread count + work buffer) plus a
// shallow copy of the graph taken at plan-creation time.
struct ggml_backend_plan_cpu {
    struct ggml_cplan cplan;   // owns cplan.work_data (freed in plan_free)
    struct ggml_cgraph cgraph; // by-value copy; node tensors must outlive the plan
};
| |
|
| | static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) { |
| | struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context; |
| |
|
| | struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu)); |
| |
|
| | cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads); |
| | cpu_plan->cgraph = *cgraph; |
| |
|
| | if (cpu_plan->cplan.work_size > 0) { |
| | cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size); |
| | } |
| |
|
| | return cpu_plan; |
| | } |
| |
|
| | static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { |
| | struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan; |
| |
|
| | free(cpu_plan->cplan.work_data); |
| | free(cpu_plan); |
| |
|
| | UNUSED(backend); |
| | } |
| |
|
| | static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { |
| | struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan; |
| |
|
| | ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan); |
| |
|
| | UNUSED(backend); |
| | } |
| |
|
| | static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { |
| | struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context; |
| |
|
| | struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads); |
| |
|
| | if (cpu_ctx->work_size < cplan.work_size) { |
| | |
| | cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size); |
| | cpu_ctx->work_size = cplan.work_size; |
| | } |
| |
|
| | cplan.work_data = cpu_ctx->work_data; |
| |
|
| | ggml_graph_compute(cgraph, &cplan); |
| | } |
| |
|
| | static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { |
| | return true; |
| | UNUSED(backend); |
| | UNUSED(op); |
| | } |
| |
|
// vtable wiring for the CPU backend. The entries line up with the
// ggml_backend_cpu_* implementations above; none are NULL, so every
// ggml_backend_* wrapper in this file is safe to call on a CPU backend.
// NOTE(review): field-name comments assume the declaration order in
// struct ggml_backend_i matches this file's usage — confirm against the header.
static struct ggml_backend_i cpu_backend_i = {
    /* .get_name           = */ ggml_backend_cpu_name,
    /* .free               = */ ggml_backend_cpu_free,
    /* .alloc_buffer       = */ ggml_backend_cpu_alloc_buffer,
    /* .get_alignment      = */ ggml_backend_cpu_get_alignment,
    /* .set_tensor_async   = */ ggml_backend_cpu_set_tensor_async,
    /* .get_tensor_async   = */ ggml_backend_cpu_get_tensor_async,
    /* .synchronize        = */ ggml_backend_cpu_synchronize,
    /* .cpy_tensor_from    = */ ggml_backend_cpu_cpy_tensor_from,
    /* .cpy_tensor_to      = */ ggml_backend_cpu_cpy_tensor_to,
    /* .graph_plan_create  = */ ggml_backend_cpu_graph_plan_create,
    /* .graph_plan_free    = */ ggml_backend_cpu_graph_plan_free,
    /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
    /* .graph_compute      = */ ggml_backend_cpu_graph_compute,
    /* .supports_op        = */ ggml_backend_cpu_supports_op,
};
| |
|
| | ggml_backend_t ggml_backend_cpu_init(void) { |
| | struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context)); |
| |
|
| | ctx->n_threads = GGML_DEFAULT_N_THREADS; |
| | ctx->work_data = NULL; |
| | ctx->work_size = 0; |
| |
|
| | ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend)); |
| |
|
| | *cpu_backend = (struct ggml_backend) { |
| | cpu_backend_i, |
| | ctx |
| | }; |
| | return cpu_backend; |
| | } |
| |
|
| | bool ggml_backend_is_cpu(ggml_backend_t backend) { |
| | return backend->iface.get_name == ggml_backend_cpu_name; |
| | } |
| |
|
| | void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) { |
| | GGML_ASSERT(ggml_backend_is_cpu(backend_cpu)); |
| |
|
| | struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context; |
| | ctx->n_threads = n_threads; |
| | } |
| |
|
// Wraps caller-owned memory in a CPU buffer. Uses the from_ptr interface,
// whose free_buffer entry is NULL — the backend never frees `ptr`; the caller
// retains ownership and must keep it alive for the buffer's lifetime.
ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size) {
    return ggml_backend_buffer_init(backend_cpu, cpu_backend_buffer_i_from_ptr, ptr, size);
}
| |
|