| | #pragma once |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifdef __cplusplus |
| | extern "C" { |
| | #endif |
| |
|
| | #include <stdint.h> |
| | #include <stddef.h> |
| | #include <stdbool.h> |
| |
|
| | #define GGML_V1_MAX_DIMS 4 /* capacity of the ne[] and nb[] arrays in struct ggml_v1_tensor */ |
| | #define GGML_V1_MAX_NODES 4096 /* capacity of the nodes[], grads[] and leafs[] arrays in struct ggml_v1_cgraph */ |
| | #define GGML_V1_MAX_PARAMS 16 /* not referenced in the visible part of this header; presumably a cap on trainable parameters - TODO confirm */ |
| | #define GGML_V1_MAX_CONTEXTS 64 /* not referenced in the visible part of this header; presumably a cap on live contexts - TODO confirm */ |
| | #define GGML_V1_MAX_OPT 4 /* capacity of the opt[] array in struct ggml_v1_tensor */ |
| |
|
| | #ifdef __ARM_NEON /* the 16-bit float storage type depends on whether the compiler defines __ARM_NEON */ |
| | /* with __ARM_NEON defined, the compiler-provided __fp16 type is used directly */ |
| | typedef __fp16 ggml_v1_fp16_t; |
| | #else /* otherwise a plain 16-bit unsigned integer stands in; presumably it carries the raw half-float bits - TODO confirm */ |
| | typedef uint16_t ggml_v1_fp16_t; |
| | #endif |
| |
|
| | /* Conversions between ggml_v1_fp16_t and float, both by value. Only declared here; the definitions live outside this header. */ |
| | float ggml_v1_fp16_to_fp32(ggml_v1_fp16_t x); |
| | ggml_v1_fp16_t ggml_v1_fp32_to_fp16(float x); |
| |
|
| | struct ggml_v1_object; /* forward declaration only: opaque in this header, used solely through pointers */ |
| | struct ggml_v1_context; /* forward declaration only: opaque in this header, used solely through pointers */ |
| |
|
| | enum ggml_v1_type { /* element type tag (see struct ggml_v1_tensor.type); no explicit values, so numbering starts at 0 in the order listed */ |
| | GGML_V1_TYPE_Q4_0, /* NOTE(review): the Q4_* names suggest 4-bit quantized formats; their layout is not shown here - confirm */ |
| | GGML_V1_TYPE_Q4_1, |
| | GGML_V1_TYPE_I8, /* presumably 8-bit integer elements - TODO confirm */ |
| | GGML_V1_TYPE_I16, /* presumably 16-bit integer elements - TODO confirm */ |
| | GGML_V1_TYPE_I32, /* presumably 32-bit integer elements - TODO confirm */ |
| | GGML_V1_TYPE_F16, /* presumably ggml_v1_fp16_t elements - TODO confirm */ |
| | GGML_V1_TYPE_F32, /* presumably float elements - TODO confirm */ |
| | GGML_V1_TYPE_COUNT, /* last enumerator, so its value equals the number of types listed above */ |
| | }; |
| |
|
| | /* Operation tag (see struct ggml_v1_tensor.op). Most enumerators share a name with a ggml_v1_<op>() builder declared later in this header. */ |
| | enum ggml_v1_op { |
| | GGML_V1_OP_NONE = 0, /* explicitly 0; every following enumerator takes the next integer in declaration order */ |
| |
|
| | GGML_V1_OP_DUP, |
| | GGML_V1_OP_ADD, |
| | GGML_V1_OP_SUB, |
| | GGML_V1_OP_MUL, |
| | GGML_V1_OP_DIV, |
| | GGML_V1_OP_SQR, |
| | GGML_V1_OP_SQRT, |
| | GGML_V1_OP_SUM, |
| | GGML_V1_OP_MEAN, |
| | GGML_V1_OP_REPEAT, |
| | GGML_V1_OP_ABS, |
| | GGML_V1_OP_SGN, |
| | GGML_V1_OP_NEG, |
| | GGML_V1_OP_STEP, |
| | GGML_V1_OP_RELU, |
| | GGML_V1_OP_GELU, |
| | GGML_V1_OP_NORM, |
| |
|
| | GGML_V1_OP_MUL_MAT, |
| |
|
| | GGML_V1_OP_SCALE, |
| | GGML_V1_OP_CPY, |
| | GGML_V1_OP_RESHAPE, |
| | GGML_V1_OP_VIEW, |
| | GGML_V1_OP_PERMUTE, |
| | GGML_V1_OP_TRANSPOSE, |
| | GGML_V1_OP_GET_ROWS, |
| | GGML_V1_OP_DIAG_MASK_INF, |
| | GGML_V1_OP_SOFT_MAX, |
| | GGML_V1_OP_ROPE, |
| | GGML_V1_OP_CONV_1D_1S, |
| | GGML_V1_OP_CONV_1D_2S, |
| |
|
| | GGML_V1_OP_FLASH_ATTN, |
| | GGML_V1_OP_FLASH_FF, |
| |
|
| | GGML_V1_OP_COUNT, /* last enumerator, so its value equals the number of operations listed above (NONE included) */ |
| | }; |
| |
|
| | /* Tensor descriptor: element type, per-dimension arrays, links to other tensors, perf counters and a data pointer. Field order defines the layout - do not reorder. */ |
| | struct ggml_v1_tensor { |
| | enum ggml_v1_type type; /* element type tag */ |
| |
|
| | int n_dims; /* presumably how many entries of ne[]/nb[] are in use - TODO confirm */ |
| | int ne[GGML_V1_MAX_DIMS]; /* presumably the element count along each dimension - TODO confirm */ |
| | size_t nb[GGML_V1_MAX_DIMS]; /* presumably the byte stride along each dimension - TODO confirm */ |
| | |
| | |
| | |
| |
|
| | /* NOTE(review): the fields from here to opt[] look like computation-graph bookkeeping - confirm against the implementation */ |
| | enum ggml_v1_op op; /* operation tag associated with this tensor */ |
| |
|
| | bool is_param; /* presumably set through ggml_v1_set_param() - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * grad; /* link to another tensor; presumably the gradient - TODO confirm */ |
| | struct ggml_v1_tensor * src0; /* link to another tensor; presumably the first operand of op - TODO confirm */ |
| | struct ggml_v1_tensor * src1; /* link to another tensor; presumably the second operand of op - TODO confirm */ |
| | struct ggml_v1_tensor * opt[GGML_V1_MAX_OPT]; /* up to GGML_V1_MAX_OPT further tensor links; presumably extra operands - TODO confirm */ |
| |
|
| | /* NOTE(review): n_tasks presumably tells the scheduler how many tasks to split this op into - confirm */ |
| | int n_tasks; |
| |
|
| | /* Counters; going by the names: number of runs, accumulated cycles, accumulated microseconds - TODO confirm who updates them */ |
| | int perf_runs; |
| | int64_t perf_cycles; |
| | int64_t perf_time_us; |
| |
|
| | void * data; /* untyped pointer; presumably the element storage - TODO confirm ownership */ |
| | char padding[8]; /* 8 trailing bytes; purpose not visible in this header */ |
| | }; |
| |
|
| | /* Computation-graph container. The three GGML_V1_MAX_NODES-sized pointer arrays are embedded, so the struct is large; note ggml_v1_build_forward/backward return it by value. */ |
| | struct ggml_v1_cgraph { |
| | int n_nodes; /* presumably the number of used entries in nodes[] and grads[] - TODO confirm */ |
| | int n_leafs; /* presumably the number of used entries in leafs[] - TODO confirm */ |
| | int n_threads; /* presumably the thread count used by ggml_v1_graph_compute() - TODO confirm */ |
| |
|
| | size_t work_size; /* presumably the size in bytes of the work buffer - TODO confirm */ |
| | struct ggml_v1_tensor * work; /* presumably a scratch tensor used during compute - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * nodes[GGML_V1_MAX_NODES]; /* fixed-capacity array of tensor pointers */ |
| | struct ggml_v1_tensor * grads[GGML_V1_MAX_NODES]; /* same capacity as nodes[]; presumably grads[i] pairs with nodes[i] - TODO confirm */ |
| | struct ggml_v1_tensor * leafs[GGML_V1_MAX_NODES]; /* fixed-capacity array of tensor pointers */ |
| |
|
| | /* Counters; going by the names: number of runs, accumulated cycles, accumulated microseconds - TODO confirm who updates them */ |
| | int perf_runs; |
| | int64_t perf_cycles; |
| | int64_t perf_time_us; |
| | }; |
| |
|
| | /* Descriptor passed by value to ggml_v1_set_scratch(): an offset, a size and a data pointer. */ |
| | struct ggml_v1_scratch { |
| | size_t offs; /* presumably the current offset into data, in bytes - TODO confirm */ |
| | size_t size; /* presumably the capacity of data, in bytes - TODO confirm */ |
| | void * data; /* untyped buffer pointer; ownership is not visible in this header */ |
| | }; |
| |
|
| | struct ggml_v1_init_params { /* argument bundle passed by value to ggml_v1_init() */ |
| | /* NOTE(review): the two fields presumably describe the context's memory pool - confirm */ |
| | size_t mem_size; /* presumably the pool size in bytes - TODO confirm */ |
| | void * mem_buffer; /* presumably a caller-supplied buffer; whether NULL is accepted is not visible here - TODO confirm */ |
| | }; |
| |
|
| | void ggml_v1_time_init(void); /* timing helpers, declared only; presumably this one must run before the others - TODO confirm */ |
| | int64_t ggml_v1_time_ms(void); /* presumably a timestamp in milliseconds - TODO confirm the epoch */ |
| | int64_t ggml_v1_time_us(void); /* presumably a timestamp in microseconds - TODO confirm the epoch */ |
| | int64_t ggml_v1_cycles(void); /* presumably a CPU cycle/clock counter - TODO confirm */ |
| | int64_t ggml_v1_cycles_per_ms(void); /* presumably the conversion factor for ggml_v1_cycles() - TODO confirm */ |
| |
|
| | void ggml_v1_print_object (const struct ggml_v1_object * obj); /* takes a read-only object pointer, returns nothing; presumably a debug dump - TODO confirm the output stream */ |
| | void ggml_v1_print_objects(const struct ggml_v1_context * ctx); /* takes a read-only context pointer; presumably dumps every object in ctx - TODO confirm */ |
| |
|
| | int ggml_v1_nelements(const struct ggml_v1_tensor * tensor); /* read-only query returning int; presumably the total element count - TODO confirm */ |
| | size_t ggml_v1_nbytes (const struct ggml_v1_tensor * tensor); /* read-only query returning size_t; presumably the data size in bytes - TODO confirm */ |
| |
|
| | int ggml_v1_blck_size (enum ggml_v1_type type); /* per-type query; presumably elements per storage block - TODO confirm */ |
| | size_t ggml_v1_type_size (enum ggml_v1_type type); /* per-type query; presumably bytes per storage block - TODO confirm */ |
| | float ggml_v1_type_sizef(enum ggml_v1_type type); /* per-type query returning float; presumably (possibly fractional) bytes per element - TODO confirm */ |
| |
|
| | size_t ggml_v1_element_size(const struct ggml_v1_tensor * tensor); /* read-only query; presumably the size in bytes for the tensor's element type - TODO confirm */ |
| |
|
| | struct ggml_v1_context * ggml_v1_init(struct ggml_v1_init_params params); /* takes params by value, returns a context pointer; failure signalling (e.g. NULL) is not visible here - TODO confirm */ |
| | void ggml_v1_free(struct ggml_v1_context * ctx); /* presumably releases a context obtained from ggml_v1_init() - TODO confirm */ |
| |
|
| | size_t ggml_v1_used_mem(const struct ggml_v1_context * ctx); /* read-only query; presumably bytes of the context pool in use - TODO confirm */ |
| |
|
| | size_t ggml_v1_set_scratch(struct ggml_v1_context * ctx, struct ggml_v1_scratch scratch); /* scratch is passed by value; the meaning of the size_t result is not visible here - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * ggml_v1_new_tensor( /* general constructor: element type plus a dimension count and a read-only int array */ |
| | struct ggml_v1_context * ctx, |
| | enum ggml_v1_type type, |
| | int n_dims, |
| | const int *ne); /* presumably ne holds n_dims entries, one per dimension - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * ggml_v1_new_tensor_1d( /* one-size variant of ggml_v1_new_tensor(); presumably a 1-D tensor of ne0 elements - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | enum ggml_v1_type type, |
| | int ne0); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_new_tensor_2d( /* two-size variant of ggml_v1_new_tensor(); presumably ne0 x ne1 elements - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | enum ggml_v1_type type, |
| | int ne0, |
| | int ne1); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_new_tensor_3d( /* three-size variant of ggml_v1_new_tensor(); presumably ne0 x ne1 x ne2 elements - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | enum ggml_v1_type type, |
| | int ne0, |
| | int ne1, |
| | int ne2); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_new_tensor_4d( /* four-size variant of ggml_v1_new_tensor(); four matches GGML_V1_MAX_DIMS */ |
| | struct ggml_v1_context * ctx, |
| | enum ggml_v1_type type, |
| | int ne0, |
| | int ne1, |
| | int ne2, |
| | int ne3); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_new_i32(struct ggml_v1_context * ctx, int32_t value); /* presumably a single-element I32 tensor holding value - TODO confirm */ |
| | struct ggml_v1_tensor * ggml_v1_new_f32(struct ggml_v1_context * ctx, float value); /* presumably a single-element F32 tensor holding value - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * ggml_v1_dup_tensor (struct ggml_v1_context * ctx, const struct ggml_v1_tensor * src); /* src is read-only; presumably a new tensor with src's type and shape - TODO confirm whether data is copied */ |
| | struct ggml_v1_tensor * ggml_v1_view_tensor(struct ggml_v1_context * ctx, const struct ggml_v1_tensor * src); /* src is read-only; presumably a new tensor sharing src's data - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * ggml_v1_set_zero(struct ggml_v1_tensor * tensor); /* fill helpers taking a mutable tensor; presumably each returns its argument - TODO confirm */ |
| | struct ggml_v1_tensor * ggml_v1_set_i32 (struct ggml_v1_tensor * tensor, int32_t value); /* presumably sets every element to value - TODO confirm */ |
| | struct ggml_v1_tensor * ggml_v1_set_f32 (struct ggml_v1_tensor * tensor, float value); /* presumably sets every element to value - TODO confirm */ |
| |
|
| | int32_t ggml_v1_get_i32_1d(const struct ggml_v1_tensor * tensor, int i); /* presumably reads element i by flat index, as int32_t - TODO confirm bounds checking */ |
| | void ggml_v1_set_i32_1d(const struct ggml_v1_tensor * tensor, int i, int32_t value); /* NOTE(review): a setter taking a const tensor; presumably it writes through tensor->data, which the const does not cover - confirm */ |
| |
|
| | float ggml_v1_get_f32_1d(const struct ggml_v1_tensor * tensor, int i); /* presumably reads element i by flat index, as float - TODO confirm bounds checking */ |
| | void ggml_v1_set_f32_1d(const struct ggml_v1_tensor * tensor, int i, float value); /* NOTE(review): a setter taking a const tensor; presumably it writes through tensor->data, which the const does not cover - confirm */ |
| |
|
| | void * ggml_v1_get_data (const struct ggml_v1_tensor * tensor); /* returns a mutable untyped pointer from a read-only tensor; presumably tensor->data - TODO confirm */ |
| | float * ggml_v1_get_data_f32(const struct ggml_v1_tensor * tensor); /* float-typed variant; whether the element type is checked is not visible here - TODO confirm */ |
| |
|
| | |
| | |
| | |
| |
|
| | struct ggml_v1_tensor * ggml_v1_dup( /* unary builder, name matches GGML_V1_OP_DUP; presumably yields a copy of a - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_add( /* binary builder; presumably element-wise a + b - TODO confirm shape requirements */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_sub( /* binary builder; presumably element-wise a - b - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_mul( /* binary builder; presumably element-wise a * b (the matrix product is ggml_v1_mul_mat) - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_div( /* binary builder; presumably element-wise a / b - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_sqr( /* unary builder; presumably element-wise square - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_sqrt( /* unary builder; presumably element-wise square root - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | /* Unary builder, name matches GGML_V1_OP_SUM. */ |
| | /* NOTE(review): presumably reduces a to a single-element tensor - confirm against the implementation */ |
| | struct ggml_v1_tensor * ggml_v1_sum( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | /* Unary builder, name matches GGML_V1_OP_MEAN. NOTE(review): presumably a mean along rows - confirm the reduced axis */ |
| | struct ggml_v1_tensor * ggml_v1_mean( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | /* Binary builder, name matches GGML_V1_OP_REPEAT. */ |
| | /* NOTE(review): presumably tiles a to the shape of b, with b acting only as a shape template - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_repeat( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_abs( /* unary builder; presumably element-wise absolute value - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_sgn( /* unary builder; presumably element-wise sign - TODO confirm the result at 0 */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_neg( /* unary builder; presumably element-wise negation - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_step( /* unary builder; presumably an element-wise step function - TODO confirm the result at 0 */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_relu( /* unary builder; presumably element-wise max(a, 0) - TODO confirm */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | /* Unary builder, name matches GGML_V1_OP_GELU. NOTE(review): presumably the GELU activation; exact vs approximate form is not visible here - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_gelu( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | /* Unary builder, name matches GGML_V1_OP_NORM. */ |
| | /* NOTE(review): presumably normalizes along rows; no epsilon parameter is exposed, so any epsilon must be fixed in the implementation - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_norm( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | /* Binary builder, name matches GGML_V1_OP_MUL_MAT. */ |
| | /* NOTE(review): presumably a matrix product of a and b; which operand is transposed and the result */ |
| | /* shape are not visible in this header - confirm before relying on a layout */ |
| | struct ggml_v1_tensor * ggml_v1_mul_mat( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | |
| | |
| | |
| |
|
| | /* Binary builder, name matches GGML_V1_OP_SCALE. NOTE(review): presumably scales a by the value in b, possibly in place - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_scale( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | /* Binary builder, name matches GGML_V1_OP_CPY. NOTE(review): presumably copies a into b; confirm the direction and what is returned */ |
| | struct ggml_v1_tensor * ggml_v1_cpy( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | /* Binary builder, name matches GGML_V1_OP_RESHAPE. */ |
| | /* NOTE(review): presumably returns a's data under b's shape, with b acting only as a shape template - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_reshape( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | /* Reshape variant taking two explicit int sizes instead of a template tensor. */ |
| | /* NOTE(review): presumably ne0 * ne1 must equal a's element count and the result shares a's data - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_reshape_2d( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | int ne0, |
| | int ne1); |
| |
|
| | /* Reshape variant taking three explicit int sizes instead of a template tensor. */ |
| | /* NOTE(review): presumably ne0 * ne1 * ne2 must equal a's element count and the result shares a's data - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_reshape_3d( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | int ne0, |
| | int ne1, |
| | int ne2); |
| |
|
| | /* Takes an element count and a size_t offset into a. NOTE(review): offset is presumably in bytes, not elements - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_view_1d( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | int ne0, |
| | size_t offset); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_view_2d( /* 2-D counterpart of ggml_v1_view_1d() with an extra nb1 argument */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | int ne0, |
| | int ne1, |
| | size_t nb1, /* presumably the row stride in bytes - TODO confirm */ |
| | size_t offset); /* presumably a byte offset into a's data - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * ggml_v1_permute( /* takes one int per dimension (four, matching GGML_V1_MAX_DIMS); presumably a permutation of 0..3 - TODO confirm the mapping direction */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | int axis0, |
| | int axis1, |
| | int axis2, |
| | int axis3); |
| |
|
| | /* Unary builder, name matches GGML_V1_OP_TRANSPOSE. NOTE(review): presumably swaps the first two dimensions, like ggml_v1_permute(ctx, a, 1, 0, 2, 3) - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_transpose( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_get_rows( /* binary builder; presumably gathers rows of a at the indices stored in b - TODO confirm b's element type */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | /* Builder taking a tensor and an int n_past, name matches GGML_V1_OP_DIAG_MASK_INF. */ |
| | /* NOTE(review): presumably writes -INF above a diagonal shifted by n_past, possibly in place - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_diag_mask_inf( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | int n_past); |
| |
|
| | /* Unary builder, name matches GGML_V1_OP_SOFT_MAX. NOTE(review): presumably a row-wise softmax, possibly in place - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_soft_max( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a); |
| |
|
| | /* Builder taking a tensor and three ints, name matches GGML_V1_OP_ROPE. */ |
| | /* NOTE(review): presumably applies rotary position embedding to a, possibly in place - confirm */ |
| | /* NOTE(review): the accepted values of mode are not visible in this header - confirm before passing */ |
| | /* anything other than what existing callers use */ |
| | struct ggml_v1_tensor * ggml_v1_rope( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | int n_past, |
| | int n_dims, |
| | int mode); |
| |
|
| | /* Two binary builders, names match GGML_V1_OP_CONV_1D_1S / GGML_V1_OP_CONV_1D_2S. */ |
| | /* NOTE(review): presumably 1-D convolutions with stride 1 and stride 2 respectively; no stride, */ |
| | /* padding or dilation argument exists, so those must be fixed in the implementation. */ |
| | /* Which of a / b is the kernel and which the signal is not visible here - confirm */ |
| | struct ggml_v1_tensor * ggml_v1_conv_1d_1s( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_conv_1d_2s( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b); |
| |
|
| | struct ggml_v1_tensor * ggml_v1_flash_attn( /* name matches GGML_V1_OP_FLASH_ATTN; presumably a fused attention over q, k, v - TODO confirm expected shapes */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * q, |
| | struct ggml_v1_tensor * k, |
| | struct ggml_v1_tensor * v, |
| | bool masked); /* presumably enables causal masking - TODO confirm */ |
| |
|
| | struct ggml_v1_tensor * ggml_v1_flash_ff( /* name matches GGML_V1_OP_FLASH_FF; takes five tensors - presumably a fused feed-forward block, TODO confirm the role of each */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * a, |
| | struct ggml_v1_tensor * b0, |
| | struct ggml_v1_tensor * b1, |
| | struct ggml_v1_tensor * c0, |
| | struct ggml_v1_tensor * c1); |
| |
|
| | |
| | |
| | |
| |
|
| | void ggml_v1_set_param( /* returns nothing; presumably flags tensor as a parameter (see the is_param field) - TODO confirm other side effects */ |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_tensor * tensor); |
| |
|
| | void ggml_v1_build_forward_expand(struct ggml_v1_cgraph * cgraph, struct ggml_v1_tensor * tensor); /* mutates a caller-owned graph; presumably adds tensor and its dependencies - TODO confirm */ |
| |
|
| | struct ggml_v1_cgraph ggml_v1_build_forward (struct ggml_v1_tensor * tensor); /* returns the whole graph struct by value (it embeds three GGML_V1_MAX_NODES pointer arrays) */ |
| | struct ggml_v1_cgraph ggml_v1_build_backward(struct ggml_v1_context * ctx, struct ggml_v1_cgraph * gf, bool keep); /* also returns by value; presumably derives a backward graph from gf - TODO confirm what keep controls */ |
| |
|
| | void ggml_v1_graph_compute(struct ggml_v1_context * ctx, struct ggml_v1_cgraph * cgraph); /* returns nothing; presumably evaluates the graph's nodes - TODO confirm threading and error behaviour */ |
| | void ggml_v1_graph_reset (struct ggml_v1_cgraph * cgraph); /* returns nothing; presumably clears gradient state in the graph - TODO confirm */ |
| |
|
| | /* Takes a read-only graph and returns nothing. NOTE(review): presumably prints a summary of the graph - confirm the output stream */ |
| | void ggml_v1_graph_print(const struct ggml_v1_cgraph * cgraph); |
| |
|
| | /* Takes two read-only graphs and a file name. NOTE(review): presumably writes a Graphviz dot file; whether gf may be NULL is not visible here - confirm */ |
| | void ggml_v1_graph_dump_dot(const struct ggml_v1_cgraph * gb, const struct ggml_v1_cgraph * gf, const char * filename); |
| |
|
| | |
| | |
| | |
| |
|
| | /* Selects the optimizer; struct ggml_v1_opt_params carries one sub-struct per value (adam, lbfgs). */ |
| | enum ggml_v1_opt_type { |
| | GGML_V1_OPT_ADAM, /* implicit value 0 */ |
| | GGML_V1_OPT_LBFGS, /* implicit value 1 */ |
| | }; |
| |
|
| | /* Line-search selector stored in ggml_v1_opt_params.lbfgs.linesearch. */ |
| | enum ggml_v1_linesearch { |
| | GGML_V1_LINESEARCH_DEFAULT = 1, /* same value as GGML_V1_LINESEARCH_BACKTRACKING_WOLFE, i.e. an alias for it */ |
| |
|
| | GGML_V1_LINESEARCH_BACKTRACKING_ARMIJO = 0, |
| | GGML_V1_LINESEARCH_BACKTRACKING_WOLFE = 1, |
| | GGML_V1_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2, |
| | }; |
| |
|
| | /* Return type of ggml_v1_opt(): general codes are >= 0, line-search codes are negative. */ |
| | enum ggml_v1_opt_result { |
| | GGML_V1_OPT_OK = 0, /* explicitly 0; the next four general codes take 1..4 in declaration order */ |
| | GGML_V1_OPT_DID_NOT_CONVERGE, |
| | GGML_V1_OPT_NO_CONTEXT, |
| | GGML_V1_OPT_INVALID_WOLFE, |
| | GGML_V1_OPT_FAIL, |
| |
|
| | GGML_V1_LINESEARCH_FAIL = -128, /* restarts numbering: the four codes below take -127, -126, -125 and -124 */ |
| | GGML_V1_LINESEARCH_MINIMUM_STEP, |
| | GGML_V1_LINESEARCH_MAXIMUM_STEP, |
| | GGML_V1_LINESEARCH_MAXIMUM_ITERATIONS, |
| | GGML_V1_LINESEARCH_INVALID_PARAMETERS, |
| | }; |
| |
|
| | /* Optimizer configuration, passed by value to ggml_v1_opt(). */ |
| | /* A populated instance is returned by ggml_v1_opt_default_params(); the default values themselves */ |
| | /* are not visible in this header. */ |
| | /* Field order defines the layout - do not reorder. */ |
| | struct ggml_v1_opt_params { |
| | enum ggml_v1_opt_type type; /* presumably selects which of the adam / lbfgs sub-structs is consulted - TODO confirm */ |
| |
|
| | int n_threads; /* presumably the thread count used while optimizing - TODO confirm */ |
| |
|
| | /* NOTE(review): past and delta presumably configure a delta-based convergence test: */ |
| | /* past is presumably how many iterations back the objective is compared, with 0 disabling the test, */ |
| | /* and delta presumably the relative threshold on that difference. */ |
| | /* None of this is visible in this header - confirm against the implementation. */ |
| | | |
| | | |
| | int past; |
| | float delta; |
| |
|
| | /* NOTE(review): max_no_improvement is presumably the number of consecutive iterations without */ |
| | /* improvement after which the optimizer stops, with 0 presumably disabling the check. */ |
| | /* Not visible in this header - confirm against the implementation. */ |
| | | |
| | | |
| | | |
| | int max_no_improvement; |
| |
|
| | bool print_forward_graph; /* presumably prints the forward graph before optimizing - TODO confirm */ |
| | bool print_backward_graph; /* presumably prints the backward graph before optimizing - TODO confirm */ |
| |
|
| | /* Anonymous struct grouping the settings named adam; presumably read when type is GGML_V1_OPT_ADAM - TODO confirm */ |
| | struct { |
| | int n_iter; /* presumably the iteration limit - TODO confirm */ |
| |
|
| | float alpha; /* presumably the learning rate - TODO confirm */ |
| | float beta1; /* presumably the first-moment decay rate - TODO confirm */ |
| | float beta2; /* presumably the second-moment decay rate - TODO confirm */ |
| | float eps; /* presumably the numerical-stability epsilon - TODO confirm */ |
| | float eps_f; /* presumably a convergence tolerance on the objective - TODO confirm */ |
| | float eps_g; /* presumably a convergence tolerance on the gradient - TODO confirm */ |
| | } adam; |
| |
|
| | /* Anonymous struct grouping the settings named lbfgs; presumably read when type is GGML_V1_OPT_LBFGS - TODO confirm */ |
| | struct { |
| | int m; /* presumably the number of stored correction pairs - TODO confirm */ |
| | int n_iter; /* presumably the iteration limit - TODO confirm */ |
| | int max_linesearch; /* presumably the per-iteration line-search trial limit - TODO confirm */ |
| |
|
| | float eps; /* presumably the gradient convergence tolerance - TODO confirm */ |
| | float ftol; /* presumably the sufficient-decrease line-search tolerance - TODO confirm */ |
| | float wolfe; /* presumably the curvature-condition coefficient - TODO confirm */ |
| | float min_step; /* presumably the smallest accepted line-search step - TODO confirm */ |
| | float max_step; /* presumably the largest accepted line-search step - TODO confirm */ |
| |
|
| | enum ggml_v1_linesearch linesearch; /* line-search variant selector */ |
| | } lbfgs; |
| | }; |
| |
|
| | struct ggml_v1_opt_params ggml_v1_opt_default_params(enum ggml_v1_opt_type type); /* returns a parameter struct by value for the given optimizer type; the defaults are defined outside this header */ |
| |
|
| | /* Optimizer entry point: takes params by value and a tensor f, returns an enum ggml_v1_opt_result. NOTE(review): f is presumably the scalar objective to minimize - confirm */ |
| | enum ggml_v1_opt_result ggml_v1_opt( |
| | struct ggml_v1_context * ctx, |
| | struct ggml_v1_opt_params params, |
| | struct ggml_v1_tensor * f); |
| |
|
| | |
| | |
| | |
| |
|
| | int ggml_v1_cpu_has_avx(void); /* feature queries: each takes no arguments and returns int; presumably non-zero when the feature is available - TODO confirm whether this is decided at build time or at run time */ |
| | int ggml_v1_cpu_has_avx2(void); |
| | int ggml_v1_cpu_has_avx512(void); |
| | int ggml_v1_cpu_has_fma(void); |
| | int ggml_v1_cpu_has_neon(void); |
| | int ggml_v1_cpu_has_arm_fma(void); |
| | int ggml_v1_cpu_has_f16c(void); |
| | int ggml_v1_cpu_has_fp16_va(void); |
| | int ggml_v1_cpu_has_wasm_simd(void); |
| | int ggml_v1_cpu_has_blas(void); /* NOTE(review): unlike its siblings this names a library rather than a CPU feature - confirm what it reports */ |
| | int ggml_v1_cpu_has_sse3(void); |
| | int ggml_v1_cpu_has_vsx(void); |
| |
|
| | #ifdef __cplusplus |
| | } |
| | #endif |
| |
|