// llama.cpp/ggml/src/ggml-cuda/arange.cu
#include "arange.cuh"
// Fill dst with an arithmetic sequence: dst[i] = start + step * i for i in [0, ne0).
// Expects a 1D launch with one thread per element; blockIdx.x indexes chunks of
// ne0 / blockDim.x. Threads past the end of the tensor do nothing.
static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < ne0) {
        dst[i] = start + step * i;
    }
}
// Host-side launcher: asynchronously writes ne0 sequence elements into the
// device buffer dst on the given stream. The grid is the ceiling division of
// ne0 by CUDA_ARANGE_BLOCK_SIZE (declared in arange.cuh).
static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
    const int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
    arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
}
// Backend entry point for GGML_OP_ARANGE.
// Unpacks {start, stop, step} from dst->op_params and fills dst (must be F32)
// with the sequence start, start+step, ... on the context's CUDA stream.
// Asserts that the tensor was allocated with exactly ceil((stop - start) / step)
// elements, matching how the op was constructed.
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    cudaStream_t stream = ctx.stream();
    float * dst_d = (float *) dst->data;

    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    // op_params stores three packed floats; copy them out via memcpy to stay
    // aliasing-safe rather than type-punning the int32 param array.
    const float * params = (const float *) dst->op_params;
    float start;
    float stop;
    float step;
    memcpy(&start, params + 0, sizeof(float));
    memcpy(&stop,  params + 1, sizeof(float));
    memcpy(&step,  params + 2, sizeof(float));

    const int64_t steps = (int64_t) ceil((stop - start) / step);
    GGML_ASSERT(ggml_nelements(dst) == steps);

    arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
}