Upload rth_tcn_ops.cpp with huggingface_hub
Browse files- rth_tcn_ops.cpp +67 -0
rth_tcn_ops.cpp
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* @file rth_tcn_ops.cpp
|
| 3 |
+
* @brief Implementation of custom TCN operators for llama.cpp.
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
#include "ggml.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
|
| 9 |
+
|
| 10 |
+
// --- CAUSAL CONV1D (CPU REFERENCE) ---
|
| 11 |
+
// This is the reference implementation for the causal convolution.
|
| 12 |
+
// In production, this would be optimized with AVX/NEON or CUDA.
|
| 13 |
+
|
| 14 |
+
void ggml_compute_forward_causal_conv1d(
|
| 15 |
+
const struct ggml_compute_params * params,
|
| 16 |
+
const struct ggml_tensor * src0, // Input: [N, C, T]
|
| 17 |
+
const struct ggml_tensor * src1, // Kernel: [C, 1, K]
|
| 18 |
+
struct ggml_tensor * dst) {
|
| 19 |
+
|
| 20 |
+
const int nc = src0->ne[1]; // Channels
|
| 21 |
+
const int nt = src0->ne[0]; // Time steps
|
| 22 |
+
const int nk = src1->ne[0]; // Kernel size
|
| 23 |
+
const int dilation = params->ith; // We use params->ith to pass dilation for simplicity in prototype
|
| 24 |
+
|
| 25 |
+
// Causal Convolution Logic:
|
| 26 |
+
// Output[c, t] = sum_{k=0}^{nk-1} Input[c, t - k * dilation] * Kernel[c, k]
|
| 27 |
+
// If t - k * dilation < 0, Input is 0 (causal padding).
|
| 28 |
+
|
| 29 |
+
for (int c = 0; c < nc; c++) {
|
| 30 |
+
for (int t = 0; t < nt; t++) {
|
| 31 |
+
float sum = 0.0f;
|
| 32 |
+
for (int k = 0; k < nk; k++) {
|
| 33 |
+
int src_t = t - k * dilation;
|
| 34 |
+
if (src_t >= 0) {
|
| 35 |
+
float val = ((float*)src0->data)[c * nt + src_t];
|
| 36 |
+
float weight = ((float*)src1->data)[c * nk + k];
|
| 37 |
+
sum += val * weight;
|
| 38 |
+
}
|
| 39 |
+
}
|
| 40 |
+
((float*)dst->data)[c * nt + t] = sum;
|
| 41 |
+
}
|
| 42 |
+
}
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
// --- FRACTAL GATE (CPU REFERENCE) ---
|
| 46 |
+
// Y = SiLU(path_a) * Sigmoid(path_b)
|
| 47 |
+
// Note: In ZetaGrid, gating is applied after convolution.
|
| 48 |
+
|
| 49 |
+
void ggml_compute_forward_fractal_gate(
|
| 50 |
+
const struct ggml_compute_params * params,
|
| 51 |
+
const struct ggml_tensor * src0, // Path A (Mixed)
|
| 52 |
+
const struct ggml_tensor * src1, // Path B (Gate)
|
| 53 |
+
struct ggml_tensor * dst) {
|
| 54 |
+
|
| 55 |
+
const int ne = ggml_nelements(src0);
|
| 56 |
+
const float * a = (const float *)src0->data;
|
| 57 |
+
const float * b = (const float *)src1->data;
|
| 58 |
+
float * out = (float *)dst->data;
|
| 59 |
+
|
| 60 |
+
for (int i = 0; i < ne; i++) {
|
| 61 |
+
// SiLU(x) = x * sigmoid(x)
|
| 62 |
+
float silu_a = a[i] * (1.0f / (1.0f + std::exp(-a[i])));
|
| 63 |
+
// Sigmoid(g)
|
| 64 |
+
float sig_b = 1.0f / (1.0f + std::exp(-b[i]));
|
| 65 |
+
out[i] = silu_a * sig_b;
|
| 66 |
+
}
|
| 67 |
+
}
|