Spaces:

GAInTech
/

feather-a10g-large-runtime

Paused

Upload folder using huggingface_hub

1c59946 verified 14 days ago

1.27 kB

	// TM anomaly kernel.
	//
	// Computes:
	// n_active = sum of sp_active_mask
	// anomaly = unpredicted_count / n_active (if n_active > 0)
	// = 0 (else)
	//
	// Launch: single block, 256 threads.

	extern "C" __global__
	void tm_anomaly(
	const unsigned char * __restrict__ sp_active_mask,
	const unsigned int * __restrict__ unpredicted_count,
	float * __restrict__ anomaly_out, // (1,) or (t_slot,)
	unsigned int t_slot,
	unsigned int n_cols
	) {
	const unsigned int tid = threadIdx.x;
	__shared__ unsigned int n_active_s;

	if (tid == 0) n_active_s = 0u;
	__syncthreads();

	unsigned int local = 0u;
	for (unsigned int i = tid; i < n_cols; i += blockDim.x) {
	if (sp_active_mask[i]) local += 1u;
	}
	// Warp reduce.
	for (int off = 16; off > 0; off >>= 1) {
	local += __shfl_down_sync(0xffffffffu, local, off);
	}
	if ((tid & 31u) == 0) {
	atomicAdd(&n_active_s, local);
	}
	__syncthreads();

	if (tid == 0) {
	unsigned int total = n_active_s;
	unsigned int bad = unpredicted_count[0];
	float anom = (total > 0u) ? ((float)bad / (float)total) : 0.0f;
	anomaly_out[t_slot] = anom;
	}
	}