#pragma once #include void rmsnorm( torch::Tensor& output, const torch::Tensor& input, const torch::Tensor& weight, float eps );