// MiMo-V2.5-QEdge / main.cpp
// Qapdex's picture
// Update main.cpp
// 5bed82c verified
// Raw
// History Blame Contribute Delete
// 3.59 kB
// project_natal/src/main.cpp
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
/// Minimal bfloat16 storage type: holds the upper 16 bits of a 32-bit float.
///
/// Bit patterns are moved between `float` and `uint32_t` with std::memcpy;
/// casting a pointer of one type to the other and dereferencing it violates
/// the strict-aliasing rule and is undefined behavior.
struct bf16_t {
    uint16_t bits;

    /// Widens to float by placing `bits` in the high half of a 32-bit word
    /// (the low 16 bits are zero).
    float to_float() const {
        static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
        const uint32_t val_32 = static_cast<uint32_t>(bits) << 16;
        float out;
        std::memcpy(&out, &val_32, sizeof(out));
        return out;
    }

    /// Narrows a float by truncation: the low 16 bits of its representation
    /// are dropped without rounding.
    static bf16_t from_float(float f) {
        static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
        uint32_t val_32;
        std::memcpy(&val_32, &f, sizeof(val_32));
        bf16_t out;
        out.bits = static_cast<uint16_t>(val_32 >> 16);
        return out;
    }
};
/// Weight matrix consumed by NatalEngine's mat-vec kernel.
///
/// The kernel reads `weights` row-major (element (r, c) at index
/// r * cols + c), treats the values 1 and -1 as +/- contributions and every
/// other value as zero, and multiplies each row's sum by `scales[r]`.
struct NatalTernaryTensor {
    std::string name;             // label for the tensor
    std::vector<int8_t> weights;  // rows * cols entries, row-major
    std::vector<float> scales;    // one scale factor per row
    int rows = 0;                 // zero-initialized so a default-constructed tensor is empty, not indeterminate
    int cols = 0;
};
class NatalEngine {
public:
NatalEngine() {
std::cout << "[Project Natal] Bit-Masking Engine aktiv. Verzweigungen eliminiert.\n";
}
// BRANCHLESS TERNARY KERNEL (Verhindert Pipeline-Stalls bei echten Gewichten)
void mat_vec_multiply_branchless(const NatalTernaryTensor& matrix, const std::vector<bf16_t>& vec_in, std::vector<float>& vec_out) {
vec_out.assign(matrix.rows, 0.0f);
#pragma omp parallel for
for (int r = 0; r < matrix.rows; ++r) {
float sum = 0.0f;
int row_offset = r * matrix.cols;
float current_scale = matrix.scales[r];
for (int c = 0; c < matrix.cols; ++c) {
int8_t w = matrix.weights[row_offset + c];
float val = vec_in[c].to_float();
// Mathematischer Trick statt IF/ELSE:
// Wenn w = 1 -> (1 * val) - (0 * val) = +val
// Wenn w = -1 -> (0 * val) - (1 * val) = -val
// Wenn w = 0 -> (0 * val) - (0 * val) = 0
float is_pos = static_cast<float>(w == 1);
float is_neg = static_cast<float>(w == -1);
sum += (is_pos * val) - (is_neg * val);
}
vec_out[r] = sum * current_scale;
}
}
void process_fused_qkv(const NatalTernaryTensor& fused_qkv, const std::vector<bf16_t>& hidden_states) {
std::vector<float> qkv_output;
auto start = std::chrono::high_resolution_clock::now();
mat_vec_multiply_branchless(fused_qkv, hidden_states, qkv_output);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> elapsed = end - start;
std::cout << "[Natal - Performance] Zeit pro Schicht: " << elapsed.count() << " ms.\n";
}
};
int main() {
std::cout << "==================================================\n";
std::cout << " PROJECT NATAL - BENCHMARK RUN (BRANCHLESS)\n";
std::cout << "==================================================\n\n";
NatalEngine engine;
int hidden_dim = 4096;
int fused_rows = hidden_dim * 3;
NatalTernaryTensor mock_qkv;
mock_qkv.name = "layers.0.attention.attn_qkv";
mock_qkv.rows = fused_rows;
mock_qkv.cols = hidden_dim;
mock_qkv.weights.assign(fused_rows * hidden_dim, 0);
mock_qkv.scales.assign(fused_rows, 0.02f);
// Realistischeres Gewichtsrauschen simulieren
for (size_t i = 0; i < mock_qkv.weights.size(); ++i) {
if (i % 5 == 0) mock_qkv.weights[i] = 1;
else if (i % 11 == 0) mock_qkv.weights[i] = -1;
}
std::vector<bf16_t> mock_hidden_states(hidden_dim);
for (int i = 0; i < hidden_dim; ++i) {
mock_hidden_states[i] = bf16_t::from_float(1.0f + std::cos(i * 0.05f));
}
engine.process_fused_qkv(mock_qkv, mock_hidden_states);
return 0;
}