| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | #ifndef THIRD_PARTY_GEMMA_CPP_COMPRESSION_ANALYZE_H_ |
| | #define THIRD_PARTY_GEMMA_CPP_COMPRESSION_ANALYZE_H_ |
| |
|
| | #include <stddef.h> |
| | #include <stdint.h> |
| | #include <stdio.h> |
| | #include <string.h> |
| |
|
| | #include <cmath> |
| | #include <cstdlib> |
| | #include <vector> |
| |
|
| | #include "compression/distortion.h" |
| | #include "compression/nuq.h" |
| | #include "hwy/base.h" |
| | #include "hwy/contrib/thread_pool/thread_pool.h" |
| | #include "hwy/stats.h" |
| | #include "hwy/timer.h" |
| |
|
| | #endif |
| |
|
| | |
| | #if defined(THIRD_PARTY_GEMMA_CPP_ANALYZE_TOGGLE) == defined(HWY_TARGET_TOGGLE) |
| | #ifdef THIRD_PARTY_GEMMA_CPP_ANALYZE_TOGGLE |
| | #undef THIRD_PARTY_GEMMA_CPP_ANALYZE_TOGGLE |
| | #else |
| | #define THIRD_PARTY_GEMMA_CPP_ANALYZE_TOGGLE |
| | #endif |
| |
|
| | #include "compression/nuq-inl.h" |
| | #include "compression/sfp-inl.h" |
| | #include "hwy/contrib/sort/vqsort-inl.h" |
| | #include "hwy/highway.h" |
| |
|
| | HWY_BEFORE_NAMESPACE(); |
| | namespace gcpp { |
| | namespace HWY_NAMESPACE { |
| |
|
| | class PerThread { |
| | public: |
| | void NotifyGroup(const float* group) { |
| | hwy::Stats s_group; |
| | for (size_t i = 0; i < kGroupSize; ++i) { |
| | |
| | if (group[i] == 0.0f || group[i] == -0.0f) continue; |
| | s_all_.Notify(group[i]); |
| | s_group.Notify(group[i]); |
| |
|
| | num_tiny_ += std::abs(group[i]) < 1e-3f; |
| |
|
| | |
| | const uint32_t binary32 = |
| | hwy::BitCastScalar<uint32_t>(std::abs(group[i])); |
| |
|
| | |
| | b_exp256_.Notify(binary32 >> 23); |
| | const uint32_t m4 = (binary32 & 0x7FFFFF) >> (23 - 4); |
| | b_m4_.Notify(m4); |
| | } |
| | s_group_ranges_.Notify(s_group.Max() - s_group.Min()); |
| | s_group_mins_.Notify(s_group.Min()); |
| | s_group_maxs_.Notify(s_group.Max()); |
| |
|
| | float desc[kGroupSize]; |
| | memcpy(desc, group, kGroupSize * sizeof(group[0])); |
| | hn::VQSortStatic(desc, kGroupSize, hwy::SortDescending()); |
| |
|
| | |
| | float max_ratio = 0.0f; |
| | for (size_t i = 0; i < kGroupSize; ++i) { |
| | if (desc[i] != 0.0f && desc[i] != -0.0f) { |
| | max_ratio = std::max(max_ratio, std::abs(desc[0] / desc[i])); |
| | } |
| | } |
| | s_group_max_vs_min_.Notify(max_ratio); |
| |
|
| | |
| | float diffs[kGroupSize]; |
| | for (size_t i = 0; i < kGroupSize - 1; ++i) { |
| | |
| | diffs[i] = std::abs(desc[i]) < 1e-5 |
| | ? 0 |
| | : std::abs((desc[i] - desc[i + 1]) / desc[i]); |
| | } |
| | hn::VQSortStatic(diffs, kGroupSize, hwy::SortDescending()); |
| | s_cut15_.Notify(diffs[15]); |
| | } |
| |
|
| | void Assimilate(const PerThread& other) { |
| | num_tiny_ += other.num_tiny_; |
| | s_all_.Assimilate(other.s_all_); |
| | s_group_ranges_.Assimilate(other.s_group_ranges_); |
| | s_group_mins_.Assimilate(other.s_group_mins_); |
| | s_group_maxs_.Assimilate(other.s_group_maxs_); |
| | s_group_max_vs_min_.Assimilate(other.s_group_max_vs_min_); |
| | s_erange_.Assimilate(other.s_erange_); |
| | s_km_1_.Assimilate(other.s_km_1_); |
| | s_km_2_.Assimilate(other.s_km_2_); |
| | s_cut15_.Assimilate(other.s_cut15_); |
| | b_magn100_.Assimilate(other.b_magn100_); |
| | b_exp256_.Assimilate(other.b_exp256_); |
| | b_m4_.Assimilate(other.b_m4_); |
| | } |
| |
|
| | void PrintAll() { |
| | const int skip = hwy::Stats::kNoGeomean; |
| | fprintf(stderr, "num tiny %zu\n", num_tiny_); |
| | fprintf(stderr, "weights %s\n", s_all_.ToString(skip).c_str()); |
| | fprintf(stderr, " ranges %s\n", s_group_ranges_.ToString(skip).c_str()); |
| | fprintf(stderr, " mins %s\n", s_group_mins_.ToString(skip).c_str()); |
| | fprintf(stderr, " maxs %s\n", s_group_maxs_.ToString(skip).c_str()); |
| | fprintf(stderr, " Mvm %s\n", s_group_max_vs_min_.ToString(skip).c_str()); |
| | fprintf(stderr, " cut15 %s\n", s_cut15_.ToString(skip).c_str()); |
| | fprintf(stderr, " erange %s\n", s_erange_.ToString(skip).c_str()); |
| | fprintf(stderr, " km1 %s\n", s_km_1_.ToString(skip).c_str()); |
| | fprintf(stderr, " km2 %s\n", s_km_2_.ToString(skip).c_str()); |
| |
|
| | |
| | |
| | |
| |
|
| | fprintf(stderr, "\n"); |
| | } |
| |
|
| | private: |
| | size_t num_tiny_ = 0; |
| | hwy::Stats s_all_; |
| | hwy::Stats s_group_ranges_; |
| | hwy::Stats s_group_mins_; |
| | hwy::Stats s_group_maxs_; |
| | hwy::Stats s_group_max_vs_min_; |
| | hwy::Stats s_erange_; |
| | hwy::Stats s_km_1_; |
| | hwy::Stats s_km_2_; |
| | hwy::Stats s_cut15_; |
| | hwy::Bins<100> b_magn100_; |
| | hwy::Bins<256> b_exp256_; |
| | hwy::Bins<16> b_m4_; |
| | uint8_t padding_[64]; |
| | }; |
| |
|
| | class PerLayer { |
| | public: |
| | void NotifyGroup(const float* group) { |
| | for (size_t i = 0; i < kGroupSize; ++i) { |
| | s_layer_.Notify(group[i]); |
| | } |
| | } |
| |
|
| | void UpdateOutliers(const float* layer, size_t weights_per_layer) { |
| | const float layer_mean = s_layer_.Mean(); |
| | const float layer_sd = s_layer_.StandardDeviation(); |
| | for (size_t i = 0; i < weights_per_layer; ++i) { |
| | num_outliers_ += |
| | std::abs(std::abs(layer[i]) - layer_mean) >= 3.0f * layer_sd; |
| | } |
| | } |
| |
|
| | const hwy::Stats& GetStats() const { return s_layer_; } |
| | size_t Outliers() const { return num_outliers_; } |
| |
|
| | private: |
| | hwy::Stats s_layer_; |
| | size_t num_outliers_ = 0; |
| | uint8_t padding[64]; |
| | }; |
| |
|
| | static HWY_NOINLINE void Analyze(const char* caption, float* mat, size_t layers, |
| | size_t weights_per_layer, |
| | hwy::ThreadPool& pool) { |
| | std::vector<PerThread> tls; |
| | std::vector<PerLayer> per_layer(layers); |
| | const auto init = [&](size_t num_threads) { |
| | tls.resize(num_threads); |
| | return true; |
| | }; |
| |
|
| | pool.Run(0, static_cast<uint32_t>(layers), init, |
| | [&](uint32_t idx_layer, size_t idx_thread) { |
| | PerThread& self = tls[idx_thread]; |
| | const float* layer = &mat[idx_layer * weights_per_layer]; |
| | |
| | for (size_t group_start = 0; |
| | group_start + kGroupSize <= weights_per_layer; |
| | group_start += kGroupSize) { |
| | const float* group = layer + group_start; |
| | per_layer[idx_layer].NotifyGroup(group); |
| | self.NotifyGroup(group); |
| | } |
| |
|
| | per_layer[idx_layer].UpdateOutliers(layer, weights_per_layer); |
| | }); |
| |
|
| | const int skip = hwy::Stats::kNoGeomean; |
| | fprintf(stderr, "\n------------%s\n", caption); |
| |
|
| | for (size_t i = 1; i < pool.NumThreads(); ++i) { |
| | tls[0].Assimilate(tls[i]); |
| | } |
| | tls[0].PrintAll(); |
| |
|
| | hwy::Stats s_layer_ranges; |
| | hwy::Stats s_layer_outliers; |
| | for (size_t i = 0; i < layers; ++i) { |
| | fprintf(stderr, " %02zu %s\n", i, |
| | per_layer[i].GetStats().ToString(skip).c_str()); |
| | const float range = |
| | per_layer[i].GetStats().Max() - per_layer[i].GetStats().Min(); |
| | s_layer_ranges.Notify(range); |
| | s_layer_outliers.Notify((100.0 * per_layer[i].Outliers()) / |
| | weights_per_layer); |
| | } |
| | fprintf(stderr, "layer outliers%% %s\n", |
| | s_layer_outliers.ToString(skip).c_str()); |
| | fprintf(stderr, "layer ranges %s\n", s_layer_ranges.ToString(skip).c_str()); |
| | } |
| |
|
| | |
| | } |
| | } |
| | HWY_AFTER_NAMESPACE(); |
| |
|
| | #endif |
| |
|