| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| #ifndef HWY_DISABLED_TARGETS |
| #define HWY_DISABLED_TARGETS HWY_SCALAR |
| #endif |
|
|
| #include "compression/nuq.h" |
|
|
| #include <stddef.h> |
| #include <stdint.h> |
| #include <stdio.h> |
|
|
| #include <algorithm> |
| #include <random> |
|
|
| #include "compression/test_util.h" |
| #include "hwy/aligned_allocator.h" |
| #include "hwy/base.h" |
| #include "hwy/tests/test_util.h" |
| #include "hwy/timer.h" |
|
|
| |
| #undef HWY_TARGET_INCLUDE |
| #define HWY_TARGET_INCLUDE "compression/nuq_test.cc" |
| |
| #include "hwy/foreach_target.h" |
| |
| #include "compression/nuq-inl.h" |
| #include "hwy/highway.h" |
| #include "hwy/tests/hwy_gtest.h" |
| #include "hwy/tests/test_util-inl.h" |
|
|
| HWY_BEFORE_NAMESPACE(); |
| namespace gcpp { |
| namespace HWY_NAMESPACE { |
|
|
| |
| struct TestFlat { |
| template <typename T, class DF> |
| HWY_INLINE void operator()(T , DF df) { |
| |
| if (!(HWY_ONCE && hn::Lanes(df) == hn::Lanes(hn::ScalableTag<float>()))) { |
| return; |
| } |
|
|
| auto in = hwy::AllocateAligned<float>(kGroupSize); |
| HWY_ASSERT(in); |
| for (size_t i = 0; i < kGroupSize; ++i) { |
| in[i] = 0.5f; |
| } |
| ClusterBuf buf; |
| float centers[kClusters]; |
| uint16_t indices[kGroupSize]; |
| const size_t unused_clusters = |
| NuqClustering::ClusterExactL2(df, in.get(), buf, centers, indices); |
| HWY_ASSERT(unused_clusters == kClusters - 1); |
|
|
| for (size_t i = 0; i < unused_clusters; ++i) { |
| HWY_ASSERT(centers[i] == 0.0f); |
| } |
| HWY_ASSERT(centers[unused_clusters] == 0.5f); |
| for (size_t i = 0; i < kGroupSize; ++i) { |
| HWY_ASSERT(indices[i] == unused_clusters); |
| } |
| } |
| }; |
|
|
| void TestAllFlat() { hn::ForGEVectors<64, TestFlat>()(float()); } |
|
|
| |
| struct TestPlateaus { |
| template <typename T, class DF> |
| HWY_INLINE void operator()(T , DF df) { |
| |
| if (!(HWY_ONCE && hn::Lanes(df) == hn::Lanes(hn::ScalableTag<float>()))) { |
| return; |
| } |
|
|
| auto in = hwy::AllocateAligned<float>(kGroupSize); |
| HWY_ASSERT(in); |
|
|
| for (size_t i = 0; i < kGroupSize; ++i) { |
| const size_t idx_cluster = i / (kGroupSize / kClusters); |
| HWY_ASSERT(idx_cluster < kClusters); |
| in[i] = (1.0f * idx_cluster / kClusters) - 0.5f; |
| HWY_ASSERT(-0.5f <= in[i] && in[i] < 0.5f); |
| } |
|
|
| std::random_device rd; |
| std::mt19937 rng(rd()); |
| std::shuffle(in.get(), in.get() + kGroupSize, rng); |
|
|
| ClusterBuf buf; |
| float centers[kClusters]; |
| uint16_t indices[kGroupSize]; |
| const size_t unused_clusters = |
| NuqClustering::ClusterExactL2(df, in.get(), buf, centers, indices); |
| HWY_ASSERT(unused_clusters == 0); |
|
|
| DistortionStats stats; |
| for (size_t i = 0; i < kGroupSize; ++i) { |
| HWY_ASSERT(indices[i] < kClusters); |
| stats.Notify(in[i], centers[indices[i]]); |
| } |
| |
| HWY_ASSERT_EQ(kGroupSize, stats.NumExact()); |
| HWY_ASSERT_EQ(0, stats.NumSignFlip()); |
| HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); |
| HWY_ASSERT_EQ(0.0, stats.SumL1()); |
| HWY_ASSERT_EQ(0.0f, stats.GeomeanValueDivL1()); |
| HWY_ASSERT_EQ(0.0f, stats.WeightedAverageL1()); |
| |
| HWY_ASSERT(gcpp::IsInside(-0.05, 0.05, stats.Original().Mean())); |
| HWY_ASSERT(gcpp::IsNear(0.0, stats.Original().Skewness())); |
| } |
| }; |
|
|
| void TestAllPlateaus() { hn::ForGEVectors<64, TestPlateaus>()(float()); } |
|
|
| struct TestRamp { |
| template <typename T, class DF> |
| HWY_INLINE void operator()(T , DF df) { |
| |
| if (!(HWY_ONCE && hn::Lanes(df) == hn::Lanes(hn::ScalableTag<float>()))) { |
| return; |
| } |
|
|
| auto in = hwy::AllocateAligned<float>(kGroupSize); |
| HWY_ASSERT(in); |
|
|
| for (size_t i = 0; i < kGroupSize; ++i) { |
| in[i] = (1.0f * i / kGroupSize) - 0.45f; |
| HWY_ASSERT(-0.45f <= in[i] && in[i] < 0.55f); |
| } |
|
|
| std::random_device rd; |
| std::mt19937 rng(rd()); |
| std::shuffle(in.get(), in.get() + kGroupSize, rng); |
|
|
| ClusterBuf buf; |
| float centers[kClusters]; |
| uint16_t indices[kGroupSize]; |
| const size_t unused_clusters = |
| NuqClustering::ClusterExactL2(df, in.get(), buf, centers, indices); |
| HWY_ASSERT(unused_clusters == 0); |
|
|
| DistortionStats stats; |
| for (size_t i = 0; i < kGroupSize; ++i) { |
| HWY_ASSERT(indices[i] < kClusters); |
| stats.Notify(in[i], centers[indices[i]]); |
| } |
|
|
| |
| HWY_ASSERT_EQ(0, stats.NumExact()); |
| HWY_ASSERT(stats.NumSignFlip() < 10); |
| HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); |
| HWY_ASSERT_EQ(kGroupSize / kClusters / 4.0, stats.SumL1()); |
| HWY_ASSERT(gcpp::IsInside(17.0, 18.0, stats.GeomeanValueDivL1())); |
| HWY_ASSERT(gcpp::IsInside(0.005, 0.010, stats.WeightedAverageL1())); |
| HWY_ASSERT(stats.L1().Max() <= 0.04f); |
| |
| HWY_ASSERT(gcpp::IsNear(0.05, stats.Original().Mean(), 0.01)); |
| HWY_ASSERT(gcpp::IsNear(0.0, stats.Original().Skewness(), 1E-4)); |
| static_assert(kGroupSize == 256, "Update expected"); |
| } |
| }; |
|
|
| void TestAllRamp() { hn::ForGEVectors<64, TestRamp>()(float()); } |
|
|
| struct TestNormal { |
| template <typename T, class DF> |
| HWY_INLINE void operator()(T , DF df) { |
| auto in = hwy::AllocateAligned<float>(kGroupSize); |
| HWY_ASSERT(in); |
|
|
| hwy::RandomState rng; |
| hwy::Stats in_stats; |
| for (size_t i = 0; i < kGroupSize; ++i) { |
| const double r = RandomGaussian(rng); |
| in_stats.Notify(r); |
| in[i] = hwy::ConvertScalarTo<T>(r); |
| } |
| VerifyGaussian(in_stats); |
|
|
| ClusterBuf buf; |
| float centers[kClusters]; |
| uint16_t indices[kGroupSize]; |
| double elapsed = hwy::HighestValue<double>(); |
| for (size_t rep = 0; rep < 100; ++rep) { |
| const double t0 = hwy::platform::Now(); |
| const size_t unused_clusters = |
| NuqClustering::ClusterExactL2(df, in.get(), buf, centers, indices); |
| HWY_ASSERT(unused_clusters == 0); |
| const double t1 = hwy::platform::Now(); |
| elapsed = HWY_MIN(elapsed, t1 - t0); |
| } |
| fprintf(stderr, "Vec %zu Enc %.2f MB/s\n", Lanes(df) * 4, |
| kGroupSize * sizeof(float) * 1E-6 / elapsed); |
|
|
| DistortionStats stats; |
| for (size_t i = 0; i < kGroupSize; ++i) { |
| HWY_ASSERT(indices[i] < kClusters); |
| stats.Notify(in[i], centers[indices[i]]); |
| } |
|
|
| |
| HWY_ASSERT_EQ(0, stats.NumExact()); |
| HWY_ASSERT(stats.NumSignFlip() < kGroupSize / kClusters); |
| HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); |
| HWY_ASSERT(gcpp::IsInside(5.0, 6.0, stats.SumL1())); |
| HWY_ASSERT(gcpp::IsInside(12.7, 12.8, stats.GeomeanValueDivL1())); |
| HWY_ASSERT(gcpp::IsInside(0.036, 0.037, stats.WeightedAverageL1())); |
| HWY_ASSERT(stats.L1().Max() <= 0.10f); |
| static_assert(kGroupSize == 256, "Update expected"); |
| } |
| }; |
|
|
| void TestAllNormal() { hn::ForGEVectors<64, TestNormal>()(float()); } |
|
|
| |
| struct TestOffset { |
| template <typename T, class D> |
| HWY_INLINE void operator()(T , D d) { |
| const hn::Repartition<float, D> df; |
| const size_t total = 10 * kGroupSize; |
| const size_t kMidLen = 2 * kGroupSize; |
|
|
| auto in = hwy::AllocateAligned<float>(total); |
| auto dec1 = hwy::AllocateAligned<T>(total); |
| auto dec2 = hwy::AllocateAligned<T>(kMidLen); |
| auto nuq = hwy::AllocateAligned<NuqStream>(NuqStream::PackedEnd(total)); |
| HWY_ASSERT(in && dec1 && dec2 && nuq); |
|
|
| hwy::RandomState rng; |
| for (size_t i = 0; i < total; ++i) { |
| in[i] = static_cast<float>(RandomGaussian(rng)); |
| } |
|
|
| |
| ClusterBuf buf; |
| (void)NuqCodec::Enc(df, in.get(), total, buf, total, nuq.get(), 0); |
| NuqCodec::Dec(d, total, nuq.get(), 0, dec1.get(), total); |
|
|
| |
| const size_t offset = 5 * kGroupSize; |
| (void)NuqCodec::Enc(df, in.get(), kMidLen, buf, total, nuq.get(), offset); |
|
|
| |
| NuqCodec::Dec(d, total, nuq.get(), offset, dec2.get(), kMidLen); |
| for (size_t i = 0; i < kMidLen; ++i) { |
| HWY_ASSERT(dec1[i] == dec2[i]); |
| } |
| } |
| }; |
|
|
| void TestAllOffsetF32() { |
| const hn::ForGEVectors<128, TestOffset> test; |
| test(float()); |
| } |
|
|
| void TestAllOffsetBF16() { |
| const hn::ForGEVectors<128, TestOffset> test; |
| test(hwy::bfloat16_t()); |
| } |
|
|
| struct TestStream { |
| template <typename T, class D> |
| HWY_INLINE void operator()(T , D d) { |
| const hn::Repartition<float, D> df; |
| const size_t num = 4 * kGroupSize; |
| auto in = hwy::AllocateAligned<float>(num); |
| auto out = hwy::AllocateAligned<T>(num); |
| auto nuq = hwy::AllocateAligned<NuqStream>(NuqStream::PackedEnd(num)); |
| HWY_ASSERT(in && out && nuq); |
|
|
| hwy::RandomState rng; |
| hwy::Stats in_stats; |
| for (size_t i = 0; i < num; ++i) { |
| in[i] = static_cast<float>(RandomGaussian(rng)); |
| in_stats.Notify(in[i]); |
| } |
| VerifyGaussian(in_stats); |
|
|
| ClusterBuf buf; |
| double elapsed = hwy::HighestValue<double>(); |
| for (size_t rep = 0; rep < 100; ++rep) { |
| const double t0 = hwy::platform::Now(); |
| const size_t unused_clusters = |
| NuqCodec::Enc(df, in.get(), num, buf, num, nuq.get(), 0); |
| HWY_ASSERT(unused_clusters == 0); |
| const double t1 = hwy::platform::Now(); |
| elapsed = HWY_MIN(elapsed, t1 - t0); |
| } |
| fprintf(stderr, "Vec %zu Enc %.2f MB/s\n", Lanes(d) * sizeof(T), |
| num * sizeof(float) * 1E-6 / elapsed); |
|
|
| elapsed = hwy::HighestValue<double>(); |
| for (size_t rep = 0; rep < 100; ++rep) { |
| const double t0 = hwy::platform::Now(); |
| NuqCodec::Dec(d, num, nuq.get(), 0, out.get(), num); |
| const double t1 = hwy::platform::Now(); |
| elapsed = HWY_MIN(elapsed, t1 - t0); |
| } |
| fprintf(stderr, "Vec %zu Dec %.2f MB/s\n", Lanes(d) * sizeof(T), |
| num * sizeof(T) * 1E-6 / elapsed); |
|
|
| DistortionStats stats; |
| for (size_t i = 0; i < num; ++i) { |
| stats.Notify(in[i], hwy::ConvertScalarTo<float>(out[i])); |
| } |
|
|
| |
| HWY_ASSERT_EQ(0, stats.NumExact()); |
| HWY_ASSERT(stats.NumSignFlip() < num / kClusters); |
| HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); |
| HWY_ASSERT(gcpp::IsInside(23.0, 24.0, stats.SumL1())); |
| HWY_ASSERT(gcpp::IsInside(13.0, 13.3, stats.GeomeanValueDivL1())); |
| HWY_ASSERT(gcpp::IsInside(0.034, 0.035, stats.WeightedAverageL1())); |
| HWY_ASSERT(stats.L1().Max() <= 0.11f); |
| static_assert(kGroupSize == 256, "Update expected"); |
| } |
| }; |
|
|
| void TestAllStreamF32() { |
| const hn::ForGEVectors<128, TestStream> test; |
| test(float()); |
| } |
|
|
| void TestAllStreamBF16() { |
| const hn::ForGEVectors<128, TestStream> test; |
| test(hwy::bfloat16_t()); |
| } |
|
|
| struct TestDot { |
| template <typename T, class D> |
| HWY_INLINE void operator()(T , D d) { |
| const hn::Repartition<float, D> df; |
| const size_t num = 4 * kGroupSize; |
| auto in = hwy::AllocateAligned<float>(num); |
| auto dec = hwy::AllocateAligned<float>(num); |
| auto vec = hwy::AllocateAligned<T>(num); |
| auto nuq = hwy::AllocateAligned<NuqStream>(NuqStream::PackedEnd(num)); |
| HWY_ASSERT(in && dec && vec && nuq); |
|
|
| |
| hwy::RandomState rng; |
| hwy::Stats in_stats; |
| for (size_t i = 0; i < num; ++i) { |
| in[i] = static_cast<float>(RandomGaussian(rng)); |
| in_stats.Notify(in[i]); |
| } |
| for (size_t i = 0; i < num; ++i) { |
| const float r = static_cast<float>(RandomGaussian(rng)); |
| in_stats.Notify(r); |
| vec[i] = hwy::ConvertScalarTo<T>(r); |
| } |
| VerifyGaussian(in_stats); |
|
|
| ClusterBuf buf; |
| const size_t unused_clusters = |
| NuqCodec::Enc(df, in.get(), num, buf, num, nuq.get(), 0); |
| HWY_ASSERT(unused_clusters == 0); |
|
|
| |
| float actual = 0.0f; |
| double elapsed = hwy::HighestValue<double>(); |
| for (size_t rep = 0; rep < 20; ++rep) { |
| hn::Vec<decltype(df)> sum0 = hn::Zero(df); |
| hn::Vec<decltype(df)> sum1 = hn::Zero(df); |
| hn::Vec<decltype(df)> sum2 = hn::Zero(df); |
| hn::Vec<decltype(df)> sum3 = hn::Zero(df); |
| const double t0 = hwy::platform::Now(); |
| NuqCodec::Dot(df, num, nuq.get(), 0, vec.get(), num, sum0, sum1, sum2, |
| sum3); |
| const double t1 = hwy::platform::Now(); |
| elapsed = HWY_MIN(elapsed, t1 - t0); |
| sum0 = hn::Add(hn::Add(sum0, sum1), hn::Add(sum2, sum3)); |
| actual = hn::ReduceSum(df, sum0); |
| } |
|
|
| NuqCodec::Dec(df, num, nuq.get(), 0, dec.get(), num); |
| fprintf(stderr, "Vec %zu Dec %.2f MB/s\n", Lanes(d) * sizeof(T), |
| num * sizeof(in[0]) * 1E-6 / elapsed); |
|
|
| |
| float exact = 0.0f; |
| float expected = 0.0f; |
| DistortionStats dec_stats; |
| hwy::Stats ratios; |
| for (size_t i = 0; i < num; ++i) { |
| dec_stats.Notify(in[i], dec[i]); |
| const float v1 = hwy::ConvertScalarTo<float>(vec[i]); |
| exact += in[i] * v1; |
| expected += dec[i] * v1; |
| if (expected != 0.0f) { |
| ratios.Notify(exact / expected); |
| } |
| } |
| const bool isBF = sizeof(T) == 2; |
| const double dec_snr = dec_stats.GeomeanValueDivL1(); |
| const double dec_wl1 = dec_stats.WeightedAverageL1(); |
| const double dot_snr = 1.0 / hwy::ScalarAbs(1.0 - ratios.GeometricMean()); |
| |
| |
| const float final_ratio = HWY_MIN(exact / actual, actual / exact); |
| if (HWY_ONCE) { |
| fprintf(stderr, "ratios %s\n", ratios.ToString().c_str()); |
| fprintf(stderr, |
| "exact %.3f e2 %.4f actual %.4f final_ratio %.3f dec_snr %.2f " |
| "dot_snr %.2f dec_wl1 %.4f\n", |
| exact, expected, actual, final_ratio, dec_snr, dot_snr, dec_wl1); |
| } |
| |
| HWY_ASSERT(gcpp::IsInside(0.88f, 1.0f, final_ratio)); |
| |
| HWY_ASSERT(gcpp::IsNear(expected, actual, 1E-4f)); |
| |
| HWY_ASSERT(dot_snr >= (isBF ? 17.7 : 14.3)); |
|
|
| |
| |
| HWY_ASSERT(gcpp::IsNear(13.1, dec_snr, 0.1)); |
| HWY_ASSERT(gcpp::IsNear(0.034, dec_wl1, 0.001)); |
| HWY_ASSERT(gcpp::IsNear(23.5, dec_stats.SumL1(), 0.1)); |
| HWY_ASSERT(dec_stats.NumSignFlip() < num / kClusters); |
| HWY_ASSERT_EQ(0, dec_stats.NumExact()); |
| HWY_ASSERT_EQ(0, dec_stats.NumRoundedToZero()); |
| HWY_ASSERT_EQ(0.0, dec_stats.SumL1Rounded()); |
| |
| HWY_ASSERT(gcpp::IsInside(0.0f, 2E-5f, dec_stats.L1().Min())); |
| HWY_ASSERT(gcpp::IsInside(0.09f, 0.11f, dec_stats.L1().Max())); |
| HWY_ASSERT(gcpp::IsInside(0.02, 0.03, dec_stats.L1().Mean())); |
| HWY_ASSERT(gcpp::IsInside(1.0, 1.1, dec_stats.L1().Skewness())); |
| HWY_ASSERT(gcpp::IsInside(4.0, 5.0, dec_stats.L1().Kurtosis())); |
| static_assert(kGroupSize == 256, "Update expected*"); |
| } |
| }; |
|
|
| void TestAllDotF32() { |
| const hn::ForGEVectors<128, TestDot> test; |
| test(float()); |
| } |
| void TestAllDotBF16() { |
| const hn::ForGEVectors<128, TestDot> test; |
| test(hwy::bfloat16_t()); |
| } |
|
|
| |
| } |
| } |
| HWY_AFTER_NAMESPACE(); |
|
|
| #if HWY_ONCE |
|
|
| namespace gcpp { |
| HWY_BEFORE_TEST(NuqTest); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllFlat); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllPlateaus); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllRamp); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllNormal); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllOffsetF32); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllOffsetBF16); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllStreamF32); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllStreamBF16); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllDotF32); |
| HWY_EXPORT_AND_TEST_P(NuqTest, TestAllDotBF16); |
| #ifdef HWY_AFTER_TEST |
| HWY_AFTER_TEST(); |
| #endif |
| } |
|
|
| #endif |
|
|