|
|
|
|
|
|
|
|
import numpy as np |
|
|
import sys |
|
|
import os |
|
|
import argparse |
|
|
from pathlib import Path |
|
|
|
|
|
def calculate_nmse(reference, test): |
|
|
mse = np.mean((test - reference) ** 2) |
|
|
ref_var = np.var(reference) |
|
|
if ref_var == 0: |
|
|
nmse = float('inf') if mse > 0 else 0.0 |
|
|
return mse, mse, ref_var |
|
|
|
|
|
nmse = mse / ref_var |
|
|
|
|
|
return nmse, mse, ref_var |
|
|
|
|
|
def load_logits(file_path): |
|
|
if not os.path.exists(file_path): |
|
|
raise FileNotFoundError(f"File not found: {file_path}") |
|
|
|
|
|
if file_path.suffix == '.npy': |
|
|
return np.load(file_path) |
|
|
elif file_path.suffix == '.bin': |
|
|
return np.fromfile(file_path, dtype=np.float32) |
|
|
else: |
|
|
|
|
|
try: |
|
|
|
|
|
data = [] |
|
|
with open(file_path, 'r') as f: |
|
|
for line in f: |
|
|
if ':' in line: |
|
|
|
|
|
value = float(line.split(':')[1].strip()) |
|
|
else: |
|
|
|
|
|
value = float(line.strip()) |
|
|
data.append(value) |
|
|
return np.array(data, dtype=np.float32) |
|
|
except: |
|
|
return np.loadtxt(file_path, dtype=np.float32) |
|
|
|
|
|
def interpret_nmse(nmse): |
|
|
"""Provide interpretation of NMSE value""" |
|
|
if nmse == 0: |
|
|
return "Perfect match", "π" |
|
|
elif nmse < 1e-6: |
|
|
return "Essentially identical", "β
" |
|
|
elif nmse < 1e-4: |
|
|
return "Excellent match", "β
" |
|
|
elif nmse < 1e-3: |
|
|
return "Very good match", "π" |
|
|
elif nmse < 1e-2: |
|
|
return "Good match", "π" |
|
|
elif nmse < 0.1: |
|
|
return "Acceptable match", "β οΈ" |
|
|
elif nmse < 1.0: |
|
|
return "Poor match", "β" |
|
|
else: |
|
|
return "Very poor match (worse than noise)", "β" |
|
|
|
|
|
def main(): |
|
|
parser = argparse.ArgumentParser(description='Validate model logits') |
|
|
parser.add_argument('-m', '--model-path', required=True, help='Path to the model directory') |
|
|
args = parser.parse_args() |
|
|
|
|
|
model_name = os.path.basename(args.model_path) |
|
|
data_dir = Path("data") |
|
|
|
|
|
pytorch_file = data_dir / f"pytorch-{model_name}.bin" |
|
|
llamacpp_file = data_dir / f"llamacpp-{model_name}.bin" |
|
|
|
|
|
print(f"Model name: {model_name}") |
|
|
print(f"PyTorch logits file: {pytorch_file}") |
|
|
print(f"llama.cpp logits file: {llamacpp_file}") |
|
|
|
|
|
reference_file = pytorch_file |
|
|
test_file = llamacpp_file |
|
|
|
|
|
print("π NMSE Check for Model Comparison") |
|
|
print("=" * 50) |
|
|
print(f"Reference (ground truth): {reference_file}") |
|
|
print(f"Test (to evaluate): {test_file}") |
|
|
print() |
|
|
|
|
|
try: |
|
|
print("Loading reference logits...") |
|
|
reference = load_logits(reference_file) |
|
|
print(f" Shape: {reference.shape}, Type: {reference.dtype}") |
|
|
|
|
|
print("Loading test logits...") |
|
|
test = load_logits(test_file) |
|
|
print(f" Shape: {test.shape}, Type: {test.dtype}") |
|
|
|
|
|
|
|
|
if reference.shape != test.shape: |
|
|
print(f"\nβ Error: Shape mismatch!") |
|
|
print(f" Reference: {reference.shape}") |
|
|
print(f" Test: {test.shape}") |
|
|
sys.exit(1) |
|
|
|
|
|
print(f"\nβ
Shapes match: {reference.shape}") |
|
|
|
|
|
nmse, mse, ref_var = calculate_nmse(reference, test) |
|
|
|
|
|
|
|
|
max_abs_error = np.max(np.abs(test - reference)) |
|
|
mean_abs_error = np.mean(np.abs(test - reference)) |
|
|
|
|
|
|
|
|
print(f"\nπ METRICS") |
|
|
print("=" * 30) |
|
|
print(f"MSE (Mean Squared Error): {mse:.6e}") |
|
|
print(f"Reference Variance: {ref_var:.6e}") |
|
|
print(f"NMSE: {nmse:.6e}") |
|
|
print(f"Max Absolute Error: {max_abs_error:.6f}") |
|
|
print(f"Mean Absolute Error: {mean_abs_error:.6f}") |
|
|
|
|
|
|
|
|
if nmse > 0: |
|
|
nmse_db = 10 * np.log10(nmse) |
|
|
print(f"NMSE (dB): {nmse_db:.2f} dB") |
|
|
|
|
|
|
|
|
interpretation, emoji = interpret_nmse(nmse) |
|
|
print(f"\nπ― INTERPRETATION") |
|
|
print("=" * 30) |
|
|
print(f"{emoji} {interpretation}") |
|
|
|
|
|
|
|
|
print(f"\nπ GUIDANCE") |
|
|
print("=" * 30) |
|
|
if nmse < 1e-3: |
|
|
print("β
EXCELLENT: Your GGML conversion is working very well!") |
|
|
print(" The differences are negligible for practical use.") |
|
|
elif nmse < 1e-2: |
|
|
print("π GOOD: Your GGML conversion is working well.") |
|
|
print(" Small differences are likely due to precision/quantization.") |
|
|
elif nmse < 0.1: |
|
|
print("β οΈ ACCEPTABLE: Conversion is working but with some differences.") |
|
|
print(" Check if you're using quantization (Q4, Q8, etc.)") |
|
|
print(" Test generation quality to see if it's acceptable.") |
|
|
else: |
|
|
print("β PROBLEMATIC: Large differences detected.") |
|
|
print(" Check your conversion process for potential issues.") |
|
|
print(" Verify you're using the same model weights.") |
|
|
|
|
|
|
|
|
print(f"\nπ NMSE BENCHMARKS") |
|
|
print("=" * 30) |
|
|
print("< 1e-6: Essentially identical") |
|
|
print("< 1e-4: Excellent (typical for good conversions)") |
|
|
print("< 1e-3: Very good") |
|
|
print("< 1e-2: Good (acceptable for most use cases)") |
|
|
print("< 0.1: Acceptable (may need verification)") |
|
|
print("> 1.0: Poor (worse than random)") |
|
|
|
|
|
|
|
|
if nmse < 1e-2: |
|
|
print(f"\nβ
RESULT: PASS (NMSE = {nmse:.2e})") |
|
|
sys.exit(0) |
|
|
else: |
|
|
print(f"\nβ RESULT: NEEDS REVIEW (NMSE = {nmse:.2e})") |
|
|
sys.exit(1) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"β Error: {e}") |
|
|
sys.exit(1) |
|
|
|
|
|
if __name__ == "__main__": |
|
|
main() |
|
|
|