Spaces:
Running on Zero
Running on Zero
| import torch | |
| import unittest | |
| from src.clip.KleinEncoder import Qwen3_4BModel, KleinCLIP, KleinTokenizer | |
| from src.Model.ModelPatcher import ModelPatcher | |
| from src.Utilities.Quantization import dequantize_nvfp4 | |
class TestKleinNVFP4(unittest.TestCase):
    """Exercise "nvfp4" weight-only quantization of a linear layer via ModelPatcher."""

    def test_text_encoder_nvfp4_quantization(self):
        """Quantize one bfloat16 linear layer to "nvfp4" and run a forward pass.

        Asserts the attributes present on the layer after
        ``weight_only_quantize("nvfp4")``, the dtype and shape of the packed
        weight, and that a forward pass returns a NaN-free tensor with the
        expected shape and dtype.
        """
        # Kept as a function-scope import, exactly as the test originally did.
        from src.cond.cast import manual_cast as ops

        # Seed the global RNG so the random weights and input are identical on
        # every run; an unseeded test cannot be reproduced when it fails.
        torch.manual_seed(0)

        device = torch.device("cpu")
        dtype = torch.bfloat16
        # Small square layer; named so the packed shape below is derived
        # rather than hard-coded.
        in_features = out_features = 128

        # Use our custom Linear that supports comfy_cast_weights
        model = torch.nn.Sequential(
            ops.Linear(in_features, out_features, bias=False, dtype=dtype, device=device)
        )

        # Fill with some identifiable data
        with torch.no_grad():
            model[0].weight.copy_(torch.randn(out_features, in_features, dtype=dtype))

        # Wrap in ModelPatcher and quantize to nvfp4
        patcher = ModelPatcher(model, device, device)
        patcher.weight_only_quantize("nvfp4")
        layer = patcher.model[0]

        # Verify quantization attributes
        self.assertTrue(layer.comfy_cast_weights)
        self.assertEqual(layer.quant_format, "nvfp4")
        self.assertTrue(hasattr(layer, "weight_scale"))
        self.assertTrue(hasattr(layer, "weight_scale_2"))

        # Verify weight is now packed uint8: two 4-bit values per byte, so the
        # last dimension is half of in_features.
        self.assertEqual(layer.weight.dtype, torch.uint8)
        self.assertEqual(layer.weight.shape, (out_features, in_features // 2))

        # Test forward pass (triggers dequantization)
        input_tensor = torch.randn(1, in_features, dtype=dtype, device=device)
        output = patcher.model(input_tensor)
        self.assertEqual(output.shape, (1, out_features))
        self.assertEqual(output.dtype, dtype)
        self.assertFalse(torch.isnan(output).any())
        print("NVFP4 Text Encoder layer test passed!")
# Run the unittest runner when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()