TSXu committed on
Commit
9d88d74
·
1 Parent(s): e7ca422

Use fp32 for inference to fix CUBLAS errors on ZeroGPU

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. inference.py +6 -6
app.py CHANGED
@@ -104,7 +104,7 @@ def init_generator():
104
  author_descriptions_path='dataset/calligraphy_styles_en.json',
105
  use_deepspeed=False,
106
  use_4bit_quantization=False, # Full precision model
107
- dtype="fp16", # Use fp16 instead of bf16 for better CUDA compatibility
108
  )
109
  return generator
110
 
 
104
  author_descriptions_path='dataset/calligraphy_styles_en.json',
105
  use_deepspeed=False,
106
  use_4bit_quantization=False, # Full precision model
107
+ dtype="fp32", # Use fp32 to avoid CUBLAS errors on ZeroGPU
108
  )
109
  return generator
110
 
inference.py CHANGED
@@ -365,13 +365,13 @@ class CalligraphyGenerator:
365
  checkpoint = {k: v.to(target_dtype) for k, v in checkpoint.items()}
366
 
367
  if not forced_dtype:
368
- # Always use fp16 for inference - bf16 has CUDA/CUBLAS compatibility issues
369
- target_dtype = torch.float16
370
- if checkpoint_dtype != torch.float16:
371
- print(f"Converting checkpoint from {checkpoint_dtype} to float16...")
372
- checkpoint = {k: v.to(torch.float16) for k, v in checkpoint.items()}
373
  else:
374
- print(f"Using float16 for inference")
375
 
376
  # Load weights into model
377
  model.load_state_dict(checkpoint, strict=False, assign=True)
 
365
  checkpoint = {k: v.to(target_dtype) for k, v in checkpoint.items()}
366
 
367
  if not forced_dtype:
368
+ # Always use fp32 for inference - fp16/bf16 have CUDA/CUBLAS compatibility issues on ZeroGPU
369
+ target_dtype = torch.float32
370
+ if checkpoint_dtype != torch.float32:
371
+ print(f"Converting checkpoint from {checkpoint_dtype} to float32...")
372
+ checkpoint = {k: v.to(torch.float32) for k, v in checkpoint.items()}
373
  else:
374
+ print(f"Using float32 for inference")
375
 
376
  # Load weights into model
377
  model.load_state_dict(checkpoint, strict=False, assign=True)