TSXu committed on
Commit
8bfa41d
·
1 Parent(s): c322e84

Add dtype parameter to fix CUDA bf16 compatibility issues

Browse files

Use fp16 instead of bf16 for inference to avoid CUBLAS_STATUS_INVALID_VALUE
errors on GPUs/CUDA versions with limited bf16 support.

Files changed (2) hide show
  1. app.py +1 -0
  2. inference.py +30 -12
app.py CHANGED
@@ -104,6 +104,7 @@ def init_generator():
104
  author_descriptions_path='dataset/calligraphy_styles_en.json',
105
  use_deepspeed=False,
106
  use_4bit_quantization=False, # Full precision model
 
107
  )
108
  return generator
109
 
 
104
  author_descriptions_path='dataset/calligraphy_styles_en.json',
105
  use_deepspeed=False,
106
  use_4bit_quantization=False, # Full precision model
107
+ dtype="fp16", # Use fp16 instead of bf16 for better CUDA compatibility
108
  )
109
  return generator
110
 
inference.py CHANGED
@@ -150,7 +150,8 @@ class CalligraphyGenerator:
150
  author_descriptions_path: str = "calligraphy_styles_en.json",
151
  use_deepspeed: bool = False,
152
  use_4bit_quantization: bool = False,
153
- deepspeed_config: Optional[str] = None
 
154
  ):
155
  """
156
  Initialize the calligraphy generator
@@ -166,6 +167,7 @@ class CalligraphyGenerator:
166
  author_descriptions_path: path to author style descriptions JSON
167
  use_deepspeed: whether to use DeepSpeed ZeRO for memory optimization
168
  deepspeed_config: path to DeepSpeed config JSON file
 
169
  """
170
  self.device = torch.device(device)
171
  self.model_name = model_name
@@ -174,6 +176,7 @@ class CalligraphyGenerator:
174
  self.use_deepspeed = use_deepspeed
175
  self.deepspeed_config = deepspeed_config
176
  self.use_4bit_quantization = use_4bit_quantization
 
177
 
178
  # Load font and author style descriptions
179
  if os.path.exists(font_descriptions_path):
@@ -343,18 +346,33 @@ class CalligraphyGenerator:
343
  checkpoint_dtype = first_tensor.dtype
344
  print(f"Checkpoint dtype: {checkpoint_dtype}")
345
 
346
- # Use bfloat16 for inference if checkpoint is in bf16/fp16, otherwise keep as is
347
- # bfloat16 is preferred for stability, fp16 for speed
348
- if checkpoint_dtype in [torch.bfloat16, torch.float16]:
349
- target_dtype = checkpoint_dtype
350
- print(f"Using {target_dtype} for inference (memory efficient)")
351
- else:
352
- # Convert to bfloat16 for memory efficiency if available
353
- if torch.cuda.is_bf16_supported():
354
- target_dtype = torch.bfloat16
355
- print(f"Converting checkpoint from {checkpoint_dtype} to bfloat16 for efficiency...")
356
- checkpoint = {k: v.to(torch.bfloat16) for k, v in checkpoint.items()}
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  else:
 
 
358
  target_dtype = torch.float16
359
  print(f"Converting checkpoint from {checkpoint_dtype} to float16 for efficiency...")
360
  checkpoint = {k: v.half() for k, v in checkpoint.items()}
 
150
  author_descriptions_path: str = "calligraphy_styles_en.json",
151
  use_deepspeed: bool = False,
152
  use_4bit_quantization: bool = False,
153
+ deepspeed_config: Optional[str] = None,
154
+ dtype: Optional[str] = None
155
  ):
156
  """
157
  Initialize the calligraphy generator
 
167
  author_descriptions_path: path to author style descriptions JSON
168
  use_deepspeed: whether to use DeepSpeed ZeRO for memory optimization
169
  deepspeed_config: path to DeepSpeed config JSON file
170
+ dtype: force specific dtype for inference: "fp16", "bf16", "fp32", or None for auto
171
  """
172
  self.device = torch.device(device)
173
  self.model_name = model_name
 
176
  self.use_deepspeed = use_deepspeed
177
  self.deepspeed_config = deepspeed_config
178
  self.use_4bit_quantization = use_4bit_quantization
179
+ self.forced_dtype = dtype # "fp16", "bf16", "fp32", or None for auto
180
 
181
  # Load font and author style descriptions
182
  if os.path.exists(font_descriptions_path):
 
346
  checkpoint_dtype = first_tensor.dtype
347
  print(f"Checkpoint dtype: {checkpoint_dtype}")
348
 
349
+ # Check if user forced a specific dtype
350
+ forced_dtype = getattr(self, 'forced_dtype', None)
351
+ if forced_dtype:
352
+ dtype_map = {
353
+ "fp16": torch.float16,
354
+ "bf16": torch.bfloat16,
355
+ "fp32": torch.float32,
356
+ }
357
+ if forced_dtype not in dtype_map:
358
+ print(f"Warning: Unknown dtype '{forced_dtype}', using auto selection")
359
+ forced_dtype = None
360
+ else:
361
+ target_dtype = dtype_map[forced_dtype]
362
+ print(f"Using forced dtype: {target_dtype}")
363
+ if checkpoint_dtype != target_dtype:
364
+ print(f"Converting checkpoint from {checkpoint_dtype} to {target_dtype}...")
365
+ checkpoint = {k: v.to(target_dtype) for k, v in checkpoint.items()}
366
+
367
+ if not forced_dtype:
368
+ # Use bfloat16 for inference if checkpoint is in bf16/fp16, otherwise keep as is
369
+ # bfloat16 is preferred for stability, fp16 for speed
370
+ if checkpoint_dtype in [torch.bfloat16, torch.float16]:
371
+ target_dtype = checkpoint_dtype
372
+ print(f"Using {target_dtype} for inference (memory efficient)")
373
  else:
374
+ # Convert to float16 for memory efficiency (more compatible than bf16)
375
+ # bf16 can have CUBLAS issues on some GPUs/CUDA versions
376
  target_dtype = torch.float16
377
  print(f"Converting checkpoint from {checkpoint_dtype} to float16 for efficiency...")
378
  checkpoint = {k: v.half() for k, v in checkpoint.items()}