DocUA committed on
Commit
002d07a
·
1 Parent(s): d54528e

fix: Configure CUDA precision to disable BF16 on older GPUs.

Browse files
Files changed (1) hide show
  1. app_hf.py +13 -0
app_hf.py CHANGED
@@ -84,6 +84,19 @@ threading.Thread(target=_warmup_hf_cache, daemon=True).start()
84
  device = "cuda" if torch.cuda.is_available() else "cpu"
85
  dtype = torch.float16 if torch.cuda.is_available() else torch.float32
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  class ModelManager:
88
  def __init__(self):
89
  self.models = {}
 
84
  device = "cuda" if torch.cuda.is_available() else "cpu"
85
  dtype = torch.float16 if torch.cuda.is_available() else torch.float32
86
 
87
+ def _configure_cuda_precision():
88
+ if not torch.cuda.is_available():
89
+ return
90
+ # Avoid BF16 on GPUs that don't support it (sm80+).
91
+ try:
92
+ major, minor = torch.cuda.get_device_capability()
93
+ if (major, minor) < (8, 0):
94
+ torch.backends.cuda.matmul.allow_bf16 = False
95
+ except Exception:
96
+ torch.backends.cuda.matmul.allow_bf16 = False
97
+
98
+ _configure_cuda_precision()
99
+
100
  class ModelManager:
101
  def __init__(self):
102
  self.models = {}