Update app.py
app.py CHANGED
@@ -8,8 +8,6 @@ from transformers import AutoTokenizer, AutoModel
 
 
 
-
-
 if torch.cuda.is_available():
     # Checks if you have an Nvidia GPU.
     # If so, it will use it for inference.
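Only the CUDA check is visible in this hunk, but the hunk header below (print(f"Using device: {DEVICE}")) and the MPS watermark variable in the next hunk suggest the app picks between CUDA, Apple MPS, and CPU. A minimal sketch of that selection; the MPS branch is an assumption, not something shown in the diff:

import torch

# Sketch only: the diff shows just the CUDA check; the MPS fallback is an
# assumption based on the PYTORCH_MPS_HIGH_WATERMARK_RATIO reference below.
if torch.cuda.is_available():
    DEVICE = "cuda"  # Nvidia GPU, used for inference
elif torch.backends.mps.is_available():
    DEVICE = "mps"   # Apple-silicon GPU
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")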
@@ -25,7 +23,7 @@ print(f"Using device: {DEVICE}")
 
 
 
-PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
+# PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
 try:
     # Load model and tokenizer
     TOKENIZER = AutoTokenizer.from_pretrained(
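The removed line assigned PYTORCH_MPS_HIGH_WATERMARK_RATIO as a plain Python variable, which PyTorch never reads; it is an environment variable consumed by the MPS memory allocator (0.0 lifts the allocation ceiling), so commenting it out changes nothing at runtime. If the intent was to actually disable the MPS memory limit, a sketch of the usual approach, applied before torch first touches MPS:

import os

# Must be in the process environment before the MPS allocator starts;
# "0.0" disables PyTorch's upper bound on MPS memory use.
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

import torch  # imported only after the variable is set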
@@ -81,13 +79,13 @@ def add_gumbel_noise(logits, temperature):
     """
     The Gumbel max is a method for sampling categorical distributions.
     According to arXiv:2409.02908, for MDM, low-precision Gumbel Max improves perplexity score but reduces generation quality.
-    Thus, we use
+    Thus, we use float64.
     """
     if temperature <= 0:
         return logits
 
-    logits = logits.to(torch.
-    noise = torch.rand_like(logits, dtype=torch.
+    logits = logits.to(torch.float64)
+    noise = torch.rand_like(logits, dtype=torch.float64)
     gumbel_noise = (-torch.log(noise)) ** temperature
     return logits.exp() / gumbel_noise
 
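Although the function divides exp(logits) by (-log u) ** temperature instead of adding noise, taking the log shows it ranks tokens exactly like the textbook Gumbel-max rule: log(exp(logits) / (-log u)^T) = logits + T * g, where g = -log(-log u) is standard Gumbel noise; float64 keeps the exp/log round trip from underflowing on low-probability tokens. A small self-contained check of that equivalence (illustrative, not from the repo):

import torch

logits = torch.randn(4, 10, dtype=torch.float64)
u = torch.rand_like(logits)
T = 0.7

scores = logits.exp() / (-torch.log(u)) ** T  # as in add_gumbel_noise
gumbel = -torch.log(-torch.log(u))            # standard Gumbel(0, 1) noise

# Both rankings pick the same token, since log() is monotonic.
print(torch.equal(scores.argmax(-1), (logits + T * gumbel).argmax(-1)))  # True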
@@ -249,7 +247,7 @@ def generate_response_with_visualization(
 
         # Calculate confidence scores for remasking
         if remasking == "low_confidence":
-            p = F.softmax(logits.to(torch.
+            p = F.softmax(logits.to(torch.float64), dim=-1)
             x0_p = torch.squeeze(
                 torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1
             )  # b, l
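This hunk reads off, for every position, the softmax probability of the token that was just sampled; low-confidence remasking then re-masks the positions where that probability is smallest. A standalone sketch of the gather step, with assumed shapes (logits: [batch, length, vocab]; x0: sampled token ids, [batch, length]):

import torch
import torch.nn.functional as F

logits = torch.randn(2, 5, 32)     # [batch, length, vocab], toy values
x0 = torch.randint(0, 32, (2, 5))  # stand-in for the sampled token ids

p = F.softmax(logits.to(torch.float64), dim=-1)  # [b, l, vocab]
x0_p = torch.squeeze(
    torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1
)                                  # [b, l]: probability of each chosen token

# Low-confidence remasking re-masks the positions with the smallest x0_p.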