Update app.py
app.py CHANGED
@@ -8,8 +8,6 @@ from transformers import AutoTokenizer, AutoModel
 
 
 
-
-
 if torch.cuda.is_available():
     # Checks if you have an Nvidia GPU.
     # If so, it will use it for inference.
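Only the CUDA check is visible in this hunk, but the hunk header below (print(f"Using device: {DEVICE}")) and the MPS watermark variable in the next hunk suggest the app picks between CUDA, Apple MPS, and CPU. A minimal sketch of that selection; the MPS branch is an assumption, not something shown in the diff:

import torch

# Sketch only: the diff shows just the CUDA check; the MPS fallback is an
# assumption based on the PYTORCH_MPS_HIGH_WATERMARK_RATIO reference below.
if torch.cuda.is_available():
    DEVICE = "cuda"  # Nvidia GPU, used for inference
elif torch.backends.mps.is_available():
    DEVICE = "mps"   # Apple-silicon GPU
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")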
@@ -25,7 +23,7 @@ print(f"Using device: {DEVICE}")
 
 
 
-PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
+# PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
 try:
     # Load model and tokenizer
     TOKENIZER = AutoTokenizer.from_pretrained(
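The removed line assigned PYTORCH_MPS_HIGH_WATERMARK_RATIO as a plain Python variable, which PyTorch never reads; it is an environment variable consumed by the MPS memory allocator (0.0 lifts the allocation ceiling), so commenting it out changes nothing at runtime. If the intent was to actually disable the MPS memory limit, a sketch of the usual approach, applied before torch first touches MPS:

import os

# Must be in the process environment before the MPS allocator starts;
# "0.0" disables PyTorch's upper bound on MPS memory use.
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"

import torch  # imported only after the variable is set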
@@ -81,13 +79,13 @@ def add_gumbel_noise(logits, temperature):
     """
     The Gumbel max is a method for sampling categorical distributions.
     According to arXiv:2409.02908, for MDM, low-precision Gumbel Max improves perplexity score but reduces generation quality.
-    Thus, we use
+    Thus, we use float64.
     """
     if temperature <= 0:
         return logits
 
-    logits = logits.to(torch.
-    noise = torch.rand_like(logits, dtype=torch.
+    logits = logits.to(torch.float64)
+    noise = torch.rand_like(logits, dtype=torch.float64)
     gumbel_noise = (-torch.log(noise)) ** temperature
     return logits.exp() / gumbel_noise
 
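Although the function divides exp(logits) by (-log u) ** temperature instead of adding noise, taking the log shows it ranks tokens exactly like the textbook Gumbel-max rule: log(exp(logits) / (-log u)^T) = logits + T * g, where g = -log(-log u) is standard Gumbel noise; float64 keeps the exp/log round trip from underflowing on low-probability tokens. A small self-contained check of that equivalence (illustrative, not from the repo):

import torch

logits = torch.randn(4, 10, dtype=torch.float64)
u = torch.rand_like(logits)
T = 0.7

scores = logits.exp() / (-torch.log(u)) ** T  # as in add_gumbel_noise
gumbel = -torch.log(-torch.log(u))            # standard Gumbel(0, 1) noise

# Both rankings pick the same token, since log() is monotonic.
print(torch.equal(scores.argmax(-1), (logits + T * gumbel).argmax(-1)))  # True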
@@ -249,7 +247,7 @@ def generate_response_with_visualization(
 
         # Calculate confidence scores for remasking
         if remasking == "low_confidence":
-            p = F.softmax(logits.to(torch.
+            p = F.softmax(logits.to(torch.float64), dim=-1)
             x0_p = torch.squeeze(
                 torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1
             )  # b, l
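This hunk reads off, for every position, the softmax probability of the token that was just sampled; low-confidence remasking then re-masks the positions where that probability is smallest. A standalone sketch of the gather step, with assumed shapes (logits: [batch, length, vocab]; x0: sampled token ids, [batch, length]):

import torch
import torch.nn.functional as F

logits = torch.randn(2, 5, 32)     # [batch, length, vocab], toy values
x0 = torch.randint(0, 32, (2, 5))  # stand-in for the sampled token ids

p = F.softmax(logits.to(torch.float64), dim=-1)  # [b, l, vocab]
x0_p = torch.squeeze(
    torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1
)                                  # [b, l]: probability of each chosen token

# Low-confidence remasking re-masks the positions with the smallest x0_p.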