josesho committed on
Commit
96539ec
·
verified ·
1 Parent(s): 9f9407a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -8,8 +8,6 @@ from transformers import AutoTokenizer, AutoModel
8
 
9
 
10
 
11
-
12
-
13
  if torch.cuda.is_available():
14
  # Checks if you have an Nvidia GPU.
15
  # If so, it will use it for inference.
@@ -25,7 +23,7 @@ print(f"Using device: {DEVICE}")
25
 
26
 
27
 
28
- PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
29
  try:
30
  # Load model and tokenizer
31
  TOKENIZER = AutoTokenizer.from_pretrained(
@@ -81,13 +79,13 @@ def add_gumbel_noise(logits, temperature):
81
  """
82
  The Gumbel max is a method for sampling categorical distributions.
83
  According to arXiv:2409.02908, for MDM, low-precision Gumbel Max improves perplexity score but reduces generation quality.
84
- Thus, we use float32.
85
  """
86
  if temperature <= 0:
87
  return logits
88
 
89
- logits = logits.to(torch.float32)
90
- noise = torch.rand_like(logits, dtype=torch.float32)
91
  gumbel_noise = (-torch.log(noise)) ** temperature
92
  return logits.exp() / gumbel_noise
93
 
@@ -249,7 +247,7 @@ def generate_response_with_visualization(
249
 
250
  # Calculate confidence scores for remasking
251
  if remasking == "low_confidence":
252
- p = F.softmax(logits.to(torch.float32), dim=-1)
253
  x0_p = torch.squeeze(
254
  torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1
255
  ) # b, l
 
8
 
9
 
10
 
 
 
11
  if torch.cuda.is_available():
12
  # Checks if you have an Nvidia GPU.
13
  # If so, it will use it for inference.
 
23
 
24
 
25
 
26
+ # PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
27
  try:
28
  # Load model and tokenizer
29
  TOKENIZER = AutoTokenizer.from_pretrained(
 
79
  """
80
  The Gumbel max is a method for sampling categorical distributions.
81
  According to arXiv:2409.02908, for MDM, low-precision Gumbel Max improves perplexity score but reduces generation quality.
82
+ Thus, we use float64.
83
  """
84
  if temperature <= 0:
85
  return logits
86
 
87
+ logits = logits.to(torch.float64)
88
+ noise = torch.rand_like(logits, dtype=torch.float64)
89
  gumbel_noise = (-torch.log(noise)) ** temperature
90
  return logits.exp() / gumbel_noise
91
 
 
247
 
248
  # Calculate confidence scores for remasking
249
  if remasking == "low_confidence":
250
+ p = F.softmax(logits.to(torch.float64), dim=-1)
251
  x0_p = torch.squeeze(
252
  torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1
253
  ) # b, l