SongGeneration

Runtime error

root committed on Feb 14

Commit

69ad7bf

1 Parent(s): fe9d2df

compatible with L40

Files changed (2) hide show

levo_inference.py CHANGED Viewed

@@ -48,7 +48,7 @@ class LeVoInference(torch.nn.Module):
             enforce_eager=True,
             dtype="bfloat16",
             gpu_memory_utilization=self.cfg.vllm.gpu_memory_utilization,
-            max_num_seqs=32,
             tokenizer=None,
             skip_tokenizer_init=True,
             enable_prompt_embeds=True,

             enforce_eager=True,
             dtype="bfloat16",
             gpu_memory_utilization=self.cfg.vllm.gpu_memory_utilization,
+            max_num_seqs=4,
             tokenizer=None,
             skip_tokenizer_init=True,
             enable_prompt_embeds=True,

vllm_hacked/v1/sample/sampler.py CHANGED Viewed

@@ -205,6 +205,13 @@ class Sampler(nn.Module):
         The various logits processing functions called in this method
         may update the logits tensor in-place.
         """
         assert not (sampling_metadata.all_greedy
                     and sampling_metadata.all_random)
@@ -223,17 +230,10 @@ class Sampler(nn.Module):
         assert sampling_metadata.temperature is not None
-        print("logits.shape:", logits.shape)
         # Apply temperature.
         logits = self.apply_temperature(logits, sampling_metadata.temperature,
                                         sampling_metadata.all_random)
-        if logits.dim() == 1:
-            logits = logits.unsqueeze(0)
-        if logits.size(0) != sampling_metadata.top_k.size(0):
-            target_batch = sampling_metadata.top_k.size(0)
-            logits = logits.expand(target_batch, -1).contiguous()
         # Apply logits processors that only apply to random sampling
         # (argmax invariant)
         for processor in sampling_metadata.logitsprocs.argmax_invariant:

         The various logits processing functions called in this method
         may update the logits tensor in-place.
         """
+        target_batch = sampling_metadata.top_k.size(0)
+        actual_batch = logits.size(0) if logits.dim() > 1 else 1
+        if actual_batch != target_batch:
+            if logits.dim() == 1:
+                logits = logits.unsqueeze(0)
+            logits = logits[0:1, :].expand(target_batch, -1).contiguous()
         assert not (sampling_metadata.all_greedy
                     and sampling_metadata.all_random)
         assert sampling_metadata.temperature is not None
         # Apply temperature.
         logits = self.apply_temperature(logits, sampling_metadata.temperature,
                                         sampling_metadata.all_random)
         # Apply logits processors that only apply to random sampling
         # (argmax invariant)
         for processor in sampling_metadata.logitsprocs.argmax_invariant: