| import torch |
| import comfy.model_management |
| import node_helpers |
|
|
class TextEncodeAceStepAudio:
    """Node that turns ACE-Step tags and lyrics into a conditioning.

    The tags and lyrics are tokenized together through the supplied CLIP
    object, encoded, and the requested ``lyrics_strength`` is stored on the
    resulting conditioning.
    """

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "encode"
    CATEGORY = "conditioning"

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input specification (all inputs are required)."""
        required = {}
        required["clip"] = ("CLIP", )
        # Both text inputs are multiline and have dynamic prompts enabled;
        # each gets its own options dict so they are never shared objects.
        required["tags"] = ("STRING", {"multiline": True, "dynamicPrompts": True})
        required["lyrics"] = ("STRING", {"multiline": True, "dynamicPrompts": True})
        required["lyrics_strength"] = (
            "FLOAT",
            {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01},
        )
        return {"required": required}

    def encode(self, clip, tags, lyrics, lyrics_strength):
        """Encode ``tags`` and ``lyrics`` and attach ``lyrics_strength``.

        Args:
            clip: Object providing ``tokenize`` and
                ``encode_from_tokens_scheduled``.
            tags: Text passed to the tokenizer as the main prompt.
            lyrics: Text passed to the tokenizer via the ``lyrics`` keyword.
            lyrics_strength: Value stored on the conditioning under the
                ``"lyrics_strength"`` key.

        Returns:
            A one-element tuple containing the conditioning.
        """
        encoded = clip.encode_from_tokens_scheduled(
            clip.tokenize(tags, lyrics=lyrics)
        )
        extra_values = {"lyrics_strength": lyrics_strength}
        return (node_helpers.conditioning_set_values(encoded, extra_values), )
|
|
|
|
class EmptyAceStepLatentAudio:
    """Node that creates an all-zero audio latent of a requested duration."""

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "generate"
    CATEGORY = "latent/audio"

    def __init__(self):
        # Device on which the empty latent tensor is allocated.
        self.device = comfy.model_management.intermediate_device()

    @classmethod
    def INPUT_TYPES(s):
        """Return the input specification (all inputs are required)."""
        return {"required": {
            "seconds": ("FLOAT", {"default": 120.0, "min": 1.0, "max": 1000.0, "step": 0.1}),
            # This node produces audio latents, so the tooltip says so
            # (it previously referred to "latent images").
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096, "tooltip": "The number of audio latents in the batch."}),
        }}

    def generate(self, seconds, batch_size):
        """Build a zero-filled latent covering ``seconds`` of audio.

        Args:
            seconds: Requested duration; converted to a latent length.
            batch_size: Size of the first (batch) dimension of the latent.

        Returns:
            A one-element tuple holding a dict with the zero tensor under
            ``"samples"`` (shape ``[batch_size, 8, 16, length]``) and the
            string ``"audio"`` under ``"type"``.
        """
        # NOTE(review): presumably 44100 is the sample rate and 512 * 8 the
        # samples-per-latent-frame factor of the ACE-Step audio VAE; confirm.
        # The fractional part is truncated by int().
        length = int(seconds * 44100 / 512 / 8)
        latent = torch.zeros([batch_size, 8, 16, length], device=self.device)
        return ({"samples": latent, "type": "audio"}, )
|
|
|
|
# Maps each node's string identifier to the class implementing it.
# The keys are spelled out explicitly rather than derived from the class
# names, so renaming a class does not silently change its identifier.
NODE_CLASS_MAPPINGS = {}
NODE_CLASS_MAPPINGS["TextEncodeAceStepAudio"] = TextEncodeAceStepAudio
NODE_CLASS_MAPPINGS["EmptyAceStepLatentAudio"] = EmptyAceStepLatentAudio
|
|