Spaces:
Running
on
A100
Running
on
A100
fix audio strength control
Browse files
- acestep/gradio_ui.py +2 -2
- acestep/handler.py +5 -1
acestep/gradio_ui.py
CHANGED
|
@@ -438,8 +438,8 @@ def create_generation_section(dit_handler, llm_handler, init_params=None) -> dic
|
|
| 438 |
maximum=1.0,
|
| 439 |
value=1.0,
|
| 440 |
step=0.01,
|
| 441 |
-
label="
|
| 442 |
-
info="Control how many denoising steps use
|
| 443 |
visible=True
|
| 444 |
)
|
| 445 |
|
|
|
|
| 438 |
maximum=1.0,
|
| 439 |
value=1.0,
|
| 440 |
step=0.01,
|
| 441 |
+
label="LM codes strength",
|
| 442 |
+
info="Control how many denoising steps use LM-generated codes",
|
| 443 |
visible=True
|
| 444 |
)
|
| 445 |
|
acestep/handler.py
CHANGED
|
@@ -1406,6 +1406,9 @@ class AceStepHandler:
|
|
| 1406 |
for seq in non_cover_text_attention_masks
|
| 1407 |
])
|
| 1408 |
|
|
|
|
|
|
|
|
|
|
| 1409 |
# Prepare batch
|
| 1410 |
batch = {
|
| 1411 |
"keys": keys,
|
|
@@ -1690,6 +1693,7 @@ class AceStepHandler:
|
|
| 1690 |
repainting_end=repainting_end,
|
| 1691 |
instructions=instructions,
|
| 1692 |
audio_code_hints=audio_code_hints,
|
|
|
|
| 1693 |
)
|
| 1694 |
|
| 1695 |
processed_data = self.preprocess_batch(batch)
|
|
@@ -1738,7 +1742,7 @@ class AceStepHandler:
|
|
| 1738 |
"silence_latent": self.silence_latent,
|
| 1739 |
"seed": seed_param,
|
| 1740 |
"non_cover_text_hidden_states": non_cover_text_hidden_states,
|
| 1741 |
-
"
|
| 1742 |
"precomputed_lm_hints_25Hz": precomputed_lm_hints_25Hz,
|
| 1743 |
"audio_cover_strength": audio_cover_strength,
|
| 1744 |
"infer_method": infer_method,
|
|
|
|
| 1406 |
for seq in non_cover_text_attention_masks
|
| 1407 |
])
|
| 1408 |
|
| 1409 |
+
if audio_cover_strength < 1.0:
|
| 1410 |
+
assert padded_non_cover_text_input_ids is not None, "When audio_cover_strength < 1.0, padded_non_cover_text_input_ids must not be None"
|
| 1411 |
+
assert padded_non_cover_text_attention_masks is not None, "When audio_cover_strength < 1.0, padded_non_cover_text_attention_masks must not be None"
|
| 1412 |
# Prepare batch
|
| 1413 |
batch = {
|
| 1414 |
"keys": keys,
|
|
|
|
| 1693 |
repainting_end=repainting_end,
|
| 1694 |
instructions=instructions,
|
| 1695 |
audio_code_hints=audio_code_hints,
|
| 1696 |
+
audio_cover_strength=audio_cover_strength,
|
| 1697 |
)
|
| 1698 |
|
| 1699 |
processed_data = self.preprocess_batch(batch)
|
|
|
|
| 1742 |
"silence_latent": self.silence_latent,
|
| 1743 |
"seed": seed_param,
|
| 1744 |
"non_cover_text_hidden_states": non_cover_text_hidden_states,
|
| 1745 |
+
"non_cover_text_attention_mask": non_cover_text_attention_masks,
|
| 1746 |
"precomputed_lm_hints_25Hz": precomputed_lm_hints_25Hz,
|
| 1747 |
"audio_cover_strength": audio_cover_strength,
|
| 1748 |
"infer_method": infer_method,
|