Spaces:

ACE-Step
/

Ace-Step-v1.5

Running on A100

App Files Community

ChuxiJ committed 25 days ago

Commit

bb87271

1 Parent(s): 858eb3e

add shift support

Browse files

Files changed (9) hide show

acestep/gradio_ui/events/__init__.py +6 -0
acestep/gradio_ui/events/generation_handlers.py +17 -9
acestep/gradio_ui/events/results_handlers.py +9 -4
acestep/gradio_ui/i18n/en.json +2 -0
acestep/gradio_ui/i18n/ja.json +2 -0
acestep/gradio_ui/i18n/zh.json +2 -0
acestep/gradio_ui/interfaces/generation.py +10 -0
acestep/handler.py +4 -0
acestep/inference.py +3 -0

acestep/gradio_ui/events/__init__.py CHANGED Viewed

@@ -34,6 +34,7 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             generation_section["inference_steps"],
             generation_section["guidance_scale"],
             generation_section["use_adg"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
             generation_section["task_type"],
@@ -235,12 +236,14 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             generation_section["use_adg"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
             generation_section["audio_format"],
             generation_section["lm_temperature"],
             generation_section["lm_cfg_scale"],
             generation_section["lm_top_k"],
             generation_section["lm_top_p"],
             generation_section["lm_negative_prompt"],
             generation_section["use_cot_caption"],
             generation_section["use_cot_language"],
             generation_section["audio_cover_strength"],
@@ -250,6 +253,7 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             generation_section["repainting_end"],
             generation_section["track_name"],
             generation_section["complete_track_classes"],
             results_section["is_format_caption_state"]
         ]
     )
@@ -396,6 +400,7 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             generation_section["use_adg"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
             generation_section["audio_format"],
             generation_section["lm_temperature"],
             generation_section["think_checkbox"],
@@ -547,6 +552,7 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             generation_section["use_adg"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
             generation_section["audio_format"],
             generation_section["lm_temperature"],
             generation_section["think_checkbox"],

             generation_section["inference_steps"],
             generation_section["guidance_scale"],
             generation_section["use_adg"],
+            generation_section["shift"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
             generation_section["task_type"],
             generation_section["use_adg"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
+            generation_section["shift"],
             generation_section["audio_format"],
             generation_section["lm_temperature"],
             generation_section["lm_cfg_scale"],
             generation_section["lm_top_k"],
             generation_section["lm_top_p"],
             generation_section["lm_negative_prompt"],
+            generation_section["use_cot_metas"],  # Added: use_cot_metas
             generation_section["use_cot_caption"],
             generation_section["use_cot_language"],
             generation_section["audio_cover_strength"],
             generation_section["repainting_end"],
             generation_section["track_name"],
             generation_section["complete_track_classes"],
+            generation_section["instrumental_checkbox"],  # Added: instrumental_checkbox
             results_section["is_format_caption_state"]
         ]
     )
             generation_section["use_adg"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
+            generation_section["shift"],
             generation_section["audio_format"],
             generation_section["lm_temperature"],
             generation_section["think_checkbox"],
             generation_section["use_adg"],
             generation_section["cfg_interval_start"],
             generation_section["cfg_interval_end"],
+            generation_section["shift"],
             generation_section["audio_format"],
             generation_section["lm_temperature"],
             generation_section["think_checkbox"],

acestep/gradio_ui/events/generation_handlers.py CHANGED Viewed

@@ -19,7 +19,7 @@ def load_metadata(file_obj):
     """Load generation parameters from a JSON file"""
     if file_obj is None:
         gr.Warning(t("messages.no_file_selected"))
-        return [None] * 31 + [False]  # Return None for all fields, False for is_format_caption
     try:
         # Read the uploaded file
@@ -74,35 +74,38 @@ def load_metadata(file_obj):
         lm_top_k = metadata.get('lm_top_k', 0)
         lm_top_p = metadata.get('lm_top_p', 0.9)
         lm_negative_prompt = metadata.get('lm_negative_prompt', 'NO USER INPUT')
         use_cot_caption = metadata.get('use_cot_caption', True)
         use_cot_language = metadata.get('use_cot_language', True)
         audio_cover_strength = metadata.get('audio_cover_strength', 1.0)
-        think = metadata.get('think', True)
         audio_codes = metadata.get('audio_codes', '')
         repainting_start = metadata.get('repainting_start', 0.0)
         repainting_end = metadata.get('repainting_end', -1)
         track_name = metadata.get('track_name')
         complete_track_classes = metadata.get('complete_track_classes', [])
         gr.Info(t("messages.params_loaded", filename=os.path.basename(filepath)))
         return (
             task_type, captions, lyrics, vocal_language, bpm, key_scale, time_signature,
             audio_duration, batch_size, inference_steps, guidance_scale, seed, random_seed,
-            use_adg, cfg_interval_start, cfg_interval_end, audio_format,
             lm_temperature, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
-            use_cot_caption, use_cot_language, audio_cover_strength,
             think, audio_codes, repainting_start, repainting_end,
-            track_name, complete_track_classes,
             True  # Set is_format_caption to True when loading from file
         )
     except json.JSONDecodeError as e:
         gr.Warning(t("messages.invalid_json", error=str(e)))
-        return [None] * 31 + [False]
     except Exception as e:
         gr.Warning(t("messages.load_error", error=str(e)))
-        return [None] * 31 + [False]
 def load_random_example(task_type: str):
@@ -282,21 +285,25 @@ def update_model_type_settings(config_path):
     config_path_lower = config_path.lower()
     if "turbo" in config_path_lower:
-        # Turbo model: max 8 steps, hide CFG/ADG, only show text2music/repaint/cover
         return (
             gr.update(value=8, maximum=8, minimum=1),  # inference_steps
             gr.update(visible=False),  # guidance_scale
             gr.update(visible=False),  # use_adg
             gr.update(visible=False),  # cfg_interval_start
             gr.update(visible=False),  # cfg_interval_end
             gr.update(choices=TASK_TYPES_TURBO),  # task_type
         )
     elif "base" in config_path_lower:
-        # Base model: max 100 steps, show CFG/ADG, show all task types
         return (
             gr.update(value=32, maximum=100, minimum=1),  # inference_steps
             gr.update(visible=True),  # guidance_scale
             gr.update(visible=True),  # use_adg
             gr.update(visible=True),  # cfg_interval_start
             gr.update(visible=True),  # cfg_interval_end
             gr.update(choices=TASK_TYPES_BASE),  # task_type
@@ -307,6 +314,7 @@ def update_model_type_settings(config_path):
             gr.update(value=8, maximum=8, minimum=1),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(choices=TASK_TYPES_TURBO),  # task_type

     """Load generation parameters from a JSON file"""
     if file_obj is None:
         gr.Warning(t("messages.no_file_selected"))
+        return [None] * 34 + [False]  # Return None for all fields, False for is_format_caption
     try:
         # Read the uploaded file
         lm_top_k = metadata.get('lm_top_k', 0)
         lm_top_p = metadata.get('lm_top_p', 0.9)
         lm_negative_prompt = metadata.get('lm_negative_prompt', 'NO USER INPUT')
+        use_cot_metas = metadata.get('use_cot_metas', True)  # Added: read use_cot_metas
         use_cot_caption = metadata.get('use_cot_caption', True)
         use_cot_language = metadata.get('use_cot_language', True)
         audio_cover_strength = metadata.get('audio_cover_strength', 1.0)
+        think = metadata.get('thinking', True)  # Fixed: read 'thinking' not 'think'
         audio_codes = metadata.get('audio_codes', '')
         repainting_start = metadata.get('repainting_start', 0.0)
         repainting_end = metadata.get('repainting_end', -1)
         track_name = metadata.get('track_name')
         complete_track_classes = metadata.get('complete_track_classes', [])
+        shift = metadata.get('shift', 3.0)  # Default 3.0 for base models
+        instrumental = metadata.get('instrumental', False)  # Added: read instrumental
         gr.Info(t("messages.params_loaded", filename=os.path.basename(filepath)))
         return (
             task_type, captions, lyrics, vocal_language, bpm, key_scale, time_signature,
             audio_duration, batch_size, inference_steps, guidance_scale, seed, random_seed,
+            use_adg, cfg_interval_start, cfg_interval_end, shift, audio_format,
             lm_temperature, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
+            use_cot_metas, use_cot_caption, use_cot_language, audio_cover_strength,
             think, audio_codes, repainting_start, repainting_end,
+            track_name, complete_track_classes, instrumental,
             True  # Set is_format_caption to True when loading from file
         )
     except json.JSONDecodeError as e:
         gr.Warning(t("messages.invalid_json", error=str(e)))
+        return [None] * 34 + [False]
     except Exception as e:
         gr.Warning(t("messages.load_error", error=str(e)))
+        return [None] * 34 + [False]
 def load_random_example(task_type: str):
     config_path_lower = config_path.lower()
     if "turbo" in config_path_lower:
+        # Turbo model: max 8 steps, hide CFG/ADG/shift, only show text2music/repaint/cover
+        # Shift is not effective for turbo models, default to 1.0
         return (
             gr.update(value=8, maximum=8, minimum=1),  # inference_steps
             gr.update(visible=False),  # guidance_scale
             gr.update(visible=False),  # use_adg
+            gr.update(value=1.0, visible=False),  # shift (not effective for turbo)
             gr.update(visible=False),  # cfg_interval_start
             gr.update(visible=False),  # cfg_interval_end
             gr.update(choices=TASK_TYPES_TURBO),  # task_type
         )
     elif "base" in config_path_lower:
+        # Base model: max 100 steps, show CFG/ADG/shift, show all task types
+        # Shift range 1.0~5.0, default 3.0 for base models
         return (
             gr.update(value=32, maximum=100, minimum=1),  # inference_steps
             gr.update(visible=True),  # guidance_scale
             gr.update(visible=True),  # use_adg
+            gr.update(value=3.0, visible=True),  # shift (effective for base, default 3.0)
             gr.update(visible=True),  # cfg_interval_start
             gr.update(visible=True),  # cfg_interval_end
             gr.update(choices=TASK_TYPES_BASE),  # task_type
             gr.update(value=8, maximum=8, minimum=1),
             gr.update(visible=False),
             gr.update(visible=False),
+            gr.update(value=1.0, visible=False),  # shift default 1.0
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(choices=TASK_TYPES_TURBO),  # task_type

acestep/gradio_ui/events/results_handlers.py CHANGED Viewed

@@ -267,7 +267,7 @@ def generate_with_progress(
     reference_audio, audio_duration, batch_size_input, src_audio,
     text2music_audio_code_string, repainting_start, repainting_end,
     instruction_display_gen, audio_cover_strength, task_type,
-    use_adg, cfg_interval_start, cfg_interval_end, audio_format, lm_temperature,
     think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
     use_cot_metas, use_cot_caption, use_cot_language, is_format_caption,
     constrained_decoding_debug,
@@ -300,6 +300,7 @@ def generate_with_progress(
         use_adg=use_adg,
         cfg_interval_start=cfg_interval_start,
         cfg_interval_end=cfg_interval_end,
         repainting_start=repainting_start,
         repainting_end=repainting_end,
         audio_cover_strength=audio_cover_strength,
@@ -650,7 +651,7 @@ def capture_current_params(
     reference_audio, audio_duration, batch_size_input, src_audio,
     text2music_audio_code_string, repainting_start, repainting_end,
     instruction_display_gen, audio_cover_strength, task_type,
-    use_adg, cfg_interval_start, cfg_interval_end, audio_format, lm_temperature,
     think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
     use_cot_metas, use_cot_caption, use_cot_language,
     constrained_decoding_debug, allow_lm_batch, auto_score, score_scale, lm_batch_chunk_size,
@@ -686,6 +687,7 @@ def capture_current_params(
         "use_adg": use_adg,
         "cfg_interval_start": cfg_interval_start,
         "cfg_interval_end": cfg_interval_end,
         "audio_format": audio_format,
         "lm_temperature": lm_temperature,
         "think_checkbox": think_checkbox,
@@ -713,7 +715,7 @@ def generate_with_batch_management(
     reference_audio, audio_duration, batch_size_input, src_audio,
     text2music_audio_code_string, repainting_start, repainting_end,
     instruction_display_gen, audio_cover_strength, task_type,
-    use_adg, cfg_interval_start, cfg_interval_end, audio_format, lm_temperature,
     think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
     use_cot_metas, use_cot_caption, use_cot_language, is_format_caption,
     constrained_decoding_debug,
@@ -741,7 +743,7 @@ def generate_with_batch_management(
         reference_audio, audio_duration, batch_size_input, src_audio,
         text2music_audio_code_string, repainting_start, repainting_end,
         instruction_display_gen, audio_cover_strength, task_type,
-        use_adg, cfg_interval_start, cfg_interval_end, audio_format, lm_temperature,
         think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
         use_cot_metas, use_cot_caption, use_cot_language, is_format_caption,
         constrained_decoding_debug,
@@ -811,6 +813,7 @@ def generate_with_batch_management(
         "use_adg": use_adg,
         "cfg_interval_start": cfg_interval_start,
         "cfg_interval_end": cfg_interval_end,
         "audio_format": audio_format,
         "lm_temperature": lm_temperature,
         "think_checkbox": think_checkbox,
@@ -964,6 +967,7 @@ def generate_next_batch_background(
         params.setdefault("use_adg", False)
         params.setdefault("cfg_interval_start", 0.0)
         params.setdefault("cfg_interval_end", 1.0)
         params.setdefault("audio_format", "mp3")
         params.setdefault("lm_temperature", 0.85)
         params.setdefault("think_checkbox", True)
@@ -1010,6 +1014,7 @@ def generate_next_batch_background(
             use_adg=params.get("use_adg"),
             cfg_interval_start=params.get("cfg_interval_start"),
             cfg_interval_end=params.get("cfg_interval_end"),
             audio_format=params.get("audio_format"),
             lm_temperature=params.get("lm_temperature"),
             think_checkbox=params.get("think_checkbox"),

     reference_audio, audio_duration, batch_size_input, src_audio,
     text2music_audio_code_string, repainting_start, repainting_end,
     instruction_display_gen, audio_cover_strength, task_type,
+    use_adg, cfg_interval_start, cfg_interval_end, shift, audio_format, lm_temperature,
     think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
     use_cot_metas, use_cot_caption, use_cot_language, is_format_caption,
     constrained_decoding_debug,
         use_adg=use_adg,
         cfg_interval_start=cfg_interval_start,
         cfg_interval_end=cfg_interval_end,
+        shift=shift,
         repainting_start=repainting_start,
         repainting_end=repainting_end,
         audio_cover_strength=audio_cover_strength,
     reference_audio, audio_duration, batch_size_input, src_audio,
     text2music_audio_code_string, repainting_start, repainting_end,
     instruction_display_gen, audio_cover_strength, task_type,
+    use_adg, cfg_interval_start, cfg_interval_end, shift, audio_format, lm_temperature,
     think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
     use_cot_metas, use_cot_caption, use_cot_language,
     constrained_decoding_debug, allow_lm_batch, auto_score, score_scale, lm_batch_chunk_size,
         "use_adg": use_adg,
         "cfg_interval_start": cfg_interval_start,
         "cfg_interval_end": cfg_interval_end,
+        "shift": shift,
         "audio_format": audio_format,
         "lm_temperature": lm_temperature,
         "think_checkbox": think_checkbox,
     reference_audio, audio_duration, batch_size_input, src_audio,
     text2music_audio_code_string, repainting_start, repainting_end,
     instruction_display_gen, audio_cover_strength, task_type,
+    use_adg, cfg_interval_start, cfg_interval_end, shift, audio_format, lm_temperature,
     think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
     use_cot_metas, use_cot_caption, use_cot_language, is_format_caption,
     constrained_decoding_debug,
         reference_audio, audio_duration, batch_size_input, src_audio,
         text2music_audio_code_string, repainting_start, repainting_end,
         instruction_display_gen, audio_cover_strength, task_type,
+        use_adg, cfg_interval_start, cfg_interval_end, shift, audio_format, lm_temperature,
         think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
         use_cot_metas, use_cot_caption, use_cot_language, is_format_caption,
         constrained_decoding_debug,
         "use_adg": use_adg,
         "cfg_interval_start": cfg_interval_start,
         "cfg_interval_end": cfg_interval_end,
+        "shift": shift,
         "audio_format": audio_format,
         "lm_temperature": lm_temperature,
         "think_checkbox": think_checkbox,
         params.setdefault("use_adg", False)
         params.setdefault("cfg_interval_start", 0.0)
         params.setdefault("cfg_interval_end", 1.0)
+        params.setdefault("shift", 1.0)
         params.setdefault("audio_format", "mp3")
         params.setdefault("lm_temperature", 0.85)
         params.setdefault("think_checkbox", True)
             use_adg=params.get("use_adg"),
             cfg_interval_start=params.get("cfg_interval_start"),
             cfg_interval_end=params.get("cfg_interval_end"),
+            shift=params.get("shift"),
             audio_format=params.get("audio_format"),
             lm_temperature=params.get("lm_temperature"),
             think_checkbox=params.get("think_checkbox"),

acestep/gradio_ui/i18n/en.json CHANGED Viewed

@@ -116,6 +116,8 @@
     "audio_format_info": "Audio format for saved files",
     "use_adg_label": "Use ADG",
     "use_adg_info": "Enable Angle Domain Guidance",
     "cfg_interval_start": "CFG Interval Start",
     "cfg_interval_end": "CFG Interval End",
     "lm_params_title": "🤖 LM Generation Parameters",

     "audio_format_info": "Audio format for saved files",
     "use_adg_label": "Use ADG",
     "use_adg_info": "Enable Angle Domain Guidance",
+    "shift_label": "Shift",
+    "shift_info": "Timestep shift factor for base models (range 1.0~5.0, default 3.0). Not effective for turbo models.",
     "cfg_interval_start": "CFG Interval Start",
     "cfg_interval_end": "CFG Interval End",
     "lm_params_title": "🤖 LM Generation Parameters",

acestep/gradio_ui/i18n/ja.json CHANGED Viewed

@@ -116,6 +116,8 @@
     "audio_format_info": "保存ファイルのオーディオフォーマット",
     "use_adg_label": "ADG を使用",
     "use_adg_info": "角度ドメインガイダンスを有効化",
     "cfg_interval_start": "CFG 間隔開始",
     "cfg_interval_end": "CFG 間隔終了",
     "lm_params_title": "🤖 LM 生成パラメータ",

     "audio_format_info": "保存ファイルのオーディオフォーマット",
     "use_adg_label": "ADG を使用",
     "use_adg_info": "角度ドメインガイダンスを有効化",
+    "shift_label": "シフト",
+    "shift_info": "baseモデル用タイムステップシフト係数 (範囲 1.0~5.0、デフォルト 3.0)。turboモデルには無効。",
     "cfg_interval_start": "CFG 間隔開始",
     "cfg_interval_end": "CFG 間隔終了",
     "lm_params_title": "🤖 LM 生成パラメータ",

acestep/gradio_ui/i18n/zh.json CHANGED Viewed

@@ -116,6 +116,8 @@
     "audio_format_info": "保存文件的音频格式",
     "use_adg_label": "使用 ADG",
     "use_adg_info": "启用角域引导",
     "cfg_interval_start": "CFG 间隔开始",
     "cfg_interval_end": "CFG 间隔结束",
     "lm_params_title": "🤖 LM 生成参数",

     "audio_format_info": "保存文件的音频格式",
     "use_adg_label": "使用 ADG",
     "use_adg_info": "启用角域引导",
+    "shift_label": "Shift",
+    "shift_info": "时间步偏移因子，仅对 base 模型生效 (范围 1.0~5.0，默认 3.0)。对 turbo 模型无效。",
     "cfg_interval_start": "CFG 间隔开始",
     "cfg_interval_end": "CFG 间隔结束",
     "lm_params_title": "🤖 LM 生成参数",

acestep/gradio_ui/interfaces/generation.py CHANGED Viewed

@@ -436,6 +436,15 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     info=t("generation.use_adg_info"),
                     visible=False
                 )
             with gr.Row():
                 cfg_interval_start = gr.Slider(
@@ -649,6 +658,7 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
         "use_adg": use_adg,
         "cfg_interval_start": cfg_interval_start,
         "cfg_interval_end": cfg_interval_end,
         "audio_format": audio_format,
         "output_alignment_preference": output_alignment_preference,
         "think_checkbox": think_checkbox,

                     info=t("generation.use_adg_info"),
                     visible=False
                 )
+                shift = gr.Slider(
+                    minimum=1.0,
+                    maximum=5.0,
+                    value=3.0,
+                    step=0.1,
+                    label=t("generation.shift_label"),
+                    info=t("generation.shift_info"),
+                    visible=False
+                )
             with gr.Row():
                 cfg_interval_start = gr.Slider(
         "use_adg": use_adg,
         "cfg_interval_start": cfg_interval_start,
         "cfg_interval_end": cfg_interval_end,
+        "shift": shift,
         "audio_format": audio_format,
         "output_alignment_preference": output_alignment_preference,
         "think_checkbox": think_checkbox,

acestep/handler.py CHANGED Viewed

@@ -1785,6 +1785,7 @@ class AceStepHandler:
         use_adg: bool = False,
         cfg_interval_start: float = 0.0,
         cfg_interval_end: float = 1.0,
         audio_code_hints: Optional[Union[str, List[str]]] = None,
         infer_method: str = "ode",
     ) -> Dict[str, Any]:
@@ -1948,6 +1949,7 @@ class AceStepHandler:
             "use_adg": use_adg,
             "cfg_interval_start": cfg_interval_start,
             "cfg_interval_end": cfg_interval_end,
         }
         logger.info("[service_generate] Generating audio...")
         with self._load_model_context("model"):
@@ -2055,6 +2057,7 @@ class AceStepHandler:
         use_adg: bool = False,
         cfg_interval_start: float = 0.0,
         cfg_interval_end: float = 1.0,
         use_tiled_decode: bool = True,
         progress=None
     ) -> Dict[str, Any]:
@@ -2202,6 +2205,7 @@ class AceStepHandler:
                 use_adg=use_adg,  # Pass use_adg parameter
                 cfg_interval_start=cfg_interval_start,  # Pass CFG interval start
                 cfg_interval_end=cfg_interval_end,  # Pass CFG interval end
                 audio_code_hints=audio_code_hints_batch,  # Pass audio code hints as list
                 return_intermediate=should_return_intermediate
             )

         use_adg: bool = False,
         cfg_interval_start: float = 0.0,
         cfg_interval_end: float = 1.0,
+        shift: float = 1.0,
         audio_code_hints: Optional[Union[str, List[str]]] = None,
         infer_method: str = "ode",
     ) -> Dict[str, Any]:
             "use_adg": use_adg,
             "cfg_interval_start": cfg_interval_start,
             "cfg_interval_end": cfg_interval_end,
+            "shift": shift,
         }
         logger.info("[service_generate] Generating audio...")
         with self._load_model_context("model"):
         use_adg: bool = False,
         cfg_interval_start: float = 0.0,
         cfg_interval_end: float = 1.0,
+        shift: float = 1.0,
         use_tiled_decode: bool = True,
         progress=None
     ) -> Dict[str, Any]:
                 use_adg=use_adg,  # Pass use_adg parameter
                 cfg_interval_start=cfg_interval_start,  # Pass CFG interval start
                 cfg_interval_end=cfg_interval_end,  # Pass CFG interval end
+                shift=shift,  # Pass shift parameter
                 audio_code_hints=audio_code_hints_batch,  # Pass audio code hints as list
                 return_intermediate=should_return_intermediate
             )

acestep/inference.py CHANGED Viewed

@@ -42,6 +42,7 @@ class GenerationParams:
         use_adg: Whether to use Adaptive Dual Guidance (only works for base model).
         cfg_interval_start: Start ratio (0.0–1.0) to apply CFG.
         cfg_interval_end: End ratio (0.0–1.0) to apply CFG.
         # Task-Specific Parameters
         task_type: Type of generation task. One of: "text2music", "cover", "repaint", "lego", "extract", "complete".
@@ -94,6 +95,7 @@ class GenerationParams:
     use_adg: bool = False
     cfg_interval_start: float = 0.0
     cfg_interval_end: float = 1.0
     repainting_start: float = 0.0
     repainting_end: float = -1
@@ -485,6 +487,7 @@ def generate_music(
             use_adg=params.use_adg,
             cfg_interval_start=params.cfg_interval_start,
             cfg_interval_end=params.cfg_interval_end,
             progress=progress,
         )

         use_adg: Whether to use Adaptive Dual Guidance (only works for base model).
         cfg_interval_start: Start ratio (0.0–1.0) to apply CFG.
         cfg_interval_end: End ratio (0.0–1.0) to apply CFG.
+        shift: Timestep shift factor (default 1.0). When != 1.0, applies t = shift * t / (1 + (shift - 1) * t) to timesteps.
         # Task-Specific Parameters
         task_type: Type of generation task. One of: "text2music", "cover", "repaint", "lego", "extract", "complete".
     use_adg: bool = False
     cfg_interval_start: float = 0.0
     cfg_interval_end: float = 1.0
+    shift: float = 1.0
     repainting_start: float = 0.0
     repainting_end: float = -1
             use_adg=params.use_adg,
             cfg_interval_start=params.cfg_interval_start,
             cfg_interval_end=params.cfg_interval_end,
+            shift=params.shift,
             progress=progress,
         )