Spaces:
Sleeping
Sleeping
ACE-Step Custom committed on
Commit ·
4709141
1
Parent(s): a602628
Fix engine initialization and audio I/O components
Browse files- app.py +70 -25
- src/ace_step_engine.py +25 -12
- src/utils.py +7 -1
app.py
CHANGED
|
@@ -24,11 +24,39 @@ from src.utils import setup_logging, load_config
|
|
| 24 |
logger = setup_logging()
|
| 25 |
config = load_config()
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
ace_engine =
|
| 29 |
-
timeline_manager =
|
| 30 |
-
lora_trainer =
|
| 31 |
-
audio_processor =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
# ==================== TAB 1: STANDARD ACE-STEP GUI ====================
|
|
@@ -49,8 +77,11 @@ def standard_generate(
|
|
| 49 |
try:
|
| 50 |
logger.info(f"Standard generation: {prompt[:50]}...")
|
| 51 |
|
|
|
|
|
|
|
|
|
|
| 52 |
# Generate audio
|
| 53 |
-
audio_path =
|
| 54 |
prompt=prompt,
|
| 55 |
lyrics=lyrics,
|
| 56 |
duration=duration,
|
|
@@ -73,7 +104,7 @@ def standard_generate(
|
|
| 73 |
def standard_variation(audio_path: str, variation_strength: float) -> Tuple[str, str]:
|
| 74 |
"""Generate variation of existing audio."""
|
| 75 |
try:
|
| 76 |
-
result =
|
| 77 |
return result, "✅ Variation generated"
|
| 78 |
except Exception as e:
|
| 79 |
return None, f"❌ Error: {str(e)}"
|
|
@@ -88,7 +119,7 @@ def standard_repaint(
|
|
| 88 |
) -> Tuple[str, str]:
|
| 89 |
"""Repaint specific section of audio."""
|
| 90 |
try:
|
| 91 |
-
result =
|
| 92 |
return result, f"✅ Repainted {start_time}s-{end_time}s"
|
| 93 |
except Exception as e:
|
| 94 |
return None, f"❌ Error: {str(e)}"
|
|
@@ -101,7 +132,7 @@ def standard_lyric_edit(
|
|
| 101 |
) -> Tuple[str, str]:
|
| 102 |
"""Edit lyrics while maintaining music."""
|
| 103 |
try:
|
| 104 |
-
result =
|
| 105 |
return result, "✅ Lyrics edited"
|
| 106 |
except Exception as e:
|
| 107 |
return None, f"❌ Error: {str(e)}"
|
|
@@ -130,14 +161,19 @@ def timeline_generate(
|
|
| 130 |
|
| 131 |
logger.info(f"Timeline generation with {context_length}s context")
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
# Get context from timeline
|
| 134 |
-
context_audio =
|
| 135 |
session_state.get("timeline_id"),
|
| 136 |
context_length
|
| 137 |
)
|
| 138 |
|
| 139 |
# Generate 32s clip
|
| 140 |
-
clip =
|
| 141 |
prompt=prompt,
|
| 142 |
lyrics=lyrics,
|
| 143 |
duration=32,
|
|
@@ -148,15 +184,15 @@ def timeline_generate(
|
|
| 148 |
)
|
| 149 |
|
| 150 |
# Blend with timeline (2s lead-in and lead-out)
|
| 151 |
-
blended_clip =
|
| 152 |
clip,
|
| 153 |
-
|
| 154 |
lead_in=2.0,
|
| 155 |
lead_out=2.0
|
| 156 |
)
|
| 157 |
|
| 158 |
# Add to timeline
|
| 159 |
-
timeline_id =
|
| 160 |
session_state.get("timeline_id"),
|
| 161 |
blended_clip,
|
| 162 |
metadata={
|
|
@@ -171,12 +207,12 @@ def timeline_generate(
|
|
| 171 |
session_state["total_clips"] = session_state.get("total_clips", 0) + 1
|
| 172 |
|
| 173 |
# Get full timeline audio
|
| 174 |
-
full_audio =
|
| 175 |
|
| 176 |
# Get timeline visualization
|
| 177 |
-
timeline_viz =
|
| 178 |
|
| 179 |
-
info = f"✅ Clip {session_state['total_clips']} added • Total: {
|
| 180 |
|
| 181 |
return blended_clip, full_audio, timeline_viz, session_state, info
|
| 182 |
|
|
@@ -210,16 +246,17 @@ def timeline_inpaint(
|
|
| 210 |
if session_state is None:
|
| 211 |
session_state = {"timeline_id": None, "total_clips": 0}
|
| 212 |
|
|
|
|
| 213 |
timeline_id = session_state.get("timeline_id")
|
| 214 |
-
result =
|
| 215 |
timeline_id,
|
| 216 |
start_time,
|
| 217 |
end_time,
|
| 218 |
new_prompt
|
| 219 |
)
|
| 220 |
|
| 221 |
-
full_audio =
|
| 222 |
-
timeline_viz =
|
| 223 |
|
| 224 |
info = f"✅ Inpainted {start_time:.1f}s-{end_time:.1f}s"
|
| 225 |
return full_audio, timeline_viz, session_state, info
|
|
@@ -234,7 +271,7 @@ def timeline_reset(session_state: dict) -> Tuple[None, None, str, dict]:
|
|
| 234 |
if session_state is None:
|
| 235 |
session_state = {"timeline_id": None, "total_clips": 0}
|
| 236 |
elif session_state.get("timeline_id"):
|
| 237 |
-
|
| 238 |
|
| 239 |
session_state = {"timeline_id": None, "total_clips": 0}
|
| 240 |
return None, None, "Timeline cleared", session_state
|
|
@@ -245,7 +282,7 @@ def timeline_reset(session_state: dict) -> Tuple[None, None, str, dict]:
|
|
| 245 |
def lora_upload_files(files: List[str]) -> str:
|
| 246 |
"""Upload and prepare audio files for LoRA training."""
|
| 247 |
try:
|
| 248 |
-
prepared_files =
|
| 249 |
return f"✅ Prepared {len(prepared_files)} files for training"
|
| 250 |
except Exception as e:
|
| 251 |
return f"❌ Error: {str(e)}"
|
|
@@ -365,7 +402,15 @@ def create_ui():
|
|
| 365 |
std_generate_btn = gr.Button("🎵 Generate", variant="primary", size="lg")
|
| 366 |
|
| 367 |
with gr.Column():
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
std_info = gr.Textbox(label="Status", lines=2)
|
| 370 |
|
| 371 |
gr.Markdown("### Advanced Controls")
|
|
@@ -460,8 +505,8 @@ def create_ui():
|
|
| 460 |
tl_info = gr.Textbox(label="Status", lines=2)
|
| 461 |
|
| 462 |
with gr.Column():
|
| 463 |
-
tl_clip_audio = gr.Audio(label="Latest Clip"
|
| 464 |
-
tl_full_audio = gr.Audio(label="Full Timeline"
|
| 465 |
tl_timeline_viz = gr.Image(label="Timeline Visualization")
|
| 466 |
|
| 467 |
with gr.Accordion("🎨 Inpaint Timeline Region", open=False):
|
|
|
|
| 24 |
logger = setup_logging()
|
| 25 |
config = load_config()
|
| 26 |
|
| 27 |
+
# Lazy initialize components (will be initialized on first use)
|
| 28 |
+
ace_engine = None
|
| 29 |
+
timeline_manager = None
|
| 30 |
+
lora_trainer = None
|
| 31 |
+
audio_processor = None
|
| 32 |
+
|
| 33 |
+
def get_ace_engine():
|
| 34 |
+
"""Lazy-load ACE-Step engine."""
|
| 35 |
+
global ace_engine
|
| 36 |
+
if ace_engine is None:
|
| 37 |
+
ace_engine = ACEStepEngine(config)
|
| 38 |
+
return ace_engine
|
| 39 |
+
|
| 40 |
+
def get_timeline_manager():
|
| 41 |
+
"""Lazy-load timeline manager."""
|
| 42 |
+
global timeline_manager
|
| 43 |
+
if timeline_manager is None:
|
| 44 |
+
timeline_manager = TimelineManager(config)
|
| 45 |
+
return timeline_manager
|
| 46 |
+
|
| 47 |
+
def get_lora_trainer():
|
| 48 |
+
"""Lazy-load LoRA trainer."""
|
| 49 |
+
global lora_trainer
|
| 50 |
+
if lora_trainer is None:
|
| 51 |
+
lora_trainer = LoRATrainer(config)
|
| 52 |
+
return lora_trainer
|
| 53 |
+
|
| 54 |
+
def get_audio_processor():
|
| 55 |
+
"""Lazy-load audio processor."""
|
| 56 |
+
global audio_processor
|
| 57 |
+
if audio_processor is None:
|
| 58 |
+
audio_processor = AudioProcessor(config)
|
| 59 |
+
return audio_processor
|
| 60 |
|
| 61 |
|
| 62 |
# ==================== TAB 1: STANDARD ACE-STEP GUI ====================
|
|
|
|
| 77 |
try:
|
| 78 |
logger.info(f"Standard generation: {prompt[:50]}...")
|
| 79 |
|
| 80 |
+
# Get engine instance
|
| 81 |
+
engine = get_ace_engine()
|
| 82 |
+
|
| 83 |
# Generate audio
|
| 84 |
+
audio_path = engine.generate(
|
| 85 |
prompt=prompt,
|
| 86 |
lyrics=lyrics,
|
| 87 |
duration=duration,
|
|
|
|
| 104 |
def standard_variation(audio_path: str, variation_strength: float) -> Tuple[str, str]:
|
| 105 |
"""Generate variation of existing audio."""
|
| 106 |
try:
|
| 107 |
+
result = get_ace_engine().generate_variation(audio_path, variation_strength)
|
| 108 |
return result, "✅ Variation generated"
|
| 109 |
except Exception as e:
|
| 110 |
return None, f"❌ Error: {str(e)}"
|
|
|
|
| 119 |
) -> Tuple[str, str]:
|
| 120 |
"""Repaint specific section of audio."""
|
| 121 |
try:
|
| 122 |
+
result = get_ace_engine().repaint(audio_path, start_time, end_time, new_prompt)
|
| 123 |
return result, f"✅ Repainted {start_time}s-{end_time}s"
|
| 124 |
except Exception as e:
|
| 125 |
return None, f"❌ Error: {str(e)}"
|
|
|
|
| 132 |
) -> Tuple[str, str]:
|
| 133 |
"""Edit lyrics while maintaining music."""
|
| 134 |
try:
|
| 135 |
+
result = get_ace_engine().edit_lyrics(audio_path, new_lyrics)
|
| 136 |
return result, "✅ Lyrics edited"
|
| 137 |
except Exception as e:
|
| 138 |
return None, f"❌ Error: {str(e)}"
|
|
|
|
| 161 |
|
| 162 |
logger.info(f"Timeline generation with {context_length}s context")
|
| 163 |
|
| 164 |
+
# Get managers
|
| 165 |
+
tm = get_timeline_manager()
|
| 166 |
+
engine = get_ace_engine()
|
| 167 |
+
ap = get_audio_processor()
|
| 168 |
+
|
| 169 |
# Get context from timeline
|
| 170 |
+
context_audio = tm.get_context(
|
| 171 |
session_state.get("timeline_id"),
|
| 172 |
context_length
|
| 173 |
)
|
| 174 |
|
| 175 |
# Generate 32s clip
|
| 176 |
+
clip = engine.generate_clip(
|
| 177 |
prompt=prompt,
|
| 178 |
lyrics=lyrics,
|
| 179 |
duration=32,
|
|
|
|
| 184 |
)
|
| 185 |
|
| 186 |
# Blend with timeline (2s lead-in and lead-out)
|
| 187 |
+
blended_clip = ap.blend_clip(
|
| 188 |
clip,
|
| 189 |
+
tm.get_last_clip(session_state.get("timeline_id")),
|
| 190 |
lead_in=2.0,
|
| 191 |
lead_out=2.0
|
| 192 |
)
|
| 193 |
|
| 194 |
# Add to timeline
|
| 195 |
+
timeline_id = tm.add_clip(
|
| 196 |
session_state.get("timeline_id"),
|
| 197 |
blended_clip,
|
| 198 |
metadata={
|
|
|
|
| 207 |
session_state["total_clips"] = session_state.get("total_clips", 0) + 1
|
| 208 |
|
| 209 |
# Get full timeline audio
|
| 210 |
+
full_audio = tm.export_timeline(timeline_id)
|
| 211 |
|
| 212 |
# Get timeline visualization
|
| 213 |
+
timeline_viz = tm.visualize_timeline(timeline_id)
|
| 214 |
|
| 215 |
+
info = f"✅ Clip {session_state['total_clips']} added • Total: {tm.get_duration(timeline_id):.1f}s"
|
| 216 |
|
| 217 |
return blended_clip, full_audio, timeline_viz, session_state, info
|
| 218 |
|
|
|
|
| 246 |
if session_state is None:
|
| 247 |
session_state = {"timeline_id": None, "total_clips": 0}
|
| 248 |
|
| 249 |
+
tm = get_timeline_manager()
|
| 250 |
timeline_id = session_state.get("timeline_id")
|
| 251 |
+
result = tm.inpaint_region(
|
| 252 |
timeline_id,
|
| 253 |
start_time,
|
| 254 |
end_time,
|
| 255 |
new_prompt
|
| 256 |
)
|
| 257 |
|
| 258 |
+
full_audio = tm.export_timeline(timeline_id)
|
| 259 |
+
timeline_viz = tm.visualize_timeline(timeline_id)
|
| 260 |
|
| 261 |
info = f"✅ Inpainted {start_time:.1f}s-{end_time:.1f}s"
|
| 262 |
return full_audio, timeline_viz, session_state, info
|
|
|
|
| 271 |
if session_state is None:
|
| 272 |
session_state = {"timeline_id": None, "total_clips": 0}
|
| 273 |
elif session_state.get("timeline_id"):
|
| 274 |
+
get_timeline_manager().delete_timeline(session_state["timeline_id"])
|
| 275 |
|
| 276 |
session_state = {"timeline_id": None, "total_clips": 0}
|
| 277 |
return None, None, "Timeline cleared", session_state
|
|
|
|
| 282 |
def lora_upload_files(files: List[str]) -> str:
|
| 283 |
"""Upload and prepare audio files for LoRA training."""
|
| 284 |
try:
|
| 285 |
+
prepared_files = get_lora_trainer().prepare_dataset(files)
|
| 286 |
return f"✅ Prepared {len(prepared_files)} files for training"
|
| 287 |
except Exception as e:
|
| 288 |
return f"❌ Error: {str(e)}"
|
|
|
|
| 402 |
std_generate_btn = gr.Button("🎵 Generate", variant="primary", size="lg")
|
| 403 |
|
| 404 |
with gr.Column():
|
| 405 |
+
gr.Markdown("### Audio Input (Optional)")
|
| 406 |
+
std_audio_input = gr.Audio(
|
| 407 |
+
label="Style Reference Audio",
|
| 408 |
+
type="filepath",
|
| 409 |
+
info="Upload audio file or record to use as style guidance"
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
gr.Markdown("### Generated Output")
|
| 413 |
+
std_audio_out = gr.Audio(label="Generated Audio")
|
| 414 |
std_info = gr.Textbox(label="Status", lines=2)
|
| 415 |
|
| 416 |
gr.Markdown("### Advanced Controls")
|
|
|
|
| 505 |
tl_info = gr.Textbox(label="Status", lines=2)
|
| 506 |
|
| 507 |
with gr.Column():
|
| 508 |
+
tl_clip_audio = gr.Audio(label="Latest Clip")
|
| 509 |
+
tl_full_audio = gr.Audio(label="Full Timeline")
|
| 510 |
tl_timeline_viz = gr.Image(label="Timeline Visualization")
|
| 511 |
|
| 512 |
with gr.Accordion("🎨 Inpaint Timeline Region", open=False):
|
src/ace_step_engine.py
CHANGED
|
@@ -35,21 +35,32 @@ class ACEStepEngine:
|
|
| 35 |
"""
|
| 36 |
self.config = config
|
| 37 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 38 |
-
self._initialized = False
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
if not ACE_STEP_AVAILABLE:
|
| 42 |
-
logger.error("ACE-Step 1.5 not
|
| 43 |
-
|
| 44 |
-
self.llm_handler = None
|
| 45 |
return
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def _download_checkpoints(self):
|
| 55 |
"""Download model checkpoints from HuggingFace if not present."""
|
|
@@ -184,7 +195,9 @@ class ACEStepEngine:
|
|
| 184 |
Path to generated audio file
|
| 185 |
"""
|
| 186 |
if not self._initialized:
|
| 187 |
-
|
|
|
|
|
|
|
| 188 |
|
| 189 |
try:
|
| 190 |
# Prepare generation parameters
|
|
|
|
| 35 |
"""
|
| 36 |
self.config = config
|
| 37 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 38 |
+
self._initialized = False
|
| 39 |
+
self.dit_handler = None
|
| 40 |
+
self.llm_handler = None
|
| 41 |
+
|
| 42 |
+
logger.info(f"ACE-Step Engine initializing on {self.device}")
|
| 43 |
|
| 44 |
if not ACE_STEP_AVAILABLE:
|
| 45 |
+
logger.error("ACE-Step 1.5 modules not available")
|
| 46 |
+
logger.error("Please ensure acestep package is installed in your environment")
|
|
|
|
| 47 |
return
|
| 48 |
|
| 49 |
+
try:
|
| 50 |
+
# Initialize official handlers
|
| 51 |
+
self.dit_handler = AceStepHandler()
|
| 52 |
+
self.llm_handler = LLMHandler()
|
| 53 |
+
|
| 54 |
+
# Download and load models
|
| 55 |
+
self._download_checkpoints()
|
| 56 |
+
self._load_models()
|
| 57 |
+
|
| 58 |
+
logger.info("✓ ACE-Step Engine fully initialized")
|
| 59 |
+
except Exception as e:
|
| 60 |
+
logger.error(f"Failed to initialize ACE-Step Engine: {e}")
|
| 61 |
+
logger.error("Engine will not be available for generation")
|
| 62 |
+
import traceback
|
| 63 |
+
traceback.print_exc()
|
| 64 |
|
| 65 |
def _download_checkpoints(self):
|
| 66 |
"""Download model checkpoints from HuggingFace if not present."""
|
|
|
|
| 195 |
Path to generated audio file
|
| 196 |
"""
|
| 197 |
if not self._initialized:
|
| 198 |
+
error_msg = "❌ Engine not initialized - ACE-Step 1.5 may not be installed or models are not loaded"
|
| 199 |
+
logger.error(error_msg)
|
| 200 |
+
raise RuntimeError(error_msg)
|
| 201 |
|
| 202 |
try:
|
| 203 |
# Prepare generation parameters
|
src/utils.py
CHANGED
|
@@ -57,13 +57,19 @@ def load_config(config_path: str = "config.yaml") -> Dict[str, Any]:
|
|
| 57 |
else:
|
| 58 |
# Default configuration
|
| 59 |
config = {
|
|
|
|
|
|
|
|
|
|
| 60 |
"model_path": "ACE-Step/ACE-Step-v1-3.5B",
|
| 61 |
"sample_rate": 44100,
|
| 62 |
"output_dir": "outputs",
|
| 63 |
"timeline_dir": "timelines",
|
| 64 |
"training_dir": "lora_training",
|
| 65 |
"chunk_duration": 30,
|
| 66 |
-
"force_mono": False
|
|
|
|
|
|
|
|
|
|
| 67 |
}
|
| 68 |
|
| 69 |
# Save default config
|
|
|
|
| 57 |
else:
|
| 58 |
# Default configuration
|
| 59 |
config = {
|
| 60 |
+
"checkpoint_dir": "./checkpoints",
|
| 61 |
+
"dit_model_path": "acestep-v15-turbo",
|
| 62 |
+
"lm_model_path": "acestep-5Hz-lm-1.7B",
|
| 63 |
"model_path": "ACE-Step/ACE-Step-v1-3.5B",
|
| 64 |
"sample_rate": 44100,
|
| 65 |
"output_dir": "outputs",
|
| 66 |
"timeline_dir": "timelines",
|
| 67 |
"training_dir": "lora_training",
|
| 68 |
"chunk_duration": 30,
|
| 69 |
+
"force_mono": False,
|
| 70 |
+
"device": "auto",
|
| 71 |
+
"use_flash_attention": False,
|
| 72 |
+
"offload_to_cpu": False
|
| 73 |
}
|
| 74 |
|
| 75 |
# Save default config
|