# NOTE(review): the three lines that were here ("Spaces:", "Build error",
# "Build error") were build-tool residue, not source code; converted to this
# comment so the module parses.
"""Music generation service using MusicGen/AudioCraft."""
import os
import uuid
import asyncio
from pathlib import Path
from typing import Any, TYPE_CHECKING

import structlog

# Optional ML dependencies: when torch/audiocraft are not installed the
# service falls back to a simulation mode instead of failing at import time.
try:
    import torch
    import torchaudio
    from audiocraft.models import MusicGen
    import numpy as np

    ML_AVAILABLE = True
except ImportError:
    ML_AVAILABLE = False
    torch = None
    torchaudio = None
    MusicGen = None
    np = None
    # Re-import for static type checkers only; has no runtime effect.
    if TYPE_CHECKING:
        import numpy as np

from app.core.config import settings
from app.core.metrics import (
    generation_requests_total,
    generation_duration,
    active_generations,
)

logger = structlog.get_logger(__name__)
| class MusicGenerationService: | |
| """Service for generating music using MusicGen.""" | |
| def __init__(self): | |
| """Initialize the music generation service.""" | |
| self.logger = logger.bind(service="music_generation") | |
| self.model: Any | None = None | |
| if not ML_AVAILABLE: | |
| self.logger.warning("ml_dependencies_not_available", | |
| message="torch/audiocraft not installed, music generation will fail") | |
| self.device = "cpu" | |
| else: | |
| # Check if torch is available and has CUDA support | |
| has_cuda = torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available() | |
| self.device = settings.MUSICGEN_DEVICE if has_cuda else "cpu" | |
| self._model_loading = False | |
| # Don't load model on init - load lazily on first use | |
| def _ensure_model_loaded(self) -> None: | |
| """Ensure MusicGen model is loaded (lazy loading).""" | |
| if not ML_AVAILABLE: | |
| raise RuntimeError( | |
| "ML dependencies (torch, audiocraft) not installed. " | |
| "Install with: uv pip install -e '.[ml]'" | |
| ) | |
| if self.model is not None: | |
| return | |
| if self._model_loading: | |
| # Wait if already loading | |
| import time | |
| while self._model_loading: | |
| time.sleep(0.1) | |
| return | |
| self._model_loading = True | |
| try: | |
| self.logger.info( | |
| "loading_musicgen_model", | |
| model=settings.MUSICGEN_MODEL, | |
| device=self.device, | |
| ) | |
| self.model = MusicGen.get_pretrained(settings.MUSICGEN_MODEL) | |
| self.model.set_generation_params(duration=settings.MUSICGEN_DURATION) | |
| # Move to CUDA if available | |
| has_cuda = torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available() | |
| if self.device == "cuda" and has_cuda: | |
| self.model = self.model.to(self.device) | |
| self.logger.info("musicgen_model_loaded") | |
| except Exception as e: | |
| self.logger.error("failed_to_load_musicgen", exc_info=e) | |
| self._model_loading = False | |
| raise | |
| finally: | |
| self._model_loading = False | |
| async def generate( | |
| self, | |
| prompt: str, | |
| duration: int | None = None, | |
| style: str | None = None, | |
| tempo: int | None = None, | |
| output_path: Path | None = None, | |
| ) -> Path: | |
| """ | |
| Generate music from text prompt. | |
| Args: | |
| prompt: Text description of the music | |
| duration: Duration in seconds (defaults to config) | |
| style: Musical style/genre | |
| tempo: Tempo in BPM | |
| output_path: Path to save the generated audio | |
| Returns: | |
| Path to the generated audio file | |
| """ | |
| # self._ensure_model_loaded() # Skip strict check for dev mode | |
| if os.environ.get("FORCE_SIMULATION", "").lower() == "true" or not ML_AVAILABLE: | |
| self.logger.warning("simulating_generation", message="Simulation forced or ML dependencies missing") | |
| import shutil | |
| # Simulate processing time | |
| await asyncio.sleep(5) | |
| if output_path is None: | |
| output_path = Path(settings.AUDIO_STORAGE_PATH) / "music" | |
| output_path.parent.mkdir(parents=True, exist_ok=True) | |
| # Generate unique filename | |
| filename = f"{uuid.uuid4()}.wav" | |
| full_path = output_path / filename | |
| # Create a dummy silent wav file or copy a sample if available | |
| # For now, just write some bytes | |
| with open(full_path, "wb") as f: | |
| # Write a minimal valid WAV header for 1 second of silence | |
| # RIFF header | |
| f.write(b'RIFF') | |
| f.write((36 + 44100 * 2).to_bytes(4, 'little')) # File size | |
| f.write(b'WAVE') | |
| # fmt chunk | |
| f.write(b'fmt ') | |
| f.write((16).to_bytes(4, 'little')) # Chunk size | |
| f.write((1).to_bytes(2, 'little')) # PCM | |
| f.write((1).to_bytes(2, 'little')) # Mono | |
| f.write((44100).to_bytes(4, 'little')) # Sample rate | |
| f.write((44100 * 2).to_bytes(4, 'little')) # Byte rate | |
| f.write((2).to_bytes(2, 'little')) # Block align | |
| f.write((16).to_bytes(2, 'little')) # Bits per sample | |
| # data chunk | |
| f.write(b'data') | |
| f.write((44100 * 2).to_bytes(4, 'little')) # Data size | |
| f.write(b'\x00' * (44100 * 2)) # Silence | |
| return full_path | |
| self._ensure_model_loaded() | |
| if self.model is None: | |
| raise RuntimeError("MusicGen model not loaded") | |
| if not prompt or not prompt.strip(): | |
| raise ValueError("Prompt cannot be empty") | |
| if duration is not None and duration <= 0: | |
| raise ValueError("Duration must be positive") | |
| generation_type = "music" | |
| active_generations.labels(type=generation_type).inc() | |
| try: | |
| with generation_duration.labels(type=generation_type).time(): | |
| self.logger.info( | |
| "generating_music", | |
| prompt=prompt[:100], | |
| duration=duration, | |
| style=style, | |
| ) | |
| # Set generation duration | |
| gen_duration = duration or settings.MUSICGEN_DURATION | |
| # Run generation in a separate thread to avoid blocking the event loop | |
| def _generate(): | |
| self.model.set_generation_params(duration=gen_duration) | |
| with torch.no_grad(): | |
| return self.model.generate([prompt]) | |
| wav = await asyncio.to_thread(_generate) | |
| # Convert to numpy array | |
| audio_array = wav[0].cpu().numpy() | |
| # Ensure output directory exists | |
| if output_path is None: | |
| output_path = Path(settings.AUDIO_STORAGE_PATH) / "music" | |
| output_path.parent.mkdir(parents=True, exist_ok=True) | |
| # Generate unique filename | |
| filename = f"{uuid.uuid4()}.wav" | |
| full_path = output_path / filename | |
| # Save audio file in a thread | |
| def _save(): | |
| torchaudio.save( | |
| str(full_path), | |
| torch.from_numpy(audio_array), | |
| sample_rate=self.model.sample_rate, | |
| ) | |
| await asyncio.to_thread(_save) | |
| self.logger.info( | |
| "music_generated", | |
| output_path=str(full_path), | |
| duration=gen_duration, | |
| ) | |
| generation_requests_total.labels( | |
| type=generation_type, status="success" | |
| ).inc() | |
| return full_path | |
| except Exception as e: | |
| self.logger.error("music_generation_failed", exc_info=e) | |
| generation_requests_total.labels( | |
| type=generation_type, status="error" | |
| ).inc() | |
| raise | |
| finally: | |
| active_generations.labels(type=generation_type).dec() | |
| async def generate_with_conditioning( | |
| self, | |
| prompt: str, | |
| melody_audio: Any | None = None, | |
| duration: int | None = None, | |
| ) -> Path: | |
| """ | |
| Generate music conditioned on a melody. | |
| Args: | |
| prompt: Text description | |
| melody_audio: Melody audio array to condition on | |
| duration: Duration in seconds | |
| Returns: | |
| Path to generated audio | |
| """ | |
| self._ensure_model_loaded() | |
| if self.model is None: | |
| raise RuntimeError("MusicGen model not loaded") | |
| # Implementation for melody conditioning | |
| # This would use MusicGen's melody conditioning feature | |
| raise NotImplementedError("Melody conditioning not yet implemented") | |
# Module-level cache holding the lazily constructed singleton.
_music_service: MusicGenerationService | None = None


def get_music_service() -> MusicGenerationService:
    """Return the shared MusicGenerationService, constructing it on first use."""
    global _music_service
    if _music_service is None:
        _music_service = MusicGenerationService()
    return _music_service