# AudioForge/backend/app/services/music_generation.py
# (page-scrape header preserved: author OnyxlMunkey, merge commit 61b8f7d,
#  "Merge branch 'main' of https://github.com/kwizzlesurp10-ctrl/AudioForge")
"""Music generation service using MusicGen/AudioCraft."""
import os
import uuid
import asyncio
from pathlib import Path
from typing import Any, TYPE_CHECKING
import structlog
# Optional ML dependencies
try:
import torch
import torchaudio
from audiocraft.models import MusicGen
import numpy as np
ML_AVAILABLE = True
except ImportError:
ML_AVAILABLE = False
torch = None
torchaudio = None
MusicGen = None
np = None
# Create a dummy type for type hints
if TYPE_CHECKING:
import numpy as np
from app.core.config import settings
from app.core.metrics import (
generation_requests_total,
generation_duration,
active_generations,
)
logger = structlog.get_logger(__name__)
class MusicGenerationService:
"""Service for generating music using MusicGen."""
def __init__(self):
"""Initialize the music generation service."""
self.logger = logger.bind(service="music_generation")
self.model: Any | None = None
if not ML_AVAILABLE:
self.logger.warning("ml_dependencies_not_available",
message="torch/audiocraft not installed, music generation will fail")
self.device = "cpu"
else:
# Check if torch is available and has CUDA support
has_cuda = torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available()
self.device = settings.MUSICGEN_DEVICE if has_cuda else "cpu"
self._model_loading = False
# Don't load model on init - load lazily on first use
def _ensure_model_loaded(self) -> None:
"""Ensure MusicGen model is loaded (lazy loading)."""
if not ML_AVAILABLE:
raise RuntimeError(
"ML dependencies (torch, audiocraft) not installed. "
"Install with: uv pip install -e '.[ml]'"
)
if self.model is not None:
return
if self._model_loading:
# Wait if already loading
import time
while self._model_loading:
time.sleep(0.1)
return
self._model_loading = True
try:
self.logger.info(
"loading_musicgen_model",
model=settings.MUSICGEN_MODEL,
device=self.device,
)
self.model = MusicGen.get_pretrained(settings.MUSICGEN_MODEL)
self.model.set_generation_params(duration=settings.MUSICGEN_DURATION)
# Move to CUDA if available
has_cuda = torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available()
if self.device == "cuda" and has_cuda:
self.model = self.model.to(self.device)
self.logger.info("musicgen_model_loaded")
except Exception as e:
self.logger.error("failed_to_load_musicgen", exc_info=e)
self._model_loading = False
raise
finally:
self._model_loading = False
async def generate(
self,
prompt: str,
duration: int | None = None,
style: str | None = None,
tempo: int | None = None,
output_path: Path | None = None,
) -> Path:
"""
Generate music from text prompt.
Args:
prompt: Text description of the music
duration: Duration in seconds (defaults to config)
style: Musical style/genre
tempo: Tempo in BPM
output_path: Path to save the generated audio
Returns:
Path to the generated audio file
"""
# self._ensure_model_loaded() # Skip strict check for dev mode
if os.environ.get("FORCE_SIMULATION", "").lower() == "true" or not ML_AVAILABLE:
self.logger.warning("simulating_generation", message="Simulation forced or ML dependencies missing")
import shutil
# Simulate processing time
await asyncio.sleep(5)
if output_path is None:
output_path = Path(settings.AUDIO_STORAGE_PATH) / "music"
output_path.parent.mkdir(parents=True, exist_ok=True)
# Generate unique filename
filename = f"{uuid.uuid4()}.wav"
full_path = output_path / filename
# Create a dummy silent wav file or copy a sample if available
# For now, just write some bytes
with open(full_path, "wb") as f:
# Write a minimal valid WAV header for 1 second of silence
# RIFF header
f.write(b'RIFF')
f.write((36 + 44100 * 2).to_bytes(4, 'little')) # File size
f.write(b'WAVE')
# fmt chunk
f.write(b'fmt ')
f.write((16).to_bytes(4, 'little')) # Chunk size
f.write((1).to_bytes(2, 'little')) # PCM
f.write((1).to_bytes(2, 'little')) # Mono
f.write((44100).to_bytes(4, 'little')) # Sample rate
f.write((44100 * 2).to_bytes(4, 'little')) # Byte rate
f.write((2).to_bytes(2, 'little')) # Block align
f.write((16).to_bytes(2, 'little')) # Bits per sample
# data chunk
f.write(b'data')
f.write((44100 * 2).to_bytes(4, 'little')) # Data size
f.write(b'\x00' * (44100 * 2)) # Silence
return full_path
self._ensure_model_loaded()
if self.model is None:
raise RuntimeError("MusicGen model not loaded")
if not prompt or not prompt.strip():
raise ValueError("Prompt cannot be empty")
if duration is not None and duration <= 0:
raise ValueError("Duration must be positive")
generation_type = "music"
active_generations.labels(type=generation_type).inc()
try:
with generation_duration.labels(type=generation_type).time():
self.logger.info(
"generating_music",
prompt=prompt[:100],
duration=duration,
style=style,
)
# Set generation duration
gen_duration = duration or settings.MUSICGEN_DURATION
# Run generation in a separate thread to avoid blocking the event loop
def _generate():
self.model.set_generation_params(duration=gen_duration)
with torch.no_grad():
return self.model.generate([prompt])
wav = await asyncio.to_thread(_generate)
# Convert to numpy array
audio_array = wav[0].cpu().numpy()
# Ensure output directory exists
if output_path is None:
output_path = Path(settings.AUDIO_STORAGE_PATH) / "music"
output_path.parent.mkdir(parents=True, exist_ok=True)
# Generate unique filename
filename = f"{uuid.uuid4()}.wav"
full_path = output_path / filename
# Save audio file in a thread
def _save():
torchaudio.save(
str(full_path),
torch.from_numpy(audio_array),
sample_rate=self.model.sample_rate,
)
await asyncio.to_thread(_save)
self.logger.info(
"music_generated",
output_path=str(full_path),
duration=gen_duration,
)
generation_requests_total.labels(
type=generation_type, status="success"
).inc()
return full_path
except Exception as e:
self.logger.error("music_generation_failed", exc_info=e)
generation_requests_total.labels(
type=generation_type, status="error"
).inc()
raise
finally:
active_generations.labels(type=generation_type).dec()
async def generate_with_conditioning(
self,
prompt: str,
melody_audio: Any | None = None,
duration: int | None = None,
) -> Path:
"""
Generate music conditioned on a melody.
Args:
prompt: Text description
melody_audio: Melody audio array to condition on
duration: Duration in seconds
Returns:
Path to generated audio
"""
self._ensure_model_loaded()
if self.model is None:
raise RuntimeError("MusicGen model not loaded")
# Implementation for melody conditioning
# This would use MusicGen's melody conditioning feature
raise NotImplementedError("Melody conditioning not yet implemented")
# Singleton instance
_music_service: MusicGenerationService | None = None
def get_music_service() -> MusicGenerationService:
"""Get music generation service instance."""
global _music_service
if _music_service is None:
_music_service = MusicGenerationService()
return _music_service