# AudioForge/backend/app/services/music_generation.py
# (page-scrape header preserved: author OnyxlMunkey, merge commit 61b8f7d,
#  "Merge branch 'main' of https://github.com/kwizzlesurp10-ctrl/AudioForge")
"""Music generation service using MusicGen/AudioCraft."""
import os
import uuid
import asyncio
from pathlib import Path
from typing import Any, TYPE_CHECKING
import structlog
# Optional ML dependencies
try:
import torch
import torchaudio
from audiocraft.models import MusicGen
import numpy as np
ML_AVAILABLE = True
except ImportError:
ML_AVAILABLE = False
torch = None
torchaudio = None
MusicGen = None
np = None
# Create a dummy type for type hints
if TYPE_CHECKING:
import numpy as np
from app.core.config import settings
from app.core.metrics import (
generation_requests_total,
generation_duration,
active_generations,
)
logger = structlog.get_logger(__name__)
class MusicGenerationService:
"""Service for generating music using MusicGen."""
def __init__(self):
"""Initialize the music generation service."""
self.logger = logger.bind(service="music_generation")
self.model: Any | None = None
if not ML_AVAILABLE:
self.logger.warning("ml_dependencies_not_available",
message="torch/audiocraft not installed, music generation will fail")
self.device = "cpu"
else:
# Check if torch is available and has CUDA support
has_cuda = torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available()
self.device = settings.MUSICGEN_DEVICE if has_cuda else "cpu"
self._model_loading = False
# Don't load model on init - load lazily on first use
def _ensure_model_loaded(self) -> None:
"""Ensure MusicGen model is loaded (lazy loading)."""
if not ML_AVAILABLE:
raise RuntimeError(
"ML dependencies (torch, audiocraft) not installed. "
"Install with: uv pip install -e '.[ml]'"
)
if self.model is not None:
return
if self._model_loading:
# Wait if already loading
import time
while self._model_loading:
time.sleep(0.1)
return
self._model_loading = True
try:
self.logger.info(
"loading_musicgen_model",
model=settings.MUSICGEN_MODEL,
device=self.device,
)
self.model = MusicGen.get_pretrained(settings.MUSICGEN_MODEL)
self.model.set_generation_params(duration=settings.MUSICGEN_DURATION)
# Move to CUDA if available
has_cuda = torch is not None and hasattr(torch, 'cuda') and torch.cuda.is_available()
if self.device == "cuda" and has_cuda:
self.model = self.model.to(self.device)
self.logger.info("musicgen_model_loaded")
except Exception as e:
self.logger.error("failed_to_load_musicgen", exc_info=e)
self._model_loading = False
raise
finally:
self._model_loading = False
async def generate(
self,
prompt: str,
duration: int | None = None,
style: str | None = None,
tempo: int | None = None,
output_path: Path | None = None,
) -> Path:
"""
Generate music from text prompt.
Args:
prompt: Text description of the music
duration: Duration in seconds (defaults to config)
style: Musical style/genre
tempo: Tempo in BPM
output_path: Path to save the generated audio
Returns:
Path to the generated audio file
"""
# self._ensure_model_loaded() # Skip strict check for dev mode
if os.environ.get("FORCE_SIMULATION", "").lower() == "true" or not ML_AVAILABLE:
self.logger.warning("simulating_generation", message="Simulation forced or ML dependencies missing")
import shutil
# Simulate processing time
await asyncio.sleep(5)
if output_path is None:
output_path = Path(settings.AUDIO_STORAGE_PATH) / "music"
output_path.parent.mkdir(parents=True, exist_ok=True)
# Generate unique filename
filename = f"{uuid.uuid4()}.wav"
full_path = output_path / filename
# Create a dummy silent wav file or copy a sample if available
# For now, just write some bytes
with open(full_path, "wb") as f:
# Write a minimal valid WAV header for 1 second of silence
# RIFF header
f.write(b'RIFF')
f.write((36 + 44100 * 2).to_bytes(4, 'little')) # File size
f.write(b'WAVE')
# fmt chunk
f.write(b'fmt ')
f.write((16).to_bytes(4, 'little')) # Chunk size
f.write((1).to_bytes(2, 'little')) # PCM
f.write((1).to_bytes(2, 'little')) # Mono
f.write((44100).to_bytes(4, 'little')) # Sample rate
f.write((44100 * 2).to_bytes(4, 'little')) # Byte rate
f.write((2).to_bytes(2, 'little')) # Block align
f.write((16).to_bytes(2, 'little')) # Bits per sample
# data chunk
f.write(b'data')
f.write((44100 * 2).to_bytes(4, 'little')) # Data size
f.write(b'\x00' * (44100 * 2)) # Silence
return full_path
self._ensure_model_loaded()
if self.model is None:
raise RuntimeError("MusicGen model not loaded")
if not prompt or not prompt.strip():
raise ValueError("Prompt cannot be empty")
if duration is not None and duration <= 0:
raise ValueError("Duration must be positive")
generation_type = "music"
active_generations.labels(type=generation_type).inc()
try:
with generation_duration.labels(type=generation_type).time():
self.logger.info(
"generating_music",
prompt=prompt[:100],
duration=duration,
style=style,
)
# Set generation duration
gen_duration = duration or settings.MUSICGEN_DURATION
# Run generation in a separate thread to avoid blocking the event loop
def _generate():
self.model.set_generation_params(duration=gen_duration)
with torch.no_grad():
return self.model.generate([prompt])
wav = await asyncio.to_thread(_generate)
# Convert to numpy array
audio_array = wav[0].cpu().numpy()
# Ensure output directory exists
if output_path is None:
output_path = Path(settings.AUDIO_STORAGE_PATH) / "music"
output_path.parent.mkdir(parents=True, exist_ok=True)
# Generate unique filename
filename = f"{uuid.uuid4()}.wav"
full_path = output_path / filename
# Save audio file in a thread
def _save():
torchaudio.save(
str(full_path),
torch.from_numpy(audio_array),
sample_rate=self.model.sample_rate,
)
await asyncio.to_thread(_save)
self.logger.info(
"music_generated",
output_path=str(full_path),
duration=gen_duration,
)
generation_requests_total.labels(
type=generation_type, status="success"
).inc()
return full_path
except Exception as e:
self.logger.error("music_generation_failed", exc_info=e)
generation_requests_total.labels(
type=generation_type, status="error"
).inc()
raise
finally:
active_generations.labels(type=generation_type).dec()
async def generate_with_conditioning(
self,
prompt: str,
melody_audio: Any | None = None,
duration: int | None = None,
) -> Path:
"""
Generate music conditioned on a melody.
Args:
prompt: Text description
melody_audio: Melody audio array to condition on
duration: Duration in seconds
Returns:
Path to generated audio
"""
self._ensure_model_loaded()
if self.model is None:
raise RuntimeError("MusicGen model not loaded")
# Implementation for melody conditioning
# This would use MusicGen's melody conditioning feature
raise NotImplementedError("Melody conditioning not yet implemented")
# Singleton instance
_music_service: MusicGenerationService | None = None
def get_music_service() -> MusicGenerationService:
"""Get music generation service instance."""
global _music_service
if _music_service is None:
_music_service = MusicGenerationService()
return _music_service