Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,6 @@ import torchaudio
|
|
| 4 |
import time
|
| 5 |
import sys
|
| 6 |
import numpy as np
|
| 7 |
-
import gc
|
| 8 |
import gradio as gr
|
| 9 |
from pydub import AudioSegment
|
| 10 |
from audiocraft.models import MusicGen
|
|
@@ -15,148 +14,908 @@ import traceback
|
|
| 15 |
import logging
|
| 16 |
from datetime import datetime
|
| 17 |
from pathlib import Path
|
| 18 |
-
import mmap
|
| 19 |
|
| 20 |
-
# Suppress warnings
|
| 21 |
warnings.filterwarnings("ignore")
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
# Optimize allocations for RTX 2080 Ti (11.5 GB) on CUDA 12.1
|
| 25 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
|
| 26 |
-
torch.cuda.set_per_process_memory_fraction(0.95, 0) # reserve 95% of VRAM
|
| 27 |
-
torch.backends.cudnn.benchmark = True
|
| 28 |
-
torch.backends.cudnn.deterministic = False
|
| 29 |
|
| 30 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
log_dir = "logs"
|
| 32 |
os.makedirs(log_dir, exist_ok=True)
|
| 33 |
-
log_file = os.path.join(log_dir, f"
|
| 34 |
logging.basicConfig(
|
| 35 |
level=logging.DEBUG,
|
| 36 |
format="%(asctime)s [%(levelname)s] %(message)s",
|
| 37 |
-
handlers=[
|
|
|
|
|
|
|
|
|
|
| 38 |
)
|
| 39 |
logger = logging.getLogger(__name__)
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 44 |
sys.exit(1)
|
| 45 |
-
|
| 46 |
-
logger.info(f"Using
|
| 47 |
-
|
| 48 |
-
# === Memory Cleanup ===
|
| 49 |
-
def clean_memory():
|
| 50 |
-
torch.cuda.empty_cache()
|
| 51 |
-
gc.collect()
|
| 52 |
-
torch.cuda.synchronize()
|
| 53 |
-
used_mb = torch.cuda.memory_allocated() / 1024**2
|
| 54 |
-
logger.debug(f"VRAM used: {used_mb:.1f} MB")
|
| 55 |
-
return used_mb
|
| 56 |
-
|
| 57 |
-
clean_memory()
|
| 58 |
|
| 59 |
-
#
|
| 60 |
try:
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
sys.exit(1)
|
| 71 |
|
| 72 |
-
#
|
| 73 |
-
def check_disk_space(
|
| 74 |
-
stat = os.statvfs(
|
| 75 |
-
|
| 76 |
-
if
|
| 77 |
-
logger.warning(f"Low disk
|
| 78 |
-
return
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
}
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
logger.
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
css = """
|
| 135 |
-
body {
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
"""
|
| 138 |
|
|
|
|
|
|
|
| 139 |
with gr.Blocks(css=css) as demo:
|
| 140 |
-
gr.Markdown("
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import time
|
| 5 |
import sys
|
| 6 |
import numpy as np
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
from pydub import AudioSegment
|
| 9 |
from audiocraft.models import MusicGen
|
|
|
|
| 14 |
import logging
|
| 15 |
from datetime import datetime
|
| 16 |
from pathlib import Path
|
|
|
|
| 17 |
|
| 18 |
+
# Suppress warnings for cleaner output
|
| 19 |
warnings.filterwarnings("ignore")
|
| 20 |
|
| 21 |
+
# Set PYTORCH_CUDA_ALLOC_CONF for CUDA 12.1 and 11 GB VRAM (RTX 2080 Ti)
|
|
|
|
| 22 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
# Optimize for CUDA 12.1
|
| 25 |
+
torch.backends.cudnn.benchmark = False
|
| 26 |
+
torch.backends.cudnn.deterministic = True
|
| 27 |
+
|
| 28 |
+
# Setup logging
|
| 29 |
log_dir = "logs"
|
| 30 |
os.makedirs(log_dir, exist_ok=True)
|
| 31 |
+
log_file = os.path.join(log_dir, f"musicgen_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
|
| 32 |
logging.basicConfig(
|
| 33 |
level=logging.DEBUG,
|
| 34 |
format="%(asctime)s [%(levelname)s] %(message)s",
|
| 35 |
+
handlers=[
|
| 36 |
+
logging.FileHandler(log_file),
|
| 37 |
+
logging.StreamHandler(sys.stdout)
|
| 38 |
+
]
|
| 39 |
)
|
| 40 |
logger = logging.getLogger(__name__)
|
| 41 |
|
| 42 |
+
# Device setup
|
| 43 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 44 |
+
if device != "cuda":
|
| 45 |
+
logger.error("CUDA is required for GPU rendering. CPU rendering is disabled.")
|
| 46 |
sys.exit(1)
|
| 47 |
+
logger.info(f"Using GPU: {torch.cuda.get_device_name(0)} (CUDA 12.1, 11 GB VRAM)")
|
| 48 |
+
logger.info(f"Using precision: float16 for model, float32 for processing")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# Load MusicGen medium model into VRAM
|
| 51 |
try:
|
| 52 |
+
logger.info("Loading MusicGen medium model into VRAM...")
|
| 53 |
+
local_model_path = "./models/musicgen-medium"
|
| 54 |
+
if not os.path.exists(local_model_path):
|
| 55 |
+
logger.warning(f"Local model path {local_model_path} does not exist. Attempting to download facebook/musicgen-medium...")
|
| 56 |
+
MusicGen.get_pretrained("facebook/musicgen-medium")
|
| 57 |
+
cache_path = os.path.expanduser("~/.cache/audiocraft/models--facebook--musicgen-medium")
|
| 58 |
+
os.makedirs(local_model_path, exist_ok=True)
|
| 59 |
+
import shutil
|
| 60 |
+
shutil.copytree(cache_path, local_model_path, dirs_exist_ok=True)
|
| 61 |
+
logger.info(f"Model weights copied to {local_model_path}")
|
| 62 |
+
musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
|
| 63 |
+
logger.info("MusicGen medium model (facebook/musicgen-medium) loaded successfully.")
|
| 64 |
+
except Exception as e:
|
| 65 |
+
logger.error(f"Failed to load MusicGen model: {e}")
|
| 66 |
+
logger.error(traceback.format_exc())
|
| 67 |
sys.exit(1)
|
| 68 |
|
| 69 |
+
# Check disk space
|
| 70 |
+
def check_disk_space(path="."):
|
| 71 |
+
stat = os.statvfs(path)
|
| 72 |
+
free_space = stat.f_bavail * stat.f_frsize / (1024**3)
|
| 73 |
+
if free_space < 1.0:
|
| 74 |
+
logger.warning(f"Low disk space ({free_space:.2f} GB). Ensure at least 1 GB free.")
|
| 75 |
+
return free_space >= 1.0
|
| 76 |
|
| 77 |
+
# Clear GPU memory
|
| 78 |
+
def clear_gpu_memory():
|
| 79 |
+
torch.cuda.empty_cache()
|
| 80 |
+
logger.debug(f"Cleared GPU memory cache. Current VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
|
| 81 |
+
|
| 82 |
+
# Audio processing functions (GPU-based where possible)
|
| 83 |
+
def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=32000):
|
| 84 |
+
logger.debug(f"Balancing stereo for segment with sample rate {sample_rate}")
|
| 85 |
+
samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
|
| 86 |
+
if audio_segment.channels == 2:
|
| 87 |
+
stereo_samples = samples.reshape(-1, 2)
|
| 88 |
+
stereo_samples_torch = torch.from_numpy(stereo_samples).to(device, dtype=torch.float32)
|
| 89 |
+
db_samples = 20 * torch.log10(torch.abs(stereo_samples_torch) + 1e-10)
|
| 90 |
+
mask = db_samples > noise_threshold
|
| 91 |
+
stereo_samples_torch = stereo_samples_torch * mask
|
| 92 |
+
left_nonzero = stereo_samples_torch[:, 0][stereo_samples_torch[:, 0] != 0]
|
| 93 |
+
right_nonzero = stereo_samples_torch[:, 1][stereo_samples_torch[:, 1] != 0]
|
| 94 |
+
left_rms = torch.sqrt(torch.mean(left_nonzero**2)) if left_nonzero.numel() > 0 else torch.tensor(0.0, device=device)
|
| 95 |
+
right_rms = torch.sqrt(torch.mean(right_nonzero**2)) if right_nonzero.numel() > 0 else torch.tensor(0.0, device=device)
|
| 96 |
+
if left_rms > 0 and right_rms > 0:
|
| 97 |
+
avg_rms = (left_rms + right_rms) / 2
|
| 98 |
+
stereo_samples_torch[:, 0] = stereo_samples_torch[:, 0] * (avg_rms / left_rms)
|
| 99 |
+
stereo_samples_torch[:, 1] = stereo_samples_torch[:, 1] * (avg_rms / right_rms)
|
| 100 |
+
balanced_samples = stereo_samples_torch.cpu().numpy().flatten().astype(np.int16)
|
| 101 |
+
stereo_samples_torch = None
|
| 102 |
+
balanced_segment = AudioSegment(
|
| 103 |
+
balanced_samples.tobytes(),
|
| 104 |
+
frame_rate=sample_rate,
|
| 105 |
+
sample_width=audio_segment.sample_width,
|
| 106 |
+
channels=2
|
| 107 |
+
)
|
| 108 |
+
logger.debug("Stereo balancing completed")
|
| 109 |
+
return balanced_segment
|
| 110 |
+
logger.debug("Segment is not stereo, returning unchanged")
|
| 111 |
+
return audio_segment
|
| 112 |
+
|
| 113 |
+
def calculate_rms(segment):
|
| 114 |
samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
|
| 115 |
+
samples_torch = torch.from_numpy(samples).to(device, dtype=torch.float32)
|
| 116 |
+
rms = torch.sqrt(torch.mean(samples_torch**2)).cpu().numpy()
|
| 117 |
+
samples_torch = None
|
| 118 |
+
logger.debug(f"Calculated RMS: {rms}")
|
| 119 |
+
return rms
|
| 120 |
+
|
| 121 |
+
def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=32000):
|
| 122 |
+
logger.debug(f"Normalizing RMS for segment with target {target_rms_db} dBFS")
|
| 123 |
+
target_rms = 10 ** (target_rms_db / 20) * 32767
|
| 124 |
+
current_rms = calculate_rms(segment)
|
| 125 |
+
if current_rms > 0:
|
| 126 |
+
gain_factor = target_rms / current_rms
|
| 127 |
+
segment = segment.apply_gain(20 * np.log10(gain_factor))
|
| 128 |
+
segment = hard_limit(segment, limit_db=peak_limit_db, sample_rate=sample_rate)
|
| 129 |
+
logger.debug("RMS normalization completed")
|
| 130 |
+
return segment
|
| 131 |
+
|
| 132 |
+
def hard_limit(audio_segment, limit_db=-3.0, sample_rate=32000):
|
| 133 |
+
logger.debug(f"Applying hard limit at {limit_db} dBFS")
|
| 134 |
+
limit = 10 ** (limit_db / 20.0) * 32767
|
| 135 |
+
samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
|
| 136 |
+
samples_torch = torch.from_numpy(samples).to(device, dtype=torch.float32)
|
| 137 |
+
samples_torch = torch.clamp(samples_torch, -limit, limit)
|
| 138 |
+
limited_samples = samples_torch.cpu().numpy().astype(np.int16)
|
| 139 |
+
samples_torch = None
|
| 140 |
+
limited_segment = AudioSegment(
|
| 141 |
+
limited_samples.tobytes(),
|
| 142 |
+
frame_rate=sample_rate,
|
| 143 |
+
sample_width=audio_segment.sample_width,
|
| 144 |
+
channels=audio_segment.channels
|
| 145 |
+
)
|
| 146 |
+
logger.debug("Hard limit applied")
|
| 147 |
+
return limited_segment
|
| 148 |
+
|
| 149 |
+
def apply_eq(segment, sample_rate=32000):
|
| 150 |
+
logger.debug(f"Applying EQ with sample rate {sample_rate}")
|
| 151 |
+
segment = segment.high_pass_filter(20)
|
| 152 |
+
segment = segment.low_pass_filter(20000)
|
| 153 |
+
logger.debug("EQ applied")
|
| 154 |
+
return segment
|
| 155 |
+
|
| 156 |
+
def apply_fade(segment, fade_in_duration=500, fade_out_duration=500):
|
| 157 |
+
logger.debug(f"Applying fade: in={fade_in_duration}ms, out={fade_out_duration}ms")
|
| 158 |
+
segment = segment.fade_in(fade_in_duration)
|
| 159 |
+
segment = segment.fade_out(fade_out_duration)
|
| 160 |
+
logger.debug("Fade applied")
|
| 161 |
+
return segment
|
| 162 |
+
|
| 163 |
+
# Genre prompt functions with increased variance
|
| 164 |
+
def set_red_hot_chili_peppers_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 165 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("syncopated funky steps" if bpm > 120 else "groovy rhythmic flow")
|
| 166 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", tight funk-rock drums with syncopated fills and crisp snares"
|
| 167 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ", subtle wah-wah effects and ambient pads"
|
| 168 |
+
bass = f", {bass_style}" if bass_style != "none" else ", groovy slap basslines with popping accents and rhythmic flourishes"
|
| 169 |
+
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", syncopated funky guitar riffs with staccato phrasing and wah-wah"
|
| 170 |
+
if chunk_type == "verse":
|
| 171 |
+
prompt = f"Red Hot Chili Peppers-inspired vibe with {bass}{guitar}{drum}{synth}, laid-back funky grooves, intricate bass-driven rhythms with playful syncopation, minimal guitar accents, {rhythm} at {bpm} BPM, similar to Incubus or early Faith No More."
|
| 172 |
+
elif chunk_type == "chorus":
|
| 173 |
+
prompt = f"Red Hot Chili Peppers-inspired vibe with {bass}{guitar}{drum}{synth}, explosive funky breakdowns, high-energy guitar hooks with soaring melodic riffs, punchy drum fills with dynamic crashes, {rhythm} at {bpm} BPM, similar to Incubus or early Faith No More."
|
| 174 |
+
else: # bridge
|
| 175 |
+
prompt = f"Red Hot Chili Peppers-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric funky interlude, melodic bass solos with intricate runs, sparse wah-wah guitar textures with ambient synth layers and subdued percussion, {rhythm} at {bpm} BPM, similar to Incubus or early Faith No More."
|
| 176 |
+
logger.debug(f"Generated RHCP prompt for {chunk_type}: {prompt}")
|
| 177 |
+
return prompt
|
| 178 |
+
|
| 179 |
+
def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 180 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("intense rhythmic steps" if bpm > 120 else "grungy rhythmic pulse")
|
| 181 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", heavy grunge drums with crashing cymbals and tom rolls"
|
| 182 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
| 183 |
+
bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines with gritty distorted tone"
|
| 184 |
+
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", raw distorted guitar riffs with feedback and slides"
|
| 185 |
+
if chunk_type == "verse":
|
| 186 |
+
prompt = f"Nirvana-inspired vibe with {bass}{guitar}{drum}{synth}, quiet grunge texture, brooding angst-filled energy with minimalistic riff patterns, subtle bass grooves, {rhythm} at {bpm} BPM, similar to Mudhoney or early Soundgarden."
|
| 187 |
+
elif chunk_type == "chorus":
|
| 188 |
+
prompt = f"Nirvana-inspired vibe with {bass}{guitar}{drum}{synth}, loud explosive grunge dynamics, chaotic distorted guitar walls with aggressive strums, pounding drum grooves with heavy cymbal crashes, {rhythm} at {bpm} BPM, similar to Mudhoney or early Soundgarden."
|
| 189 |
+
else: # bridge
|
| 190 |
+
prompt = f"Nirvana-inspired vibe with {bass}{guitar}{drum}{synth}, moody grunge atmosphere, haunting bass-driven melody with dissonant tones, sparse feedback-heavy guitar drones with minimal percussion, {rhythm} at {bpm} BPM, similar to Mudhoney or early Soundgarden."
|
| 191 |
+
logger.debug(f"Generated Nirvana prompt for {chunk_type}: {prompt}")
|
| 192 |
+
return prompt
|
| 193 |
+
|
| 194 |
+
def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 195 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "driving rhythmic flow")
|
| 196 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", driving grunge drums with tom-heavy fills and deep kicks"
|
| 197 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
| 198 |
+
bass = f", {bass_style}" if bass_style != "none" else ", deep resonant basslines with melodic runs"
|
| 199 |
+
guitar = f", {guitar_style} guitar leads" if guitar_style != "none" else ", soulful guitar leads with reverb and bends"
|
| 200 |
+
if chunk_type == "verse":
|
| 201 |
+
prompt = f"Pearl Jam-inspired vibe with {bass}{guitar}{drum}{synth}, emotional grunge storytelling, steady soulful grooves with warm bass tones, lyrical guitar phrases with subtle dynamics, {rhythm} at {bpm} BPM, similar to Alice in Chains."
|
| 202 |
+
elif chunk_type == "chorus":
|
| 203 |
+
prompt = f"Pearl Jam-inspired vibe with {bass}{guitar}{drum}{synth}, soaring grunge choruses, powerful guitar solos with emotive bends and slides, intense rhythmic drive with dynamic tom fills, {rhythm} at {bpm} BPM, similar to Alice in Chains."
|
| 204 |
+
else: # bridge
|
| 205 |
+
prompt = f"Pearl Jam-inspired vibe with {bass}{guitar}{drum}{synth}, introspective grunge atmosphere, melodic bass and reverb-heavy guitar interplay with atmospheric textures and soft percussion, {rhythm} at {bpm} BPM, similar to Alice in Chains."
|
| 206 |
+
logger.debug(f"Generated Pearl Jam prompt for {chunk_type}: {prompt}")
|
| 207 |
+
return prompt
|
| 208 |
+
|
| 209 |
+
def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 210 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("heavy rhythmic steps" if bpm > 120 else "sludgy rhythmic groove")
|
| 211 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", heavy sludgy drums with syncopated patterns and deep toms"
|
| 212 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
| 213 |
+
bass = f", {bass_style}" if bass_style != "none" else ", heavy distorted basslines with growling tone"
|
| 214 |
+
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", heavy sludgy guitar riffs with drop tunings and slides"
|
| 215 |
+
if chunk_type == "verse":
|
| 216 |
+
prompt = f"Soundgarden-inspired vibe with {bass}{guitar}{drum}{synth}, dark grunge texture, brooding psychedelic undertones with sludgy bass-heavy rhythms, minimal guitar accents, {rhythm} at {bpm} BPM, similar to Temple of the Dog."
|
| 217 |
+
elif chunk_type == "chorus":
|
| 218 |
+
prompt = f"Soundgarden-inspired vibe with {bass}{guitar}{drum}{synth}, powerful grunge riffs, soaring vocal-like guitar melodies with heavy distortion, dynamic drum grooves with intense fills, {rhythm} at {bpm} BPM, similar to Temple of the Dog."
|
| 219 |
+
else: # bridge
|
| 220 |
+
prompt = f"Soundgarden-inspired vibe with {bass}{guitar}{drum}{synth}, psychedelic grunge atmosphere, hypnotic bass-driven grooves with droning guitar textures and sparse percussion, {rhythm} at {bpm} BPM, similar to Temple of the Dog."
|
| 221 |
+
logger.debug(f"Generated Soundgarden prompt for {chunk_type}: {prompt}")
|
| 222 |
+
return prompt
|
| 223 |
+
|
| 224 |
+
def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 225 |
+
styles = ["anthemic", "gritty", "melodic", "fast-paced", "driving"]
|
| 226 |
+
moods = ["energetic", "introspective", "rebellious", "uplifting"]
|
| 227 |
+
style = random.choice(styles)
|
| 228 |
+
mood = random.choice(moods)
|
| 229 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("powerful rhythmic steps" if bpm > 120 else "catchy rhythmic groove")
|
| 230 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", driving rock drums with punchy fills and tight snares"
|
| 231 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
| 232 |
+
bass = f", {bass_style}" if bass_style != "none" else ", driving basslines with melodic hooks and syncopation"
|
| 233 |
+
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else f", {style} guitar riffs with crunchy distortion and palm muting"
|
| 234 |
+
if chunk_type == "verse":
|
| 235 |
+
prompt = f"Foo Fighters-inspired vibe with {bass}{guitar}{drum}{synth}, {mood} alt-rock energy, catchy verse riffs with rhythmic bass grooves, steady driving pulse with subtle dynamics, {rhythm} at {bpm} BPM, similar to Queens of the Stone Age."
|
| 236 |
+
elif chunk_type == "chorus":
|
| 237 |
+
prompt = f"Foo Fighters-inspired vibe with {bass}{guitar}{drum}{synth}, {mood} alt-rock power, anthemic choruses with explosive guitar hooks and soaring melodies, punchy drum fills with crashing cymbals, {rhythm} at {bpm} BPM, similar to Queens of the Stone Age."
|
| 238 |
+
else: # bridge
|
| 239 |
+
prompt = f"Foo Fighters-inspired vibe with {bass}{guitar}{drum}{synth}, {mood} alt-rock atmosphere, melodic guitar and bass interplay with atmospheric textures and subdued percussion, {rhythm} at {bpm} BPM, similar to Queens of the Stone Age."
|
| 240 |
+
logger.debug(f"Generated Foo Fighters prompt for {chunk_type}: {prompt}")
|
| 241 |
+
return prompt
|
| 242 |
+
|
| 243 |
+
def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 244 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("dynamic rhythmic steps" if bpm > 120 else "dreamy rhythmic flow")
|
| 245 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", layered alt-rock drums with syncopated patterns and reverb"
|
| 246 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
| 247 |
+
bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines with warm resonant tone"
|
| 248 |
+
guitar = f", {guitar_style} guitar textures" if guitar_style != "none" else ", dreamy layered guitar textures with chorus effects"
|
| 249 |
+
if chunk_type == "verse":
|
| 250 |
+
prompt = f"Smashing Pumpkins-inspired vibe with {bass}{guitar}{drum}{synth}, melancholic alt-rock texture, intricate layered dynamics with delicate guitar arpeggios and steady bass grooves, {rhythm} at {bpm} BPM, similar to My Bloody Valentine."
|
| 251 |
+
elif chunk_type == "chorus":
|
| 252 |
+
prompt = f"Smashing Pumpkins-inspired vibe with {bass}{guitar}{drum}{synth}, aggressive alt-rock energy, soaring distorted guitar walls with pounding drum grooves and intense basslines, {rhythm} at {bpm} BPM, similar to My Bloody Valentine."
|
| 253 |
+
else: # bridge
|
| 254 |
+
prompt = f"Smashing Pumpkins-inspired vibe with {bass}{guitar}{drum}{synth}, dreamy alt-rock atmosphere, ethereal guitar and bass interplay with ambient textures and soft percussion, {rhythm} at {bpm} BPM, similar to My Bloody Valentine."
|
| 255 |
+
logger.debug(f"Generated Smashing Pumpkins prompt for {chunk_type}: {prompt}")
|
| 256 |
+
return prompt
|
| 257 |
+
|
| 258 |
+
def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 259 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("complex rhythmic steps" if bpm > 120 else "intricate rhythmic pulse")
|
| 260 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", intricate experimental drums with glitchy offbeat patterns"
|
| 261 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ", atmospheric synths with evolving textures and drones"
|
| 262 |
+
bass = f", {bass_style}" if bass_style != "none" else ", subtle pulsing basslines with ambient resonance"
|
| 263 |
+
guitar = f", {guitar_style} guitar layers" if guitar_style != "none" else ", intricate ambient guitar layers with delay effects"
|
| 264 |
+
if chunk_type == "verse":
|
| 265 |
+
prompt = f"Radiohead-inspired vibe with {bass}{guitar}{drum}{synth}, introspective experimental rock, delicate layered textures with glitchy rhythms and minimal bass pulses, {rhythm} at {bpm} BPM, similar to Sigur RΓ³s."
|
| 266 |
+
elif chunk_type == "chorus":
|
| 267 |
+
prompt = f"Radiohead-inspired vibe with {bass}{guitar}{drum}{synth}, climactic experimental rock, intense layered dynamics with soaring synth melodies and chaotic guitar lines, {rhythm} at {bpm} BPM, similar to Sigur RΓ³s."
|
| 268 |
+
else: # bridge
|
| 269 |
+
prompt = f"Radiohead-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric experimental rock, ambient synth and guitar drones with minimalist bass pulses and sparse percussion, {rhythm} at {bpm} BPM, similar to Sigur RΓ³s."
|
| 270 |
+
logger.debug(f"Generated Radiohead prompt for {chunk_type}: {prompt}")
|
| 271 |
+
return prompt
|
| 272 |
+
|
| 273 |
+
def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 274 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("bluesy rhythmic steps" if bpm > 120 else "steady rhythmic groove")
|
| 275 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", classic rock drums with rolling fills and deep kicks"
|
| 276 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
| 277 |
+
bass = f", {bass_style}" if bass_style != "none" else ", groovy walking bass with melodic runs"
|
| 278 |
+
guitar = f", {guitar_style} electric guitars" if guitar_style != "none" else ", bluesy electric guitars with vibrato and bends"
|
| 279 |
+
if chunk_type == "verse":
|
| 280 |
+
prompt = f"Led Zeppelin-inspired vibe with {bass}{guitar}{drum}{synth}, raw classic rock energy, bluesy rhythmic grooves with steady bass walks and minimal guitar riffs, {rhythm} at {bpm} BPM, similar to The Who."
|
| 281 |
+
elif chunk_type == "chorus":
|
| 282 |
+
prompt = f"Led Zeppelin-inspired vibe with {bass}{guitar}{drum}{synth}, explosive classic rock riffs, fiery guitar solos with soaring melodies and driving drum grooves, {rhythm} at {bpm} BPM, similar to The Who."
|
| 283 |
+
else: # bridge
|
| 284 |
+
prompt = f"Led Zeppelin-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric classic rock texture, melodic bass and slide guitar interplay with ambient reverb and sparse percussion, {rhythm} at {bpm} BPM, similar to The Who."
|
| 285 |
+
logger.debug(f"Generated Classic Rock prompt for {chunk_type}: {prompt}")
|
| 286 |
+
return prompt
|
| 287 |
+
|
| 288 |
+
def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
|
| 289 |
+
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("quirky rhythmic steps" if bpm > 120 else "energetic rhythmic flow")
|
| 290 |
+
drum = f", {drum_beat} drums" if drum_beat != "none" else ", quirky alt-rock drums with offbeat fills and tight snares"
|
| 291 |
+
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
| 292 |
+
bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines with syncopated grooves"
|
| 293 |
+
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", distorted guitar riffs with angular phrasing and reverb"
|
| 294 |
+
if chunk_type == "verse":
|
| 295 |
+
prompt = f"Pixies-inspired vibe with {bass}{guitar}{drum}{synth}, quirky alt-rock energy, playful dynamic shifts with rhythmic bass grooves and minimal guitar accents, {rhythm} at {bpm} BPM, similar to Sonic Youth."
|
| 296 |
+
elif chunk_type == "chorus":
|
| 297 |
+
prompt = f"Pixies-inspired vibe with {bass}{guitar}{drum}{synth}, explosive alt-rock intensity, jagged guitar hooks with soaring melodies and driving drum patterns, {rhythm} at {bpm} BPM, similar to Sonic Youth."
|
| 298 |
+
else: # bridge
|
| 299 |
+
prompt = f"Pixies-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric alt-rock texture, dissonant guitar and bass interplay with ambient reverb and sparse percussion, {rhythm} at {bpm} BPM, similar to Sonic Youth."
|
| 300 |
+
logger.debug(f"Generated Alternative Rock prompt for {chunk_type}: {prompt}")
|
| 301 |
+
return prompt
|
| 302 |
+
|
| 303 |
+
def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
    """Build a post-punk MusicGen prompt for one song section.

    Args:
        bpm: Tempo in beats per minute; also picks the default rhythm wording.
        drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            UI dropdown values; "none" selects a genre-appropriate default.
        chunk_type: "verse", "chorus", or anything else (treated as bridge).

    Returns:
        A single prompt string describing the requested section.
    """
    # FIX: drop the stray leading space on the rhythm fragment and the
    # leading ", " on the bass fragment; they produced malformed text like
    # "vibe with , driving basslines" and double spaces before "with".
    rhythm = f"with {rhythmic_steps}" if rhythmic_steps != "none" else ("sharp rhythmic steps" if bpm > 120 else "moody rhythmic pulse")
    drum = f", {drum_beat} drums" if drum_beat != "none" else ", hypnotic post-punk drums with reverb and sharp hi-hats"
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    # No leading ", ": the bass fragment directly follows "vibe with ".
    bass = bass_style if bass_style != "none" else "driving basslines with dark resonant tone"
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars with chorus effects and sharp picking"
    if chunk_type == "verse":
        prompt = f"Joy Division-inspired vibe with {bass}{guitar}{drum}{synth}, moody post-punk atmosphere, hypnotic minimalist grooves with steady bass pulse and subtle guitar accents, {rhythm} at {bpm} BPM, similar to The Cure."
    elif chunk_type == "chorus":
        prompt = f"Joy Division-inspired vibe with {bass}{guitar}{drum}{synth}, intense post-punk energy, driving bass and jangly guitar hooks with soaring dynamics and reverb-heavy drums, {rhythm} at {bpm} BPM, similar to The Cure."
    else:  # bridge
        prompt = f"Joy Division-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric post-punk texture, reverb-heavy bass and guitar drones with sparse percussion and ambient tones, {rhythm} at {bpm} BPM, similar to The Cure."
    logger.debug(f"Generated Post-Punk prompt for {chunk_type}: {prompt}")
    return prompt
|
| 317 |
+
|
| 318 |
+
def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
    """Build an indie-rock MusicGen prompt for one song section.

    Args:
        bpm: Tempo in beats per minute; also picks the default rhythm wording.
        drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            UI dropdown values; "none" selects a genre-appropriate default.
        chunk_type: "verse", "chorus", or anything else (treated as bridge).

    Returns:
        A single prompt string describing the requested section.
    """
    # FIX: drop the stray leading space on the rhythm fragment and the
    # leading ", " on the bass fragment; they produced malformed text like
    # "vibe with , melodic basslines" and double spaces before "with".
    rhythm = f"with {rhythmic_steps}" if rhythmic_steps != "none" else ("catchy rhythmic steps" if bpm > 120 else "jangly rhythmic flow")
    drum = f", {drum_beat} drums" if drum_beat != "none" else ", catchy indie drums with tight snares and offbeat fills"
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    # No leading ", ": the bass fragment directly follows "vibe with ".
    bass = bass_style if bass_style != "none" else "melodic basslines with upbeat syncopated pulse"
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars with bright chords and reverb"
    if chunk_type == "verse":
        prompt = f"Arctic Monkeys-inspired vibe with {bass}{guitar}{drum}{synth}, catchy indie rock grooves, sharp rhythmic riffs with steady bass grooves and minimal guitar accents, {rhythm} at {bpm} BPM, similar to The Strokes."
    elif chunk_type == "chorus":
        prompt = f"Arctic Monkeys-inspired vibe with {bass}{guitar}{drum}{synth}, high-energy indie rock hooks, punchy guitar and bass interplay with soaring melodic riffs, driving drum patterns, {rhythm} at {bpm} BPM, similar to The Strokes."
    else:  # bridge
        prompt = f"Arctic Monkeys-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric indie rock texture, melodic jangly guitar leads with ambient reverb and sparse percussion, {rhythm} at {bpm} BPM, similar to The Strokes."
    logger.debug(f"Generated Indie Rock prompt for {chunk_type}: {prompt}")
    return prompt
|
| 332 |
+
|
| 333 |
+
def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
    """Build a funk-rock MusicGen prompt for one song section.

    Args:
        bpm: Tempo in beats per minute; also picks the default rhythm wording.
        drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            UI dropdown values; "none" selects a genre-appropriate default.
        chunk_type: "verse", "chorus", or anything else (treated as bridge).

    Returns:
        A single prompt string describing the requested section.
    """
    # FIX: drop the stray leading space on the rhythm fragment and the
    # leading ", " on the bass fragment; they produced malformed text like
    # "vibe with , slap bass" and double spaces before "with".
    rhythm = f"with {rhythmic_steps}" if rhythmic_steps != "none" else ("aggressive rhythmic steps" if bpm > 120 else "funky rhythmic groove")
    drum = f", {drum_beat} drums" if drum_beat != "none" else ", tight funk-rock drums with heavy kicks and syncopated fills"
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    # No leading ", ": the bass fragment directly follows "vibe with ".
    bass = bass_style if bass_style != "none" else "slap bass with aggressive pops and rhythmic flourishes"
    guitar = f", {guitar_style} guitar chords" if guitar_style != "none" else ", funky guitar chords with wah-wah and staccato strums"
    if chunk_type == "verse":
        prompt = f"Rage Against the Machine-inspired vibe with {bass}{guitar}{drum}{synth}, aggressive funk-rock grooves, heavy syncopated bass rhythms with gritty guitar accents, {rhythm} at {bpm} BPM, similar to Audioslave."
    elif chunk_type == "chorus":
        prompt = f"Rage Against the Machine-inspired vibe with {bass}{guitar}{drum}{synth}, explosive funk-rock energy, powerful riff-driven hooks with intense drum grooves and dynamic basslines, {rhythm} at {bpm} BPM, similar to Audioslave."
    else:  # bridge
        prompt = f"Rage Against the Machine-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric funk-rock interlude, bass-heavy grooves with sparse wah-wah guitar effects and subtle percussion, {rhythm} at {bpm} BPM, similar to Audioslave."
    logger.debug(f"Generated Funk Rock prompt for {chunk_type}: {prompt}")
    return prompt
|
| 347 |
+
|
| 348 |
+
def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
    """Build a Detroit-techno MusicGen prompt for one song section.

    Args:
        bpm: Tempo in beats per minute; also picks the default rhythm wording.
        drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            UI dropdown values; "none" selects a genre-appropriate default
            (guitar defaults to no fragment at all for this genre).
        chunk_type: "verse", "chorus", or anything else (treated as bridge).

    Returns:
        A single prompt string describing the requested section.
    """
    # FIX: drop the stray leading space on the rhythm fragment and the
    # leading ", " on the bass fragment; they produced malformed text like
    # "vibe with , driving basslines" and double spaces before "with".
    rhythm = f"with {rhythmic_steps}" if rhythmic_steps != "none" else ("pulsing rhythmic steps" if bpm > 120 else "deep rhythmic groove")
    drum = f", {drum_beat} drums" if drum_beat != "none" else ", crisp hi-hats and steady four-on-the-floor kick drum with subtle variations"
    synth = f", {synthesizer} accents" if synthesizer != "none" else ", deep pulsing synths with repetitive arpeggiated patterns"
    # No leading ", ": the bass fragment directly follows "vibe with ".
    bass = bass_style if bass_style != "none" else "driving basslines with groovy resonant pulse"
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
    if chunk_type == "verse":
        prompt = f"Juan Atkins-inspired vibe with {bass}{guitar}{drum}{synth}, hypnotic Detroit techno grooves, minimalistic pulsing rhythms with steady bass pulse and subtle synth textures, {rhythm} at {bpm} BPM, similar to Derrick May."
    elif chunk_type == "chorus":
        prompt = f"Juan Atkins-inspired vibe with {bass}{guitar}{drum}{synth}, intense Detroit techno energy, layered synth arpeggios with dynamic basslines and vibrant synth leads, {rhythm} at {bpm} BPM, similar to Derrick May."
    else:  # bridge
        prompt = f"Juan Atkins-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric Detroit techno texture, deep resonant bass and ambient synth drones with minimal percussion, {rhythm} at {bpm} BPM, similar to Derrick May."
    logger.debug(f"Generated Detroit Techno prompt for {chunk_type}: {prompt}")
    return prompt
|
| 362 |
+
|
| 363 |
+
def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type="verse"):
    """Build a deep-house MusicGen prompt for one song section.

    Args:
        bpm: Tempo in beats per minute; also picks the default rhythm wording.
        drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            UI dropdown values; "none" selects a genre-appropriate default
            (guitar defaults to no fragment at all for this genre).
        chunk_type: "verse", "chorus", or anything else (treated as bridge).

    Returns:
        A single prompt string describing the requested section.
    """
    # FIX: drop the stray leading space on the rhythm fragment and the
    # leading ", " on the bass fragment; they produced malformed text like
    # "vibe with , deep basslines" and double spaces before "with".
    rhythm = f"with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "laid-back rhythmic flow")
    drum = f", {drum_beat} drums" if drum_beat != "none" else ", steady four-on-the-floor kick drum with soft hi-hats and subtle shakers"
    synth = f", {synthesizer} accents" if synthesizer != "none" else ", warm analog synth chords with hypnotic progression and ambient pads"
    # No leading ", ": the bass fragment directly follows "vibe with ".
    bass = bass_style if bass_style != "none" else "deep basslines with groovy pulsing rhythm"
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
    if chunk_type == "verse":
        prompt = f"Larry Heard-inspired vibe with {bass}{guitar}{drum}{synth}, laid-back deep house grooves, soulful chord progressions with steady bass pulse and minimal synth accents, {rhythm} at {bpm} BPM, similar to Frankie Knuckles."
    elif chunk_type == "chorus":
        prompt = f"Larry Heard-inspired vibe with {bass}{guitar}{drum}{synth}, uplifting deep house energy, rich layered synths with dynamic basslines and vibrant chord progressions, {rhythm} at {bpm} BPM, similar to Frankie Knuckles."
    else:  # bridge
        prompt = f"Larry Heard-inspired vibe with {bass}{guitar}{drum}{synth}, atmospheric deep house texture, warm resonant bass and ambient synth pads with minimal percussion and subtle hi-hat patterns, {rhythm} at {bpm} BPM, similar to Frankie Knuckles."
    logger.debug(f"Generated Deep House prompt for {chunk_type}: {prompt}")
    return prompt
|
| 377 |
+
|
| 378 |
+
# Preset configurations for genres.
# Consumed by generate_segment(): when the user picks any preset other than
# "default", these values OVERRIDE the individual sliders.
#   cfg_scale   - classifier-free guidance coefficient passed as cfg_coef
#   top_k/top_p - sampling truncation parameters
#   temperature - sampling randomness
PRESETS = {
    "default": {"cfg_scale": 2.0, "top_k": 150, "top_p": 0.9, "temperature": 0.8},
    "rock": {"cfg_scale": 2.5, "top_k": 140, "top_p": 0.9, "temperature": 0.9},
    "techno": {"cfg_scale": 1.8, "top_k": 160, "top_p": 0.85, "temperature": 0.7},
    "grunge": {"cfg_scale": 2.0, "top_k": 150, "top_p": 0.9, "temperature": 0.85},
    "indie": {"cfg_scale": 2.2, "top_k": 145, "top_p": 0.9, "temperature": 0.8}
}
|
| 386 |
+
|
| 387 |
+
# Function to get the latest log file
def get_latest_log():
    """Return the text of the newest musicgen log file, or an error string."""
    candidates = list(Path(log_dir).glob("musicgen_log_*.log"))
    if not candidates:
        logger.warning("No log files found")
        return "No log files found."
    # Newest by modification time (equivalent to sorting descending and
    # taking the first entry).
    newest = max(candidates, key=os.path.getmtime)
    try:
        with open(newest, "r") as fh:
            content = fh.read()
        logger.info(f"Retrieved latest log file: {newest}")
        return content
    except Exception as e:
        logger.error(f"Failed to read log file {newest}: {e}")
        return f"Error reading log file: {e}"
|
| 401 |
+
|
| 402 |
+
# Generate a single track segment
def generate_segment(prompt, cfg_scale, top_k, top_p, temperature, duration, seed, target_volume, preset):
    """Generate one audio segment with MusicGen and post-process it.

    Pipeline: seed RNGs -> MusicGen generation (fp16 autocast) -> force
    stereo -> round-trip through a temp WAV into pydub -> attenuate,
    balance, RMS-normalize, EQ -> return a pydub AudioSegment.

    Args:
        prompt: Text prompt for MusicGen.
        cfg_scale, top_k, top_p, temperature: Sampling parameters; all four
            are OVERRIDDEN by PRESETS[preset] when preset != "default".
        duration: Segment length in seconds.
        seed: RNG seed applied to torch, numpy, and CUDA.
        target_volume: Target RMS in dBFS for rms_normalize().
        preset: Key into PRESETS, or "default" to keep the explicit params.

    Returns:
        A post-processed stereo pydub AudioSegment at 32 kHz.

    Raises:
        ValueError: If the generated audio cannot be coerced to 2 channels.
        Exception: Re-raises any generation/post-processing failure after
            logging and freeing GPU memory.
    """
    global musicgen_model
    try:
        logger.info(f"Generating segment ({duration}s, seed={seed})")
        # Presets take precedence over the individual sliders.
        if preset != "default":
            preset_params = PRESETS.get(preset, PRESETS["default"])
            cfg_scale = preset_params["cfg_scale"]
            top_k = preset_params["top_k"]
            top_p = preset_params["top_p"]
            temperature = preset_params["temperature"]
            logger.info(f"Applied preset {preset}: cfg_scale={cfg_scale}, top_k={top_k}, top_p={top_p}, temperature={temperature}")

        # Calculate expected steps (~50 steps/second at 32000 Hz).
        # NOTE(review): expected_steps is computed for the log line only and
        # is not passed to the model.
        expected_steps = int(duration * 50)
        logger.debug(f"Setting duration={duration}s, expected_steps={expected_steps}")

        musicgen_model.set_generation_params(
            duration=duration,
            use_sampling=True,
            top_k=top_k,
            top_p=top_p,
            temperature=temperature,
            cfg_coef=cfg_scale
        )

        with torch.no_grad():
            # fp16 autocast to cut VRAM use during generation.
            with autocast(dtype=torch.float16):
                # Seed every RNG so the same (prompt, seed) pair reproduces.
                torch.manual_seed(seed)
                np.random.seed(seed)
                torch.cuda.manual_seed_all(seed)
                logger.debug("Generating track")
                audio_segment = musicgen_model.generate([prompt], progress=True)[0].to(device)
                logger.debug(f"Generated audio segment shape: {audio_segment.shape}")

        # Verify duration
        sample_rate = 32000  # MusicGen's native output rate
        expected_samples = duration * sample_rate
        actual_samples = audio_segment.shape[-1]
        logger.debug(f"Expected samples: {expected_samples}, Actual samples: {actual_samples}")
        if abs(actual_samples - expected_samples) > sample_rate:  # Allow 1s tolerance
            logger.warning(f"Generated audio duration mismatch: expected {duration}s, got {actual_samples/sample_rate:.2f}s")

        # Back to fp32 for saving; duplicate channels to force stereo.
        audio_segment = audio_segment.to(dtype=torch.float32)
        if audio_segment.dim() == 1:
            logger.debug("Converting mono to stereo")
            audio_segment = torch.stack([audio_segment, audio_segment], dim=0)
        elif audio_segment.dim() == 2 and audio_segment.shape[0] != 2:
            logger.debug("Adjusting to stereo")
            audio_segment = torch.cat([audio_segment, audio_segment], dim=0)

        if audio_segment.shape[0] != 2:
            logger.error(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")
            raise ValueError(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")

        # Round-trip through a 16-bit WAV on disk to hand the tensor to
        # pydub. NOTE(review): the temp file is not removed if torchaudio
        # or pydub raises between save and remove.
        temp_wav_path = f"temp_audio_{int(time.time()*1000)}.wav"
        logger.debug(f"Saving audio segment to {temp_wav_path}")
        torchaudio.save(temp_wav_path, audio_segment.cpu(), sample_rate, bits_per_sample=16)
        final_segment = AudioSegment.from_wav(temp_wav_path)
        os.remove(temp_wav_path)
        logger.debug(f"Deleted temporary file {temp_wav_path}")
        # Attenuate 15 dB before normalization to leave headroom.
        final_segment = final_segment - 15
        if final_segment.frame_rate != sample_rate:
            logger.debug(f"Setting segment sample rate to {sample_rate}")
            final_segment = final_segment.set_frame_rate(sample_rate)
        # Post-processing helpers defined elsewhere in this file.
        final_segment = balance_stereo(final_segment, noise_threshold=-60, sample_rate=sample_rate)
        final_segment = rms_normalize(final_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=sample_rate)
        final_segment = apply_eq(final_segment, sample_rate=sample_rate)
        clear_gpu_memory()
        return final_segment
    except Exception as e:
        logger.error(f"Segment generation failed: {e}")
        logger.error(traceback.format_exc())
        clear_gpu_memory()  # free VRAM even on failure
        raise e
|
| 477 |
+
|
| 478 |
+
# Generate a full 180-second song with chunking
def generate_full_song(prompt_func, cfg_scale, top_k, top_p, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, seed, vram_status):
    """Generate a 180 s song as three 60 s chunks (verse/chorus/bridge).

    Each chunk gets its own prompt (via prompt_func), sampling tweaks, seed
    offset, and fade shape; chunks are joined with 2 s crossfades and
    exported as MP3.

    Args:
        prompt_func: One of the set_*_prompt functions; called per chunk.
        cfg_scale, top_k: Sampling params forwarded to generate_segment.
        top_p: Accepted for interface symmetry but overridden per chunk by
            chunk_params below.
        bpm .. guitar_style: Musical settings forwarded to prompt_func.
        target_volume: Target RMS in dBFS for each chunk.
        preset: Preset key forwarded to generate_segment.
        seed: Base seed; each chunk adds its own seed_offset.
        vram_status: Status string from the UI; updated and returned.

    Returns:
        (mp3_path, status_message, vram_status); mp3_path is None on error.
    """
    try:
        logger.info("Starting full song generation (180s, 3 chunks)...")
        start_time = time.time()
        chunks = []
        chunk_types = ["verse", "chorus", "bridge"]
        sample_rate = 32000
        # Per-chunk sampling overrides: progressively looser sampling from
        # verse to bridge, with distinct seeds so sections differ.
        chunk_params = [
            {"temperature": 1.0, "top_p": 0.975, "seed_offset": 0},   # Verse: moderate randomness
            {"temperature": 1.05, "top_p": 1.0, "seed_offset": 50},   # Chorus: balanced
            {"temperature": 1.1, "top_p": 1.025, "seed_offset": 100}  # Bridge: most varied
        ]

        if not check_disk_space():
            logger.error("Insufficient disk space")
            return None, "⚠️ Insufficient disk space. Free up at least 1 GB.", vram_status

        vram_status = f"Initial VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"

        for i, (chunk_type, params) in enumerate(zip(chunk_types, chunk_params)):
            logger.info(f"Generating chunk {i+1}/3 ({chunk_type}, 60s, seed={seed + params['seed_offset']}, VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB)")
            chunk_start = time.time()
            for attempt in range(3):  # Retry up to 3 times
                try:
                    prompt = prompt_func(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, chunk_type)
                    chunk = generate_segment(
                        prompt, cfg_scale, top_k, params["top_p"], params["temperature"], 60,
                        seed + params["seed_offset"], target_volume, preset
                    )
                    # Adjust fades for smooth transitions
                    if i == 0:
                        chunk = chunk.fade_in(500).fade_out(2000)   # Verse: fade in, longer fade out
                    elif i == 1:
                        chunk = chunk.fade_in(2000).fade_out(2000)  # Chorus: crossfade both
                    else:
                        chunk = chunk.fade_in(2000).fade_out(500)   # Bridge: longer fade in, fade out
                    chunks.append(chunk)
                    logger.info(f"Chunk {i+1} generated in {time.time() - chunk_start:.2f} seconds")
                    break
                except Exception as e:
                    logger.warning(f"Chunk {i+1} attempt {attempt+1} failed: {e}")
                    clear_gpu_memory()
                    if attempt == 2:  # final attempt exhausted
                        raise e
            clear_gpu_memory()

        # Combine chunks with crossfades
        logger.debug("Combining chunks with crossfades")
        combined = chunks[0]
        for i in range(1, len(chunks)):
            combined = combined.append(chunks[i], crossfade=2000)  # 2-second crossfade

        mp3_path = f"output_full_song_{int(time.time())}.mp3"
        logger.info("⚠️ WARNING: Audio is set to safe levels (~ -23 dBFS RMS, -3 dBFS peak). Start playback at LOW volume (10-20%) and adjust gradually.")
        logger.info("VERIFY: Open the file in Audacity to check for static. RMS should be ~ -23 dBFS, peaks ≤ -3 dBFS. Report any static or issues.")
        try:
            logger.debug(f"Exporting full song to {mp3_path}")
            combined.export(
                mp3_path,
                format="mp3",
                bitrate="96k",
                tags={"title": "GhostAI Full Song", "artist": "GhostAI"}
            )
            logger.info(f"Full song saved to {mp3_path}")
        except Exception as e:
            logger.error(f"Error exporting MP3: {e}")
            # Fall back to a plain export without ID3 tags.
            fallback_path = f"fallback_full_song_{int(time.time())}.mp3"
            try:
                combined.export(fallback_path, format="mp3", bitrate="96k")
                logger.info(f"Full song saved to fallback: {fallback_path}")
                mp3_path = fallback_path
            except Exception as fallback_e:
                logger.error(f"Failed to save fallback MP3: {fallback_e}")
                raise e

        vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
        logger.info(f"Full song generation completed in {time.time() - start_time:.2f} seconds")
        return mp3_path, "✅ Done! Generated static-free full song (180s) with adjusted volume levels.", vram_status
    except Exception as e:
        logger.error(f"Full song generation failed: {e}")
        logger.error(traceback.format_exc())
        return None, f"❌ Full song generation failed: {e}", vram_status
|
| 561 |
+
|
| 562 |
+
# Generate music (single track or full song)
def generate_music(instrumental_prompt, cfg_scale, top_k, top_p, temperature, duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, vram_status):
    """Top-level generation entry point wired to the Gradio UI.

    Dispatches to generate_full_song() when duration is the full-song
    option, otherwise generates a single 30/60 s track, applies fades, and
    exports MP3.

    Returns:
        (mp3_path, status_message, vram_status); mp3_path is None on error.
    """
    global musicgen_model
    if not instrumental_prompt.strip():
        logger.warning("Empty instrumental prompt provided")
        return None, "⚠️ Please enter a valid instrumental prompt!", vram_status

    try:
        logger.info("Starting music generation...")
        start_time = time.time()
        sample_rate = 32000

        if not check_disk_space():
            logger.error("Insufficient disk space")
            return None, "⚠️ Insufficient disk space. Free up at least 1 GB.", vram_status

        # Fresh random seed per request; range is intentionally small.
        seed = random.randint(1, 100)
        vram_status = f"Initial VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"

        if duration == "Full Song (180 seconds)":
            # Map prompt to corresponding function for chunking
            prompt_func_map = {
                "rhcp": set_red_hot_chili_peppers_prompt,
                "nirvana": set_nirvana_grunge_prompt,
                "pearl_jam": set_pearl_jam_grunge_prompt,
                "soundgarden": set_soundgarden_grunge_prompt,
                "foo_fighters": set_foo_fighters_prompt,
                "smashing_pumpkins": set_smashing_pumpkins_prompt,
                "radiohead": set_radiohead_prompt,
                "classic_rock": set_classic_rock_prompt,
                "alternative_rock": set_alternative_rock_prompt,
                "post_punk": set_post_punk_prompt,
                "indie_rock": set_indie_rock_prompt,
                "funk_rock": set_funk_rock_prompt,
                "detroit_techno": set_detroit_techno_prompt,
                "deep_house": set_deep_house_prompt
            }
            # Identify the genre by regenerating each verse prompt and
            # comparing for EXACT string equality with the textbox content.
            # NOTE(review): this breaks if the user edits the prompt text at
            # all — full-song mode then rejects the request.
            prompt_key = None
            for key, func in prompt_func_map.items():
                if func(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, "verse") == instrumental_prompt:
                    prompt_key = key
                    break
            if not prompt_key:
                logger.error("Prompt does not match any genre function")
                return None, "⚠️ Prompt does not match any supported genre!", vram_status
            return generate_full_song(
                prompt_func_map[prompt_key], cfg_scale, top_k, top_p, bpm, drum_beat,
                synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, seed, vram_status
            )

        # Single track generation
        logger.info(f"Generating single track ({duration}, seed={seed})")
        # Any duration value other than "30 seconds" falls through to 60 s.
        duration_secs = 30 if duration == "30 seconds" else 60
        final_segment = generate_segment(
            instrumental_prompt, cfg_scale, top_k, top_p, temperature, duration_secs, seed, target_volume, preset
        )
        final_segment = apply_fade(final_segment)

        mp3_path = f"output_adjusted_volume_{int(time.time())}.mp3"
        logger.info("⚠️ WARNING: Audio is set to safe levels (~ -23 dBFS RMS, -3 dBFS peak). Start playback at LOW volume (10-20%) and adjust gradually.")
        logger.info("VERIFY: Open the file in Audacity to check for static. RMS should be ~ -23 dBFS, peaks ≤ -3 dBFS. Report any static or issues.")
        try:
            logger.debug(f"Exporting single track to {mp3_path}")
            final_segment.export(
                mp3_path,
                format="mp3",
                bitrate="96k",
                tags={"title": "GhostAI Instrumental", "artist": "GhostAI"}
            )
            logger.info(f"Single track saved to {mp3_path}")
        except Exception as e:
            logger.error(f"Error exporting MP3: {e}")
            # Fall back to a plain export without ID3 tags.
            fallback_path = f"fallback_output_{int(time.time())}.mp3"
            try:
                final_segment.export(fallback_path, format="mp3", bitrate="96k")
                logger.info(f"Single track saved to fallback: {fallback_path}")
                mp3_path = fallback_path
            except Exception as fallback_e:
                logger.error(f"Failed to save fallback MP3: {fallback_e}")
                raise e

        vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
        logger.info(f"Single track generation completed in {time.time() - start_time:.2f} seconds")
        return mp3_path, "✅ Done! Generated static-free track with adjusted volume levels.", vram_status
    except Exception as e:
        logger.error(f"Generation failed: {e}")
        logger.error(traceback.format_exc())
        return None, f"❌ Generation failed: {e}", vram_status
|
| 650 |
+
|
| 651 |
+
# Clear inputs function
def clear_inputs():
    """Return the default value for every UI control, in wiring order."""
    logger.info("Clearing input fields")
    defaults = (
        "",            # instrumental prompt
        2.0,           # cfg_scale
        150,           # top_k
        0.9,           # top_p
        0.8,           # temperature
        "30 seconds",  # duration
        120,           # bpm
        "none",        # drum beat
        "none",        # synthesizer
        "none",        # rhythmic steps
        "none",        # bass style
        "none",        # guitar style
        -23.0,         # target volume (dBFS)
        "default",     # preset
        "",            # vram status
    )
    return defaults
|
| 655 |
+
|
| 656 |
+
# Custom CSS
# Injected into gr.Blocks(css=...) below. Defines the dark gradient theme,
# the glitch animations for the ghost logo and title, and the container /
# button / textbox styling used via elem_classes throughout the UI.
# NOTE: keep this a plain string — it is passed verbatim to Gradio.
css = """
body {
    background: linear-gradient(135deg, #0A0A0A 0%, #1C2526 100%);
    color: #E0E0E0;
    font-family: 'Orbitron', sans-serif;
}
.header-container {
    text-align: center;
    padding: 10px 20px;
    background: rgba(0, 0, 0, 0.9);
    border-bottom: 1px solid #00FF9F;
}
#ghost-logo {
    font-size: 40px;
    animation: glitch-ghost 1.5s infinite;
}
h1 {
    color: #A100FF;
    font-size: 24px;
    animation: glitch-text 2s infinite;
}
p {
    color: #E0E0E0;
    font-size: 12px;
}
.input-container, .settings-container, .output-container, .logs-container {
    max-width: 1200px;
    margin: 20px auto;
    padding: 20px;
    background: rgba(28, 37, 38, 0.8);
    border-radius: 10px;
}
.textbox {
    background: #1A1A1A;
    border: 1px solid #A100FF;
    color: #E0E0E0;
}
.genre-buttons {
    display: flex;
    justify-content: center;
    flex-wrap: wrap;
    gap: 15px;
}
.genre-btn, button {
    background: linear-gradient(45deg, #A100FF, #00FF9F);
    border: none;
    color: #0A0A0A;
    padding: 10px 20px;
    border-radius: 5px;
}
.gradio-container {
    padding: 20px;
}
.group-container {
    margin-bottom: 20px;
    padding: 15px;
    border: 1px solid #00FF9F;
    border-radius: 8px;
}
@keyframes glitch-ghost {
    0% { transform: translate(0, 0); opacity: 1; }
    20% { transform: translate(-5px, 2px); opacity: 0.8; }
    100% { transform: translate(0, 0); opacity: 1; }
}
@keyframes glitch-text {
    0% { transform: translate(0, 0); }
    20% { transform: translate(-2px, 1px); }
    100% { transform: translate(0, 0); }
}
@font-face {
    font-family: 'Orbitron';
    src: url('https://fonts.gstatic.com/s/orbitron/v29/yMJRMIlzdpvBhQQL_Qq7dy0.woff2') format('woff2');
}
"""
|
| 731 |
|
| 732 |
+
# Build Gradio interface
|
| 733 |
+
logger.info("Building Gradio interface...")
|
| 734 |
with gr.Blocks(css=css) as demo:
|
| 735 |
+
gr.Markdown("""
|
| 736 |
+
<div class="header-container">
|
| 737 |
+
<div id="ghost-logo">π»</div>
|
| 738 |
+
<h1>GhostAI Music Generator πΉ</h1>
|
| 739 |
+
<p>Summon the Sound of the Unknown</p>
|
| 740 |
+
</div>
|
| 741 |
+
""")
|
| 742 |
+
|
| 743 |
+
with gr.Column(elem_classes="input-container"):
|
| 744 |
+
gr.Markdown("### πΈ Prompt Settings")
|
| 745 |
+
instrumental_prompt = gr.Textbox(
|
| 746 |
+
label="Instrumental Prompt βοΈ",
|
| 747 |
+
placeholder="Click a genre button or type your own instrumental prompt",
|
| 748 |
+
lines=4,
|
| 749 |
+
elem_classes="textbox"
|
| 750 |
+
)
|
| 751 |
+
with gr.Row(elem_classes="genre-buttons"):
|
| 752 |
+
rhcp_btn = gr.Button("Red Hot Chili Peppers πΆοΈ", elem_classes="genre-btn")
|
| 753 |
+
nirvana_btn = gr.Button("Nirvana Grunge πΈ", elem_classes="genre-btn")
|
| 754 |
+
pearl_jam_btn = gr.Button("Pearl Jam Grunge π¦ͺ", elem_classes="genre-btn")
|
| 755 |
+
soundgarden_btn = gr.Button("Soundgarden Grunge π", elem_classes="genre-btn")
|
| 756 |
+
foo_fighters_btn = gr.Button("Foo Fighters π€", elem_classes="genre-btn")
|
| 757 |
+
smashing_pumpkins_btn = gr.Button("Smashing Pumpkins π", elem_classes="genre-btn")
|
| 758 |
+
radiohead_btn = gr.Button("Radiohead π§ ", elem_classes="genre-btn")
|
| 759 |
+
classic_rock_btn = gr.Button("Classic Rock πΈ", elem_classes="genre-btn")
|
| 760 |
+
alternative_rock_btn = gr.Button("Alternative Rock π΅", elem_classes="genre-btn")
|
| 761 |
+
post_punk_btn = gr.Button("Post-Punk π€", elem_classes="genre-btn")
|
| 762 |
+
indie_rock_btn = gr.Button("Indie Rock π€", elem_classes="genre-btn")
|
| 763 |
+
funk_rock_btn = gr.Button("Funk Rock πΊ", elem_classes="genre-btn")
|
| 764 |
+
detroit_techno_btn = gr.Button("Detroit Techno ποΈ", elem_classes="genre-btn")
|
| 765 |
+
deep_house_btn = gr.Button("Deep House π ", elem_classes="genre-btn")
|
| 766 |
+
|
| 767 |
+
with gr.Column(elem_classes="settings-container"):
|
| 768 |
+
gr.Markdown("### βοΈ API Settings")
|
| 769 |
+
with gr.Group(elem_classes="group-container"):
|
| 770 |
+
cfg_scale = gr.Slider(
|
| 771 |
+
label="CFG Scale π―",
|
| 772 |
+
minimum=1.0,
|
| 773 |
+
maximum=10.0,
|
| 774 |
+
value=2.0,
|
| 775 |
+
step=0.1,
|
| 776 |
+
info="Controls how closely the music follows the prompt."
|
| 777 |
+
)
|
| 778 |
+
top_k = gr.Slider(
|
| 779 |
+
label="Top-K Sampling π’",
|
| 780 |
+
minimum=10,
|
| 781 |
+
maximum=500,
|
| 782 |
+
value=150,
|
| 783 |
+
step=10,
|
| 784 |
+
info="Limits sampling to the top k most likely tokens."
|
| 785 |
+
)
|
| 786 |
+
top_p = gr.Slider(
|
| 787 |
+
label="Top-P Sampling π°",
|
| 788 |
+
minimum=0.0,
|
| 789 |
+
maximum=1.0,
|
| 790 |
+
value=0.9,
|
| 791 |
+
step=0.05,
|
| 792 |
+
info="Keeps tokens with cumulative probability above p."
|
| 793 |
+
)
|
| 794 |
+
temperature = gr.Slider(
|
| 795 |
+
label="Temperature π₯",
|
| 796 |
+
minimum=0.1,
|
| 797 |
+
maximum=2.0,
|
| 798 |
+
value=0.8,
|
| 799 |
+
step=0.1,
|
| 800 |
+
info="Controls randomness; lower values reduce noise."
|
| 801 |
+
)
|
| 802 |
+
duration = gr.Radio(
|
| 803 |
+
label="Song Length β³",
|
| 804 |
+
choices=["30 seconds", "60 seconds", "Full Song (180 seconds)"],
|
| 805 |
+
value="30 seconds",
|
| 806 |
+
info="Select the duration of the track. Full Song generates a 180s track with three 60s chunks (verse, chorus, bridge)."
|
| 807 |
+
)
|
| 808 |
+
bpm = gr.Slider(
|
| 809 |
+
label="Tempo π΅ (BPM)",
|
| 810 |
+
minimum=60,
|
| 811 |
+
maximum=180,
|
| 812 |
+
value=120,
|
| 813 |
+
step=1,
|
| 814 |
+
info="Beats per minute to set the track's tempo."
|
| 815 |
+
)
|
| 816 |
+
drum_beat = gr.Dropdown(
|
| 817 |
+
label="Drum Beat π₯",
|
| 818 |
+
choices=["none", "standard rock", "funk groove", "techno kick", "jazz swing"],
|
| 819 |
+
value="none",
|
| 820 |
+
info="Select a drum beat style to influence the rhythm."
|
| 821 |
+
)
|
| 822 |
+
synthesizer = gr.Dropdown(
|
| 823 |
+
label="Synthesizer πΉ",
|
| 824 |
+
choices=["none", "analog synth", "digital pad", "arpeggiated synth"],
|
| 825 |
+
value="none",
|
| 826 |
+
info="Select a synthesizer style for electronic accents."
|
| 827 |
+
)
|
| 828 |
+
rhythmic_steps = gr.Dropdown(
|
| 829 |
+
label="Rhythmic Steps π£",
|
| 830 |
+
choices=["none", "syncopated steps", "steady steps", "complex steps"],
|
| 831 |
+
value="none",
|
| 832 |
+
info="Select a rhythmic step style to enhance the beat."
|
| 833 |
+
)
|
| 834 |
+
bass_style = gr.Dropdown(
|
| 835 |
+
label="Bass Style πΈ",
|
| 836 |
+
choices=["none", "slap bass", "deep bass", "melodic bass"],
|
| 837 |
+
value="none",
|
| 838 |
+
info="Select a bass style to shape the low end."
|
| 839 |
+
)
|
| 840 |
+
guitar_style = gr.Dropdown(
|
| 841 |
+
label="Guitar Style πΈ",
|
| 842 |
+
choices=["none", "distorted", "clean", "jangle"],
|
| 843 |
+
value="none",
|
| 844 |
+
info="Select a guitar style to define the riffs."
|
| 845 |
+
)
|
| 846 |
+
target_volume = gr.Slider(
|
| 847 |
+
label="Target Volume ποΈ (dBFS RMS)",
|
| 848 |
+
minimum=-30.0,
|
| 849 |
+
maximum=-20.0,
|
| 850 |
+
value=-23.0,
|
| 851 |
+
step=1.0,
|
| 852 |
+
info="Adjust output loudness (-23 dBFS is standard, -20 dBFS is louder, -30 dBFS is quieter)."
|
| 853 |
+
)
|
| 854 |
+
preset = gr.Dropdown(
|
| 855 |
+
label="Preset Configuration ποΈ",
|
| 856 |
+
choices=["default", "rock", "techno", "grunge", "indie"],
|
| 857 |
+
value="default",
|
| 858 |
+
info="Select a preset optimized for specific genres."
|
| 859 |
+
)
|
| 860 |
+
|
| 861 |
+
with gr.Row(elem_classes="action-buttons"):
|
| 862 |
+
gen_btn = gr.Button("Generate Music π")
|
| 863 |
+
clr_btn = gr.Button("Clear Inputs π§Ή")
|
| 864 |
+
|
| 865 |
+
with gr.Column(elem_classes="output-container"):
|
| 866 |
+
gr.Markdown("### π§ Output")
|
| 867 |
+
out_audio = gr.Audio(label="Generated Instrumental Track π΅", type="filepath")
|
| 868 |
+
status = gr.Textbox(label="Status π’", interactive=False)
|
| 869 |
+
vram_status = gr.Textbox(label="VRAM Usage π", interactive=False, value="")
|
| 870 |
+
|
| 871 |
+
with gr.Column(elem_classes="logs-container"):
|
| 872 |
+
gr.Markdown("### π Logs")
|
| 873 |
+
log_output = gr.Textbox(label="Last Log File Contents", lines=20, interactive=False)
|
| 874 |
+
log_btn = gr.Button("View Last Log π")
|
| 875 |
+
|
| 876 |
+
rhcp_btn.click(set_red_hot_chili_peppers_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 877 |
+
nirvana_btn.click(set_nirvana_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 878 |
+
pearl_jam_btn.click(set_pearl_jam_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 879 |
+
soundgarden_btn.click(set_soundgarden_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 880 |
+
foo_fighters_btn.click(set_foo_fighters_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 881 |
+
smashing_pumpkins_btn.click(set_smashing_pumpkins_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 882 |
+
radiohead_btn.click(set_radiohead_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 883 |
+
classic_rock_btn.click(set_classic_rock_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 884 |
+
alternative_rock_btn.click(set_alternative_rock_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 885 |
+
post_punk_btn.click(set_post_punk_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 886 |
+
indie_rock_btn.click(set_indie_rock_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 887 |
+
funk_rock_btn.click(set_funk_rock_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 888 |
+
detroit_techno_btn.click(set_detroit_techno_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 889 |
+
deep_house_btn.click(set_deep_house_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, gr.State("verse")], outputs=instrumental_prompt)
|
| 890 |
+
gen_btn.click(
|
| 891 |
+
generate_music,
|
| 892 |
+
inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, vram_status],
|
| 893 |
+
outputs=[out_audio, status, vram_status]
|
| 894 |
+
)
|
| 895 |
+
clr_btn.click(
|
| 896 |
+
clear_inputs,
|
| 897 |
+
inputs=None,
|
| 898 |
+
outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, vram_status]
|
| 899 |
+
)
|
| 900 |
+
log_btn.click(
|
| 901 |
+
get_latest_log,
|
| 902 |
+
inputs=None,
|
| 903 |
+
outputs=log_output
|
| 904 |
+
)
|
| 905 |
+
|
| 906 |
+
# Launch locally without OpenAPI/docs
|
| 907 |
+
logger.info("Launching Gradio UI at http://localhost:9999...")
|
| 908 |
+
app = demo.launch(
|
| 909 |
+
server_name="0.0.0.0",
|
| 910 |
+
server_port=9999,
|
| 911 |
+
share=False,
|
| 912 |
+
inbrowser=False,
|
| 913 |
+
show_error=True
|
| 914 |
+
)
|
| 915 |
+
try:
|
| 916 |
+
fastapi_app = demo._server.app
|
| 917 |
+
fastapi_app.docs_url = None
|
| 918 |
+
fastapi_app.redoc_url = None
|
| 919 |
+
fastapi_app.openapi_url = None
|
| 920 |
+
except Exception as e:
|
| 921 |
+
logger.error(f"Failed to configure FastAPI app: {e}")
|