Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
801ea60
1
Parent(s):
f086c75
All the tools and gradio server
Browse files- .gitignore +82 -0
- README.md +1 -1
- mcp_server.py +426 -0
- mypy.ini +2 -0
- requirements.txt +14 -0
- tools/__init__.py +0 -0
- tools/combine_tracks.py +387 -0
- tools/pitch_alignment.py +228 -0
- tools/stems_separation.py +358 -0
- tools/time_strech.py +145 -0
- tools/youtube_extract.py +167 -0
.gitignore
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
*.egg-info/
|
| 23 |
+
.installed.cfg
|
| 24 |
+
*.egg
|
| 25 |
+
|
| 26 |
+
# PyInstaller
|
| 27 |
+
# Usually these files are written by a python script from a template
|
| 28 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 29 |
+
*.manifest
|
| 30 |
+
*.spec
|
| 31 |
+
|
| 32 |
+
# Installer logs
|
| 33 |
+
pip-log.txt
|
| 34 |
+
pip-delete-this-directory.txt
|
| 35 |
+
|
| 36 |
+
# Unit test / coverage reports
|
| 37 |
+
htmlcov/
|
| 38 |
+
.tox/
|
| 39 |
+
.nox/
|
| 40 |
+
.coverage
|
| 41 |
+
.coverage.*
|
| 42 |
+
.cache
|
| 43 |
+
nosetests.xml
|
| 44 |
+
coverage.xml
|
| 45 |
+
*.cover
|
| 46 |
+
.hypothesis/
|
| 47 |
+
.pytest_cache/
|
| 48 |
+
|
| 49 |
+
# Jupyter Notebook
|
| 50 |
+
.ipynb_checkpoints
|
| 51 |
+
|
| 52 |
+
# pyenv
|
| 53 |
+
.python-version
|
| 54 |
+
|
| 55 |
+
# mypy
|
| 56 |
+
.mypy_cache/
|
| 57 |
+
.dmypy.json
|
| 58 |
+
dmypy.json
|
| 59 |
+
|
| 60 |
+
# Pyre type checker
|
| 61 |
+
.pyre/
|
| 62 |
+
|
| 63 |
+
# VS Code
|
| 64 |
+
.vscode/
|
| 65 |
+
|
| 66 |
+
# Local env
|
| 67 |
+
.env
|
| 68 |
+
.venv
|
| 69 |
+
env/
|
| 70 |
+
venv/
|
| 71 |
+
ENV/
|
| 72 |
+
env.bak/
|
| 73 |
+
venv.bak/
|
| 74 |
+
|
| 75 |
+
# MacOS
|
| 76 |
+
.DS_Store
|
| 77 |
+
|
| 78 |
+
# IDEs
|
| 79 |
+
.idea/
|
| 80 |
+
*.iml
|
| 81 |
+
*.sublime-workspace
|
| 82 |
+
*.sublime-project
|
README.md
CHANGED
|
@@ -5,7 +5,7 @@ colorFrom: indigo
|
|
| 5 |
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
-
app_file:
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
|
|
|
| 5 |
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
+
app_file: mcp_server.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
mcp_server.py
ADDED
|
@@ -0,0 +1,426 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from typing import Dict, Tuple
|
| 3 |
+
|
| 4 |
+
from tools.combine_tracks import combine_tracks, create_medley
|
| 5 |
+
from tools.stems_separation import (
|
| 6 |
+
separate_audio,
|
| 7 |
+
extract_selected_stems,
|
| 8 |
+
extract_vocal_non_vocal,
|
| 9 |
+
create_karaoke_track,
|
| 10 |
+
)
|
| 11 |
+
from tools.time_strech import align_songs_by_bpm, stretch_to_bpm
|
| 12 |
+
from tools.youtube_extract import extract_audio_from_youtube
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def pitch_shift_with_semitones(audio_path: str, semitones: int) -> str:
    """
    Shift the pitch of an audio file by a specified number of semitones.

    Uses librosa's pitch-shifting algorithm to change the musical pitch of
    an audio file while maintaining its tempo and duration.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC)
        semitones: Number of semitones to shift (positive = higher pitch, negative = lower pitch)
            Intended range: -12 to +12 semitones (1 octave up/down); values
            outside this range are not rejected.

    Returns:
        Path to the pitch-shifted audio file in WAV format (16-bit PCM).
        When ``semitones`` is 0 the original ``audio_path`` is returned
        unchanged, with no re-encoding.

    Examples:
        - semitones=2: Shift up by 2 semitones (1 whole tone)
        - semitones=-5: Shift down by 5 semitones (1 perfect fourth)
        - semitones=0: No change (returns original file)

    Note:
        The function creates a temporary WAV file that should be cleaned up by the caller.
    """
    if semitones == 0:
        # Nothing to do -- skip the lossy decode/encode round trip entirely.
        return audio_path

    # Imported lazily: librosa and soundfile are heavyweight and only needed
    # when a shift is actually performed.
    import os
    import tempfile

    import librosa
    import soundfile as sf

    # sr=None preserves the file's native sample rate; mono=False keeps channels.
    y, sr = librosa.load(audio_path, sr=None, mono=False)

    # Apply the pitch shift (tempo/duration preserved).
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=semitones)

    # Use mkstemp and close the descriptor before writing: an open
    # NamedTemporaryFile cannot be reopened by soundfile on Windows.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    if y_shifted.ndim == 2:
        # librosa returns (channels, samples); soundfile expects (samples, channels).
        y_shifted = y_shifted.T
    sf.write(out_path, y_shifted, sr, format="wav", subtype="PCM_16")
    return out_path
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def stretch_audio_to_bpm_wrapper(audio_path: str, target_bpm: float) -> str:
    """Time-stretch an audio file so that it plays at ``target_bpm``.

    Thin Gradio-facing wrapper around ``stretch_to_bpm``, which detects the
    source tempo and stretches the audio to the requested tempo without
    altering its musical pitch -- useful for beat-matching and tempo
    alignment.

    Args:
        audio_path: Path to the input audio file (supports common formats:
            WAV, MP3, FLAC).
        target_bpm: Desired tempo in beats per minute. Typical range is
            60-200 BPM; common values are 90 (slow), 120 (medium),
            128 (electronic), and 140 (fast).

    Returns:
        Path to the time-stretched audio file in WAV format.

    Note:
        The original BPM is detected automatically and the stretch factor is
        derived from it; a new WAV file is created with the modified tempo.
    """
    return stretch_to_bpm(audio_path, target_bpm)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def extract_selected_stems_wrapper(
    audio_path: str, vocals: bool, drums: bool, bass: bool, other: bool
) -> Dict[str, str]:
    """Extract only the stems the user asked for from an audio file.

    Rather than always producing all four stems, this wrapper collects the
    enabled checkboxes into a stem list and delegates to
    ``extract_selected_stems``, saving storage when only some elements are
    needed.

    Args:
        audio_path: Path to the input audio file (supports common formats:
            WAV, MP3, FLAC, M4A).
        vocals: Whether to extract the vocals stem.
        drums: Whether to extract the drums stem.
        bass: Whether to extract the bass stem.
        other: Whether to extract the other stem.

    Returns:
        dict[str, str]: Mapping of stem names to their output file paths.

    Raises:
        ValueError: If no stem is selected.

    Examples:
        - vocals=True, drums=True, bass=False, other=False: vocals and drums only
        - vocals=True, drums=False, bass=False, other=False: vocals only (karaoke prep)
        - vocals=False, drums=True, bass=True, other=False: rhythm section only

    Note:
        Uses the same Demucs model as ``separate_audio``; processing time
        matches a full separation since Demucs extracts all stems internally.
    """
    flags = (("vocals", vocals), ("drums", drums), ("bass", bass), ("other", other))
    stems_to_extract = [stem for stem, selected in flags if selected]

    if not stems_to_extract:
        raise ValueError("At least one stem must be selected for extraction")

    return extract_selected_stems(audio_path, stems_to_extract)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
    """Split an audio file into a vocal track and an instrumental track.

    Convenience wrapper around ``extract_vocal_non_vocal`` for karaoke
    creation, vocal isolation, or instrumental extraction.

    Args:
        audio_path: Path to the input audio file (supports common formats:
            WAV, MP3, FLAC, M4A).

    Returns:
        tuple[str, str]: ``(vocals_file, instrumental_file)`` where
        ``vocals_file`` is the isolated vocal track and
        ``instrumental_file`` is the combined drums + bass + other mix.

    Examples:
        - extract_vocal_non_vocal_wrapper('song.mp3'): vocals + instrumental
        - extract_vocal_non_vocal_wrapper('song.wav'): vocal and backing versions

    Note:
        Uses the same Demucs model as ``separate_audio``; the instrumental
        side is mixed and normalized for consistent volume.
    """
    return extract_vocal_non_vocal(audio_path)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def create_karaoke_track_wrapper(audio_path: str) -> str:
    """Produce a karaoke-ready instrumental by stripping vocals from a song.

    Convenience wrapper around ``create_karaoke_track``: the vocal stem is
    discarded and the remaining stems are combined into a single backing
    track.

    Args:
        audio_path: Path to the input audio file (supports common formats:
            WAV, MP3, FLAC, M4A).

    Returns:
        Path to the karaoke (instrumental) audio file.

    Examples:
        - create_karaoke_track_wrapper('song.mp3'): karaoke version
        - create_karaoke_track_wrapper('song.wav'): instrumental backing track

    Note:
        Uses the same Demucs model as ``separate_audio``; drums, bass, and
        other stems are merged and normalized for consistent volume --
        suitable for karaoke or backing-track use.
    """
    return create_karaoke_track(audio_path)
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def create_interface():
    """Create the Gradio TabbedInterface exposing every audio tool.

    Builds one ``gr.Interface`` per tool and groups them into a
    ``gr.TabbedInterface``. Fixes over the original version: the duplicated
    "Tab 7: YouTube Extraction" block (dead code that rebound the same
    variable) is removed, and the one deprecated ``allow_flagging`` kwarg is
    unified with the ``flagging_mode`` used everywhere else.

    Returns:
        gr.TabbedInterface with ten tabs, one per audio tool.
    """
    # Keyword arguments shared by every sub-interface; a single dict keeps
    # flagging/example behaviour consistent across all tabs.
    common = {
        "examples": None,
        "cache_examples": False,
        "flagging_mode": "never",
    }

    # Tab 1: Stem Separation
    stem_interface = gr.Interface(
        fn=separate_audio,
        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        outputs=[
            gr.Audio(label="Vocals", type="filepath"),
            gr.Audio(label="Drums", type="filepath"),
            gr.Audio(label="Bass", type="filepath"),
            gr.Audio(label="Other", type="filepath"),
        ],
        title="Audio Stem Separation",
        description="Upload an audio file to separate it into vocals, drums, bass, and other stems.",
        **common,
    )

    # Tab 2: Track Combination
    combine_interface = gr.Interface(
        fn=combine_tracks,
        inputs=[
            gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
            gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
            gr.Slider(
                minimum=0.0, maximum=1.0, value=0.5, label="Weight for First Track"
            ),
            gr.Slider(
                minimum=0.0, maximum=1.0, value=0.5, label="Weight for Second Track"
            ),
            gr.Checkbox(value=True, label="Normalize Output"),
            gr.Number(value=0.0, label="Fade In Duration (seconds)"),
            gr.Number(value=0.0, label="Fade Out Duration (seconds)"),
        ],
        outputs=gr.Audio(label="Combined Track", type="filepath"),
        title="Combine Audio Tracks",
        description="Combine two audio tracks with adjustable weights and optional fade effects.",
        **common,
    )

    # Tab 3: Pitch Alignment
    pitch_interface = gr.Interface(
        fn=pitch_shift_with_semitones,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Number(value=0, label="Semitones to Shift"),
        ],
        outputs=gr.Audio(label="Pitch Shifted Audio", type="filepath"),
        title="Pitch Shift Audio",
        description="Shift the pitch of an audio file by specified semitones.",
        **common,
    )

    # Tab 4: Time Stretching
    # NOTE: the original used the deprecated allow_flagging="never" here;
    # flagging_mode (via `common`) is the current equivalent.
    stretch_interface = gr.Interface(
        fn=stretch_audio_to_bpm_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Number(value=120, label="Target BPM"),
        ],
        outputs=gr.Audio(label="Stretched Audio", type="filepath"),
        title="Stretch Audio to BPM",
        description="Stretch audio to match a specific BPM.",
        **common,
    )

    # Tab 5: BPM Alignment
    bpm_interface = gr.Interface(
        fn=align_songs_by_bpm,
        inputs=[
            gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
            gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
        ],
        outputs=[
            gr.Audio(label="Aligned First Track", type="filepath"),
            gr.Audio(label="Aligned Second Track", type="filepath"),
        ],
        title="Align Songs by BPM",
        description="Align two songs to the same BPM by stretching the faster one to match the slower one.",
        **common,
    )

    # Tab 6: Selective Stem Extraction
    selective_interface = gr.Interface(
        fn=extract_selected_stems_wrapper,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
            gr.Checkbox(value=True, label="Extract Vocals"),
            gr.Checkbox(value=True, label="Extract Drums"),
            gr.Checkbox(value=True, label="Extract Bass"),
            gr.Checkbox(value=True, label="Extract Other"),
        ],
        outputs=gr.JSON(label="Extracted Stems"),
        title="Selective Stem Extraction",
        description="Extract only specific stems from an audio file to save processing time and storage.",
        **common,
    )

    # Tab 7: Vocal/Non-Vocal Separation
    vocal_nonvocal_interface = gr.Interface(
        fn=extract_vocal_non_vocal_wrapper,
        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        outputs=[
            gr.Audio(label="Vocals Track", type="filepath"),
            gr.Audio(label="Instrumental Track", type="filepath"),
        ],
        title="Vocal/Instrumental Separation",
        description="Separate audio into vocal and instrumental components for karaoke or vocal isolation.",
        **common,
    )

    # Tab 8: Karaoke Track Creation
    karaoke_interface = gr.Interface(
        fn=create_karaoke_track_wrapper,
        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
        outputs=gr.Audio(label="Karaoke Track", type="filepath"),
        title="Create Karaoke Track",
        description="Create a karaoke-ready instrumental track by removing vocals from any song.",
        **common,
    )

    # Tab 9: Medley Creation
    medley_interface = gr.Interface(
        fn=create_medley,
        inputs=[
            gr.Audio(type="filepath", label="Vocals Stem", sources=["upload"]),
            gr.Audio(type="filepath", label="Instrumental Stem", sources=["upload"]),
            gr.Number(
                value=1.2, label="Vocals Gain", minimum=0.1, maximum=3.0, step=0.1
            ),
            gr.Number(
                value=0.9, label="Instrumental Gain", minimum=0.1, maximum=3.0, step=0.1
            ),
            gr.Textbox(
                value="threshold=-18dB:ratio=3:attack=50:release=200",
                label="Compressor Settings",
                placeholder="threshold=-18dB:ratio=3:attack=50:release=200",
            ),
            gr.Dropdown(
                choices=["libmp3lame", "aac", "flac", "pcm_s16le"],
                value="libmp3lame",
                label="Audio Codec",
            ),
            gr.Textbox(value="192k", label="Audio Bitrate", placeholder="192k"),
        ],
        outputs=gr.Audio(label="Medley Audio", type="filepath"),
        title="Create Vocal/Instrumental Medley",
        description="Mix vocals and instrumental stems into a polished medley with compression and gain control.",
        **common,
    )

    # Tab 10: YouTube Extraction
    # (The original defined this interface twice; the duplicate is removed.)
    youtube_interface = gr.Interface(
        fn=extract_audio_from_youtube,
        inputs=[
            gr.Textbox(
                label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."
            ),
            gr.Dropdown(
                choices=["wav", "mp3", "flac"], value="wav", label="Output Format"
            ),
            gr.Dropdown(choices=["best", "worst"], value="best", label="Audio Quality"),
        ],
        outputs=gr.Audio(label="Extracted Audio", type="filepath"),
        title="Extract Audio from YouTube",
        description="Extract audio from a YouTube video URL.",
        **common,
    )

    return gr.TabbedInterface(
        [
            stem_interface,
            combine_interface,
            pitch_interface,
            stretch_interface,
            bpm_interface,
            selective_interface,
            vocal_nonvocal_interface,
            karaoke_interface,
            medley_interface,
            youtube_interface,
        ],
        [
            "Stem Separation",
            "Track Combination",
            "Pitch Alignment",
            "Time Stretching",
            "BPM Alignment",
            "Selective Stems",
            "Vocal/Instrumental",
            "Karaoke Creation",
            "Medley Creation",
            "YouTube Extraction",
        ],
    )
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
if __name__ == "__main__":
    # Launch the Gradio app on all interfaces so it is reachable inside the
    # Spaces container; mcp_server=True also exposes the tools over MCP.
    interface = create_interface()
    interface.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)
|
mypy.ini
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[mypy-untyped_package.*]
|
| 2 |
+
follow_untyped_imports = True
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio[mcp]>=4.0.0
|
| 2 |
+
librosa>=0.10.0
|
| 3 |
+
numpy>=1.24.0
|
| 4 |
+
torch>=2.0.0
|
| 5 |
+
torchaudio>=2.0.0
|
| 6 |
+
transformers>=4.30.0
|
| 7 |
+
soundfile>=0.12.0
|
| 8 |
+
pydub>=0.25.0
|
| 9 |
+
demucs>=4.0.0
|
| 10 |
+
pytest>=7.0.0
|
| 11 |
+
ruff>=0.1.0
|
| 12 |
+
mypy>=1.0.0
|
| 13 |
+
smolagents[mcp]
|
| 14 |
+
yt_dlp>=2025.11.12
|
tools/__init__.py
ADDED
|
File without changes
|
tools/combine_tracks.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
import tempfile
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
import librosa
|
| 8 |
+
import numpy as np
|
| 9 |
+
import soundfile as sf
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def combine_tracks(
    track1_path: str,
    track2_path: str,
    weight1: float = 0.5,
    weight2: float = 0.5,
    output_path: Optional[str] = None,
    normalize: bool = True,
    fade_in: float = 0.0,
    fade_out: float = 0.0,
) -> str:
    """
    Combine two audio tracks into a new single stereo audio track with adjustable mixing weights.

    This function mixes two audio files together with customizable balance,
    normalization, and fade effects. Useful for creating mashups, adding
    background music to vocals, or layering multiple audio sources.

    Args:
        track1_path: Path to first audio file (supports common formats: WAV, MP3, FLAC)
        track2_path: Path to second audio file (supports common formats: WAV, MP3, FLAC)
        weight1: Weight factor for first track (0.0-1.0, default: 0.5)
            1.0 = full volume, 0.5 = half volume, 0.0 = silent
        weight2: Weight factor for second track (0.0-1.0, default: 0.5)
            1.0 = full volume, 0.5 = half volume, 0.0 = silent
        output_path: Optional output *directory*; the mix is written to
            ``<output_path>/stereo_combined.wav`` (default: a temp directory)
        normalize: Whether to normalize the final output to prevent clipping (default: True)
        fade_in: Fade in duration in seconds (default: 0.0)
        fade_out: Fade out duration in seconds (default: 0.0)

    Returns:
        Path to the combined audio file in WAV format (16-bit PCM)

    Raises:
        RuntimeError: If loading, mixing, or writing fails; the original
            exception is chained as the cause.

    Examples:
        - weight1=0.8, weight2=0.2: First track dominates the mix
        - weight1=0.5, weight2=0.5: Equal balance between tracks
        - fade_in=2.0, fade_out=3.0: Gradual volume increase and decrease

    Note:
        Both tracks keep their native sample rates on load; the second track
        is resampled to the first track's rate when they differ.
        Tracks of different lengths are padded with silence to match the longer one.
    """
    try:
        # sr=None keeps each file's native rate; librosa's default would
        # silently resample everything to 22050 Hz and degrade quality.
        y1, sr1 = librosa.load(track1_path, sr=None, mono=False)
        y2, sr2 = librosa.load(track2_path, sr=None, mono=False)

        # Promote mono inputs to 2-channel so the arrays are always (2, n).
        if y1.ndim == 1:
            y1 = np.stack([y1, y1])
        if y2.ndim == 1:
            y2 = np.stack([y2, y2])

        # Resample track 2 to track 1's rate when they differ.
        if sr1 != sr2:
            y2 = librosa.resample(y2, orig_sr=sr2, target_sr=sr1)
            sr2 = sr1

        # Zero-pad the shorter track so both have the same length.
        max_length = max(y1.shape[1], y2.shape[1])
        if y1.shape[1] < max_length:
            y1 = np.pad(y1, ((0, 0), (0, max_length - y1.shape[1])), mode="constant")
        if y2.shape[1] < max_length:
            y2 = np.pad(y2, ((0, 0), (0, max_length - y2.shape[1])), mode="constant")

        # Weighted sum of the two (2, n) arrays.
        combined = weight1 * y1 + weight2 * y2

        # Apply fades; clamp the fade window to the track length so a fade
        # longer than the audio cannot cause a broadcasting error.
        if fade_in > 0:
            fade_samples = min(int(fade_in * sr1), combined.shape[1])
            if fade_samples > 0:
                fade_curve = np.linspace(0, 1, fade_samples)
                combined[:, :fade_samples] *= fade_curve

        if fade_out > 0:
            fade_samples = min(int(fade_out * sr1), combined.shape[1])
            if fade_samples > 0:
                fade_curve = np.linspace(1, 0, fade_samples)
                combined[:, -fade_samples:] *= fade_curve

        # Peak-normalize to -0.45 dBFS-ish (0.95) to avoid clipping.
        if normalize:
            max_val = np.max(np.abs(combined))
            if max_val > 0:
                combined = combined / max_val * 0.95

        # output_path is treated as a directory; create it if needed.
        if output_path:
            os.makedirs(output_path, exist_ok=True)
        else:
            output_path = tempfile.mkdtemp(suffix="_combined")

        final_audio_filename = os.path.join(output_path, "stereo_combined.wav")
        sf.write(final_audio_filename, combined.T, sr1, format="wav", subtype="PCM_16")

        return final_audio_filename

    except Exception as e:
        # Chain the cause so the underlying failure is visible to callers.
        raise RuntimeError(f"Error combining tracks: {str(e)}") from e
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def create_stereo_mix(
    left_track_path: str,
    right_track_path: str,
    output_path: Optional[str] = None,
    normalize: bool = True,
) -> str:
    """
    Build a two-channel file from two sources, one per stereo channel.

    Args:
        left_track_path: Audio file routed to the left channel
        right_track_path: Audio file routed to the right channel
        output_path: Optional directory for the result (default: temp dir)
        normalize: Peak-normalize the combined signal when True

    Returns:
        Path to the rendered stereo WAV file

    Raises:
        RuntimeError: If loading, mixing, or writing fails.
    """
    try:
        # Decode each source as a mono signal (librosa's default sample rate).
        left, sr_left = librosa.load(left_track_path, mono=True)
        right, sr_right = librosa.load(right_track_path, mono=True)

        # Bring the right channel onto the left channel's sample rate.
        if sr_left != sr_right:
            right = librosa.resample(right, orig_sr=sr_right, target_sr=sr_left)
            sr_right = sr_left

        # Zero-pad whichever signal is shorter so both channels line up.
        target_len = max(len(left), len(right))
        if len(left) < target_len:
            left = np.pad(left, (0, target_len - len(left)), mode="constant")
        if len(right) < target_len:
            right = np.pad(right, (0, target_len - len(right)), mode="constant")

        # Shape (2, n): row 0 -> left channel, row 1 -> right channel.
        stereo = np.array([left, right])

        # Scale so the loudest sample sits at 0.95 of full scale.
        if normalize:
            peak = np.max(np.abs(stereo))
            if peak > 0:
                stereo = stereo / peak * 0.95

        # Resolve the destination directory, creating it when needed.
        if output_path is None:
            output_path = tempfile.mkdtemp(suffix="_combined")
        else:
            os.makedirs(output_path, exist_ok=True)

        final_audio_filename = os.path.join(output_path, "stereo_mix.wav")
        # soundfile expects (frames, channels), hence the transpose.
        sf.write(
            final_audio_filename, stereo.T, sr_left, format="wav", subtype="PCM_16"
        )

        return final_audio_filename

    except Exception as e:
        raise RuntimeError(f"Error creating stereo mix: {str(e)}")
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def create_medley(
    vocals_path: str,
    instrumental_path: str,
    *,
    output_path: Optional[str] = None,
    vocals_gain: float = 1.2,
    instrumental_gain: float = 0.9,
    compressor: str = "threshold=-18dB:ratio=3:attack=50:release=200",
    audio_codec: str = "libmp3lame",
    audio_bitrate: str = "192k",
) -> str:
    """
    Mix a vocal stem with an instrumental stem using ffmpeg filters.

    Parameters
    ----------
    vocals_path : str
        Absolute path (or MCP-accessible URI) to the vocals stem.
    instrumental_path : str
        Absolute path (or MCP-accessible URI) to the instrumental/no-vocals stem.
    output_path : str, optional
        Where to write the medley. Defaults to a temp file the MCP tool returns.
    vocals_gain : float
        Linear gain applied to the vocals stem (1.0 = unity).
    instrumental_gain : float
        Linear gain applied to the instrumental stem.
    compressor : str
        ffmpeg acompressor parameters for peak control after mixing.
    audio_codec : str
        Target codec passed to ffmpeg's -c:a flag.
    audio_bitrate : str
        Bitrate passed to ffmpeg's -b:a flag.

    Returns
    -------
    str
        Path to the rendered medley file.

    Raises
    ------
    FileNotFoundError
        If either input stem does not exist.
    RuntimeError
        If the ffmpeg subprocess exits non-zero.
    """
    # Normalize both inputs to absolute paths and verify they exist up front,
    # so the error is clearer than an ffmpeg "No such file" message.
    vocals = Path(vocals_path).expanduser().resolve()
    instrumental = Path(instrumental_path).expanduser().resolve()
    if not vocals.exists():
        raise FileNotFoundError(f"Vocals stem not found: {vocals}")
    if not instrumental.exists():
        raise FileNotFoundError(f"Instrumental stem not found: {instrumental}")

    # Pick (and create) the output location.
    if output_path is None:
        tmp_dir = tempfile.mkdtemp(prefix="mcp-medley-")
        output = Path(tmp_dir) / "unidos_hyper_medley.mp3"
    else:
        output = Path(output_path).expanduser().resolve()
        output.parent.mkdir(parents=True, exist_ok=True)

    # Filter graph: apply each gain, mix both inputs for the longest
    # duration, then run a compressor over the summed signal.
    filter_complex = (
        f"[0:a]volume={vocals_gain}[v];"
        f"[1:a]volume={instrumental_gain}[i];"
        f"[v][i]amix=inputs=2:duration=longest:dropout_transition=2,"
        f"acompressor={compressor}"
    )

    # argv list (shell=False) — inputs are never interpreted by a shell.
    cmd = [
        "ffmpeg", "-y",
        "-i", str(vocals),
        "-i", str(instrumental),
        "-filter_complex", filter_complex,
        "-c:a", audio_codec,
        "-b:a", audio_bitrate,
        str(output),
    ]

    completed = subprocess.run(cmd, capture_output=True, text=True)
    if completed.returncode != 0:
        raise RuntimeError(
            f"ffmpeg failed ({completed.returncode}):\n"
            f"STDOUT:\n{completed.stdout}\nSTDERR:\n{completed.stderr}"
        )

    return str(output)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
if __name__ == "__main__":
    # CLI entry point: dispatches to combine_tracks / create_stereo_mix /
    # create_medley based on the chosen sub-command.
    import argparse

    parser = argparse.ArgumentParser(description="Combine audio tracks")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # "combine": weighted sum of two tracks with optional fades.
    combine_parser = subparsers.add_parser(
        "combine", help="Combine two tracks with weights"
    )
    combine_parser.add_argument("track1", help="Path to first audio file")
    combine_parser.add_argument("track2", help="Path to second audio file")
    combine_parser.add_argument(
        "--weight1", type=float, default=0.5, help="Weight for first track (0.0-1.0)"
    )
    combine_parser.add_argument(
        "--weight2", type=float, default=0.5, help="Weight for second track (0.0-1.0)"
    )
    # argparse maps --fade-in/--fade-out to args.fade_in/args.fade_out.
    combine_parser.add_argument(
        "--fade-in", type=float, default=0.0, help="Fade in duration in seconds"
    )
    combine_parser.add_argument(
        "--fade-out", type=float, default=0.0, help="Fade out duration in seconds"
    )
    combine_parser.add_argument(
        "--no-normalize", action="store_true", help="Disable normalization"
    )
    # NOTE(review): despite the help text, this value is used as an output
    # *directory* by combine_tracks — confirm against the function.
    combine_parser.add_argument(
        "--output", type=str, default="output", help="Output file path"
    )

    # "stereo": one track per channel.
    stereo_parser = subparsers.add_parser(
        "stereo", help="Create stereo mix (left/right channels)"
    )
    stereo_parser.add_argument("left", help="Path to left channel audio file")
    stereo_parser.add_argument("right", help="Path to right channel audio file")
    stereo_parser.add_argument(
        "--no-normalize", action="store_true", help="Disable normalization"
    )
    stereo_parser.add_argument(
        "--output", type=str, default="stereo_output", help="Output file path"
    )

    # "medley": ffmpeg-based vocal/instrumental mix.
    medley_parser = subparsers.add_parser(
        "medley", help="Create a vocal/instrumental medley using ffmpeg"
    )
    medley_parser.add_argument("vocals", help="Path to vocals stem audio file")
    medley_parser.add_argument(
        "instrumental", help="Path to instrumental stem audio file"
    )
    medley_parser.add_argument(
        "--vocals-gain",
        type=float,
        default=1.2,
        help="Linear gain for vocals (default: 1.2)",
    )
    medley_parser.add_argument(
        "--instrumental-gain",
        type=float,
        default=0.9,
        help="Linear gain for instrumental (default: 0.9)",
    )
    medley_parser.add_argument(
        "--compressor",
        type=str,
        default="threshold=-18dB:ratio=3:attack=50:release=200",
        help="FFmpeg acompressor parameters (default: threshold=-18dB:ratio=3:attack=50:release=200)",
    )
    medley_parser.add_argument(
        "--audio-codec",
        type=str,
        default="libmp3lame",
        help="Target audio codec (default: libmp3lame)",
    )
    medley_parser.add_argument(
        "--audio-bitrate",
        type=str,
        default="192k",
        help="Audio bitrate (default: 192k)",
    )
    medley_parser.add_argument(
        "--output", type=str, help="Output file path (default: temporary file)"
    )

    args = parser.parse_args()

    try:
        # Dispatch on the selected sub-command; --no-normalize is inverted
        # into the functions' positive `normalize` flag.
        if args.command == "combine":
            output = combine_tracks(
                args.track1,
                args.track2,
                weight1=args.weight1,
                weight2=args.weight2,
                normalize=not args.no_normalize,
                fade_in=args.fade_in,
                fade_out=args.fade_out,
                output_path=args.output,
            )
            print(f"Combined audio saved to: {output}")
        elif args.command == "stereo":
            output = create_stereo_mix(
                args.left,
                args.right,
                normalize=not args.no_normalize,
                output_path=args.output,
            )
            print(f"Stereo mix saved to: {output}")
        elif args.command == "medley":
            output = create_medley(
                args.vocals,
                args.instrumental,
                output_path=args.output,
                vocals_gain=args.vocals_gain,
                instrumental_gain=args.instrumental_gain,
                compressor=args.compressor,
                audio_codec=args.audio_codec,
                audio_bitrate=args.audio_bitrate,
            )
            print(f"Medley saved to: {output}")
        else:
            # No sub-command given: show usage instead of failing.
            parser.print_help()
    except Exception as e:
        # Report the failure on stdout and exit non-zero for shell callers.
        print(f"Error: {e}")
        exit(1)
|
tools/pitch_alignment.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
import librosa
|
| 5 |
+
import numpy as np
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _load_audio(audio_path: str, mono: bool = False) -> Tuple[np.ndarray, float]:
    """
    Load an audio file at its native sample rate.

    Args:
        audio_path: Path to audio file
        mono: Whether to down-mix to mono; False preserves the source's
            channel layout (stereo input yields a (2, n) array)

    Returns:
        Tuple of (audio_data, sample_rate)
    """
    # sr=None keeps the file's original sample rate; "soxr_vhq" selects
    # the highest-quality resampler should librosa need to resample.
    y, sr = librosa.load(audio_path, sr=None, mono=mono, res_type="soxr_vhq")
    return y, sr
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def estimate_key(audio_path: str) -> str:
    """
    Estimate the musical key of an audio file from its chroma profile.

    The recording's harmonic content is summarized as a constant-Q
    chromagram; the pitch class with the greatest average energy is
    reported as the key.

    Args:
        audio_path: Path to audio file (supports common formats: WAV, MP3, FLAC)

    Returns:
        Estimated key as a pitch-class name: one of
        'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'

    Raises:
        RuntimeError: If the file cannot be loaded or analyzed.

    Note:
        Uses medium-quality resampling for faster analysis. Most accurate
        for music with clear harmonic content; may be less reliable for
        atonal or highly percussive material. Major/minor mode is not
        distinguished — only the tonic pitch class is returned.
    """
    try:
        # Decode at librosa's default rate; "soxr_mq" trades a little
        # resampling quality for speed, which is fine for analysis.
        samples, rate = librosa.load(audio_path, res_type="soxr_mq")

        # Constant-Q chromagram: per-frame energy for each pitch class.
        chroma = librosa.feature.chroma_cqt(y=samples, sr=rate)

        # Average over time and take the dominant pitch class.
        dominant = int(np.argmax(chroma.mean(axis=1)))

        note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
        return note_names[dominant]

    except Exception as e:
        raise RuntimeError(f"Error estimating key: {str(e)}")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def key_to_semitones(key: str, target_key: str = "C") -> int:
    """
    Compute the smallest semitone shift that moves `key` to `target_key`.

    Args:
        key: Source key name (pitch class, e.g. 'C', 'F#')
        target_key: Target key to align to (default: 'C')

    Returns:
        Signed number of semitones in the range [-5, 6]; negative values
        mean shifting down is the shorter direction.

    Raises:
        ValueError: If either name is not a valid pitch class.
    """
    note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

    if key not in note_names or target_key not in note_names:
        raise ValueError("Invalid key name")

    # Upward distance modulo the octave...
    delta = (note_names.index(target_key) - note_names.index(key)) % 12
    # ...preferring a downward shift when going up would exceed a tritone.
    return delta - 12 if delta > 6 else delta
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def align_songs_by_key(
    audio1_path: str,
    audio2_path: str,
    target_key: str = "C",
    output_path: str = "output",
) -> Tuple[str, str]:
    """
    Align two songs to the same musical key by pitch shifting.

    Each track is passed to `shift_to_key`, which estimates its current
    key, computes the semitone delta to `target_key`, and writes a
    pitch-shifted copy into `output_path`.

    Args:
        audio1_path: Path to first audio file
        audio2_path: Path to second audio file
        target_key: Target key to align both songs to (default: 'C')
        output_path: Directory to save the aligned audio files

    Returns:
        Tuple of (aligned_audio1_path, aligned_audio2_path) - paths to processed files

    Raises:
        RuntimeError: If either track cannot be analyzed or shifted.
    """
    try:
        # Fix: an earlier revision also loaded both files here via
        # _load_audio and discarded the result; shift_to_key performs its
        # own loading, so those redundant full-file decodes were removed.
        aligned1_path = shift_to_key(audio1_path, target_key, output_path)
        aligned2_path = shift_to_key(audio2_path, target_key, output_path)

        return aligned1_path, aligned2_path

    except Exception as e:
        raise RuntimeError(f"Error aligning audio keys: {str(e)}") from e
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def shift_to_key(audio_path: str, target_key: str, output_path: str = "output") -> str:
    """
    Shift an audio file to a specific musical key.

    Args:
        audio_path: Path to audio file
        target_key: Target key to shift to
        output_path: Directory to save the shifted audio file

    Returns:
        Path to the pitch-shifted audio file

    Raises:
        RuntimeError: If key estimation, pitch shifting, or writing fails.
    """
    try:
        # Estimate the track's current key and the signed semitone delta.
        current_key = estimate_key(audio_path)
        semitones = key_to_semitones(current_key, target_key)

        # Load at native sample rate and pitch-shift with the
        # highest-quality resampler.
        y, sr = _load_audio(audio_path)
        y_shifted = librosa.effects.pitch_shift(
            y, n_steps=semitones, scale=True, sr=sr, res_type="soxr_vhq"
        )

        os.makedirs(output_path, exist_ok=True)

        # (channels, samples) -> (samples, channels) for soundfile.
        if y_shifted.ndim == 2:
            y_shifted = y_shifted.T

        # Fix: the previous `.replace(".wav", "")` only stripped .wav
        # extensions (a "song.mp3" input produced "song.mp3_shifted_...");
        # splitext drops whatever extension the input has. Also avoids
        # shadowing the `audio_path` parameter.
        base_name = os.path.splitext(os.path.basename(audio_path))[0]
        final_audio_path = os.path.join(
            output_path, f"{base_name}_shifted_to_{target_key}.wav"
        )
        sf.write(final_audio_path, y_shifted, sr, format="wav", subtype="PCM_16")

        return final_audio_path

    except Exception as e:
        raise RuntimeError(f"Error shifting key: {str(e)}") from e
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
if __name__ == "__main__":
    # CLI entry point: estimate / align / shift sub-commands over the
    # pitch-alignment helpers above.
    import argparse

    parser = argparse.ArgumentParser(
        description="Pitch alignment tools for audio files"
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # "estimate": report the key of a single file.
    estimate_parser = subparsers.add_parser(
        "estimate", help="Estimate the key of an audio file"
    )
    estimate_parser.add_argument("audio", help="Path to audio file")

    # "align": shift two songs to a shared target key.
    align_parser = subparsers.add_parser("align", help="Align two songs to same key")
    align_parser.add_argument("audio1", help="Path to first audio file")
    align_parser.add_argument("audio2", help="Path to second audio file")
    align_parser.add_argument(
        "--target-key", default="C", help="Target key to align to (default: C)"
    )

    # "shift": move a single file to a given key.
    shift_parser = subparsers.add_parser("shift", help="Shift audio to specific key")
    shift_parser.add_argument("audio", help="Path to audio file")
    shift_parser.add_argument("target_key", help="Target key to shift to")

    args = parser.parse_args()

    try:
        if args.command == "estimate":
            key = estimate_key(args.audio)
            print(f"Estimated key: {key}")
        elif args.command == "align":
            aligned1, aligned2 = align_songs_by_key(
                args.audio1, args.audio2, args.target_key
            )
            print(f"Aligned audio 1: {aligned1}")
            print(f"Aligned audio 2: {aligned2}")
        elif args.command == "shift":
            output = shift_to_key(args.audio, args.target_key)
            print(f"Shifted audio saved to: {output}")
        else:
            # No sub-command: show usage instead of failing.
            parser.print_help()
    except Exception as e:
        # Fix: the previous handler did `raise e` before `exit(1)`, which
        # made exit(1) unreachable and printed a duplicate traceback on top
        # of the message. Report once and exit non-zero, matching the other
        # tool CLIs in this package.
        print(f"Error: {e}")
        exit(1)
|
tools/stems_separation.py
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import subprocess
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Tuple, List, Dict, Optional
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Error(Exception):
    """Base exception for stem-separation failures in this module."""

    pass
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def separate_audio(
    audio_path: str, output_path: Optional[str] = None
) -> Tuple[str, str, str, str]:
    """
    Separate audio into vocals, drums, bass, and other stems using Demucs.

    This function runs the Demucs neural network model (htdemucs) as a
    subprocess to split a mixed audio file into individual instrument stems.
    It's particularly effective for separating vocals from instrumental
    backing tracks.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC, M4A)
        output_path: Directory to save the separated stems (default: 'output' directory)

    Returns:
        tuple[str, str, str, str]: Paths to the separated audio files in order:
            - vocals: Isolated vocal track
            - drums: Isolated drum/percussion track
            - bass: Isolated bass track
            - other: Remaining instruments (guitars, keyboards, etc.)

    Raises:
        Error: If the Demucs subprocess fails or any expected stem file
            is missing afterwards.

    Note:
        Processing time depends on audio length and system performance.
        Output files are written by Demucs in WAV format.
    """
    import sys  # local import: resolve the interpreter running this process

    try:
        # Default the destination, then nest the stems under "separated/".
        if not output_path:
            output_path = "output"

        output_dir = os.path.join(output_path, "separated")
        os.makedirs(output_dir, exist_ok=True)

        # Fix: invoke Demucs through the *current* interpreter rather than
        # whatever "python" resolves to on PATH, which could be a different
        # environment without demucs installed. argv list => shell=False.
        cmd = [
            sys.executable,
            "-m",
            "demucs.separate",
            "--out",
            output_dir,
            "--name",
            "htdemucs",
            audio_path,
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            raise Error(f"Demucs separation failed: {result.stderr}")

        # Demucs writes to <out>/<model>/<track stem>/<stem>.wav.
        track_name = Path(audio_path).stem
        htdemucs_dir = os.path.join(output_dir, "htdemucs", track_name)

        vocals_path = os.path.join(htdemucs_dir, "vocals.wav")
        drums_path = os.path.join(htdemucs_dir, "drums.wav")
        bass_path = os.path.join(htdemucs_dir, "bass.wav")
        other_path = os.path.join(htdemucs_dir, "other.wav")

        # Fail loudly if any expected stem is missing.
        for file_path in [vocals_path, drums_path, bass_path, other_path]:
            if not os.path.exists(file_path):
                raise Error(f"Separated file not found: {file_path}")

        return vocals_path, drums_path, bass_path, other_path

    except Error:
        # Fix: already descriptive — don't re-wrap our own Error in a
        # second "Error processing audio" layer as before.
        raise
    except Exception as e:
        raise Error(f"Error processing audio: {str(e)}") from e
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def extract_selected_stems(
    audio_path: str, stems_to_extract: List[str], output_path: Optional[str] = None
) -> Dict[str, str]:
    """
    Extract only specific stems from an audio file.

    Demucs always produces all four stems, so the separation runs once and
    the requested subset is picked from the result.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC, M4A)
        stems_to_extract: List of stems to extract. Valid options: ['vocals', 'drums', 'bass', 'other']
        output_path: Directory to save the selected stems (default: 'output' directory)

    Returns:
        dict[str, str]: Dictionary mapping stem names to their file paths

    Raises:
        ValueError: If no valid stem names were requested.

    Examples:
        - extract_selected_stems('song.mp3', ['vocals', 'drums']): vocals and drums only
        - extract_selected_stems('song.mp3', ['vocals']): vocals for karaoke
        - extract_selected_stems('song.mp3', ['bass', 'drums']): rhythm section

    Note:
        Unknown stem names are dropped with a printed warning rather than
        raising, as long as at least one valid name remains.
    """
    valid_stems = ["vocals", "drums", "bass", "other"]

    # Warn about (and drop) any names we don't recognize.
    invalid_stems = [name for name in stems_to_extract if name not in valid_stems]
    if invalid_stems:
        print(f"Warning: Invalid stem names will be ignored: {invalid_stems}")

    wanted = [name for name in stems_to_extract if name in valid_stems]
    if not wanted:
        raise ValueError("No valid stems specified for extraction")

    # Run the full four-way separation once.
    vocals_path, drums_path, bass_path, other_path = separate_audio(
        audio_path, output_path
    )

    available = {
        "vocals": vocals_path,
        "drums": drums_path,
        "bass": bass_path,
        "other": other_path,
    }

    # Keep only what the caller asked for.
    return {name: available[name] for name in wanted}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def extract_vocal_non_vocal(
    audio_path: str, output_path: Optional[str] = None
) -> Tuple[str, str]:
    """
    Extract vocals and non-vocals (instrumental) stems from an audio file.

    This function provides a simple interface to separate audio into vocal and
    non-vocal components, which is useful for karaoke creation, vocal isolation,
    or instrumental extraction.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC, M4A)
        output_path: Directory to save the separated stems (default: 'output' directory)

    Returns:
        tuple[str, str]: Paths to (vocals_file, non_vocals_file)
            - vocals_file: Path to the isolated vocal track
            - non_vocals_file: Path to the combined instrumental track (drums + bass + other)

    Raises:
        RuntimeError: If mixing or writing the non-vocals track fails.

    Examples:
        - extract_vocal_non_vocal('song.mp3'): Separate into vocals and instrumental
        - extract_vocal_non_vocal('song.wav', 'karaoke'): Create karaoke version

    Note:
        The non-vocals track sums drums, bass, and other stems, then is
        peak-normalized to 0.95 to prevent clipping.
    """
    # Extract all stems (runs Demucs; raises Error on failure).
    all_stems = separate_audio(audio_path, output_path)
    vocals_path, drums_path, bass_path, other_path = all_stems

    # Create non-vocals by combining drums, bass, and other
    try:
        # Heavy audio deps imported lazily so the separation path above
        # doesn't require them.
        import librosa
        import numpy as np
        import soundfile as sf

        # sr=None keeps each stem's native rate; mono=False preserves the
        # channel layout (stereo stems load as (2, n) arrays).
        y_drums, sr_drums = librosa.load(drums_path, sr=None, mono=False)
        y_bass, sr_bass = librosa.load(bass_path, sr=None, mono=False)
        y_other, sr_other = librosa.load(other_path, sr=None, mono=False)

        # Resample everything up to the highest rate among the three stems.
        # (Demucs stems presumably share one rate, so these are usually
        # no-ops — TODO confirm.)
        target_sr = max(sr_drums, sr_bass, sr_other)

        if sr_drums != target_sr:
            y_drums = librosa.resample(y_drums, orig_sr=sr_drums, target_sr=target_sr)
        if sr_bass != target_sr:
            y_bass = librosa.resample(y_bass, orig_sr=sr_bass, target_sr=target_sr)
        if sr_other != target_sr:
            y_other = librosa.resample(y_other, orig_sr=sr_other, target_sr=target_sr)

        # Zero-pad all stems to the longest sample count so they can be
        # summed elementwise; shape[-1] works for both mono and stereo.
        max_length = max(y_drums.shape[-1], y_bass.shape[-1], y_other.shape[-1])

        def pad_to_length(y, target_length):
            # Pad only the time axis, handling 1-D (mono) and 2-D
            # (channels, samples) arrays.
            if y.shape[-1] < target_length:
                if y.ndim == 1:
                    return np.pad(y, (0, target_length - y.shape[-1]), mode="constant")
                else:
                    return np.pad(
                        y, ((0, 0), (0, target_length - y.shape[-1])), mode="constant"
                    )
            return y

        y_drums = pad_to_length(y_drums, max_length)
        y_bass = pad_to_length(y_bass, max_length)
        y_other = pad_to_length(y_other, max_length)

        # Sum the three instrumental stems into one signal.
        non_vocals = y_drums + y_bass + y_other

        # Peak-normalize to 0.95 full scale to prevent clipping.
        max_val = np.max(np.abs(non_vocals))
        if max_val > 0:
            non_vocals = non_vocals / max_val * 0.95

        # Choose the destination: the caller's directory if given,
        # otherwise next to the Demucs stem files.
        if output_path:
            os.makedirs(output_path, exist_ok=True)
            non_vocals_filename = os.path.join(output_path, "non_vocals.wav")
        else:
            non_vocals_filename = os.path.join(
                os.path.dirname(drums_path), "non_vocals.wav"
            )

        # soundfile expects (frames, channels) for multi-channel data.
        if non_vocals.ndim == 2:
            non_vocals = non_vocals.T

        sf.write(
            non_vocals_filename, non_vocals, target_sr, format="wav", subtype="PCM_16"
        )

        return vocals_path, non_vocals_filename

    except Exception as e:
        raise RuntimeError(f"Error creating non-vocals track: {str(e)}")
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def create_karaoke_track(audio_path: str, output_path: Optional[str] = None) -> str:
    """
    Create a karaoke (instrumental) track by removing vocals from an audio file.

    Convenience wrapper around extract_vocal_non_vocal that keeps only the
    instrumental half of the result, producing a karaoke-ready backing track.

    Args:
        audio_path: Path to the input audio file (supports common formats: WAV, MP3, FLAC, M4A)
        output_path: Directory to save the karaoke track (default: 'output' directory)

    Returns:
        Path to the karaoke (instrumental) audio file

    Examples:
        - create_karaoke_track('song.mp3'): Create karaoke version
        - create_karaoke_track('song.wav', 'karaoke_tracks'): Save to specific folder

    Note:
        Uses the same high-quality Demucs model as separate_audio.
        Combines drums, bass, and other stems into the instrumental track,
        automatically normalized for consistent volume.
    """
    # The vocals path is discarded; only the instrumental output matters here.
    _, karaoke_path = extract_vocal_non_vocal(audio_path, output_path)
    return karaoke_path
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Separate audio into stems using Demucs"
    )
    commands = parser.add_subparsers(dest="command", help="Available commands")

    # "separate": extract all four stems.
    cmd = commands.add_parser("separate", help="Separate into all four stems")
    cmd.add_argument("audio_path", help="Path to the input audio file")
    cmd.add_argument(
        "--output-dir", help="Directory to save separated stems (default: output)"
    )

    # "select": extract only a chosen subset of stems.
    cmd = commands.add_parser("select", help="Extract specific stems only")
    cmd.add_argument("audio_path", help="Path to the input audio file")
    cmd.add_argument(
        "stems",
        nargs="+",
        choices=["vocals", "drums", "bass", "other"],
        help="Stems to extract (choose from: vocals, drums, bass, other)",
    )
    cmd.add_argument(
        "--output-dir", help="Directory to save separated stems (default: output)"
    )

    # "vocal-nonvocal": vocals plus a combined instrumental.
    cmd = commands.add_parser(
        "vocal-nonvocal", help="Extract vocals and instrumental only"
    )
    cmd.add_argument("audio_path", help="Path to the input audio file")
    cmd.add_argument(
        "--output-dir", help="Directory to save separated stems (default: output)"
    )

    # "karaoke": instrumental-only backing track.
    cmd = commands.add_parser("karaoke", help="Create karaoke (instrumental) track")
    cmd.add_argument("audio_path", help="Path to the input audio file")
    cmd.add_argument(
        "--output-dir", help="Directory to save karaoke track (default: output)"
    )

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        exit(1)

    try:
        if args.command == "separate":
            stems = separate_audio(args.audio_path, args.output_dir)
            for label, path in zip(("Vocals", "Drums", "Bass", "Other"), stems):
                print(f"{label}: {path}")

        elif args.command == "select":
            results = extract_selected_stems(
                args.audio_path, args.stems, args.output_dir
            )
            for stem, path in results.items():
                print(f"{stem.capitalize()}: {path}")

        elif args.command == "vocal-nonvocal":
            vocals_file, instrumental_file = extract_vocal_non_vocal(
                args.audio_path, args.output_dir
            )
            print(f"Vocals: {vocals_file}")
            print(f"Non-vocals (Instrumental): {instrumental_file}")

        elif args.command == "karaoke":
            karaoke_file = create_karaoke_track(args.audio_path, args.output_dir)
            print(f"Karaoke track: {karaoke_file}")

    except Exception as e:
        print(f"Error: {e}")
        exit(1)
|
tools/time_strech.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Optional, Tuple
|
| 3 |
+
|
| 4 |
+
import librosa
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def align_songs_by_bpm(
    audio1_path: str, audio2_path: str, output_path: Optional[str] = None
) -> Tuple[str, str]:
    """
    Align two songs to the same BPM by stretching to match the slower one.

    This function analyzes the tempo of two audio files and time-stretches both
    tracks to the slower of the two detected BPMs, making them suitable for
    mixing or mashups.

    Args:
        audio1_path: Path to first audio file (supports common formats: WAV, MP3, FLAC)
        audio2_path: Path to second audio file (supports common formats: WAV, MP3, FLAC)
        output_path: Optional output directory (default: None, uses 'output' directory)

    Returns:
        Tuple of (aligned_audio1_path, aligned_audio2_path): Paths to the processed
        audio files. Both files will have the same BPM (the slower of the two
        original tempos).

    Raises:
        RuntimeError: If loading, tempo detection, or stretching fails.

    Examples:
        - Song A: 140 BPM, Song B: 128 BPM -> Both become 128 BPM
        - Song A: 120 BPM, Song B: 130 BPM -> Both become 120 BPM

    Note:
        Uses high-quality time-stretching to maintain audio quality and
        preserves the original pitch of both tracks. Processing time depends
        on audio length and tempo difference.
    """
    try:
        # Load at librosa defaults (22.05 kHz mono) — sufficient for tempo
        # detection; the actual stretching reloads at full quality.
        y1, sr1 = librosa.load(audio1_path)
        y2, sr2 = librosa.load(audio2_path)

        tempo1, _ = librosa.beat.beat_track(y=y1, sr=sr1)
        tempo2, _ = librosa.beat.beat_track(y=y2, sr=sr2)

        # Target the slower tempo. Both branches of the original if/else did
        # the same thing (stretch both files to min(bpm1, bpm2)), so the
        # duplication is collapsed into a single code path.
        target_bpm = min(float(tempo1), float(tempo2))

        # Run both tracks through the same stretch pipeline so outputs get
        # consistent naming and identical processing artifacts.
        aligned1_path = stretch_to_bpm(audio1_path, target_bpm, output_path)
        aligned2_path = stretch_to_bpm(audio2_path, target_bpm, output_path)

        return aligned1_path, aligned2_path

    except Exception as e:
        # Chain explicitly so the original traceback is preserved as __cause__.
        raise RuntimeError(f"Error aligning audio files: {str(e)}") from e
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def stretch_to_bpm(
    audio_path: str, target_bpm: float, output_path: Optional[str] = None
) -> str:
    """
    Stretch an audio file to a specific BPM while preserving pitch.

    Args:
        audio_path: Path to the input audio file
        target_bpm: Target BPM to stretch the audio to
        output_path: Directory for the output file (default: 'output')

    Returns:
        Path to the stretched audio file, named
        '<original_basename>_stretched_to_<bpm>_bpm.wav'

    Raises:
        RuntimeError: If loading, tempo detection, stretching, or writing fails.
    """
    try:
        # Full-quality load (native sample rate, original channel layout)
        # used for the actual stretching.
        y, sr = librosa.load(audio_path, sr=None, mono=False)

        # Separate mono load at librosa's default rate for tempo detection;
        # beat tracking works on a mono downmix.
        y_mono, sr_mono = librosa.load(audio_path)
        tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr_mono)
        current_bpm = float(tempo)

        # rate > 1 speeds up (shortens), rate < 1 slows down (lengthens).
        stretch_factor = target_bpm / current_bpm
        y_stretched = librosa.effects.time_stretch(y, rate=stretch_factor)

        if not output_path:
            output_path = "output"
        os.makedirs(output_path, exist_ok=True)

        # splitext strips whatever extension the input has (.mp3, .flac, ...),
        # unlike the previous `.replace(".wav", "")`, which only handled .wav
        # and would also strip ".wav" occurring mid-name.
        base_name = os.path.splitext(os.path.basename(audio_path))[0]
        output_file_path = os.path.join(
            output_path,
            f"{base_name}_stretched_to_{int(target_bpm)}_bpm.wav",
        )

        # soundfile expects (frames, channels); librosa gives (channels, frames).
        if y_stretched.ndim == 2:
            y_stretched = y_stretched.T

        sf.write(output_file_path, y_stretched, sr)

        return output_file_path

    except Exception as e:
        # Chain explicitly so the original traceback is preserved as __cause__.
        raise RuntimeError(f"Error stretching audio: {str(e)}") from e
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Time stretch audio files")
    sub = cli.add_subparsers(dest="command", help="Available commands")

    # Align two songs to a common BPM.
    p_align = sub.add_parser("align", help="Align two songs to same BPM")
    p_align.add_argument("audio1", help="Path to first audio file")
    p_align.add_argument("audio2", help="Path to second audio file")

    # Stretch one file to an explicit BPM.
    p_stretch = sub.add_parser("stretch", help="Stretch audio to specific BPM")
    p_stretch.add_argument("audio", help="Path to audio file")
    p_stretch.add_argument("target_bpm", type=float, help="Target BPM")

    opts = cli.parse_args()

    try:
        if opts.command == "align":
            first, second = align_songs_by_bpm(opts.audio1, opts.audio2)
            print(f"Aligned audio 1: {first}")
            print(f"Aligned audio 2: {second}")
        elif opts.command == "stretch":
            result = stretch_to_bpm(opts.audio, opts.target_bpm)
            print(f"Stretched audio saved to: {result}")
        else:
            cli.print_help()
    except Exception as e:
        print(f"Error: {e}")
        exit(1)
|
tools/youtube_extract.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import yt_dlp
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def extract_audio_from_youtube(
    youtube_url: str,
    audio_format: str = "wav",
    quality: str = "best",
    output_path: str = "output",
) -> str:
    """
    Extract high-quality audio from a YouTube video URL using yt-dlp.

    This function downloads the audio stream from YouTube videos and converts
    it to the specified format.

    Args:
        youtube_url: YouTube video URL (full URL format: https://www.youtube.com/watch?v=...)
        audio_format: Output audio format (default: 'wav')
            Supported: 'wav' (uncompressed), 'mp3' (compressed), 'flac' (lossless)
        quality: Audio quality selection (default: 'best')
            Options: 'best' (highest available), 'worst' (lowest available)
        output_path: Directory to save the extracted audio (default: 'output')

    Returns:
        Path to the extracted audio file in the specified format

    Raises:
        RuntimeError: If the download fails or no output file can be located.

    Examples:
        - Extract WAV audio: extract_audio_from_youtube('https://youtube.com/watch?v=...', 'wav')
        - Extract MP3 audio: extract_audio_from_youtube('https://youtube.com/watch?v=...', 'mp3')
        - High quality WAV: extract_audio_from_youtube(url, 'wav', 'best')

    Note:
        Requires an internet connection; processing time depends on video
        length and connection speed. Output files are named after the video
        title.
    """
    try:
        # Fall back to the default directory if an empty string is passed.
        output_path = output_path or "output"
        os.makedirs(output_path, exist_ok=True)

        # Honor the requested quality in the stream selection itself — the
        # previous version always downloaded 'bestaudio/best' and only varied
        # the transcode bitrate, so quality='worst' had almost no effect.
        stream_format = "bestaudio/best" if quality == "best" else "worstaudio/worst"

        ydl_opts = {
            "format": stream_format,
            "outtmpl": os.path.join(output_path, "%(title)s.%(ext)s"),
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": audio_format,
                    "preferredquality": "192" if quality == "best" else "128",
                }
            ],
            "quiet": True,
            "no_warnings": True,
        }

        with yt_dlp.YoutubeDL(params=ydl_opts) as ydl:
            # extract_info may return None for some extractors; guard before .get.
            info = ydl.extract_info(youtube_url, download=False)
            video_title = (info or {}).get("title", "audio")

            ydl.download([youtube_url])

        # Expected location of the transcoded output.
        expected_filename = f"{video_title}.{audio_format}"
        audio_path = os.path.join(output_path, expected_filename)

        # yt-dlp sanitizes titles containing characters that are illegal in
        # filenames, so the actual name may differ; fall back to the most
        # recently written matching file in the output directory.
        if not os.path.exists(audio_path):
            candidates = [
                os.path.join(output_path, f)
                for f in os.listdir(output_path)
                if f.endswith(f".{audio_format}")
            ]
            if not candidates:
                raise RuntimeError("Audio file not found after download")
            audio_path = max(candidates, key=os.path.getmtime)

        return audio_path

    except Exception as e:
        # Chain explicitly so the original traceback is preserved as __cause__.
        raise RuntimeError(f"Error extracting audio from YouTube: {str(e)}") from e
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def get_video_info(youtube_url: str) -> dict:
    """
    Get information about a YouTube video without downloading it.

    Args:
        youtube_url: YouTube video URL

    Returns:
        Dictionary with video information (title, duration, uploader, etc.)
    """
    try:
        # Metadata-only probe: nothing is downloaded.
        probe_opts = {
            "quiet": True,
            "no_warnings": True,
            "skip_download": True,
        }

        with yt_dlp.YoutubeDL(params=probe_opts) as ydl:
            metadata = ydl.extract_info(youtube_url, download=False)

        # Project the full extractor payload down to the fields callers use;
        # order here matches the public shape of the returned dict.
        wanted = (
            "title",
            "duration",
            "uploader",
            "upload_date",
            "view_count",
            "description",
            "thumbnail",
        )
        return {field: metadata.get(field) for field in wanted}

    except Exception as e:
        raise RuntimeError(f"Error getting video info: {str(e)}")
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Extract audio from YouTube videos")
    sub = cli.add_subparsers(dest="command", help="Available commands")

    # Download and transcode the audio track.
    p_extract = sub.add_parser("extract", help="Extract audio from YouTube URL")
    p_extract.add_argument("url", help="YouTube video URL")
    p_extract.add_argument(
        "--format",
        default="wav",
        choices=["wav", "mp3", "flac", "m4a"],
        help="Output audio format (default: wav)",
    )
    p_extract.add_argument(
        "--quality",
        default="best",
        choices=["best", "worst"],
        help="Audio quality (default: best)",
    )

    # Metadata-only lookup.
    p_info = sub.add_parser("info", help="Get video information")
    p_info.add_argument("url", help="YouTube video URL")

    opts = cli.parse_args()

    try:
        if opts.command == "extract":
            saved_path = extract_audio_from_youtube(opts.url, opts.format, opts.quality)
            print(f"Audio extracted to: {saved_path}")
        elif opts.command == "info":
            details = get_video_info(opts.url)
            print(f"Title: {details['title']}")
            print(f"Duration: {details['duration']} seconds")
            print(f"Uploader: {details['uploader']}")
            print(f"Upload date: {details['upload_date']}")
            print(f"Views: {details['view_count']}")
        else:
            cli.print_help()
    except Exception as e:
        print(f"Error: {e}")
        exit(1)
|