Spaces:
Sleeping
Sleeping
Refactor: modularize app, add AudioGallery, MCP, tests
Browse files
- Move core logic to modules/; update imports in app.py
- Implement AudioGallery (gr.HTML) with waveform and controls
- Add progress callbacks to yt_audio_get_tracks functions
- Enhance Gradio UI: progress box, error/status, footer
- Enable MCP server; configure allowed_paths, settings.json
- Add Playwright UI/file endpoint test (test_gallery.py)
- Update .gitignore, CLAUDE.md, build.md, copilot-instructions.md
- Add favicon.ico for branding
- No new dependencies; follows modularity guidelines
- .claude/settings.json +9 -0
- .claude/settings.local.json +13 -0
- .github/copilot-instructions.md +1 -41
- .gitignore +2 -0
- CLAUDE.md +115 -32
- app.py +109 -187
- modules/AudioGallery.py +169 -0
- yt_audio_get_tracks.py → modules/yt_audio_get_tracks.py +13 -3
- separated/favicon.ico +0 -0
- specs/build.md +54 -94
- specs/test_gallery.py +105 -0
.claude/settings.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mcpServers": {
|
| 3 |
+
"jcodemunch": {
|
| 4 |
+
"command": "C:\\Users\\cfettinger\\AppData\\Local\\Programs\\Python\\Python311\\Scripts\\jcodemunch-mcp.exe",
|
| 5 |
+
"args": [],
|
| 6 |
+
"type": "stdio"
|
| 7 |
+
}
|
| 8 |
+
}
|
| 9 |
+
}
|
.claude/settings.local.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"permissions": {
|
| 3 |
+
"allow": [
|
| 4 |
+
"WebFetch(domain:github.com)",
|
| 5 |
+
"Bash(pip install jcodemunch-mcp 2>&1)",
|
| 6 |
+
"Bash(cd /d/Projects/SeparateTracks && jcodemunch-mcp init --yes --index 2>&1)",
|
| 7 |
+
"Bash(jcodemunch-mcp --help 2>&1)",
|
| 8 |
+
"Bash(where jcodemunch-mcp 2>&1)",
|
| 9 |
+
"Bash(find /d/Projects/SeparateTracks/modules -type f | sort && ls /d/Projects/SeparateTracks/*.py 2>&1)",
|
| 10 |
+
"Bash(cd /d/Projects/SeparateTracks && python specs/test_gallery.py 2>&1)"
|
| 11 |
+
]
|
| 12 |
+
}
|
| 13 |
+
}
|
.github/copilot-instructions.md
CHANGED
|
@@ -17,45 +17,5 @@
|
|
| 17 |
|
| 18 |
## Project-Specific Rules
|
| 19 |
- gradio reference: https://www.gradio.app/docs/gradio/interface or use MCP server gradio
|
| 20 |
-
- main code is based upon yt_audio_get_tracks.py
|
| 21 |
-
- Footer should include modules/version_info.py
|
| 22 |
-
- huggingface dockerfile should be used as a base for the project containerization.
|
| 23 |
- This project is to also be an MCP server, so the code should be structured in a way that allows for easy integration with MCP. (https://huggingface.co/docs/hub/en/agents-mcp)
|
| 24 |
-
- Download: https://github.com/denoland/deno/releases/latest/download/deno-x86_64-pc-windows-msvc.zip Extract deno.exe to script folder or PATH. per dockerfile
|
| 25 |
-
- use the provided `AudioGallery` class as a reference for implementing the audio gallery component in the project.
|
| 26 |
-
sample: https://huggingface.co/spaces/fffiloni/audio-gallery
|
| 27 |
-
```
|
| 28 |
-
|
| 29 |
-
class AudioGallery(gr.HTML):
|
| 30 |
-
def __init__(self, audio_urls, *, value=None, labels=None,
|
| 31 |
-
columns=3, label=None, **kwargs):
|
| 32 |
-
html_template = """
|
| 33 |
-
<div class="audio-gallery-container">
|
| 34 |
-
${label ? `<label>${label}</label>` : ''}
|
| 35 |
-
<div class="audio-gallery-grid"
|
| 36 |
-
style="grid-template-columns: repeat(${columns}, 1fr);">
|
| 37 |
-
${audio_urls.map((url, i) => `
|
| 38 |
-
<div class="audio-item" data-index="${i}">
|
| 39 |
-
<div class="audio-label">
|
| 40 |
-
${labels && labels[i] ? labels[i] : 'Audio ' + (i+1)}
|
| 41 |
-
</div>
|
| 42 |
-
<canvas class="waveform-canvas" width="300" height="80"></canvas>
|
| 43 |
-
<audio src="${url}" preload="metadata"></audio>
|
| 44 |
-
<div class="audio-controls">
|
| 45 |
-
<button class="play-btn">▶</button>
|
| 46 |
-
<div class="time-display">0:00</div>
|
| 47 |
-
</div>
|
| 48 |
-
</div>
|
| 49 |
-
`).join('')}
|
| 50 |
-
</div>
|
| 51 |
-
</div>
|
| 52 |
-
"""
|
| 53 |
-
super().__init__(
|
| 54 |
-
value=value, audio_urls=audio_urls,
|
| 55 |
-
labels=labels, columns=columns, label=label,
|
| 56 |
-
html_template=html_template,
|
| 57 |
-
css_template=CSS_TEMPLATE,
|
| 58 |
-
js_on_load=JS_ON_LOAD, **kwargs
|
| 59 |
-
)
|
| 60 |
-
```
|
| 61 |
-
|
|
|
|
| 17 |
|
| 18 |
## Project-Specific Rules
|
| 19 |
- gradio reference: https://www.gradio.app/docs/gradio/interface or use MCP server gradio
|
|
|
|
|
|
|
|
|
|
| 20 |
- This project is to also be an MCP server, so the code should be structured in a way that allows for easy integration with MCP. (https://huggingface.co/docs/hub/en/agents-mcp)
|
| 21 |
+
- Download: https://github.com/denoland/deno/releases/latest/download/deno-x86_64-pc-windows-msvc.zip Extract deno.exe to script folder or PATH. per dockerfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
|
@@ -9,9 +9,11 @@ node_modules/
|
|
| 9 |
.pip/
|
| 10 |
venv/
|
| 11 |
__pycache/
|
|
|
|
| 12 |
**.bat, **.ps1
|
| 13 |
.bak
|
| 14 |
/__pycache__
|
| 15 |
separated/htdemucs/
|
| 16 |
separated/htdemucs_6s/
|
| 17 |
*.webm
|
|
|
|
|
|
| 9 |
.pip/
|
| 10 |
venv/
|
| 11 |
__pycache/
|
| 12 |
+
__pycache__/
|
| 13 |
**.bat, **.ps1
|
| 14 |
.bak
|
| 15 |
/__pycache__
|
| 16 |
separated/htdemucs/
|
| 17 |
separated/htdemucs_6s/
|
| 18 |
*.webm
|
| 19 |
+
*.pyi
|
CLAUDE.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
| 1 |
# CLAUDE.md — SeparateTracks Project Context
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
## Project Overview
|
| 4 |
**SeparateTracks** (`Surn/SeparateTracks`) — A HuggingFace Docker Space that:
|
| 5 |
- Downloads audio from YouTube via `yt-dlp` + Deno
|
|
@@ -7,38 +17,116 @@
|
|
| 7 |
- Presents results in a Gradio UI with a custom `AudioGallery` HTML component
|
| 8 |
- Exposes an MCP server at `/gradio_api/mcp/sse`
|
| 9 |
|
|
|
|
|
|
|
| 10 |
## Key Files
|
| 11 |
|
| 12 |
| File | Purpose |
|
| 13 |
|------|---------|
|
| 14 |
-
| `app.py` |
|
| 15 |
-
| `
|
|
|
|
|
|
|
| 16 |
| `modules/constants.py` | Env vars (`HF_TOKEN`, `HF_REPO_ID`, etc.), shared constants |
|
| 17 |
| `modules/version_info.py` | `versions_html()` for Gradio footer |
|
| 18 |
| `modules/file_utils.py` | File utility helpers |
|
| 19 |
-
| `requirements.txt` | Pip dependencies
|
| 20 |
-
| `dockerfile` | Docker image
|
| 21 |
| `specs/build.md` | Step-by-step build plan |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
## Architecture
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
```
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
└── modules/version_info.versions_html() → footer HTML
|
| 30 |
```
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
## Copilot / Agent Rules (from `.github/copilot-instructions.md`)
|
| 33 |
- **Minimal changes** — preserve existing functionality
|
| 34 |
- **No new dependencies** without approval
|
| 35 |
- **Use existing `modules/` functions** before writing new code; prefer overloads
|
| 36 |
- **Gradio reference**: https://www.gradio.app/docs/gradio/interface
|
| 37 |
-
- **AudioGallery** —
|
| 38 |
- **Footer** must use `modules/version_info.versions_html()`
|
| 39 |
- **Dockerfile** is HuggingFace-compatible (base: `python:3.12-slim`)
|
| 40 |
-
- **MCP** —
|
| 41 |
-
- **Deno** — install
|
| 42 |
- **Testing** — Playwright MCP headless (Chrome/WebKit/Edge/Firefox), MSTest, UV
|
| 43 |
|
| 44 |
## Python Style (from `.github/instructions/py.instructions.md`)
|
|
@@ -48,35 +136,25 @@ app.py (Gradio Blocks + mcp_server=True)
|
|
| 48 |
- **In f-strings with `<script>` tags: use `{{ }}` for JS template literals**
|
| 49 |
- Tools: `black`, `ruff`, `isort`, `mypy` (optional)
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
| 52 |
| Variable | Purpose |
|
| 53 |
|----------|---------|
|
| 54 |
-
| `HF_TOKEN` | HuggingFace API token |
|
| 55 |
| `CRYPTO_PK` | Crypto private key |
|
| 56 |
| `HF_REPO_ID` | HF storage repo (`Surn/Storage`) |
|
| 57 |
| `SPACE_NAME` | HF Space ID (`Surn/SeparateTracks`) |
|
| 58 |
| `TMPDIR` | Temp directory for processing |
|
| 59 |
| `IS_LOCAL` | `true` when running locally |
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
## Stems Produced by Demucs `htdemucs_6s`
|
| 64 |
-
- `drums.mp3`, `vocals.mp3`, `guitar.mp3`, `bass.mp3`, `piano.mp3`, `other.mp3`
|
| 65 |
-
- `music.mp3` — synthesized as `bass + other` overlay (per existing code)
|
| 66 |
-
- Output path: `separated/htdemucs_6s/{video_id}/`
|
| 67 |
-
|
| 68 |
-
## What's Missing / TODO
|
| 69 |
-
See `specs/build.md` for the complete checklist. Summary:
|
| 70 |
-
1. Add `.env` to `.gitignore`
|
| 71 |
-
2. Complete `requirements.txt` (add `gradio[mcp]`, `python-dotenv`, `numpy`, `Pillow`, `requests`)
|
| 72 |
-
3. Fix `dockerfile` (add `ffmpeg` apt, install requirements.txt)
|
| 73 |
-
4. **Create `app.py`** — Gradio Blocks with AudioGallery and MCP server
|
| 74 |
-
5. Verify `modules/constants.py` doesn't crash locally (HF_TOKEN in .env handles this)
|
| 75 |
|
| 76 |
## Local Dev Commands
|
| 77 |
```bash
|
| 78 |
pip install -r requirements.txt
|
| 79 |
-
python app.py #
|
|
|
|
| 80 |
```
|
| 81 |
|
| 82 |
## Docker Commands
|
|
@@ -85,16 +163,21 @@ docker build -t separatetracks .
|
|
| 85 |
docker run -p 7860:7860 --env-file .env separatetracks
|
| 86 |
```
|
| 87 |
|
|
|
|
|
|
|
| 88 |
## Agent Personas (`.github/agents/`)
|
| 89 |
| Agent | Role |
|
| 90 |
|-------|------|
|
| 91 |
| `orchestrator` | Decomposes tasks → assigns to dev/qa |
|
| 92 |
| `dev` / `local_dev` | Implements features (Python 3.12, Gradio) |
|
| 93 |
| `qa` | Reviews, gates, risk assessment |
|
| 94 |
-
| `code-munch` | Repository indexing via MCP |
|
| 95 |
| `file-discovery` | Locates files across repo |
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
## Security Notes
|
| 98 |
-
- `.env` contains sensitive credentials — never commit
|
| 99 |
- `constants.py` validates `HF_TOKEN` at import time; ensure `.env` is loaded first
|
| 100 |
-
- Rotate `HF_TOKEN` and `CRYPTO_PK` if they were ever exposed
|
|
|
|
| 1 |
# CLAUDE.md — SeparateTracks Project Context
|
| 2 |
|
| 3 |
+
## MCP Tools
|
| 4 |
+
Call the `jcodemunch_guide` tool and strictly follow its instructions for code retrieval.
|
| 5 |
+
The jCodeMunch MCP server is configured in `.claude/settings.json`. The project has
|
| 6 |
+
been indexed. Workflow:
|
| 7 |
+
1. Call `index_folder` on the project root to index (or re-index after changes)
|
| 8 |
+
2. Then use `search_symbols`, `get_symbol_source`, `get_file_outline`, `search_text`
|
| 9 |
+
for token-efficient code retrieval instead of reading whole files.
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
## Project Overview
|
| 14 |
**SeparateTracks** (`Surn/SeparateTracks`) — A HuggingFace Docker Space that:
|
| 15 |
- Downloads audio from YouTube via `yt-dlp` + Deno
|
|
|
|
| 17 |
- Presents results in a Gradio UI with a custom `AudioGallery` HTML component
|
| 18 |
- Exposes an MCP server at `/gradio_api/mcp/sse`
|
| 19 |
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
## Key Files
|
| 23 |
|
| 24 |
| File | Purpose |
|
| 25 |
|------|---------|
|
| 26 |
+
| `app.py` | Gradio entry point — UI, routing, progress, MCP launch |
|
| 27 |
+
| `modules/AudioGallery.py` | `AudioGallery(gr.HTML)` — 7-stem audio grid with waveform canvas |
|
| 28 |
+
| `modules/AudioGallery.pyi` | Type stub for AudioGallery |
|
| 29 |
+
| `modules/yt_audio_get_tracks.py` | `download_audio()` + `separate_tracks()` with progress callbacks |
|
| 30 |
| `modules/constants.py` | Env vars (`HF_TOKEN`, `HF_REPO_ID`, etc.), shared constants |
|
| 31 |
| `modules/version_info.py` | `versions_html()` for Gradio footer |
|
| 32 |
| `modules/file_utils.py` | File utility helpers |
|
| 33 |
+
| `requirements.txt` | Pip dependencies |
|
| 34 |
+
| `dockerfile` | Docker image — `python:3.12-slim` + ffmpeg + Deno + pip |
|
| 35 |
| `specs/build.md` | Step-by-step build plan |
|
| 36 |
+
| `.claude/settings.json` | MCP server config (jcodemunch) |
|
| 37 |
+
|
| 38 |
+
> **Note:** The original root-level `yt_audio_get_tracks.py` has been moved to
|
| 39 |
+
> `modules/yt_audio_get_tracks.py`. Do not recreate it at root.
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
|
| 43 |
## Architecture
|
| 44 |
+
|
| 45 |
+
```
|
| 46 |
+
app.py
|
| 47 |
+
├── SEPARATED_DIR = Path("separated").resolve()
|
| 48 |
+
├── _footer_html() → modules/version_info.versions_html()
|
| 49 |
+
├── process_video(video_id) → MCP-exposed tool (simple, no progress)
|
| 50 |
+
├── process_video_with_progress(video_id) → UI handler (returns html, status)
|
| 51 |
+
│ ├── modules.yt_audio_get_tracks.download_audio(url, id, progress_callback)
|
| 52 |
+
│ ├── modules.yt_audio_get_tracks.separate_tracks(wav, id, progress_callback)
|
| 53 |
+
│ └── AudioGallery._build_html(audio_urls, labels, columns)
|
| 54 |
+
└── demo.launch(mcp_server=True, allowed_paths=[SEPARATED_DIR])
|
| 55 |
+
|
| 56 |
+
modules/AudioGallery.py
|
| 57 |
+
└── AudioGallery(gr.HTML)
|
| 58 |
+
├── DEFAULT_LABELS = [Drums, Vocals, Guitar, Bass, Other, Piano, Music]
|
| 59 |
+
├── __init__(audio_urls, *, labels, columns, ...)
|
| 60 |
+
└── _build_html(audio_urls, labels, columns) → inline CSS + HTML + JS
|
| 61 |
+
|
| 62 |
+
modules/yt_audio_get_tracks.py
|
| 63 |
+
├── _emit_progress(progress_callback, message)
|
| 64 |
+
├── download_audio(url, video_id, progress_callback=None) → wav path
|
| 65 |
+
└── separate_tracks(input_wav, video_id, progress_callback=None)
|
| 66 |
+
→ (drums, vocals, guitar, bass, other, piano, music_path)
|
| 67 |
+
```
|
| 68 |
+
## Gradio Progress Pattern
|
| 69 |
+
|
| 70 |
+
Use `progress=gr.Progress(track_tqdm=True)` in processing handlers when you want
|
| 71 |
+
interactive progress updates in the UI. The current app supports this via the
|
| 72 |
+
shared processing helper in `app.py`, while still collecting the stage messages
|
| 73 |
+
emitted by `modules/yt_audio_get_tracks.py`.
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
## UI Layout
|
| 79 |
+
|
| 80 |
```
|
| 81 |
+
[YouTube Video ID input ............] [Separate Tracks btn]
|
| 82 |
+
[Progress textbox — 6 lines, read-only ]
|
| 83 |
+
[AudioGallery HTML — 3-column stem grid ]
|
| 84 |
+
[Footer — versions_html() ]
|
|
|
|
| 85 |
```
|
| 86 |
|
| 87 |
+
Button triggers `process_video_with_progress` → outputs `[audio_output, progress_output]`.
|
| 88 |
+
|
| 89 |
+
---
|
| 90 |
+
|
| 91 |
+
## Progress Callback Pattern
|
| 92 |
+
|
| 93 |
+
Both core functions accept an optional `progress_callback(message: str)` parameter.
|
| 94 |
+
`app.py` collects messages into a list and returns the joined string as status text.
|
| 95 |
+
|
| 96 |
+
```python
|
| 97 |
+
def on_progress(message):
|
| 98 |
+
progress_messages.append(message)
|
| 99 |
+
|
| 100 |
+
download_audio(url, video_id, progress_callback=on_progress)
|
| 101 |
+
separate_tracks(wav, video_id, progress_callback=on_progress)
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
Progress messages emitted:
|
| 105 |
+
1. `"Downloading audio from YouTube..."`
|
| 106 |
+
2. `"Converting downloaded audio to WAV..."`
|
| 107 |
+
3. `"Separating tracks with Demucs..."`
|
| 108 |
+
4. `"Creating combined music stem..."`
|
| 109 |
+
5. `"Separation complete."`
|
| 110 |
+
|
| 111 |
+
---
|
| 112 |
+
|
| 113 |
+
## Stems Order (always)
|
| 114 |
+
`drums, vocals, guitar, bass, other, piano, music_path`
|
| 115 |
+
Output dir: `separated/htdemucs_6s/{video_id}/`
|
| 116 |
+
`music.mp3` = `bass + other` overlay (pydub)
|
| 117 |
+
|
| 118 |
+
---
|
| 119 |
+
|
| 120 |
## Copilot / Agent Rules (from `.github/copilot-instructions.md`)
|
| 121 |
- **Minimal changes** — preserve existing functionality
|
| 122 |
- **No new dependencies** without approval
|
| 123 |
- **Use existing `modules/` functions** before writing new code; prefer overloads
|
| 124 |
- **Gradio reference**: https://www.gradio.app/docs/gradio/interface
|
| 125 |
+
- **AudioGallery** — `modules/AudioGallery.py`; extend `gr.HTML`
|
| 126 |
- **Footer** must use `modules/version_info.versions_html()`
|
| 127 |
- **Dockerfile** is HuggingFace-compatible (base: `python:3.12-slim`)
|
| 128 |
+
- **MCP** — Gradio built-in `mcp_server=True` in `demo.launch()`
|
| 129 |
+
- **Deno** — install via `deno.land/install.sh` (Docker) or add exe to PATH (local)
|
| 130 |
- **Testing** — Playwright MCP headless (Chrome/WebKit/Edge/Firefox), MSTest, UV
|
| 131 |
|
| 132 |
## Python Style (from `.github/instructions/py.instructions.md`)
|
|
|
|
| 136 |
- **In f-strings with `<script>` tags: use `{{ }}` for JS template literals**
|
| 137 |
- Tools: `black`, `ruff`, `isort`, `mypy` (optional)
|
| 138 |
|
| 139 |
+
---
|
| 140 |
+
|
| 141 |
+
## Environment Variables (`.env` — never commit)
|
| 142 |
| Variable | Purpose |
|
| 143 |
|----------|---------|
|
| 144 |
+
| `HF_TOKEN` | HuggingFace API token (required by `modules/constants.py`) |
|
| 145 |
| `CRYPTO_PK` | Crypto private key |
|
| 146 |
| `HF_REPO_ID` | HF storage repo (`Surn/Storage`) |
|
| 147 |
| `SPACE_NAME` | HF Space ID (`Surn/SeparateTracks`) |
|
| 148 |
| `TMPDIR` | Temp directory for processing |
|
| 149 |
| `IS_LOCAL` | `true` when running locally |
|
| 150 |
|
| 151 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
## Local Dev Commands
|
| 154 |
```bash
|
| 155 |
pip install -r requirements.txt
|
| 156 |
+
python app.py # http://localhost:7860
|
| 157 |
+
# MCP: http://localhost:7860/gradio_api/mcp/sse
|
| 158 |
```
|
| 159 |
|
| 160 |
## Docker Commands
|
|
|
|
| 163 |
docker run -p 7860:7860 --env-file .env separatetracks
|
| 164 |
```
|
| 165 |
|
| 166 |
+
---
|
| 167 |
+
|
| 168 |
## Agent Personas (`.github/agents/`)
|
| 169 |
| Agent | Role |
|
| 170 |
|-------|------|
|
| 171 |
| `orchestrator` | Decomposes tasks → assigns to dev/qa |
|
| 172 |
| `dev` / `local_dev` | Implements features (Python 3.12, Gradio) |
|
| 173 |
| `qa` | Reviews, gates, risk assessment |
|
| 174 |
+
| `code-munch` | Repository indexing via jcodemunch MCP |
|
| 175 |
| `file-discovery` | Locates files across repo |
|
| 176 |
|
| 177 |
+
## Status
|
| 178 |
+
Build plan steps 1-5 complete. Architecture refactored post-plan.
|
| 179 |
+
Next: Steps 6-10 — local verification, Docker build, HF Space deployment.
|
| 180 |
+
|
| 181 |
## Security Notes
|
| 182 |
+
- `.env` contains sensitive credentials — never commit (`.gitignore` updated)
|
| 183 |
- `constants.py` validates `HF_TOKEN` at import time; ensure `.env` is loaded first
|
|
|
app.py
CHANGED
|
@@ -3,199 +3,27 @@
|
|
| 3 |
# MCP endpoint: http://localhost:7860/gradio_api/mcp/sse
|
| 4 |
import os
|
| 5 |
import sys
|
|
|
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
|
| 9 |
-
from
|
|
|
|
| 10 |
|
| 11 |
|
|
|
|
|
|
|
| 12 |
# ---------------------------------------------------------------------------
|
| 13 |
# AudioGallery CSS — injected inline so the component is self-contained
|
| 14 |
# ---------------------------------------------------------------------------
|
| 15 |
_CSS = """
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
}
|
| 19 |
-
.audio-gallery-grid {
|
| 20 |
-
display: grid;
|
| 21 |
-
gap: 16px;
|
| 22 |
-
}
|
| 23 |
-
.audio-item {
|
| 24 |
-
background: var(--block-background-fill, #1e1e2e);
|
| 25 |
-
border: 1px solid var(--block-border-color, #3a3a5c);
|
| 26 |
-
border-radius: 8px;
|
| 27 |
-
padding: 12px;
|
| 28 |
-
display: flex;
|
| 29 |
-
flex-direction: column;
|
| 30 |
-
gap: 8px;
|
| 31 |
-
}
|
| 32 |
-
.audio-label {
|
| 33 |
-
font-weight: 600;
|
| 34 |
-
font-size: 0.9rem;
|
| 35 |
-
color: var(--body-text-color, #cdd6f4);
|
| 36 |
-
text-transform: uppercase;
|
| 37 |
-
letter-spacing: 0.05em;
|
| 38 |
-
}
|
| 39 |
-
.waveform-canvas {
|
| 40 |
width: 100%;
|
| 41 |
-
|
| 42 |
-
border-radius: 4px;
|
| 43 |
-
background: var(--background-fill-secondary, #181825);
|
| 44 |
-
display: block;
|
| 45 |
-
}
|
| 46 |
-
.audio-controls {
|
| 47 |
-
display: flex;
|
| 48 |
-
align-items: center;
|
| 49 |
-
gap: 8px;
|
| 50 |
-
}
|
| 51 |
-
.play-btn {
|
| 52 |
-
background: #4a9eff;
|
| 53 |
-
border: none;
|
| 54 |
-
border-radius: 50%;
|
| 55 |
-
width: 32px;
|
| 56 |
-
height: 32px;
|
| 57 |
-
cursor: pointer;
|
| 58 |
-
font-size: 0.85rem;
|
| 59 |
-
color: white;
|
| 60 |
-
flex-shrink: 0;
|
| 61 |
-
}
|
| 62 |
-
.play-btn:hover {
|
| 63 |
-
background: #6ab4ff;
|
| 64 |
-
}
|
| 65 |
-
.time-display {
|
| 66 |
-
font-size: 0.8rem;
|
| 67 |
-
color: var(--body-text-color, #a6adc8);
|
| 68 |
-
font-family: monospace;
|
| 69 |
}
|
| 70 |
"""
|
| 71 |
|
| 72 |
-
# ---------------------------------------------------------------------------
|
| 73 |
-
# AudioGallery JS — initialises waveform canvas + play/pause for each item.
|
| 74 |
-
# Uses a self-invoking function; data-initialized guard prevents double-bind
|
| 75 |
-
# when Gradio re-renders the component.
|
| 76 |
-
# Note: curly braces inside this plain string are NOT Python format braces.
|
| 77 |
-
# ---------------------------------------------------------------------------
|
| 78 |
-
_JS = """
|
| 79 |
-
(function () {
|
| 80 |
-
function formatTime(secs) {
|
| 81 |
-
var m = Math.floor(secs / 60);
|
| 82 |
-
var s = Math.floor(secs % 60).toString().padStart(2, '0');
|
| 83 |
-
return m + ':' + s;
|
| 84 |
-
}
|
| 85 |
-
|
| 86 |
-
function drawWaveform(canvas) {
|
| 87 |
-
var ctx = canvas.getContext('2d');
|
| 88 |
-
var w = canvas.offsetWidth || 300;
|
| 89 |
-
canvas.width = w;
|
| 90 |
-
var h = canvas.height;
|
| 91 |
-
ctx.clearRect(0, 0, w, h);
|
| 92 |
-
ctx.fillStyle = '#4a9eff';
|
| 93 |
-
var bars = 60;
|
| 94 |
-
for (var i = 0; i < bars; i++) {
|
| 95 |
-
var x = (i / bars) * w;
|
| 96 |
-
var bw = Math.max(1, w / bars - 2);
|
| 97 |
-
var amp = h * (0.2 + 0.7 * Math.abs(Math.sin(i * 0.45 + Math.random() * 0.3)));
|
| 98 |
-
var y = (h - amp) / 2;
|
| 99 |
-
ctx.fillRect(x, y, bw, amp);
|
| 100 |
-
}
|
| 101 |
-
}
|
| 102 |
-
|
| 103 |
-
function initItems() {
|
| 104 |
-
document.querySelectorAll('.audio-item[data-initialized="false"]').forEach(function (item) {
|
| 105 |
-
item.setAttribute('data-initialized', 'true');
|
| 106 |
-
var audio = item.querySelector('audio');
|
| 107 |
-
var canvas = item.querySelector('.waveform-canvas');
|
| 108 |
-
var btn = item.querySelector('.play-btn');
|
| 109 |
-
var timeDisplay = item.querySelector('.time-display');
|
| 110 |
-
|
| 111 |
-
drawWaveform(canvas);
|
| 112 |
-
|
| 113 |
-
btn.addEventListener('click', function () {
|
| 114 |
-
// Pause any other playing tracks
|
| 115 |
-
document.querySelectorAll('.audio-item audio').forEach(function (a) {
|
| 116 |
-
if (a !== audio && !a.paused) {
|
| 117 |
-
a.pause();
|
| 118 |
-
a.closest('.audio-item').querySelector('.play-btn').textContent = '\u25B6';
|
| 119 |
-
}
|
| 120 |
-
});
|
| 121 |
-
if (audio.paused) {
|
| 122 |
-
audio.play();
|
| 123 |
-
btn.textContent = '\u23F8';
|
| 124 |
-
} else {
|
| 125 |
-
audio.pause();
|
| 126 |
-
btn.textContent = '\u25B6';
|
| 127 |
-
}
|
| 128 |
-
});
|
| 129 |
-
|
| 130 |
-
audio.addEventListener('timeupdate', function () {
|
| 131 |
-
timeDisplay.textContent = formatTime(audio.currentTime);
|
| 132 |
-
});
|
| 133 |
-
|
| 134 |
-
audio.addEventListener('ended', function () {
|
| 135 |
-
btn.textContent = '\u25B6';
|
| 136 |
-
});
|
| 137 |
-
});
|
| 138 |
-
}
|
| 139 |
-
|
| 140 |
-
// Defer to ensure canvas dimensions are resolved after layout
|
| 141 |
-
setTimeout(initItems, 50);
|
| 142 |
-
})();
|
| 143 |
-
"""
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
# ---------------------------------------------------------------------------
|
| 147 |
-
# AudioGallery component
|
| 148 |
-
# ---------------------------------------------------------------------------
|
| 149 |
-
class AudioGallery(gr.HTML):
|
| 150 |
-
"""Gradio HTML component that renders audio stems in a responsive grid.
|
| 151 |
-
|
| 152 |
-
Extends gr.HTML; builds a self-contained HTML snippet with inline CSS
|
| 153 |
-
and JS for waveform visualisation and play/pause controls.
|
| 154 |
-
"""
|
| 155 |
-
|
| 156 |
-
DEFAULT_LABELS = ["Drums", "Vocals", "Guitar", "Bass", "Other", "Piano", "Music"]
|
| 157 |
-
|
| 158 |
-
def __init__(
|
| 159 |
-
self,
|
| 160 |
-
audio_urls,
|
| 161 |
-
*,
|
| 162 |
-
value=None,
|
| 163 |
-
labels=None,
|
| 164 |
-
columns=3,
|
| 165 |
-
label=None,
|
| 166 |
-
**kwargs,
|
| 167 |
-
):
|
| 168 |
-
labels = labels or self.DEFAULT_LABELS
|
| 169 |
-
html = self._build_html(audio_urls, labels=labels, columns=columns)
|
| 170 |
-
super().__init__(value=html, label=label, **kwargs)
|
| 171 |
-
|
| 172 |
-
@staticmethod
|
| 173 |
-
def _build_html(audio_urls, labels, columns):
|
| 174 |
-
items = ""
|
| 175 |
-
for i, url in enumerate(audio_urls):
|
| 176 |
-
lbl = labels[i] if i < len(labels) else f"Track {i + 1}"
|
| 177 |
-
items += (
|
| 178 |
-
f'<div class="audio-item" data-index="{i}" data-initialized="false">'
|
| 179 |
-
f'<div class="audio-label">{lbl}</div>'
|
| 180 |
-
f'<canvas class="waveform-canvas" width="300" height="60"></canvas>'
|
| 181 |
-
f'<audio src="{url}" preload="metadata"></audio>'
|
| 182 |
-
f'<div class="audio-controls">'
|
| 183 |
-
f'<button class="play-btn">▶</button>'
|
| 184 |
-
f'<div class="time-display">0:00</div>'
|
| 185 |
-
f'</div>'
|
| 186 |
-
f'</div>\n'
|
| 187 |
-
)
|
| 188 |
-
return (
|
| 189 |
-
f'<style>{_CSS}</style>'
|
| 190 |
-
f'<div class="audio-gallery-container">'
|
| 191 |
-
f'<div class="audio-gallery-grid" style="grid-template-columns: repeat({columns}, 1fr);">'
|
| 192 |
-
f'{items}'
|
| 193 |
-
f'</div>'
|
| 194 |
-
f'</div>'
|
| 195 |
-
f'<script>{_JS}</script>'
|
| 196 |
-
)
|
| 197 |
-
|
| 198 |
-
|
| 199 |
# ---------------------------------------------------------------------------
|
| 200 |
# Version footer (graceful fallback if torch/cuda not available)
|
| 201 |
# ---------------------------------------------------------------------------
|
|
@@ -211,7 +39,51 @@ def _footer_html():
|
|
| 211 |
# ---------------------------------------------------------------------------
|
| 212 |
# Core processing function (also exposed as MCP tool)
|
| 213 |
# ---------------------------------------------------------------------------
|
| 214 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
"""Download audio from a YouTube video and separate it into instrument stems.
|
| 216 |
|
| 217 |
Uses Demucs htdemucs_6s to produce drums, vocals, guitar, bass, piano,
|
|
@@ -235,14 +107,59 @@ def process_video(video_id: str) -> str:
|
|
| 235 |
return f"<p style='color:red;'>Error: {exc}</p>"
|
| 236 |
|
| 237 |
paths = [drums, vocals, guitar, bass, other, piano, music]
|
| 238 |
-
audio_urls = [f"/file={
|
| 239 |
-
return AudioGallery(audio_urls=audio_urls, columns=3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
|
| 242 |
# ---------------------------------------------------------------------------
|
| 243 |
# Gradio UI
|
| 244 |
# ---------------------------------------------------------------------------
|
| 245 |
-
with gr.Blocks(title="SeparateTracks") as demo:
|
| 246 |
gr.Markdown(
|
| 247 |
"## \U0001f3bc SeparateTracks\n"
|
| 248 |
"Enter a YouTube video ID to separate the audio into instrument stems "
|
|
@@ -257,13 +174,14 @@ with gr.Blocks(title="SeparateTracks") as demo:
|
|
| 257 |
)
|
| 258 |
run_btn = gr.Button("Separate Tracks", variant="primary", scale=1)
|
| 259 |
|
|
|
|
| 260 |
audio_output = gr.HTML(label="Separated Tracks")
|
| 261 |
-
gr.HTML(value=_footer_html())
|
| 262 |
|
| 263 |
run_btn.click(
|
| 264 |
-
fn=
|
| 265 |
inputs=video_id_input,
|
| 266 |
-
outputs=audio_output,
|
| 267 |
)
|
| 268 |
|
| 269 |
if __name__ == "__main__":
|
|
@@ -271,4 +189,8 @@ if __name__ == "__main__":
|
|
| 271 |
mcp_server=True,
|
| 272 |
server_name="0.0.0.0",
|
| 273 |
server_port=7860,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
)
|
|
|
|
| 3 |
# MCP endpoint: http://localhost:7860/gradio_api/mcp/sse
|
| 4 |
import os
|
| 5 |
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
|
| 8 |
import gradio as gr
|
| 9 |
|
| 10 |
+
from modules.AudioGallery import AudioGallery
|
| 11 |
+
from modules.yt_audio_get_tracks import download_audio, separate_tracks
|
| 12 |
|
| 13 |
|
| 14 |
+
SEPARATED_DIR = Path("separated").resolve()
|
| 15 |
+
gr.set_static_paths(paths=["separated/", SEPARATED_DIR.as_posix()])
|
| 16 |
# ---------------------------------------------------------------------------
|
| 17 |
# App-level CSS — styles the #versions footer; AudioGallery CSS is built in modules/AudioGallery.py
|
| 18 |
# ---------------------------------------------------------------------------
|
| 19 |
_CSS = """
|
| 20 |
+
#versions {
|
| 21 |
+
margin-top: 1em;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
width: 100%;
|
| 23 |
+
text-align: center;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
"""
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# ---------------------------------------------------------------------------
|
| 28 |
# Version footer (graceful fallback if torch/cuda not available)
|
| 29 |
# ---------------------------------------------------------------------------
|
|
|
|
| 39 |
# ---------------------------------------------------------------------------
|
| 40 |
# Core processing function (also exposed as MCP tool)
|
| 41 |
# ---------------------------------------------------------------------------
|
| 42 |
+
def _process_video_impl(video_id: str, progress=None):
|
| 43 |
+
progress_messages = []
|
| 44 |
+
|
| 45 |
+
def on_progress(message):
|
| 46 |
+
progress_messages.append(message)
|
| 47 |
+
|
| 48 |
+
video_id = video_id.strip()
|
| 49 |
+
if not video_id:
|
| 50 |
+
return (
|
| 51 |
+
"<p style='color:red;'>Please enter a YouTube video ID.</p>",
|
| 52 |
+
"No video ID provided.",
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
try:
|
| 56 |
+
if progress is not None:
|
| 57 |
+
progress(0.0, desc="Preparing request")
|
| 58 |
+
url = f"https://www.youtube.com/watch?v={video_id}"
|
| 59 |
+
if progress is not None:
|
| 60 |
+
progress(0.15, desc="Downloading audio")
|
| 61 |
+
wav = download_audio(url, video_id, progress_callback=on_progress)
|
| 62 |
+
if progress is not None:
|
| 63 |
+
progress(0.45, desc="Separating tracks")
|
| 64 |
+
drums, vocals, guitar, bass, other, piano, music = separate_tracks(
|
| 65 |
+
wav,
|
| 66 |
+
video_id,
|
| 67 |
+
progress_callback=on_progress,
|
| 68 |
+
)
|
| 69 |
+
if progress is not None:
|
| 70 |
+
progress(0.9, desc="Building audio gallery")
|
| 71 |
+
except Exception as exc:
|
| 72 |
+
status = "\n".join(progress_messages) if progress_messages else "Starting..."
|
| 73 |
+
return f"<p style='color:red;'>Error: {exc}</p>", f"{status}\nError: {exc}"
|
| 74 |
+
|
| 75 |
+
paths = [drums, vocals, guitar, bass, other, piano, music]
|
| 76 |
+
audio_urls = [f"/file={Path(p).as_posix()}" for p in paths]
|
| 77 |
+
status = "\n".join(progress_messages + ["Done."])
|
| 78 |
+
if progress is not None:
|
| 79 |
+
progress(1.0, desc="Done")
|
| 80 |
+
return (
|
| 81 |
+
AudioGallery._build_html(audio_urls=audio_urls, labels=AudioGallery.DEFAULT_LABELS, columns=3),
|
| 82 |
+
status,
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def process_video(video_id: str, progress=gr.Progress(track_tqdm=True)) -> str:
|
| 87 |
"""Download audio from a YouTube video and separate it into instrument stems.
|
| 88 |
|
| 89 |
Uses Demucs htdemucs_6s to produce drums, vocals, guitar, bass, piano,
|
|
|
|
| 107 |
return f"<p style='color:red;'>Error: {exc}</p>"
|
| 108 |
|
| 109 |
paths = [drums, vocals, guitar, bass, other, piano, music]
|
| 110 |
+
audio_urls = [f"/file={Path(p).as_posix()}" for p in paths]
|
| 111 |
+
return AudioGallery._build_html(audio_urls=audio_urls, labels=AudioGallery.DEFAULT_LABELS, columns=3)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def process_video_with_progress(video_id: str, progress=gr.Progress(track_tqdm=True)):
    """Run the download/separation pipeline, yielding UI updates as it goes.

    Generator used as the button click handler. Each yielded value is an
    ``(html, status_text)`` pair: ``html`` is empty while work is in progress,
    the gallery markup on success, or a red error paragraph on failure;
    ``status_text`` is the newline-joined progress log.

    Args:
        video_id: YouTube video ID. Leading/trailing whitespace is stripped.
        progress: Progress reporter invoked as ``progress(fraction, desc=...)``.
    """
    # Local import keeps the function self-contained.
    from html import escape

    status_lines = []

    video_id = video_id.strip()
    if not video_id:
        yield "<p style='color:red;'>Please enter a YouTube video ID.</p>", "No video ID provided."
        return

    url = f"https://www.youtube.com/watch?v={video_id}"

    try:
        progress(0.05, desc="Downloading audio")
        yield "", "Downloading audio from YouTube..."
        wav = download_audio(url, video_id, progress_callback=status_lines.append)

        progress(0.4, desc="Separating tracks")
        yield "", "\n".join(status_lines)

        drums, vocals, guitar, bass, other, piano, music = separate_tracks(
            wav, video_id, progress_callback=status_lines.append
        )
        progress(0.9, desc="Building gallery")
        yield "", "\n".join(status_lines)

    except Exception as exc:  # UI boundary: show the failure instead of raising
        # The exception text can contain user-supplied data, so it is escaped
        # before being placed in HTML. The status text is plain text.
        yield (
            f"<p style='color:red;'>Error: {escape(str(exc))}</p>",
            "\n".join(status_lines) + f"\nError: {exc}",
        )
        return

    paths = [drums, vocals, guitar, bass, other, piano, music]
    # Stem paths are passed through unchanged as returned by separate_tracks.
    audio_urls = [f"/file={p}" for p in paths]
    status_lines.append("Done.")
    progress(1.0, desc="Done")
    yield (
        AudioGallery._build_html(
            audio_urls=audio_urls,
            labels=AudioGallery.DEFAULT_LABELS,
            columns=3,
        ),
        "\n".join(status_lines),
    )
|
| 157 |
|
| 158 |
|
| 159 |
# ---------------------------------------------------------------------------
|
| 160 |
# Gradio UI
|
| 161 |
# ---------------------------------------------------------------------------
|
| 162 |
+
with gr.Blocks(title="SeparateTracks", css=_CSS) as demo:
|
| 163 |
gr.Markdown(
|
| 164 |
"## \U0001f3bc SeparateTracks\n"
|
| 165 |
"Enter a YouTube video ID to separate the audio into instrument stems "
|
|
|
|
| 174 |
)
|
| 175 |
run_btn = gr.Button("Separate Tracks", variant="primary", scale=1)
|
| 176 |
|
| 177 |
+
progress_output = gr.Textbox(label="Progress", interactive=False, lines=6)
|
| 178 |
audio_output = gr.HTML(label="Separated Tracks")
|
| 179 |
+
gr.HTML(value=_footer_html(), elem_id="versions", elem_classes="version-info")
|
| 180 |
|
| 181 |
run_btn.click(
|
| 182 |
+
fn=process_video_with_progress,
|
| 183 |
inputs=video_id_input,
|
| 184 |
+
outputs=[audio_output, progress_output],
|
| 185 |
)
|
| 186 |
|
| 187 |
if __name__ == "__main__":
|
|
|
|
| 189 |
mcp_server=True,
|
| 190 |
server_name="0.0.0.0",
|
| 191 |
server_port=7860,
|
| 192 |
+
allowed_paths=[SEPARATED_DIR.as_posix(), "separated/", ".separated/"],
|
| 193 |
+
favicon_path="separated/favicon.ico"
|
| 194 |
+
# css=_CSS,
|
| 195 |
+
# js=_JS
|
| 196 |
)
|
modules/AudioGallery.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
_CSS = """
|
| 5 |
+
.audio-gallery-container {
|
| 6 |
+
padding: 16px;
|
| 7 |
+
}
|
| 8 |
+
.audio-gallery-grid {
|
| 9 |
+
display: grid;
|
| 10 |
+
gap: 16px;
|
| 11 |
+
}
|
| 12 |
+
.audio-item {
|
| 13 |
+
background: var(--block-background-fill, #1e1e2e);
|
| 14 |
+
border: 1px solid var(--block-border-color, #3a3a5c);
|
| 15 |
+
border-radius: 8px;
|
| 16 |
+
padding: 12px;
|
| 17 |
+
display: flex;
|
| 18 |
+
flex-direction: column;
|
| 19 |
+
gap: 8px;
|
| 20 |
+
}
|
| 21 |
+
.audio-label {
|
| 22 |
+
font-weight: 600;
|
| 23 |
+
font-size: 0.9rem;
|
| 24 |
+
color: var(--body-text-color, #cdd6f4);
|
| 25 |
+
text-transform: uppercase;
|
| 26 |
+
letter-spacing: 0.05em;
|
| 27 |
+
}
|
| 28 |
+
.waveform-canvas {
|
| 29 |
+
width: 100%;
|
| 30 |
+
height: 60px;
|
| 31 |
+
border-radius: 4px;
|
| 32 |
+
background: var(--background-fill-secondary, #181825);
|
| 33 |
+
display: block;
|
| 34 |
+
}
|
| 35 |
+
.audio-controls {
|
| 36 |
+
display: flex;
|
| 37 |
+
align-items: center;
|
| 38 |
+
gap: 8px;
|
| 39 |
+
}
|
| 40 |
+
.play-btn {
|
| 41 |
+
background: #4a9eff;
|
| 42 |
+
border: none;
|
| 43 |
+
border-radius: 50%;
|
| 44 |
+
width: 32px;
|
| 45 |
+
height: 32px;
|
| 46 |
+
cursor: pointer;
|
| 47 |
+
font-size: 0.85rem;
|
| 48 |
+
color: white;
|
| 49 |
+
flex-shrink: 0;
|
| 50 |
+
}
|
| 51 |
+
.play-btn:hover {
|
| 52 |
+
background: #6ab4ff;
|
| 53 |
+
}
|
| 54 |
+
.time-display {
|
| 55 |
+
font-size: 0.8rem;
|
| 56 |
+
color: var(--body-text-color, #a6adc8);
|
| 57 |
+
font-family: monospace;
|
| 58 |
+
}
|
| 59 |
+
"""
|
| 60 |
+
|
| 61 |
+
_JS = """
|
| 62 |
+
(function () {
|
| 63 |
+
function formatTime(secs) {
|
| 64 |
+
var m = Math.floor(secs / 60);
|
| 65 |
+
var s = Math.floor(secs % 60).toString().padStart(2, '0');
|
| 66 |
+
return m + ':' + s;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
function drawWaveform(canvas) {
|
| 70 |
+
var ctx = canvas.getContext('2d');
|
| 71 |
+
var w = canvas.offsetWidth || 300;
|
| 72 |
+
canvas.width = w;
|
| 73 |
+
var h = canvas.height;
|
| 74 |
+
ctx.clearRect(0, 0, w, h);
|
| 75 |
+
ctx.fillStyle = '#4a9eff';
|
| 76 |
+
var bars = 60;
|
| 77 |
+
for (var i = 0; i < bars; i++) {
|
| 78 |
+
var x = (i / bars) * w;
|
| 79 |
+
var bw = Math.max(1, w / bars - 2);
|
| 80 |
+
var amp = h * (0.2 + 0.7 * Math.abs(Math.sin(i * 0.45 + Math.random() * 0.3)));
|
| 81 |
+
var y = (h - amp) / 2;
|
| 82 |
+
ctx.fillRect(x, y, bw, amp);
|
| 83 |
+
}
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
function initItems() {
|
| 87 |
+
document.querySelectorAll('.audio-item[data-initialized="false"]').forEach(function (item) {
|
| 88 |
+
item.setAttribute('data-initialized', 'true');
|
| 89 |
+
var audio = item.querySelector('audio');
|
| 90 |
+
var canvas = item.querySelector('.waveform-canvas');
|
| 91 |
+
var btn = item.querySelector('.play-btn');
|
| 92 |
+
var timeDisplay = item.querySelector('.time-display');
|
| 93 |
+
|
| 94 |
+
drawWaveform(canvas);
|
| 95 |
+
|
| 96 |
+
btn.addEventListener('click', function () {
|
| 97 |
+
document.querySelectorAll('.audio-item audio').forEach(function (a) {
|
| 98 |
+
if (a !== audio && !a.paused) {
|
| 99 |
+
a.pause();
|
| 100 |
+
a.closest('.audio-item').querySelector('.play-btn').textContent = '\u25B6';
|
| 101 |
+
}
|
| 102 |
+
});
|
| 103 |
+
if (audio.paused) {
|
| 104 |
+
audio.play();
|
| 105 |
+
btn.textContent = '\u23F8';
|
| 106 |
+
} else {
|
| 107 |
+
audio.pause();
|
| 108 |
+
btn.textContent = '\u25B6';
|
| 109 |
+
}
|
| 110 |
+
});
|
| 111 |
+
|
| 112 |
+
audio.addEventListener('timeupdate', function () {
|
| 113 |
+
timeDisplay.textContent = formatTime(audio.currentTime);
|
| 114 |
+
});
|
| 115 |
+
|
| 116 |
+
audio.addEventListener('ended', function () {
|
| 117 |
+
btn.textContent = '\u25B6';
|
| 118 |
+
});
|
| 119 |
+
});
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
setTimeout(initItems, 50);
|
| 123 |
+
})();
|
| 124 |
+
"""
|
| 125 |
+
|
| 126 |
+
class AudioGallery(gr.HTML):
    """Gradio HTML component that renders audio stems in a responsive grid."""

    # Labels applied positionally to the audio URLs when none are supplied.
    DEFAULT_LABELS = ["Drums", "Vocals", "Guitar", "Bass", "Other", "Piano", "Music"]

    def __init__(
        self,
        audio_urls,
        *,
        value=None,
        labels=None,
        columns=3,
        label=None,
        **kwargs,
    ):
        """Build the gallery markup and pass it to ``gr.HTML`` as the value.

        Args:
            audio_urls: Iterable of URLs, one per ``<audio>`` element.
            value: Accepted but not used; the generated markup is always
                passed to the base class as ``value``.
            labels: Per-track captions; falls back to ``DEFAULT_LABELS`` when
                omitted or empty.
            columns: Number of equal-width grid columns.
            label: Component label forwarded to ``gr.HTML``.
            **kwargs: Forwarded unchanged to ``gr.HTML``.
        """
        labels = labels or self.DEFAULT_LABELS
        html = self._build_html(audio_urls, labels=labels, columns=columns)
        super().__init__(value=html, label=label, **kwargs)

    @staticmethod
    def _build_html(audio_urls, labels, columns):
        """Return the gallery as one HTML string: ``<style>``, grid, ``<script>``.

        Args:
            audio_urls: Iterable of URLs used as the ``src`` of each track.
            labels: Sequence of captions; tracks beyond its length are
                captioned ``"Track N"`` (1-based).
            columns: Number of columns for the CSS grid.

        Returns:
            The HTML markup. Labels and URLs are HTML-escaped so that quotes
            or angle brackets in them cannot break out of the markup.
        """
        # Local import: the module only imports gradio at file level.
        from html import escape

        def render_item(index, url):
            caption = labels[index] if index < len(labels) else f"Track {index + 1}"
            return (
                f'<div class="audio-item" data-index="{index}" data-initialized="false">'
                f'<div class="audio-label">{escape(str(caption))}</div>'
                f'<canvas class="waveform-canvas" width="300" height="60"></canvas>'
                f'<audio src="{escape(str(url), quote=True)}" preload="metadata"></audio>'
                f'<div class="audio-controls">'
                f'<button class="play-btn">▶</button>'
                f'<div class="time-display">0:00</div>'
                f'</div>'
                f'</div>\n'
            )

        items = "".join(render_item(i, url) for i, url in enumerate(audio_urls))
        # NOTE(review): browsers do not execute <script> elements inserted via
        # innerHTML; confirm the Gradio version in use actually runs this
        # inline script, otherwise the play buttons stay inert.
        return (
            f'<style>{_CSS}</style>'
            f'<div class="audio-gallery-container">'
            f'<div class="audio-gallery-grid" style="grid-template-columns: repeat({columns}, 1fr);">'
            f'{items}'
            f'</div>'
            f'</div>'
            f'<script>{_JS}</script>'
        )
|
yt_audio_get_tracks.py → modules/yt_audio_get_tracks.py
RENAMED
|
@@ -5,9 +5,15 @@ import shutil
|
|
| 5 |
import yt_dlp
|
| 6 |
from pydub import AudioSegment
|
| 7 |
|
| 8 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
temp_dir = 'separated'
|
| 10 |
os.makedirs(temp_dir, exist_ok=True)
|
|
|
|
| 11 |
ydl_opts = {
|
| 12 |
'format': 'bestaudio/best',
|
| 13 |
'outtmpl': os.path.join(temp_dir, f'{video_id}.%(ext)s'),
|
|
@@ -26,16 +32,18 @@ def download_audio(url, video_id):
|
|
| 26 |
|
| 27 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 28 |
ydl.download([url])
|
|
|
|
| 29 |
return os.path.join(temp_dir, f'{video_id}.wav')
|
| 30 |
|
| 31 |
-
def separate_tracks(input_wav, video_id):
|
| 32 |
if not os.path.exists(input_wav):
|
| 33 |
raise FileNotFoundError(f"{input_wav} does not exist")
|
| 34 |
|
| 35 |
output_dir = 'separated'
|
|
|
|
| 36 |
subprocess.run(['demucs', '-n', 'htdemucs_6s', '--mp3', '--out', output_dir, input_wav], check=True)
|
| 37 |
|
| 38 |
-
base = os.path.join(output_dir, 'htdemucs_6s', video_id)
|
| 39 |
|
| 40 |
drums = f'{base}/drums.mp3'
|
| 41 |
vocals = f'{base}/vocals.mp3'
|
|
@@ -44,11 +52,13 @@ def separate_tracks(input_wav, video_id):
|
|
| 44 |
piano = f'{base}/piano.mp3'
|
| 45 |
other = f'{base}/other.mp3'
|
| 46 |
|
|
|
|
| 47 |
music = AudioSegment.from_mp3(bass).overlay(AudioSegment.from_mp3(other))
|
| 48 |
music_path = os.path.join(base, 'music.mp3')
|
| 49 |
music.export(music_path, format="mp3")
|
| 50 |
|
| 51 |
os.remove(input_wav)
|
|
|
|
| 52 |
|
| 53 |
return drums, vocals, guitar, bass, other, piano, music_path
|
| 54 |
|
|
|
|
| 5 |
import yt_dlp
|
| 6 |
from pydub import AudioSegment
|
| 7 |
|
| 8 |
+
def _emit_progress(progress_callback, message):
|
| 9 |
+
if progress_callback is not None:
|
| 10 |
+
progress_callback(message)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def download_audio(url, video_id, progress_callback=None):
|
| 14 |
temp_dir = 'separated'
|
| 15 |
os.makedirs(temp_dir, exist_ok=True)
|
| 16 |
+
_emit_progress(progress_callback, 'Downloading audio from YouTube...')
|
| 17 |
ydl_opts = {
|
| 18 |
'format': 'bestaudio/best',
|
| 19 |
'outtmpl': os.path.join(temp_dir, f'{video_id}.%(ext)s'),
|
|
|
|
| 32 |
|
| 33 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 34 |
ydl.download([url])
|
| 35 |
+
_emit_progress(progress_callback, 'Converting downloaded audio to WAV...')
|
| 36 |
return os.path.join(temp_dir, f'{video_id}.wav')
|
| 37 |
|
| 38 |
+
def separate_tracks(input_wav, video_id, progress_callback=None):
|
| 39 |
if not os.path.exists(input_wav):
|
| 40 |
raise FileNotFoundError(f"{input_wav} does not exist")
|
| 41 |
|
| 42 |
output_dir = 'separated'
|
| 43 |
+
_emit_progress(progress_callback, 'Separating tracks with Demucs...')
|
| 44 |
subprocess.run(['demucs', '-n', 'htdemucs_6s', '--mp3', '--out', output_dir, input_wav], check=True)
|
| 45 |
|
| 46 |
+
base = os.path.join('.', output_dir, 'htdemucs_6s', video_id)
|
| 47 |
|
| 48 |
drums = f'{base}/drums.mp3'
|
| 49 |
vocals = f'{base}/vocals.mp3'
|
|
|
|
| 52 |
piano = f'{base}/piano.mp3'
|
| 53 |
other = f'{base}/other.mp3'
|
| 54 |
|
| 55 |
+
_emit_progress(progress_callback, 'Creating combined music stem...')
|
| 56 |
music = AudioSegment.from_mp3(bass).overlay(AudioSegment.from_mp3(other))
|
| 57 |
music_path = os.path.join(base, 'music.mp3')
|
| 58 |
music.export(music_path, format="mp3")
|
| 59 |
|
| 60 |
os.remove(input_wav)
|
| 61 |
+
_emit_progress(progress_callback, 'Separation complete.')
|
| 62 |
|
| 63 |
return drums, vocals, guitar, bass, other, piano, music_path
|
| 64 |
|
separated/favicon.ico
ADDED
|
|
specs/build.md
CHANGED
|
@@ -12,14 +12,18 @@ Docker Space (`Surn/SeparateTracks`).
|
|
| 12 |
|
| 13 |
| File | Status | Purpose |
|
| 14 |
|------|--------|---------|
|
| 15 |
-
| `
|
| 16 |
-
| `
|
|
|
|
|
|
|
| 17 |
| `modules/constants.py` | exists | Env vars, shared constants |
|
| 18 |
| `modules/version_info.py` | exists | Footer HTML with versions |
|
| 19 |
| `modules/file_utils.py` | exists | File helper utilities |
|
| 20 |
-
| `requirements.txt` |
|
| 21 |
-
| `dockerfile` |
|
| 22 |
-
| `.gitignore` |
|
|
|
|
|
|
|
| 23 |
|
| 24 |
---
|
| 25 |
|
|
@@ -107,105 +111,52 @@ CMD ["python", "app.py"]
|
|
| 107 |
|
| 108 |
---
|
| 109 |
|
| 110 |
-
## Step 4 — Create `app.py`
|
| 111 |
-
|
| 112 |
-
`app.py` is the missing entry point. It must:
|
| 113 |
-
|
| 114 |
-
1. Import and wrap `yt_audio_get_tracks.download_audio` and `separate_tracks`
|
| 115 |
-
2. Build a Gradio `gr.Blocks` interface
|
| 116 |
-
3. Use the `AudioGallery` custom component (per copilot-instructions.md)
|
| 117 |
-
4. Show footer via `modules/version_info.versions_html()`
|
| 118 |
-
5. Launch with `mcp_server=True` for MCP endpoint at `/gradio_api/mcp/sse`
|
| 119 |
-
|
| 120 |
-
### `app.py` — Skeleton
|
| 121 |
-
|
| 122 |
-
```python
|
| 123 |
-
# app.py
|
| 124 |
-
import os
|
| 125 |
-
import gradio as gr
|
| 126 |
-
from yt_audio_get_tracks import download_audio, separate_tracks
|
| 127 |
-
from modules.version_info import versions_html
|
| 128 |
-
|
| 129 |
-
CSS_TEMPLATE = """...""" # AudioGallery CSS
|
| 130 |
-
JS_ON_LOAD = """...""" # AudioGallery waveform JS
|
| 131 |
-
|
| 132 |
-
class AudioGallery(gr.HTML):
|
| 133 |
-
def __init__(self, audio_urls, *, value=None, labels=None,
|
| 134 |
-
columns=3, label=None, **kwargs):
|
| 135 |
-
# build HTML grid from template (see copilot-instructions.md)
|
| 136 |
-
...
|
| 137 |
-
super().__init__(value=html, label=label, **kwargs)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
def process_video(video_id: str):
|
| 141 |
-
"""Download YouTube audio and return separated stems."""
|
| 142 |
-
url = f"https://www.youtube.com/watch?v={video_id}"
|
| 143 |
-
wav = download_audio(url, video_id)
|
| 144 |
-
drums, vocals, guitar, bass, other, piano, music = separate_tracks(wav, video_id)
|
| 145 |
-
return drums, vocals, guitar, bass, other, piano, music
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
run_btn.click(fn=process_video, inputs=video_id_input, outputs=audio_output)
|
| 160 |
-
|
| 161 |
-
if __name__ == "__main__":
|
| 162 |
-
demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)
|
| 163 |
-
```
|
| 164 |
|
| 165 |
---
|
| 166 |
|
| 167 |
-
## Step 5 — Implement `AudioGallery` Component
|
| 168 |
|
| 169 |
-
|
| 170 |
-
an audio grid with waveform canvases.
|
| 171 |
|
| 172 |
-
|
| 173 |
-
-
|
| 174 |
-
|
| 175 |
-
-
|
| 176 |
-
-
|
| 177 |
-
|
| 178 |
-
-
|
| 179 |
-
-
|
| 180 |
|
| 181 |
-
|
| 182 |
|
| 183 |
---
|
| 184 |
|
| 185 |
-
## Step 6 — MCP Server Integration
|
| 186 |
-
|
| 187 |
-
Gradio 5+ exposes MCP automatically at `/gradio_api/mcp/sse` when
|
| 188 |
-
`demo.launch(mcp_server=True)`.
|
| 189 |
-
|
| 190 |
-
Per copilot-instructions.md:
|
| 191 |
-
- Reference: https://huggingface.co/docs/hub/en/agents-mcp
|
| 192 |
-
- The `process_video` function becomes an MCP tool automatically
|
| 193 |
-
- Ensure function has a clear docstring (used as MCP tool description)
|
| 194 |
|
| 195 |
-
|
|
|
|
|
|
|
| 196 |
|
| 197 |
---
|
| 198 |
|
| 199 |
-
## Step 7 — Fix `modules/constants.py` for Local Dev
|
| 200 |
|
| 201 |
-
`
|
| 202 |
-
development without a `.env` file.
|
| 203 |
-
|
| 204 |
-
**Options (pick one):**
|
| 205 |
-
- A) Wrap the raise in a try/except and warn instead of crash (preferred for local)
|
| 206 |
-
- B) Set `HF_TOKEN` in `.env` (already done — just ensure `.env` is present)
|
| 207 |
-
|
| 208 |
-
Since `.env` exists with `HF_TOKEN`, Option B is sufficient. Ensure `.env` is
|
| 209 |
loaded before `constants.py` is imported.
|
| 210 |
|
| 211 |
**Note:** `constants.py` also imports `numpy` and `python-dotenv` — both must be
|
|
@@ -254,16 +205,18 @@ docker run -p 7860:7860 --env-file .env separatetracks
|
|
| 254 |
|
| 255 |
```
|
| 256 |
app.py
|
| 257 |
-
├──
|
|
|
|
|
|
|
| 258 |
│ ├── yt-dlp (pip)
|
| 259 |
│ ├── pydub (pip) → ffmpeg (apt)
|
| 260 |
│ └── demucs (pip) → torch (pip)
|
| 261 |
-
├── modules/constants.py
|
| 262 |
│ ├── python-dotenv (pip)
|
| 263 |
│ └── numpy (pip)
|
| 264 |
-
├── modules/version_info.py
|
| 265 |
-
│ └── gradio
|
| 266 |
-
└── modules/file_utils.py
|
| 267 |
├── Pillow (pip)
|
| 268 |
└── requests (pip)
|
| 269 |
```
|
|
@@ -278,7 +231,14 @@ app.py
|
|
| 278 |
| 2 | `requirements.txt` | Add gradio, dotenv, numpy, Pillow, requests | [x] |
|
| 279 |
| 3 | `dockerfile` | Add ffmpeg apt, fix pip installs | [x] |
|
| 280 |
| 4 | `app.py` | Create Gradio app with AudioGallery + MCP | [x] |
|
| 281 |
-
| 5 | `modules/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
---
|
| 284 |
|
|
|
|
| 12 |
|
| 13 |
| File | Status | Purpose |
|
| 14 |
|------|--------|---------|
|
| 15 |
+
| `app.py` | ✅ created | Gradio UI entry point + MCP server |
|
| 16 |
+
| `modules/AudioGallery.py` | ✅ created | `AudioGallery(gr.HTML)` — 7-stem audio grid |
|
| 17 |
+
| `modules/AudioGallery.pyi` | ✅ created | Type stub for AudioGallery |
|
| 18 |
+
| `modules/yt_audio_get_tracks.py` | ✅ moved + updated | `download_audio()` + `separate_tracks()` with progress callbacks |
|
| 19 |
| `modules/constants.py` | exists | Env vars, shared constants |
|
| 20 |
| `modules/version_info.py` | exists | Footer HTML with versions |
|
| 21 |
| `modules/file_utils.py` | exists | File helper utilities |
|
| 22 |
+
| `requirements.txt` | ✅ updated | gradio[mcp], python-dotenv, numpy, Pillow, requests added |
|
| 23 |
+
| `dockerfile` | ✅ updated | ffmpeg apt, git, proper pip install order |
|
| 24 |
+
| `.gitignore` | ✅ updated | `.env` entry added |
|
| 25 |
+
|
| 26 |
+
> **Removed:** Root-level `yt_audio_get_tracks.py` — replaced by `modules/yt_audio_get_tracks.py`.
|
| 27 |
|
| 28 |
---
|
| 29 |
|
|
|
|
| 111 |
|
| 112 |
---
|
| 113 |
|
| 114 |
+
## Step 4 — Create `app.py` ✅ COMPLETE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
+
**Actual implementation** (differs from original skeleton):
|
| 117 |
|
| 118 |
+
- Imports from `modules.AudioGallery` and `modules.yt_audio_get_tracks`
|
| 119 |
+
- `SEPARATED_DIR = Path("separated").resolve()` — used in `allowed_paths`
|
| 120 |
+
- Two processing functions:
|
| 121 |
+
- `process_video(video_id)` — simple, MCP-exposed tool (returns HTML only)
|
| 122 |
+
- `process_video_with_progress(video_id)` — UI handler (returns `(html, status_text)`)
|
| 123 |
+
- UI: Video ID input + button → Progress textbox (6 lines) → AudioGallery HTML → footer
|
| 124 |
+
- Button wired to `process_video_with_progress` → `[audio_output, progress_output]`
|
| 125 |
+
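- `demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860, allowed_paths=[SEPARATED_DIR.as_posix(), "separated/", ".separated/"], favicon_path="separated/favicon.ico")`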
|
| 126 |
+
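- Audio URLs: `/file=<path>` format for Gradio file serving — `process_video` uses `Path(p).as_posix()`, while `process_video_with_progress` passes the stem path through unchanged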
|
| 127 |
+
- Progress support can use `progress=gr.Progress(track_tqdm=True)` so the
|
| 128 |
+
handler can surface interactive progress while the stem pipeline runs.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
---
|
| 131 |
|
| 132 |
+
## Step 5 — Implement `AudioGallery` Component ✅ COMPLETE
|
| 133 |
|
| 134 |
+
**Actual implementation** — moved to `modules/AudioGallery.py`:
|
|
|
|
| 135 |
|
| 136 |
+
- `_CSS` — module-level string: `.audio-gallery-container/grid/item`, `.waveform-canvas`, `.audio-controls`, `.play-btn`, `.time-display`
|
| 137 |
+
- `_JS` — module-level string: IIFE with `setTimeout(initItems, 50)`, `drawWaveform()` (sine-modulated bars, 60 bars), play/pause mutual exclusion, time display
|
| 138 |
+
- `AudioGallery(gr.HTML)`:
|
| 139 |
+
- `DEFAULT_LABELS = ["Drums", "Vocals", "Guitar", "Bass", "Other", "Piano", "Music"]`
|
| 140 |
+
- `__init__(audio_urls, *, labels, columns=3, ...)` → calls `_build_html` → `super().__init__(value=html)`
|
| 141 |
+
- `_build_html(audio_urls, labels, columns)` — static method, returns inline `<style>+<div>+<script>` HTML
|
| 142 |
+
- `data-initialized="false"` guard prevents double event binding on Gradio re-renders
|
| 143 |
+
- Called in `app.py` via `AudioGallery._build_html(...)` directly (not full instantiation)
|
| 144 |
|
| 145 |
+
Also created: `modules/AudioGallery.pyi` — type stub
|
| 146 |
|
| 147 |
---
|
| 148 |
|
| 149 |
+
## Step 6 — MCP Server Integration ✅ COMPLETE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
+
- `demo.launch(mcp_server=True)` → endpoint at `/gradio_api/mcp/sse`
|
| 152 |
+
- `process_video()` is the MCP-exposed tool (has full docstring)
|
| 153 |
+
- jCodeMunch MCP server also configured in `.claude/settings.json`
|
| 154 |
|
| 155 |
---
|
| 156 |
|
| 157 |
+
## Step 7 — Fix `modules/constants.py` for Local Dev ✅ COMPLETE
|
| 158 |
|
| 159 |
+
`.env` is present with `HF_TOKEN`, so no code change is needed — just ensure `.env` is
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
loaded before `constants.py` is imported.
|
| 161 |
|
| 162 |
**Note:** `constants.py` also imports `numpy` and `python-dotenv` — both must be
|
|
|
|
| 205 |
|
| 206 |
```
|
| 207 |
app.py
|
| 208 |
+
├── modules/AudioGallery.py
|
| 209 |
+
│ └── gradio (pip)
|
| 210 |
+
├── modules/yt_audio_get_tracks.py ← moved from root
|
| 211 |
│ ├── yt-dlp (pip)
|
| 212 |
│ ├── pydub (pip) → ffmpeg (apt)
|
| 213 |
│ └── demucs (pip) → torch (pip)
|
| 214 |
+
├── modules/constants.py (not imported by app.py directly)
|
| 215 |
│ ├── python-dotenv (pip)
|
| 216 |
│ └── numpy (pip)
|
| 217 |
+
├── modules/version_info.py (lazy import in _footer_html)
|
| 218 |
+
│ └── gradio + torch (pip)
|
| 219 |
+
└── modules/file_utils.py (not imported by app.py directly)
|
| 220 |
├── Pillow (pip)
|
| 221 |
└── requests (pip)
|
| 222 |
```
|
|
|
|
| 231 |
| 2 | `requirements.txt` | Add gradio, dotenv, numpy, Pillow, requests | [x] |
|
| 232 |
| 3 | `dockerfile` | Add ffmpeg apt, fix pip installs | [x] |
|
| 233 |
| 4 | `app.py` | Create Gradio app with AudioGallery + MCP | [x] |
|
| 234 |
+
| 5 | `modules/AudioGallery.py` | AudioGallery(gr.HTML) component | [x] |
|
| 235 |
+
| 6 | `modules/AudioGallery.pyi` | Type stub | [x] |
|
| 236 |
+
| 7 | `modules/yt_audio_get_tracks.py` | Moved from root + progress callbacks added | [x] |
|
| 237 |
+
| 8 | `.claude/settings.json` | jCodeMunch MCP server config | [x] |
|
| 238 |
+
| 9 | `modules/constants.py` | Verify local-safe (`.env` present — no code change needed) | [x] |
|
| 239 |
+
| 10 | Local run | Step 8 — verify `python app.py` works | [ ] |
|
| 240 |
+
| 11 | Docker build | Step 9 — verify `docker build` + `docker run` | [ ] |
|
| 241 |
+
| 12 | HF Space deploy | Step 10 — push to `Surn/SeparateTracks` | [ ] |
|
| 242 |
|
| 243 |
---
|
| 244 |
|
specs/test_gallery.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test: AudioGallery file serving and UI flow for video ID f-H9bbi0Vyw.
|
| 3 |
+
Server must already be running on http://localhost:7860.
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from playwright.sync_api import sync_playwright, expect
|
| 8 |
+
|
| 9 |
+
VIDEO_ID = "f-H9bbi0Vyw"
BASE_URL = "http://localhost:7860"
STEMS = ["bass", "drums", "guitar", "music", "other", "piano", "vocals"]
# Stem directory, resolved relative to the repository root (the parent of the
# directory containing this file) rather than a machine-specific absolute path.
SEPARATED = (
    Path(__file__).resolve().parent.parent / "separated" / "htdemucs_6s" / VIDEO_ID
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def test_file_endpoint(page):
    """Part 1: HEAD each stem's /file= URL and require HTTP 200 with an audio MIME type.

    Prints one result line per stem and returns True only if every stem passed.
    """
    print("\n=== Part 1: /file= endpoint ===")
    failures = 0
    for stem in STEMS:
        stem_path = (SEPARATED / f"{stem}.mp3").as_posix()
        response = page.request.head(f"{BASE_URL}/file={stem_path}")
        http_status = response.status
        content_type = response.headers.get("content-type", "")
        passed = http_status == 200 and "audio" in content_type
        print(f" {'OK' if passed else 'FAIL'} {stem:8s} HTTP {http_status} {content_type}")
        if not passed:
            failures += 1
    return failures == 0
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def test_ui_flow(page):
    """Part 2: enter video ID, click button, wait for gallery, verify audio elements.

    Returns True only when the gallery appears, exactly seven ``<audio>``
    elements are present, each has a plausible ``src``, and the progress
    textbox reports completion. Screenshots are saved under
    ``specs/screenshots`` along the way.
    """
    print("\n=== Part 2: UI flow ===")

    page.goto(BASE_URL)
    page.wait_for_load_state("networkidle")
    page.screenshot(path="specs/screenshots/01_initial.png", full_page=True)
    print(" Screenshot: 01_initial.png")

    # Fill in the video ID
    textbox = page.get_by_label("YouTube Video ID")
    textbox.fill(VIDEO_ID)
    page.screenshot(path="specs/screenshots/02_filled.png", full_page=True)
    print(f" Entered video ID: {VIDEO_ID}")

    # Click Separate Tracks
    page.get_by_role("button", name="Separate Tracks").click()
    print(" Clicked 'Separate Tracks' — waiting for pipeline (CPU may take ~10 min)…")

    # Wait for the AudioGallery HTML to appear (long timeout for CPU demucs)
    try:
        page.wait_for_selector(".audio-gallery-container", timeout=720_000)
    except Exception:
        page.screenshot(path="specs/screenshots/03_timeout.png", full_page=True)
        print(" ❌ Timed out waiting for .audio-gallery-container")
        return False

    page.screenshot(path="specs/screenshots/03_gallery.png", full_page=True)
    print(" Screenshot: 03_gallery.png")

    # Count audio elements
    audio_els = page.locator("audio").all()
    print(f" Found {len(audio_els)} <audio> element(s)")

    # Check each audio src
    all_ok = True
    for i, el in enumerate(audio_els):
        src = el.get_attribute("src") or ""
        # Verify src contains .mp3 and either /file= or the video ID
        ok = ".mp3" in src and ("/file=" in src or VIDEO_ID in src)
        symbol = "OK" if ok else "FAIL"
        print(f" {symbol} audio[{i}] src={src[:80]}")
        if not ok:
            all_ok = False

    # Verify progress textbox shows "Done."
    progress = page.get_by_label("Progress").input_value()
    done_ok = "Done." in progress or "Separation complete" in progress
    print(f" {'OK' if done_ok else 'FAIL'} Progress box: {progress[-60:].strip()!r}")

    # The progress check is part of the verdict: previously it was printed but
    # never affected the return value, so a missing "Done." could not fail.
    return all_ok and done_ok and len(audio_els) == 7
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def main():
    """Run both checks in headless Chromium, print a summary, and exit.

    The exit status is 0 when both the endpoint check and the UI flow pass,
    and 1 otherwise.
    """
    screenshot_dir = Path("specs/screenshots")
    screenshot_dir.mkdir(parents=True, exist_ok=True)

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        page = browser.new_page()

        endpoint_ok = test_file_endpoint(page)
        ui_ok = test_ui_flow(page)

        browser.close()

    print("\n=== Summary ===")
    print(f" /file= endpoint: {'PASS' if endpoint_ok else 'FAIL'}")
    print(f" UI flow: {'PASS' if ui_ok else 'FAIL'}")
    both_passed = endpoint_ok and ui_ok
    sys.exit(0 if both_passed else 1)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
if __name__ == "__main__":
|
| 105 |
+
main()
|