anggars commited on
Commit
6319e2f
·
1 Parent(s): 18eaef5

Initial HF Space deployment

Browse files
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Python 3.10 slim base image.
# NOTE: this is CPU-only — despite the original comment, python:3.10-slim has
# no CUDA runtime; use an nvidia/cuda base image if GPU inference is required.
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies (ffmpeg + libsndfile for audio I/O, git for pip VCS installs)
RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsndfile1 \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create directories for uploads and processed files
RUN mkdir -p uploads processed

# Expose port 7860 (HF Spaces default)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,25 @@
1
  ---
2
  title: Tunebase
3
- emoji: 🐠
4
- colorFrom: green
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Tunebase
3
+ emoji: 🎵
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
  ---
10
 
11
+ # Tunebase - AI Audio Stem Separation
12
+
13
+ High-fidelity stem separation for musicians using Demucs.
14
+
15
+ ## Features
16
+
17
+ - 2-stem (Vocals + Instruments)
18
+ - 4-stem (Vocals, Drums, Bass, Other)
19
+ - 6-stem (+ Guitar, Piano split)
20
+
21
+ ## API Endpoints
22
+
23
+ - `POST /upload` - Upload audio file
24
+ - `POST /process/{file_id}?mode=4stem` - Process with specified mode
25
+ - `GET /status/{file_id}` - Check processing status
debug_audio.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Diagnostic script: report torch/torchaudio/soundfile versions and verify
that torchaudio can round-trip a WAV file through the soundfile backend."""
import torch
import torchaudio
import soundfile
import os

print(f"Torch version: {torch.__version__}")
print(f"Torchaudio version: {torchaudio.__version__}")
print(f"Soundfile version: {soundfile.__version__}")

print("\nChecking available backends:")
try:
    print(f"List audio backends: {torchaudio.list_audio_backends()}")
except Exception:
    # FIX: was a bare `except:` (also swallows KeyboardInterrupt/SystemExit).
    print("torchaudio.list_audio_backends() not available")

try:
    print(f"Get audio backend: {torchaudio.get_audio_backend()}")
except Exception:
    # get_audio_backend() was removed in torchaudio 2.x; ignore if absent.
    pass

print("\nTest writing and reading:")
test_file = "test_audio.wav"
try:
    # Generate dummy audio: 1 channel, 1 second at 16 kHz
    waveform = torch.rand(1, 16000)
    sample_rate = 16000

    print(f"Saving {test_file} with backend='soundfile'...")
    torchaudio.save(test_file, waveform, sample_rate, backend="soundfile")
    print("Save success.")

    print(f"Loading {test_file} with backend='soundfile'...")
    loaded_wav, loaded_sr = torchaudio.load(test_file, backend="soundfile")
    print(f"Load success. Shape: {loaded_wav.shape}")

except Exception as e:
    import traceback
    traceback.print_exc()
    print(f"Test failed: {e}")
finally:
    # Always clean up the scratch file, even if the round-trip failed.
    if os.path.exists(test_file):
        os.remove(test_file)
main.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import shutil
import os
import uuid
from services.audio_separator import AudioSeparator
from services.chord_analyzer import ChordAnalyzer
from services.midi_converter import MidiConverter
from services.beat_tracker import BeatTracker

app = FastAPI(title="Tunebase AI Engine", description="High-performance audio separation for Math Rock", version="1.0.0")

# Setup CORS
origins = ["*"]  # Allow all for dev
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Working directories (created eagerly so later open()/listdir() calls can't fail on a missing dir)
UPLOAD_DIR = "uploads"
PROCESSED_DIR = "processed"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(PROCESSED_DIR, exist_ok=True)

# Mount static files so the frontend can play the processed audio results
app.mount("/processed", StaticFiles(directory=PROCESSED_DIR), name="processed")
# Mount uploads for verification if needed, but risky. Processed is enough.

# Initialize Services
# Warning: Loading models takes time and RAM.
# On any failure all services are set to None; endpoints check for that and
# return 503 rather than crashing the app at import time.
try:
    separator = AudioSeparator()
    analyzer = ChordAnalyzer()
    midi_converter = MidiConverter()
    beat_tracker = BeatTracker()
except Exception as e:
    print(f"Warning: Failed to load models on startup. {e}")
    import traceback
    traceback.print_exc()
    separator = None
    analyzer = None
    midi_converter = None
    beat_tracker = None
49
+
50
+ @app.get("/")
51
+ def read_root():
52
+ return {"message": "Tunebase AI Engine Ready 🎸"}
53
+
54
+ @app.post("/upload")
55
+ async def upload_audio(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
56
+ """
57
+ Upload file audio dan mulai proses separasi di background.
58
+ """
59
+ file_id = str(uuid.uuid4())
60
+ file_ext = file.filename.split(".")[-1]
61
+ file_path = os.path.join(UPLOAD_DIR, f"{file_id}.{file_ext}")
62
+
63
+ with open(file_path, "wb") as buffer:
64
+ shutil.copyfileobj(file.file, buffer)
65
+
66
+ return {"id": file_id, "filename": file.filename, "status": "uploaded"}
67
+
68
# Cleanup Task
# FIX: removed a duplicate `import shutil` — it is already imported at the
# top of this module.
import time

def cleanup_old_files():
    """
    Delete session folders in PROCESSED_DIR older than one hour to conserve
    disk space on Hugging Face Spaces.

    NOTE(review): nothing schedules this function yet — it must be invoked
    periodically (e.g. via a background task) to have any effect.
    """
    print("Running cleanup task...")
    now = time.time()
    cutoff = now - 3600  # 1 hour (3600 seconds)

    for folder in os.listdir(PROCESSED_DIR):
        folder_path = os.path.join(PROCESSED_DIR, folder)
        if os.path.isdir(folder_path):
            try:
                # Use mtime as the session age; deletion of one session must
                # not abort cleanup of the others, hence the per-folder try.
                mtime = os.path.getmtime(folder_path)
                if mtime < cutoff:
                    print(f"Deleting old session: {folder}")
                    shutil.rmtree(folder_path)
            except Exception as e:
                print(f"Error cleaning {folder}: {e}")
92
+
93
# Global Progress Store (Simple in-memory)
processing_status = {}

@app.get("/status/{file_id}")
async def get_status(file_id: str):
    """Return the latest progress record for a session, or a default stub."""
    fallback = {"status": "unknown", "progress": 0, "step": "Waiting"}
    return processing_status.get(file_id, fallback)

def update_progress(file_id, step, progress, status="processing", data=None):
    """Publish a progress snapshot; extra fields from *data* are merged in."""
    snapshot = {"status": status, "progress": progress, "step": step}
    if data:
        snapshot.update(data)
    processing_status[file_id] = snapshot
109
+
110
+ @app.post("/process/{file_id}")
111
+ def process_audio(file_id: str, background_tasks: BackgroundTasks, mode: str = "4stem"):
112
+ """
113
+ Trigger separasi dan analisis.
114
+ Modes: 2stem (vocals+instruments), 4stem (default), 6stem (full)
115
+ """
116
+ if not separator:
117
+ raise HTTPException(status_code=503, detail="AI Engine not initialized")
118
+
119
+ # Cari file
120
+ input_path = None
121
+ # Simple search
122
+ for f in os.listdir(UPLOAD_DIR):
123
+ if f.startswith(file_id):
124
+ input_path = os.path.join(UPLOAD_DIR, f)
125
+ break
126
+
127
+ if not input_path:
128
+ raise HTTPException(status_code=404, detail="File not found")
129
+
130
+ output_dir = os.path.join(PROCESSED_DIR, file_id)
131
+
132
+ # Dynamic base URL for HF Space vs localhost
133
+ space_host = os.environ.get("SPACE_HOST")
134
+ if space_host:
135
+ base_url = f"https://{space_host}/processed"
136
+ else:
137
+ base_url = "http://localhost:8000/processed"
138
+
139
+ try:
140
+ update_progress(file_id, "Separating Audio Stems...", 10)
141
+
142
+ # 1. Separate Audio with specified mode
143
+ def progress_cb(step, prog):
144
+ update_progress(file_id, step, prog)
145
+
146
+ stems = separator.separate(input_path, output_dir, callback=progress_cb, mode=mode)
147
+
148
+ update_progress(file_id, "Analyzing Rhythm...", 50)
149
+
150
+ # 3. MIDI & Beat Analysis
151
+ update_progress(file_id, "Converting to MIDI...", 70)
152
+ midi_files = {}
153
+ if midi_converter:
154
+ # Convert separate stems to MIDI (e.g., Piano, Bass, Guitar)
155
+ # Basic Pitch works best on monophonic/polyphonic instruments, less on drums
156
+ target_stems = ['piano', 'bass', 'guitar_rhythm', 'guitar_lead', 'vocals']
157
+ for stem_name in target_stems:
158
+ if stem_name in stems:
159
+ stem_path = stems[stem_name]
160
+ midi_out = os.path.join(output_dir, f"{stem_name}.mid")
161
+ if midi_converter.convert(stem_path, midi_out):
162
+ midi_files[stem_name] = f"{base_url}/{file_id}/{stem_name}.mid"
163
+
164
+ update_progress(file_id, "Analyzing Rhythm...", 85)
165
+ bpm = 0
166
+ beats = []
167
+ if beat_tracker:
168
+ # Use Drums for beat tracking if available, otherwise 'other' or input
169
+ beat_source = stems.get('drums') or stems.get('other') or input_path
170
+ rhythm_data = beat_tracker.track(beat_source)
171
+ bpm = rhythm_data['bpm']
172
+ beats = rhythm_data['beats']
173
+
174
+ update_progress(file_id, "Finalizing...", 95)
175
+
176
+ update_progress(file_id, "Finalizing...", 95)
177
+
178
+ # Construct full URLs for frontend
179
+ stems_url = {k: f"{base_url}/{file_id}/{os.path.basename(v)}" for k, v in stems.items()}
180
+
181
+ final_data = {
182
+ "stems": stems_url,
183
+ "midi": midi_files,
184
+ "bpm": bpm,
185
+ "beats": beats
186
+ }
187
+ print(f"Final Data for {file_id}: {final_data}") # Debug
188
+ update_progress(file_id, "Completed", 100, status="completed", data=final_data)
189
+
190
+ return {
191
+ "status": "completed",
192
+ **final_data
193
+ }
194
+
195
+ except Exception as e:
196
+ import traceback
197
+ traceback.print_exc()
198
+ processing_status[file_id] = {"status": "error", "error": str(e)}
199
+ print(f"Error processing: {e}")
200
+ raise HTTPException(status_code=500, detail=str(e))
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ torch
5
+ torchaudio
6
+ demucs
7
+ librosa
8
+ numpy
9
+ scipy
10
+ soundfile
11
+ basic-pitch
services/__pycache__/audio_separator.cpython-310.pyc ADDED
Binary file (5.41 kB). View file
 
services/__pycache__/audio_separator.cpython-312.pyc ADDED
Binary file (4.71 kB). View file
 
services/__pycache__/beat_tracker.cpython-310.pyc ADDED
Binary file (1.37 kB). View file
 
services/__pycache__/chord_analyzer.cpython-310.pyc ADDED
Binary file (3.85 kB). View file
 
services/__pycache__/chord_analyzer.cpython-312.pyc ADDED
Binary file (4.32 kB). View file
 
services/__pycache__/midi_converter.cpython-310.pyc ADDED
Binary file (1.68 kB). View file
 
services/audio_separator.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
import torchaudio

# Force the torchaudio backend to soundfile to avoid the TorchCodec
# requirement on Windows.
try:
    torchaudio.set_audio_backend("soundfile")
except Exception:
    # FIX: was a bare `except:`. Newer torchaudio versions removed this API
    # (backend dispatch is automatic there), so failure is expected and safe.
    pass

from demucs.apply import apply_model
from demucs.pretrained import get_model
import os
import pathlib

# Model configuration: map separation mode -> Demucs model name.
MODELS = {
    "2stem": "htdemucs",     # Will merge to 2 stems after
    "4stem": "htdemucs",     # Default 4 stem (drums, bass, vocals, other)
    "6stem": "htdemucs_6s",  # Full 6 stem with guitar and piano
}
22
+
23
class AudioSeparator:
    """Demucs-based stem separator supporting 2-, 4- and 6-stem modes."""

    def __init__(self):
        # Load all models on startup so requests never pay the load cost.
        self.models = {}
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # 2stem and 4stem share "htdemucs", so load each distinct model once.
        unique_models = set(MODELS.values())
        for model_name in unique_models:
            print(f"Loading Demucs Model: {model_name}...")
            model = get_model(model_name)
            model.to(self.device)
            self.models[model_name] = model
        print(f"All models loaded on {self.device}")

    def separate(self, audio_path: str, output_dir: str, callback=None, mode="4stem"):
        """
        Split an audio file into stems.

        Args:
            audio_path: path to the input audio file.
            output_dir: directory where stem files are written.
            callback: optional fn(step_label, progress_pct) for progress updates.
            mode: "2stem" (vocals+instruments), "4stem" (default) or "6stem".

        Returns:
            dict mapping stem name -> saved file path.
        """
        # Select model based on mode (unknown modes fall back to 4-stem).
        model_name = MODELS.get(mode, "htdemucs")
        model = self.models[model_name]

        # Load audio using soundfile directly (bypass torchaudio backend issues)
        import soundfile as sf
        wav_np, sr = sf.read(audio_path)

        # soundfile returns (samples,) for mono or (samples, channels);
        # convert to the (channels, samples) layout Demucs expects.
        wav = torch.from_numpy(wav_np).float()

        if wav.ndim == 1:
            wav = wav.unsqueeze(0)
        else:
            wav = wav.t()

        # Demucs models operate at 44.1 kHz.
        if sr != 44100:
            if callback: callback("Resampling audio...", 15)
            resampler = torchaudio.transforms.Resample(sr, 44100)
            wav = resampler(wav)
            sr = 44100

        wav = wav.unsqueeze(0).to(self.device)  # (batch=1, channels, samples)

        # Normalize the input (standard Demucs recipe).
        ref = wav.mean(0)
        wav = (wav - ref.mean()) / ref.std()

        if callback: callback("Running Demucs Inference...", 20)
        print(f"Starting separation with {model_name} (mode: {mode})...")

        sources = apply_model(model, wav, shifts=1, split=True, overlap=0.25, progress=True)[0]
        # FIX: undo the input normalization so the stems keep the source
        # loudness — the reference Demucs recipe is `out * std + mean`;
        # the original code skipped this step, altering output levels.
        sources = sources * ref.std() + ref.mean()
        source_names = model.sources

        results = {}
        os.makedirs(output_dir, exist_ok=True)

        total_sources = len(source_names)
        source_tensors = {name: source for name, source in zip(source_names, sources)}

        # Handle different modes
        if mode == "2stem":
            # Merge everything that is not vocals into an "instruments" stem.
            if callback: callback("Merging to 2 stems...", 45)

            vocals = source_tensors.get('vocals')
            instruments = None
            for name, src in source_tensors.items():
                if name != 'vocals':
                    if instruments is None:
                        instruments = src.clone()
                    else:
                        instruments += src

            if vocals is not None:
                self._save_audio(vocals, sr, os.path.join(output_dir, "vocals.mp3"))
                results['vocals'] = os.path.join(output_dir, "vocals.mp3")
            if instruments is not None:
                self._save_audio(instruments, sr, os.path.join(output_dir, "instruments.mp3"))
                results['instruments'] = os.path.join(output_dir, "instruments.mp3")

        elif mode == "6stem":
            # Full separation; the guitar stem is additionally split lead/rhythm.
            for i, (name, source) in enumerate(source_tensors.items()):
                progress = 30 + int((i / total_sources) * 20)
                if callback: callback(f"Saving stem: {name}", progress)

                if name == 'guitar':
                    results.update(self._process_guitar(source, sr, output_dir))
                else:
                    stem_path = os.path.join(output_dir, f"{name}.mp3")
                    self._save_audio(source, sr, stem_path)
                    results[name] = stem_path
        else:
            # 4stem - standard separation
            for i, (name, source) in enumerate(source_tensors.items()):
                progress = 30 + int((i / total_sources) * 20)
                if callback: callback(f"Saving stem: {name}", progress)

                stem_path = os.path.join(output_dir, f"{name}.mp3")
                self._save_audio(source, sr, stem_path)
                results[name] = stem_path

        return results

    def _process_guitar(self, source, sr, output_dir):
        """
        Split the guitar stem into Lead and Rhythm using Mid-Side processing.

        - Mid (center) = sum of L+R (typically power chords / strumming)
        - Side (stereo difference) = L-R (typically panned / effected leads)

        source shape: (channels, samples).
        """
        # A mono guitar stem carries no stereo information to split on.
        if source.shape[0] < 2:
            print("Warning: Guitar stem is Mono. Cannot split Rhythm/Lead.")
            path = os.path.join(output_dir, "guitar.mp3")
            self._save_audio(source, sr, path)
            return {"guitar_rhythm": path, "guitar_lead": path}

        # MID-SIDE PROCESSING
        # Mid = (L + R) / 2 -> Center content (usually rhythm)
        # Side = (L - R) / 2 -> Stereo difference (usually lead)
        left = source[0:1, :]   # (1, samples)
        right = source[1:2, :]  # (1, samples)

        mid = (left + right) / 2.0   # Center content -> Rhythm
        side = (left - right) / 2.0  # Stereo diff -> Lead

        # Apply subtle frequency filtering for better separation:
        # rhythm emphasizes low-mids, lead emphasizes the melodic range.
        try:
            import scipy.signal as signal

            nyquist = sr / 2

            # Rhythm: band-pass keeping fundamentals (80 Hz - 4 kHz)
            rhythm_lowcut = 80 / nyquist
            rhythm_highcut = 4000 / nyquist
            b_rhythm, a_rhythm = signal.butter(4, [rhythm_lowcut, rhythm_highcut], btype='band')

            # Lead: band-pass for the melodic range (200 Hz - 8 kHz)
            lead_lowcut = 200 / nyquist
            lead_highcut = 8000 / nyquist
            b_lead, a_lead = signal.butter(4, [lead_lowcut, lead_highcut], btype='band')

            mid_filtered = signal.filtfilt(b_rhythm, a_rhythm, mid.numpy())
            side_filtered = signal.filtfilt(b_lead, a_lead, side.numpy())

            mid = torch.from_numpy(mid_filtered).float()
            side = torch.from_numpy(side_filtered).float()
        except Exception as e:
            print(f"Warning: Frequency filtering failed ({e}), using raw Mid-Side")

        # Make stereo for output (center both).
        # SWAPPED: Side = Rhythm (strumming often panned wide), Mid = Lead (melody often center)
        rhythm_stereo = torch.cat([side, side], dim=0)
        lead_stereo = torch.cat([mid, mid], dim=0)

        # If side is too quiet (song has no stereo separation), mix some mid into lead
        side_rms = torch.sqrt(torch.mean(side ** 2))
        mid_rms = torch.sqrt(torch.mean(mid ** 2))

        if side_rms < mid_rms * 0.1:  # Side is <10% of mid -> almost mono mix
            print("Notice: Audio appears to be mostly mono. Rhythm separation may be limited.")
            # Create pseudo-separation using low frequencies for rhythm.
            # `signal`/`nyquist` come from the filtering block above; if that
            # block failed they are undefined and this best-effort fallback is
            # deliberately skipped via the except.
            try:
                rhythm_lowpass = 2000 / nyquist
                b_lp, a_lp = signal.butter(4, rhythm_lowpass, btype='low')
                rhythm_from_mid = signal.filtfilt(b_lp, a_lp, mid.numpy())
                rhythm_stereo = torch.from_numpy(rhythm_from_mid).float()
                rhythm_stereo = torch.cat([rhythm_stereo, rhythm_stereo], dim=0)
            except Exception:
                # FIX: was a bare `except:`.
                pass

        # Normalize to -3dB to prevent clipping
        def normalize(tensor):
            peak = tensor.abs().max()
            if peak > 0:
                target_peak = 0.707  # -3dB
                return tensor * (target_peak / peak)
            return tensor

        rhythm_stereo = normalize(rhythm_stereo)
        lead_stereo = normalize(lead_stereo)

        # Save files
        path_rhythm = os.path.join(output_dir, "guitar_rhythm.mp3")
        path_lead = os.path.join(output_dir, "guitar_lead.mp3")

        self._save_audio(rhythm_stereo, sr, path_rhythm)
        self._save_audio(lead_stereo, sr, path_lead)

        return {
            "guitar_rhythm": path_rhythm,
            "guitar_lead": path_lead
        }

    def _save_audio(self, source, sr, path):
        """Write a (channels, samples) tensor to *path* via soundfile."""
        # Move off the GPU before the numpy conversion.
        # FIX: the original called .cpu() twice in a row.
        source = source.cpu()
        # soundfile expects (samples, channels), hence the transpose.
        # NOTE(review): writing .mp3 through soundfile requires libsndfile
        # >= 1.1.0 — confirm the deployment image ships a new enough version.
        import soundfile as sf
        sf.write(path, source.t().numpy(), sr)
services/beat_tracker.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+
4
class BeatTracker:
    """Estimates tempo (BPM) and beat timestamps using librosa."""

    def __init__(self):
        pass

    def track(self, audio_path: str):
        """
        Track beats and tempo from an audio file.

        Returns:
            {"bpm": float, "beats": list of timestamps in seconds},
            or {"bpm": 0, "beats": []} if anything goes wrong.
        """
        try:
            print(f"Tracking beats for {audio_path}...")
            samples, rate = librosa.load(audio_path)

            # Estimate tempo and beat frame indices
            tempo, frames = librosa.beat.beat_track(y=samples, sr=rate)

            # Older librosa versions return tempo as a 1-element array.
            if isinstance(tempo, np.ndarray):
                tempo = tempo[0]

            stamps = librosa.frames_to_time(frames, sr=rate)

            return {
                "bpm": round(float(tempo), 2),
                "beats": [round(float(t), 2) for t in stamps],
            }
        except Exception as e:
            print(f"Error tracking beats: {e}")
            return {"bpm": 0, "beats": []}
services/chord_analyzer.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+
4
class ChordAnalyzer:
    """Template-matching chord recognizer over chroma (CQT) features."""

    def __init__(self):
        # Chord templates — basic triads carry the highest matching priority.
        self.templates = self._generate_chord_templates()

    def _generate_chord_templates(self):
        """
        Build chroma templates for a range of chord types.

        12 pitch classes: C, C#, D, D#, E, F, F#, G, G#, A, A#, B.
        Basic major/minor triads get a priority boost so they win over
        extended chords when raw scores are close.
        """
        templates = {}
        roots = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

        # Chord qualities with priority weights.
        # Format: (intervals, priority_boost); higher boost = more likely match.
        qualities = {
            # PRIORITY 1: Basic triads (most common, highest priority)
            '': ([0, 4, 7], 1.3),    # Major - HIGHEST priority
            'm': ([0, 3, 7], 1.3),   # Minor - HIGHEST priority

            # PRIORITY 2: Power chord & Suspended
            '5': ([0, 7], 1.1),      # Power chord
            'sus4': ([0, 5, 7], 1.0),
            'sus2': ([0, 2, 7], 1.0),

            # PRIORITY 3: 7th chords
            'maj7': ([0, 4, 7, 11], 0.95),
            'm7': ([0, 3, 7, 10], 0.95),
            '7': ([0, 4, 7, 10], 0.95),  # Dominant 7

            # PRIORITY 4: Extended & Other (lower priority to avoid false matches)
            'dim': ([0, 3, 6], 0.9),
            'aug': ([0, 4, 8], 0.9),
            '6': ([0, 4, 7, 9], 0.85),
            'm6': ([0, 3, 7, 9], 0.85),
            'add9': ([0, 4, 7, 2], 0.85),
            'madd9': ([0, 3, 7, 2], 0.85),
        }

        for i, root in enumerate(roots):
            for quality, (intervals, priority) in qualities.items():
                # Build chroma vector with weighted notes
                vec = np.zeros(12)
                for j, interval in enumerate(intervals):
                    idx = (i + interval) % 12
                    # Root = 2.0, Fifth = 1.5, Third = 1.2, Others = 1.0
                    if j == 0:  # Root
                        weight = 2.0
                    elif interval == 7:  # Fifth
                        weight = 1.5
                    elif interval in [3, 4]:  # Third (major or minor)
                        weight = 1.2
                    else:
                        weight = 1.0
                    vec[idx] = weight

                chord_name = f"{root}{quality}"
                # FIX: normalize to a unit vector FIRST, then scale by the
                # priority. The original applied `vec *= priority` before
                # normalization, which cancelled the boost entirely (unit
                # normalization erases any scalar factor), so priorities had
                # no effect on matching.
                norm = np.linalg.norm(vec)
                if norm > 0:
                    vec /= norm
                vec *= priority
                templates[chord_name] = vec

        return templates

    def analyze(self, audio_path: str, sr=22050):
        """
        Analyze an audio file and return the chord progression with timestamps.

        Returns a list of {"chord", "start", "end"} dicts (times in seconds),
        or [] on failure.
        """
        print(f"Analyzing chords for: {audio_path}")
        try:
            y, sr = librosa.load(audio_path, sr=sr)

            # Harmonic-Percussive Source Separation — chords live in the harmonics
            y_harmonic, _ = librosa.effects.hpss(y)

            # Compute Chroma CQT
            chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr, bins_per_octave=24)

            # 1. TEMPORAL SMOOTHING (Median Filter)
            # Filter size 21 frames (~0.5s) suppresses frame-level flicker.
            import scipy.ndimage
            chroma = scipy.ndimage.median_filter(chroma, size=(1, 21))
            chroma = librosa.util.normalize(chroma)

            num_frames = chroma.shape[1]

            # Template matching: one dot product per (template, frame) pair
            template_names = list(self.templates.keys())
            template_matrix = np.array([self.templates[name] for name in template_names])
            scores = np.dot(template_matrix, chroma)

            max_indices = np.argmax(scores, axis=0)
            max_scores = np.max(scores, axis=0)

            # 2. POST-PROCESSING (Merge Short Segments)
            current_chord = None
            start_time = 0.0

            THRESHOLD = 0.6    # Below this score a frame is "N.C." (no chord)
            MIN_DURATION = 0.8  # Chord must last 0.8s to be valid

            raw_segments = []

            # First Pass: collect contiguous same-chord segments
            for i in range(num_frames):
                idx = max_indices[i]
                score = max_scores[i]
                timestamp = librosa.frames_to_time(i, sr=sr)
                chord_name = template_names[idx] if score > THRESHOLD else "N.C."

                if chord_name != current_chord:
                    if current_chord is not None:
                        raw_segments.append({
                            "chord": current_chord,
                            "start": start_time,
                            "end": timestamp,
                            "duration": timestamp - start_time
                        })
                    current_chord = chord_name
                    start_time = timestamp

            # Append the trailing segment up to the end of the audio
            if current_chord is not None:
                end_time = librosa.get_duration(y=y, sr=sr)
                raw_segments.append({
                    "chord": current_chord,
                    "start": start_time,
                    "end": end_time,
                    "duration": end_time - start_time
                })

            # Second Pass: merge short segments into their neighbor
            final_results = []
            if not raw_segments: return []

            # Heuristic: a segment shorter than MIN_DURATION is absorbed by the
            # previous one; identical consecutive chords are always merged.
            for seg in raw_segments:
                if not final_results:
                    final_results.append(seg)
                    continue

                prev = final_results[-1]

                if seg["chord"] == prev["chord"]:
                    prev["end"] = seg["end"]
                    prev["duration"] += seg["duration"]
                elif seg["duration"] < MIN_DURATION:
                    # Merge to previous (extend previous to cover this short blip)
                    prev["end"] = seg["end"]
                    prev["duration"] += seg["duration"]
                else:
                    final_results.append(seg)

            # Format output (drop the internal "duration" key, round times)
            formatted_results = []
            for r in final_results:
                formatted_results.append({
                    "chord": r["chord"],
                    "start": round(r["start"], 2),
                    "end": round(r["end"], 2)
                })

            return formatted_results

        except Exception as e:
            print(f"Chord Analysis Error: {e}")
            return []
services/midi_converter.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from basic_pitch.inference import predict_and_save
2
+ from basic_pitch import ICASSP_2022_MODEL_PATH
3
+ import os
4
+
5
class MidiConverter:
    """Wraps Spotify's Basic Pitch model to transcribe audio stems to MIDI."""

    def __init__(self):
        print("Initializing Basic Pitch for MIDI conversion...")
        self.output_dir = "midi_output"

    def convert(self, audio_path: str, output_path: str):
        """
        Transcribe *audio_path* to MIDI and place the result at *output_path*.

        Returns the output path on success, None on any failure. Basic Pitch
        loads the audio itself and writes `<name>_basic_pitch.mid`, which is
        renamed to the requested path afterwards.
        """
        try:
            target_dir = os.path.dirname(output_path)

            print(f"Converting {audio_path} to MIDI...")

            # Tuned parameters for better accuracy
            predict_and_save(
                [audio_path],
                target_dir,
                True,
                False,
                False,
                False,
                ICASSP_2022_MODEL_PATH,
                onset_threshold=0.6,  # Higher threshold to reduce noise
                frame_threshold=0.4,
                minimum_note_length=100.0,  # ms
                minimum_frequency=None,
                maximum_frequency=None
            )

            # Basic Pitch names its output after the source file
            source_stem = os.path.splitext(os.path.basename(audio_path))[0]
            produced = os.path.join(target_dir, f"{source_stem}_basic_pitch.mid")

            if not os.path.exists(produced):
                print("Warning: Expected MIDI file not found.")
                return None

            # Move the generated file onto the requested path
            if os.path.exists(output_path):
                os.remove(output_path)
            os.rename(produced, output_path)
            print(f"MIDI saved to {output_path}")
            return output_path

        except Exception as e:
            print(f"Error converting to MIDI: {e}")
            import traceback
            traceback.print_exc()
            return None