| import os |
| from typing import List, Tuple |
|
|
| from loguru import logger |
|
|
| from .audio_io import get_audio_duration, load_lyrics_file |
| from .csv_metadata import load_csv_metadata |
| from .models import AudioSample, SUPPORTED_AUDIO_FORMATS |
|
|
|
|
class ScanMixin:
    """Directory scanning helpers.

    Mixin for a host object that exposes ``self.metadata`` (read here for
    ``all_instrumental`` and ``custom_tag``, written for ``num_samples``).
    Scan results are stored on ``self.samples`` and the scanned path on
    ``self._current_dir``.
    """

    def scan_directory(self, directory: str) -> Tuple[List["AudioSample"], str]:
        """Scan a directory tree for audio files and build samples from them.

        The tree is walked recursively; a file is picked up when its
        lower-cased extension is in ``SUPPORTED_AUDIO_FORMATS``. Each file
        is combined with its lyrics file (if ``load_lyrics_file`` reports
        one) and with the row for its base filename in the metadata returned
        by ``load_csv_metadata(directory)`` (if any).

        Args:
            directory: Path of the directory to scan.

        Returns:
            A ``(samples, status)`` tuple. ``samples`` is the list stored on
            ``self.samples`` on success, or an empty list when the path is
            missing, is not a directory, or contains no supported audio
            file. ``status`` is a human-readable summary or error message.
        """
        if not os.path.exists(directory):
            return [], f"❌ Directory not found: {directory}"

        if not os.path.isdir(directory):
            return [], f"❌ Not a directory: {directory}"

        # State is reset only once the path is known to be a usable directory.
        self._current_dir = directory
        self.samples = []

        audio_files = []
        for root, _, files in os.walk(directory):
            for file in files:
                ext = os.path.splitext(file)[1].lower()
                if ext in SUPPORTED_AUDIO_FORMATS:
                    audio_files.append(os.path.join(root, file))

        if not audio_files:
            return [], (
                f"❌ No audio files found in {directory}\n"
                f"Supported formats: {', '.join(SUPPORTED_AUDIO_FORMATS)}"
            )

        # Sort so the sample order does not depend on os.walk's listing order.
        audio_files.sort()

        csv_metadata = load_csv_metadata(directory)
        csv_count = 0
        lyrics_count = 0

        for audio_path in audio_files:
            # Best effort: a file that fails to load is logged and skipped
            # so one bad file does not abort the whole scan.
            try:
                duration = get_audio_duration(audio_path)
                lyrics_content, has_lyrics_file = load_lyrics_file(audio_path)

                # A lyrics file overrides the dataset-wide instrumental flag.
                is_instrumental = self.metadata.all_instrumental
                if has_lyrics_file:
                    is_instrumental = False
                    lyrics_count += 1

                sample = AudioSample(
                    audio_path=audio_path,
                    filename=os.path.basename(audio_path),
                    duration=duration,
                    is_instrumental=is_instrumental,
                    custom_tag=self.metadata.custom_tag,
                    lyrics=lyrics_content if has_lyrics_file else "[Instrumental]",
                    raw_lyrics=lyrics_content if has_lyrics_file else "",
                )

                # CSV rows are looked up by base filename; only non-empty
                # fields overwrite the sample's values.
                if csv_metadata and sample.filename in csv_metadata:
                    meta = csv_metadata[sample.filename]
                    if meta.get("bpm"):
                        sample.bpm = meta["bpm"]
                    if meta.get("key"):
                        sample.keyscale = meta["key"]
                    if meta.get("caption"):
                        sample.caption = meta["caption"]
                        sample.labeled = True
                    csv_count += 1

                self.samples.append(sample)
            except Exception as e:
                logger.warning(f"Failed to process {audio_path}: {e}")

        self.metadata.num_samples = len(self.samples)

        # Counts reflect samples actually built, not files discovered.
        status = f"✅ Found {len(self.samples)} audio files in {directory}"
        if lyrics_count > 0:
            status += f"\n 📝 {lyrics_count} files have accompanying lyrics (.txt)"
        if csv_count > 0:
            status += f"\n 📊 {csv_count} files have metadata from CSV"

        return self.samples, status
|
|