# Test annotation with 1000 real Orpheus samples
#
# Task layout (SkyPilot-style task YAML):
#   setup - installs the Python dependencies and clones/updates the
#           ensemble-tts-annotation repository.
#   run   - writes the first 1000 samples of the Orpheus dataset to WAV files,
#           annotates them with EnsembleAnnotator in "quick" mode, and dumps
#           the results to data/annotations/orpheus_test_annotations.json.

name: ensemble-annotate-test

resources:
  use_spot: true
  # Set syntax: one GPU of any of the listed types.
  accelerators: {A100:1, V100:1, T4:1}
  memory: 32+
  disk_size: 100

setup: |
  # Abort on the first failing command so a broken install or clone is not
  # reported as a successful setup.
  set -e

  echo "=================================================="
  echo "🧪 ENSEMBLE ANNOTATION TEST"
  echo "=================================================="

  pip install -q torch transformers librosa soundfile datasets huggingface_hub tqdm

  if [ ! -d "ensemble-tts-annotation" ]; then
    git clone -q https://huggingface.co/marcosremar2/ensemble-tts-annotation
  else
    # `git -C` updates the checkout without changing the working directory,
    # so a failed pull cannot leave the shell inside the repository.
    git -C ensemble-tts-annotation pull -q
  fi

  echo "✅ Setup complete!"

run: |
  # Abort on the first failing step (e.g. do not annotate after a failed
  # download).
  set -e

  cd ensemble-tts-annotation

  echo "📥 Downloading 1000 Orpheus samples..."
  python3 - <<'PY'
  from itertools import islice
  from pathlib import Path

  import soundfile as sf
  from datasets import load_dataset

  NUM_SAMPLES = 1000

  ds = load_dataset(
      "marcosremar2/orpheus-tts-portuguese-dataset",
      split="train",
      streaming=True,
  )
  out = Path("data/raw/orpheus_test")
  out.mkdir(parents=True, exist_ok=True)

  # islice stops pulling from the stream after NUM_SAMPLES items; a plain
  # `if i < NUM_SAMPLES` filter would keep iterating the whole dataset.
  count = 0
  for i, sample in enumerate(islice(ds, NUM_SAMPLES)):
      audio = sample["audio"]
      sf.write(str(out / f"orpheus_{i:05d}.wav"), audio["array"], audio["sampling_rate"])
      count = i + 1
      if count % 100 == 0:
          print(f" {count}/{NUM_SAMPLES}")

  print(f"✅ Downloaded {count} samples")
  PY

  echo ""
  echo "🤖 Annotating with ensemble (quick mode)..."
  python3 - <<'PY'
  import json
  import sys
  import time
  from pathlib import Path

  # Make the repository's ensemble_tts package importable from the cwd.
  sys.path.insert(0, ".")

  from tqdm import tqdm

  from ensemble_tts.annotator import EnsembleAnnotator

  print("Loading...")
  ann = EnsembleAnnotator(mode="quick", device="cuda", enable_events=False)
  print("✅ Loaded\n")

  files = sorted(Path("data/raw/orpheus_test").glob("*.wav"))
  print(f"Annotating {len(files)} files...\n")

  start = time.time()
  results = []
  for f in tqdm(files):
      emotion = ann.annotate(str(f)).get("emotion", {})
      results.append({
          "file": f.name,
          # A result without an emotion label is recorded as "error" and
          # excluded from the success count below.
          "emotion": emotion.get("label", "error"),
          "confidence": emotion.get("confidence", 0.0),
      })

  out = Path("data/annotations/orpheus_test_annotations.json")
  out.parent.mkdir(parents=True, exist_ok=True)
  with open(out, "w") as fh:
      json.dump(results, fh, indent=2)

  elapsed = time.time() - start
  ok = sum(1 for r in results if r["emotion"] != "error")
  # Guard the division in case no measurable time elapsed.
  rate = len(results) / elapsed if elapsed > 0 else 0.0
  print(
      f"\n✅ COMPLETE\nTotal: {len(results)}\nSuccess: {ok}\n"
      f"Time: {elapsed/60:.1f} min\nRate: {rate:.2f} files/s"
  )
  PY

  echo ""
  echo "📊 Sample results:"
  head -30 data/annotations/orpheus_test_annotations.json