Spaces:
Running
Running
File size: 3,322 Bytes
0534c29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import librosa
from scripts.explain import musiclime_combined
def _print_banner(title: str) -> None:
    """Print a section title framed by 60-char '=' rules."""
    print(f"\n{'='*60}")
    print(f"=== {title} ===")
    print(f"{'='*60}")


def _print_explanation(label: str, explanation: dict) -> None:
    """Print prediction, runtime, and ranked features for one explanation.

    Args:
        label: Section label used in the headers, e.g. "MULTIMODAL"
            or "AUDIO-ONLY".
        explanation: One sub-result of ``musiclime_combined`` with
            "prediction", "summary", and "explanations" keys.
    """
    _print_banner(f"{label} EXPLANATION RESULTS")
    prediction = explanation["prediction"]
    print(
        f"Prediction: {prediction['class_name']} ({prediction['confidence']:.3f})"
    )
    print(f"Runtime: {explanation['summary']['runtime_seconds']:.2f}s")
    print(f"\n=== TOP {label} FEATURES ===")
    for feature in explanation["explanations"]:
        print(
            f"Rank {feature['rank']}: {feature['modality']} | Weight: {feature['weight']:.4f} | Importance: {feature['importance']:.4f}"
        )
        # Truncate the (potentially long) feature text to 80 chars.
        print(f" Feature: {feature['feature_text'][:80]}...")
        print()


def explain_combined_runner(sample: str) -> None:
    """Run the combined MusicLIME explanation for one sample and print a report.

    Loads ``data/external/{sample}.mp3`` and ``data/external/{sample}.txt``,
    runs ``musiclime_combined`` once, then prints the multimodal and
    audio-only explanation sections, a performance summary, and a
    prediction comparison.

    Args:
        sample: Base filename (no extension) of the audio/lyrics pair
            under ``data/external/``.
    """
    audio_path = f"data/external/{sample}.mp3"
    lyrics_path = f"data/external/{sample}.txt"

    # Load audio at librosa's default sample rate; the rate itself is unused.
    audio_data, _sr = librosa.load(audio_path)
    with open(lyrics_path, "r", encoding="utf-8") as f:
        lyrics_text = f.read()

    print("Running combined MusicLIME explanation (optimized)...")
    result = musiclime_combined(audio_data, lyrics_text)

    multimodal = result["multimodal"]
    audio_only = result["audio_only"]
    # Both result sections share one display format; print via one helper.
    _print_explanation("MULTIMODAL", multimodal)
    _print_explanation("AUDIO-ONLY", audio_only)

    _print_banner("PERFORMANCE SUMMARY")
    summary = result["combined_summary"]
    print(
        f"Factorization time: {summary['factorization_time_seconds']:.2f}s (done once)"
    )
    print(f"Multimodal explanation: {multimodal['summary']['runtime_seconds']:.2f}s")
    print(f"Audio-only explanation: {audio_only['summary']['runtime_seconds']:.2f}s")
    print(f"Total runtime: {summary['total_runtime_seconds']:.2f}s")
    print(f"Source separation reused: {summary['source_separation_reused']}")

    # Compare the two modalities' predictions side by side.
    print("\n=== PREDICTION COMPARISON ===")
    mm_pred = multimodal["prediction"]
    ao_pred = audio_only["prediction"]
    print(f"Multimodal: {mm_pred['class_name']} ({mm_pred['confidence']:.3f})")
    print(f"Audio-only: {ao_pred['class_name']} ({ao_pred['confidence']:.3f})")
    if mm_pred["class"] == ao_pred["class"]:
        print("Both modalities agree on the prediction")
    else:
        print("Modalities disagree on the prediction")
    confidence_diff = abs(mm_pred["confidence"] - ao_pred["confidence"])
    print(f"Confidence difference: {confidence_diff:.3f}")
if __name__ == "__main__":
    # Default demo sample shipped under data/external/.
    explain_combined_runner("sample")
|