Spaces:
Running
Running
| import os | |
| import numpy as np | |
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from pathlib import Path | |
| import math | |
def freq_to_midi_note(frequency, reference_frequency=440.0):
    """Convert a frequency to a fractional MIDI note number.

    The reference pitch maps to MIDI note 69 (A4), and every doubling of the
    frequency adds 12 semitones.

    Args:
        frequency: Frequency to convert, in the same unit as
            ``reference_frequency`` (Hz for the default).
        reference_frequency: Frequency assigned to MIDI note 69. Defaults to
            the 440 Hz concert pitch that used to be hard-coded here.

    Returns:
        The unrounded MIDI note number as a float, or ``None`` when
        ``frequency`` is zero or negative (no pitch to convert).

    Raises:
        ValueError: If ``reference_frequency`` is zero or negative.
    """
    if reference_frequency <= 0:
        raise ValueError(
            f"reference_frequency must be positive, got {reference_frequency!r}"
        )
    if frequency <= 0:
        return None
    octaves_from_reference = math.log2(frequency / reference_frequency)
    return 69 + 12 * octaves_from_reference
def get_persistent_notes(frequencies, persistence_frames=4):
    """Return the MIDI notes that are held for enough consecutive frames.

    Each frame's frequency is rounded to the nearest MIDI note. A note is
    reported once per run, at the moment the run reaches
    ``persistence_frames`` identical frames; a longer run does not report it
    again. Unvoiced frames break the current run.

    Args:
        frequencies: Sequence of per-frame frequencies (A4 = 440 Hz). Zero,
            negative, NaN and infinite values are treated as unvoiced frames.
        persistence_frames: Number of consecutive frames on the same note
            required before the note counts. Values below 1 can never be
            reached, so they yield an empty list.

    Returns:
        List of integer MIDI note numbers in order of detection. A note
        appears once for every separate run that qualified.
    """
    if len(frequencies) < persistence_frames:
        return []

    persistent_notes = []
    current_note = None
    run_length = 0
    for freq in frequencies:
        # NaN/inf cannot be rounded to a note (round() raises on them), so
        # they are handled like silence instead of aborting the whole track.
        if not math.isfinite(freq) or freq <= 0:
            current_note = None
            run_length = 0
            continue

        # Same mapping as freq_to_midi_note (A4 = 440 Hz = note 69); freq is
        # known to be positive and finite here, so no None case can occur.
        midi_note = round(69 + 12 * math.log2(freq / 440.0))
        if midi_note == current_note:
            run_length += 1
        else:
            current_note = midi_note
            run_length = 1

        # Checked after both branches so that the first frame of a run can
        # already qualify when persistence_frames is 1.
        if run_length == persistence_frames:
            persistent_notes.append(midi_note)
    return persistent_notes
# Column order of the per-file result table; also used when no file qualifies
# so that callers always get the same schema.
_RESULT_COLUMNS = ['file', 'system', 'min_note', 'max_note', 'range_semitones']


def _system_from_path(file_path, root_dir=None):
    """Return the system name, i.e. the first path component below the root."""
    path_text = str(file_path)
    legacy_prefix = "/root/yue_pitch_evals/intermediate/"
    if legacy_prefix in path_text:
        # Paths from the original evaluation layout keep their old result.
        return path_text.replace(legacy_prefix, "").split("/")[0]
    if root_dir is not None:
        try:
            relative_parts = Path(file_path).relative_to(root_dir).parts
        except ValueError:
            # file_path is not located under root_dir.
            relative_parts = ()
        if relative_parts:
            return relative_parts[0]
    return path_text.split("/")[0]


def analyze_file(file_path, root_dir=None):
    """Compute the note range of one pitch file.

    The file is read as text with one frequency per line; blank lines are
    skipped. Persistent notes are extracted with ``get_persistent_notes``.

    Args:
        file_path: Path of the frequency file.
        root_dir: Optional directory that was scanned. When given, ``system``
            is the first path component of ``file_path`` below it. Without
            it (or for paths containing the legacy absolute prefix) the
            previous prefix-stripping rule is used.

    Returns:
        A dict with the keys ``file``, ``system``, ``min_note``,
        ``max_note`` and ``range_semitones``, or ``None`` when the file has
        no persistent notes or could not be processed.
    """
    # Deliberately best-effort: one unreadable or malformed file is reported
    # and skipped so that it cannot abort a whole directory run.
    try:
        path = Path(file_path)
        with open(path, 'r') as f:
            frequencies = [float(text) for text in map(str.strip, f) if text]

        persistent_notes = get_persistent_notes(frequencies)
        if not persistent_notes:
            return None

        lowest = min(persistent_notes)
        highest = max(persistent_notes)
        return {
            'file': path.name,
            'system': _system_from_path(path, root_dir),
            'min_note': lowest,
            'max_note': highest,
            'range_semitones': highest - lowest,
        }
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None


def process_directory(root_dir):
    """Analyze every ``*.txt`` file below ``root_dir``.

    Args:
        root_dir: Directory that is searched recursively.

    Returns:
        A DataFrame with one row per file that produced a result. The
        columns are always present, even when there are no rows.
    """
    root_path = Path(root_dir)
    results = []
    # Sorted so the row order does not depend on the filesystem's listing.
    for file_path in sorted(root_path.rglob('*.txt')):
        analysis = analyze_file(file_path, root_dir=root_path)
        if analysis:
            results.append(analysis)
    return pd.DataFrame(results, columns=_RESULT_COLUMNS)
def create_violin_plot(df, output_path='vocal_ranges.png'):
    """Save a violin plot of ``range_semitones`` grouped by ``system``.

    Args:
        df: Table with at least the columns ``system`` and
            ``range_semitones``.
        output_path: File the figure is written to.
    """
    figure = plt.figure(figsize=(12, 6))
    # The style is set after the figure exists but before seaborn creates the
    # axes, which is the order the plot has always been built in.
    sns.set_style("whitegrid")
    axes = sns.violinplot(data=df, x='system', y='range_semitones')

    axes.set_title('Distribution of Vocal Ranges by System')
    axes.set_xlabel('System')
    axes.set_ylabel('Range (semitones)')
    # Slanted labels keep long system names from overlapping.
    plt.xticks(rotation=45)

    figure.tight_layout()
    figure.savefig(output_path)
    plt.close(figure)
def main():
    """Run the vocal-range analysis and write the CSV, summary and plot.

    Scans ``raw_pitch_extracted``, saves the per-file results to
    ``vocal_range_analysis.csv``, prints per-system statistics of the range
    in semitones and saves a violin plot to ``vocal_ranges.png``. When no
    file yields a result, a message is printed and nothing is written.
    """
    root_directory = "raw_pitch_extracted"
    output_file = "vocal_range_analysis.csv"
    plot_file = "vocal_ranges.png"

    print("Processing frequency files...")
    results_df = process_directory(root_directory)

    # With no rows there is nothing to group or plot, and grouping an empty
    # frame that lacks the 'system' column would raise KeyError.
    if results_df.empty:
        print(f"No persistent notes found under {root_directory}; nothing to report.")
        return

    # Save detailed results
    results_df.to_csv(output_file, index=False)
    print(f"Analysis results saved to {output_file}")

    print("\nSummary statistics by system:")
    summary = results_df.groupby('system').agg({
        'range_semitones': ['count', 'mean', 'std', 'min', 'max']
    }).round(2)
    print(summary)

    create_violin_plot(results_df, plot_file)
    print(f"\nViolin plot saved to {plot_file}")


# Only run the analysis when executed as a script, not when imported.
if __name__ == "__main__":
    main()