File size: 3,159 Bytes
15389e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import math

def freq_to_midi_note(frequency):
    if frequency <= 0:
        return None
    return 69 + 12 * math.log2(frequency / 440.0)

def get_persistent_notes(frequencies, persistence_frames=4):
    if len(frequencies) < persistence_frames:
        return []
    
    persistent_notes = []
    current_note = None
    persistence_count = 0
    
    for freq in frequencies:
        if freq <= 0:
            current_note = None
            persistence_count = 0
            continue
            
        midi_note = round(freq_to_midi_note(freq))
        
        if midi_note == current_note:
            persistence_count += 1
            if persistence_count == persistence_frames:
                persistent_notes.append(midi_note)
        else:
            current_note = midi_note
            persistence_count = 1
    
    return persistent_notes

def analyze_file(file_path):
    try:
        
        with open(file_path, 'r') as f:
            frequencies = [float(line.strip()) for line in f if line.strip()]
        
        persistent_notes = get_persistent_notes(frequencies)
        
        if not persistent_notes:
            return None
            
        return {
            'file': file_path.name,
            'system': str(file_path).replace("/root/yue_pitch_evals/intermediate/", "").split("/")[0],
            'min_note': min(persistent_notes),
            'max_note': max(persistent_notes),
            'range_semitones': max(persistent_notes) - min(persistent_notes)
        }
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

def process_directory(root_dir):
    results = []
    root_path = Path(root_dir)
    
    # Process each file individually
    for file_path in root_path.rglob('*.txt'):
        analysis = analyze_file(file_path)
        if analysis:
            results.append(analysis)
    
    return pd.DataFrame(results)

def create_violin_plot(df, output_path='vocal_ranges.png'):
    plt.figure(figsize=(12, 6))
    sns.set_style("whitegrid")

    sns.violinplot(data=df, x='system', y='range_semitones')
    
    plt.title('Distribution of Vocal Ranges by System')
    plt.xlabel('System')
    plt.ylabel('Range (semitones)')
    plt.xticks(rotation=45)
    plt.tight_layout()
    
    plt.savefig(output_path)
    plt.close()

def main():
    root_directory = "raw_pitch_extracted"
    output_file = "vocal_range_analysis.csv"
    plot_file = "vocal_ranges.png"

    print("Processing frequency files...")
    results_df = process_directory(root_directory)
    
    # Save detailed results
    results_df.to_csv(output_file, index=False)
    print(f"Analysis results saved to {output_file}")

    print("\nSummary statistics by system:")
    summary = results_df.groupby('system').agg({
        'range_semitones': ['count', 'mean', 'std', 'min', 'max']
    }).round(2)
    print(summary)
    
    create_violin_plot(results_df, plot_file)
    print(f"\nViolin plot saved to {plot_file}")

if __name__ == "__main__":
    main()