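# Torgo-DSR-Lab / stats_data.py
# NOTE: the lines below are residue from the web file viewer this source was
# copied from; they are kept as comments so the module remains importable.
# st192011's picture
# Update stats_data.py
# 1c0c2a7 verified
# raw
# history blame
# 2.01 kB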
import pandas as pd
# Speaker metadata lookup, keyed by speaker ID.
# Each value is a dict with the keys listed in _SPEAKER_FIELDS.
_SPEAKER_FIELDS = ("Gender", "Severity", "Dataset")

# One row per speaker: (speaker ID, gender, severity label, dataset name).
# Row order determines the key order of SPEAKER_META.
_SPEAKER_ROWS = (
    ("M05", "Male", "Severe", "Torgo"),
    ("F01", "Female", "Severe", "Torgo"),
    ("M01", "Male", "Moderate", "Torgo"),
    ("M04", "Male", "Moderate", "Torgo"),
    ("M02", "Male", "Mild", "Torgo"),
    ("M03", "Male", "Mild", "Torgo"),
    ("F03", "Female", "Mild", "Torgo"),
    ("F04", "Female", "Mild", "Torgo"),
    ("F02 (UA)", "Female", "Severe (Isolated)", "UA-Speech"),
)

SPEAKER_META = {
    speaker_id: dict(zip(_SPEAKER_FIELDS, attributes))
    for speaker_id, *attributes in _SPEAKER_ROWS
}
def get_indomain_breakdown():
    """Return per-speaker results for the In-Domain Torgo condition.

    The table has one row per speaker and the columns ``Speaker``,
    ``Severity``, ``Whisper Baseline``, ``5K Pure Model`` and
    ``Relative Gain``.  The two score columns are rendered as strings with a
    trailing ``%``.  ``Relative Gain`` is the percentage change of the
    5K score over the baseline, rounded to one decimal place and rendered
    with an explicit sign (``+`` for non-negative, ``-`` for negative).

    Returns:
        pandas.DataFrame: the formatted (all-string) breakdown table.
    """
    # Individual speaker results for In-Domain Torgo
    data = {
        "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
        "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
        "Whisper Baseline": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
        "5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
    }
    df = pd.DataFrame(data)

    # Compute the gain while the score columns are still numeric.
    baseline = df["Whisper Baseline"]
    gain = ((df["5K Pure Model"] - baseline) / baseline * 100).round(1)

    # Formatting
    for col in ("Whisper Baseline", "5K Pure Model"):
        df[col] = df[col].astype(str) + "%"

    # Negative values already carry their own "-" from str(); only prepend
    # "+" otherwise, so a regression is never rendered as "+-x.x%".
    sign = gain.map(lambda value: "" if value < 0 else "+")
    df["Relative Gain"] = sign + gain.astype(str) + "%"
    return df
def get_experimental_summary():
    """Return the summary table comparing 5K and 10K across three conditions.

    Every cell is a pre-formatted string (percentages, or ``"N/A"`` where no
    figure is listed).  Rows are the three research conditions; columns are
    ``Condition``, ``Whisper Baseline``, ``5K Pure Model``,
    ``10K Triple-Mix`` and ``Best Relative Gain``.

    Returns:
        pandas.DataFrame: a 3-row, 5-column table of strings.
    """
    header = (
        "Condition",
        "Whisper Baseline",
        "5K Pure Model",
        "10K Triple-Mix",
        "Best Relative Gain",
    )
    # One tuple per research condition, fields in the same order as `header`.
    records = (
        ("In-Domain (Seen Torgo)", "41.50%", "58.77%", "54.67%", "+41.6%"),
        ("LOSO (Unseen Torgo F01)", "12.38%", "N/A", "24.76%", "+100.0%"),
        ("Zero-Shot (UA-Speech F02)", "4.33%", "6.19%", "5.98%", "+42.9%"),
    )
    # Transpose the records into a column-name -> list-of-values mapping.
    by_column = {
        name: list(values) for name, values in zip(header, zip(*records))
    }
    return pd.DataFrame(by_column)