Spaces:
Running
Running
Update stats_data.py
Browse files- stats_data.py +31 -13
stats_data.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
-
#
|
| 4 |
SPEAKER_META = {
|
| 5 |
"F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
|
| 6 |
"F03": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
|
|
@@ -10,30 +10,48 @@ SPEAKER_META = {
|
|
| 10 |
"M03": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
|
| 11 |
"M04": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
|
| 12 |
"M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
|
| 13 |
-
"F02": {"Gender": "Female", "Severity": "Severe
|
| 14 |
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
|
|
|
|
| 17 |
data = {
|
| 18 |
"Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
|
| 19 |
"Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
|
| 20 |
"Whisper Tiny": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
|
| 21 |
"5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
|
| 22 |
-
"10K Triple-Mix": [25.4, 24.1, 44.1, 41.2, 79.1, 77.5, 79.0, 78.2]
|
| 23 |
}
|
| 24 |
df = pd.DataFrame(data)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
return df
|
| 30 |
|
| 31 |
def get_experimental_summary():
|
|
|
|
| 32 |
data = {
|
| 33 |
-
"
|
| 34 |
-
"Whisper Tiny": [
|
| 35 |
-
"5K Pure Model": [
|
| 36 |
-
"10K Triple-Mix": [
|
| 37 |
-
"Best Relative Improvement": ["+41.6%", "+100.0%", "+42.9%"]
|
| 38 |
}
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
# Metadata lookup for UI and filtering
|
| 4 |
SPEAKER_META = {
|
| 5 |
"F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
|
| 6 |
"F03": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
|
|
|
|
| 10 |
"M03": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
|
| 11 |
"M04": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
|
| 12 |
"M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
|
| 13 |
+
"F02 (UA)": {"Gender": "Female", "Severity": "Severe", "Dataset": "UA-Speech"}
|
| 14 |
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
    """Build the per-speaker performance table for the seen Torgo speakers.

    The relative gain is computed from the "5K Pure Model" column against
    the "Whisper Tiny" column, as a percentage rounded to one decimal.

    Returns:
        pandas.DataFrame: one row per speaker with the columns "Speaker",
        "Severity", "Whisper Tiny", "5K Pure Model", "10K Triple-Mix" and
        "Relative Gain (Best)". All score columns are display strings
        ending in "%".
    """
    table = pd.DataFrame(
        {
            "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
            "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
            "Whisper Tiny": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
            "5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
            # Stored pre-formatted because the F01 entry carries a "(LOSO)" tag.
            "10K Triple-Mix": ["25.4%", "24.1% (LOSO)", "44.1%", "41.2%", "79.1%", "77.5%", "79.0%", "78.2%"],
        }
    )

    # Work on the numeric columns before they are replaced by display strings.
    baseline = table["Whisper Tiny"]
    fine_tuned = table["5K Pure Model"]
    relative_gain = (((fine_tuned - baseline) / baseline) * 100).round(1)

    # The "+" format flag emits the real sign, so a regression renders as
    # "-x.x%" instead of the "+-x.x%" a hard-coded "+" prefix would produce.
    table["Relative Gain (Best)"] = [f"{gain:+}%" for gain in relative_gain.tolist()]

    # Formatting for display.
    table["Whisper Tiny"] = [f"{score}%" for score in baseline.tolist()]
    table["5K Pure Model"] = [f"{score}%" for score in fine_tuned.tolist()]
    return table
|
| 35 |
|
| 36 |
def get_experimental_summary():
    """Build the summary table for the three primary research conditions.

    For each condition the best score among "5K Pure Model" and
    "10K Triple-Mix" is compared with the "Whisper Tiny" score, and the
    relative gain is reported as a signed percentage with one decimal.
    Entries equal to "N/A" are ignored when picking the best score.

    Returns:
        pandas.DataFrame: one row per condition with the columns
        "Condition", "Whisper Tiny", "5K Pure Model", "10K Triple-Mix" and
        "Relative Gain (Best)". Score columns are display strings ending
        in "%", except that "N/A" is passed through unchanged.
    """
    not_available = "N/A"

    conditions = ["In-Domain (Seen Torgo)", "LOSO (Unseen Torgo F01)", "Zero-Shot (UA-Speech F02)"]
    whisper_tiny = [41.50, 12.38, 4.33]
    pure_5k = [58.77, not_available, 6.19]
    triple_mix_10k = [54.67, 24.76, 5.98]

    def as_percent(score):
        # Leave the placeholder untouched; everything else gets a "%" suffix.
        return score if score == not_available else f"{score}%"

    # Derive baseline and best score from the table itself rather than from
    # separately maintained copies, so the gain cannot drift from the data.
    gains = []
    for base, *candidates in zip(whisper_tiny, pure_5k, triple_mix_10k):
        best = max(score for score in candidates if score != not_available)
        # The "+" format flag emits the real sign ("+" or "-").
        gains.append(f"{round(((best - base) / base) * 100, 1):+}%")

    return pd.DataFrame(
        {
            "Condition": conditions,
            "Whisper Tiny": [as_percent(score) for score in whisper_tiny],
            "5K Pure Model": [as_percent(score) for score in pure_5k],
            "10K Triple-Mix": [as_percent(score) for score in triple_mix_10k],
            "Relative Gain (Best)": gains,
        }
    )
|