Spaces:
Sleeping
Sleeping
Update stats_data.py
Browse files- stats_data.py +16 -19
stats_data.py
CHANGED
|
@@ -1,42 +1,39 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
|
| 5 |
-
"M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
|
| 6 |
"F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
|
|
|
|
|
|
|
| 7 |
"M01": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
|
| 8 |
-
"M04": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
|
| 9 |
"M02": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
|
| 10 |
"M03": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
|
| 11 |
-
"
|
| 12 |
-
"
|
| 13 |
-
"F02
|
| 14 |
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
|
| 17 |
-
# Individual speaker results for In-Domain Torgo
|
| 18 |
data = {
|
| 19 |
"Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
|
| 20 |
"Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
|
| 21 |
-
"Whisper
|
| 22 |
-
"5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8]
|
|
|
|
| 23 |
}
|
| 24 |
df = pd.DataFrame(data)
|
| 25 |
-
df["Relative Gain"] = (((df["5K Pure Model"] - df["Whisper
|
| 26 |
-
|
| 27 |
-
# Formatting
|
| 28 |
-
for col in ["Whisper Baseline", "5K Pure Model"]:
|
| 29 |
df[col] = df[col].astype(str) + "%"
|
| 30 |
-
df["Relative Gain"] = "+" + df["Relative Gain"].astype(str) + "%"
|
| 31 |
return df
|
| 32 |
|
| 33 |
def get_experimental_summary():
|
| 34 |
-
# Comparing 5K and 10K across the three specific research conditions
|
| 35 |
data = {
|
| 36 |
-
"Condition": ["In-Domain (Seen Torgo)", "LOSO (Unseen Torgo F01)", "Zero-Shot (UA-Speech F02)"],
|
| 37 |
-
"Whisper
|
| 38 |
"5K Pure Model": ["58.77%", "N/A", "6.19%"],
|
| 39 |
"10K Triple-Mix": ["54.67%", "24.76%", "5.98%"],
|
| 40 |
-
"Best Relative
|
| 41 |
}
|
| 42 |
return pd.DataFrame(data)
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
# Standardized speaker metadata based on the training configuration.
# Keys are speaker IDs; every entry carries the same three fields
# ("Gender", "Severity", "Dataset").  F02 is the only entry whose dataset
# is "UA-Speech" rather than "Torgo".
TORGO_META = {
    "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
    "F03": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
    "F04": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
    "M01": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
    "M02": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
    "M03": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
    "M04": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
    "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
    "F02": {"Gender": "Female", "Severity": "Severe (Isolated)", "Dataset": "UA-Speech"},
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
    """Return the per-speaker in-domain score table as display-ready strings.

    The table has one row per speaker and the columns ``Speaker``,
    ``Severity``, ``Whisper Tiny``, ``5K Pure Model``, ``10K Triple-Mix`` and
    ``Relative Gain (5K vs Tiny)``.  The relative gain is computed from the
    numeric scores as ``(5K - Tiny) / Tiny * 100`` rounded to one decimal,
    before the score columns are converted to strings with a ``%`` suffix.

    Returns:
        pandas.DataFrame: eight rows; all score and gain columns are strings.
    """
    score_columns = ["Whisper Tiny", "5K Pure Model", "10K Triple-Mix"]
    gain_column = "Relative Gain (5K vs Tiny)"

    df = pd.DataFrame(
        {
            "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
            "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
            "Whisper Tiny": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
            "5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
            "10K Triple-Mix": [25.4, 24.1, 44.1, 41.2, 79.1, 77.5, 79.0, 78.2],
        }
    )

    # Compute the gain while the score columns are still numeric.
    baseline = df["Whisper Tiny"]
    relative_gain = (((df["5K Pure Model"] - baseline) / baseline) * 100).round(1)

    for col in score_columns:
        df[col] = df[col].astype(str) + "%"

    # Sign-aware formatting: a hard-coded "+" prefix would render a
    # regression as "+-x.x%" if the scores above are ever updated.
    df[gain_column] = relative_gain.map(_format_signed_percent)
    return df


def _format_signed_percent(value):
    """Format *value* with an explicit sign and one decimal, e.g. ``+41.6%``."""
    return f"{value:+.1f}%"
|
| 30 |
|
| 31 |
def get_experimental_summary():
    """Return the summary table comparing models across three conditions.

    Every cell is a pre-formatted string; nothing is computed here.  The
    rows are the in-domain, LOSO and zero-shot conditions, and the columns
    are ``Experiment Condition``, ``Whisper Tiny``, ``5K Pure Model``,
    ``10K Triple-Mix`` and ``Best Relative Improvement``.

    Returns:
        pandas.DataFrame: three rows by five columns of strings.
    """
    # NOTE(review): recomputing the zero-shot improvement from the rounded
    # figures in this table, (6.19 - 4.33) / 4.33, gives +43.0% rather than
    # +42.9% - confirm whether the stored value came from unrounded scores.
    summary = {
        "Experiment Condition": [
            "In-Domain (Seen Torgo)",
            "LOSO (Unseen Torgo F01)",
            "Zero-Shot (UA-Speech F02)",
        ],
        "Whisper Tiny": ["41.50%", "12.38%", "4.33%"],
        "5K Pure Model": ["58.77%", "N/A", "6.19%"],
        "10K Triple-Mix": ["54.67%", "24.76%", "5.98%"],
        "Best Relative Improvement": ["+41.6%", "+100.0%", "+42.9%"],
    }
    return pd.DataFrame(summary)
|