Spaces:
Sleeping
Sleeping
Update stats_data.py
Browse files- stats_data.py +13 -30
stats_data.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
-
# Speaker meta
|
| 4 |
SPEAKER_META = {
|
| 5 |
"M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
|
| 6 |
"F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
|
|
@@ -14,46 +14,29 @@ SPEAKER_META = {
|
|
| 14 |
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
|
|
|
|
| 17 |
data = {
|
| 18 |
"Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
|
| 19 |
"Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
|
| 20 |
"Whisper Baseline": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
|
| 21 |
-
"
|
| 22 |
}
|
| 23 |
df = pd.DataFrame(data)
|
| 24 |
-
|
| 25 |
-
df["Relative Gain"] = (((df["Correction Layer (5K)"] - df["Whisper Baseline"]) / df["Whisper Baseline"]) * 100).round(1)
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
for col in ["Whisper Baseline", "
|
| 29 |
df[col] = df[col].astype(str) + "%"
|
| 30 |
df["Relative Gain"] = "+" + df["Relative Gain"].astype(str) + "%"
|
| 31 |
-
|
| 32 |
return df
|
| 33 |
|
| 34 |
def get_experimental_summary():
|
|
|
|
| 35 |
data = {
|
| 36 |
-
"
|
| 37 |
-
"Whisper Baseline": [41.50, 12.38, 4.33],
|
| 38 |
-
"5K Model
|
| 39 |
-
"10K
|
|
|
|
| 40 |
}
|
| 41 |
-
|
| 42 |
-
# Calculate gain based on the best performing model in that row
|
| 43 |
-
best_scores = [58.77, 24.76, 6.19]
|
| 44 |
-
whisper_base = [41.50, 12.38, 4.33]
|
| 45 |
-
gains = []
|
| 46 |
-
for b, w in zip(best_scores, whisper_base):
|
| 47 |
-
gains.append(f"+{round(((b-w)/w)*100, 1)}%")
|
| 48 |
-
|
| 49 |
-
df["Relative Gain (Best)"] = gains
|
| 50 |
-
|
| 51 |
-
# Formatting
|
| 52 |
-
df["Whisper Baseline"] = df["Whisper Baseline"].astype(str) + "%"
|
| 53 |
-
df.at[0, "5K Model Score"] = "58.77%"
|
| 54 |
-
df.at[2, "5K Model Score"] = "6.19%"
|
| 55 |
-
df.at[0, "10K Model Score"] = "54.67%"
|
| 56 |
-
df.at[1, "10K Model Score"] = "24.76%"
|
| 57 |
-
df.at[2, "10K Model Score"] = "5.98%"
|
| 58 |
-
|
| 59 |
-
return df
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
# Speaker meta lookup
|
| 4 |
SPEAKER_META = {
|
| 5 |
"M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
|
| 6 |
"F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
|
|
|
|
| 14 |
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
    """Build the per-speaker in-domain Torgo results table.

    Returns:
        A pandas DataFrame with one row per speaker and the columns
        "Speaker", "Severity", "Whisper Baseline", "5K Pure Model" and
        "Relative Gain". The two score columns and the gain column are
        display strings ending in "%"; the gain carries an explicit sign.
    """
    # Individual speaker results for In-Domain Torgo
    data = {
        "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
        "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
        "Whisper Baseline": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
        "5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
    }
    df = pd.DataFrame(data)

    # The gain must be computed while the score columns are still numeric.
    relative_gain = (
        (df["5K Pure Model"] - df["Whisper Baseline"]) / df["Whisper Baseline"]
    ) * 100

    # Sign-aware formatting: a regression renders as "-x.x%" instead of the
    # malformed "+-x.x%" that a hard-coded "+" prefix would produce.
    df["Relative Gain"] = relative_gain.map("{:+.1f}%".format)

    # Formatting
    for col in ["Whisper Baseline", "5K Pure Model"]:
        df[col] = df[col].astype(str) + "%"
    return df
|
| 32 |
|
| 33 |
def get_experimental_summary():
    """Build the summary table comparing the 5K and 10K models.

    Each row is one research condition. Scores are kept numeric internally
    so that "Best Relative Gain" is derived from them rather than typed in
    by hand, which keeps the gain consistent with the scores shown.

    Returns:
        A pandas DataFrame with the columns "Condition", "Whisper Baseline",
        "5K Pure Model", "10K Triple-Mix" and "Best Relative Gain". All
        score cells are display strings such as "41.50%"; a missing score
        is rendered as "N/A".
    """
    # Comparing 5K and 10K across the three specific research conditions
    conditions = [
        "In-Domain (Seen Torgo)",
        "LOSO (Unseen Torgo F01)",
        "Zero-Shot (UA-Speech F02)",
    ]
    whisper_baseline = [41.50, 12.38, 4.33]
    pure_5k = [58.77, None, 6.19]  # None: no 5K result for the LOSO row
    triple_mix_10k = [54.67, 24.76, 5.98]

    def _as_percent(score):
        # Two decimals to match the table's display precision.
        return "N/A" if score is None else f"{score:.2f}%"

    # Gain of the best available model in each row relative to the baseline.
    best_gains = []
    for base, *model_scores in zip(whisper_baseline, pure_5k, triple_mix_10k):
        best = max(score for score in model_scores if score is not None)
        best_gains.append(f"{(best - base) / base * 100:+.1f}%")

    data = {
        "Condition": conditions,
        "Whisper Baseline": [_as_percent(score) for score in whisper_baseline],
        "5K Pure Model": [_as_percent(score) for score in pure_5k],
        "10K Triple-Mix": [_as_percent(score) for score in triple_mix_10k],
        "Best Relative Gain": best_gains,
    }
    return pd.DataFrame(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|