Spaces:
Running
Running
Update stats_data.py
Browse files- stats_data.py +27 -23
stats_data.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
-
#
|
| 4 |
SPEAKER_META = {
|
| 5 |
"M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
|
| 6 |
"F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
|
|
@@ -14,42 +14,46 @@ SPEAKER_META = {
|
|
| 14 |
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
|
| 17 |
-
# Primary Data for Torgo In-Domain (5K Model is the Accuracy Champion)
|
| 18 |
data = {
|
| 19 |
"Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
|
| 20 |
"Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
|
| 21 |
-
"Whisper
|
| 22 |
-
"
|
| 23 |
}
|
| 24 |
df = pd.DataFrame(data)
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
# Format for display
|
| 31 |
-
df["Whisper Tiny"] = df["Whisper Tiny"].astype(str) + "%"
|
| 32 |
-
df["DSR Lab (5K)"] = df["DSR Lab (5K)"].astype(str) + "%"
|
| 33 |
-
df["Absolute Gain (%)"] = "+" + df["Absolute Gain (%)"].astype(str) + "%"
|
| 34 |
-
df["Relative Improvement (%)"] = "+" + df["Relative Improvement (%)"].astype(str) + "%"
|
| 35 |
|
| 36 |
return df
|
| 37 |
|
| 38 |
def get_experimental_summary():
|
| 39 |
-
# Comparing conditions (In-Domain, LOSO, Zero-Shot)
|
| 40 |
data = {
|
| 41 |
-
"Condition": ["In-Domain (Seen Torgo)", "LOSO (Unseen Torgo F01)", "Zero-Shot (UA-Speech F02)"],
|
| 42 |
-
"Whisper
|
| 43 |
-
"
|
|
|
|
| 44 |
}
|
| 45 |
df = pd.DataFrame(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
df["Relative Gain"] = (((df["Our Best Score"] - df["Whisper Tiny"]) / df["Whisper Tiny"]) * 100).round(1)
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
df["Whisper
|
| 52 |
-
df["
|
| 53 |
-
df["
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
return df
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
# Speaker meta remains the same for UI lookups
|
| 4 |
SPEAKER_META = {
|
| 5 |
"M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
|
| 6 |
"F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
|
|
|
|
| 14 |
}
|
| 15 |
|
| 16 |
def get_indomain_breakdown():
    """Build the per-speaker in-domain results table, formatted for display.

    The relative gain is computed per speaker as
    ``((correction - whisper) / whisper) * 100`` and rounded to one decimal
    place before any column is converted to text.

    Returns:
        pandas.DataFrame: one row per speaker with the columns "Speaker",
        "Severity", "Whisper Baseline", "Correction Layer (5K)" and
        "Relative Gain". The two score columns are strings with a trailing
        "%"; "Relative Gain" is a string with an explicit sign and a
        trailing "%".
    """
    data = {
        "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
        "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
        "Whisper Baseline": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
        "Correction Layer (5K)": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
    }
    df = pd.DataFrame(data)

    # Compute the gain while the score columns are still numeric.
    relative_gain = (
        ((df["Correction Layer (5K)"] - df["Whisper Baseline"]) / df["Whisper Baseline"]) * 100
    ).round(1)

    # Add percentage signs for display.
    for col in ["Whisper Baseline", "Correction Layer (5K)"]:
        df[col] = df[col].astype(str) + "%"

    # Sign-aware formatting: a negative gain renders as "-x.x%" rather than
    # the malformed "+-x.x%" that a hard-coded "+" prefix would produce.
    df["Relative Gain"] = relative_gain.map("{:+.1f}%".format)

    return df
|
| 33 |
|
| 34 |
def get_experimental_summary():
    """Build the cross-condition summary table, formatted for display.

    For each experiment condition the table shows the Whisper baseline, the
    5K and 10K model scores, and the relative gain of the better of the two
    model scores over the baseline, computed as
    ``((best - whisper) / whisper) * 100`` and shown with one decimal place.

    Every column is formatted as text before the DataFrame is created, so no
    string is ever written into a numeric column afterwards (pandas treats
    that as an incompatible-dtype assignment).

    Returns:
        pandas.DataFrame: one row per condition with the string columns
        "Experiment Condition", "Whisper Baseline", "5K Model Score",
        "10K Model Score" and "Relative Gain (Best)". A condition without a
        5K result shows "N/A" in "5K Model Score".
    """
    conditions = [
        "In-Domain (Seen Torgo)",
        "LOSO (Unseen Torgo F01)",
        "Zero-Shot (UA-Speech F02)",
    ]
    whisper_base = [41.50, 12.38, 4.33]
    # None marks a condition with no 5K result; it is displayed as "N/A".
    scores_5k = [58.77, None, 6.19]
    scores_10k = [54.67, 24.76, 5.98]

    def _as_percent(score):
        # Render one score for display; missing scores stay readable.
        return "N/A" if score is None else f"{score}%"

    # Derive the gain from the same lists that feed the table so the two
    # cannot drift apart; the best score is the highest available one.
    gains = []
    for base, score_5k, score_10k in zip(whisper_base, scores_5k, scores_10k):
        best = max(s for s in (score_5k, score_10k) if s is not None)
        gains.append(f"{((best - base) / base) * 100:+.1f}%")

    return pd.DataFrame(
        {
            "Experiment Condition": conditions,
            "Whisper Baseline": [_as_percent(s) for s in whisper_base],
            "5K Model Score": [_as_percent(s) for s in scores_5k],
            "10K Model Score": [_as_percent(s) for s in scores_10k],
            "Relative Gain (Best)": gains,
        }
    )
|