st192011 committed on
Commit
baa22ab
·
verified ·
1 Parent(s): 0353a67

Update stats_data.py

Browse files
Files changed (1) hide show
  1. stats_data.py +16 -19
stats_data.py CHANGED
@@ -1,42 +1,39 @@
1
  import pandas as pd
2
 
# Speaker metadata lookup, keyed by speaker label.
# Each row is (speaker, gender, severity, dataset); row order fixes the
# insertion order of the resulting dict.
_SPEAKER_META_ROWS = (
    ("M05", "Male", "Severe", "Torgo"),
    ("F01", "Female", "Severe", "Torgo"),
    ("M01", "Male", "Moderate", "Torgo"),
    ("M04", "Male", "Moderate", "Torgo"),
    ("M02", "Male", "Mild", "Torgo"),
    ("M03", "Male", "Mild", "Torgo"),
    ("F03", "Female", "Mild", "Torgo"),
    ("F04", "Female", "Mild", "Torgo"),
    ("F02 (UA)", "Female", "Severe (Isolated)", "UA-Speech"),
)

SPEAKER_META = {
    speaker: {"Gender": gender, "Severity": severity, "Dataset": dataset}
    for speaker, gender, severity, dataset in _SPEAKER_META_ROWS
}
15
 
def get_indomain_breakdown():
    """Return the hard-coded per-speaker results table, formatted for display.

    Returns:
        pandas.DataFrame with one row per speaker and the columns "Speaker",
        "Severity", "Whisper Baseline", "5K Pure Model" and "Relative Gain".
        The two score columns are strings with a trailing "%"; "Relative
        Gain" is a signed percentage string such as "+173.6%".
    """
    data = {
        "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
        "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
        "Whisper Baseline": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
        "5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
    }
    df = pd.DataFrame(data)

    # Percentage change of the 5K model relative to the baseline, computed
    # while both columns are still numeric.
    gain = ((df["5K Pure Model"] - df["Whisper Baseline"]) / df["Whisper Baseline"] * 100).round(1)

    for col in ["Whisper Baseline", "5K Pure Model"]:
        df[col] = df[col].astype(str) + "%"

    # Let the format spec pick the sign: a hard-coded "+" prefix would render
    # a negative gain as "+-3.2%".
    df["Relative Gain"] = gain.map("{:+.1f}%".format)
    return df
32
 
def get_experimental_summary():
    """Return the hard-coded summary table, one row per research condition.

    Every cell is a pre-formatted string; nothing is computed here. The
    "5K Pure Model" cell of the LOSO row is the literal "N/A".
    """
    columns = [
        "Condition",
        "Whisper Baseline",
        "5K Pure Model",
        "10K Triple-Mix",
        "Best Relative Gain",
    ]
    rows = [
        ("In-Domain (Seen Torgo)", "41.50%", "58.77%", "54.67%", "+41.6%"),
        ("LOSO (Unseen Torgo F01)", "12.38%", "N/A", "24.76%", "+100.0%"),
        ("Zero-Shot (UA-Speech F02)", "4.33%", "6.19%", "5.98%", "+42.9%"),
    ]
    return pd.DataFrame(rows, columns=columns)
 
1
  import pandas as pd
2
 
# Per-speaker metadata, keyed by speaker ID.
# Each row is (speaker, gender, severity, dataset); row order fixes the
# insertion order of the resulting dict.
# NOTE(review): despite the TORGO_ prefix, the "F02" entry is tagged with the
# "UA-Speech" dataset.
_TORGO_META_ROWS = (
    ("F01", "Female", "Severe", "Torgo"),
    ("F03", "Female", "Mild", "Torgo"),
    ("F04", "Female", "Mild", "Torgo"),
    ("M01", "Male", "Moderate", "Torgo"),
    ("M02", "Male", "Mild", "Torgo"),
    ("M03", "Male", "Mild", "Torgo"),
    ("M04", "Male", "Moderate", "Torgo"),
    ("M05", "Male", "Severe", "Torgo"),
    ("F02", "Female", "Severe (Isolated)", "UA-Speech"),
)

TORGO_META = {
    speaker: {"Gender": gender, "Severity": severity, "Dataset": dataset}
    for speaker, gender, severity, dataset in _TORGO_META_ROWS
}
15
 
def get_indomain_breakdown():
    """Return the hard-coded per-speaker results table, formatted for display.

    Returns:
        pandas.DataFrame with one row per speaker and the columns "Speaker",
        "Severity", "Whisper Tiny", "5K Pure Model", "10K Triple-Mix" and
        "Relative Gain (5K vs Tiny)". The three score columns are strings
        with a trailing "%"; the gain column is a signed percentage string
        such as "+173.6%".
    """
    data = {
        "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
        "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
        "Whisper Tiny": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
        "5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
        "10K Triple-Mix": [25.4, 24.1, 44.1, 41.2, 79.1, 77.5, 79.0, 78.2],
    }
    df = pd.DataFrame(data)

    # Percentage change of the 5K model relative to Whisper Tiny, computed
    # while both columns are still numeric.
    gain = ((df["5K Pure Model"] - df["Whisper Tiny"]) / df["Whisper Tiny"] * 100).round(1)

    for col in ["Whisper Tiny", "5K Pure Model", "10K Triple-Mix"]:
        df[col] = df[col].astype(str) + "%"

    # Let the format spec pick the sign: a hard-coded "+" prefix would render
    # a negative gain as "+-3.2%".
    df["Relative Gain (5K vs Tiny)"] = gain.map("{:+.1f}%".format)
    return df
30
 
def get_experimental_summary():
    """Return the hard-coded summary table, one row per experiment condition.

    Every cell is a pre-formatted string; nothing is computed here. The
    "5K Pure Model" cell of the LOSO row is the literal "N/A".
    """
    columns = [
        "Experiment Condition",
        "Whisper Tiny",
        "5K Pure Model",
        "10K Triple-Mix",
        "Best Relative Improvement",
    ]
    rows = [
        ("In-Domain (Seen Torgo)", "41.50%", "58.77%", "54.67%", "+41.6%"),
        ("LOSO (Unseen Torgo F01)", "12.38%", "N/A", "24.76%", "+100.0%"),
        ("Zero-Shot (UA-Speech F02)", "4.33%", "6.19%", "5.98%", "+42.9%"),
    ]
    return pd.DataFrame(rows, columns=columns)