st192011 committed on
Commit
f310c2d
·
verified ·
1 Parent(s): 9cfa845

Update stats_data.py

Browse files
Files changed (1) hide show
  1. stats_data.py +31 -13
stats_data.py CHANGED
@@ -1,6 +1,6 @@
1
  import pandas as pd
2
 
3
- # Standardized metadata based on your training configuration
4
  SPEAKER_META = {
5
  "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
6
  "F03": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
@@ -10,30 +10,48 @@ SPEAKER_META = {
10
  "M03": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
11
  "M04": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
12
  "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
13
- "F02": {"Gender": "Female", "Severity": "Severe (Isolated)", "Dataset": "UA-Speech"}
14
  }
15
 
def get_indomain_breakdown():
    """Return the per-speaker score table as display-ready strings.

    Each row is one speaker with a severity label, the score of each of the
    three models rendered as a percentage string, and the relative gain of
    the 5K Pure Model over Whisper Tiny.

    Returns:
        pandas.DataFrame with columns "Speaker", "Severity", "Whisper Tiny",
        "5K Pure Model", "10K Triple-Mix" and "Relative Gain (5K vs Tiny)".
        All score and gain columns hold strings such as "12.1%" / "+173.6%".
    """
    speakers = ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"]
    severities = ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"]
    whisper_tiny = [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1]
    pure_5k = [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8]
    triple_mix_10k = [25.4, 24.1, 44.1, 41.2, 79.1, 77.5, 79.0, 78.2]

    def as_percent(scores):
        # Fixed one-decimal formatting instead of relying on float repr.
        return [f"{score:.1f}%" for score in scores]

    # "+" in the format spec emits the real sign, so a regression would be
    # shown as "-3.0%" rather than the malformed "+-3.0%".
    gains = [
        f"{(tuned - base) / base * 100:+.1f}%"
        for base, tuned in zip(whisper_tiny, pure_5k)
    ]

    return pd.DataFrame(
        {
            "Speaker": speakers,
            "Severity": severities,
            "Whisper Tiny": as_percent(whisper_tiny),
            "5K Pure Model": as_percent(pure_5k),
            "10K Triple-Mix": as_percent(triple_mix_10k),
            "Relative Gain (5K vs Tiny)": gains,
        }
    )
30
 
def get_experimental_summary():
    """Return the summary table of the three experiment conditions.

    Every cell is a pre-formatted string; nothing is computed here.

    Returns:
        pandas.DataFrame with one row per experiment condition and the
        columns listed in ``headers`` below, in that order.
    """
    headers = [
        "Experiment Condition",
        "Whisper Tiny",
        "5K Pure Model",
        "10K Triple-Mix",
        "Best Relative Improvement",
    ]
    # One tuple per condition, ordered to match ``headers``.
    records = [
        ("In-Domain (Seen Torgo)", "41.50%", "58.77%", "54.67%", "+41.6%"),
        ("LOSO (Unseen Torgo F01)", "12.38%", "N/A", "24.76%", "+100.0%"),
        ("Zero-Shot (UA-Speech F02)", "4.33%", "6.19%", "5.98%", "+42.9%"),
    ]
    summary = pd.DataFrame(records, columns=headers)
    return summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
 
3
+ # Metadata lookup for UI and filtering
4
  SPEAKER_META = {
5
  "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
6
  "F03": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
 
10
  "M03": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
11
  "M04": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
12
  "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
13
+ "F02 (UA)": {"Gender": "Female", "Severity": "Severe", "Dataset": "UA-Speech"}
14
  }
15
 
def get_indomain_breakdown():
    """Return the per-speaker score table as display-ready strings.

    Each row is one speaker with a severity label, the Whisper Tiny and
    5K Pure Model scores rendered as percentage strings, the pre-formatted
    10K Triple-Mix entry, and the relative gain of the 5K Pure Model over
    Whisper Tiny.

    Returns:
        pandas.DataFrame with columns "Speaker", "Severity", "Whisper Tiny",
        "5K Pure Model", "10K Triple-Mix" and "Relative Gain (Best)".
        All score and gain columns hold strings such as "12.1%" / "+173.6%".
    """
    speakers = ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"]
    severities = ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"]
    whisper_tiny = [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1]
    pure_5k = [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8]
    # Already display strings: the F01 entry carries a "(LOSO)" annotation,
    # so this column is passed through untouched.
    triple_mix_10k = ["25.4%", "24.1% (LOSO)", "44.1%", "41.2%", "79.1%", "77.5%", "79.0%", "78.2%"]

    def as_percent(scores):
        # Fixed one-decimal formatting instead of relying on float repr.
        return [f"{score:.1f}%" for score in scores]

    # The gain is always 5K Pure Model vs. Whisper Tiny. "+" in the format
    # spec emits the real sign, so a regression would be shown as "-3.0%"
    # rather than the malformed "+-3.0%".
    gains = [
        f"{(tuned - base) / base * 100:+.1f}%"
        for base, tuned in zip(whisper_tiny, pure_5k)
    ]

    return pd.DataFrame(
        {
            "Speaker": speakers,
            "Severity": severities,
            "Whisper Tiny": as_percent(whisper_tiny),
            "5K Pure Model": as_percent(pure_5k),
            "10K Triple-Mix": triple_mix_10k,
            "Relative Gain (Best)": gains,
        }
    )
35
 
def get_experimental_summary():
    """Return the summary table of the three experiment conditions.

    Scores are kept as numbers internally and rendered as two-decimal
    percentage strings. The "Relative Gain (Best)" column is the relative
    improvement of the best available fine-tuned score in each row over the
    Whisper Tiny score of that row.

    Returns:
        pandas.DataFrame with columns "Condition", "Whisper Tiny",
        "5K Pure Model", "10K Triple-Mix" and "Relative Gain (Best)".
        All score and gain cells are strings; a missing score is "N/A".
    """
    conditions = [
        "In-Domain (Seen Torgo)",
        "LOSO (Unseen Torgo F01)",
        "Zero-Shot (UA-Speech F02)",
    ]
    whisper_tiny = [41.50, 12.38, 4.33]
    # None marks a condition with no score for that model (shown as "N/A").
    pure_5k = [58.77, None, 6.19]
    triple_mix_10k = [54.67, 24.76, 5.98]

    def as_percent(score):
        # Two fixed decimals keep "41.50%" aligned with "12.38%"; plain
        # str() would drop the trailing zero and print "41.5%".
        return "N/A" if score is None else f"{score:.2f}%"

    # Derive the best score from the table itself instead of a second,
    # hand-maintained list that can drift out of sync with the data.
    gains = []
    for base, *candidates in zip(whisper_tiny, pure_5k, triple_mix_10k):
        best = max(score for score in candidates if score is not None)
        # "+" in the format spec emits the real sign of the change.
        gains.append(f"{(best - base) / base * 100:+.1f}%")

    return pd.DataFrame(
        {
            "Condition": conditions,
            "Whisper Tiny": [as_percent(s) for s in whisper_tiny],
            "5K Pure Model": [as_percent(s) for s in pure_5k],
            "10K Triple-Mix": [as_percent(s) for s in triple_mix_10k],
            "Relative Gain (Best)": gains,
        }
    )