st192011 committed on
Commit
1c0c2a7
·
verified ·
1 Parent(s): 741eb17

Update stats_data.py

Browse files
Files changed (1) hide show
  1. stats_data.py +13 -30
stats_data.py CHANGED
@@ -1,6 +1,6 @@
1
  import pandas as pd
2
 
3
- # Speaker meta remains the same for UI lookups
4
  SPEAKER_META = {
5
  "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
6
  "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
@@ -14,46 +14,29 @@ SPEAKER_META = {
14
  }
15
 
16
  def get_indomain_breakdown():
 
17
  data = {
18
  "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
19
  "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
20
  "Whisper Baseline": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
21
- "Correction Layer (5K)": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8]
22
  }
23
  df = pd.DataFrame(data)
24
- # Relative Gain calculation: ((Ours - Whisper) / Whisper) * 100
25
- df["Relative Gain"] = (((df["Correction Layer (5K)"] - df["Whisper Baseline"]) / df["Whisper Baseline"]) * 100).round(1)
26
 
27
- # Add percentage signs for display
28
- for col in ["Whisper Baseline", "Correction Layer (5K)"]:
29
  df[col] = df[col].astype(str) + "%"
30
  df["Relative Gain"] = "+" + df["Relative Gain"].astype(str) + "%"
31
-
32
  return df
33
 
34
  def get_experimental_summary():
 
35
  data = {
36
- "Experiment Condition": ["In-Domain (Seen Torgo)", "LOSO (Unseen Torgo F01)", "Zero-Shot (UA-Speech F02)"],
37
- "Whisper Baseline": [41.50, 12.38, 4.33],
38
- "5K Model Score": [58.77, "N/A", 6.19],
39
- "10K Model Score": [54.67, 24.76, 5.98]
 
40
  }
41
- df = pd.DataFrame(data)
42
- # Calculate gain based on the best performing model in that row
43
- best_scores = [58.77, 24.76, 6.19]
44
- whisper_base = [41.50, 12.38, 4.33]
45
- gains = []
46
- for b, w in zip(best_scores, whisper_base):
47
- gains.append(f"+{round(((b-w)/w)*100, 1)}%")
48
-
49
- df["Relative Gain (Best)"] = gains
50
-
51
- # Formatting
52
- df["Whisper Baseline"] = df["Whisper Baseline"].astype(str) + "%"
53
- df.at[0, "5K Model Score"] = "58.77%"
54
- df.at[2, "5K Model Score"] = "6.19%"
55
- df.at[0, "10K Model Score"] = "54.67%"
56
- df.at[1, "10K Model Score"] = "24.76%"
57
- df.at[2, "10K Model Score"] = "5.98%"
58
-
59
- return df
 
1
  import pandas as pd
2
 
3
+ # Speaker meta lookup
4
  SPEAKER_META = {
5
  "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
6
  "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
 
14
  }
15
 
16
  def get_indomain_breakdown():
17
+ # Individual speaker results for In-Domain Torgo
18
  data = {
19
  "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
20
  "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
21
  "Whisper Baseline": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
22
+ "5K Pure Model": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8]
23
  }
24
  df = pd.DataFrame(data)
25
+ df["Relative Gain"] = (((df["5K Pure Model"] - df["Whisper Baseline"]) / df["Whisper Baseline"]) * 100).round(1)
 
26
 
27
+ # Formatting
28
+ for col in ["Whisper Baseline", "5K Pure Model"]:
29
  df[col] = df[col].astype(str) + "%"
30
  df["Relative Gain"] = "+" + df["Relative Gain"].astype(str) + "%"
 
31
  return df
32
 
33
  def get_experimental_summary():
34
+ # Comparing 5K and 10K across the three specific research conditions
35
  data = {
36
+ "Condition": ["In-Domain (Seen Torgo)", "LOSO (Unseen Torgo F01)", "Zero-Shot (UA-Speech F02)"],
37
+ "Whisper Baseline": ["41.50%", "12.38%", "4.33%"],
38
+ "5K Pure Model": ["58.77%", "N/A", "6.19%"],
39
+ "10K Triple-Mix": ["54.67%", "24.76%", "5.98%"],
40
+ "Best Relative Gain": ["+41.6%", "+100.0%", "+42.9%"]
41
  }
42
+ return pd.DataFrame(data)