st192011 committed on
Commit
741eb17
·
verified ·
1 Parent(s): 9aa92d2

Update stats_data.py

Browse files
Files changed (1) hide show
  1. stats_data.py +27 -23
stats_data.py CHANGED
@@ -1,6 +1,6 @@
1
  import pandas as pd
2
 
3
- # Metadata for speakers available in the demo
4
  SPEAKER_META = {
5
  "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
6
  "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
@@ -14,42 +14,46 @@ SPEAKER_META = {
14
  }
15
 
16
  def get_indomain_breakdown():
17
- # Primary Data for Torgo In-Domain (5K Model is the Accuracy Champion)
18
  data = {
19
  "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
20
  "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
21
- "Whisper Tiny": [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
22
- "DSR Lab (5K)": [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8]
23
  }
24
  df = pd.DataFrame(data)
 
 
25
 
26
- # Calculate Gains
27
- df["Absolute Gain (%)"] = (df["DSR Lab (5K)"] - df["Whisper Tiny"]).round(2)
28
- df["Relative Improvement (%)"] = (((df["DSR Lab (5K)"] - df["Whisper Tiny"]) / df["Whisper Tiny"]) * 100).round(1)
29
-
30
- # Format for display
31
- df["Whisper Tiny"] = df["Whisper Tiny"].astype(str) + "%"
32
- df["DSR Lab (5K)"] = df["DSR Lab (5K)"].astype(str) + "%"
33
- df["Absolute Gain (%)"] = "+" + df["Absolute Gain (%)"].astype(str) + "%"
34
- df["Relative Improvement (%)"] = "+" + df["Relative Improvement (%)"].astype(str) + "%"
35
 
36
  return df
37
 
38
  def get_experimental_summary():
39
- # Comparing conditions (In-Domain, LOSO, Zero-Shot)
40
  data = {
41
- "Condition": ["In-Domain (Seen Torgo)", "LOSO (Unseen Torgo F01)", "Zero-Shot (UA-Speech F02)"],
42
- "Whisper Tiny": [41.50, 12.38, 4.33],
43
- "Our Best Score": [58.77, 24.76, 6.19]
 
44
  }
45
  df = pd.DataFrame(data)
 
 
 
 
 
 
46
 
47
- # Calculate Relative Gain
48
- df["Relative Gain"] = (((df["Our Best Score"] - df["Whisper Tiny"]) / df["Whisper Tiny"]) * 100).round(1)
49
 
50
- # Format for display
51
- df["Whisper Tiny"] = df["Whisper Tiny"].astype(str) + "%"
52
- df["Our Best Score"] = df["Our Best Score"].astype(str) + "%"
53
- df["Relative Gain"] = "+" + df["Relative Gain"].astype(str) + "%"
 
 
 
54
 
55
  return df
 
1
  import pandas as pd
2
 
3
+ # Speaker meta remains the same for UI lookups
4
  SPEAKER_META = {
5
  "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
6
  "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
 
14
  }
15
 
16
def get_indomain_breakdown():
    """Build the per-speaker in-domain comparison table, formatted for display.

    Returns:
        pandas.DataFrame with one row per speaker and the columns
        "Speaker", "Severity", "Whisper Baseline", "Correction Layer (5K)"
        and "Relative Gain". The two score columns and the gain column hold
        display strings ending in "%", not numbers.
    """
    baseline_col = "Whisper Baseline"
    ours_col = "Correction Layer (5K)"
    data = {
        "Speaker": ["M05", "F01", "M01", "M04", "M02", "M03", "F03", "F04"],
        "Severity": ["Severe", "Severe", "Moderate", "Moderate", "Mild", "Mild", "Mild", "Mild"],
        baseline_col: [12.1, 12.6, 32.7, 31.8, 62.1, 58.4, 61.2, 59.1],
        ours_col: [33.1, 34.2, 47.2, 45.6, 84.5, 81.8, 83.5, 82.8],
    }
    df = pd.DataFrame(data)

    # Relative gain: ((ours - baseline) / baseline) * 100, rounded to 1 decimal.
    # Computed before the score columns are turned into strings below.
    relative_gain = ((df[ours_col] - df[baseline_col]) / df[baseline_col] * 100).round(1)

    # Add percentage signs for display.
    for col in (baseline_col, ours_col):
        df[col] = df[col].astype(str) + "%"

    # The "+" format flag emits the sign that matches the value, so a
    # regression would render as "-5.0%" instead of the malformed "+-5.0%"
    # that an unconditional "+" prefix produces.
    df["Relative Gain"] = relative_gain.map(lambda gain: f"{float(gain):+}%")

    return df
33
 
34
def get_experimental_summary():
    """Build the cross-condition summary table, formatted for display.

    Returns:
        pandas.DataFrame with one row per experiment condition and the
        columns "Experiment Condition", "Whisper Baseline",
        "5K Model Score", "10K Model Score" and "Relative Gain (Best)".
        Every score column holds display strings ending in "%"; a missing
        score is shown as "N/A". The gain is relative to the Whisper
        baseline and uses the best available model score in each row.
    """
    conditions = [
        "In-Domain (Seen Torgo)",
        "LOSO (Unseen Torgo F01)",
        "Zero-Shot (UA-Speech F02)",
    ]
    whisper_scores = [41.50, 12.38, 4.33]
    # None marks a score that is not available; it is rendered as "N/A".
    model_scores = {
        "5K Model Score": [58.77, None, 6.19],
        "10K Model Score": [54.67, 24.76, 5.98],
    }

    # Derive the best score per row from the table itself rather than from a
    # second hand-maintained list that could drift out of sync with it.
    best_scores = [
        max(score for score in row if score is not None)
        for row in zip(*model_scores.values())
    ]
    # The "+" format flag emits the sign that matches the value, so a
    # regression would show "-x%" rather than a malformed "+-x%".
    gains = [
        f"{round((best - base) / base * 100, 1):+}%"
        for best, base in zip(best_scores, whisper_scores)
    ]

    # Format to strings before building the frame. Writing strings cell by
    # cell into a float64 column afterwards is deprecated in pandas >= 2.1.
    data = {
        "Experiment Condition": conditions,
        "Whisper Baseline": [f"{score}%" for score in whisper_scores],
    }
    for column, scores in model_scores.items():
        data[column] = ["N/A" if score is None else f"{score}%" for score in scores]
    data["Relative Gain (Best)"] = gains

    return pd.DataFrame(data)