Commit: e48391e
Parent(s): 7540444
update whisperkit version and release
.github/scripts/check_dataset_update.py
CHANGED

@@ -27,8 +27,8 @@ def check_dataset_updates(dataset_id):
         {
             "last_modified": last_modified,
             "sha": current_sha,
-            "releases": ["
-            "whisperkit_version": "0.9.
+            "releases": ["5254d82"],
+            "whisperkit_version": "0.9.4",
         },
         f,
     )
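For context, a minimal sketch of the version record this hunk ends up writing. The keys and the new values ("5254d82", "0.9.4") come from the diff above; the output path, the indent argument, and the placeholder last_modified/sha values are assumptions, since check_dataset_update.py computes them at run time and the rest of that script is not shown here.

# Sketch only -- not part of the commit. Approximate shape of the version record
# written by check_dataset_update.py after this change; placeholders are marked.
import json

version_record = {
    "last_modified": "2024-12-01T00:00:00+00:00",  # placeholder; taken from the dataset metadata
    "sha": "abc123",                               # placeholder; current dataset SHA
    "releases": ["5254d82"],                       # value from the hunk above
    "whisperkit_version": "0.9.4",                 # value from the hunk above
}

with open("dashboard_data/version.json", "w") as f:  # path assumed from process_report.py below
    json.dump(version_record, f, indent=4)            # indent level assumed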
.github/scripts/process_report.py
ADDED
@@ -0,0 +1,503 @@
import json
import os
import re
from datetime import datetime
from typing import Tuple

import pandas as pd
from bs4 import BeautifulSoup


def format_datetime(dt_str: str) -> str:
    """
    Format a datetime string for display.

    :param dt_str: String representing a datetime in ISO format
    :return: Formatted datetime string
    """
    return dt_str.replace("T", " ").split("+")[0]


def read_json_line_by_line(file_path):
    """
    Read a JSON file line by line, parsing each line as a separate JSON object.

    :param file_path: Path to the JSON file
    :return: List of parsed JSON objects

    This function is useful for reading large JSON files that contain one JSON object
    per line. It handles JSON parsing errors gracefully, skipping invalid lines.
    """
    data = []
    with open(file_path, "r") as f:
        for line in f:
            try:
                item = json.loads(line.strip())
                data.append(item)
            except json.JSONDecodeError:
                print(f"Skipping invalid JSON in {file_path}: {line}")
    return data


def calculate_change(new: float, old: float, metric_name: str) -> Tuple[float, str]:
    """Calculate percentage change and return with appropriate emoji."""
    pct_change = new - old
    if abs(pct_change) < 1:
        emoji = "↔️"
    elif pct_change > 0:
        emoji = "🟢" if "wer" not in metric_name.lower() else "❌"
    else:
        emoji = "❌" if "wer" not in metric_name.lower() else "🟢"

    return (pct_change, emoji)


def has_changes(config, prev_dict, curr_dict):
    """Check if any metrics have changed."""
    curr = curr_dict[config]
    prev = prev_dict[config]

    metrics = ["speed", "tokens_per_second", "average_wer", "qoi"]
    for key in metrics:
        if key in curr and key in prev:
            curr_val = curr[key]
            prev_val = prev[key]
            if abs(curr_val - prev_val) >= 1:  # 1% threshold
                return True
    return False


def format_metrics_table(config, prev_dict, curr_dict):
    """Format metrics into a table string."""
    curr = curr_dict[config]
    prev = prev_dict[config]

    metrics = [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]

    table = "```\nMetric Previous Current Change\n--------------------------------\n"
    for metric_name, key in metrics:
        if key in curr and key in prev:
            curr_val = curr[key]
            prev_val = prev[key]
            pct_change, _ = calculate_change(curr_val, prev_val, metric_name)
            if abs(pct_change) >= 1:  # Only show metrics with changes
                table += f"{metric_name:<9} {prev_val:<11.2f} {curr_val:<10.2f} {pct_change:.2f}\n"
    table += "```"
    return table


def extract_status_and_os(cell_value):
    """
    Extract status and OS versions from a cell, handling both HTML and plain text.
    Returns list of tuples: [(status, os_version), ...]
    """
    results = []
    cell_value = str(cell_value)

    # First, handle the case where there's no HTML tags
    if cell_value == "Not Supported":
        return results

    # Split the cell into parts (first element and subsequent <p> elements)
    parts = cell_value.split("<p>")

    for part in parts:
        part = part.strip("</p>")
        if not part:
            continue

        # Check if part contains warning symbol
        if "⚠️" in part:
            # Parse HTML to extract OS version from anchor tag
            soup = BeautifulSoup(part, "html.parser")
            # Find text after href that contains OS version
            text = soup.get_text()
            os_match = re.search(r"(iOS|iPadOS|macOS)\s+[\d.]+", text)
            if os_match:
                os_version = os_match.group(0)
                results.append(("⚠️", os_version))
        else:
            # For success cases, OS version is directly in the text
            os_match = re.search(r"(iOS|iPadOS|macOS)\s+[\d.]+", part)
            if os_match:
                os_version = os_match.group(0)
                results.append(("✅", os_version))

    return results


def escape_string(s: str) -> str:
    """Escape a string to be used as a value in JSON."""
    return (
        s.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


def analyze_support_changes(prev_csv, curr_csv):
    """Analyze support changes between CSV files."""
    # Read CSV files
    prev_df = pd.read_csv(prev_csv)
    prev_df.set_index(prev_df.columns[0], inplace=True)

    curr_df = pd.read_csv(curr_csv)
    curr_df.set_index(curr_df.columns[0], inplace=True)

    # Get device lists (excluding first column which is the index)
    prev_devices = sorted(prev_df.columns[1:])
    curr_devices = sorted(curr_df.columns[1:])

    # Calculate device ratio
    device_ratio = len(curr_devices) / len(prev_devices) if prev_devices else 1
    needs_alert = device_ratio < 0.9  # Alert if less than 90% of previous devices

    # Convert to dictionary for easier comparison
    prev_status = {}
    curr_status = {}

    # Process previous data
    for idx in range(len(prev_df)):
        model = prev_df.index[idx]
        for col_idx in range(1, len(prev_df.columns)):
            cell_value = prev_df.iloc[idx, col_idx]
            device = prev_df.columns[col_idx]
            statuses = extract_status_and_os(cell_value)
            for status, os_version in statuses:
                prev_status[(model, device, os_version)] = status

    # Process current data and track new configurations
    new_configs = []
    for idx in range(len(curr_df)):
        model = curr_df.index[idx]
        for col_idx in range(1, len(curr_df.columns)):
            cell_value = curr_df.iloc[idx, col_idx]
            device = curr_df.columns[col_idx]
            statuses = extract_status_and_os(cell_value)
            for status, os_version in statuses:
                curr_status[(model, device, os_version)] = status
                # Check if this is a new configuration
                if (model, device, os_version) not in prev_status:
                    new_configs.append((model, device, os_version))

    # Find changes
    fixed_errors = []
    new_errors = []

    # Check all configurations that exist in both datasets
    common_configs = set(prev_status.keys()) & set(curr_status.keys())
    for config in common_configs:
        model, device, os_version = config
        if prev_status[config] == "⚠️" and curr_status[config] == "✅":
            fixed_errors.append((model, device, os_version))
        elif prev_status[config] == "✅" and curr_status[config] == "⚠️":
            new_errors.append((model, device, os_version))

    return fixed_errors, new_errors, new_configs, needs_alert


def generate_report():
    # Load current and previous data
    prev_perf_data = read_json_line_by_line("report_data/performance_data.json")
    curr_perf_data = read_json_line_by_line("dashboard_data/performance_data.json")

    prev_dict = {(d["model"], d["device"], d["os"]): d for d in prev_perf_data}
    curr_dict = {(d["model"], d["device"], d["os"]): d for d in curr_perf_data}
    common_configs = set(curr_dict.keys()) & set(prev_dict.keys())

    # Load version data
    with open("report_data/version.json", "r") as f:
        prev_version = json.load(f)
    with open("dashboard_data/version.json", "r") as f:
        curr_version = json.load(f)

    prev_releases = set(prev_version.get("releases", []))
    curr_releases = set(curr_version.get("releases", []))
    new_releases = curr_releases - prev_releases
    removed_releases = prev_releases - curr_releases

    # Track metrics
    total_configs = len(common_configs)
    improved_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}
    regressed_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}
    new_data_points = len(set(curr_dict.keys()) - set(prev_dict.keys()))

    # Analyze support changes
    fixed_errors, new_errors, new_configs, needs_alert = analyze_support_changes(
        "report_data/support_data.csv", "dashboard_data/support_data.csv"
    )

    # Create Slack blocks
    current_time = datetime.now().strftime("%B %-d, %Y %H:%M:%S")
    prev_release_tag, curr_release_tag = (
        prev_version["whisperkit_version"],
        curr_version["whisperkit_version"],
    )
    slack_blocks = {
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": "🔔 WhisperKit Dataset Update Report 🔔",
                    "emoji": True,
                },
            },
            {
                "type": "context",
                "elements": [{"text": f"*{current_time}*", "type": "mrkdwn"}],
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {"type": "mrkdwn", "text": "ℹ️ *CURRENT VERSION INFO* ℹ️"},
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *Last Modified:* `{format_datetime(curr_version['last_modified'])}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *Dataset SHA:* `{curr_version['sha']}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *Current Releases:* {', '.join(f'`{r}`' for r in curr_version['releases'])}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *Current Release Tag:* `{curr_release_tag}`",
                },
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "🔄 *SUMMARY OF PERFORMANCE UPDATES* 🔄",
                },
            },
        ]
    }

    # Add release information
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *Added Releases:* {', '.join(sorted(new_releases)) if new_releases else 'None'}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *Removed Releases:* {', '.join(sorted(removed_releases)) if removed_releases else 'None'}",
                },
            },
        ]
    )
    if prev_release_tag != curr_release_tag:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *Release Tag Change:* `{prev_release_tag}` → `{curr_release_tag}`",
                },
            }
        )
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *New Data Points:* `{new_data_points}` new configurations",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
        ]
    )

    # Add metrics summary
    for metric_name, key in [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"• *{metric_name}:* `{improved_metrics[key]}` improved, `{regressed_metrics[key]}` regressed",
                },
            }
        )

    # Add support changes section
    if fixed_errors or new_errors or new_configs:
        slack_blocks["blocks"].extend(
            [
                {"type": "divider"},
                {
                    "type": "section",
                    "text": {"type": "mrkdwn", "text": "📱 *DEVICE SUPPORT CHANGES* 📱"},
                },
            ]
        )

        if fixed_errors:
            slack_blocks["blocks"].extend(
                [
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": "*Successful Configurations That Override Previous Failures*",
                        },
                    }
                ]
            )
            for model, device, os_version in sorted(fixed_errors):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"• {model} on {device} ({os_version})",
                        },
                    }
                )

        if new_errors:
            slack_blocks["blocks"].extend(
                [
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": "*Failed Configurations That Override Previous Successes*",
                        },
                    }
                ]
            )
            for model, device, os_version in sorted(new_errors):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"• {model} on {device} ({os_version})",
                        },
                    }
                )

        if new_configs:
            slack_blocks["blocks"].extend(
                [
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": "*Newly Tested Configurations*",
                        },
                    }
                ]
            )
            for model, device, os_version in sorted(new_configs):
                slack_blocks["blocks"].append(
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": f"• {model} on {device} ({os_version})",
                        },
                    }
                )

    # Add alert if significant decrease in device count
    if needs_alert:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "⚠️ *ALERT:* Current device count is less than 90% of previous version's device count, test on more devices before updating the benchmark website!",
                },
            }
        )

    # Create performance text as a single mrkdwn string
    if common_configs:
        performance_text = "💡 *Performance Updates* 💡\n\n"

        # Group by model for better organization
        models = sorted(set(model for model, _, _ in common_configs))

        for model in models:
            model_configs = sorted([cfg for cfg in common_configs if cfg[0] == model])

            for config in model_configs:
                device_info = f"*{model}* ({config[2]})"

                if not has_changes(config, prev_dict, curr_dict):
                    # If no changes, just add the model with a checkmark
                    performance_text += f"{device_info} ✅\n\n"
                else:
                    # If there are changes, show the metrics
                    performance_text += f"{device_info}\n"
                    performance_text += format_metrics_table(
                        config, prev_dict, curr_dict
                    )
                    performance_text += "\n\n"

    # Write to GITHUB_OUTPUT
    github_output = os.getenv("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a") as f:
            f.write("slack_message_payload<<EOF\n")
            json.dump(slack_blocks, f, indent=2)
            f.write("\nEOF\n")

        with open(github_output, "a") as f:
            escaped_text = escape_string(performance_text)
            print(f"performance_message={escaped_text}", file=f)


if __name__ == "__main__":
    generate_report()
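To see what this script emits without running the workflow, a hedged local dry-run sketch follows. It assumes the previous snapshot sits under report_data/ and the fresh export under dashboard_data/ (the paths hard-coded above), and points GITHUB_OUTPUT at a scratch file instead of the runner-provided one; the scratch file name is made up for illustration.

# Hypothetical local dry run of process_report.py (not part of the commit).
# Assumes report_data/ and dashboard_data/ are populated as in the workflow.
import os
import runpy

os.environ["GITHUB_OUTPUT"] = "local_outputs.txt"  # scratch file standing in for the runner's output file

# Execute the script the same way the workflow does: python .github/scripts/process_report.py
runpy.run_path(".github/scripts/process_report.py", run_name="__main__")

# local_outputs.txt now holds the two step outputs the workflow forwards to Slack:
#   slack_message_payload<<EOF ... EOF   (Block Kit JSON for the channel message)
#   performance_message=...              (escaped mrkdwn for the thread reply)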
.github/workflows/dataset_update.yml
CHANGED
@@ -1,4 +1,4 @@
-name: WhisperKit
+name: WhisperKit Evals Dataset Update Workflow
 
 on:
   schedule:

@@ -29,6 +29,28 @@ jobs:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: python .github/scripts/check_dataset_update.py
 
+      - name: Save workflow data
+        run: |
+          mkdir -p ./workflow_data
+          echo "${{ steps.check_updates.outputs.has_updates }}" > ./workflow_data/has_updates.txt
+
+      - name: Upload workflow data
+        uses: actions/upload-artifact@v4
+        with:
+          name: workflow_data
+          path: workflow_data/
+          overwrite: true
+
+      - name: Upload relevant dashboard data for report generation
+        if: steps.check_updates.outputs.has_updates == 'true'
+        uses: actions/upload-artifact@v4
+        with:
+          name: report_data
+          path: |
+            dashboard_data/performance_data.json
+            dashboard_data/support_data.csv
+            dashboard_data/version.json
+
       - name: Install full requirements
         if: steps.check_updates.outputs.has_updates == 'true'
         run: |

@@ -54,4 +76,68 @@
           git add .
           git commit -m "update dataset files" || echo "No changes to commit"
           git push
-          git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/argmaxinc/whisperkit-benchmarks
+          git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/argmaxinc/whisperkit-benchmarks-internal
+
+  generate-report:
+    needs: update-datasets
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Download workflow data
+        uses: actions/download-artifact@v4
+        with:
+          name: workflow_data
+          path: workflow_data
+
+      - name: Check updates status
+        id: check
+        run: |
+          HAS_UPDATES=$(cat workflow_data/has_updates.txt)
+          echo "has_updates=$HAS_UPDATES" >> $GITHUB_OUTPUT
+
+      - name: Download report data
+        if: steps.check.outputs.has_updates == 'true'
+        uses: actions/download-artifact@v4
+        with:
+          name: report_data
+          path: report_data
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pandas beautifulsoup4
+
+      - name: Process report
+        if: steps.check.outputs.has_updates == 'true'
+        id: report
+        run: python .github/scripts/process_report.py
+
+      - name: Post to a Slack Channel
+        if: steps.check.outputs.has_updates == 'true'
+        id: slack_message
+        uses: slackapi/slack-github-action@v1.27.0
+        with:
+          channel-id: ${{ secrets.SLACK_CHANNEL_ID }}
+          payload: |
+            ${{ steps.report.outputs.slack_message_payload }}
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+
+      - name: Send Thread Message
+        if: steps.check.outputs.has_updates == 'true'
+        uses: slackapi/slack-github-action@v1.27.0
+        with:
+          channel-id: ${{ secrets.SLACK_CHANNEL_ID }}
+          payload: |
+            {
+              "thread_ts": "${{ steps.slack_message.outputs.ts }}",
+              "text": "${{ steps.report.outputs.performance_message }}"
+            }
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
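For debugging the hand-off between the "Process report" step and the slackapi/slack-github-action steps, a hedged sketch of how the multi-line slack_message_payload output can be parsed back out of a local dry run. The scratch file name matches the dry-run sketch above and is an assumption; on the runner, GitHub Actions itself performs this parsing.

# Optional local check (not part of the workflow): confirm that the payload written
# to the GITHUB_OUTPUT scratch file is valid JSON before it would reach Slack.
import json

with open("local_outputs.txt") as f:  # scratch GITHUB_OUTPUT from the dry run above
    raw = f.read()

marker = "slack_message_payload<<EOF\n"
start = raw.index(marker) + len(marker)
end = raw.index("\nEOF\n", start)
payload = json.loads(raw[start:end])
print(f"{len(payload['blocks'])} Slack blocks parsed successfully")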