| import glob |
| import sys |
|
|
| import pandas as pd |
| from huggingface_hub import hf_hub_download, upload_file |
| from huggingface_hub.utils._errors import EntryNotFoundError |
|
|
|
|
| sys.path.append(".") |
| from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv |
|
|
|
|
def has_previous_benchmark() -> "str | None":
    """Download the previously published benchmark CSV, if the repo has one.

    Requests ``FINAL_CSV_FILE`` from the ``REPO_ID`` dataset repository via
    ``hf_hub_download``.

    Returns:
        The local path returned by ``hf_hub_download``, or ``None`` when the
        download raises ``EntryNotFoundError``.
    """
    try:
        return hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
    except EntryNotFoundError:
        # The file is not in the repo, so there is nothing to compare against.
        return None
|
|
|
|
def filter_float(value):
    """Return the leading number of a string cell as a float.

    Strings are split on whitespace and only the first token is converted,
    so a cell such as ``"1.5 (+2.00%)"`` yields ``1.5``. Any non-string
    value is handed back untouched.
    """
    if not isinstance(value, str):
        return value
    leading_token = value.split(maxsplit=1)[0]
    return float(leading_token)
|
|
|
|
def _format_percent_suffix(change):
    """Render a percent change as a ``" (+1.23%)"`` suffix; NaN gives an empty suffix."""
    if pd.isna(change):
        # No previous value to compare against (e.g. a newly added row).
        return ""
    sign = "+" if change > 0 else ""
    return f" ({sign}{change:.2f}%)"


def _append_percent_change(current_results, previous_results):
    """Append the percent change versus ``previous_results`` to numeric columns, in place.

    Every ``float64``/``int64`` column of ``current_results`` is compared,
    except the columns listed in ``excluded`` below. Each compared column is
    replaced by strings of the form ``"<value> (<+/-change>%)"``. Columns that
    do not exist in ``previous_results`` are left unchanged instead of raising
    ``KeyError``.

    Args:
        current_results: DataFrame holding the freshly collated results.
        previous_results: DataFrame holding the previously published results;
            its cells may already carry a ``" (x%)"`` suffix.

    Returns:
        ``current_results``, for convenience.
    """
    excluded = ("batch_size", "num_inference_steps", "actual_gpu_memory (gbs)")
    # Materialize the list first: the loop changes column dtypes as it goes.
    numeric_columns = [
        c for c in current_results.select_dtypes(include=["float64", "int64"]).columns if c not in excluded
    ]

    for column in numeric_columns:
        if column not in previous_results.columns:
            # Newly added metric: there is no baseline to compare against.
            continue

        # Strip any previously appended suffix before doing arithmetic.
        previous_values = previous_results[column].map(filter_float).astype(float)
        current_values = current_results[column].astype(float)
        percent_change = ((current_values - previous_values) / previous_values) * 100

        # Assignment aligns on the index of `current_results`, so rows that
        # only exist in the previous results are dropped here.
        current_results[column] = current_values.map(str) + percent_change.map(_format_percent_suffix)

    return current_results


def push_to_hf_dataset():
    """Collate the local benchmark CSVs and upload the result to the Hub dataset.

    Steps:
        1. Pass every ``*.csv`` under ``BASE_PATH`` (sorted by path) to
           ``collate_csv`` together with ``FINAL_CSV_FILE`` (presumably this
           writes the merged file there; confirm against ``utils``).
        2. If a previous ``FINAL_CSV_FILE`` can be downloaded, annotate the
           numeric columns with the percent change versus it and overwrite
           ``FINAL_CSV_FILE``.
        3. Upload ``FINAL_CSV_FILE`` to the ``REPO_ID`` dataset repository.
    """
    all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
    collate_csv(all_csvs, FINAL_CSV_FILE)

    # If there's an existing benchmark file, report the changes against it.
    csv_path = has_previous_benchmark()
    if csv_path is not None:
        current_results = pd.read_csv(FINAL_CSV_FILE)
        previous_results = pd.read_csv(csv_path)
        _append_percent_change(current_results, previous_results)

        # Overwrite the current result file with the annotated values.
        current_results.to_csv(FINAL_CSV_FILE, index=False)

    commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
    upload_file(
        repo_id=REPO_ID,
        path_in_repo=FINAL_CSV_FILE,
        path_or_fileobj=FINAL_CSV_FILE,
        repo_type="dataset",
        commit_message=commit_message,
    )
|
|
|
|
if __name__ == "__main__":
    # Executed as a script rather than imported: run the publish pipeline.
    push_to_hf_dataset()
|
|