|
|
import argparse |
|
|
import os |
|
|
import sys |
|
|
|
|
|
import gpustat |
|
|
import pandas as pd |
|
|
import psycopg2 |
|
|
import psycopg2.extras |
|
|
from psycopg2.extensions import register_adapter |
|
|
from psycopg2.extras import Json |
|
|
|
|
|
|
|
|
# Teach psycopg2 to serialize plain Python dicts as PostgreSQL JSON values,
# so the per-row `measurements` dict below can be passed directly as a
# query parameter without an explicit Json(...) wrap.
register_adapter(dict, Json)

# CSV produced by the benchmarking run; read below and pushed to the DB.
FINAL_CSV_FILENAME = "collated_results.csv"

# Destination tables: one parent row per benchmark run, and one child row
# per model measurement referencing it via benchmark_id.
BENCHMARKS_TABLE_NAME = "benchmarks"
MEASUREMENTS_TABLE_NAME = "model_measurements"
|
|
|
|
|
|
|
|
def _init_benchmark(conn, branch, commit_id, commit_msg):
    """Insert a new parent row into the benchmarks table and return its id.

    Records the repository, branch, commit hash/message, and the name of the
    first visible GPU (stored in the JSON ``metadata`` column via the
    dict->Json adapter registered at import time).
    """
    gpu_name = gpustat.GPUStatCollection.new_query()[0]["name"]
    values = (
        "huggingface/diffusers",
        branch,
        commit_id,
        commit_msg,
        {"gpu_name": gpu_name},
    )
    with conn.cursor() as cursor:
        cursor.execute(
            f"INSERT INTO {BENCHMARKS_TABLE_NAME} (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
            values,
        )
        new_id = cursor.fetchone()[0]
        print(f"Initialised benchmark #{new_id}")
        return new_id
|
|
|
|
|
|
|
|
def parse_args():
    """Parse the positional CLI arguments identifying the benchmarked commit.

    Returns an argparse.Namespace with ``branch``, ``commit_id`` and
    ``commit_msg`` attributes (all strings).
    """
    parser = argparse.ArgumentParser()
    positionals = (
        ("branch", "The branch name on which the benchmarking is performed."),
        ("commit_id", "The commit hash on which the benchmarking is performed."),
        ("commit_msg", "The commit message associated with the commit, truncated to 70 characters."),
    )
    for name, help_text in positionals:
        parser.add_argument(name, type=str, help=help_text)
    return parser.parse_args()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    args = parse_args()

    # Connection parameters come from the standard libpq environment
    # variables (PGHOST/PGDATABASE/PGUSER/PGPASSWORD); fail fast with a
    # non-zero exit if the database is unreachable.
    try:
        conn = psycopg2.connect(
            host=os.getenv("PGHOST"),
            database=os.getenv("PGDATABASE"),
            user=os.getenv("PGUSER"),
            password=os.getenv("PGPASSWORD"),
        )
        print("DB connection established successfully.")
    except Exception as e:
        print(f"Problem during DB init: {e}")
        sys.exit(1)

    # Create the parent benchmark row first; the measurement rows inserted
    # later all reference this id.
    try:
        benchmark_id = _init_benchmark(
            conn=conn,
            branch=args.branch,
            commit_id=args.commit_id,
            commit_msg=args.commit_msg,
        )
    except Exception as e:
        print(f"Problem during initializing benchmark: {e}")
        sys.exit(1)

    # Plain cursor (not a context manager); closed explicitly after the
    # batch insert further below.
    cur = conn.cursor()

    # Collated per-model results produced by the benchmarking run.
    df = pd.read_csv(FINAL_CSV_FILENAME)
|
|
|
|
|
|
|
|
def _cast_value(val, dtype: str): |
|
|
if pd.isna(val): |
|
|
return None |
|
|
|
|
|
if dtype == "text": |
|
|
return str(val).strip() |
|
|
|
|
|
if dtype == "float": |
|
|
try: |
|
|
return float(val) |
|
|
except ValueError: |
|
|
return None |
|
|
|
|
|
if dtype == "bool": |
|
|
s = str(val).strip().lower() |
|
|
if s in ("true", "t", "yes", "1"): |
|
|
return True |
|
|
if s in ("false", "f", "no", "0"): |
|
|
return False |
|
|
if val in (1, 1.0): |
|
|
return True |
|
|
if val in (0, 0.0): |
|
|
return False |
|
|
return None |
|
|
|
|
|
return val |
|
|
|
|
|
    # Convert every CSV row into a (benchmark_id, measurements-dict) pair and
    # bulk-insert them; any failure aborts the push with a non-zero exit.
    try:
        rows_to_insert = []
        for _, row in df.iterrows():
            # Cast each known column to its expected type; missing or
            # malformed cells become None (null inside the JSON payload).
            scenario = _cast_value(row.get("scenario"), "text")
            model_cls = _cast_value(row.get("model_cls"), "text")
            num_params_B = _cast_value(row.get("num_params_B"), "float")
            flops_G = _cast_value(row.get("flops_G"), "float")
            time_plain_s = _cast_value(row.get("time_plain_s"), "float")
            mem_plain_GB = _cast_value(row.get("mem_plain_GB"), "float")
            time_compile_s = _cast_value(row.get("time_compile_s"), "float")
            mem_compile_GB = _cast_value(row.get("mem_compile_GB"), "float")
            fullgraph = _cast_value(row.get("fullgraph"), "bool")
            mode = _cast_value(row.get("mode"), "text")

            # Older result CSVs may predate the github_sha column.
            if "github_sha" in df.columns:
                github_sha = _cast_value(row.get("github_sha"), "text")
            else:
                github_sha = None

            # Stored as a single JSON column value, thanks to the dict->Json
            # adapter registered at import time.
            measurements = {
                "scenario": scenario,
                "model_cls": model_cls,
                "num_params_B": num_params_B,
                "flops_G": flops_G,
                "time_plain_s": time_plain_s,
                "mem_plain_GB": mem_plain_GB,
                "time_compile_s": time_compile_s,
                "mem_compile_GB": mem_compile_GB,
                "fullgraph": fullgraph,
                "mode": mode,
                "github_sha": github_sha,
            }
            rows_to_insert.append((benchmark_id, measurements))

        insert_sql = f"""
        INSERT INTO {MEASUREMENTS_TABLE_NAME} (
            benchmark_id,
            measurements
        )
        VALUES (%s, %s);
        """

        # execute_batch groups the INSERTs into fewer round trips than
        # executemany; commit once for the whole batch.
        psycopg2.extras.execute_batch(cur, insert_sql, rows_to_insert)
        conn.commit()

        cur.close()
        conn.close()
    except Exception as e:
        print(f"Exception: {e}")
        sys.exit(1)
|
|
|