|
|
import argparse |
|
|
import os |
|
|
import sys |
|
|
|
|
|
import gpustat |
|
|
import pandas as pd |
|
|
import psycopg2 |
|
|
import psycopg2.extras |
|
|
from psycopg2.extensions import register_adapter |
|
|
from psycopg2.extras import Json |
|
|
|
|
|
|
|
|
# Teach psycopg2 to serialize plain Python dicts as PostgreSQL JSON values,
# so the per-row `measurements` dict below can be passed directly as a
# query parameter without an explicit Json(...) wrap.
register_adapter(dict, Json)

# CSV produced by the benchmarking run; read below and pushed to the DB.
FINAL_CSV_FILENAME = "collated_results.csv"

# Destination tables: one parent row per benchmark run, and one child row
# per model measurement referencing it via benchmark_id.
BENCHMARKS_TABLE_NAME = "benchmarks"
MEASUREMENTS_TABLE_NAME = "model_measurements"
|
|
|
|
|
|
|
|
def _init_benchmark(conn, branch, commit_id, commit_msg):
    """Insert a new parent row into the benchmarks table and return its id.

    Records the repository, branch, commit hash/message, and the name of the
    first visible GPU (stored in the JSON ``metadata`` column via the
    dict->Json adapter registered at import time).
    """
    gpu_name = gpustat.GPUStatCollection.new_query()[0]["name"]
    values = (
        "huggingface/diffusers",
        branch,
        commit_id,
        commit_msg,
        {"gpu_name": gpu_name},
    )
    with conn.cursor() as cursor:
        cursor.execute(
            f"INSERT INTO {BENCHMARKS_TABLE_NAME} (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
            values,
        )
        new_id = cursor.fetchone()[0]
        print(f"Initialised benchmark #{new_id}")
        return new_id
|
|
|
|
|
|
|
|
def parse_args():
    """Parse the positional CLI arguments identifying the benchmarked commit.

    Returns an argparse.Namespace with ``branch``, ``commit_id`` and
    ``commit_msg`` attributes (all strings).
    """
    parser = argparse.ArgumentParser()
    positionals = (
        ("branch", "The branch name on which the benchmarking is performed."),
        ("commit_id", "The commit hash on which the benchmarking is performed."),
        ("commit_msg", "The commit message associated with the commit, truncated to 70 characters."),
    )
    for name, help_text in positionals:
        parser.add_argument(name, type=str, help=help_text)
    return parser.parse_args()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    args = parse_args()

    # Connection parameters come from the standard libpq environment
    # variables (PGHOST/PGDATABASE/PGUSER/PGPASSWORD); fail fast with a
    # non-zero exit if the database is unreachable.
    try:
        conn = psycopg2.connect(
            host=os.getenv("PGHOST"),
            database=os.getenv("PGDATABASE"),
            user=os.getenv("PGUSER"),
            password=os.getenv("PGPASSWORD"),
        )
        print("DB connection established successfully.")
    except Exception as e:
        print(f"Problem during DB init: {e}")
        sys.exit(1)

    # Create the parent benchmark row first; the measurement rows inserted
    # later all reference this id.
    try:
        benchmark_id = _init_benchmark(
            conn=conn,
            branch=args.branch,
            commit_id=args.commit_id,
            commit_msg=args.commit_msg,
        )
    except Exception as e:
        print(f"Problem during initializing benchmark: {e}")
        sys.exit(1)

    # Plain cursor (not a context manager); closed explicitly after the
    # batch insert further below.
    cur = conn.cursor()

    # Collated per-model results produced by the benchmarking run.
    df = pd.read_csv(FINAL_CSV_FILENAME)
|
|
|
|
|
|
|
|
def _cast_value(val, dtype: str): |
|
|
if pd.isna(val): |
|
|
return None |
|
|
|
|
|
if dtype == "text": |
|
|
return str(val).strip() |
|
|
|
|
|
if dtype == "float": |
|
|
try: |
|
|
return float(val) |
|
|
except ValueError: |
|
|
return None |
|
|
|
|
|
if dtype == "bool": |
|
|
s = str(val).strip().lower() |
|
|
if s in ("true", "t", "yes", "1"): |
|
|
return True |
|
|
if s in ("false", "f", "no", "0"): |
|
|
return False |
|
|
if val in (1, 1.0): |
|
|
return True |
|
|
if val in (0, 0.0): |
|
|
return False |
|
|
return None |
|
|
|
|
|
return val |
|
|
|
|
|
    # Convert every CSV row into a (benchmark_id, measurements-dict) pair and
    # bulk-insert them; any failure aborts the push with a non-zero exit.
    try:
        rows_to_insert = []
        for _, row in df.iterrows():
            # Cast each known column to its expected type; missing or
            # malformed cells become None (null inside the JSON payload).
            scenario = _cast_value(row.get("scenario"), "text")
            model_cls = _cast_value(row.get("model_cls"), "text")
            num_params_B = _cast_value(row.get("num_params_B"), "float")
            flops_G = _cast_value(row.get("flops_G"), "float")
            time_plain_s = _cast_value(row.get("time_plain_s"), "float")
            mem_plain_GB = _cast_value(row.get("mem_plain_GB"), "float")
            time_compile_s = _cast_value(row.get("time_compile_s"), "float")
            mem_compile_GB = _cast_value(row.get("mem_compile_GB"), "float")
            fullgraph = _cast_value(row.get("fullgraph"), "bool")
            mode = _cast_value(row.get("mode"), "text")

            # Older result CSVs may predate the github_sha column.
            if "github_sha" in df.columns:
                github_sha = _cast_value(row.get("github_sha"), "text")
            else:
                github_sha = None

            # Stored as a single JSON column value, thanks to the dict->Json
            # adapter registered at import time.
            measurements = {
                "scenario": scenario,
                "model_cls": model_cls,
                "num_params_B": num_params_B,
                "flops_G": flops_G,
                "time_plain_s": time_plain_s,
                "mem_plain_GB": mem_plain_GB,
                "time_compile_s": time_compile_s,
                "mem_compile_GB": mem_compile_GB,
                "fullgraph": fullgraph,
                "mode": mode,
                "github_sha": github_sha,
            }
            rows_to_insert.append((benchmark_id, measurements))

        insert_sql = f"""
        INSERT INTO {MEASUREMENTS_TABLE_NAME} (
            benchmark_id,
            measurements
        )
        VALUES (%s, %s);
        """

        # execute_batch groups the INSERTs into fewer round trips than
        # executemany; commit once for the whole batch.
        psycopg2.extras.execute_batch(cur, insert_sql, rows_to_insert)
        conn.commit()

        cur.close()
        conn.close()
    except Exception as e:
        print(f"Exception: {e}")
        sys.exit(1)
|
|
|