Spaces:

Sushovan
/

slm-estimator

Running

Chakraborty Sushovan (SX/EDI1-MM)

modified logic and updated UI

e268c11 about 1 month ago

2.64 kB

	from estimator import GPU_SPECS, estimate_tps_with_calibration, total_tokens
	from scaling import scale_tps
	from cost import estimate_cost
	from memory_estimator import estimate_memory, suggest_batch

	# Cloud providers searched when the caller does not pass selected_clouds.
	CLOUDS = [
	    "AWS",
	    "Azure",
	    "GCP",
	]
	# GPU names searched when the caller does not pass selected_gpus
	# (the keys of GPU_SPECS, in its iteration order).
	GPUS = list(GPU_SPECS)


	def find_best_config(
	    params,
	    dataset,
	    epochs,
	    seq_len,
	    precision,
	    efficiency,
	    correction,
	    budget,
	    pricing_mode,
	    training_mode="QLoRA",
	    gradient_checkpointing=True,
	    selected_gpus=None,
	    selected_clouds=None,
	    max_gpus=8,
	    calibration=None,
	    dataset_mode="Examples / sequences",
	):
	    """Return the fastest GPU / cloud / GPU-count combination within budget.

	    Every candidate GPU is tried on every candidate cloud with 1, 2, 4 and 8
	    GPUs (capped by ``max_gpus``). Candidates whose estimated cost is
	    unavailable or exceeds ``budget`` are discarded. Among the rest the one
	    with the lowest estimated time wins; equal times are broken by lower cost.

	    Args:
	        params: Model size, forwarded to the memory and throughput estimators.
	        dataset: Dataset size. Interpreted as a token count when
	            ``dataset_mode`` is ``"Total tokens"``; otherwise passed to
	            ``total_tokens`` together with ``epochs`` and ``seq_len``.
	        epochs: Number of passes over the dataset.
	        seq_len: Sequence length, forwarded to the estimators.
	        precision: Numeric precision, forwarded to the estimators.
	        efficiency: Forwarded to ``estimate_tps_with_calibration``.
	        correction: Forwarded to ``estimate_tps_with_calibration``.
	        budget: Upper bound on the estimated cost of an acceptable candidate.
	        pricing_mode: Forwarded to ``estimate_cost``.
	        training_mode: Forwarded to the memory and throughput estimators.
	        gradient_checkpointing: Forwarded to the memory estimators.
	        selected_gpus: GPU names to consider; falsy means all of ``GPUS``.
	        selected_clouds: Clouds to consider; falsy means all of ``CLOUDS``.
	        max_gpus: Largest GPU count to consider.
	        calibration: Forwarded to ``estimate_tps_with_calibration``.
	        dataset_mode: How ``dataset`` is interpreted (see ``dataset``).

	    Returns:
	        A dict with keys ``gpu``, ``cloud``, ``gpus``, ``batch``,
	        ``memory_gb``, ``time`` and ``cost`` (the last three rounded to two
	        decimals), or ``None`` when no candidate fits.
	    """
	    if dataset_mode == "Total tokens":
	        tokens = dataset * epochs
	    else:
	        tokens = total_tokens(dataset, epochs, seq_len)

	    candidate_gpus = selected_gpus or GPUS
	    candidate_clouds = selected_clouds or CLOUDS
	    gpu_count_options = [g for g in (1, 2, 4, 8) if g <= max_gpus]
	    if not gpu_count_options:
	        # max_gpus < 1 leaves nothing to evaluate.
	        return None

	    best = None
	    # Unrounded (time_h, cost) of `best`. Ranking must use the raw values:
	    # comparing a raw candidate against the rounded numbers stored in `best`
	    # lets a pricier cloud with the same runtime displace a cheaper one.
	    best_key = None

	    for gpu in candidate_gpus:
	        gpu_mem = GPU_SPECS[gpu]["memory"]
	        batch = suggest_batch(
	            params,
	            seq_len,
	            gpu_mem,
	            precision,
	            training_mode,
	            gradient_checkpointing,
	        )
	        if batch < 1:
	            # suggest_batch found no usable batch size for this GPU.
	            continue

	        mem_gb = estimate_memory(
	            params,
	            seq_len,
	            batch,
	            precision,
	            training_mode,
	            gradient_checkpointing,
	        )

	        # Single-GPU throughput depends on neither the cloud nor the GPU
	        # count, so it is computed once per GPU rather than per combination.
	        base_tps = estimate_tps_with_calibration(
	            params,
	            gpu,
	            efficiency,
	            correction,
	            precision,
	            seq_len,
	            training_mode,
	            calibration,
	        )

	        # Runtime per GPU count is likewise independent of the cloud.
	        timings = []
	        for g in gpu_count_options:
	            tps = scale_tps(base_tps, g)
	            if tps <= 0:
	                continue
	            # presumably tps is tokens per second, making this hours
	            timings.append((g, tokens / tps / 3600))

	        for cloud in candidate_clouds:
	            for g, time_h in timings:
	                cost = estimate_cost(gpu, time_h, g, cloud, pricing_mode)
	                if cost is None or cost > budget:
	                    continue

	                # Tuple ordering: faster first, then cheaper on equal time.
	                key = (time_h, cost)
	                if best_key is None or key < best_key:
	                    best_key = key
	                    best = {
	                        "gpu": gpu,
	                        "cloud": cloud,
	                        "gpus": g,
	                        "batch": batch,
	                        "memory_gb": round(mem_gb, 2),
	                        "time": round(time_h, 2),
	                        "cost": round(cost, 2),
	                    }

	    return best