# slm-estimator / optimizer.py
# Chakraborty Sushovan (SX/EDI1-MM)
# modified logic and updated UI
# e268c11
from estimator import GPU_SPECS, estimate_tps_with_calibration, total_tokens
from scaling import scale_tps
from cost import estimate_cost
from memory_estimator import estimate_memory, suggest_batch
# Cloud providers searched by find_best_config when the caller passes no
# explicit selection.
CLOUDS = ["AWS", "Azure", "GCP"]

# GPU names (the keys of GPU_SPECS, in its iteration order) searched by
# find_best_config when the caller passes no explicit selection.
GPUS = list(GPU_SPECS)
def find_best_config(
    params,
    dataset,
    epochs,
    seq_len,
    precision,
    efficiency,
    correction,
    budget,
    pricing_mode,
    training_mode="QLoRA",
    gradient_checkpointing=True,
    selected_gpus=None,
    selected_clouds=None,
    max_gpus=8,
    calibration=None,
    dataset_mode="Examples / sequences",
):
    """Return the fastest GPU/cloud/GPU-count combination that fits the budget.

    Every candidate GPU is sized with ``suggest_batch`` and
    ``estimate_memory`` and then paired with every candidate cloud and every
    allowed GPU count. Combinations whose estimated cost is unavailable or
    exceeds ``budget`` are discarded. Among the rest, the one with the lowest
    estimated training time wins; exact ties on time are broken by the lower
    cost.

    Args:
        params: Model size value, forwarded unchanged to the memory and
            throughput estimators.
        dataset: Dataset size. Treated as a token count per epoch when
            ``dataset_mode`` is ``"Total tokens"``; otherwise passed to
            ``total_tokens`` together with ``epochs`` and ``seq_len``.
        epochs: Number of passes over the dataset.
        seq_len: Sequence length, forwarded to the estimators.
        precision: Numeric precision setting, forwarded to the estimators.
        efficiency: Forwarded to ``estimate_tps_with_calibration``.
        correction: Forwarded to ``estimate_tps_with_calibration``.
        budget: Maximum acceptable cost, in the units ``estimate_cost``
            returns. Candidates costing more are skipped.
        pricing_mode: Forwarded to ``estimate_cost``.
        training_mode: Training method name, forwarded to the estimators.
        gradient_checkpointing: Forwarded to the memory estimators.
        selected_gpus: GPU names to consider. ``None`` or an empty sequence
            means every GPU in ``GPUS``.
        selected_clouds: Cloud names to consider. ``None`` or an empty
            sequence means every cloud in ``CLOUDS``.
        max_gpus: Upper bound on the GPU count; only the counts 1, 2, 4 and 8
            that do not exceed it are tried.
        calibration: Optional calibration data, forwarded to
            ``estimate_tps_with_calibration``.
        dataset_mode: ``"Total tokens"`` to treat ``dataset`` as a token
            count; any other value treats it as a number of sequences.

    Returns:
        A dict with the keys ``gpu``, ``cloud``, ``gpus``, ``batch``,
        ``memory_gb``, ``time`` and ``cost`` (the last three rounded to two
        decimals) describing the winning configuration, or ``None`` when no
        combination fits. ``time`` is in hours, assuming the throughput
        helpers report tokens per second.
    """
    if dataset_mode == "Total tokens":
        tokens = dataset * epochs
    else:
        tokens = total_tokens(dataset, epochs, seq_len)

    candidate_gpus = selected_gpus or GPUS
    candidate_clouds = selected_clouds or CLOUDS
    gpu_count_options = [g for g in (1, 2, 4, 8) if g <= max_gpus]

    best = None
    # Unrounded (time, cost) of the current best. Candidates are ranked on
    # these exact values; comparing against the rounded numbers stored in the
    # result dict made the time tie-break practically unreachable and let
    # rounding direction decide which of two equally fast options won.
    best_key = None

    for gpu in candidate_gpus:
        gpu_mem = GPU_SPECS[gpu]["memory"]
        batch = suggest_batch(
            params,
            seq_len,
            gpu_mem,
            precision,
            training_mode,
            gradient_checkpointing,
        )
        if batch < 1:
            # Not even a single-sample batch is suggested for this GPU.
            continue

        mem_gb = estimate_memory(
            params,
            seq_len,
            batch,
            precision,
            training_mode,
            gradient_checkpointing,
        )

        # Single-GPU throughput depends only on the GPU and the model
        # settings, not on the cloud or the GPU count, so compute it once.
        base_tps = estimate_tps_with_calibration(
            params,
            gpu,
            efficiency,
            correction,
            precision,
            seq_len,
            training_mode,
            calibration,
        )

        for cloud in candidate_clouds:
            for g in gpu_count_options:
                tps = scale_tps(base_tps, g)
                if tps <= 0:
                    continue

                time_h = tokens / tps / 3600
                cost = estimate_cost(gpu, time_h, g, cloud, pricing_mode)
                if cost is None or cost > budget:
                    continue

                # Lexicographic order: faster first, then cheaper on a tie.
                # The same GPU and count on another cloud yields an identical
                # time, so this tie-break is what selects the cheapest cloud.
                key = (time_h, cost)
                if best_key is None or key < best_key:
                    best_key = key
                    best = {
                        "gpu": gpu,
                        "cloud": cloud,
                        "gpus": g,
                        "batch": batch,
                        "memory_gb": round(mem_gb, 2),
                        "time": round(time_h, 2),
                        "cost": round(cost, 2),
                    }

    return best