"""Search GPU / cloud / GPU-count combinations for the fastest in-budget run."""

from estimator import GPU_SPECS, estimate_tps_with_calibration, total_tokens
from scaling import scale_tps
from cost import estimate_cost
from memory_estimator import estimate_memory, suggest_batch

# Cloud providers searched when the caller does not restrict the selection.
CLOUDS = ["AWS", "Azure", "GCP"]
# GPU models searched when the caller does not restrict the selection.
GPUS = list(GPU_SPECS.keys())


def find_best_config(
    params,
    dataset,
    epochs,
    seq_len,
    precision,
    efficiency,
    correction,
    budget,
    pricing_mode,
    training_mode="QLoRA",
    gradient_checkpointing=True,
    selected_gpus=None,
    selected_clouds=None,
    max_gpus=8,
    calibration=None,
    dataset_mode="Examples / sequences",
):
    """Return the fastest configuration whose cost does not exceed ``budget``.

    Every combination of candidate GPU, cloud and GPU count (1, 2, 4, 8,
    capped at ``max_gpus``) is evaluated. Candidates are ranked by estimated
    training time, with estimated cost as the tie-breaker; among exact ties
    the first candidate encountered is kept.

    Args:
        params: Model size, forwarded to the batch, memory and throughput
            estimators.
        dataset: Total token count per epoch when ``dataset_mode`` is
            ``"Total tokens"``; otherwise forwarded to ``total_tokens``
            together with ``epochs`` and ``seq_len``.
        epochs: Number of passes over the dataset.
        seq_len: Sequence length, forwarded to the estimators.
        precision: Forwarded to the batch, memory and throughput estimators.
        efficiency: Forwarded to ``estimate_tps_with_calibration``.
        correction: Forwarded to ``estimate_tps_with_calibration``.
        budget: Upper bound on cost; candidates costing more are skipped.
        pricing_mode: Forwarded to ``estimate_cost``.
        training_mode: Forwarded to the batch, memory and throughput
            estimators.
        gradient_checkpointing: Forwarded to the batch and memory estimators.
        selected_gpus: GPU names to consider; all of ``GPUS`` when falsy.
            Each name must be a key of ``GPU_SPECS``.
        selected_clouds: Clouds to consider; all of ``CLOUDS`` when falsy.
        max_gpus: Largest GPU count to consider.
        calibration: Forwarded to ``estimate_tps_with_calibration``.
        dataset_mode: ``"Total tokens"`` to treat ``dataset`` as a token
            count; any other value routes through ``total_tokens``.

    Returns:
        A dict with keys ``gpu``, ``cloud``, ``gpus``, ``batch``,
        ``memory_gb``, ``time`` and ``cost`` (the last three rounded to two
        decimals), or ``None`` when no candidate is feasible and in budget.
    """
    if dataset_mode == "Total tokens":
        tokens = dataset * epochs
    else:
        tokens = total_tokens(dataset, epochs, seq_len)

    candidate_gpus = selected_gpus or GPUS
    candidate_clouds = selected_clouds or CLOUDS
    gpu_count_options = [g for g in (1, 2, 4, 8) if g <= max_gpus]

    best = None
    # Unrounded (time_h, cost) of the current best. Ranking must use the raw
    # values: comparing against the rounded numbers stored in ``best`` made
    # the cost tie-break effectively unreachable and could let an equally
    # fast but more expensive candidate displace the incumbent.
    best_key = None

    for gpu in candidate_gpus:
        gpu_mem = GPU_SPECS[gpu]["memory"]
        batch = suggest_batch(
            params,
            seq_len,
            gpu_mem,
            precision,
            training_mode,
            gradient_checkpointing,
        )
        if batch < 1:
            # Not even a single-sample batch is suggested for this GPU.
            continue

        mem_gb = estimate_memory(
            params,
            seq_len,
            batch,
            precision,
            training_mode,
            gradient_checkpointing,
        )

        # Single-GPU throughput does not depend on cloud or GPU count, so it
        # is computed once per GPU rather than inside the inner loops.
        # NOTE(review): assumes estimate_tps_with_calibration has no side
        # effects - confirm.
        base_tps = estimate_tps_with_calibration(
            params,
            gpu,
            efficiency,
            correction,
            precision,
            seq_len,
            training_mode,
            calibration,
        )

        for cloud in candidate_clouds:
            for g in gpu_count_options:
                tps = scale_tps(base_tps, g)
                if tps <= 0:
                    continue

                # Presumably tps is tokens per second, making this hours.
                time_h = tokens / tps / 3600
                cost = estimate_cost(gpu, time_h, g, cloud, pricing_mode)
                if cost is None or cost > budget:
                    continue

                # Lexicographic order: faster wins, cheaper breaks ties.
                candidate_key = (time_h, cost)
                if best_key is None or candidate_key < best_key:
                    best_key = candidate_key
                    best = {
                        "gpu": gpu,
                        "cloud": cloud,
                        "gpus": g,
                        "batch": batch,
                        "memory_gb": round(mem_gb, 2),
                        "time": round(time_h, 2),
                        "cost": round(cost, 2),
                    }

    return best