<#
.SYNOPSIS
    Launches scripts\run_t5_sweep.py as a hidden background process and records
    the launch details in launch.json.

.DESCRIPTION
    Changes the current location to the parent directory of the folder holding
    this script, verifies that the interpreter in .venv-gpu reports CUDA as
    available, then starts the sweep script with stdout/stderr redirected to log
    files inside the runtime directory. A JSON description of the launch is
    written to <RuntimeDir>\launch.json and also emitted on the output stream.
    The script does not wait for the sweep process to finish.

.PARAMETER Timestamp
    Label used in the default runtime/output directory names. Defaults to the
    current time formatted as yyyyMMdd_HHmmss.

.PARAMETER RuntimeDir
    Directory for the logs and launch.json; forwarded as --runtime-dir.
    Defaults to data\\runtime_metrics\\t5_sweep_<Timestamp>.

.PARAMETER SweepOutDir
    Forwarded as --sweep-out-dir. Defaults to models\\finetune\\t5-sweep-<Timestamp>.

.PARAMETER AnalysisDir
    Forwarded as --analysis-dir.

.PARAMETER Stage1Steps
    Forwarded as --stage1-steps.

.PARAMETER Stage2Steps
    Forwarded as --stage2-steps.

.PARAMETER TopK
    Forwarded as --top-k.

.PARAMETER EvalSteps
    Forwarded as --eval-steps.

.PARAMETER TestEvalEverySteps
    Forwarded as --test-eval-every-steps.

.PARAMETER MaxValSamples
    Forwarded as --max-val-samples.

.PARAMETER MaxTestSamples
    Forwarded as --max-test-samples.

.PARAMETER LrList
    Forwarded as --lr-list.

.PARAMETER LengthPenaltyList
    Forwarded as --length-penalty-list.

.PARAMETER BeamsList
    Forwarded as --beams-list.

.PARAMETER Force
    When set, appends --force to the sweep script's arguments.
#>
param(
    [string]$Timestamp = "",
    [string]$RuntimeDir = "",
    [string]$SweepOutDir = "",
    [string]$AnalysisDir = "data\\analysis",
    [int]$Stage1Steps = 1000,
    [int]$Stage2Steps = 3750,
    [int]$TopK = 2,
    [int]$EvalSteps = 500,
    [int]$TestEvalEverySteps = 1000,
    [int]$MaxValSamples = 128,
    [int]$MaxTestSamples = 128,
    [string]$LrList = "1e-4,2e-4",
    [string]$LengthPenaltyList = "0.7,0.8,0.9",
    [string]$BeamsList = "4",
    [switch]$Force
)

$ErrorActionPreference = "Stop"

# Quotes one command-line argument so the child process receives it as a single
# token. Start-Process joins -ArgumentList elements with spaces and adds no
# quoting of its own, so values containing whitespace would otherwise be split
# and empty values would vanish from the command line.
function ConvertTo-QuotedArgument {
    param([string]$Value)

    if ($Value.Length -gt 0 -and $Value -notmatch '[\s"]') {
        return $Value
    }

    # Escape embedded quotes, doubling any backslashes that directly precede them.
    $escaped = $Value -replace '(\\*)"', '$1$1\"'
    # Double trailing backslashes so they do not escape the closing quote.
    $escaped = $escaped -replace '(\\+)$', '$1$1'
    return '"' + $escaped + '"'
}

if ([string]::IsNullOrWhiteSpace($Timestamp)) {
    $Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
}

# All relative paths below are interpreted from the parent of the script's folder.
$repoRoot = Split-Path -Parent $PSScriptRoot
Set-Location $repoRoot

if ([string]::IsNullOrWhiteSpace($RuntimeDir)) {
    $RuntimeDir = "data\\runtime_metrics\\t5_sweep_$Timestamp"
}
if ([string]::IsNullOrWhiteSpace($SweepOutDir)) {
    $SweepOutDir = "models\\finetune\\t5-sweep-$Timestamp"
}

New-Item -ItemType Directory -Force -Path $RuntimeDir | Out-Null

$stdoutLog = Join-Path $RuntimeDir "sweep.stdout.log"
$stderrLog = Join-Path $RuntimeDir "sweep.stderr.log"
$launchJson = Join-Path $RuntimeDir "launch.json"

# Resolve-Path fails (terminating, because of the Stop preference) when the
# interpreter is missing.
$pythonExe = (Resolve-Path ".venv-gpu\\Scripts\\python.exe").Path

# Preflight: the one-liner exits with 2 when torch reports that CUDA is unavailable.
$gpuCheck = & $pythonExe -c "import torch,sys; ok=torch.cuda.is_available(); name=torch.cuda.get_device_name(0) if ok else ''; print(f'torch={torch.__version__} cuda={ok} device={name}'); sys.exit(0 if ok else 2)"
if ($LASTEXITCODE -ne 0) {
    throw "GPU preflight failed in .venv-gpu. CUDA is not available."
}

$argList = @(
    "scripts\\run_t5_sweep.py",
    "--sweep-out-dir", $SweepOutDir,
    "--runtime-dir", $RuntimeDir,
    "--analysis-dir", $AnalysisDir,
    "--stage1-steps", "$Stage1Steps",
    "--stage2-steps", "$Stage2Steps",
    "--top-k", "$TopK",
    "--eval-steps", "$EvalSteps",
    "--test-eval-every-steps", "$TestEvalEverySteps",
    "--max-val-samples", "$MaxValSamples",
    "--max-test-samples", "$MaxTestSamples",
    "--lr-list", $LrList,
    "--length-penalty-list", $LengthPenaltyList,
    "--beams-list", $BeamsList
)
if ($Force) {
    $argList += "--force"
}

# Quote every element up front; elements that need no quoting pass through unchanged.
$quotedArgs = @($argList | ForEach-Object { ConvertTo-QuotedArgument $_ })

$startParams = @{
    FilePath               = $pythonExe
    ArgumentList           = $quotedArgs
    WorkingDirectory       = $repoRoot
    RedirectStandardOutput = $stdoutLog
    RedirectStandardError  = $stderrLog
    WindowStyle            = 'Hidden'
    PassThru               = $true   # needed to obtain the process object and its Id
}
$proc = Start-Process @startParams

$payload = [ordered]@{
    timestamp     = $Timestamp
    pid           = $proc.Id
    python        = $pythonExe
    gpu_preflight = ($gpuCheck -join "`n")
    runtime_dir   = $RuntimeDir
    sweep_out_dir = $SweepOutDir
    analysis_dir  = $AnalysisDir
    stdout_log    = $stdoutLog
    stderr_log    = $stderrLog
    progress_glob = (Join-Path $RuntimeDir "*__progress.json")
    history_glob  = (Join-Path $RuntimeDir "*__history.jsonl")
    # Built from the quoted arguments so the record matches what was launched.
    command       = ($pythonExe + " " + ($quotedArgs -join " "))
}

# Persist the launch record, then emit the same JSON on the output stream.
$payload | ConvertTo-Json -Depth 5 | Set-Content -Encoding UTF8 $launchJson
$payload | ConvertTo-Json -Depth 5