Spaces:
Running
Running
<#
.SYNOPSIS
Launches scripts\\run_t5_sweep.py as a hidden background process and records
the launch metadata.

.DESCRIPTION
Steps performed, in order:
  1. Fill in defaults for Timestamp, RuntimeDir and SweepOutDir.
  2. Change location to the parent directory of the folder holding this script.
  3. Create RuntimeDir if it does not exist.
  4. Run a short torch/CUDA check with .venv-gpu\\Scripts\\python.exe and throw
     if that interpreter exits with a non-zero code.
  5. Start the sweep script with Start-Process (stdout/stderr redirected to
     log files inside RuntimeDir) without waiting for it to finish.
  6. Write a JSON description of the launch to <RuntimeDir>\launch.json and
     also emit the same JSON on the output stream.

Note: path literals below use doubled backslashes. PowerShell does not treat
backslash as an escape character, so the doubled separators are literal; they
are kept exactly as they were (presumably tolerated by Windows path handling -
TODO confirm before normalizing).

.PARAMETER Timestamp
Label used in the default directory names and stored in launch.json. When
blank, the current date/time formatted as yyyyMMdd_HHmmss is used.

.PARAMETER RuntimeDir
Directory that receives sweep.stdout.log, sweep.stderr.log and launch.json;
also forwarded as --runtime-dir. Defaults to
data\\runtime_metrics\\t5_sweep_<Timestamp>.

.PARAMETER SweepOutDir
Forwarded as --sweep-out-dir. Defaults to models\\finetune\\t5-sweep-<Timestamp>.

.PARAMETER AnalysisDir
Forwarded as --analysis-dir.

.PARAMETER Stage1Steps
Forwarded as --stage1-steps.

.PARAMETER Stage2Steps
Forwarded as --stage2-steps.

.PARAMETER TopK
Forwarded as --top-k.

.PARAMETER EvalSteps
Forwarded as --eval-steps.

.PARAMETER TestEvalEverySteps
Forwarded as --test-eval-every-steps.

.PARAMETER MaxValSamples
Forwarded as --max-val-samples.

.PARAMETER MaxTestSamples
Forwarded as --max-test-samples.

.PARAMETER LrList
Forwarded verbatim as --lr-list (comma-separated text by default).

.PARAMETER LengthPenaltyList
Forwarded verbatim as --length-penalty-list.

.PARAMETER BeamsList
Forwarded verbatim as --beams-list.

.PARAMETER Force
When set, --force is appended to the sweep script's arguments.

.OUTPUTS
A JSON string describing the launch (pid, paths, command line, GPU preflight
output).
#>
param(
    [string]$Timestamp = "",
    [string]$RuntimeDir = "",
    [string]$SweepOutDir = "",
    [string]$AnalysisDir = "data\\analysis",
    [int]$Stage1Steps = 1000,
    [int]$Stage2Steps = 3750,
    [int]$TopK = 2,
    [int]$EvalSteps = 500,
    [int]$TestEvalEverySteps = 1000,
    [int]$MaxValSamples = 128,
    [int]$MaxTestSamples = 128,
    [string]$LrList = "1e-4,2e-4",
    [string]$LengthPenaltyList = "0.7,0.8,0.9",
    [string]$BeamsList = "4",
    [switch]$Force
)

# Turn non-terminating cmdlet errors into terminating ones so a failed step
# (e.g. Resolve-Path on a missing interpreter) aborts the launch.
$ErrorActionPreference = "Stop"

if ([string]::IsNullOrWhiteSpace($Timestamp)) {
    $Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
}

# All relative paths below are resolved against the parent of the script folder.
$repoRoot = Split-Path -Parent $PSScriptRoot
Set-Location $repoRoot

if ([string]::IsNullOrWhiteSpace($RuntimeDir)) {
    $RuntimeDir = "data\\runtime_metrics\\t5_sweep_$Timestamp"
}
if ([string]::IsNullOrWhiteSpace($SweepOutDir)) {
    $SweepOutDir = "models\\finetune\\t5-sweep-$Timestamp"
}

# -Force makes this a no-op when the directory already exists.
New-Item -ItemType Directory -Force -Path $RuntimeDir | Out-Null

$stdoutLog = Join-Path $RuntimeDir "sweep.stdout.log"
$stderrLog = Join-Path $RuntimeDir "sweep.stderr.log"
$launchJson = Join-Path $RuntimeDir "launch.json"

$pythonExe = (Resolve-Path ".venv-gpu\\Scripts\\python.exe").Path

# GPU preflight: the one-liner prints torch/CUDA info and exits with 2 when
# torch.cuda.is_available() is false; any non-zero exit aborts the launch.
$gpuCheck = & $pythonExe -c "import torch,sys; ok=torch.cuda.is_available(); name=torch.cuda.get_device_name(0) if ok else ''; print(f'torch={torch.__version__} cuda={ok} device={name}'); sys.exit(0 if ok else 2)"
if ($LASTEXITCODE -ne 0) {
    throw "GPU preflight failed in .venv-gpu. CUDA is not available."
}

$argList = @(
    "scripts\\run_t5_sweep.py",
    "--sweep-out-dir", $SweepOutDir,
    "--runtime-dir", $RuntimeDir,
    "--analysis-dir", $AnalysisDir,
    "--stage1-steps", "$Stage1Steps",
    "--stage2-steps", "$Stage2Steps",
    "--top-k", "$TopK",
    "--eval-steps", "$EvalSteps",
    "--test-eval-every-steps", "$TestEvalEverySteps",
    "--max-val-samples", "$MaxValSamples",
    "--max-test-samples", "$MaxTestSamples",
    "--lr-list", $LrList,
    "--length-penalty-list", $LengthPenaltyList,
    "--beams-list", $BeamsList
)
if ($Force) {
    $argList += "--force"
}

# Start-Process joins -ArgumentList elements with single spaces and does NOT
# quote them, so a value containing whitespace (e.g. a RuntimeDir under
# "My Runs") would reach the child process split into several arguments.
# Quote such values ourselves, following the Windows C-runtime argv rules:
#   * backslashes that directly precede a double quote are doubled and the
#     quote itself is backslash-escaped;
#   * trailing backslashes are doubled so they cannot escape the closing quote.
# Values with no whitespace or quotes are passed through untouched.
$quoteArg = {
    param([string]$Value)
    if ($Value.Length -gt 0 -and $Value -notmatch '[\s"]') {
        return $Value
    }
    $escaped = $Value -replace '(\\*)"', '$1$1\"'
    $escaped = $escaped -replace '(\\+)$', '$1$1'
    return '"' + $escaped + '"'
}
$quotedArgs = @($argList | ForEach-Object { & $quoteArg $_ })

# Launch detached (no -Wait): the sweep keeps running after this script ends.
# -PassThru returns the process object so its id can be recorded.
$startParams = @{
    FilePath               = $pythonExe
    ArgumentList           = $quotedArgs
    WorkingDirectory       = $repoRoot
    RedirectStandardOutput = $stdoutLog
    RedirectStandardError  = $stderrLog
    WindowStyle            = "Hidden"
    PassThru               = $true
}
$proc = Start-Process @startParams

# Record the command line with the same quoting that was actually used, so the
# logged string reflects the real invocation even when values contain spaces.
$commandLine = (@(& $quoteArg $pythonExe) + $quotedArgs) -join " "

$payload = [ordered]@{
    timestamp     = $Timestamp
    pid           = $proc.Id
    python        = $pythonExe
    gpu_preflight = ($gpuCheck -join "`n")
    runtime_dir   = $RuntimeDir
    sweep_out_dir = $SweepOutDir
    analysis_dir  = $AnalysisDir
    stdout_log    = $stdoutLog
    stderr_log    = $stderrLog
    progress_glob = (Join-Path $RuntimeDir "*__progress.json")
    history_glob  = (Join-Path $RuntimeDir "*__history.jsonl")
    command       = $commandLine
}

# Serialize once; the same text is persisted and emitted to the caller.
$payloadJson = $payload | ConvertTo-Json -Depth 5
$payloadJson | Set-Content -Encoding UTF8 $launchJson
$payloadJson