# Prompt_Squirrel_RAG/scripts/launch_t5_sweep.ps1
# Author: Food Desert
# Commit: Roll out T5 rewrite updates, tooling, docs, and artifact ignore rules
# Revision: 34c53b5
<#
.SYNOPSIS
Launches scripts\run_t5_sweep.py as a hidden background process using the
Python interpreter from .venv-gpu, after checking that CUDA is available.

.DESCRIPTION
Standard output and standard error of the background process are redirected to
log files inside the runtime directory. A launch.json file describing the
launch (process id, paths, command line) is written to the same directory and
the same JSON is emitted on the pipeline.
#>
param(
# Run identifier used to build the default directory names. When blank, the
# current time formatted as yyyyMMdd_HHmmss is used.
[string]$Timestamp = "",
# Directory that receives the stdout/stderr logs and launch.json; also forwarded
# as --runtime-dir. When blank: data\\runtime_metrics\\t5_sweep_<Timestamp>.
[string]$RuntimeDir = "",
# Forwarded as --sweep-out-dir. When blank: models\\finetune\\t5-sweep-<Timestamp>.
[string]$SweepOutDir = "",
# Forwarded as --analysis-dir.
[string]$AnalysisDir = "data\\analysis",
# Forwarded as --stage1-steps.
[int]$Stage1Steps = 1000,
# Forwarded as --stage2-steps.
[int]$Stage2Steps = 3750,
# Forwarded as --top-k.
[int]$TopK = 2,
# Forwarded as --eval-steps.
[int]$EvalSteps = 500,
# Forwarded as --test-eval-every-steps.
[int]$TestEvalEverySteps = 1000,
# Forwarded as --max-val-samples.
[int]$MaxValSamples = 128,
# Forwarded as --max-test-samples.
[int]$MaxTestSamples = 128,
# Forwarded verbatim as --lr-list (the default is a comma-separated list).
[string]$LrList = "1e-4,2e-4",
# Forwarded verbatim as --length-penalty-list (the default is a comma-separated list).
[string]$LengthPenaltyList = "0.7,0.8,0.9",
# Forwarded verbatim as --beams-list.
[string]$BeamsList = "4",
# When set, --force is appended to the sweep command line.
[switch]$Force
)
# Treat every cmdlet error as terminating so a broken setup never reaches the launch.
$ErrorActionPreference = "Stop"

# Default run identifier: the launch time at one-second resolution.
if ([string]::IsNullOrWhiteSpace($Timestamp)) {
    $Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
}

# This script lives one level below the repository root. Every relative path
# used below is resolved from the repository root.
# NOTE(review): Set-Location is not undone, so the caller's session stays in
# the repository root after the script returns.
$repoRoot = Split-Path -Parent $PSScriptRoot
Set-Location $repoRoot

# PowerShell does not treat the backslash as an escape character, so the
# literals below really contain doubled separators; they are kept byte-for-byte
# because the resulting strings are forwarded to the sweep script and recorded.
if ([string]::IsNullOrWhiteSpace($RuntimeDir)) {
    $RuntimeDir = "data\\runtime_metrics\\t5_sweep_$Timestamp"
}
if ([string]::IsNullOrWhiteSpace($SweepOutDir)) {
    $SweepOutDir = "models\\finetune\\t5-sweep-$Timestamp"
}

# The runtime directory must exist before Start-Process can redirect into it.
New-Item -ItemType Directory -Force -Path $RuntimeDir | Out-Null
$stdoutLog = Join-Path $RuntimeDir "sweep.stdout.log"
$stderrLog = Join-Path $RuntimeDir "sweep.stderr.log"
$launchJson = Join-Path $RuntimeDir "launch.json"

# Resolve-Path throws (ErrorActionPreference is Stop) when the interpreter is missing.
$pythonExe = (Resolve-Path ".venv-gpu\\Scripts\\python.exe").Path

# GPU preflight. The probe prints one status line and exits 0 when CUDA is
# available and 2 when it is not. Any other non-zero exit code means the probe
# itself failed before it could report (for example, torch failed to import).
$gpuCheck = & $pythonExe -c "import torch,sys; ok=torch.cuda.is_available(); name=torch.cuda.get_device_name(0) if ok else ''; print(f'torch={torch.__version__} cuda={ok} device={name}'); sys.exit(0 if ok else 2)"
$gpuExitCode = $LASTEXITCODE
if ($gpuExitCode -ne 0) {
    # Surface whatever the probe printed so the failure can be diagnosed
    # without re-running it by hand.
    $gpuDetail = ($gpuCheck -join "`n").Trim()
    if ($gpuExitCode -eq 2) {
        $message = "GPU preflight failed in .venv-gpu. CUDA is not available."
    }
    else {
        $message = "GPU preflight failed in .venv-gpu. The probe exited with code $gpuExitCode before reporting CUDA status (is torch importable?)."
    }
    if ($gpuDetail.Length -gt 0) {
        $message = "$message Probe output: $gpuDetail"
    }
    throw $message
}
# Command line for the sweep driver: the script path first, then every option
# as a flag/value pair in a fixed order. Values are converted to strings so the
# resulting array contains strings only.
$sweepOptions = [ordered]@{
    "--sweep-out-dir"         = $SweepOutDir
    "--runtime-dir"           = $RuntimeDir
    "--analysis-dir"          = $AnalysisDir
    "--stage1-steps"          = $Stage1Steps
    "--stage2-steps"          = $Stage2Steps
    "--top-k"                 = $TopK
    "--eval-steps"            = $EvalSteps
    "--test-eval-every-steps" = $TestEvalEverySteps
    "--max-val-samples"       = $MaxValSamples
    "--max-test-samples"      = $MaxTestSamples
    "--lr-list"               = $LrList
    "--length-penalty-list"   = $LengthPenaltyList
    "--beams-list"            = $BeamsList
}

$argList = @("scripts\\run_t5_sweep.py")
foreach ($flag in $sweepOptions.Keys) {
    $argList += $flag
    $argList += [string]$sweepOptions[$flag]
}

# -Force on this launcher maps to --force on the sweep driver.
if ($Force.IsPresent) {
    $argList += "--force"
}
# Start-Process joins the -ArgumentList elements with single spaces and does not
# quote them, so an element containing whitespace (for example a runtime
# directory with a space in its name) would reach the child process split into
# several arguments. Quote such elements here using the Windows C runtime
# rules: wrap in double quotes, escape embedded quotes as \", and double any
# backslashes that directly precede a quote or the closing quote.
# Elements without whitespace or quotes are passed through unchanged.
$quotedArgList = foreach ($arg in $argList) {
    $text = [string]$arg
    if ($text -match '[\s"]') {
        $escaped = $text -replace '(\\*)"', '$1$1\"'
        $escaped = $escaped -replace '(\\+)\z', '$1$1'
        '"' + $escaped + '"'
    }
    else {
        $text
    }
}

# Launch the sweep detached from this console: no visible window, both output
# streams redirected to the log files, and the process object returned so the
# process id can be recorded.
$startParams = @{
    FilePath               = $pythonExe
    ArgumentList           = @($quotedArgList)
    WorkingDirectory       = $repoRoot
    RedirectStandardOutput = $stdoutLog
    RedirectStandardError  = $stderrLog
    WindowStyle            = "Hidden"
    PassThru               = $true
}
$proc = Start-Process @startParams
# Human-readable command line for the launch record. Parts that contain
# whitespace or double quotes are quoted (Windows C runtime rules) so the
# recorded command is unambiguous and can be pasted back into a shell; parts
# without such characters are recorded exactly as they are.
$commandParts = foreach ($part in (@($pythonExe) + $argList)) {
    $text = [string]$part
    if ($text -match '[\s"]') {
        $escaped = $text -replace '(\\*)"', '$1$1\"'
        $escaped = $escaped -replace '(\\+)\z', '$1$1'
        '"' + $escaped + '"'
    }
    else {
        $text
    }
}

# Everything needed to find and monitor the background sweep later.
$payload = [ordered]@{
    timestamp     = $Timestamp
    pid           = $proc.Id
    python        = $pythonExe
    gpu_preflight = ($gpuCheck -join "`n")
    runtime_dir   = $RuntimeDir
    sweep_out_dir = $SweepOutDir
    analysis_dir  = $AnalysisDir
    stdout_log    = $stdoutLog
    stderr_log    = $stderrLog
    progress_glob = (Join-Path $RuntimeDir "*__progress.json")
    history_glob  = (Join-Path $RuntimeDir "*__history.jsonl")
    command       = ($commandParts -join " ")
}

# Serialize once; the same text is written to launch.json and emitted on the
# pipeline for the caller.
# NOTE(review): under Windows PowerShell 5.1, -Encoding UTF8 writes a byte
# order mark; confirm that whatever reads launch.json tolerates it.
$launchRecord = $payload | ConvertTo-Json -Depth 5
$launchRecord | Set-Content -Encoding UTF8 $launchJson
$launchRecord