File size: 2,648 Bytes
# Upload watchdog: kill+restart upload if its disk Read hasn't grown for 5 min.
# Run this in its own detached PowerShell.
# File that Log-Msg appends every timestamped watchdog message to.
$LOG_FILE = "D:\hf_upload\.watchdog.log"
# Number of seconds the upload's read counter may stay flat before the
# main loop kills the process and relaunches it.
$STALL_SECONDS = 300 # 5 min no IO -> kill
# Write a timestamped message to the console and append the same line to
# the watchdog log file ($LOG_FILE).
#   $msg - text to record.
function Log-Msg($msg) {
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    $entry = "[$stamp] $msg"

    Write-Host $entry
    Add-Content -Path $LOG_FILE -Value $entry
}
# Find the upload process: the first python.exe whose command line contains
# "upload_to_hf". Emits that Win32_Process CIM instance, or nothing at all
# when no such process is running.
function Get-UploadProc {
    Get-CimInstance -ClassName Win32_Process -Filter "Name='python.exe'" |
        Where-Object CommandLine -like "*upload_to_hf*" |
        Select-Object -First 1
}
# Relaunch the upload through its detached launcher script, using the
# Win32_Process.Create CIM method, log the method's result, then pause for
# 20 seconds before returning to the caller.
function Restart-Upload {
    Log-Msg "Restarting upload (LFS dedup will skip already uploaded chunks)..."

    $createCall = @{
        ClassName  = 'Win32_Process'
        MethodName = 'Create'
        Arguments  = @{ CommandLine = 'cmd.exe /c "D:\hf_upload\start_upload_detached.cmd"' }
    }
    $created = Invoke-CimMethod @createCall

    $returnCode  = $created.ReturnValue
    $launcherPid = $created.ProcessId
    Log-Msg "Restart issued, WMI ReturnValue=$returnCode, launcher PID=$launcherPid"

    Start-Sleep -Seconds 20
}
# Main watchdog loop.
#
# Every 30 seconds, look up the upload process and sample its cumulative
# ReadTransferCount. If the counter has not grown for $STALL_SECONDS, the
# process and its launcher shells are killed and the upload is restarted.
# If no upload process exists at all, the upload is restarted immediately.
Log-Msg "Watchdog started, stall threshold = $STALL_SECONDS s"

# Stall-detection baseline: last observed counter value, when it last grew,
# and the PID it was sampled from. $lastRead = $null means "no baseline yet".
$lastRead = $null
$lastReadTime = Get-Date
$lastPid = $null

while ($true) {
    $proc = Get-UploadProc
    if (-not $proc) {
        Log-Msg "No upload python found. Restarting..."
        Restart-Upload
        $lastRead = $null
        $lastPid = $null
        $lastReadTime = Get-Date
        # Restart-Upload already paused; poll again right away.
        continue
    }

    $curRead = $proc.ReadTransferCount
    $curPid = $proc.ProcessId

    # The counter is per-process. If the upload was replaced since the last
    # poll, the new process's counter starts over and would sit below the old
    # baseline, making a healthy upload look stalled. Drop the stale baseline
    # so the new PID is tracked from scratch.
    if (($null -ne $lastRead) -and ($curPid -ne $lastPid)) {
        Log-Msg "Upload PID changed from $lastPid to $curPid, resetting stall baseline"
        $lastRead = $null
    }

    if ($null -eq $lastRead) {
        # First sample for this process: record the baseline.
        $lastRead = $curRead
        $lastPid = $curPid
        $lastReadTime = Get-Date
        Log-Msg "Tracking PID $curPid, init Read=$([math]::Round($curRead/1GB,2)) GB"
    } elseif ($curRead -gt $lastRead) {
        # Progress since the last poll: move the baseline forward.
        $lastRead = $curRead
        $lastReadTime = Get-Date
    } else {
        $stallSec = ((Get-Date) - $lastReadTime).TotalSeconds
        if ($stallSec -ge $STALL_SECONDS) {
            Log-Msg "STALL DETECTED on PID ${curPid}: no Read for $([math]::Round($stallSec,0))s, killing..."
            Stop-Process -Id $curPid -Force -ErrorAction SilentlyContinue

            # Also stop any wrapper shells tied to the upload so the restart
            # below does not run alongside a leftover launcher. Failures are
            # ignored on purpose: the processes may already have exited.
            Get-CimInstance Win32_Process -Filter "Name='powershell.exe'" |
                Where-Object { $_.CommandLine -like "*upload_to_hf*" } |
                ForEach-Object { Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue }
            Get-CimInstance Win32_Process -Filter "Name='cmd.exe'" |
                Where-Object { $_.CommandLine -like "*start_upload_detached*" } |
                ForEach-Object { Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue }

            # Give the killed processes a moment to exit before relaunching.
            Start-Sleep -Seconds 5
            Restart-Upload
            $lastRead = $null
            $lastPid = $null
            $lastReadTime = Get-Date
        }
    }

    Start-Sleep -Seconds 30
}
|