| |
| |
|
|
| # Watchdog configuration. |
| # Seconds without any growth in the uploader's read counter before it is treated as stalled. |
| $STALL_SECONDS = 300 |
| # File that every watchdog log line is appended to. |
| $LOG_FILE = "D:\hf_upload\.watchdog.log" |
|
|
| function Log-Msg($msg) { |
|     # Prefix $msg with a 'yyyy-MM-dd HH:mm:ss' timestamp, show it on the |
|     # console, and append the same line to the file named by $LOG_FILE. |
|     $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss' |
|     $entry = "[${stamp}] $msg" |
|     Write-Host $entry |
|     Add-Content -Path $LOG_FILE -Value $entry |
| } |
|
|
| function Get-UploadProc { |
|     # Walk the python.exe processes reported by Win32_Process and return the |
|     # first one whose command line contains "upload_to_hf". |
|     # Emits nothing (so the caller sees $null) when no process matches. |
|     foreach ($candidate in (Get-CimInstance Win32_Process -Filter "Name='python.exe'")) { |
|         if ($candidate.CommandLine -like "*upload_to_hf*") { |
|             return $candidate |
|         } |
|     } |
| } |
|
|
| function Restart-Upload { |
|     # Launch the detached upload starter script through the WMI |
|     # Win32_Process.Create method, log what WMI reported, then wait 20 s |
|     # before returning (presumably to let the new process come up). |
|     Log-Msg "Restarting upload (LFS dedup will skip already uploaded chunks)..." |
|     $createArgs = @{ CommandLine = 'cmd.exe /c "D:\hf_upload\start_upload_detached.cmd"' } |
|     $created = Invoke-CimMethod -ClassName Win32_Process -MethodName Create -Arguments $createArgs |
|     Log-Msg "Restart issued, WMI ReturnValue=$($created.ReturnValue), launcher PID=$($created.ProcessId)" |
|     Start-Sleep -Seconds 20 |
| } |
|
|
| # ---------------------------------------------------------------------------- |
| # Main watchdog loop. |
| # |
| # Every 30 s the upload process is looked up and its ReadTransferCount (the |
| # Win32_Process counter of data read by that process) is sampled as a progress |
| # signal: |
| #   * no upload process found        -> restart the upload |
| #   * counter grew since last sample -> progress, refresh the timestamp |
| #   * counter flat for at least $STALL_SECONDS -> kill the uploader and its |
| #     launcher processes, then restart the upload |
| # |
| # The baseline is tied to the PID it was sampled from. A different PID means a |
| # different process whose counter is unrelated to the previous baseline, so the |
| # baseline is taken again instead of comparing the new counter with the old one. |
| # ---------------------------------------------------------------------------- |
| Log-Msg "Watchdog started, stall threshold = $STALL_SECONDS s" |
|
|
| $lastRead = $null          # last observed ReadTransferCount; $null means "no baseline yet" |
| $lastPid = $null           # PID that $lastRead was sampled from |
| $lastReadTime = Get-Date   # when the counter last advanced or the baseline was (re)taken |
|
|
| while ($true) { |
|     $proc = Get-UploadProc |
|     if (-not $proc) { |
|         Log-Msg "No upload python found. Restarting..." |
|         Restart-Upload |
|         # Drop the baseline so the next process found is tracked from scratch. |
|         $lastRead = $null |
|         $lastPid = $null |
|         $lastReadTime = Get-Date |
|         continue |
|     } |
|
|
|     $curRead = $proc.ReadTransferCount |
|     $curPid = $proc.ProcessId |
|
|
|     if (($null -eq $lastRead) -or ($curPid -ne $lastPid)) { |
|         # First sample, or the uploader is now a different process than the one |
|         # the baseline came from. Without the PID check a new process whose |
|         # counter is still below the old baseline would never satisfy the |
|         # "-gt" test below and would be killed as stalled while healthy. |
|         $lastRead = $curRead |
|         $lastPid = $curPid |
|         $lastReadTime = Get-Date |
|         Log-Msg "Tracking PID $curPid, init Read=$([math]::Round($curRead/1GB,2)) GB" |
|     } elseif ($curRead -gt $lastRead) { |
|         # Counter advanced: the upload is making progress. |
|         $lastRead = $curRead |
|         $lastReadTime = Get-Date |
|     } else { |
|         $stallSec = ((Get-Date) - $lastReadTime).TotalSeconds |
|         if ($stallSec -ge $STALL_SECONDS) { |
|             Log-Msg "STALL DETECTED on PID ${curPid}: no Read for $([math]::Round($stallSec,0))s, killing..." |
|             # Best-effort kills: a process that already exited is not an error here. |
|             Stop-Process -Id $curPid -Force -ErrorAction SilentlyContinue |
|             # Also remove wrapper shells left over from the previous launch. |
|             # $PID (this watchdog's own process) is excluded so the watchdog can |
|             # never terminate itself if its command line happens to match. |
|             Get-CimInstance Win32_Process -Filter "Name='powershell.exe'" | |
|                 Where-Object { ($_.ProcessId -ne $PID) -and ($_.CommandLine -like "*upload_to_hf*") } | |
|                 ForEach-Object { Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue } |
|             Get-CimInstance Win32_Process -Filter "Name='cmd.exe'" | |
|                 Where-Object { $_.CommandLine -like "*start_upload_detached*" } | |
|                 ForEach-Object { Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue } |
|             # Short pause between the kills and the relaunch. |
|             Start-Sleep -Seconds 5 |
|             Restart-Upload |
|             $lastRead = $null |
|             $lastPid = $null |
|             $lastReadTime = Get-Date |
|         } |
|     } |
|
|
|     # Polling interval. |
|     Start-Sleep -Seconds 30 |
| } |
|
|