Zhen Ye Claude Opus 4.6 committed on
Commit
915e2d7
·
1 Parent(s): d9cce19

feat: add /gpu-monitor endpoint for live nvidia-smi streaming

Browse files

Streams nvidia-smi dmon output over HTTP for real-time GPU utilization
monitoring during benchmark runs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +31 -0
app.py CHANGED
@@ -926,5 +926,36 @@ async def benchmark_endpoint(
926
  pass
927
 
928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
929
  if __name__ == "__main__":
930
  uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
926
  pass
927
 
928
 
929
@app.get("/gpu-monitor")
async def gpu_monitor_endpoint(duration: int = 180, interval: int = 1):
    """Stream ``nvidia-smi dmon`` output as plain text for a bounded time.

    Usage: curl 'http://.../gpu-monitor?duration=180&interval=1'
    Run this in one terminal while /benchmark runs in another.

    Args:
        duration: Wall-clock seconds to keep streaming. Values below zero
            are treated as zero (the stream ends immediately).
        interval: Sampling delay in seconds, passed to ``nvidia-smi dmon -d``.
            Values below one are raised to one so the monitor always gets a
            positive delay.

    Returns:
        A ``StreamingResponse`` (``text/plain``) that yields the monitor's
        output line by line. If ``nvidia-smi`` cannot be started, the body
        contains a single line describing the failure instead.
    """
    import asyncio
    import time

    # A non-positive interval previously meant the elapsed counter never
    # advanced and the stream never stopped; clamp both inputs up front.
    interval = max(1, interval)
    duration = max(0, duration)

    async def _stream():
        # Use the asyncio subprocess API so that waiting for the next sample
        # does not block the event loop while other requests are running.
        try:
            proc = await asyncio.create_subprocess_exec(
                "nvidia-smi", "dmon", "-s", "u", "-d", str(interval),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,
            )
        except OSError as exc:
            # Headers are already sent by the time the generator runs, so
            # report the failure in the body instead of raising.
            yield f"failed to start nvidia-smi: {exc}\n"
            return

        # Bound the stream by wall-clock time rather than by counting lines:
        # header rows and one-row-per-GPU output would otherwise make the
        # stream end earlier than requested.
        deadline = time.monotonic() + duration
        try:
            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                try:
                    raw = await asyncio.wait_for(
                        proc.stdout.readline(), timeout=remaining
                    )
                except asyncio.TimeoutError:
                    break
                if not raw:
                    # EOF: the monitor exited on its own.
                    break
                yield raw.decode(errors="replace")
        finally:
            # terminate() is synchronous, so the child is signalled even if
            # this task is being torn down because the client disconnected.
            if proc.returncode is None:
                try:
                    proc.terminate()
                except ProcessLookupError:
                    # The process exited between the check and the signal.
                    pass
            await proc.wait()

    return StreamingResponse(_stream(), media_type="text/plain")
958
+
959
+
960
if __name__ == "__main__":
    # Script entry point: serve the "app:app" ASGI application with uvicorn
    # on all interfaces, port 7860, with auto-reload disabled.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=7860,
        reload=False,
    )