Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| Script to tail all worker log files simultaneously. | |
| Usage: python tail_workers.py [--num-gpus N] | |
| """ | |
| import argparse | |
| import os | |
| import time | |
| import sys | |
| from typing import Dict | |
def tail_all_workers(num_gpus: int):
    """Follow every ``worker_gpu_<i>.log`` file and echo new lines to stdout.

    Polls the log files of workers ``0 .. num_gpus - 1`` (paths are relative
    to the current working directory) and prints each newly appended line,
    prefixed with the current wall-clock time and the worker index. A log
    file that does not exist yet is announced once and picked up from its
    beginning as soon as it appears. The loop runs until it is interrupted
    with Ctrl+C (``KeyboardInterrupt``); a short notice is then printed and
    the function returns.

    Args:
        num_gpus: Number of worker log files to monitor.

    Note:
        Saved offsets are never moved backwards. If a log file is truncated,
        nothing is printed for it until it grows past the saved offset.
    """
    print(f"Tailing logs for {num_gpus} GPU workers...")
    print("=" * 60)

    # Resume offset per worker. Only 0 or a value returned by ``tell()`` is
    # ever stored, so every entry is a valid ``seek()`` target.
    log_positions: Dict[int, int] = {i: 0 for i in range(num_gpus)}
    # Workers whose "waiting" notice was already printed. This is tracked
    # separately on purpose: a sentinel such as -1 stored in
    # ``log_positions`` would make ``seek()`` raise ValueError on every poll
    # once the file finally shows up, so the file would never be tailed.
    waiting_announced = set()

    try:
        while True:
            has_new_output = False

            for i in range(num_gpus):
                log_file = f"worker_gpu_{i}.log"

                # Open directly instead of checking for existence first, so
                # there is no window between the check and the open.
                try:
                    with open(log_file, 'r') as f:
                        f.seek(log_positions[i])
                        new_lines = f.readlines()
                        log_positions[i] = f.tell()
                except FileNotFoundError:
                    # Announce a missing file once, and only if nothing has
                    # been read from it so far.
                    if log_positions[i] == 0 and i not in waiting_announced:
                        print(f"[INFO] Waiting for {log_file} to be created...")
                        waiting_announced.add(i)
                    continue
                except (OSError, UnicodeError) as e:
                    # Best effort: report the problem and keep polling the
                    # remaining workers.
                    print(f"[ERROR] Error reading {log_file}: {e}")
                    continue

                if not new_lines:
                    continue

                has_new_output = True
                timestamp = time.strftime("%H:%M:%S")
                for line in new_lines:
                    print(f"[{timestamp}] [GPU {i}] {line.rstrip()}")

            # Only sleep if there was no new output to keep it responsive
            if not has_new_output:
                time.sleep(0.1)

    except KeyboardInterrupt:
        print("\nStopping log monitoring...")
def main():
    """Command-line entry point.

    Reads ``--num-gpus`` (an integer, 2 when omitted) from the command line.
    A value below 1 prints an error message and exits with status 1;
    any other value is handed to ``tail_all_workers``.
    """
    cli = argparse.ArgumentParser(description="Tail all worker log files")
    cli.add_argument(
        "--num-gpus",
        type=int,
        default=2,
        help="Number of GPU workers to monitor (default: 2)",
    )
    gpu_count = cli.parse_args().num_gpus

    # Happy path first; everything below is the rejection branch.
    if gpu_count >= 1:
        tail_all_workers(gpu_count)
        return

    print("Error: Number of GPUs must be at least 1")
    sys.exit(1)


if __name__ == "__main__":
    main()