Buckets:

leideng
/

QCFuse

Files

xet

leideng/QCFuse / profiler.py

leideng

12 days ago

download

raw

4.85 kB

	"""
	Run live profiling.

	Usage:
	python3 -m sglang.profiler
	"""

	import argparse
	import json
	import os
	import time
	from argparse import ArgumentParser
	from pathlib import Path
	from typing import List, Optional

	import requests

	PROFILER_DIR = os.getenv("SGLANG_TORCH_PROFILER_DIR", "/tmp")


	def _run_profile(
	url: Optional[str],
	num_steps: int,
	activities: List[str],
	output_dir: Optional[str] = None,
	profile_name: Optional[str] = None,
	profile_by_stage: bool = False,
	merge_profiles: bool = False,
	) -> str:
	if output_dir is None:
	output_dir = PROFILER_DIR

	output_dir = os.path.normpath(output_dir)
	output_dir = os.path.abspath(output_dir)
	output_dir = Path(output_dir)

	# Add "profile_name/timestamp" to the path.
	if profile_name:
	output_dir = output_dir / profile_name
	output_dir = output_dir / str(time.time())
	output_dir.mkdir(exist_ok=True, parents=True)

	print(f"Dump profiling traces to {output_dir}")
	print(
	f"Waiting for {num_steps} steps and the trace to be flushed.... ({profile_by_stage=})"
	)

	# Dump server args.
	file_path = Path(output_dir) / "server_args.json"
	if not file_path.exists():
	response = requests.get(url + "/get_server_info")
	response.raise_for_status()
	server_args_data = response.json()
	with open(file_path, "w") as file:
	file.write(json.dumps(server_args_data))

	# Start profiler. The API replies when all steps are processed
	# and files are generated.
	json_data = {
	"output_dir": str(output_dir),
	"num_steps": str(num_steps),
	"activities": activities,
	"profile_by_stage": profile_by_stage,
	"merge_profiles": merge_profiles,
	}

	response = requests.post(url=url + "/start_profile", json=json_data)
	response.raise_for_status()

	trace_link = str(output_dir)
	return trace_link


	def run_profile(
	    url: Optional[str],
	    num_steps: int,
	    activities: List[str],
	    output_dir: Optional[str] = None,
	    profile_name: Optional[str] = None,
	    profile_by_stage: bool = False,
	    merge_profiles: bool = False,
	):
	    """Run a step-based profile and return the trace directory path.

	    Public wrapper that forwards every argument unchanged to
	    ``_run_profile`` and returns its result.
	    """
	    # Step-based profiling terminates on its own once num_steps is reached,
	    # so there is nothing to stop explicitly here.
	    return _run_profile(
	        url=url,
	        num_steps=num_steps,
	        activities=activities,
	        output_dir=output_dir,
	        profile_name=profile_name,
	        profile_by_stage=profile_by_stage,
	        merge_profiles=merge_profiles,
	    )


	if __name__ == "__main__":
	    # Command-line entry point: parse the options, translate the boolean
	    # activity flags into the activity-name list, then start the profile.
	    parser = ArgumentParser(description="Run live profiling on a running server.")
	    parser.add_argument(
	        "--url",
	        type=str,
	        default="http://localhost:30000",
	        help="Server or API base url if not using http host and port.",
	    )
	    parser.add_argument(
	        "--output-dir",
	        type=str,
	        default=None,
	        help="Profile directory to dump profile traces.",
	    )
	    parser.add_argument(
	        "--profile-name",
	        type=str,
	        default=None,
	        help="The name of this profile run.",
	    )
	    parser.add_argument(
	        "--num-steps",
	        type=int,
	        default=5,
	        help="The number of forward steps to profile.",
	    )
	    # NOTE: BooleanOptionalAction must not be combined with ``type=``; the
	    # parameter is deprecated since Python 3.12 and rejected by newer versions.
	    parser.add_argument(
	        "--profile-by-stage",
	        action=argparse.BooleanOptionalAction,
	        default=False,
	        help="Whether to profile by stage.",
	    )
	    parser.add_argument(
	        "--cpu",
	        action=argparse.BooleanOptionalAction,
	        default=True,
	        help="Whether to profile CPU activity",
	    )
	    parser.add_argument(
	        "--gpu",
	        action=argparse.BooleanOptionalAction,
	        default=True,
	        help="Whether to profile GPU activity",
	    )
	    parser.add_argument(
	        "--mem",
	        action=argparse.BooleanOptionalAction,
	        default=False,
	        help="Whether to profile memory usage (https://pytorch.org/memory_viz)",
	    )
	    parser.add_argument(
	        "--rpd",
	        action=argparse.BooleanOptionalAction,
	        default=False,
	        help="Whether to use rpd profiler (https://github.com/ROCm/rocmProfileData)",
	    )
	    parser.add_argument(
	        "--merge-profiles",
	        action=argparse.BooleanOptionalAction,
	        default=False,
	        help="Whether to merge profiles from all ranks into a single trace file",
	    )

	    args = parser.parse_args()

	    # Dict insertion order keeps the activity order CPU, GPU, MEM, RPD.
	    flag_by_activity = {
	        "CPU": args.cpu,
	        "GPU": args.gpu,
	        "MEM": args.mem,
	        "RPD": args.rpd,
	    }
	    activities = [name for name, enabled in flag_by_activity.items() if enabled]

	    run_profile(
	        args.url,
	        args.num_steps,
	        activities,
	        args.output_dir,
	        args.profile_name,
	        args.profile_by_stage,
	        args.merge_profiles,
	    )

Xet Storage Details

Size: 4.85 kB
Xet hash: 1b230af83ddd0ba98e0c2a57444a340f948b532317fc6f53f583b33dc366549d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.