Buckets:

Tsukihjy
/

testcase

Files

xet

Tsukihjy/testcase/methods/CruxEval/inference.py

Tsukihjy

about 2 months ago

download

raw

2.5 kB

	import asyncio
	import os

	from openai import AsyncOpenAI
	from sglang.utils import launch_server_cmd, terminate_process, wait_for_server
	from tap import Tap
	from tqdm.auto import tqdm
	from collections.abc import Coroutine, Sequence


	def limit_concurrency(
	    coroutines: Sequence[Coroutine], concurrency: int
	) -> list[Coroutine]:
	    """Wrap coroutines so that at most ``concurrency`` of them run at once.

	    Each input coroutine is wrapped in a new coroutine that first acquires a
	    shared semaphore, then awaits the original and returns its result. The
	    wrappers are returned in the same order as the inputs; nothing starts
	    running until the caller awaits or schedules them.

	    Args:
	        coroutines: Coroutine objects to be throttled.
	        concurrency: Maximum number of wrapped coroutines allowed inside the
	            semaphore at the same time. Must be at least 1.

	    Returns:
	        One wrapper coroutine per input coroutine, in input order.

	    Raises:
	        ValueError: If ``concurrency`` is smaller than 1.
	    """
	    if concurrency < 1:
	        # A semaphore initialised to 0 never lets any wrapper proceed, which
	        # would hang the caller forever instead of reporting the bad setting.
	        raise ValueError(f"concurrency must be >= 1, got {concurrency}")

	    semaphore = asyncio.Semaphore(concurrency)

	    async def with_concurrency_limit(coroutine: Coroutine) -> object:
	        # Hold a semaphore slot for the whole lifetime of the inner coroutine.
	        async with semaphore:
	            return await coroutine

	    return [with_concurrency_limit(coroutine) for coroutine in coroutines]


	class Argument(Tap):
	    # Command-line options for the inference script, declared as typed
	    # class attributes on a Tap subclass.

	    # Model identifier; used as the server's --model-path and as the
	    # ``model`` field of each chat completion request.
	    model_name_or_path: str = "/mnt/jfs/ckpt/checkpoints/Qwen2.5-32B-Instruct"

	    # Sampling temperature sent with every request.
	    temperature: float = 0.2

	    # Sent as ``max_tokens`` on every request.
	    max_completion_tokens: int = 10000

	    # Upper bound on the number of requests in flight at once.
	    concurrency: int = 100


	async def main(args: Argument, base_url: str, api_key: str = "sglang") -> None:
	    """Send every prompt in the dataset to the chat endpoint and print replies.

	    One chat completion request is built per prompt. The requests are
	    throttled with ``limit_concurrency`` and consumed through
	    ``asyncio.as_completed``, so replies are printed in the order they
	    finish, not in dataset order.

	    Args:
	        args: Parsed options; ``model_name_or_path``, ``temperature``,
	            ``max_completion_tokens`` and ``concurrency`` are read.
	        base_url: Base URL of the OpenAI-compatible API.
	        api_key: API key handed to the client.
	    """
	    dataset = [
	        "1+1=?",
	        "Where is the capital of France?",
	    ]

	    # Use the client as an async context manager so its underlying HTTP
	    # connections are closed when the run ends, including on failure.
	    async with AsyncOpenAI(base_url=base_url, api_key=api_key) as client:
	        requests = [
	            client.chat.completions.create(
	                model=args.model_name_or_path,
	                messages=[{"role": "user", "content": data}],
	                temperature=args.temperature,
	                max_tokens=args.max_completion_tokens,
	            )
	            for data in dataset
	        ]
	        tasks = limit_concurrency(requests, args.concurrency)

	        async for finished in tqdm(
	            asyncio.as_completed(tasks), total=len(tasks), desc="Running"
	        ):
	            # as_completed yields awaitables; awaiting one gives the response.
	            response = await finished
	            result = response.choices[0].message.content
	            # print result, or save to file
	            print(result)


	if __name__ == "__main__":
	    # Script entry point: parse options, start a local SGLang server, run the
	    # inference loop against it, then shut the server down.
	    args = Argument().parse_args()

	    # NOTE(review): presumably lets --context-length exceed the length in the
	    # model's own config; confirm against the SGLang documentation.
	    os.environ["SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"] = "1"

	    # NOTE(review): the code below connects to the port returned by
	    # launch_server_cmd, not to the literal "--port 33333" in the command;
	    # confirm the two agree.
	    server_process, port = launch_server_cmd(
	        (
	            "python3 -m sglang.launch_server "
	            "--tp 8 "
	            "--dp 1 "
	            f"--model-path {args.model_name_or_path} "
	            # f"--served-model-name {args.model} "
	            # "--reasoning-parser qwen3 "
	            "--context-length 16000 "
	            # """--json-model-override-args {"rope_scaling":{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}} """
	            "--host 0.0.0.0 "
	            "--port 33333 "
	            "--log-level warning "
	        )
	    )

	    # Always stop the server process, even when startup or inference raises;
	    # otherwise the launched server would be left running.
	    try:
	        wait_for_server(f"http://localhost:{port}")

	        asyncio.run(
	            main(args, base_url=f"http://localhost:{port}/v1", api_key="sglang")
	        )
	    finally:
	        terminate_process(server_process)

Xet Storage Details

Size: 2.5 kB
Xet hash: fb937afe77144b3ca5cc699e69eca2a191699b3c067eb54e879d08c5c156e11a

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.