import json

import requests
# Port of the locally running sglang server (see launch command at the
# bottom of this file).
port = 8000

# JSON Schema constraining the model output: an object with a
# word-characters-only "name" and an integer "population", both required.
capital_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "pattern": r"^[\w]+$"},
        "population": {"type": "integer"},
    },
    "required": ["name", "population"],
}
json_schema = json.dumps(capital_schema)
# Send a constrained-generation request: the server must produce JSON that
# validates against `json_schema`. temperature=0 makes decoding greedy, so
# the answer is deterministic for a given model.
payload = {
    "text": "Here is the information of the capital of France in the JSON format.\n",
    "sampling_params": {
        "temperature": 0,
        "max_new_tokens": 64,
        "json_schema": json_schema,
    },
}
response = requests.post(f"http://localhost:{port}/generate", json=payload)
print(response.json())

# Example server launch command (note: it listens on port 8100 there;
# adjust `port` above to match whatever --port you actually use):
# python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --trust-remote-code --disaggregation-mode prefill --tp 2 --disaggregation-ib-device mlx5_roce0,mlx5_roce1 --speculative-algorithm EAGLE --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B --speculative-num-steps 3 --speculative-eagle-topk 4 --speculative-num-draft-tokens 16 --cuda-graph-max-bs 8 --host 127.0.0.1 --port 8100