FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / sglang /examples /frontend_language /quick_start /local_example_chat.py

Lekr0's picture

Add files using upload-large-folder tool

61ba51e verified 28 days ago

history blame contribute delete

1.77 kB

	"""
	Usage:
	python3 local_example_chat.py
	"""

	import sglang as sgl


	@sgl.function
	def multi_turn_question(s, question_1, question_2):
	    """Append a two-turn chat exchange to ``s``.

	    Each question is added as a user turn and is immediately followed by an
	    assistant turn whose text is generated under the name ``answer_1`` or
	    ``answer_2`` respectively, with ``max_tokens=256``.

	    Args:
	        s: The state object the turns are appended to with ``+=``
	            (presumably supplied by ``sgl.function`` -- confirm).
	        question_1: Text of the first user turn.
	        question_2: Text of the second user turn.
	    """
	    turns = (
	        ("answer_1", question_1),
	        ("answer_2", question_2),
	    )
	    # Same order as writing the four appends out by hand:
	    # user(q1), assistant(answer_1), user(q2), assistant(answer_2).
	    for answer_name, question in turns:
	        s += sgl.user(question)
	        s += sgl.assistant(sgl.gen(answer_name, max_tokens=256))


	def single():
	    """Run one two-question conversation and print the result.

	    Prints every message returned by ``state.messages()`` as
	    ``role : content``, then prints the value stored under ``answer_1``.
	    """
	    prompts = {
	        "question_1": "What is the capital of the United States?",
	        "question_2": "List two local attractions.",
	    }
	    state = multi_turn_question.run(**prompts)

	    for message in state.messages():
	        print(message["role"], ":", message["content"])

	    first_answer = state["answer_1"]
	    print("\n-- answer_1 --\n", first_answer)


	def stream():
	    """Run the conversation with ``stream=True`` and echo the text pieces.

	    Every item yielded by ``state.text_iter()`` is printed without a trailing
	    newline and flushed right away; a single newline is printed once the
	    iterator is exhausted.
	    """
	    prompts = {
	        "question_1": "What is the capital of the United States?",
	        "question_2": "List two local attractions.",
	    }
	    state = multi_turn_question.run(**prompts, stream=True)

	    for chunk in state.text_iter():
	        print(chunk, end="", flush=True)
	    # Terminate the line that the un-newlined chunks were written on.
	    print()


	def batch():
	    """Run two conversations through ``run_batch`` and print each result.

	    The batch is a list of keyword-argument dicts, one per conversation; the
	    messages of every returned state are printed in order.
	    """
	    us_conversation = {
	        "question_1": "What is the capital of the United States?",
	        "question_2": "List two local attractions.",
	    }
	    france_conversation = {
	        "question_1": "What is the capital of France?",
	        "question_2": "What is the population of this city?",
	    }
	    states = multi_turn_question.run_batch(
	        [us_conversation, france_conversation]
	    )

	    for state in states:
	        print(state.messages())


	if __name__ == "__main__":
	    # Create the runtime for the model and make it the default backend that
	    # the demo functions below run against.
	    runtime = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
	    try:
	        sgl.set_default_backend(runtime)

	        # Run a single request
	        print("\n========== single ==========\n")
	        single()

	        # Stream output
	        print("\n========== stream ==========\n")
	        stream()

	        # Run a batch of requests
	        print("\n========== batch ==========\n")
	        batch()
	    finally:
	        # Always shut the runtime down, even when one of the demos raises,
	        # so whatever it holds is released instead of being left behind.
	        runtime.shutdown()