FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / sglang /examples /frontend_language /quick_start /local_example_llava_next.py

Lekr0's picture

Add files using upload-large-folder tool

61ba51e verified about 1 month ago

history blame contribute delete

2.01 kB

	"""
	Usage: python3 local_example_llava_next.py
	"""

	import sglang as sgl
	from sglang.lang.chat_template import get_chat_template


	@sgl.function
	def image_qa(s, image_path, question):
	    """Ask ``question`` about the image at ``image_path``.

	    Appends a user turn (the image followed by the question text) to the
	    program state ``s``, then an assistant turn whose generated text is
	    stored under the key ``"answer"``.
	    """
	    user_content = sgl.image(image_path) + question
	    s += sgl.user(user_content)

	    reply = sgl.gen("answer")
	    s += sgl.assistant(reply)


	def single():
	    """Run one ``image_qa`` request on the cat image and print the answer."""
	    answer = image_qa.run(
	        image_path="images/cat.jpeg",
	        question="What is this?",
	        max_new_tokens=128,
	    )["answer"]
	    print(answer, "\n")


	def stream():
	    """Run one ``image_qa`` request with ``stream=True`` and print chunks as they arrive."""
	    streaming_state = image_qa.run(
	        image_path="images/cat.jpeg",
	        question="What is this?",
	        max_new_tokens=64,
	        stream=True,
	    )

	    # Each chunk is written without a newline and flushed right away so the
	    # answer shows up incrementally on the terminal.
	    for chunk in streaming_state.text_iter("answer"):
	        print(chunk, end="", flush=True)

	    # End the streamed line.
	    print()


	def batch():
	    """Run ``image_qa`` on the cat and dog images as one batch and print each answer."""
	    image_paths = ("images/cat.jpeg", "images/dog.jpeg")
	    requests = [
	        {"image_path": path, "question": "What is this?"} for path in image_paths
	    ]

	    for result in image_qa.run_batch(requests, max_new_tokens=128):
	        print(result["answer"], "\n")


	if __name__ == "__main__":
	    import multiprocessing as mp

	    # Select the "spawn" start method before the runtime is created.
	    mp.set_start_method("spawn", force=True)

	    runtime = sgl.Runtime(model_path="lmms-lab/llama3-llava-next-8b")

	    # Once the runtime exists it must always be shut down, even when template
	    # setup or one of the demos below raises; otherwise it is left running.
	    try:
	        runtime.endpoint.chat_template = get_chat_template("llama-3-instruct-llava")

	        # Or you can use the 72B model
	        # runtime = sgl.Runtime(model_path="lmms-lab/llava-next-72b", tp_size=8)
	        # runtime.endpoint.chat_template = get_chat_template("chatml-llava")

	        sgl.set_default_backend(runtime)
	        print(f"chat template: {runtime.endpoint.chat_template.name}")

	        # Or you can use API models
	        # sgl.set_default_backend(sgl.OpenAI("gpt-4-vision-preview"))
	        # sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision"))

	        # Run a single request
	        print("\n========== single ==========\n")
	        single()

	        # Stream output
	        print("\n========== stream ==========\n")
	        stream()

	        # Run a batch of requests
	        print("\n========== batch ==========\n")
	        batch()
	    finally:
	        runtime.shutdown()