Spaces:

K00B404
/

ZephyrChat

Sleeping

Create app.py

9657faa verified over 1 year ago

414 Bytes

	import os

	from huggingface_hub import InferenceClient
	# Stream a single chat completion from the Zephyr model and echo it to stdout.

	# Read the access token from the environment so it is never hard-coded here.
	# os.getenv returns None when HF_TOKEN is unset; that value is handed to the
	# client unchanged.
	api_key = os.getenv("HF_TOKEN")
	client = InferenceClient(api_key=api_key)

	# Single-turn conversation: one user message, no system prompt or history.
	messages = [
	    {"role": "user", "content": "Tell me a story"},
	]

	# stream=True requests the reply as an iterable of chunks rather than one
	# complete response object.
	stream = client.chat.completions.create(
	    model="HuggingFaceH4/zephyr-7b-beta",
	    messages=messages,
	    temperature=0.5,
	    max_tokens=2048,
	    top_p=0.7,
	    stream=True,
	)

	for chunk in stream:
	    # NOTE(review): assumes, as in OpenAI-style streams, that a chunk may
	    # arrive with no choices or with a None/empty delta content (e.g. the
	    # final chunk) -- confirm against the huggingface_hub version in use.
	    if not chunk.choices:
	        continue
	    piece = chunk.choices[0].delta.content
	    if piece:
	        # end="" joins the fragments into continuous text instead of one
	        # fragment per line; flush so text appears as it is received.
	        print(piece, end="", flush=True)

	# Terminate the streamed text with a single newline.
	print()