Spaces:
Sleeping
Sleeping
# Stream a chat completion from the Hugging Face Inference API and echo the
# generated text to stdout as it arrives.
import os

from huggingface_hub import InferenceClient

# Read the access token from the environment so it is never hard-coded.
# os.getenv returns None when HF_TOKEN is unset; the value is passed through
# to the client unchanged.
api_key = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=api_key)

# Single-turn conversation in the chat-completions message format.
messages = [
    {"role": "user", "content": "Tell me a story"},
]

# stream=True makes the call return an iterable of incremental chunks
# instead of one complete response.
stream = client.chat.completions.create(
    model="HuggingFaceH4/zephyr-7b-beta",
    messages=messages,
    temperature=0.5,
    max_tokens=2048,
    top_p=0.7,
    stream=True,
)

for chunk in stream:
    # Guard against chunks that carry no choices, which would otherwise
    # raise IndexError on the [0] lookup below.
    if not chunk.choices:
        continue
    content = chunk.choices[0].delta.content
    # Skip empty/None deltas so the literal "None" is never printed.
    if content:
        # end="" joins the fragments into continuous text rather than one
        # fragment per line; flush so the text shows up while streaming.
        print(content, end="", flush=True)

# Finish the output with a single trailing newline.
print()