import gradio as gr
from huggingface_hub import InferenceClient

# this client will handle making requests to the model to generate responses
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    system_message = "You are a poet chatbot. You always respond with a rhyme!"

    # initialize a list of dictionaries to store the messages
    messages = [{"role": "system", "content": system_message}]

    # add all previous messages to the messages list
    if history:
        messages.extend(history)

    # add the current user's message to the messages list
    messages.append({"role": "user", "content": message})

    # make the chat completion API call, sending the messages
    # and other parameters to the model; streaming is enabled,
    # so the reply appears one word/token at a time
    response = ""

    # iterate over each chunk streamed back by the model
    for chunk in client.chat_completion(
        messages,
        max_tokens=100,
        temperature=0.9,
        stream=True,
    ):
        token = chunk.choices[0].delta.content  # capture the most recent token
        if token:  # the final chunk may carry no new content
            response += token  # add it to the response
            yield response  # yield the partial response accumulated so far

    # non-streaming alternative: extract and return the full reply in one call
    # return response['choices'][0]['message']['content'].strip()

chatbot = gr.ChatInterface(respond, type="messages", theme='NoCrypt/miku')
chatbot.launch(debug=True)
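
# The lines below are a minimal sketch, not part of the original Space, showing
# how the respond() generator could be exercised without the Gradio UI. It
# assumes network access to the Hugging Face Inference API and, if required,
# a token supplied via `huggingface-cli login` or the HF_TOKEN environment
# variable. Each iteration yields the response accumulated so far, which is
# what makes the text appear to stream in the chat window:
#
#     for partial in respond("Write me a poem about the sea", []):
#         print(partial)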