Spaces:

Saffn
/

GenZ-AI

Sleeping

App Files Files Community

GenZ-AI / app.py

Saffn

Update app.py

5e305bf verified 4 months ago

Raw

History Blame Contribute Delete

2.1 kB

	import gradio as gr
	from huggingface_hub import InferenceClient

	def respond(message, history, system_message, max_tokens, temperature, top_p, hf_token):
	    """Stream a chat reply from the Hugging Face Inference API.

	    This is a generator. After every streamed chunk it yields the complete
	    reply accumulated so far (not only the newest fragment), so a consumer
	    can always display just the latest yielded value.

	    Args:
	        message: Text of the new user turn.
	        history: Earlier turns, inserted between the system message and the
	            new user turn. Presumably a sequence of
	            ``{"role": ..., "content": ...}`` dicts like the ones built here;
	            ``None`` is treated as an empty history.
	        system_message: Content of the leading ``system`` message.
	        max_tokens: Forwarded unchanged to ``chat_completion``.
	        temperature: Forwarded unchanged to ``chat_completion``.
	        top_p: Forwarded unchanged to ``chat_completion``.
	        hf_token: Hugging Face access token. Surrounding whitespace is
	            stripped; an empty, blank or ``None`` token produces a single
	            error string and no API call.

	    Yields:
	        str: The reply text received so far, or the missing-token error.
	    """
	    # A pasted token often carries stray spaces or a newline; strip them so
	    # a blank field is reported as missing instead of being sent to the API.
	    token = (hf_token or "").strip()
	    if not token:
	        yield "Error: Please provide your Hugging Face token."
	        return

	    client = InferenceClient(token=token, model="openai/gpt-oss-20b")

	    messages = [
	        {"role": "system", "content": system_message},
	        *(history or []),
	        {"role": "user", "content": message},
	    ]

	    stream = client.chat_completion(
	        messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    )

	    response = ""
	    for chunk in stream:
	        choices = chunk.choices
	        # A chunk may carry no choices or an empty delta; it then adds
	        # nothing, but the current text is still yielded for that chunk.
	        if choices and choices[0].delta.content:
	            response += choices[0].delta.content
	        yield response

	# --- Gradio UI ---
	def _chat_turn(message, history, system_message, max_tokens, temperature, top_p, hf_token):
	    """Run one chat turn and stream it into the Chatbot and State components.

	    ``respond`` yields plain strings (the reply accumulated so far), whereas
	    the Chatbot component displays a list of messages. This adapter wraps
	    each partial reply into a full transcript and, once streaming has
	    finished, stores the completed turn in the conversation state so the
	    next request is sent together with its history.

	    Yields:
	        ``(chatbot_value, state_value)`` tuples, matching the ``outputs`` of
	        the ``msg.submit`` listener below.
	    """
	    past_turns = list(history or [])
	    user_turn = {"role": "user", "content": message}
	    transcript = past_turns
	    for partial_reply in respond(
	        message, past_turns, system_message, max_tokens, temperature, top_p, hf_token
	    ):
	        assistant_turn = {"role": "assistant", "content": partial_reply}
	        transcript = past_turns + [user_turn, assistant_turn]
	        # Leave the stored history untouched while the reply is still streaming.
	        yield transcript, past_turns

	    # respond() answers a missing token with an error string instead of a
	    # model reply; show it, but do not store it as part of the conversation.
	    if hf_token and hf_token.strip():
	        yield transcript, transcript


	with gr.Blocks() as demo:
	    with gr.Sidebar():
	        gr.Markdown("## Chatbot Settings")
	        hf_token = gr.Textbox(
	            placeholder="Paste your Hugging Face token here",
	            label="Hugging Face Token",
	            type="password",
	        )
	        system_message = gr.Textbox(
	            value="You are a friendly Chatbot.",
	            label="System message",
	        )
	        max_tokens = gr.Slider(
	            minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
	        )
	        temperature = gr.Slider(
	            minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
	        )
	        top_p = gr.Slider(
	            minimum=0.1, maximum=1.0, value=0.95, step=0.05,
	            label="Top-p (nucleus sampling)",
	        )

	    # NOTE(review): type="messages" makes the Chatbot accept role/content
	    # dicts, the same shape kept in `state`. Confirm the keyword against the
	    # Gradio version pinned for this Space.
	    chatbot = gr.Chatbot(type="messages")
	    state = gr.State([])  # conversation history as role/content dicts
	    msg = gr.Textbox(label="Your message")

	    msg.submit(
	        _chat_turn,
	        inputs=[msg, state, system_message, max_tokens, temperature, top_p, hf_token],
	        outputs=[chatbot, state],
	        show_progress=True,
	    )

	if __name__ == "__main__":
	    demo.launch()