Spaces:
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# Open-source base models offered in the UI: maps the human-readable
# dropdown label "Name (repo_id)" to the Hugging Face repo id.
MODEL_CHOICES = {
    f"{label} ({repo_id})": repo_id
    for label, repo_id in (
        ("Mistral 7B Instruct", "mistralai/Mistral-7B-Instruct-v0.2"),
        ("Falcon 7B Instruct", "tiiuae/falcon-7b-instruct"),
        ("LLaMA-2 7B Chat", "meta-llama/Llama-2-7b-chat-hf"),
    )
}
def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token: gr.OAuthToken):
    """Stream a plain-text completion from a Hugging Face-hosted open-source LLM.

    Args:
        prompt: Text to complete.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature from the UI slider (0.0 means greedy).
        top_p: Nucleus-sampling probability mass.
        model_choice: Display label selected in the dropdown; a key of MODEL_CHOICES.
        hf_token: OAuth token injected by Gradio on login (required for gated
            models such as LLaMA-2).

    Yields:
        The accumulated completion text after each streamed chunk, or a
        user-facing warning/error message.
    """
    if not hf_token or not hf_token.token:
        yield "⚠️ Please log in with your Hugging Face account (needed for gated models like LLaMA-2)."
        return
    if not prompt or not prompt.strip():
        yield "⚠️ Please enter a prompt first."
        return
    # .get() guards against a stale/unknown dropdown value instead of raising KeyError.
    model_id = MODEL_CHOICES.get(model_choice)
    if model_id is None:
        yield f"⚠️ Unknown model choice: {model_choice!r}"
        return
    client = InferenceClient(model=model_id, token=hf_token.token)
    # The HF text-generation endpoint rejects temperature == 0.0; passing None
    # requests greedy decoding instead, which is what a 0 slider value intends.
    response_text = ""
    try:
        stream = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature if temperature > 0 else None,
            top_p=top_p,
            stream=True,
            repetition_penalty=1.0,
        )
        for chunk in stream:
            response_text += chunk
            yield response_text
    except Exception as err:
        # Surface API failures (gated access, rate limits, model loading) in the
        # UI rather than crashing the streaming generator mid-response.
        yield f"{response_text}\n\n❌ Inference error: {err}"
with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo with Open-Source Base LLMs")
    gr.Markdown(
        "Pick a model hosted on Hugging Face, enter a prompt, adjust decoding parameters, "
        "and watch the model complete your text."
    )
    # The login button starts the Hugging Face OAuth flow; after login, Gradio
    # injects the user's token into complete_text via its `hf_token: gr.OAuthToken`
    # type annotation.
    gr.LoginButton()

    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Type the beginning of your text...",
                lines=4,
            )
            max_tokens = gr.Slider(
                minimum=1, maximum=1024, value=100, step=1, label="Max tokens"
            )
            temperature = gr.Slider(
                minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p"
            )
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=list(MODEL_CHOICES.keys())[0],
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            output = gr.Textbox(
                label="Generated Completion",
                lines=15,
            )

    # NOTE: gr.OAuthToken must NOT appear in `inputs` — it is a marker class,
    # not a component. Gradio supplies the token automatically based on the
    # annotation on complete_text's final parameter.
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()