import os

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

if not HF_TOKEN:
    raise ValueError("Please set the HF_TOKEN environment variable")
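
# HF_TOKEN can come from a local .env file (read by python-dotenv) or, on
# a Space, from the repository secrets, which are exposed as environment
# variables.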

# Available models
AVAILABLE_MODELS = [
    "HuggingFaceH4/zephyr-7b-beta",
    "meta-llama/Llama-3.1-8B-Instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "Qwen/QwQ-32B",
]
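
# Gated models such as the Llama checkpoints also require that the account
# behind HF_TOKEN has been granted access on the Hub.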

# Initialize inference client
inference_client = InferenceClient(token=HF_TOKEN)
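

# get_model_response wraps a single InferenceClient.text_generation call.
# Errors are returned as strings rather than raised, so a failing model
# (gated, rate-limited, or still loading) surfaces in the chat window
# instead of crashing the app.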
def get_model_response(prompt, model_name, temperature_value, do_sample):
    """Get response from a Hugging Face model."""
    try:
        # Build kwargs dynamically
        generation_args = {
            "prompt": prompt,
            "model": model_name,
            "max_new_tokens": 100,
            "do_sample": do_sample,
            "return_full_text": False,
        }
        # Only include temperature if sampling is enabled
        if do_sample and temperature_value > 0:
            generation_args["temperature"] = temperature_value
        response = inference_client.text_generation(**generation_args)
        return response
    except Exception as e:
        return f"Error: {str(e)}"
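

# Both chatbots use Gradio's "messages" format, so each history is a list
# of {"role": ..., "content": ...} dicts. The two models are queried
# sequentially to keep the flow simple.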
def compare_models(prompt, model1, model2, temp1, temp2, do_sample1, do_sample2):
    """Compare outputs from two selected models."""
    if not prompt.strip():
        return (
            [{"role": "user", "content": prompt}, {"role": "assistant", "content": "Please enter a prompt"}],
            [{"role": "user", "content": prompt}, {"role": "assistant", "content": "Please enter a prompt"}],
            gr.update(interactive=True),
        )

    response1 = get_model_response(prompt, model1, temp1, do_sample1)
    response2 = get_model_response(prompt, model2, temp2, do_sample2)

    # Format responses for chatbot display
    chat1 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response1}]
    chat2 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response2}]
    return chat1, chat2, gr.update(interactive=True)


# Update temperature slider interactivity based on the sampling checkbox.
# A single gr.update covers interactivity, styling, and value, so each
# change handler needs only one output component.
def update_slider_state(enabled):
    if enabled:
        return gr.update(interactive=True, elem_classes=[])
    return gr.update(interactive=False, elem_classes=["disabled-slider"], value=0)


# Create the Gradio interface
with gr.Blocks(css="""
.disabled-slider { opacity: 0.5; pointer-events: none; }
""") as demo:
    gr.Markdown("# LLM Comparison Tool")
    gr.Markdown("Compare outputs from different Hugging Face models side by side.")

    with gr.Row():
        prompt = gr.Textbox(
            label="Enter your prompt",
            placeholder="Type your prompt here...",
            lines=3,
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Responses")

    with gr.Row():
        with gr.Column():
            model1_dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],
                label="Select Model 1",
            )
            do_sample1 = gr.Checkbox(
                label="Enable sampling (random outputs)",
                value=False,
            )
            temp1 = gr.Slider(
                label="Temperature (higher = more creative, lower = more predictable)",
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.0,
                interactive=False,
                elem_classes=["disabled-slider"],
            )
            chatbot1 = gr.Chatbot(
                label="Model 1 Output",
                show_label=True,
                height=300,
                type="messages",
            )

        with gr.Column():
            model2_dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[1],
                label="Select Model 2",
            )
            do_sample2 = gr.Checkbox(
                label="Enable sampling (random outputs)",
                value=False,
            )
            temp2 = gr.Slider(
                label="Temperature (higher = more creative, lower = more predictable)",
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.0,
                interactive=False,
                elem_classes=["disabled-slider"],
            )
            chatbot2 = gr.Chatbot(
                label="Model 2 Output",
                show_label=True,
                height=300,
                type="messages",
            )
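
    # Disable the submit button while a comparison runs; compare_models
    # re-enables it through the gr.update it returns as its third output.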
    def start_loading():
        return gr.update(interactive=False)

    # Handle form submission
    submit_btn.click(
        fn=start_loading,
        inputs=None,
        outputs=submit_btn,
        queue=False,
    ).then(
        fn=compare_models,
        inputs=[prompt, model1_dropdown, model2_dropdown, temp1, temp2, do_sample1, do_sample2],
        outputs=[chatbot1, chatbot2, submit_btn],
    )
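
    # Each sampling checkbox toggles its own temperature slider. The handler
    # returns a single gr.update, so each event needs only one output
    # component.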
    do_sample1.change(
        fn=update_slider_state,
        inputs=[do_sample1],
        outputs=[temp1],
    )
    do_sample2.change(
        fn=update_slider_state,
        inputs=[do_sample2],
        outputs=[temp2],
    )


if __name__ == "__main__":
    demo.launch()
    # demo.launch(share=True)
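    # share=True serves the app through a temporary public gradio.live link.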