"""LLM Comparison Tool: compare outputs from two Hugging Face models side by side."""

import os

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Please set the HF_TOKEN environment variable")

# Available models
AVAILABLE_MODELS = [
    "HuggingFaceH4/zephyr-7b-beta",
    "meta-llama/Llama-3.1-8B-Instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "Qwen/QwQ-32B",
]

# Initialize the inference client
inference_client = InferenceClient(token=HF_TOKEN)


def get_model_response(prompt, model_name, temperature_value, do_sample):
    """Get a response from a Hugging Face model."""
    try:
        # Build kwargs dynamically
        generation_args = {
            "prompt": prompt,
            "model": model_name,
            "max_new_tokens": 100,
            "do_sample": do_sample,
            "return_full_text": False,
        }
        # Only include temperature if sampling is enabled
        if do_sample and temperature_value > 0:
            generation_args["temperature"] = temperature_value

        return inference_client.text_generation(**generation_args)
    except Exception as e:
        return f"Error: {str(e)}"


def compare_models(prompt, model1, model2, temp1, temp2, do_sample1, do_sample2):
    """Compare outputs from the two selected models."""
    if not prompt.strip():
        empty_chat = [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": "Please enter a prompt"},
        ]
        return empty_chat, empty_chat, gr.update(interactive=True)

    response1 = get_model_response(prompt, model1, temp1, do_sample1)
    response2 = get_model_response(prompt, model2, temp2, do_sample2)

    # Format responses for the Chatbot components ("messages" format)
    chat1 = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response1},
    ]
    chat2 = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response2},
    ]
    return chat1, chat2, gr.update(interactive=True)


def update_slider_state(enabled):
    """Toggle a temperature slider when its sampling checkbox changes."""
    if enabled:
        return gr.update(interactive=True, elem_classes=[])
    # Grey out the slider and reset it to 0 while sampling is disabled
    return gr.update(interactive=False, elem_classes=["disabled-slider"], value=0)


# Create the Gradio interface
with gr.Blocks(css="""
    .disabled-slider {
        opacity: 0.5;
        pointer-events: none;
    }
""") as demo:
    gr.Markdown("# LLM Comparison Tool")
    gr.Markdown("Compare outputs from different Hugging Face models side by side.")

    with gr.Row():
        prompt = gr.Textbox(
            label="Enter your prompt",
            placeholder="Type your prompt here...",
            lines=3,
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Responses")

    with gr.Row():
        with gr.Column():
            model1_dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],
                label="Select Model 1",
            )
            do_sample1 = gr.Checkbox(
                label="Enable sampling (random outputs)",
                value=False,
            )
            temp1 = gr.Slider(
                label="Temperature (higher = more creative, lower = more predictable)",
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.0,
                interactive=False,
                elem_classes=["disabled-slider"],
            )
            chatbot1 = gr.Chatbot(
                label="Model 1 Output",
                show_label=True,
                height=300,
                type="messages",
            )

        with gr.Column():
            model2_dropdown = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[1],
                label="Select Model 2",
            )
            do_sample2 = gr.Checkbox(
                label="Enable sampling (random outputs)",
                value=False,
            )
            temp2 = gr.Slider(
                label="Temperature (higher = more creative, lower = more predictable)",
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.0,
                interactive=False,
                elem_classes=["disabled-slider"],
            )
            chatbot2 = gr.Chatbot(
                label="Model 2 Output",
                show_label=True,
                height=300,
                type="messages",
            )

    def start_loading():
        """Disable the submit button while responses are being generated."""
        return gr.update(interactive=False)

    # Handle form submission: disable the button, then generate and re-enable it
    submit_btn.click(
        fn=start_loading,
        inputs=None,
        outputs=submit_btn,
        queue=False,
    ).then(
        fn=compare_models,
        inputs=[prompt, model1_dropdown, model2_dropdown, temp1, temp2, do_sample1, do_sample2],
        outputs=[chatbot1, chatbot2, submit_btn],
    )

    # Each checkbox controls only its own slider, so one output per handler
    do_sample1.change(
        fn=update_slider_state,
        inputs=[do_sample1],
        outputs=[temp1],
    )
    do_sample2.change(
        fn=update_slider_state,
        inputs=[do_sample2],
        outputs=[temp2],
    )

if __name__ == "__main__":
    demo.launch()
    # demo.launch(share=True)