"""Gradio chat interface for Hugging Face Hub models.

Streams chat completions from the Hugging Face Inference API and supports
both text-only and multimodal (text + image) conversations.
"""

import base64
import mimetypes
import os
from typing import Any, Dict, Generator, List, Optional

import gradio as gr
from huggingface_hub import InferenceClient

# Default model configurations
DEFAULT_MODEL = "meta-llama/Llama-3.2-11B-Vision-Instruct"
DEFAULT_SYSTEM_MESSAGE = "You are a helpful, harmless, and honest AI assistant."

# Available models from Hugging Face
AVAILABLE_MODELS = [
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
    "microsoft/Phi-3-mini-4k-instruct",
    "google/gemma-2-2b-it",
    "Qwen/Qwen2.5-7B-Instruct",
]


def get_inference_client(token: Optional[str] = None) -> InferenceClient:
    """Create an InferenceClient with the given token, falling back to HF_TOKEN."""
    return InferenceClient(token=token or os.getenv("HF_TOKEN"))


def _image_to_url(image: str) -> str:
    """Return *image* as a URL embeddable in a chat message.

    Remote http(s)/data URLs pass through unchanged; local file paths --
    which is what Gradio provides for uploads -- are inlined as base64
    data URLs so the inference API can read them.
    """
    if image.startswith(("http://", "https://", "data:")):
        return image
    mime = mimetypes.guess_type(image)[0] or "image/png"
    with open(image, "rb") as fh:
        payload = base64.b64encode(fh.read()).decode("ascii")
    return f"data:{mime};base64,{payload}"


def format_messages(
    message: str,
    history: List[Dict[str, str]],
    system_message: str,
    image: Optional[Any] = None,
) -> List[Dict[str, Any]]:
    """Format messages for the chat-completions API.

    Args:
        message: Current user message text.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts.
        system_message: System prompt; skipped when empty.
        image: Optional image (URL or local file path) for multimodal models.

    Returns:
        Message list in OpenAI-compatible chat format.
    """
    messages: List[Dict[str, Any]] = []

    # Add system message if present
    if system_message:
        messages.append({"role": "system", "content": system_message})

    # Add conversation history
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})

    # Add current message, with image parts if multimodal
    if image is not None:
        # Multimodal content is a list of typed parts. The chat-completions
        # endpoint expects the OpenAI-style
        # {"type": "image_url", "image_url": {"url": ...}} shape, not a bare
        # {"type": "image", "url": ...} entry.
        content = [
            {"type": "image_url", "image_url": {"url": _image_to_url(image)}},
            {"type": "text", "text": message},
        ]
        messages.append({"role": "user", "content": content})
    else:
        messages.append({"role": "user", "content": message})

    return messages


def chat_response(
    message: str,
    history: List[Dict[str, str]],
    model: str,
    system_message: str,
    temperature: float,
    max_tokens: int,
    top_p: float,
    token: str,
    image: Optional[Any] = None,
) -> Generator[str, None, None]:
    """Generate a streaming chat response from a Hugging Face model.

    Yields the accumulated partial message after each streamed chunk; on
    failure yields a single human-readable error string instead of raising,
    so the UI shows the problem in the chat window.
    """
    try:
        client = get_inference_client(token if token else None)

        # Format messages
        messages = format_messages(message, history, system_message, image)

        # Stream the response
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )

        partial_message = ""
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message
    except Exception as e:  # surface API errors to the UI rather than crash
        error_msg = f"Error: {str(e)}"
        if "401" in str(e):
            error_msg = "Authentication Error: Please provide a valid Hugging Face token."
        elif "404" in str(e):
            error_msg = f"Model '{model}' not found or not available."
        elif "429" in str(e):
            error_msg = "Rate limit exceeded. Please try again later."
        yield error_msg


def clear_chat():
    """Clear the chat history (None resets a Chatbot component)."""
    return None


def get_model_info(model: str) -> str:
    """Return a short human-readable description of the selected model."""
    info = {
        "meta-llama/Llama-3.2-11B-Vision-Instruct": "Multimodal model supporting both text and images. Great for vision tasks.",
        "meta-llama/Llama-3.2-3B-Instruct": "Efficient small model good for quick responses and simpler tasks.",
        "meta-llama/Llama-3.1-8B-Instruct": "Balanced performance and quality. Good general-purpose assistant.",
        "mistralai/Mistral-7B-Instruct-v0.3": "Strong performance on reasoning and coding tasks.",
        "HuggingFaceH4/zephyr-7b-beta": "Fine-tuned for helpful and engaging conversations.",
        "microsoft/Phi-3-mini-4k-instruct": "Compact model with strong reasoning capabilities.",
        "google/gemma-2-2b-it": "Lightweight model from Google, good for everyday tasks.",
        "Qwen/Qwen2.5-7B-Instruct": "Strong multilingual capabilities and long context understanding.",
    }
    return info.get(model, "No information available.")


def toggle_multimodal(multimodal: bool) -> Dict[str, Any]:
    """Return component-update kwargs toggling multimodal input visibility.

    NOTE(review): currently unused; if wired up, ``gr.update(...)`` would be
    the conventional return value -- confirm before use.
    """
    return {"visible": multimodal, "value": None}


# Custom theme for modern appearance
custom_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="md",
    spacing_size="md",
    radius_size="lg",
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    button_secondary_background_fill="*neutral_100",
    button_secondary_background_fill_hover="*neutral_200",
    block_title_text_weight="600",
    block_label_text_weight="500",
)

# CSS for additional styling
custom_css = """
.gradio-container { max-width: 1400px !important; }
.chatbot-container { min-height: 500px; }
.settings-accordion { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); }
.built-with { text-align: center; padding: 10px; margin-top: 20px; color: #6b7280; font-size: 0.875rem; }
.built-with a { color: #4f46e5; text-decoration: none; font-weight: 500; }
.built-with a:hover { text-decoration: underline; }
"""

with gr.Blocks(theme=custom_theme, css=custom_css) as demo:
    # Header
    gr.Markdown("""
    # 🤖 Hugging Face Chat Interface
    Chat with state-of-the-art language models from the Hugging Face Hub.
    Supports both text-only and multimodal (text + image) conversations.
    """)

    # Built with anycoder link (content intentionally empty in original)
    gr.Markdown("""
    """)

    # Shared state carrying the uploaded image from the submit handler to the
    # bot handler. The original code created two *different* anonymous
    # gr.State() components (one in outputs, one in inputs), so the image
    # never actually reached bot_response_handler.
    image_state = gr.State(None)

    with gr.Row():
        # Main chat area
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Conversation",
                height=500,
                type="messages",
                show_copy_button=True,
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/1077/1077114.png",  # user
                    "https://cdn-icons-png.flaticon.com/512/4712/4712035.png",  # assistant
                ),
            )

            with gr.Row():
                with gr.Column(scale=10):
                    msg_input = gr.MultimodalTextbox(
                        label="Message",
                        placeholder="Type your message here...",
                        show_label=False,
                        sources=["upload", "clipboard"],
                        file_count="single",
                        file_types=["image"],
                        submit_btn=True,
                        stop_btn=True,
                    )
                with gr.Column(scale=1, min_width=80):
                    clear_btn = gr.ClearButton(
                        components=[chatbot, msg_input],
                        value="🗑️",
                        size="lg",
                    )

        # Settings sidebar
        with gr.Column(scale=1, min_width=300):
            with gr.Accordion("⚙️ Model Settings", open=True):
                model_dropdown = gr.Dropdown(
                    choices=AVAILABLE_MODELS,
                    value=DEFAULT_MODEL,
                    label="Model",
                    info="Select a Hugging Face model",
                )
                model_info = gr.Textbox(
                    value=get_model_info(DEFAULT_MODEL),
                    label="Model Info",
                    interactive=False,
                    lines=3,
                )
                hf_token = gr.Textbox(
                    label="Hugging Face Token",
                    placeholder="hf_... (optional)",
                    type="password",
                    info="Required for some models. Get yours at huggingface.co/settings/tokens",
                )
                system_msg = gr.Textbox(
                    label="System Message",
                    value=DEFAULT_SYSTEM_MESSAGE,
                    lines=3,
                    info="Instructions for the AI's behavior",
                )

            with gr.Accordion("🔧 Generation Parameters", open=False):
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                    info="Higher = more creative, lower = more focused",
                )
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=4096,
                    value=1024,
                    step=64,
                    label="Max Tokens",
                    info="Maximum response length",
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top-p (Nucleus Sampling)",
                    info="Controls diversity of outputs",
                )

            with gr.Accordion("ℹ️ About", open=False):
                gr.Markdown("""
                ### How to use:
                1. **Select a model** from the dropdown
                2. **Type your message** in the chat box
                3. **Upload images** (for multimodal models) using the paperclip icon
                4. **Adjust parameters** to control the response style

                ### Tips:
                - Use **temperature** to control creativity
                - **Vision models** (like Llama 3.2 11B) support image understanding
                - Add a **Hugging Face token** for better rate limits

                ### Privacy:
                Messages are sent to Hugging Face's inference API.
                Your token is only used for authentication and never stored.
                """)

    # Event handlers
    def user_message_handler(message: Dict[str, Any], history: List[Dict[str, str]]):
        """Append the submitted user turn to history and extract any image.

        Returns (textbox reset value, updated history, image path or None).
        None -- not "" -- is the valid clear value for a MultimodalTextbox.
        """
        text = message.get("text", "")
        files = message.get("files", [])
        image = files[0] if files else None

        # Add user message to history
        history = history + [{"role": "user", "content": text}]
        return None, history, image

    def bot_response_handler(
        history: List[Dict[str, str]],
        model: str,
        system_msg: str,
        temperature: float,
        max_tokens: int,
        top_p: float,
        token: str,
        image: Any,
    ):
        """Stream the assistant reply into the chat history.

        Yields the history after every streamed chunk so the chat window
        updates incrementally (the original only yielded once, after the
        stream was fully drained).
        """
        if not history:
            # Generators must yield -- a bare `return history` value is
            # discarded by the caller.
            yield history
            return

        # Get the last user message
        last_message = ""
        for msg in reversed(history):
            if msg["role"] == "user":
                last_message = msg["content"]
                break

        if not last_message:
            yield history
            return

        # Stream the response, updating the trailing assistant message
        for partial in chat_response(
            message=last_message,
            history=history[:-1],  # exclude the user turn we just appended
            model=model,
            system_message=system_msg,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            token=token,
            image=image,
        ):
            if history and history[-1]["role"] == "assistant":
                history[-1]["content"] = partial
            else:
                history = history + [{"role": "assistant", "content": partial}]
            yield history

    # Update model info when model changes
    model_dropdown.change(
        fn=get_model_info,
        inputs=model_dropdown,
        outputs=model_info,
        # NOTE(review): api_visibility is claimed "Gradio 6" syntax by the
        # original author; it does not exist in current stable Gradio -- verify.
        api_visibility="private",
    )

    # Chat submission: user turn first, then the streamed bot reply.
    # Both ends of the chain share image_state so the upload actually
    # reaches the model.
    msg_input.submit(
        fn=user_message_handler,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot, image_state],
        queue=False,
    ).then(
        fn=bot_response_handler,
        inputs=[
            chatbot,
            model_dropdown,
            system_msg,
            temperature,
            max_tokens,
            top_p,
            hf_token,
            image_state,
        ],
        outputs=chatbot,
        api_visibility="public",
    )

    # Example conversations
    gr.Examples(
        examples=[
            [{"text": "Explain quantum computing in simple terms", "files": []}],
            [{"text": "Write a Python function to calculate fibonacci numbers", "files": []}],
            [{"text": "What can you see in this image?", "files": ["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"]}],
            [{"text": "Help me brainstorm ideas for a science fiction story", "files": []}],
        ],
        inputs=msg_input,
        label="Example Prompts (click to try)",
    )

# Launch with Gradio 6 syntax - all parameters in launch()
# NOTE(review): theme/css/footer_links in launch() are not accepted by current
# stable Gradio (theme/css belong on gr.Blocks, where they are already set);
# kept as the author claims Gradio 6 -- verify against the installed version.
demo.launch(
    theme=custom_theme,
    css=custom_css,
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        "gradio",
        "api",
    ],
    show_error=True,
    pwa=True,
    favicon_path="https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
)