import gradio as gr
from huggingface_hub import InferenceClient
import os
from typing import Iterator, List, Tuple

# Available models for selection
AVAILABLE_MODELS = [
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
    "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
    "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
    "VIDraft/Gemma-3-R1984-12B",
]
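
# Note: availability of these checkpoints on the serverless Inference API can
# change over time; any Hub model ID that supports chat completion can be
# substituted in the list above.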


def initialize_client(token: str, model: str) -> InferenceClient:
    """Initialize an InferenceClient for the given model, raising gr.Error on failure."""
    try:
        # Pass the token directly rather than calling login(), which would
        # write the token to the local credential cache on every request.
        return InferenceClient(model=model, token=token)
    except Exception as e:
        raise gr.Error(f"Failed to initialize client: {str(e)}")
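
# Illustrative standalone check (not part of the app flow); assumes a valid
# token in the HF_TOKEN environment variable:
#
#   client = initialize_client(os.environ["HF_TOKEN"], AVAILABLE_MODELS[0])
#   out = client.chat_completion(
#       messages=[{"role": "user", "content": "Hello"}], max_tokens=16
#   )
#   print(out.choices[0].message.content)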


def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model: str,
    token: str,
) -> Iterator[str]:
    """
    Stream a response from the Hugging Face Inference API.

    Docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    if not token:
        raise gr.Error("Please provide a valid Hugging Face API token.")
    if not message.strip():
        raise gr.Error("Input message cannot be empty.")

    client = initialize_client(token, model)

    # Build message history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
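    # The resulting list follows the OpenAI-style chat schema, e.g. (illustrative):
    #   [{"role": "system", "content": "You are a friendly and helpful Chatbot."},
    #    {"role": "user", "content": "Hi"},
    #    {"role": "assistant", "content": "Hello! How can I help?"},
    #    {"role": "user", "content": "..."}]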

    # Generate response, accumulating deltas so the UI updates incrementally
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Use a distinct name; reusing `token` would shadow the API token parameter
            delta = chunk.choices[0].delta.content or ""
            response += delta
            yield response
    except Exception as e:
        raise gr.Error(f"Error during inference: {str(e)}")
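
# Note on streaming: with stream=True, chat_completion yields incremental
# chunks, and each chunk.choices[0].delta.content holds the next text fragment
# (or None). Yielding the running string lets gr.ChatInterface repaint the
# reply as it grows instead of waiting for the full completion.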

# Load token from environment variable for security
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Create Gradio interface
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful Chatbot.",
            label="System Message",
            placeholder="Enter the system prompt here...",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Controls the maximum length of the generated response.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness (higher = more creative, lower = more deterministic).",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
            info="Controls diversity via nucleus sampling.",
        ),
        gr.Dropdown(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Model Selection",
            info="Select the model to use for inference.",
        ),
        gr.Textbox(
            value=HF_TOKEN,
            label="Hugging Face API Token",
            type="password",
            placeholder="Enter your HF API token (or set HF_TOKEN env variable)",
        ),
    ],
    title="Chatbot with Hugging Face Inference API",
    description="Interact with a chatbot powered by Hugging Face models. Provide your API token and customize settings.",
    theme="base",
)

if __name__ == "__main__":
    demo.launch()
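
# Assumed usage: run locally with `HF_TOKEN=hf_xxx python app.py`, or, on a
# Hugging Face Space, set HF_TOKEN as a repository secret so the token textbox
# is pre-filled without hard-coding credentials.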