import gradio as gr
from huggingface_hub import InferenceClient, login
import os
from typing import Iterator, List, Tuple

# Available models for selection
AVAILABLE_MODELS = [
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
    "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
    "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
    "VIDraft/Gemma-3-R1984-12B",
]
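# Note: these are model IDs on the Hugging Face Hub. Whether a given ID is
# actually served by the Inference API can change over time; an unavailable
# model will surface as an error from chat_completion at request time.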


def initialize_client(token: str, model: str) -> InferenceClient:
    """Initialize the InferenceClient with the provided token and model."""
    try:
        login(token)
        return InferenceClient(model=model)
    except Exception as e:
        # Raise directly instead of returning the error object, so callers
        # don't need to type-check the result.
        raise gr.Error(f"Failed to initialize client: {str(e)}")


def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model: str,
    token: str,
) -> Iterator[str]:
    """
    Stream a response from the Hugging Face Inference API.

    Docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    if not token:
        raise gr.Error("Please provide a valid Hugging Face API token.")
    if not message.strip():
        raise gr.Error("Input message cannot be empty.")

    client = initialize_client(token, model)

    # Build message history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
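    # Resulting payload shape (illustrative values, not from a real session):
    #   [
    #       {"role": "system", "content": "You are a friendly and helpful Chatbot."},
    #       {"role": "user", "content": "Hi there"},
    #       {"role": "assistant", "content": "Hello! How can I help?"},
    #       {"role": "user", "content": "<the current message>"},
    #   ]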

    # Generate response
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Each streamed chunk carries an incremental piece of text in
            # chunk.choices[0].delta.content (None for some chunks, hence
            # the `or ""`). Use a name other than `token` here so the API
            # token parameter is not shadowed.
            delta = chunk.choices[0].delta.content or ""
            response += delta
            # Yield the accumulated text so the UI renders partial output.
            yield response
    except Exception as e:
        raise gr.Error(f"Error during inference: {str(e)}")


# Load token from environment variable for security
HF_TOKEN = os.getenv("HF_TOKEN", "")
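# On a Hugging Face Space, HF_TOKEN can be stored as a repository secret;
# secrets are exposed to the app as environment variables, so the token
# textbox below is pre-filled automatically.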

# Create Gradio interface
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful Chatbot.",
            label="System Message",
            placeholder="Enter the system prompt here...",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Controls the maximum length of the generated response.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness (higher = more creative, lower = more deterministic).",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
            info="Controls diversity via nucleus sampling.",
        ),
        gr.Dropdown(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Model Selection",
            info="Select the model to use for inference.",
        ),
        gr.Textbox(
            value=HF_TOKEN,
            label="Hugging Face API Token",
            type="password",
            placeholder="Enter your HF API token (or set HF_TOKEN env variable)",
        ),
    ],
    title="Chatbot with Hugging Face Inference API",
    description="Interact with a chatbot powered by Hugging Face models. Provide your API token and customize settings.",
    theme="base",
)

if __name__ == "__main__":
    demo.launch()
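
# To run this app locally (a sketch; assumes the file is saved as app.py,
# per the Spaces convention):
#   pip install gradio huggingface_hub
#   export HF_TOKEN=hf_xxx   # optional; pre-fills the token field
#   python app.py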