# TextGen / app.py
# Gradio chat front-end for the Hugging Face Inference API.
# (Hugging Face Space, commit 9267bc0)
import gradio as gr
from huggingface_hub import InferenceClient, login
import os
from typing import List, Tuple, Optional
# Models offered in the UI dropdown; any hub repo id servable by the
# Inference API can be added here.
AVAILABLE_MODELS = [
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
    "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
    "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
    "VIDraft/Gemma-3-R1984-12B",
]
def initialize_client(token: str, model: str) -> InferenceClient:
    """Log in to the Hugging Face Hub and build an inference client.

    Args:
        token: Hugging Face API token used to authenticate.
        model: Hub repo id of the model to run inference against.

    Returns:
        A ready-to-use ``InferenceClient`` bound to ``model``.

    Raises:
        gr.Error: If login or client construction fails.
    """
    try:
        login(token)
        return InferenceClient(model=model)
    except Exception as e:
        # Raise instead of returning the error object: returning it
        # contradicted the declared return type and forced every caller
        # to isinstance-check the result. gr.Error still surfaces the
        # message in the Gradio UI; chain the cause for debugging.
        raise gr.Error(f"Failed to initialize client: {str(e)}") from e
def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model: str,
    token: str,
):
    """Stream a chat completion from the Hugging Face Inference API.

    This is a generator: it yields the accumulated response text after
    each streamed chunk so Gradio can render incremental output.

    Args:
        message: The latest user message.
        history: Prior turns as ``(user, assistant)`` pairs.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        model: Hub repo id of the model to query.
        token: Hugging Face API token.

    Raises:
        gr.Error: On missing token, empty input, or inference failure.

    Docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    if not token:
        raise gr.Error("Please provide a valid Hugging Face API token.")
    if not message.strip():
        raise gr.Error("Input message cannot be empty.")
    client = initialize_client(token, model)
    # Defensive: tolerate an initialize_client that returns the error
    # object instead of raising it.
    if isinstance(client, gr.Error):
        raise client
    # Build the conversation: system prompt, then alternating turns.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Stream the completion, yielding the running text so far.
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # BUG FIX: the original assigned the streamed text delta to
            # `token`, clobbering the API-token parameter mid-stream.
            delta = chunk.choices[0].delta.content or ""
            response += delta
            yield response
    except Exception as e:
        raise gr.Error(f"Error during inference: {str(e)}") from e
# Load token from environment variable for security; falls back to empty
# string so the UI field shows the placeholder instead of crashing.
HF_TOKEN = os.getenv("HF_TOKEN", "")
# Create Gradio interface. The additional inputs appear in the order
# `respond` expects its extra parameters: system message, max tokens,
# temperature, top-p, model, token.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful Chatbot.",
            label="System Message",
            placeholder="Enter the system prompt here...",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Controls the maximum length of the generated response.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness (higher = more creative, lower = more deterministic).",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
            info="Controls diversity via nucleus sampling.",
        ),
        gr.Dropdown(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Model Selection",
            info="Select the model to use for inference.",
        ),
        # Pre-filled from HF_TOKEN when set; masked in the UI.
        gr.Textbox(
            value=HF_TOKEN,
            label="Hugging Face API Token",
            type="password",
            placeholder="Enter your HF API token (or set HF_TOKEN env variable)",
        ),
    ],
    title="Chatbot with Hugging Face Inference API",
    description="Interact with a chatbot powered by Hugging Face models. Provide your API token and customize settings.",
    theme="base",
)
# Launch only when run as a script (Spaces imports the module instead).
if __name__ == "__main__":
    demo.launch()