import gradio as gr
from huggingface_hub import InferenceClient
import os
from typing import Iterator, List, Tuple

# Available models for selection
AVAILABLE_MODELS = [
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
    "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
    "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
    "VIDraft/Gemma-3-R1984-12B",
]
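
# Note: availability of these checkpoints on the serverless Inference API can
# change over time; any Hub model ID that supports chat completion can be
# substituted in the list above.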


def initialize_client(token: str, model: str) -> InferenceClient:
    """Initialize an InferenceClient for the given model, raising gr.Error on failure."""
    try:
        # Pass the token directly rather than calling login(), which would
        # write the token to the local credential cache on every request.
        return InferenceClient(model=model, token=token)
    except Exception as e:
        raise gr.Error(f"Failed to initialize client: {str(e)}")
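
# Illustrative standalone check (not part of the app flow); assumes a valid
# token in the HF_TOKEN environment variable:
#
#   client = initialize_client(os.environ["HF_TOKEN"], AVAILABLE_MODELS[0])
#   out = client.chat_completion(
#       messages=[{"role": "user", "content": "Hello"}], max_tokens=16
#   )
#   print(out.choices[0].message.content)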


def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model: str,
    token: str,
) -> Iterator[str]:
    """
    Stream a response from the Hugging Face Inference API.

    Docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    if not token:
        raise gr.Error("Please provide a valid Hugging Face API token.")
    if not message.strip():
        raise gr.Error("Input message cannot be empty.")

    client = initialize_client(token, model)

    # Build message history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
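    # The resulting list follows the OpenAI-style chat schema, e.g. (illustrative):
    #   [{"role": "system", "content": "You are a friendly and helpful Chatbot."},
    #    {"role": "user", "content": "Hi"},
    #    {"role": "assistant", "content": "Hello! How can I help?"},
    #    {"role": "user", "content": "..."}]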

    # Generate response, accumulating deltas so the UI updates incrementally
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Use a distinct name; reusing `token` would shadow the API token parameter
            delta = chunk.choices[0].delta.content or ""
            response += delta
            yield response
    except Exception as e:
        raise gr.Error(f"Error during inference: {str(e)}")
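
# Note on streaming: with stream=True, chat_completion yields incremental
# chunks, and each chunk.choices[0].delta.content holds the next text fragment
# (or None). Yielding the running string lets gr.ChatInterface repaint the
# reply as it grows instead of waiting for the full completion.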

# Load token from environment variable for security
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Create Gradio interface
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful Chatbot.",
            label="System Message",
            placeholder="Enter the system prompt here...",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Controls the maximum length of the generated response.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness (higher = more creative, lower = more deterministic).",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
            info="Controls diversity via nucleus sampling.",
        ),
        gr.Dropdown(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Model Selection",
            info="Select the model to use for inference.",
        ),
        gr.Textbox(
            value=HF_TOKEN,
            label="Hugging Face API Token",
            type="password",
            placeholder="Enter your HF API token (or set HF_TOKEN env variable)",
        ),
    ],
    title="Chatbot with Hugging Face Inference API",
    description="Interact with a chatbot powered by Hugging Face models. Provide your API token and customize settings.",
    theme="base",
)

if __name__ == "__main__":
    demo.launch()
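
# Assumed usage: run locally with `HF_TOKEN=hf_xxx python app.py`, or, on a
# Hugging Face Space, set HF_TOKEN as a repository secret so the token textbox
# is pre-filled without hard-coding credentials.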