File size: 4,021 Bytes
92cf8da
f96a832
733d870
 
f96a832
733d870
 
 
 
dc93b56
b7dda25
07ba57c
9267bc0
733d870
92cf8da
733d870
 
 
 
 
 
 
92cf8da
 
733d870
 
 
 
 
 
 
 
 
f96a832
733d870
 
f96a832
733d870
 
 
 
92cf8da
733d870
 
 
92cf8da
733d870
 
 
 
 
 
 
 
92cf8da
733d870
92cf8da
733d870
 
 
 
 
 
 
 
 
 
 
 
 
92cf8da
733d870
 
92cf8da
733d870
92cf8da
733d870
92cf8da
733d870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92cf8da
 
 
 
 
733d870
 
 
 
 
 
 
 
 
 
 
 
 
 
92cf8da
 
733d870
 
967bbbf
 
92cf8da
 
 
733d870
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
from typing import Iterator, List, Optional, Tuple

import gradio as gr
from huggingface_hub import InferenceClient, login

# Available models for selection
# Model repo ids offered in the UI dropdown; the first entry is the default.
AVAILABLE_MODELS: List[str] = [
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
    "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
    "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
    "VIDraft/Gemma-3-R1984-12B",
]

def initialize_client(token: str, model: str) -> InferenceClient:
    """Authenticate with the Hugging Face Hub and build an inference client.

    Args:
        token: Hugging Face API token used for authentication.
        model: Model repo id that the client will send requests to.

    Returns:
        A ready-to-use ``InferenceClient`` bound to *model*.

    Raises:
        gr.Error: If login or client construction fails.
    """
    try:
        # login() stores the token process-wide for huggingface_hub.
        login(token)
        return InferenceClient(model=model)
    except Exception as e:
        # Raise instead of returning the error object (the original returned
        # it, forcing callers to isinstance-check the result) and chain the
        # original cause for debuggability.
        raise gr.Error(f"Failed to initialize client: {str(e)}") from e

def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model: str,
    token: str,
) -> Iterator[str]:
    """Stream a chat completion from the Hugging Face Inference API.

    Yields the accumulated response text after each streamed chunk so the
    Gradio ChatInterface can render a progressively growing reply.
    Docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

    Args:
        message: Latest user message.
        history: Prior (user, assistant) message pairs; empty strings are skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        model: Model repo id to query.
        token: Hugging Face API token.

    Raises:
        gr.Error: On missing token, empty input, client setup failure, or an
            inference-time error.
    """
    if not token:
        raise gr.Error("Please provide a valid Hugging Face API token.")
    if not message.strip():
        raise gr.Error("Input message cannot be empty.")

    client = initialize_client(token, model)
    # Defensive: tolerate an initialize_client that returns the gr.Error
    # object instead of raising it; normalize to a raise.
    if isinstance(client, gr.Error):
        raise client

    # Build the message list: system prompt, prior turns, then the new message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the running text after every delta.
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Dedicated name: the original rebound `token` here, clobbering
            # the API-token parameter on every streamed chunk.
            delta = chunk.choices[0].delta.content or ""
            response += delta
            yield response
    except Exception as e:
        raise gr.Error(f"Error during inference: {str(e)}") from e

# Read the API token from the environment so it need not be typed in the UI.
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Per-request controls rendered beneath the chat box; order must match the
# extra parameters of `respond` after (message, history).
_ADDITIONAL_INPUTS = [
    gr.Textbox(
        value="You are a friendly and helpful Chatbot.",
        label="System Message",
        placeholder="Enter the system prompt here...",
    ),
    gr.Slider(
        minimum=1,
        maximum=2048,
        value=512,
        step=1,
        label="Max New Tokens",
        info="Controls the maximum length of the generated response.",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=4.0,
        value=0.7,
        step=0.1,
        label="Temperature",
        info="Controls randomness (higher = more creative, lower = more deterministic).",
    ),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (Nucleus Sampling)",
        info="Controls diversity via nucleus sampling.",
    ),
    gr.Dropdown(
        choices=AVAILABLE_MODELS,
        value=AVAILABLE_MODELS[0],
        label="Model Selection",
        info="Select the model to use for inference.",
    ),
    gr.Textbox(
        value=HF_TOKEN,
        label="Hugging Face API Token",
        type="password",
        placeholder="Enter your HF API token (or set HF_TOKEN env variable)",
    ),
]

# Chat UI wired to the streaming `respond` generator.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=_ADDITIONAL_INPUTS,
    title="Chatbot with Hugging Face Inference API",
    description="Interact with a chatbot powered by Hugging Face models. Provide your API token and customize settings.",
    theme="base",
)

if __name__ == "__main__":
    demo.launch()