|
|
import gradio as gr |
|
|
import os |
|
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face API token read from the environment; None if unset
# (InferenceClient then falls back to anonymous / rate-limited access).
HF_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')


# Shared inference client used by chatbot_hf; the model is chosen
# per-request, so one client serves all models.
client = InferenceClient(token=HF_TOKEN)
|
|
|
|
|
def chatbot_hf(question, temperature=0.7, model='google/gemma-2-2b-it',
               max_tokens=500):
    """Send a single-turn chat request to a HuggingFace-hosted model.

    Args:
        question: The user's message; sent as a one-message conversation.
        temperature: Sampling temperature (higher = more random output).
        model: HuggingFace model id to route the request to.
        max_tokens: Upper bound on generated tokens (default 500, matching
            the previous hard-coded limit).

    Returns:
        The assistant's reply text from the first completion choice.

    Raises:
        huggingface_hub.errors.HfHubHTTPError: On API/auth failures.
    """
    # No history is kept: each call is an independent single-turn exchange.
    response = client.chat_completion(
        model=model,
        messages=[{"role": "user", "content": question}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content
|
|
|
|
|
def main():
    """Build the Gradio front-end for chatbot_hf and launch it."""
    AVAILABLE_MODELS = [
        "google/gemma-2-2b-it",
        "meta-llama/Llama-2-7b-chat-hf",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "HuggingFaceH4/zephyr-7b-beta",
    ]

    # Input widgets, in the same order as chatbot_hf's parameters.
    question_box = gr.Textbox(
        label="Your Question",
        lines=2,
        placeholder="Type your message here...",
        scale=3,
    )
    temperature_slider = gr.Slider(
        label="Temperature",
        minimum=0.0,
        maximum=1.0,
        step=0.01,
        value=0.7,
        info="Higher values make output more random, lower values more focused",
    )
    model_dropdown = gr.Dropdown(
        label="Select Model",
        choices=AVAILABLE_MODELS,
        value=AVAILABLE_MODELS[0],
        info="Choose the AI model to chat with",
    )
    answer_box = gr.Textbox(label="AI Response", lines=20)

    demo = gr.Interface(
        fn=chatbot_hf,
        inputs=[question_box, temperature_slider, model_dropdown],
        outputs=answer_box,
        title="🤖 HuggingFace Chat Interface",
        description="""
    Chat with various large language models hosted on HuggingFace.
    Adjust the temperature to control response creativity.
    """,
        article="""
    ### Tips
    - For factual responses, use lower temperature (0.1-0.3)
    - For creative writing, use higher temperature (0.7-0.9)
    - Different models may have different strengths
    """,
    )
    demo.launch()
|
|
|
|
|
if __name__ == "__main__": |
|
|
main() |