# chat_hf2 / app.py
# (HuggingFace Space file by vuminhtue; renamed from app2.py to app.py, commit 485e3f4 verified)
import gradio as gr
import os
from huggingface_hub import InferenceClient
# Get token from environment variable for security
# In HuggingFace Spaces, set this in the Settings tab
# NOTE(review): returns None when the variable is unset — the client is then
# created without credentials; presumably requests fall back to unauthenticated
# (rate-limited) access — verify against deployment settings.
HF_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
# Initialize the HuggingFace Inference Client
# Shared module-level client used by chatbot_hf() below.
client = InferenceClient(token=HF_TOKEN)
def chatbot_hf(question: str, temperature: float = 0.7, model: str = 'google/gemma-2-2b-it') -> str:
    """Send a single-turn question to a HuggingFace chat model and return its reply.

    Args:
        question: The user's message text.
        temperature: Sampling temperature; higher values produce more varied output.
        model: HuggingFace model repo id to query.

    Returns:
        The assistant's reply text from the first completion choice.
    """
    # A single-turn conversation: just the user's message.
    conversation = [{"role": "user", "content": question}]
    completion = client.chat_completion(
        messages=conversation,
        model=model,
        temperature=temperature,
        max_tokens=500,  # Maximum length of response
    )
    # The first (and only requested) choice carries the assistant's message.
    return completion.choices[0].message.content
def main():
    """Assemble the Gradio chat interface and start the web server."""
    # Models offered in the dropdown selector.
    model_choices = [
        "google/gemma-2-2b-it",
        "meta-llama/Llama-2-7b-chat-hf",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "HuggingFaceH4/zephyr-7b-beta",
    ]

    # Build each input widget separately for readability.
    question_box = gr.Textbox(
        label="Your Question",
        lines=2,
        placeholder="Type your message here...",
        scale=3,
    )
    temperature_slider = gr.Slider(
        label="Temperature",
        minimum=0.0,
        maximum=1.0,
        step=0.01,
        value=0.7,
        info="Higher values make output more random, lower values more focused",
    )
    model_dropdown = gr.Dropdown(
        label="Select Model",
        choices=model_choices,
        value=model_choices[0],
        info="Choose the AI model to chat with",
    )

    # Wire the widgets to chatbot_hf; argument order matches its signature
    # (question, temperature, model).
    demo = gr.Interface(
        fn=chatbot_hf,
        inputs=[question_box, temperature_slider, model_dropdown],
        outputs=gr.Textbox(label="AI Response", lines=20),
        title="🤖 HuggingFace Chat Interface",
        description="""
        Chat with various large language models hosted on HuggingFace.
        Adjust the temperature to control response creativity.
        """,
        article="""
        ### Tips
        - For factual responses, use lower temperature (0.1-0.3)
        - For creative writing, use higher temperature (0.7-0.9)
        - Different models may have different strengths
        """,
    )
    demo.launch()
# Launch the app only when this file is executed directly (not when imported).
if __name__ == "__main__":
    main()