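"""Gradio chat UI for HuggingFaceH4/zephyr-7b-beta via the Hugging Face Inference API."""
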
import os

import gradio as gr
from huggingface_hub import InferenceClient


def check_api_status(model_id, token):
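    """Probe the Inference API and report whether the model endpoint responds."""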
    try:
        client = InferenceClient(model_id, token=token)
        # Minimal one-token request to test that we can reach the API.
        client.chat_completion(
            [{"role": "user", "content": "test"}],
            max_tokens=1,
            stream=False,
        )
        return "API is accessible and responding"
    except Exception as e:
        if "rate limit" in str(e).lower():
            return "API is accessible (rate limited)"
        return f"API status: {str(e)}"


def get_api_status():
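    """Summarize connection status for the configured model, for the UI footer."""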
    token = os.getenv("HF_TOKEN")
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    if not token:
        return "⚠️ No API token found. Please set the HF_TOKEN environment variable."
    try:
        status = check_api_status(model_id, token)
        return f"✅ Connected to {model_id} | {status}"
    except Exception as e:
        return f"❌ Error: {str(e)}"


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
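    """Stream a chat completion for `message`, replaying `history` as context."""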
    token = os.getenv("HF_TOKEN")
    if not token:
        yield "Error: Please set your Hugging Face API token in the HF_TOKEN environment variable."
        return

    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)

    # Rebuild the conversation: system prompt, prior turns, then the new message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    try:
        response = ""
        # Iterate as `chunk` so the loop does not shadow the `message`
        # and `token` variables above.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            delta = chunk.choices[0].delta.content
            if delta:  # the final streamed chunk may carry a None delta
                response += delta
            yield response
    except Exception as e:
        yield f"Error during chat completion: {str(e)}"


with gr.Blocks() as demo:
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a friendly Chatbot.",
                label="System message",
            ),
            gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max new tokens",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )
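    # ChatInterface calls respond(message, history, *additional_inputs),
    # so the widgets above must match respond()'s trailing parameters in order.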
    # Show the API status in a footer, refreshed every 30 seconds. Gradio only
    # re-runs `value` on the `every` interval when it is a callable, so pass a
    # function instead of a string formatted once at build time.
    def footer_html():
        return (
            "<div style='text-align: center; padding: 10px; background-color: #f0f0f0; "
            f"border-top: 1px solid #ddd;'>{get_api_status()}</div>"
        )

    footer = gr.HTML(value=footer_html, every=30)
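
# Local usage (assuming this file is saved as app.py; the token value is a placeholder):
#   HF_TOKEN=hf_xxx python app.py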
if __name__ == "__main__":
    demo.launch()