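"""Gradio chat UI for HuggingFaceH4/zephyr-7b-beta via the Hugging Face Inference API."""
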
import os

import gradio as gr
from huggingface_hub import InferenceClient


def check_api_status(model_id, token):
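    """Probe the Inference API and report whether the model endpoint responds."""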
    try:
        client = InferenceClient(model_id, token=token)
        # Minimal one-token request to test that we can reach the API.
        client.chat_completion(
            [{"role": "user", "content": "test"}],
            max_tokens=1,
            stream=False,
        )
        return "API is accessible and responding"
    except Exception as e:
        if "rate limit" in str(e).lower():
            return "API is accessible (rate limited)"
        return f"API status: {str(e)}"


def get_api_status():
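    """Summarize connection status for the configured model, for the UI footer."""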
    token = os.getenv("HF_TOKEN")
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    if not token:
        return "⚠️ No API token found. Please set the HF_TOKEN environment variable."
    try:
        status = check_api_status(model_id, token)
        return f"✅ Connected to {model_id} | {status}"
    except Exception as e:
        return f"❌ Error: {str(e)}"


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
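    """Stream a chat completion for `message`, replaying `history` as context."""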
    token = os.getenv("HF_TOKEN")
    if not token:
        yield "Error: Please set your Hugging Face API token in the HF_TOKEN environment variable."
        return

    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)

    # Rebuild the conversation: system prompt, prior turns, then the new message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    try:
        response = ""
        # Iterate as `chunk` so the loop does not shadow the `message`
        # and `token` variables above.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            delta = chunk.choices[0].delta.content
            if delta:  # the final streamed chunk may carry a None delta
                response += delta
            yield response
    except Exception as e:
        yield f"Error during chat completion: {str(e)}"


with gr.Blocks() as demo:
    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value="You are a friendly Chatbot.",
                label="System message",
            ),
            gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max new tokens",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )
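    # ChatInterface calls respond(message, history, *additional_inputs),
    # so the widgets above must match respond()'s trailing parameters in order.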
    # Show the API status in a footer, refreshed every 30 seconds. Gradio only
    # re-runs `value` on the `every` interval when it is a callable, so pass a
    # function instead of a string formatted once at build time.
    def footer_html():
        return (
            "<div style='text-align: center; padding: 10px; background-color: #f0f0f0; "
            f"border-top: 1px solid #ddd;'>{get_api_status()}</div>"
        )

    footer = gr.HTML(value=footer_html, every=30)
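
# Local usage (assuming this file is saved as app.py; the token value is a placeholder):
#   HF_TOKEN=hf_xxx python app.py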
if __name__ == "__main__":
    demo.launch()