import os
import gradio as gr
from openai import OpenAI

# -------------------------------------------------
# πŸ” 1. Secure API setup
# -------------------------------------------------
# IMPORTANT:
# Do NOT hardcode your token.
# Instead, go to your Space β†’ Settings β†’ Repository secrets
# Add a secret with:
#   Key: HF_TOKEN
#   Value: your Hugging Face token (e.g. hf_xxx...)
# Then this will safely load it at runtime:
# OpenAI-compatible client pointed at the Hugging Face Inference Router.
# The key comes from the HF_TOKEN environment variable (set as a Space
# repository secret per the note above); os.getenv returns None when the
# variable is unset, so a missing token surfaces as an auth error at
# request time rather than at import time.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.getenv("HF_TOKEN"),
)

# -------------------------------------------------
# 🧩 2. Chat function (with streaming)
# -------------------------------------------------
def chat_with_model(message, history, model_name):
    """Stream a chat completion for *message*, given the prior conversation.

    Args:
        message: The user's latest message text.
        history: Prior turns. Accepts both Gradio history formats:
            a list of ``(user, assistant)`` pairs, or a list of
            ``{"role": ..., "content": ...}`` dicts ("messages" format).
        model_name: Router model id, e.g. ``"openai/gpt-oss-120b:fireworks-ai"``.

    Yields:
        str: The accumulated assistant reply so far (live streaming output).
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: entries are already role/content dicts.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            human, assistant = turn
            messages.append({"role": "user", "content": human})
            if assistant:  # skip the placeholder of a not-yet-answered turn
                messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model=model_name,
        messages=messages,
        stream=True,
    )

    full_reply = ""
    for chunk in stream:
        # Some providers send keep-alive / usage chunks with an empty
        # `choices` list; indexing [0] on those would raise IndexError.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            full_reply += delta
            yield full_reply  # stream live output

# -------------------------------------------------
# 🎨 3. Gradio interface
# -------------------------------------------------
# -----------------------------------------------------------------
# UI layout. NOTE: the original wired the model choice through
#   fn=lambda msg, hist: chat_with_model(msg, hist, model_selector.value)
# but `model_selector.value` is the component's construction-time default,
# so the user's dropdown selection was never forwarded. Passing the
# component via `additional_inputs` hands chat_with_model the LIVE value
# as its third argument on every send.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        <h2 style="text-align:center;">🤖 GPT-OSS Chat (via Hugging Face Router)</h2>
        <p style="text-align:center;">
        Stream responses from the open-weight GPT-OSS models — 20B and 120B — through the Hugging Face Inference Router (Fireworks AI provider).
        </p>
        """,
        elem_id="title",
    )

    with gr.Row():
        # Router model ids. The originally listed "openai/gpt-oss-7b" does
        # not exist (GPT-OSS is released in 20B and 120B sizes only) and
        # would fail at request time, so it is dropped from the choices.
        model_selector = gr.Dropdown(
            label="Select Model",
            choices=[
                "openai/gpt-oss-20b:fireworks-ai",
                "openai/gpt-oss-120b:fireworks-ai",
            ],
            value="openai/gpt-oss-120b:fireworks-ai",
            interactive=True,
        )

    gr.ChatInterface(
        fn=chat_with_model,
        additional_inputs=[model_selector],  # live dropdown value -> model_name
        title="GPT-OSS Chat",
        chatbot=gr.Chatbot(height=500, show_label=False),
        textbox=gr.Textbox(placeholder="Ask me anything...", autofocus=True),
        retry_btn="↩ Retry",
        clear_btn="🧹 Clear Chat",
    )

# -------------------------------------------------
# πŸš€ 4. Launch app
# -------------------------------------------------
# Launch only when executed directly, so importing this module (e.g. for
# tests or tooling) does not start the server.
if __name__ == "__main__":
    demo.launch()