import gradio as gr
from huggingface_hub import InferenceClient


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    hf_token: gr.OAuthToken,
):
    """Stream an assistant reply for *message* given the chat *history*.

    Yields the accumulated response text after each streamed chunk so the
    Gradio UI can render the answer incrementally.

    Args:
        message: The user's latest message text.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts
            (``type="messages"`` format used by ``gr.ChatInterface``).
        system_message: System prompt placed first in the message list.
        hf_token: OAuth token injected by Gradio's login flow; its ``.token``
            authenticates the Hugging Face Inference API call.
    """
    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # NOTE: loop variable renamed from `message` — the original shadowed the
    # user-message parameter, clobbering it for the rest of the function.
    for chunk in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        choices = chunk.choices
        # Some stream chunks carry no choices or an empty delta; skip those
        # but still yield so the UI stays in sync with the stream.
        if choices and choices[0].delta.content:
            response += choices[0].delta.content
        yield response


# Chat UI in OpenAI-style "messages" format; the system prompt is exposed
# as an editable extra input.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    ],
)

# Full-height flex layout so the chat area fills the viewport, with a
# tighter cap on small (mobile) screens.
with gr.Blocks(css="""
body {
    margin: 0;
    padding: 0;
    font-family: system-ui, sans-serif;
}
.gradio-container {
    height: 100vh;
    width: 100%;
    display: flex;
    flex-direction: column;
}
.gr-blocks {
    flex: 1;
    display: flex;
    flex-direction: column;
}
.gr-chatbot {
    flex: 1;
    overflow-y: auto;
    max-height: calc(100vh - 120px);
}
@media (max-width: 768px) {
    .gradio-container, .gr-blocks {
        padding: 0;
        margin: 0;
    }
    .gr-chatbot {
        max-height: calc(100vh - 100px);
    }
}
""") as demo:
    with gr.Sidebar():
        # Login supplies the OAuth token consumed by respond()'s hf_token.
        gr.LoginButton()
    chatbot.render()


if __name__ == "__main__":
    demo.launch()