"""Gradio demo: streaming text completion from open-source LLMs via the HF Inference API."""

import gradio as gr
from huggingface_hub import InferenceClient

# Three open-source models to choose from (dropdown label -> model repo id).
MODEL_CHOICES = {
    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
    "Zephyr 7B Beta (HuggingFaceH4/zephyr-7b-beta)": "HuggingFaceH4/zephyr-7b-beta",
}


def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token):
    """
    Generate a text completion from a Hugging Face model, streamed chunk by chunk.

    Parameters
    ----------
    prompt : str
        Text to complete. Empty or whitespace-only prompts are rejected.
    max_tokens : int | float
        Maximum number of new tokens to generate (coerced to int).
    temperature : float
        Sampling temperature forwarded to ``text_generation``.
    top_p : float
        Nucleus-sampling parameter forwarded to ``text_generation``.
    model_choice : str
        A key of ``MODEL_CHOICES`` (the dropdown label shown in the UI).
    hf_token : object | None
        Token object supplied by ``gr.LoginButton``; must expose a truthy
        ``token`` attribute for generation to proceed.

    Yields
    ------
    str
        The completion accumulated so far (so the output textbox grows in
        place), or a user-facing warning/error message.
    """
    # Reject empty / whitespace-only prompts up front instead of calling the API.
    if not prompt or not prompt.strip():
        yield "⚠️ Please enter a prompt."
        return

    # The Inference API needs an access token; the login button supplies an
    # object carrying one in its ``token`` attribute when the user is logged in.
    if not hf_token or not getattr(hf_token, "token", None):
        yield "🔐 Please login with your Hugging Face account (see left sidebar)."
        return

    # Defensive lookup: an unexpected dropdown value would otherwise raise
    # KeyError out of the event handler instead of showing a message.
    model_id = MODEL_CHOICES.get(model_choice)
    if model_id is None:
        yield f"❌ Error while generating: unknown model choice {model_choice!r}"
        return

    client = InferenceClient(model=model_id, token=hf_token.token)
    generated = ""
    try:
        for chunk in client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.0,
            stream=True,
        ):
            # Yield the running concatenation so the UI streams the text in place.
            generated += chunk
            yield generated
    except Exception as e:  # UI boundary: surface the failure instead of crashing.
        yield f"❌ Error while generating: {e}"


with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo (Open-Source LLMs via Hugging Face Inference API)")
    gr.Markdown(
        "Pick a model, enter a prompt, and stream completions. "
        "Some models require logging in (click **Login** in the sidebar)."
    )
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt", placeholder="Type the beginning of your text...", lines=4)
            max_tokens = gr.Slider(1, 1024, value=100, step=1, label="Max new tokens")
            temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Top-p")
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=next(iter(MODEL_CHOICES)),  # first entry is the default
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            with gr.Sidebar():
                login = gr.LoginButton()  # supplies OAuth token
            output = gr.Textbox(label="Generated Completion", lines=15)

    # Function has 6 args, so inputs list must have exactly 6 items
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice, login],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()