File size: 2,787 Bytes
d974885
 
 
4aacb8c
3af1594
 
 
6135f2e
3af1594
d974885
4aacb8c
1a4d54f
d974885
4aacb8c
d974885
6135f2e
4aacb8c
3af1594
d974885
1a4d54f
4aacb8c
6135f2e
d974885
4aacb8c
 
6135f2e
4aacb8c
6135f2e
 
 
 
 
 
 
4aacb8c
6135f2e
 
 
 
4aacb8c
d974885
63b7a2a
d974885
4aacb8c
3af1594
4aacb8c
 
3af1594
 
 
 
1a4d54f
6135f2e
 
 
3af1594
 
 
 
 
4aacb8c
3af1594
6135f2e
1a4d54f
6135f2e
d974885
1a4d54f
3af1594
 
6135f2e
3af1594
 
d974885
 
3af1594
6135f2e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
from huggingface_hub import InferenceClient

# Three open-source instruct models offered in the dropdown.
# Keys are the human-readable labels shown in the UI; values are the
# Hugging Face repo ids sent to the Inference API.
_MODELS = [
    ("Mistral 7B Instruct", "mistralai/Mistral-7B-Instruct-v0.2"),
    ("Falcon 7B Instruct", "tiiuae/falcon-7b-instruct"),
    ("Zephyr 7B Beta", "HuggingFaceH4/zephyr-7b-beta"),
]
MODEL_CHOICES = {f"{label} ({repo_id})": repo_id for label, repo_id in _MODELS}


def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token):
    """
    Generate a text completion from a Hugging Face model, streamed chunk by chunk.

    Yields the *accumulated* completion after each streamed chunk so the UI
    textbox updates progressively rather than all at once.

    Parameters:
        prompt: User-supplied text to complete.
        max_tokens: Maximum number of new tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).
        top_p: Nucleus-sampling probability mass (coerced to float).
        model_choice: Display label; must be a key of MODEL_CHOICES.
        hf_token: Object carrying a ``.token`` attribute (OAuth token), or
            None/absent when the user is not logged in.

    Yields:
        str: Growing completion text, or a single warning/error message.
    """
    # Robustness fix: also reject whitespace-only prompts, which previously
    # slipped past the truthiness check and were sent to the API.
    if not prompt or not prompt.strip():
        yield "⚠️ Please enter a prompt."
        return

    # getattr guards against hf_token being None or lacking `.token`.
    if not hf_token or not getattr(hf_token, "token", None):
        yield "🔐 Please login with your Hugging Face account (see left sidebar)."
        return

    model_id = MODEL_CHOICES[model_choice]
    client = InferenceClient(model=model_id, token=hf_token.token)

    generated = ""
    try:
        for chunk in client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.0,
            stream=True,
        ):
            # Accumulate so each yield shows the full text generated so far.
            generated += chunk
            yield generated
    except Exception as e:  # surface API/network errors in the UI, not a crash
        yield f"❌ Error while generating: {e}"


# Build the Gradio UI. This runs at import time; `demo` is launched below.
with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo (Open-Source LLMs via Hugging Face Inference API)")
    gr.Markdown(
        "Pick a model, enter a prompt, and stream completions. "
        "Some models require logging in (click **Login** in the sidebar)."
    )

    with gr.Row():
        # Left column: prompt input and generation controls.
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt", placeholder="Type the beginning of your text...", lines=4)
            max_tokens = gr.Slider(1, 1024, value=100, step=1, label="Max new tokens")
            temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Top-p")
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=list(MODEL_CHOICES.keys())[0],  # default: first listed model
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        # Right column: login sidebar plus the streamed output box.
        with gr.Column(scale=3):
            with gr.Sidebar():
                login = gr.LoginButton()  # supplies OAuth token
            output = gr.Textbox(label="Generated Completion", lines=15)

    # Function has 6 args, so inputs list must have exactly 6 items.
    # NOTE(review): this assumes passing `login` in `inputs` delivers an OAuth
    # token object (with a `.token` attribute, as `complete_text` expects) —
    # confirm against Gradio's Hugging Face OAuth documentation.
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice, login],
        outputs=output,
    )

# Launch only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    demo.launch()