import gradio as gr
from huggingface_hub import InferenceClient

# this client will handle making requests to the model to generate responses
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(message, history):
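    """Generate one streamed reply to `message`, given the prior chat `history`."""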

    system_message = "You are a poet chatbot. You always respond with a rhyme!"
    
    # initialize a list of dictionaries to store the messages
    messages = [{"role": "system", 
                 "content": system_message}]
    
    # add all previous messages to the messages list 
    if history:
        messages.extend(history)

    # add the current user’s message to the messages list
    messages.append({"role": "user", "content": message})

    # make the chat completion API call, sending the messages
    # and other generation parameters to the model; with stream=True
    # the response arrives one token at a time
    response = ""

    # iterate through each chunk of the streamed response
    for chunk in client.chat_completion(
        messages,
        max_tokens=100,
        temperature=0.9,
        stream=True
    ):
        # add the newest token to the output and yield the partial response
        token = chunk.choices[0].delta.content or ""  # the delta may be empty on the final chunk
        response += token
        yield response  # Gradio displays each yielded value, so the reply streams in

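# build the chat UI around respond (messages-format history) and launch it with debug logging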
chatbot = gr.ChatInterface(respond, type="messages", theme='NoCrypt/miku')

chatbot.launch(debug=True)