Update to yield text one token at a time
app.py CHANGED

@@ -15,18 +15,27 @@ def respond(message, history):
     messages.extend(history)
 
     # add the current user’s message to the messages list
-    messages.append({"role": "user", "content": message})
+    messages.append({"role": "user", "content": message})
 
     # makes the chat completion API call,
     # sending the messages and other parameters to the model
-    response = client.chat_completion(
+    # implements streaming, where one word/token appears at a time
+    response = ""
+
+    # iterate through each message in the stream
+    for message in client.chat_completion(
         messages,
-        max_tokens=100,
+        max_tokens=100,
         temperature=0.9,
-    )
+        stream=True
+    ):
+        # add the tokens to the output content
+        token = message.choices[0].delta.content  # capture the most recent token
+        response += token  # add it to the response
+        yield response  # yield the accumulated response
 
     # extract and return the chatbot’s response
-    return response['choices'][0]['message']['content'].strip()
+    #return response['choices'][0]['message']['content'].strip()
 
 chatbot = gr.ChatInterface(respond, type="messages", theme='NoCrypt/miku')
 
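For context, here is a minimal, self-contained sketch of what the full app.py might look like after this change. The hunk does not show how `client` or the initial `messages` list are created, so the `InferenceClient` setup, the model name, and the system prompt below are assumptions, not part of this commit:

import gradio as gr
from huggingface_hub import InferenceClient

# assumed setup; the commit's hunk starts below this point
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(message, history):
    # assumed system prompt to seed the conversation
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    messages.extend(history)

    # add the current user's message to the messages list
    messages.append({"role": "user", "content": message})

    # stream the chat completion one chunk at a time
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=100,
        temperature=0.9,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        if token:  # guard added here: the stream's final delta can carry None
            response += token
        yield response  # Gradio re-renders the reply with each yielded value

chatbot = gr.ChatInterface(respond, type="messages", theme='NoCrypt/miku')

if __name__ == "__main__":
    chatbot.launch()

Yielding the accumulated `response` rather than the single token is what makes the text grow in place: `gr.ChatInterface` replaces the displayed reply with each value the generator yields, so each yield must contain everything generated so far.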