"""Minimal streaming chatbot UI: Gradio front end over a hosted Hugging Face LLM."""

import gradio as gr
import random  # NOTE(review): currently unused; remove if no fallback responder is planned
from huggingface_hub import InferenceClient

# Change the model ID here to swap the underlying LLM.
client = InferenceClient("google/gemma-3-27b-it")


def respond(message, history):
    """Stream a chat completion for *message*, yielding the reply so far.

    Args:
        message: The latest user message as a plain string.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts, the
            format supplied by ``gr.ChatInterface(type="messages")``.

    Yields:
        str: The assistant reply accumulated so far; Gradio re-renders the
        chat bubble on every yield, producing the streaming effect.
    """
    messages = [{"role": "system", "content": "You are a sarcastic chatbot"}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Loop variable is `chunk`, not `message` — the original shadowed the
    # user's message parameter with each streamed event.
    for chunk in client.chat_completion(
        messages,
        max_tokens=400,
        stream=True,
    ):
        # delta.content can be None on the final/empty streamed chunk;
        # `or ""` prevents a TypeError from `str + None` mid-stream.
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response


chatbot = gr.ChatInterface(respond, type="messages", title="chatty")

if __name__ == "__main__":
    # debug=True keeps the server in the foreground and surfaces tracebacks.
    chatbot.launch(debug=True)