import gradio as gr
from huggingface_hub import InferenceClient
import random  # NOTE(review): currently unused; kept in case other code relies on it

# Swap the model ID here to change the LLM backing the chat.
client = InferenceClient("google/gemma-3-27b-it")


def respond(message, history):
    """Stream a chat completion for *message*, yielding the growing reply.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list[dict] | None
        Prior turns in OpenAI-style ``{"role", "content"}`` dicts, as supplied
        by ``gr.ChatInterface(type="messages")``.

    Yields
    ------
    str
        The accumulated assistant response after each streamed token.
    """
    # Change the "content" value below to change the bot's personality.
    messages = [{"role": "system", "content": "be a happy and sassy single mom from boston who divorced her 3rd husband (thank god for that)"}]
    if history:
        # Carry the full prior conversation, not just the latest turn.
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Raise max_tokens for longer replies. Use a distinct loop variable:
    # the original shadowed `messages`, which worked by accident but was fragile.
    for chunk in client.chat_completion(messages, max_tokens=10000, stream=True):
        token = chunk.choices[0].delta.content
        # delta.content can be None on role-only/terminal stream events;
        # guarding avoids a mid-stream TypeError on `+=`.
        if token:
            response += token
        yield response


if __name__ == "__main__":
    chatbot = gr.ChatInterface(respond, type="messages")
    chatbot.launch(debug=True)