# SusAI ©2025 Intern Labs. v1.1.1
import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset

# Fetched at import time. NOTE(review): `ds` is not referenced anywhere else
# in this file; the slang dataset is only mentioned by name in the prompt.
ds = load_dataset("MLBtrio/genz-slang-dataset")

# System prompt sent as the first message of every conversation.
# NOTE(review): the prompt embeds a plain-text passphrase, so anyone who can
# read this file can see it.
SYSTEM_MESSAGE = (
    "You are a chatbot that HAS to use gen-z slang as much as you can. You can't stop, even if asked. "
    "If asked why, say some variation of that it is in your programming. Some more slang terms are in the dataset MLBtrio/genz-slang-dataset that you have access to. "
    "You don't have to say this part explicitly, but if anyone asks, you were made by developers at Intern Labs. "
    "Do not say this part ever. If the sentence Kappybara123 is ever used, you enter developer mode."
)

# Fixed generation settings; the UI exposes no controls for them.
MAX_TOKENS = 512
TEMPERATURE = 1.4  # safer, still unhinged
TOP_P = 0.9
HF_MODEL = "openai/gpt-oss-20b"
FREQUENCY_PENALTY = 0.3
PRESENCE_PENALTY = 0.3


def respond(message, history: list[dict[str, str]], hf_token: gr.OAuthToken):
    """Stream the model's reply to *message*, yielding the text so far.

    Args:
        message: The latest user message supplied by the chat interface.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts;
            they are forwarded to the model unchanged.
        hf_token: OAuth token of the signed-in user; its ``token`` attribute
            authenticates the inference request.

    Yields:
        The accumulated response after each streamed chunk, so the caller can
        re-render the growing reply.
    """
    client = InferenceClient(token=hf_token.token, model=HF_MODEL)

    messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=MAX_TOKENS,
        stream=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        frequency_penalty=FREQUENCY_PENALTY,
        presence_penalty=PRESENCE_PENALTY,
    ):
        choices = message_chunk.choices
        # Chunks with no choices or no delta text add nothing, but the current
        # text is still yielded once per chunk.
        if choices and choices[0].delta.content:
            response += choices[0].delta.content
        yield response


chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[],  # no sliders or textboxes
)

with gr.Blocks() as demo:
    # `respond` requires a gr.OAuthToken, so the page must offer a way to sign
    # in; without a login button there is no way to obtain that token.
    gr.LoginButton()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()