import openai
import base64
import requests


def simple_chat(client: openai.OpenAI, model_name: str):
    """Demonstrate a Kimi chat completion in Thinking Mode and Instant Mode.

    Sends the same conversation twice through the OpenAI-compatible API:
    first with the default (thinking) behavior, printing the model's
    ``reasoning_content`` alongside its answer, then with thinking disabled
    via ``extra_body`` to get an instant (non-reasoning) response.

    Args:
        client: An ``openai.OpenAI`` client configured for the Moonshot/Kimi
            endpoint (base_url and api_key already set by the caller).
        model_name: Name of the Kimi model to query.

    Returns:
        None. Results are printed to stdout.
    """
    messages = [
        {'role': 'system', 'content': 'You are Kimi, an AI assistant created by Moonshot AI.'},
        {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': 'which one is bigger, 9.11 or 9.9? think carefully.'}
            ],
        },
    ]

    # Default request: thinking mode is on, so the message carries both the
    # chain-of-thought (`reasoning_content`) and the final answer (`content`).
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        stream=False,
        max_tokens=4096,
    )
    print('====== Below is reasoning_content in Thinking Mode ======')
    print(f'reasoning content: {response.choices[0].message.reasoning_content}')
    print('====== Below is response in Thinking Mode ======')
    print(f'response: {response.choices[0].message.content}')

    # To use instant mode, pass {"thinking": {"type": "disabled"}}
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        stream=False,
        max_tokens=4096,
        extra_body={'thinking': {'type': 'disabled'}},  # this is for official API
        # extra_body={'chat_template_kwargs': {'thinking': False}}  # this is for vLLM/SGLang
    )
    print('====== Below is response in Instant Mode ======')
    print(f'response: {response.choices[0].message.content}')