from mlc_llm import MLCEngine


def main() -> None:
    """Interactive chat REPL backed by a local MLC-LLM model.

    Reads user input in a loop, streams the model's reply token by token,
    and keeps the full conversation history so each request has context.
    The user types "exit" (case-insensitive) to quit.
    """
    model = "."  # model directory — "." means the current working directory
    engine = MLCEngine(model, mode="interactive")
    messages: list[dict[str, str]] = []  # full history, resent on every request
    print("輸入exit以結束")
    try:
        while True:
            user_input = input("你:").strip()
            if user_input.lower() == "exit":
                break
            messages.append({"role": "user", "content": user_input})

            chunks: list[str] = []
            # Stream the completion as OpenAI-compatible chunks.
            for response in engine.chat.completions.create(
                messages=messages,
                model=model,
                stream=True,
            ):
                for choice in response.choices:
                    # delta.content may be None (e.g. the role-only first
                    # chunk) — skip it so we don't print/join "None".
                    piece = choice.delta.content
                    if piece:
                        print(piece, end="", flush=True)
                        chunks.append(piece)

            # Record the assistant turn so later requests keep context.
            messages.append({"role": "assistant", "content": "".join(chunks)})
            print("\n")
    finally:
        # Ensure engine resources are released even on Ctrl-C / EOF.
        engine.terminate()


if __name__ == "__main__":
    main()