from mlc_llm import MLCEngine

# Create the engine, loading the compiled model from the current directory.
model = "."
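# mode="interactive" tunes the engine for a single chat session,
# favoring low per-token latency over batch throughput.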
engine = MLCEngine(model, mode="interactive")

# Full conversation history, resent on every request so the model keeps context.
messages = []
print("Type 'exit' to quit")
while True:
    user_input = input("You: ").strip()
    if user_input.lower() == "exit":
        break
    messages.append({"role": "user", "content": user_input})

    # Collect streamed text chunks so the full reply can be stored afterwards.
    s_tmps = []
    # Run a streaming chat completion through the OpenAI-compatible API,
    # passing the whole history so the model sees prior turns.
    for response in engine.chat.completions.create(
        messages=messages,
        model=model,
        stream=True,
    ):
        for choice in response.choices:
            tmp = choice.delta.content
            if tmp:  # skip empty/None deltas
                print(tmp, end="", flush=True)
                s_tmps.append(tmp)

    response_string = "".join(s_tmps)
| messages.append( {"role": "assistant", "content": response_string} ) | |
| #print(response) | |
| #response = stmps | |
| #messages.append( {"role": "assistant", "content": response} ) | |
| print("\n") | |
| # print(messages) | |
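# Shut down the engine's background workers once the chat loop exits.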
engine.terminate()