"""Offline chat demo: send a multi-turn conversation to a vLLM model.

The script builds a fixed conversation, runs it through ``LLM.chat`` and
prints the prompt and the generated reply for each returned output.
"""

from vllm import LLM, SamplingParams

# Checkpoint passed to ``LLM(model=...)``; a local GGUF file on this machine.
MODEL_PATH = "/mnt/data/xiuying/Code/vllm-deploy/MiniCPM-V-4-Q4_K_M.gguf"
# Tokenizer is supplied separately from the GGUF file
# (presumably a Hugging Face repo id -- confirm).
TOKENIZER = "openbmb/MiniCPM-V-4"

# In this script, we demonstrate how to pass input to the chat method:
# a list of {"role", "content"} messages in chronological order.
conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant",
    },
    {
        "role": "user",
        "content": "Hello",
    },
    {
        "role": "assistant",
        "content": "Hello! How can I assist you today?",
    },
    {
        "role": "user",
        "content": "Write an essay about the importance of higher education.",
    },
]


def main():
    """Load the model, run the conversation through it and print the result."""
    # Create a sampling params object.
    # NOTE(review): max_tokens is not set, so the library's default output
    # length cap applies; an essay prompt may be cut short -- confirm and
    # raise it if longer output is wanted.
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    # Create an LLM. trust_remote_code=True is forwarded to vLLM as-is.
    llm = LLM(
        model=MODEL_PATH,
        tokenizer=TOKENIZER,
        trust_remote_code=True,
    )

    # Generate a reply to the conversation. The result is iterated below;
    # each item exposes the prompt and the generated completions.
    outputs = llm.chat(conversation, sampling_params)

    # Print the outputs.
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


# Guard the entry point: vLLM may start worker processes, and with the
# "spawn" start method this module is re-imported in each child, so the
# engine must not be constructed at import time.
if __name__ == "__main__":
    main()