# Minimal vLLM offline-inference smoke test: load a small model, generate a
# completion for one prompt, and print the prompt with the generated text.
from vllm import LLM, SamplingParams

# Small model so the test loads quickly.
MODEL_NAME = "facebook/opt-125m"
PROMPTS = ["Hello, how are you today?"]


def main() -> None:
    """Run one generation pass over PROMPTS and print each result."""
    # Load a small model for testing.
    llm = LLM(model=MODEL_NAME)

    # Define sampling params: temperature 0.7, at most 32 generated tokens.
    sampling_params = SamplingParams(temperature=0.7, max_tokens=32)

    # Run inference; one result object is returned per prompt.
    outputs = llm.generate(PROMPTS, sampling_params)

    # Print results, using the first (index 0) completion of each prompt.
    for output in outputs:
        print(f"Prompt: {output.prompt}")
        print(f"Generated: {output.outputs[0].text}")


# Entry-point guard: vLLM can start worker processes through Python
# multiprocessing, and under the "spawn" start method this module is
# re-imported in each child; unguarded top-level code would run again there.
if __name__ == "__main__":
    main()