"""Minimal vLLM smoke test.

Loads a small model (facebook/opt-125m), runs a single generation with
fixed sampling parameters, and prints the prompt alongside the generated
text. Intended as a quick end-to-end sanity check of a vLLM install.
"""

from vllm import LLM, SamplingParams

# Load a small model for testing (opt-125m keeps download/VRAM cost low).
llm = LLM(model="facebook/opt-125m")

# Define sampling params: mildly random output, capped at 32 new tokens.
sampling_params = SamplingParams(temperature=0.7, max_tokens=32)

# Run inference on a single prompt; generate() accepts a list of prompts
# and returns one RequestOutput per prompt.
outputs = llm.generate(["Hello, how are you today?"], sampling_params)

# Print results: each RequestOutput carries the original prompt and a list
# of completions; we only requested one, so index 0 is the sole completion.
for output in outputs:
    print(f"Prompt: {output.prompt}")
    print(f"Generated: {output.outputs[0].text}")