File size: 415 Bytes
fed1832 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
from vllm import LLM, SamplingParams
# Minimal vLLM smoke test: load a model, generate a completion for one prompt,
# and print the result.
# NOTE(review): vLLM may re-import the main module when it starts worker
# processes with the "spawn" method; if this script is ever run that way,
# consider moving the statements below under `if __name__ == "__main__":`.
# Confirm against the vLLM troubleshooting docs before changing.

# Load a small model for testing.
llm = LLM(model="facebook/opt-125m")

# Sampling configuration passed to generate(): temperature 0.7, max_tokens 32.
sampling_params = SamplingParams(temperature=0.7, max_tokens=32)

# Run inference. generate() is given a list of prompts (here, a single one).
outputs = llm.generate(["Hello, how are you today?"], sampling_params)

# Print each prompt followed by the text of its first entry in `outputs`.
for output in outputs:
    print(f"Prompt: {output.prompt}")
    print(f"Generated: {output.outputs[0].text}")
|