File size: 415 Bytes
fed1832
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from vllm import LLM, SamplingParams

def main() -> None:
    """Run a one-prompt generation smoke test against a small model.

    Loads ``facebook/opt-125m``, generates at most 32 new tokens for a
    single prompt with temperature 0.7, and prints each prompt followed by
    the text of its first completion.
    """
    # Load a small model for testing
    llm = LLM(model="facebook/opt-125m")

    # Define sampling params
    sampling_params = SamplingParams(temperature=0.7, max_tokens=32)

    # Run inference
    outputs = llm.generate(["Hello, how are you today?"], sampling_params)

    # Print results
    for output in outputs:
        print(f"Prompt: {output.prompt}")
        print(f"Generated: {output.outputs[0].text}")


# Keep model loading behind the entry-point guard. vLLM may launch worker
# processes via multiprocessing; with the "spawn" start method each child
# re-imports this module, and unguarded top-level code would run again there.
if __name__ == "__main__":
    main()