Elias-Schwegler's picture
Upload folder using huggingface_hub
f16feb8 verified
import os
import torch
from vllm import LLM, SamplingParams
# Smoke test: load the quantized checkpoint at ``model_path`` with vLLM, run a
# single code-generation prompt, and print the completion.

# Force V1 engine.
# NOTE(review): this assignment runs after `vllm` has already been imported
# above. Confirm the installed vLLM version reads VLLM_USE_V1 lazily (at
# engine construction) rather than at import time; otherwise set it before
# the import.
os.environ["VLLM_USE_V1"] = "1"

model_path = "/model"

print("--- Final Blackwell NVFP4 Code Test ---")

# Sampling for code generation.
sampling_params = SamplingParams(
    temperature=0.01,  # Almost greedy for code
    top_p=0.95,
    max_tokens=512,
)

# Specific code prompt (instruction/response template). Adjacent literals are
# concatenated at compile time, so the resulting string is a single line-free
# constant identical to the one-line form.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n"
    "Write a Python function called `is_prime(n)` that uses a loop to "
    "check if a number is prime. Then test it with 29.\n\n"
    "### Response:\n"
)

try:
    llm = LLM(
        model=model_path,
        # NOTE(review): the banner says NVFP4; vLLM also exposes a separate
        # "modelopt_fp4" method. Confirm "modelopt" is the intended value for
        # this checkpoint and vLLM version.
        quantization="modelopt",
        trust_remote_code=True,
        tensor_parallel_size=1,
        gpu_memory_utilization=0.6,
        max_model_len=16384,
        enforce_eager=True,
    )

    print(f"Prompt: {prompt}\n")

    outputs = llm.generate([prompt], sampling_params)
    for output in outputs:
        # Only the first candidate completion of each request is shown.
        generated_text = output.outputs[0].text
        print("--- Model Output ---")
        print(generated_text)
        print("--- End of Output ---")
except Exception as e:
    # Keep the one-line banner so existing log greps still match, then
    # re-raise: the traceback is preserved and the process exits non-zero
    # instead of reporting a failed test run as success.
    print(f"CRITICAL ERROR: {e}")
    raise