import os

# Force the V1 engine; setting this before vLLM spins up ensures it is picked up.
os.environ["VLLM_USE_V1"] = "1"

import torch
from vllm import LLM, SamplingParams

model_path = "/model"

print("--- Final Blackwell NVFP4 Code Test ---")

# Near-greedy sampling for code generation
sampling_params = SamplingParams(
    temperature=0.01,  # Almost greedy, keeps code output close to deterministic
    top_p=0.95,
    max_tokens=512,
)

try:
    llm = LLM(
        model=model_path,
        quantization="modelopt",      # NVFP4 checkpoint exported with NVIDIA ModelOpt
        trust_remote_code=True,
        tensor_parallel_size=1,
        gpu_memory_utilization=0.6,
        max_model_len=16384,
        enforce_eager=True,           # Skip CUDA graph capture for easier debugging
    )

    # Alpaca-style prompt with a specific coding task
    prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n"
        "Write a Python function called `is_prime(n)` that uses a loop to check "
        "if a number is prime. Then test it with 29.\n\n"
        "### Response:\n"
    )
    print(f"Prompt: {prompt}\n")

    outputs = llm.generate([prompt], sampling_params)

    for output in outputs:
        generated_text = output.outputs[0].text
        print("--- Model Output ---")
        print(generated_text)
        print("--- End of Output ---")

except Exception as e:
    print(f"CRITICAL ERROR: {e}")