Spaces:
Sleeping
Sleeping
File size: 1,177 Bytes
7d2fea2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | import argparse
from openai import OpenAI
def main() -> None:
    """Send a single chat prompt to an OpenAI-compatible endpoint and print the reply.

    Command-line options:
        --base-url  Required. Endpoint base URL.
        --model     Required. Served model name.
        --prompt    User message to send; a short default prompt is used when omitted.

    The text of the first returned choice is written to stdout.
    """
    cli = argparse.ArgumentParser(description="Test an OpenAI-compatible Modal vLLM endpoint.")
    cli.add_argument("--base-url", required=True, help="Endpoint base URL, for example https://...modal.run/v1")
    cli.add_argument("--model", required=True, help="Served model name, for example minicpm-1b")
    cli.add_argument("--prompt", default="Reply with one short sentence confirming the service works.")
    opts = cli.parse_args()

    # Normalise the URL so it ends with exactly one trailing slash.
    endpoint = opts.base_url.rstrip("/") + "/"
    # NOTE(review): the key is a fixed placeholder; presumably the endpoint
    # does not validate it -- confirm against the deployment.
    client = OpenAI(base_url=endpoint, api_key="EMPTY")

    # Pick model-family-specific request extras based on the lowercased name.
    model_key = opts.model.lower()
    thinking_off = {"enable_thinking": False}
    if model_key.startswith("qwen"):
        extras = {"top_k": 20, "chat_template_kwargs": thinking_off}
    elif "nemotron" in model_key:
        extras = {"chat_template_kwargs": thinking_off}
    else:
        extras = None

    reply = client.chat.completions.create(
        model=opts.model,
        messages=[{"role": "user", "content": opts.prompt}],
        temperature=0.2,
        max_tokens=128,
        extra_body=extras,
    )
    first_choice = reply.choices[0]
    print(first_choice.message.content)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|