# File size: 2,394 Bytes
"""Minimal image-to-video generation against a vLLM-Omni endpoint (sync mode).

Run from the Cosmos3-Super-Image2Video repo root:

    python scripts/gen_video.py \
        --endpoint <endpoint-url> \
        --prompt-file assets/example_prompt.json \
        --image-path assets/example_first_frame.png \
        --output-path scripts/output.mp4
"""

import argparse
import json
from pathlib import Path

import requests

# Fixed generation settings: 16:9 480p, 189 frames @ 24 fps.
# main() writes these into both the prompt metadata and the request form
# fields, so the two always describe the same clip.
WIDTH, HEIGHT = 832, 480  # output frame size in pixels
NUM_FRAMES, FPS = 189, 24  # clip length in frames and playback rate
# Aspect-ratio label stored in the prompt metadata; the separator is a comma.
ASPECT_RATIO = "16,9"


def main() -> None:
    """Generate one image-to-video sample and save the returned video to disk.

    Parses the command-line arguments, loads the prompt spec (a JSON file with
    ``prompt`` and ``negative_prompt`` keys), overwrites the prompt's
    duration/fps/resolution/aspect-ratio metadata with the module's fixed
    generation settings, and POSTs the prompt together with the first-frame
    image as a multipart form to ``<endpoint>/v1/videos/sync``. The response
    body is written unchanged to ``--output-path``.

    Raises:
        OSError: If the prompt file or image cannot be read, or the output
            file cannot be written.
        KeyError: If the prompt spec lacks ``prompt`` or ``negative_prompt``.
        json.JSONDecodeError: If the prompt file or the embedded prompt string
            is not valid JSON.
        TypeError: If the decoded prompt is not a JSON object.
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status; the
            message includes an excerpt of the server's response body.
        requests.RequestException: On connection failures or timeouts.
    """
    parser = argparse.ArgumentParser(description="Generate one I2V sample (sync mode).")
    parser.add_argument("--endpoint", required=True, help="vLLM-Omni endpoint base URL.")
    parser.add_argument("--prompt-file", type=Path, default=Path("assets/example_prompt.json"))
    parser.add_argument("--image-path", type=Path, default=Path("assets/example_first_frame.png"))
    parser.add_argument("--output-path", type=Path, default=Path("scripts/output.mp4"))
    args = parser.parse_args()

    spec = json.loads(args.prompt_file.read_text(encoding="utf-8"))

    # The spec normally stores the prompt as a JSON-encoded string. Decode it so
    # the metadata fields can be overwritten, but also accept a spec that holds
    # the prompt as a plain JSON object; it is re-serialized below either way.
    prompt = spec["prompt"]
    if isinstance(prompt, str):
        prompt = json.loads(prompt)
    if not isinstance(prompt, dict):
        raise TypeError(
            f"'prompt' in {args.prompt_file} must be a JSON object "
            f"(or a string encoding one), got {type(prompt).__name__}"
        )

    # Force the prompt metadata to agree with the request parameters sent below,
    # whatever values the prompt file carried.
    prompt["duration"] = f"{NUM_FRAMES // FPS}s"  # whole seconds, truncated
    prompt["fps"] = float(round(FPS))
    prompt["resolution"] = {"H": HEIGHT, "W": WIDTH}
    prompt["aspect_ratio"] = ASPECT_RATIO

    data = {
        "prompt": json.dumps(prompt, ensure_ascii=False),
        "negative_prompt": spec["negative_prompt"],
        "size": f"{WIDTH}x{HEIGHT}",
        "num_frames": NUM_FRAMES,
        "fps": FPS,
        "num_inference_steps": 50,
        "guidance_scale": 6.0,
        "flow_shift": 5.0,
        # Nested options are sent as a JSON string inside a single form field.
        "extra_params": json.dumps({"use_resolution_template": False, "use_duration_template": False}),
    }
    # The image is always uploaded under the name "input.png" with an image/png
    # content type, regardless of the extension of --image-path.
    files = {"input_reference": ("input.png", args.image_path.read_bytes(), "image/png")}
    # NOTE(review): the curl User-Agent is presumably required by something in
    # front of the endpoint -- confirm before changing it.
    headers = {"Accept": "video/mp4", "User-Agent": "curl/8.5.0"}

    # Strip trailing slashes so "http://host/" does not yield "//v1/videos/sync".
    base_url = args.endpoint.rstrip("/")
    response = requests.post(
        f"{base_url}/v1/videos/sync",
        data=data,
        files=files,
        headers=headers,
        # (connect timeout, read timeout) in seconds; the read timeout is long
        # because the sync route only answers once the request has finished.
        timeout=(10, 600),
    )
    try:
        response.raise_for_status()
    except requests.HTTPError as err:
        # raise_for_status() reports only the status line. Attach a bounded
        # excerpt of the body, which is where a server would put its reason.
        detail = response.content[:2000].decode("utf-8", errors="replace")
        raise requests.HTTPError(f"{err}\nServer response: {detail}", response=response) from err

    video_bytes = response.content
    args.output_path.parent.mkdir(parents=True, exist_ok=True)
    args.output_path.write_bytes(video_bytes)
    print(f"Saved video to {args.output_path} ({len(video_bytes) / (1024 * 1024):.1f} MB)")


if __name__ == "__main__":
    main()