Bundle diffsynth library (no external repo dependency)

bc8c4af verified about 1 month ago

1.56 kB

	import av
	import numpy as np
	from io import BytesIO
	from .audio_video import write_video_audio as write_video_audio_ltx2


	def encode_single_frame(output_file: "str | BytesIO", image_array: np.ndarray, crf: float) -> None:
	    """Encode one RGB image as a single-frame H.264 (libx264) MP4.

	    The image is cropped (never padded) so that both dimensions are even:
	    an odd trailing row and/or column is dropped before encoding. The frame
	    is converted from rgb24 to yuv420p for the encoder.

	    Args:
	        output_file: Destination handed to ``av.open``: a filesystem path or
	            a writable binary file-like object (``ltx2_preprocess`` passes a
	            ``BytesIO``).
	        image_array: Image indexed as ``[row, column, channel]``. It is
	            interpreted as ``rgb24``, so it is expected to be a
	            ``(height, width, 3)`` uint8 array.
	        crf: Constant rate factor forwarded to libx264 as a string option.

	    Raises:
	        ValueError: If the image is smaller than 2 pixels in either
	            dimension, since cropping to even dimensions would leave an
	            empty frame. Raised before the output is opened.
	    """
	    # Floor each dimension to a multiple of 2 for codec compatibility
	    # (yuv420p subsamples chroma 2x2).
	    height = image_array.shape[0] // 2 * 2
	    width = image_array.shape[1] // 2 * 2
	    if height == 0 or width == 0:
	        raise ValueError(
	            "image_array must be at least 2x2 pixels to encode, "
	            f"got {image_array.shape[1]}x{image_array.shape[0]}"
	        )
	    cropped = image_array[:height, :width]

	    # The context manager closes the container even if encoding fails.
	    with av.open(output_file, "w", format="mp4") as container:
	        stream = container.add_stream(
	            "libx264",
	            rate=1,
	            options={"crf": str(crf), "preset": "veryfast"},
	        )
	        stream.height = height
	        stream.width = width
	        av_frame = av.VideoFrame.from_ndarray(cropped, format="rgb24").reformat(format="yuv420p")
	        container.mux(stream.encode(av_frame))
	        # Encoding with no frame flushes packets still buffered in the encoder.
	        container.mux(stream.encode())


	def decode_single_frame(video_file: "str | BytesIO") -> np.ndarray:
	    """Decode the first frame of the first video stream as an RGB array.

	    Args:
	        video_file: Source handed to ``av.open``: a filesystem path or a
	            readable binary file-like object (``ltx2_preprocess`` passes a
	            ``BytesIO``).

	    Returns:
	        The first decoded frame converted with
	        ``to_ndarray(format="rgb24")``.

	    Raises:
	        ValueError: If the input has no video stream, or the video stream
	            yields no frame.
	    """
	    # The context manager closes the container even if decoding fails.
	    with av.open(video_file) as container:
	        # Use explicit defaults so an empty input raises a descriptive error
	        # instead of leaking StopIteration, which could silently terminate
	        # an enclosing iteration in the caller.
	        stream = next((s for s in container.streams if s.type == "video"), None)
	        if stream is None:
	            raise ValueError("no video stream found in input")
	        frame = next(container.decode(stream), None)
	        if frame is None:
	            raise ValueError("video stream contains no decodable frame")
	    return frame.to_ndarray(format="rgb24")


	def ltx2_preprocess(image: np.ndarray, crf: float = 33) -> np.ndarray:
	    """Round-trip an image through single-frame H.264 encoding and decoding.

	    The image is encoded into an in-memory MP4 with ``encode_single_frame``
	    and immediately decoded again with ``decode_single_frame``.
	    NOTE(review): presumably this is meant to imprint video-compression
	    artifacts on the image -- confirm against the caller.

	    Args:
	        image: Image array passed to ``encode_single_frame``.
	        crf: Constant rate factor for the encoder. ``0`` disables the round
	            trip entirely.

	    Returns:
	        ``image`` itself (same object, untouched) when ``crf == 0``;
	        otherwise the decoded RGB frame. Because the encoder crops to even
	        dimensions, the result can be one row and/or column smaller than
	        the input.
	    """
	    if crf == 0:
	        return image

	    # Encode into memory, then decode from a fresh buffer positioned at the
	    # start of the encoded bytes.
	    with BytesIO() as output_file:
	        encode_single_frame(output_file, image, crf)
	        video_bytes = output_file.getvalue()
	    with BytesIO(video_bytes) as video_file:
	        image_array = decode_single_frame(video_file)
	    return image_array