Spaces:
Runtime error
Runtime error
Merge code
Browse files
app.py
CHANGED
|
@@ -939,286 +939,6 @@ def worker_video(input_video, end_frame, prompts, n_prompt, seed, batch, resolut
|
|
| 939 |
|
| 940 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
|
| 941 |
|
| 942 |
-
try:
|
| 943 |
-
# 20250506 pftq: Processing input video instead of image
|
| 944 |
-
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Video processing ...'))))
|
| 945 |
-
|
| 946 |
-
# 20250506 pftq: Encode video
|
| 947 |
-
start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
|
| 948 |
-
start_latent = start_latent.to(dtype=torch.float32, device=cpu)
|
| 949 |
-
video_latents = video_latents.cpu()
|
| 950 |
-
|
| 951 |
-
total_latent_sections = (total_second_length * fps) / (latent_window_size * 4)
|
| 952 |
-
total_latent_sections = int(max(round(total_latent_sections), 1))
|
| 953 |
-
|
| 954 |
-
# Clean GPU
|
| 955 |
-
if not high_vram:
|
| 956 |
-
unload_complete_models(
|
| 957 |
-
text_encoder, text_encoder_2, image_encoder, vae, transformer
|
| 958 |
-
)
|
| 959 |
-
|
| 960 |
-
# Text encoding
|
| 961 |
-
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding ...'))))
|
| 962 |
-
|
| 963 |
-
if not high_vram:
|
| 964 |
-
fake_diffusers_current_device(text_encoder, gpu) # since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode.
|
| 965 |
-
load_model_as_complete(text_encoder_2, target_device=gpu)
|
| 966 |
-
|
| 967 |
-
prompt_parameters = []
|
| 968 |
-
|
| 969 |
-
for prompt_part in prompts[:total_latent_sections]:
|
| 970 |
-
prompt_parameters.append(encode_prompt(prompt_part, n_prompt))
|
| 971 |
-
|
| 972 |
-
# Clean GPU
|
| 973 |
-
if not high_vram:
|
| 974 |
-
unload_complete_models(
|
| 975 |
-
text_encoder, text_encoder_2
|
| 976 |
-
)
|
| 977 |
-
|
| 978 |
-
# CLIP Vision
|
| 979 |
-
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
|
| 980 |
-
|
| 981 |
-
if not high_vram:
|
| 982 |
-
load_model_as_complete(image_encoder, target_device=gpu)
|
| 983 |
-
|
| 984 |
-
image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
|
| 985 |
-
|
| 986 |
-
# Clean GPU
|
| 987 |
-
if not high_vram:
|
| 988 |
-
unload_complete_models(image_encoder)
|
| 989 |
-
|
| 990 |
-
image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
|
| 991 |
-
|
| 992 |
-
# Dtype
|
| 993 |
-
image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
|
| 994 |
-
|
| 995 |
-
if enable_preview:
|
| 996 |
-
def callback(d):
|
| 997 |
-
preview = d['denoised']
|
| 998 |
-
preview = vae_decode_fake(preview)
|
| 999 |
-
|
| 1000 |
-
preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
|
| 1001 |
-
preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')
|
| 1002 |
-
|
| 1003 |
-
if stream.input_queue.top() == 'end':
|
| 1004 |
-
stream.output_queue.push(('end', None))
|
| 1005 |
-
raise KeyboardInterrupt('User ends the task.')
|
| 1006 |
-
|
| 1007 |
-
current_step = d['i'] + 1
|
| 1008 |
-
percentage = int(100.0 * current_step / steps)
|
| 1009 |
-
hint = f'Sampling {current_step}/{steps}'
|
| 1010 |
-
desc = f'Total frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, Video length: {max(0, (total_generated_latent_frames * 4 - 3) / fps) :.2f} seconds (FPS-{fps}), Resolution: {height}px * {width}px, Seed: {seed}, Video {idx+1} of {batch}. The video is generating part {section_index+1} of {total_latent_sections}...'
|
| 1011 |
-
stream.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
|
| 1012 |
-
return
|
| 1013 |
-
else:
|
| 1014 |
-
def callback(d):
|
| 1015 |
-
return
|
| 1016 |
-
|
| 1017 |
-
def compute_latent(history_latents, latent_window_size, num_clean_frames, start_latent):
|
| 1018 |
-
# 20250506 pftq: Use user-specified number of context frames, matching original allocation for num_clean_frames=2
|
| 1019 |
-
available_frames = history_latents.shape[2] # Number of latent frames
|
| 1020 |
-
max_pixel_frames = min(latent_window_size * 4 - 3, available_frames * 4) # Cap at available pixel frames
|
| 1021 |
-
adjusted_latent_frames = max(1, (max_pixel_frames + 3) // 4) # Convert back to latent frames
|
| 1022 |
-
# Adjust num_clean_frames to match original behavior: num_clean_frames=2 means 1 frame for clean_latents_1x
|
| 1023 |
-
effective_clean_frames = max(0, num_clean_frames - 1)
|
| 1024 |
-
effective_clean_frames = min(effective_clean_frames, available_frames - 2) if available_frames > 2 else 0 # 20250507 pftq: changed 1 to 2 for edge case for <=1 sec videos
|
| 1025 |
-
num_2x_frames = min(2, max(1, available_frames - effective_clean_frames - 1)) if available_frames > effective_clean_frames + 1 else 0 # 20250507 pftq: subtracted 1 for edge case for <=1 sec videos
|
| 1026 |
-
num_4x_frames = min(16, max(1, available_frames - effective_clean_frames - num_2x_frames)) if available_frames > effective_clean_frames + num_2x_frames else 0 # 20250507 pftq: Edge case for <=1 sec
|
| 1027 |
-
|
| 1028 |
-
total_context_frames = num_4x_frames + num_2x_frames + effective_clean_frames
|
| 1029 |
-
total_context_frames = min(total_context_frames, available_frames) # 20250507 pftq: Edge case for <=1 sec videos
|
| 1030 |
-
|
| 1031 |
-
indices = torch.arange(0, 1 + num_4x_frames + num_2x_frames + effective_clean_frames + adjusted_latent_frames).unsqueeze(0) # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
|
| 1032 |
-
clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices = indices.split(
|
| 1033 |
-
[1, num_4x_frames, num_2x_frames, effective_clean_frames, adjusted_latent_frames], dim=1 # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
|
| 1034 |
-
)
|
| 1035 |
-
clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices], dim=1)
|
| 1036 |
-
|
| 1037 |
-
# 20250506 pftq: Split history_latents dynamically based on available frames
|
| 1038 |
-
fallback_frame_count = 2 # 20250507 pftq: Changed 0 to 2 Edge case for <=1 sec videos
|
| 1039 |
-
context_frames = clean_latents_4x = clean_latents_2x = clean_latents_1x = history_latents[:, :, :fallback_frame_count, :, :]
|
| 1040 |
-
|
| 1041 |
-
if total_context_frames > 0:
|
| 1042 |
-
context_frames = history_latents[:, :, -total_context_frames:, :, :]
|
| 1043 |
-
split_sizes = [num_4x_frames, num_2x_frames, effective_clean_frames]
|
| 1044 |
-
split_sizes = [s for s in split_sizes if s > 0] # Remove zero sizes
|
| 1045 |
-
if split_sizes:
|
| 1046 |
-
splits = context_frames.split(split_sizes, dim=2)
|
| 1047 |
-
split_idx = 0
|
| 1048 |
-
|
| 1049 |
-
if num_4x_frames > 0:
|
| 1050 |
-
clean_latents_4x = splits[split_idx]
|
| 1051 |
-
split_idx = 1
|
| 1052 |
-
if clean_latents_4x.shape[2] < 2: # 20250507 pftq: edge case for <=1 sec videos
|
| 1053 |
-
print("Edge case for <=1 sec videos 4x")
|
| 1054 |
-
clean_latents_4x = clean_latents_4x.expand(-1, -1, 2, -1, -1)
|
| 1055 |
-
|
| 1056 |
-
if num_2x_frames > 0 and split_idx < len(splits):
|
| 1057 |
-
clean_latents_2x = splits[split_idx]
|
| 1058 |
-
if clean_latents_2x.shape[2] < 2: # 20250507 pftq: edge case for <=1 sec videos
|
| 1059 |
-
print("Edge case for <=1 sec videos 2x")
|
| 1060 |
-
clean_latents_2x = clean_latents_2x.expand(-1, -1, 2, -1, -1)
|
| 1061 |
-
split_idx += 1
|
| 1062 |
-
elif clean_latents_2x.shape[2] < 2: # 20250507 pftq: edge case for <=1 sec videos
|
| 1063 |
-
clean_latents_2x = clean_latents_4x
|
| 1064 |
-
|
| 1065 |
-
if effective_clean_frames > 0 and split_idx < len(splits):
|
| 1066 |
-
clean_latents_1x = splits[split_idx]
|
| 1067 |
-
|
| 1068 |
-
clean_latents = torch.cat([start_latent, clean_latents_1x], dim=2)
|
| 1069 |
-
|
| 1070 |
-
# 20250507 pftq: Fix for <=1 sec videos.
|
| 1071 |
-
max_frames = min(latent_window_size * 4 - 3, history_latents.shape[2] * 4)
|
| 1072 |
-
return [max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices]
|
| 1073 |
-
|
| 1074 |
-
for idx in range(batch):
|
| 1075 |
-
if batch > 1:
|
| 1076 |
-
print(f"Beginning video {idx+1} of {batch} with seed {seed} ")
|
| 1077 |
-
|
| 1078 |
-
#job_id = generate_timestamp()
|
| 1079 |
-
job_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+f"_framepackf1-videoinput_{width}-{total_second_length}sec_seed-{seed}_steps-{steps}_distilled-{gs}_cfg-{cfg}" # 20250506 pftq: easier to read timestamp and filename
|
| 1080 |
-
|
| 1081 |
-
# Sampling
|
| 1082 |
-
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Start sampling ...'))))
|
| 1083 |
-
|
| 1084 |
-
rnd = torch.Generator("cpu").manual_seed(seed)
|
| 1085 |
-
|
| 1086 |
-
# 20250506 pftq: Initialize history_latents with video latents
|
| 1087 |
-
history_latents = video_latents
|
| 1088 |
-
total_generated_latent_frames = history_latents.shape[2]
|
| 1089 |
-
# 20250506 pftq: Initialize history_pixels to fix UnboundLocalError
|
| 1090 |
-
history_pixels = None
|
| 1091 |
-
previous_video = None
|
| 1092 |
-
|
| 1093 |
-
for section_index in range(total_latent_sections):
|
| 1094 |
-
if stream.input_queue.top() == 'end':
|
| 1095 |
-
stream.output_queue.push(('end', None))
|
| 1096 |
-
return
|
| 1097 |
-
|
| 1098 |
-
print(f'section_index = {section_index}, total_latent_sections = {total_latent_sections}')
|
| 1099 |
-
|
| 1100 |
-
if len(prompt_parameters) > 0:
|
| 1101 |
-
[llama_vec, clip_l_pooler, llama_vec_n, clip_l_pooler_n, llama_attention_mask, llama_attention_mask_n] = prompt_parameters.pop(0)
|
| 1102 |
-
|
| 1103 |
-
if not high_vram:
|
| 1104 |
-
unload_complete_models()
|
| 1105 |
-
move_model_to_device_with_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)
|
| 1106 |
-
|
| 1107 |
-
if use_teacache:
|
| 1108 |
-
transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
|
| 1109 |
-
else:
|
| 1110 |
-
transformer.initialize_teacache(enable_teacache=False)
|
| 1111 |
-
|
| 1112 |
-
[max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices] = compute_latent(history_latents, latent_window_size, num_clean_frames, start_latent)
|
| 1113 |
-
|
| 1114 |
-
generated_latents = sample_hunyuan(
|
| 1115 |
-
transformer=transformer,
|
| 1116 |
-
sampler='unipc',
|
| 1117 |
-
width=width,
|
| 1118 |
-
height=height,
|
| 1119 |
-
frames=max_frames,
|
| 1120 |
-
real_guidance_scale=cfg,
|
| 1121 |
-
distilled_guidance_scale=gs,
|
| 1122 |
-
guidance_rescale=rs,
|
| 1123 |
-
num_inference_steps=steps,
|
| 1124 |
-
generator=rnd,
|
| 1125 |
-
prompt_embeds=llama_vec,
|
| 1126 |
-
prompt_embeds_mask=llama_attention_mask,
|
| 1127 |
-
prompt_poolers=clip_l_pooler,
|
| 1128 |
-
negative_prompt_embeds=llama_vec_n,
|
| 1129 |
-
negative_prompt_embeds_mask=llama_attention_mask_n,
|
| 1130 |
-
negative_prompt_poolers=clip_l_pooler_n,
|
| 1131 |
-
device=gpu,
|
| 1132 |
-
dtype=torch.bfloat16,
|
| 1133 |
-
image_embeddings=image_encoder_last_hidden_state,
|
| 1134 |
-
latent_indices=latent_indices,
|
| 1135 |
-
clean_latents=clean_latents,
|
| 1136 |
-
clean_latent_indices=clean_latent_indices,
|
| 1137 |
-
clean_latents_2x=clean_latents_2x,
|
| 1138 |
-
clean_latent_2x_indices=clean_latent_2x_indices,
|
| 1139 |
-
clean_latents_4x=clean_latents_4x,
|
| 1140 |
-
clean_latent_4x_indices=clean_latent_4x_indices,
|
| 1141 |
-
callback=callback,
|
| 1142 |
-
)
|
| 1143 |
-
|
| 1144 |
-
total_generated_latent_frames += int(generated_latents.shape[2])
|
| 1145 |
-
history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
|
| 1146 |
-
|
| 1147 |
-
if not high_vram:
|
| 1148 |
-
offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
|
| 1149 |
-
load_model_as_complete(vae, target_device=gpu)
|
| 1150 |
-
|
| 1151 |
-
if history_pixels is None:
|
| 1152 |
-
real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
|
| 1153 |
-
history_pixels = vae_decode(real_history_latents, vae).cpu()
|
| 1154 |
-
else:
|
| 1155 |
-
section_latent_frames = latent_window_size * 2
|
| 1156 |
-
overlapped_frames = min(latent_window_size * 4 - 3, history_pixels.shape[2])
|
| 1157 |
-
|
| 1158 |
-
real_history_latents = history_latents[:, :, -min(total_generated_latent_frames, section_latent_frames):, :, :]
|
| 1159 |
-
history_pixels = soft_append_bcthw(history_pixels, vae_decode(real_history_latents, vae).cpu(), overlapped_frames)
|
| 1160 |
-
|
| 1161 |
-
if not high_vram:
|
| 1162 |
-
unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
|
| 1163 |
-
|
| 1164 |
-
if enable_preview or section_index == total_latent_sections - 1:
|
| 1165 |
-
output_filename = os.path.join(outputs_folder, f'{job_id}_{total_generated_latent_frames}.mp4')
|
| 1166 |
-
|
| 1167 |
-
# 20250506 pftq: Use input video FPS for output
|
| 1168 |
-
save_bcthw_as_mp4(history_pixels, output_filename, fps=fps, crf=mp4_crf)
|
| 1169 |
-
print(f"Latest video saved: {output_filename}")
|
| 1170 |
-
# 20250508 pftq: Save prompt to mp4 metadata comments
|
| 1171 |
-
set_mp4_comments_imageio_ffmpeg(output_filename, f"Prompt: {prompts} | Negative Prompt: {n_prompt}");
|
| 1172 |
-
print(f"Prompt saved to mp4 metadata comments: {output_filename}")
|
| 1173 |
-
|
| 1174 |
-
# 20250506 pftq: Clean up previous partial files
|
| 1175 |
-
if previous_video is not None and os.path.exists(previous_video):
|
| 1176 |
-
try:
|
| 1177 |
-
os.remove(previous_video)
|
| 1178 |
-
print(f"Previous partial video deleted: {previous_video}")
|
| 1179 |
-
except Exception as e:
|
| 1180 |
-
print(f"Error deleting previous partial video {previous_video}: {e}")
|
| 1181 |
-
previous_video = output_filename
|
| 1182 |
-
|
| 1183 |
-
print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
|
| 1184 |
-
|
| 1185 |
-
stream.output_queue.push(('file', output_filename))
|
| 1186 |
-
|
| 1187 |
-
seed = (seed + 1) % np.iinfo(np.int32).max
|
| 1188 |
-
|
| 1189 |
-
except:
|
| 1190 |
-
traceback.print_exc()
|
| 1191 |
-
|
| 1192 |
-
if not high_vram:
|
| 1193 |
-
unload_complete_models(
|
| 1194 |
-
text_encoder, text_encoder_2, image_encoder, vae, transformer
|
| 1195 |
-
)
|
| 1196 |
-
|
| 1197 |
-
stream.output_queue.push(('end', None))
|
| 1198 |
-
return
|
| 1199 |
-
|
| 1200 |
-
# 20250506 pftq: Modified worker to accept video input and clean frame count
|
| 1201 |
-
@torch.no_grad()
|
| 1202 |
-
def worker_video_end(input_video, end_frame, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
|
| 1203 |
-
def encode_prompt(prompt, n_prompt):
|
| 1204 |
-
llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
|
| 1205 |
-
|
| 1206 |
-
if cfg == 1:
|
| 1207 |
-
llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
|
| 1208 |
-
else:
|
| 1209 |
-
llama_vec_n, clip_l_pooler_n = encode_prompt_conds(n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
|
| 1210 |
-
|
| 1211 |
-
llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
|
| 1212 |
-
llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)
|
| 1213 |
-
|
| 1214 |
-
llama_vec = llama_vec.to(transformer.dtype)
|
| 1215 |
-
llama_vec_n = llama_vec_n.to(transformer.dtype)
|
| 1216 |
-
clip_l_pooler = clip_l_pooler.to(transformer.dtype)
|
| 1217 |
-
clip_l_pooler_n = clip_l_pooler_n.to(transformer.dtype)
|
| 1218 |
-
return [llama_vec, clip_l_pooler, llama_vec_n, clip_l_pooler_n, llama_attention_mask, llama_attention_mask_n]
|
| 1219 |
-
|
| 1220 |
-
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
|
| 1221 |
-
|
| 1222 |
try:
|
| 1223 |
# 20250506 pftq: Processing input video instead of image
|
| 1224 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Video processing ...'))))
|
|
@@ -1626,7 +1346,7 @@ def process_video_on_gpu(input_video, end_frame, prompts, generation_mode, n_pro
|
|
| 1626 |
stream = AsyncStream()
|
| 1627 |
|
| 1628 |
# 20250506 pftq: Pass num_clean_frames, vae_batch, etc
|
| 1629 |
-
async_run(
|
| 1630 |
|
| 1631 |
output_filename = None
|
| 1632 |
|
|
|
|
| 939 |
|
| 940 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
|
| 941 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
try:
|
| 943 |
# 20250506 pftq: Processing input video instead of image
|
| 944 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Video processing ...'))))
|
|
|
|
| 1346 |
stream = AsyncStream()
|
| 1347 |
|
| 1348 |
# 20250506 pftq: Pass num_clean_frames, vae_batch, etc
|
| 1349 |
+
async_run(worker_video, input_video, end_frame if generation_mode == "video_end" else None, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch)
|
| 1350 |
|
| 1351 |
output_filename = None
|
| 1352 |
|