junchenfu commited on
Commit
29cc382
·
verified ·
1 Parent(s): 4dcf6b1

Upload generating_images_videos_three.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. generating_images_videos_three.py +264 -0
generating_images_videos_three.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import torch
4
+ import numpy as np
5
+ import random
6
+
7
+ from diffusers import StableDiffusionPipeline
8
+ from diffusers.utils import export_to_video
9
+
10
+ # Specify the GPU to use (adjust as needed)
11
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
12
+
13
def set_seed(seed: int = 42):
    """Seed every RNG this script relies on for reproducible generations.

    Seeds, in order: Python's ``random``, NumPy, PyTorch CPU, the current
    CUDA device, and all CUDA devices (multi-GPU). Also pins cuDNN to its
    deterministic algorithms, which may trade away some speed.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers every GPU, not just the current one
    )
    for seeder in seeders:
        seeder(seed)
    # Deterministic cuDNN kernel selection (benchmark mode would pick
    # the fastest kernel per shape, which is not reproducible).
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
24
+
25
# Seed all RNGs once at import time so every run of this script is reproducible
set_seed(42)
27
+
28
def generate_image(pipeline, prompt: str, output_path: str):
    """Render *prompt* with a Stable Diffusion pipeline and save the result.

    Parameters:
        pipeline: A loaded StableDiffusionPipeline (already on GPU).
        prompt: Text description of the image to generate.
        output_path: Destination file path for the first generated image.
    """
    # Run inference under mixed precision; autocast only affects CUDA ops.
    with torch.autocast("cuda"):
        result = pipeline(prompt)
        image = result.images[0]
    image.save(output_path)
35
+
36
+ import torch
37
+ from diffusers.utils import export_to_video # Ensure these methods are correctly imported
38
+
39
def generate_video(pipeline, pipeline_type: str, prompt: str, output_path: str, **kwargs):
    """Generate a video with one of several text-to-video pipelines and export it.

    Parameters:
        pipeline: Loaded video generation pipeline.
        pipeline_type: One of "cogvideo", "ltx", "hunyuan", "animatediff".
        prompt: Text description of the clip.
        output_path: Output video path (animatediff defaults to gif, others to mp4).
        kwargs: Optional hyperparameter overrides — e.g. width, height, num_frames,
            num_inference_steps, fps, guidance_scale, negative_prompt, generator,
            num_videos_per_prompt. Keys not used by the chosen pipeline are ignored.

    Raises:
        ValueError: if ``pipeline_type`` is not a supported name.
    """
    opt = kwargs.get  # shorthand: caller override, else the per-pipeline default

    if pipeline_type == "cogvideo":
        # CogVideoX call — some of these keywords only apply to this pipeline.
        frames = pipeline(
            prompt=prompt,
            num_videos_per_prompt=opt("num_videos_per_prompt", 1),
            num_inference_steps=opt("num_inference_steps", 50),
            num_frames=opt("num_frames", 49),
            guidance_scale=opt("guidance_scale", 6),
            generator=opt("generator", torch.Generator(device="cuda").manual_seed(42)),
        ).frames[0]
        export_to_video(frames, output_path, fps=opt("fps", 8))
    elif pipeline_type == "ltx":
        # LTXPipeline call.
        frames = pipeline(
            prompt=prompt,
            negative_prompt=opt("negative_prompt", "worst quality, inconsistent motion, blurry, jittery, distorted"),
            width=opt("width", 704),
            height=opt("height", 480),
            num_frames=opt("num_frames", 161),
            num_inference_steps=opt("num_inference_steps", 50),
        ).frames[0]
        export_to_video(frames, output_path, fps=opt("fps", 15))
    elif pipeline_type == "hunyuan":
        # HunyuanVideoPipeline call.
        frames = pipeline(
            prompt=prompt,
            width=opt("width", 512),
            height=opt("height", 320),
            num_frames=opt("num_frames", 61),
            num_inference_steps=opt("num_inference_steps", 30),
        ).frames[0]
        export_to_video(frames, output_path, fps=opt("fps", 15))
    elif pipeline_type == "animatediff":
        # AnimateDiff-Lightning: distilled sampler, valid step counts are 1/2/4/8.
        frames = pipeline(
            prompt=prompt,
            guidance_scale=opt("guidance_scale", 1.0),
            num_inference_steps=opt("num_inference_steps", 4),
        ).frames[0]
        export_to_video(frames, output_path)
    else:
        raise ValueError(f"Unknown pipeline type: {pipeline_type}")
92
+
93
def load_video_pipeline(pipeline_type: str):
    """
    Load and initialize the video generation pipeline named by ``pipeline_type``.

    Imports for each backend are done lazily inside the branch so that only
    the selected model's dependencies need to be installed.

    Parameters:
        pipeline_type: One of "cogvideo", "ltx", "hunyuan", "animatediff".
    Returns:
        A ready-to-use diffusers video pipeline — moved to GPU, except
        HunyuanVideo which is configured for model CPU offload instead.
    Raises:
        ValueError: if ``pipeline_type`` is not a supported name.
    """
    if pipeline_type == "cogvideo":
        from diffusers import CogVideoXPipeline
        print("Loading video generation model (CogVideoX-5b)...")
        pipe = CogVideoXPipeline.from_pretrained(
            "THUDM/CogVideoX-5b",
            torch_dtype=torch.bfloat16
        )
        # VAE slicing/tiling lower peak VRAM when decoding long frame stacks.
        pipe.vae.enable_slicing()
        pipe.vae.enable_tiling()
        pipe.to("cuda")
        return pipe
    elif pipeline_type == "ltx":
        from diffusers import LTXPipeline
        print("Loading video generation model (LTX-Video)...")
        pipe = LTXPipeline.from_pretrained(
            "Lightricks/LTX-Video",
            torch_dtype=torch.bfloat16
        )
        pipe.to("cuda")
        return pipe
    elif pipeline_type == "hunyuan":
        from diffusers import BitsAndBytesConfig, HunyuanVideoTransformer3DModel, HunyuanVideoPipeline
        from diffusers.hooks import apply_layerwise_casting
        from transformers import LlamaModel
        print("Loading video generation model (HunyuanVideo)...")
        model_id = "hunyuanvideo-community/HunyuanVideo"
        # Quantize the transformer to 4-bit (bitsandbytes) with bf16 compute
        # so the model fits in limited VRAM.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
        )
        text_encoder = LlamaModel.from_pretrained(model_id, subfolder="text_encoder", torch_dtype=torch.float16)
        # Store text-encoder weights in fp8 and upcast to fp16 on the fly.
        apply_layerwise_casting(text_encoder, storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.float16)
        transformer = HunyuanVideoTransformer3DModel.from_pretrained(
            model_id,
            subfolder="transformer",
            quantization_config=quantization_config,
            torch_dtype=torch.bfloat16,
        )
        pipe = HunyuanVideoPipeline.from_pretrained(
            model_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch.float16
        )
        pipe.vae.enable_tiling()
        # Shuttle submodules to GPU only while they run, instead of pipe.to("cuda").
        pipe.enable_model_cpu_offload()
        return pipe
    elif pipeline_type == "animatediff":
        from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
        from huggingface_hub import hf_hub_download
        from safetensors.torch import load_file
        print("Loading video generation model (AnimateDiff-Lightning)...")
        device = "cuda"
        dtype = torch.float16
        step = 4  # Distillation step count; the repo ships 1/2/4/8-step checkpoints
        repo = "ByteDance/AnimateDiff-Lightning"
        ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
        base = "emilianJR/epiCRealism"  # Base SD model; swap for another as preferred
        adapter = MotionAdapter().to(device, dtype)
        # Download the motion-adapter checkpoint and load its weights in place
        adapter.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device))
        pipe = AnimateDiffPipeline.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device)
        # NOTE(review): trailing timestep spacing + linear betas follow the
        # AnimateDiff-Lightning usage example — confirm against the model card.
        pipe.scheduler = EulerDiscreteScheduler.from_config(
            pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear"
        )
        return pipe
    else:
        raise ValueError(f"Unknown pipeline type: {pipeline_type}")
166
+
167
def main():
    """
    End-to-end driver: load the image and video pipelines, then walk each
    task's CSV of prompts and generate one cover image and one video per row.
    Existing outputs are skipped, so the script can be re-run to resume.
    """
    # ============ 1. Load/Initialize Models ============
    # (1) Image generation model: Stable Diffusion
    print("Loading image generation model (Stable Diffusion)...")
    pipe_image = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16
    )
    pipe_image.to("cuda")
    # Enable xformers acceleration if needed
    # pipe_image.enable_xformers_memory_efficient_attention()

    # (2) Video generation model: one of "cogvideo", "ltx", "hunyuan", "animatediff"
    video_pipeline_type = "ltx"  # Change here to select other models: "ltx" or "hunyuan" or "animatediff"

    # ============ 2. Define Task List ============
    # Each task pairs a prompt CSV with output directories for images and videos.
    tasks1 = [
        {
            "csv_file": "output_prompt_rag_more/prompt_ai_concrete_rag_10_testset.csv",
            "image_dir": "output_ai_covers_concrete_rag_10_testset",
            "video_dir": "output_ai_videos_concrete_rag_10_testset_ltx"
        },
        {
            "csv_file": "output_prompt_rag_more/prompt_ai_abstract_rag_10_testset.csv",
            "image_dir": "output_ai_covers_abstract_rag_10_testset",
            "video_dir": "output_ai_videos_abstract_rag_10_testset_ltx"
        }

    ]

    # Leftovers from earlier experiment configurations, kept for reference:
    #tasks = [tasks[-4],tasks[-2]]
    #tasks=tasks_ablation_abstract_5b+tasks_ablation_concrete_5b
    #tasks= tasks_ablation_concrete2
    tasks = tasks1
    pipe_video = load_video_pipeline(video_pipeline_type)

    # ============ 3. Iterate over CSV files to generate images and videos ============
    for task in tasks:
        csv_file = task["csv_file"]
        image_dir = task["image_dir"]
        video_dir = task["video_dir"]
        os.makedirs(image_dir, exist_ok=True)
        print(f"Ensuring directory exists: {image_dir}")
        os.makedirs(video_dir, exist_ok=True)
        print(f"Ensuring directory exists: {video_dir}")

        # Missing CSVs are reported and skipped rather than aborting the batch.
        if not os.path.exists(csv_file):
            print(f"Error: CSV file {csv_file} not found, please check the path.")
            continue

        # NOTE(review): the CSV is expected to carry the columns
        # "user prompt", "title", "cover prompt", "video prompt" — confirm upstream.
        df = pd.read_csv(csv_file)
        for idx, row in df.iterrows():
            user_prompt = str(row["user prompt"])
            title = str(row["title"])
            cover_prompt = str(row["cover prompt"])
            video_prompt = str(row["video prompt"])

            # Generate filenames.
            # NOTE(review): user_prompt is used verbatim as a filename — a prompt
            # containing "/" or other reserved characters would break the path;
            # consider sanitizing before joining.
            image_filename = os.path.join(image_dir, f"{user_prompt}.png")
            video_filename = os.path.join(video_dir, f"{user_prompt}.mp4")

            print("-" * 50)
            print(f"[CSV: {csv_file}] - [{idx}] Starting generation: {user_prompt}")
            print(f"Title: {title}")
            print(f"Cover Prompt: {cover_prompt}")
            print(f"Video Prompt: {video_prompt}")

            # Resume support: skip rows whose image AND video both already exist.
            if os.path.exists(image_filename) and os.path.exists(video_filename):
                print(f"File already exists, skipping generation: {video_filename}")
                continue

            # 4. Generate image
            try:
                generate_image(pipe_image, cover_prompt, image_filename)
                print(f"Image saved to {image_filename}")
            except Exception as e:
                # Best-effort batch: log the failure and keep going so one bad
                # prompt does not stop the whole run.
                print(f"Image generation failed: {e}")

            # 5. Generate video (customize hyperparameters by passing additional arguments)
            try:
                generate_video(
                    pipe_video,
                    pipeline_type=video_pipeline_type,
                    prompt=video_prompt,
                    output_path=video_filename
                    # To modify hyperparameters, pass them here, e.g.:
                    # num_inference_steps=60, num_frames=50, fps=10, width=640, height=360, guidance_scale=7, ...
                )
                print(f"Video saved to {video_filename}")
            except Exception as e:
                print(f"Video generation failed: {e}")

    print("All generation tasks completed!")
262
+
263
# Script entry point: run the full image + video generation batch.
if __name__ == "__main__":
    main()