AhmadMustafa committed on
Commit
cdbde72
·
1 Parent(s): 8e6e7dd
Files changed (1) hide show
  1. app.py +39 -41
app.py CHANGED
@@ -1,19 +1,11 @@
1
- import time
2
  import os
3
- from huggingface_hub import hf_hub_download, login
4
 
5
  import gradio as gr
6
-
7
- # Authenticate with Hugging Face (token is auto-available in HF Spaces)
8
- try:
9
- login(token=os.environ.get("HF_TOKEN"))
10
- print("Successfully authenticated with Hugging Face")
11
- except Exception as e:
12
- print(f"Warning: Could not authenticate with HF: {e}")
13
- import spaces
14
  import torch
15
  from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler
16
  from diffusers.utils import export_to_video
 
17
  from PIL import Image
18
  from transformers import T5EncoderModel, T5Tokenizer
19
 
@@ -21,22 +13,23 @@ from cogvideo_transformer import CustomCogVideoXTransformer3DModel
21
  from EF_Net import EF_Net
22
  from Sci_Fi_inbetweening_pipeline import CogVideoXEFNetInbetweeningPipeline
23
 
24
- # Global variables for the pipeline
25
- pipe = None
 
 
 
 
 
26
  device = "cuda" if torch.cuda.is_available() else "cpu"
27
 
28
 
29
- def _load_pipeline_internal(
30
  pretrained_model_path="LiuhanChen/Sci-Fi",
31
  ef_net_path="weights/EF_Net.pth",
32
  dtype_str="bfloat16",
33
  ):
34
- """Internal function to load the Sci-Fi pipeline"""
35
- global pipe
36
-
37
- # Return early if pipeline is already loaded
38
- if pipe is not None:
39
- return "Pipeline already loaded!"
40
 
41
  dtype = torch.float16 if dtype_str == "float16" else torch.bfloat16
42
 
@@ -48,29 +41,37 @@ def _load_pipeline_internal(
48
  repo_id="LiuhanChen/Sci-Fi",
49
  subfolder="EF_Net",
50
  filename="EF_Net.pth",
51
- local_dir="weights"
52
  )
53
  ef_net_path = "weights/EF_Net/EF_Net.pth"
54
  print(f"EF-Net weights downloaded to {ef_net_path}")
55
 
56
  # Load models from Hugging Face
 
57
  tokenizer = T5Tokenizer.from_pretrained(
58
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/tokenizer"
59
  )
60
  text_encoder = T5EncoderModel.from_pretrained(
61
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/text_encoder"
62
  )
 
 
63
  transformer = CustomCogVideoXTransformer3DModel.from_pretrained(
64
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/transformer"
65
  )
 
 
66
  vae = AutoencoderKLCogVideoX.from_pretrained(
67
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/vae"
68
  )
 
 
69
  scheduler = CogVideoXDDIMScheduler.from_pretrained(
70
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/scheduler"
71
  )
72
 
73
  # Load EF-Net
 
74
  EF_Net_model = (
75
  EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48)
76
  .requires_grad_(False)
@@ -83,7 +84,8 @@ def _load_pipeline_internal(
83
  print(f"[EF-Net loaded] Missing: {len(m)} | Unexpected: {len(u)}")
84
 
85
  # Create pipeline
86
- pipe = CogVideoXEFNetInbetweeningPipeline(
 
87
  tokenizer=tokenizer,
88
  text_encoder=text_encoder,
89
  transformer=transformer,
@@ -91,20 +93,26 @@ def _load_pipeline_internal(
91
  EF_Net_model=EF_Net_model,
92
  scheduler=scheduler,
93
  )
94
- pipe.scheduler = CogVideoXDDIMScheduler.from_config(
95
- pipe.scheduler.config, timestep_spacing="trailing"
96
  )
97
 
98
- pipe.to(device)
99
- pipe = pipe.to(dtype=dtype)
 
 
 
 
100
 
101
- pipe.vae.enable_slicing()
102
- pipe.vae.enable_tiling()
103
 
104
- return "Pipeline loaded successfully!"
 
 
 
105
 
106
 
107
- @spaces.GPU(duration=500)
108
  def generate_inbetweening(
109
  first_image: Image.Image,
110
  last_image: Image.Image,
@@ -118,16 +126,6 @@ def generate_inbetweening(
118
  progress=gr.Progress(),
119
  ):
120
  """Generate frame inbetweening video"""
121
- global pipe
122
-
123
- # Load pipeline on first use (lazy loading with GPU access)
124
- if pipe is None:
125
- progress(0, desc="Loading pipeline (first run)...")
126
- try:
127
- _load_pipeline_internal()
128
- except Exception as e:
129
- return None, f"ERROR: Failed to load pipeline: {str(e)}"
130
-
131
  if first_image is None or last_image is None:
132
  return None, "Please upload both start and end frames!"
133
 
@@ -177,7 +175,7 @@ with gr.Blocks(title="Sci-Fi: Frame Inbetweening") as demo:
177
 
178
  Upload start and end frames to generate smooth inbetweening video.
179
 
180
- **Note:** Pipeline loads on first generation (this may take 1-2 minutes).
181
  """
182
  )
183
 
@@ -280,5 +278,5 @@ with gr.Blocks(title="Sci-Fi: Frame Inbetweening") as demo:
280
  )
281
 
282
  if __name__ == "__main__":
283
- print("App starting - pipeline will load on first generation request")
284
  demo.launch()
 
 
1
  import os
2
+ import time
3
 
4
  import gradio as gr
 
 
 
 
 
 
 
 
5
  import torch
6
  from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler
7
  from diffusers.utils import export_to_video
8
+ from huggingface_hub import hf_hub_download, login
9
  from PIL import Image
10
  from transformers import T5EncoderModel, T5Tokenizer
11
 
 
13
  from EF_Net import EF_Net
14
  from Sci_Fi_inbetweening_pipeline import CogVideoXEFNetInbetweeningPipeline
15
 
16
+ # Authenticate with Hugging Face
17
+ try:
18
+ login(token=os.environ.get("HF_TOKEN"))
19
+ print("Successfully authenticated with Hugging Face")
20
+ except Exception as e:
21
+ print(f"Warning: Could not authenticate with HF: {e}")
22
+
23
  device = "cuda" if torch.cuda.is_available() else "cpu"
24
 
25
 
26
+ def load_pipeline(
27
  pretrained_model_path="LiuhanChen/Sci-Fi",
28
  ef_net_path="weights/EF_Net.pth",
29
  dtype_str="bfloat16",
30
  ):
31
+ """Load the Sci-Fi pipeline at startup"""
32
+ print("Loading Sci-Fi pipeline...")
 
 
 
 
33
 
34
  dtype = torch.float16 if dtype_str == "float16" else torch.bfloat16
35
 
 
41
  repo_id="LiuhanChen/Sci-Fi",
42
  subfolder="EF_Net",
43
  filename="EF_Net.pth",
44
+ local_dir="weights",
45
  )
46
  ef_net_path = "weights/EF_Net/EF_Net.pth"
47
  print(f"EF-Net weights downloaded to {ef_net_path}")
48
 
49
  # Load models from Hugging Face
50
+ print("Loading tokenizer and text encoder...")
51
  tokenizer = T5Tokenizer.from_pretrained(
52
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/tokenizer"
53
  )
54
  text_encoder = T5EncoderModel.from_pretrained(
55
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/text_encoder"
56
  )
57
+
58
+ print("Loading transformer...")
59
  transformer = CustomCogVideoXTransformer3DModel.from_pretrained(
60
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/transformer"
61
  )
62
+
63
+ print("Loading VAE...")
64
  vae = AutoencoderKLCogVideoX.from_pretrained(
65
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/vae"
66
  )
67
+
68
+ print("Loading scheduler...")
69
  scheduler = CogVideoXDDIMScheduler.from_pretrained(
70
  pretrained_model_path, subfolder="CogVideoX-5b-I2V/scheduler"
71
  )
72
 
73
  # Load EF-Net
74
+ print("Loading EF-Net...")
75
  EF_Net_model = (
76
  EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48)
77
  .requires_grad_(False)
 
84
  print(f"[EF-Net loaded] Missing: {len(m)} | Unexpected: {len(u)}")
85
 
86
  # Create pipeline
87
+ print("Creating pipeline...")
88
+ pipeline = CogVideoXEFNetInbetweeningPipeline(
89
  tokenizer=tokenizer,
90
  text_encoder=text_encoder,
91
  transformer=transformer,
 
93
  EF_Net_model=EF_Net_model,
94
  scheduler=scheduler,
95
  )
96
+ pipeline.scheduler = CogVideoXDDIMScheduler.from_config(
97
+ pipeline.scheduler.config, timestep_spacing="trailing"
98
  )
99
 
100
+ print(f"Moving pipeline to {device}...")
101
+ pipeline.to(device)
102
+ pipeline = pipeline.to(dtype=dtype)
103
+
104
+ pipeline.vae.enable_slicing()
105
+ pipeline.vae.enable_tiling()
106
 
107
+ print("Pipeline loaded successfully!")
108
+ return pipeline
109
 
110
+
111
+ # Load pipeline at startup
112
+ print("Initializing Sci-Fi pipeline at startup...")
113
+ pipe = load_pipeline()
114
 
115
 
 
116
  def generate_inbetweening(
117
  first_image: Image.Image,
118
  last_image: Image.Image,
 
126
  progress=gr.Progress(),
127
  ):
128
  """Generate frame inbetweening video"""
 
 
 
 
 
 
 
 
 
 
129
  if first_image is None or last_image is None:
130
  return None, "Please upload both start and end frames!"
131
 
 
175
 
176
  Upload start and end frames to generate smooth inbetweening video.
177
 
178
+ **Model is pre-loaded and ready to use!**
179
  """
180
  )
181
 
 
278
  )
279
 
280
  if __name__ == "__main__":
281
+ print("App ready - pipeline is loaded and ready for inference!")
282
  demo.launch()