AhmadMustafa committed on
Commit
6cd58a3
·
1 Parent(s): cdbde72
Files changed (1) hide show
  1. app.py +35 -31
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
5
  import torch
6
  from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler
7
  from diffusers.utils import export_to_video
8
- from huggingface_hub import hf_hub_download, login
9
  from PIL import Image
10
  from transformers import T5EncoderModel, T5Tokenizer
11
 
@@ -15,63 +15,67 @@ from Sci_Fi_inbetweening_pipeline import CogVideoXEFNetInbetweeningPipeline
15
 
16
  # Authenticate with Hugging Face
17
  try:
18
- login(token=os.environ.get("HF_TOKEN"))
19
- print("Successfully authenticated with Hugging Face")
 
 
 
 
20
  except Exception as e:
21
  print(f"Warning: Could not authenticate with HF: {e}")
22
 
23
  device = "cuda" if torch.cuda.is_available() else "cpu"
24
 
25
 
26
- def load_pipeline(
27
- pretrained_model_path="LiuhanChen/Sci-Fi",
28
- ef_net_path="weights/EF_Net.pth",
29
- dtype_str="bfloat16",
30
- ):
31
  """Load the Sci-Fi pipeline at startup"""
32
  print("Loading Sci-Fi pipeline...")
33
 
34
  dtype = torch.float16 if dtype_str == "float16" else torch.bfloat16
35
 
36
- # Download EF-Net weights if not exists
37
- if not os.path.exists(ef_net_path):
38
- print("Downloading EF-Net weights from Hugging Face...")
39
- os.makedirs("weights", exist_ok=True)
40
- ef_net_path = hf_hub_download(
41
- repo_id="LiuhanChen/Sci-Fi",
42
- subfolder="EF_Net",
43
- filename="EF_Net.pth",
44
- local_dir="weights",
45
- )
46
- ef_net_path = "weights/EF_Net/EF_Net.pth"
47
- print(f"EF-Net weights downloaded to {ef_net_path}")
 
 
48
 
49
- # Load models from Hugging Face
 
 
 
50
  print("Loading tokenizer and text encoder...")
51
- tokenizer = T5Tokenizer.from_pretrained(
52
- pretrained_model_path, subfolder="CogVideoX-5b-I2V/tokenizer"
53
- )
54
  text_encoder = T5EncoderModel.from_pretrained(
55
- pretrained_model_path, subfolder="CogVideoX-5b-I2V/text_encoder"
56
  )
57
 
58
  print("Loading transformer...")
59
  transformer = CustomCogVideoXTransformer3DModel.from_pretrained(
60
- pretrained_model_path, subfolder="CogVideoX-5b-I2V/transformer"
61
  )
62
 
63
  print("Loading VAE...")
64
- vae = AutoencoderKLCogVideoX.from_pretrained(
65
- pretrained_model_path, subfolder="CogVideoX-5b-I2V/vae"
66
- )
67
 
68
  print("Loading scheduler...")
69
  scheduler = CogVideoXDDIMScheduler.from_pretrained(
70
- pretrained_model_path, subfolder="CogVideoX-5b-I2V/scheduler"
71
  )
72
 
73
  # Load EF-Net
74
- print("Loading EF-Net...")
 
 
 
75
  EF_Net_model = (
76
  EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48)
77
  .requires_grad_(False)
 
5
  import torch
6
  from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler
7
  from diffusers.utils import export_to_video
8
+ from huggingface_hub import login, snapshot_download
9
  from PIL import Image
10
  from transformers import T5EncoderModel, T5Tokenizer
11
 
 
15
 
16
  # Authenticate with Hugging Face
17
  try:
18
+ token = os.environ.get("HF_TOKEN")
19
+ if token:
20
+ login(token=token)
21
+ print("Successfully authenticated with Hugging Face")
22
+ else:
23
+ print("Warning: HF_TOKEN not found")
24
  except Exception as e:
25
  print(f"Warning: Could not authenticate with HF: {e}")
26
 
27
  device = "cuda" if torch.cuda.is_available() else "cpu"
28
 
29
 
30
+ def load_pipeline(dtype_str="bfloat16"):
 
 
 
 
31
  """Load the Sci-Fi pipeline at startup"""
32
  print("Loading Sci-Fi pipeline...")
33
 
34
  dtype = torch.float16 if dtype_str == "float16" else torch.bfloat16
35
 
36
+ # Download the entire model repository
37
+ print("Downloading model repository from Hugging Face...")
38
+ repo_path = snapshot_download(
39
+ repo_id="LiuhanChen/Sci-Fi",
40
+ local_dir="./Sci-Fi-models",
41
+ token=os.environ.get("HF_TOKEN"),
42
+ ignore_patterns=["*.md", "*.txt", ".gitattributes"], # Skip unnecessary files
43
+ )
44
+ print(f"Models downloaded to: {repo_path}")
45
+
46
+ # Set paths
47
+ model_base_path = repo_path
48
+ cogvideo_path = os.path.join(model_base_path, "CogVideoX-5b-I2V")
49
+ ef_net_path = os.path.join(model_base_path, "EF_Net", "EF_Net.pth")
50
 
51
+ print(f"CogVideo path: {cogvideo_path}")
52
+ print(f"EF-Net path: {ef_net_path}")
53
+
54
+ # Load models
55
  print("Loading tokenizer and text encoder...")
56
+ tokenizer = T5Tokenizer.from_pretrained(os.path.join(cogvideo_path, "tokenizer"))
 
 
57
  text_encoder = T5EncoderModel.from_pretrained(
58
+ os.path.join(cogvideo_path, "text_encoder")
59
  )
60
 
61
  print("Loading transformer...")
62
  transformer = CustomCogVideoXTransformer3DModel.from_pretrained(
63
+ os.path.join(cogvideo_path, "transformer")
64
  )
65
 
66
  print("Loading VAE...")
67
+ vae = AutoencoderKLCogVideoX.from_pretrained(os.path.join(cogvideo_path, "vae"))
 
 
68
 
69
  print("Loading scheduler...")
70
  scheduler = CogVideoXDDIMScheduler.from_pretrained(
71
+ os.path.join(cogvideo_path, "scheduler")
72
  )
73
 
74
  # Load EF-Net
75
+ print(f"Loading EF-Net from {ef_net_path}...")
76
+ if not os.path.exists(ef_net_path):
77
+ raise FileNotFoundError(f"EF-Net weights not found at {ef_net_path}")
78
+
79
  EF_Net_model = (
80
  EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48)
81
  .requires_grad_(False)