Spaces:

roll-ai
/

Sci-Fi

Paused

App Files Community

AhmadMustafa committed on Oct 3

Commit

6cd58a3

1 Parent(s): cdbde72

rename

Browse files

Files changed (1) hide show

app.py +35 -31

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import gradio as gr
 import torch
 from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler
 from diffusers.utils import export_to_video
-from huggingface_hub import hf_hub_download, login
 from PIL import Image
 from transformers import T5EncoderModel, T5Tokenizer
@@ -15,63 +15,67 @@ from Sci_Fi_inbetweening_pipeline import CogVideoXEFNetInbetweeningPipeline
 # Authenticate with Hugging Face
 try:
-    login(token=os.environ.get("HF_TOKEN"))
-    print("Successfully authenticated with Hugging Face")
 except Exception as e:
     print(f"Warning: Could not authenticate with HF: {e}")
 device = "cuda" if torch.cuda.is_available() else "cpu"
-def load_pipeline(
-    pretrained_model_path="LiuhanChen/Sci-Fi",
-    ef_net_path="weights/EF_Net.pth",
-    dtype_str="bfloat16",
-):
     """Load the Sci-Fi pipeline at startup"""
     print("Loading Sci-Fi pipeline...")
     dtype = torch.float16 if dtype_str == "float16" else torch.bfloat16
-    # Download EF-Net weights if not exists
-    if not os.path.exists(ef_net_path):
-        print("Downloading EF-Net weights from Hugging Face...")
-        os.makedirs("weights", exist_ok=True)
-        ef_net_path = hf_hub_download(
-            repo_id="LiuhanChen/Sci-Fi",
-            subfolder="EF_Net",
-            filename="EF_Net.pth",
-            local_dir="weights",
-        )
-        ef_net_path = "weights/EF_Net/EF_Net.pth"
-        print(f"EF-Net weights downloaded to {ef_net_path}")
-    # Load models from Hugging Face
     print("Loading tokenizer and text encoder...")
-    tokenizer = T5Tokenizer.from_pretrained(
-        pretrained_model_path, subfolder="CogVideoX-5b-I2V/tokenizer"
-    )
     text_encoder = T5EncoderModel.from_pretrained(
-        pretrained_model_path, subfolder="CogVideoX-5b-I2V/text_encoder"
     )
     print("Loading transformer...")
     transformer = CustomCogVideoXTransformer3DModel.from_pretrained(
-        pretrained_model_path, subfolder="CogVideoX-5b-I2V/transformer"
     )
     print("Loading VAE...")
-    vae = AutoencoderKLCogVideoX.from_pretrained(
-        pretrained_model_path, subfolder="CogVideoX-5b-I2V/vae"
-    )
     print("Loading scheduler...")
     scheduler = CogVideoXDDIMScheduler.from_pretrained(
-        pretrained_model_path, subfolder="CogVideoX-5b-I2V/scheduler"
     )
     # Load EF-Net
-    print("Loading EF-Net...")
     EF_Net_model = (
         EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48)
         .requires_grad_(False)

 import torch
 from diffusers import AutoencoderKLCogVideoX, CogVideoXDDIMScheduler
 from diffusers.utils import export_to_video
+from huggingface_hub import login, snapshot_download
 from PIL import Image
 from transformers import T5EncoderModel, T5Tokenizer
 # Authenticate with Hugging Face
 try:
+    token = os.environ.get("HF_TOKEN")
+    if token:
+        login(token=token)
+        print("Successfully authenticated with Hugging Face")
+    else:
+        print("Warning: HF_TOKEN not found")
 except Exception as e:
     print(f"Warning: Could not authenticate with HF: {e}")
 device = "cuda" if torch.cuda.is_available() else "cpu"
+def load_pipeline(dtype_str="bfloat16"):
     """Load the Sci-Fi pipeline at startup"""
     print("Loading Sci-Fi pipeline...")
     dtype = torch.float16 if dtype_str == "float16" else torch.bfloat16
+    # Download the entire model repository
+    print("Downloading model repository from Hugging Face...")
+    repo_path = snapshot_download(
+        repo_id="LiuhanChen/Sci-Fi",
+        local_dir="./Sci-Fi-models",
+        token=os.environ.get("HF_TOKEN"),
+        ignore_patterns=["*.md", "*.txt", ".gitattributes"],  # Skip unnecessary files
+    )
+    print(f"Models downloaded to: {repo_path}")
+    # Set paths
+    model_base_path = repo_path
+    cogvideo_path = os.path.join(model_base_path, "CogVideoX-5b-I2V")
+    ef_net_path = os.path.join(model_base_path, "EF_Net", "EF_Net.pth")
+    print(f"CogVideo path: {cogvideo_path}")
+    print(f"EF-Net path: {ef_net_path}")
+    # Load models
     print("Loading tokenizer and text encoder...")
+    tokenizer = T5Tokenizer.from_pretrained(os.path.join(cogvideo_path, "tokenizer"))
     text_encoder = T5EncoderModel.from_pretrained(
+        os.path.join(cogvideo_path, "text_encoder")
     )
     print("Loading transformer...")
     transformer = CustomCogVideoXTransformer3DModel.from_pretrained(
+        os.path.join(cogvideo_path, "transformer")
     )
     print("Loading VAE...")
+    vae = AutoencoderKLCogVideoX.from_pretrained(os.path.join(cogvideo_path, "vae"))
     print("Loading scheduler...")
     scheduler = CogVideoXDDIMScheduler.from_pretrained(
+        os.path.join(cogvideo_path, "scheduler")
     )
     # Load EF-Net
+    print(f"Loading EF-Net from {ef_net_path}...")
+    if not os.path.exists(ef_net_path):
+        raise FileNotFoundError(f"EF-Net weights not found at {ef_net_path}")
     EF_Net_model = (
         EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48)
         .requires_grad_(False)