sam-motamed committed on
Commit
9743ceb
·
verified ·
1 Parent(s): bad41bb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -14,7 +14,7 @@ import imageio
14
  import mediapy as media
15
  import spaces
16
  import gradio as gr
17
- from huggingface_hub import hf_hub_download
18
  from safetensors.torch import load_file
19
  from diffusers import DDIMScheduler
20
  from PIL import Image
@@ -52,8 +52,11 @@ NEG_PROMPT = (
52
  # ── model loading (once at startup, lives in CPU RAM between GPU requests) ─────
53
  print("Loading VOID pipeline …")
54
 
 
 
 
55
  transformer = CogVideoXTransformer3DModel.from_pretrained(
56
- BASE_MODEL_ID,
57
  subfolder="transformer",
58
  low_cpu_mem_usage=True,
59
  torch_dtype=torch.float8_e4m3fn, # qfloat8 to save VRAM
@@ -78,13 +81,13 @@ if state_dict[param_name].size(1) != transformer.state_dict()[param_name].size(1
78
  transformer.load_state_dict(state_dict, strict=False)
79
 
80
  vae = AutoencoderKLCogVideoX.from_pretrained(
81
- BASE_MODEL_ID, subfolder="vae"
82
  ).to(WEIGHT_DTYPE)
83
- tokenizer = T5Tokenizer.from_pretrained(BASE_MODEL_ID, subfolder="tokenizer")
84
  text_encoder = T5EncoderModel.from_pretrained(
85
- BASE_MODEL_ID, subfolder="text_encoder", torch_dtype=WEIGHT_DTYPE
86
  )
87
- scheduler = DDIMScheduler.from_pretrained(BASE_MODEL_ID, subfolder="scheduler")
88
 
89
  pipeline = CogVideoXFunInpaintPipeline(
90
  vae=vae,
 
14
  import mediapy as media
15
  import spaces
16
  import gradio as gr
17
+ from huggingface_hub import hf_hub_download, snapshot_download
18
  from safetensors.torch import load_file
19
  from diffusers import DDIMScheduler
20
  from PIL import Image
 
52
  # ── model loading (once at startup, lives in CPU RAM between GPU requests) ─────
53
  print("Loading VOID pipeline …")
54
 
55
+ # Download base model to local cache (custom from_pretrained needs a local path)
56
+ base_model_path = snapshot_download(repo_id=BASE_MODEL_ID)
57
+
58
  transformer = CogVideoXTransformer3DModel.from_pretrained(
59
+ base_model_path,
60
  subfolder="transformer",
61
  low_cpu_mem_usage=True,
62
  torch_dtype=torch.float8_e4m3fn, # qfloat8 to save VRAM
 
81
  transformer.load_state_dict(state_dict, strict=False)
82
 
83
  vae = AutoencoderKLCogVideoX.from_pretrained(
84
+ base_model_path, subfolder="vae"
85
  ).to(WEIGHT_DTYPE)
86
+ tokenizer = T5Tokenizer.from_pretrained(base_model_path, subfolder="tokenizer")
87
  text_encoder = T5EncoderModel.from_pretrained(
88
+ base_model_path, subfolder="text_encoder", torch_dtype=WEIGHT_DTYPE
89
  )
90
+ scheduler = DDIMScheduler.from_pretrained(base_model_path, subfolder="scheduler")
91
 
92
  pipeline = CogVideoXFunInpaintPipeline(
93
  vae=vae,