Spaces:
Runtime error
Runtime error
Update sonic.py
Browse files
sonic.py
CHANGED
|
@@ -1,27 +1,29 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import os, math, torch, cv2
|
| 5 |
-
import torch.utils.checkpoint
|
| 6 |
from PIL import Image
|
| 7 |
from omegaconf import OmegaConf
|
| 8 |
-
from tqdm import tqdm
|
| 9 |
-
from diffusers import AutoencoderKLTemporalDecoder
|
| 10 |
-
from diffusers.schedulers import EulerDiscreteScheduler
|
| 11 |
from transformers import WhisperModel, CLIPVisionModelWithProjection, AutoFeatureExtractor
|
| 12 |
-
|
| 13 |
from src.utils.util import save_videos_grid, seed_everything
|
| 14 |
-
from src.dataset.test_preprocess import
|
| 15 |
-
from src.models.base.unet_spatio_temporal_condition import
|
| 16 |
-
|
| 17 |
-
)
|
| 18 |
from src.models.audio_adapter.audio_proj import AudioProjModel
|
| 19 |
from src.models.audio_adapter.audio_to_bucket import Audio2bucketModel
|
| 20 |
-
from src.pipelines.pipeline_sonic import SonicPipeline
|
| 21 |
from src.utils.RIFE.RIFE_HDv3 import RIFEModel
|
| 22 |
from src.dataset.face_align.align import AlignImage
|
| 23 |
-
|
| 24 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
# ------------------------------------------------------------------
|
|
@@ -123,19 +125,30 @@ def test(pipe, cfg, wav_enc, audio_pe, audio2bucket, img_enc,
|
|
| 123 |
# ------------------------------------------------------------------
|
| 124 |
# Sonic wrapper
|
| 125 |
# ------------------------------------------------------------------
|
|
|
|
| 126 |
class Sonic:
|
| 127 |
config_file = os.path.join(BASE_DIR, "config/inference/sonic.yaml")
|
| 128 |
config = OmegaConf.load(config_file)
|
| 129 |
|
| 130 |
-
def __init__(self, device_id=0, enable_interpolate_frame=True):
|
| 131 |
cfg = self.config
|
| 132 |
cfg.use_interframe = enable_interpolate_frame
|
| 133 |
-
self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id>=0 else "cpu"
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
self._load_models(cfg)
|
| 137 |
print("Sonic init done")
|
| 138 |
|
|
|
|
| 139 |
# model-loader (unchanged, but with tiny clean-ups) ------------------------
|
| 140 |
def _load_models(self, cfg):
|
| 141 |
dtype = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}[cfg.weight_dtype]
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
sonic.py – 2025-05 hot-fix
|
| 3 |
+
Key changes
|
| 4 |
+
• Check whether config.pretrained_model_name_or_path is an actual folder
|
| 5 |
+
• If it is missing, download it automatically with huggingface_hub.snapshot_download
|
| 6 |
+
• Load the models once the path is ready
|
| 7 |
+
"""
|
| 8 |
import os, math, torch, cv2
|
|
|
|
| 9 |
from PIL import Image
|
| 10 |
from omegaconf import OmegaConf
|
| 11 |
+
from tqdm.auto import tqdm
|
| 12 |
+
from diffusers import AutoencoderKLTemporalDecoder, EulerDiscreteScheduler
|
|
|
|
| 13 |
from transformers import WhisperModel, CLIPVisionModelWithProjection, AutoFeatureExtractor
|
| 14 |
+
from huggingface_hub import snapshot_download, hf_hub_download
|
| 15 |
from src.utils.util import save_videos_grid, seed_everything
|
| 16 |
+
from src.dataset.test_preprocess import process_bbox, image_audio_to_tensor
|
| 17 |
+
from src.models.base.unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel, add_ip_adapters
|
| 18 |
+
from src.pipelines.pipeline_sonic import SonicPipeline
|
|
|
|
| 19 |
from src.models.audio_adapter.audio_proj import AudioProjModel
|
| 20 |
from src.models.audio_adapter.audio_to_bucket import Audio2bucketModel
|
|
|
|
| 21 |
from src.utils.RIFE.RIFE_HDv3 import RIFEModel
|
| 22 |
from src.dataset.face_align.align import AlignImage
|
| 23 |
+
# ------------------------------
|
| 24 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 25 |
+
HF_STABLE_REPO = "stabilityai/stable-video-diffusion-img2vid-xt"
|
| 26 |
+
LOCAL_STABLE_DIR = os.path.join(BASE_DIR, "checkpoints", "stable-video-diffusion-img2vid-xt")
|
| 27 |
|
| 28 |
|
| 29 |
# ------------------------------------------------------------------
|
|
|
|
| 125 |
# ------------------------------------------------------------------
|
| 126 |
# Sonic wrapper
|
| 127 |
# ------------------------------------------------------------------
|
| 128 |
+
|
| 129 |
class Sonic:
|
| 130 |
config_file = os.path.join(BASE_DIR, "config/inference/sonic.yaml")
|
| 131 |
config = OmegaConf.load(config_file)
|
| 132 |
|
| 133 |
+
def __init__(self, device_id: int = 0, enable_interpolate_frame: bool = True):
    """Prepare the config, ensure the base checkpoint is on disk, then load the models.

    Args:
        device_id: CUDA device index. A negative value, or a machine where
            ``torch.cuda.is_available()`` is false, selects ``"cpu"``.
        enable_interpolate_frame: Stored on the config as ``use_interframe``.
    """
    # NOTE(review): ``self.config`` is the class-level OmegaConf object, so the
    # assignments below are shared by every ``Sonic`` instance.
    cfg = self.config
    cfg.use_interframe = enable_interpolate_frame
    self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id >= 0 else "cpu"

    # Make sure the pretrained base-model folder is available locally.
    # ``vae/config.json`` serves as the marker file; ``isfile`` is already
    # False when the folder itself is missing, so no separate ``isdir`` check
    # is needed.
    vae_config = os.path.join(LOCAL_STABLE_DIR, "vae", "config.json")
    if not os.path.isfile(vae_config):
        print("[INFO] 1st-run – downloading base model (~2 GB)…")
        # ``resume_download`` and ``local_dir_use_symlinks`` are deprecated in
        # huggingface_hub and announced for removal in 1.0: downloads always
        # resume when possible and ``local_dir`` no longer relies on symlinks,
        # so the two keywords are dropped.
        snapshot_download(repo_id=HF_STABLE_REPO, local_dir=LOCAL_STABLE_DIR)
    # NOTE(review): the scraped diff lost indentation; this assignment is
    # assumed to run unconditionally so the loader always sees the local path
    # -- confirm against the real file.
    cfg.pretrained_model_name_or_path = LOCAL_STABLE_DIR

    self._load_models(cfg)
    print("Sonic init done")
|
| 150 |
|
| 151 |
+
|
| 152 |
# model-loader (unchanged, but with tiny clean-ups) ------------------------
|
| 153 |
def _load_models(self, cfg):
|
| 154 |
dtype = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}[cfg.weight_dtype]
|