rookie9
/

PicoAudio2

Safetensors

PicoAudio2

custom_code

Model card Files Files and versions

xet

Community

rookie9 committed on Sep 29, 2025

Commit

32aa2ea

verified ·

1 Parent(s): 131e947

Update model.py

Browse files

Files changed (1) hide show

model.py +2 -41

model.py CHANGED Viewed

@@ -43,10 +43,6 @@ class PicoAudio2HF(PreTrainedModel):
         content_encoder = self.build_content_encoder_from_config(config.content_encoder)
         backbone = self._build_submodule(config.backbone)
-        state_dict = load_file("model.safetensors")
-        new_state_dict = {k.replace("backbone.", ""): v for k, v in state_dict.items()}
-        backbone.load_state_dict(new_state_dict, strict=False, assign=True)
         self.inner_model = AudioDiffusion(
             autoencoder=autoencoder,
             content_encoder=content_encoder,
@@ -57,6 +53,7 @@ class PicoAudio2HF(PreTrainedModel):
             classifier_free_guidance=config.classifier_free_guidance,
             cfg_drop_ratio=config.cfg_drop_ratio,
         )
     def build_content_encoder_from_config(self, content_encoder_cfg):
         te_cfg = content_encoder_cfg['text_encoder']
         te_mod_path, te_cls_name = te_cfg['_target_'].rsplit('.', 1)
@@ -88,36 +85,9 @@ class PicoAudio2HF(PreTrainedModel):
             module = __import__(module_path, fromlist=[class_name])
             cls = getattr(module, class_name)
             obj = cls(**kwargs)
-            if "pretrained_ckpt" in sub_config:
-                state_dict = torch.load(sub_config["pretrained_ckpt"])
-                if "state_dict" in state_dict:
-                    new_state_dict = state_dict["state_dict"]
-                state_dict = {k.replace("autoencoder.", ""): v for k, v in new_state_dict.items()}
-                sig = inspect.signature(obj.load_state_dict)
-                if "assign" in sig.parameters:
-                    result = obj.load_state_dict(state_dict, strict=False, assign=True)
-                else:
-                    result = obj.load_state_dict(state_dict, strict=False)
-                self._check_param_stats(obj, class_name)
             return obj
         else:
             return sub_config
-    def _check_weights(self, module, name):
-        if hasattr(module, "load_state_dict") and hasattr(module, "state_dict"):
-            print(f"[{name}] parameter keys:", list(module.state_dict().keys())[:5], "...")
-            for idx, (k, v) in enumerate(module.state_dict().items()):
-                print(f"[{name}] {k}: mean={v.float().mean():.5f}, std={v.float().std():.5f}")
-                if idx >= 2:
-                    break
-    def _check_param_stats(self, module, name):
-        if hasattr(module, "named_parameters"):
-            for idx, (k, v) in enumerate(module.named_parameters()):
-                print(f"[{name}] {k}: mean={v.data.float().mean():.5f}, std={v.data.float().std():.5f}")
-                if idx >= 2:
-                    break
     def forward(
         self,
@@ -139,13 +109,4 @@ class PicoAudio2HF(PreTrainedModel):
             disable_progress=disable_progress,
             num_samples_per_content=num_samples_per_content,
             **kwargs
-        )
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
-        config = PicoAudio2Config.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        model = cls(config)
-        return model
-    def load_state_dict(self, state_dict, *args, **kwargs):
-        pass

         content_encoder = self.build_content_encoder_from_config(config.content_encoder)
         backbone = self._build_submodule(config.backbone)
         self.inner_model = AudioDiffusion(
             autoencoder=autoencoder,
             content_encoder=content_encoder,
             classifier_free_guidance=config.classifier_free_guidance,
             cfg_drop_ratio=config.cfg_drop_ratio,
         )
     def build_content_encoder_from_config(self, content_encoder_cfg):
         te_cfg = content_encoder_cfg['text_encoder']
         te_mod_path, te_cls_name = te_cfg['_target_'].rsplit('.', 1)
             module = __import__(module_path, fromlist=[class_name])
             cls = getattr(module, class_name)
             obj = cls(**kwargs)
             return obj
         else:
             return sub_config
     def forward(
         self,
             disable_progress=disable_progress,
             num_samples_per_content=num_samples_per_content,
             **kwargs
+        )