OpenMOSS-Team
/

MOSS-Speech-Codec

@@ -475,33 +475,79 @@ class MossSpeechCodec(PreTrainedModel):
     @classmethod
     def from_pretrained(
         cls,
-        model_dir: Union[str, os.PathLike],
-        *args,
         **kwargs,
     ):
-        """Instantiate codec from a directory containing encoder and decoder assets.
-        Expected layout:
-        - `model.safetensors` (Whisper VQ encoder weights)
-        - `config.json` (Whisper VQ config)
-        - `preprocessor_config.json` (WhisperFeatureExtractor params)
-        - `flow/{config.yaml, flow.pt, hift.pt, campplus.onnx}`
         """
-        base = Path(str(model_dir))
-        # Support both layouts:
-        # 1) <base>/{model.safetensors, config.json, preprocessor_config.json, flow/}
-        # 2) <base>/speech_tokenizer/{model.safetensors, ...} and <base>/flow/
-        if (base / "model.safetensors").exists():
-            tokenizer_dir = base
-            flow_dir = base / "flow"
         else:
-            tokenizer_dir = base / "speech_tokenizer"
-            flow_dir = base / "flow"
         encoder_weight_path = str(tokenizer_dir / "model.safetensors")
         encoder_config_path = str(tokenizer_dir / "config.json")
         encoder_feature_extractor_path = str(tokenizer_dir)
         flow_path = str(flow_dir)
         return cls(
             encoder_weight_path=encoder_weight_path,
             encoder_config_path=encoder_config_path,

     @classmethod
     def from_pretrained(
         cls,
+        pretrained_model_name_or_path: Union[str, os.PathLike],
+        *,
+        revision: Optional[str] = None,
+        cache_dir: Optional[Union[str, os.PathLike]] = None,
+        force_download: bool = False,
+        local_files_only: bool = False,
+        token: Optional[Union[str, bool]] = None,
+        use_auth_token: Optional[Union[str, bool]] = None,  # back-compat with HF Transformers kwarg
+        subfolder: Optional[str] = None,
         **kwargs,
     ):
+        """Instantiate codec from a local directory or a Hugging Face Hub repo.
+        This mirrors the typical Hugging Face ``from_pretrained`` behavior:
+        - If ``pretrained_model_name_or_path`` is a local folder, files are loaded from it.
+        - Otherwise, it is treated as a Hub repo ID and downloaded with ``snapshot_download``.
+        Expected layout inside the resolved base folder:
+        - ``model.safetensors`` (Whisper VQ encoder weights)
+        - ``config.json`` (Whisper VQ config)
+        - ``preprocessor_config.json`` (WhisperFeatureExtractor params)
+        - ``flow/{config.yaml, flow.pt, hift.pt, campplus.onnx}``
         """
+        # Resolve local directory vs HF Hub repo.
+        base: Path
+        path_str = str(pretrained_model_name_or_path)
+        if os.path.isdir(path_str):
+            base = Path(path_str)
         else:
+            try:
+                from huggingface_hub import snapshot_download  # lazy import to avoid hard dependency at import time
+            except Exception as exc:  # pragma: no cover
+                raise RuntimeError(
+                    "huggingface_hub is required to load from a repo id; please `pip install huggingface_hub`."
+                ) from exc
+            # HF Transformers historically supports both `token` and deprecated `use_auth_token`.
+            if token is None and use_auth_token is not None:
+                token = use_auth_token
+            snapshot_path = snapshot_download(
+                repo_id=path_str,
+                revision=revision,
+                cache_dir=str(cache_dir) if cache_dir is not None else None,
+                force_download=force_download,
+                local_files_only=local_files_only,
+                token=token,
+            )
+            base = Path(snapshot_path)
+        if subfolder:
+            base = base / subfolder
+        tokenizer_dir = base
+        flow_dir = base / "flow"
+        # Validate expected files and provide actionable error messages, similar to HF patterns.
+        missing: List[str] = []
+        if not (tokenizer_dir / "model.safetensors").exists():
+            missing.append(str(tokenizer_dir / "model.safetensors"))
+        if not (tokenizer_dir / "config.json").exists():
+            missing.append(str(tokenizer_dir / "config.json"))
+        if not (tokenizer_dir / "preprocessor_config.json").exists():
+            missing.append(str(tokenizer_dir / "preprocessor_config.json"))
+        for fname in ("config.yaml", "flow.pt", "hift.pt"):
+            if not (flow_dir / fname).exists():
+                missing.append(str(flow_dir / fname))
+        # `campplus.onnx` may be named differently in some drops; only warn if absent.
+        has_campplus = (flow_dir / "campplus.onnx").exists()
+        if missing:
+            raise FileNotFoundError(
+                "Missing required codec assets under resolved path. The following files were not found: "
+                + ", ".join(missing)
+            )
+        if not has_campplus:
+            logger.warning("campplus.onnx not found under %s; decoding speaker embedding may fail.", flow_dir)
         encoder_weight_path = str(tokenizer_dir / "model.safetensors")
         encoder_config_path = str(tokenizer_dir / "config.json")
         encoder_feature_extractor_path = str(tokenizer_dir)
         flow_path = str(flow_dir)
         return cls(
             encoder_weight_path=encoder_weight_path,
             encoder_config_path=encoder_config_path,