Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.DS_Store +0 -0
backbone.onnx +3 -0
backbone_keys.onnx +3 -0
length_pred.onnx +3 -0
length_pred_style.onnx +3 -0
obfuscate_onnx.py +699 -0
reference_encoder.onnx +3 -0
stats.npz +3 -0
text_encoder.onnx +3 -0
uncond.npz +3 -0
vocoder.onnx +3 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

backbone.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92d02eac3c6f9b2c4b347e87d18c825b5a5e44158c341ce62714f20324cc74b5
+size 132644653

backbone_keys.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b465da29ba2cff4cfcb7c8ae7b70420bb88fd9d9d7a306e3a81cfee303297550
+size 132592456

length_pred.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b31959fb99a04a7b907d63ed8edf5e202e484064e41ad3ba8723c7bf9fc04a8c
+size 2055214

length_pred_style.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ab708ab77ea16e2d7bde0f906fb13b46feeba12802859bec1208cec8eed3ee0
+size 1418679

obfuscate_onnx.py ADDED Viewed

	@@ -0,0 +1,699 @@

+#!/usr/bin/env python3
+"""
+Secure ONNX Export Pipeline for Light-BlueTTS
+==============================================
+Combines two obfuscation techniques:
+  1. Self-Contained Output Scrambling - permanently scrambles layer weights
+     (Conv1d, Linear, Embedding) and injects inverse Gather nodes in the
+     computation graph. The model still produces identical outputs, but
+     stored weights are "poisoned". Uses Dynamic Zero anti-optimizer trick
+     to prevent ONNX Runtime constant folding.
+  2. ONNX Name Obfuscation - randomizes all internal node/tensor/weight names
+     so the graph is unreadable in tools like Netron.
+Usage:
+  python obfuscate_onnx.py --config hebrew/tts.json \\
+    --ttl_ckpt ckpt_step_580000.pt \\
+    --ae_ckpt ae_latest_newer.pt \\
+    --dp_ckpt duration_predictor_final.pt \\
+    --onnx_dir onnx_obfuscated
+"""
+import os
+import argparse
+import glob as glob_mod
+import random
+import string
+import numpy as np
+import torch
+import torch.nn as nn
+import onnx
+from onnx import numpy_helper as onnx_numpy_helper
+# Model imports
+from models.text2latent.text_encoder import TextEncoder
+from models.text2latent.vf_estimator import VectorFieldEstimator
+from models.autoencoder.latent_decoder import LatentDecoder1D
+from models.text2latent.dp_network import DPNetwork
+from models.text2latent.reference_encoder import ReferenceEncoder
+from models.utils import load_ttl_config
+# Reuse ONNX-safe MHA replacement and wrappers from export pipeline
+from export_onnx import (
+    _replace_mha_with_safe,
+    VectorFieldEstimatorWrapper,
+    VectorFieldEstimatorKeysWrapper,
+    export_one,
+)
+# =====================================================================
+# Part 1: Self-Contained Output Scrambling (Weight Poisoning)
+# =====================================================================
+class SelfScrambledConv1d(nn.Module):
+    """
+    Drop-in wrapper that keeps a Conv1d with its output channels shuffled.
+    On construction a random permutation is drawn and applied in place to the
+    wrapped layer's weight (dim 0) and bias, so the stored parameters no
+    longer follow the original channel order. ``forward`` runs the shuffled
+    convolution and then gathers the channel axis through the inverse
+    permutation, restoring the original channel order.
+    The gather indices are not used as-is: a zero computed from the first
+    output element (``value * 0.0`` cast to long) is added to them first.
+    As described in the module docstring, this "Dynamic Zero" is meant to
+    make the index tensor depend on runtime data so ONNX Runtime's constant
+    folding does not pre-compute the Gather.
+    NOTE(review): the zero comes from a float multiplication, so a NaN/Inf in
+    the first output element would not produce 0 - confirm activations stay
+    finite.
+    Args:
+        original_conv: Conv1d (or subclass) to wrap. It is mutated in place
+            and stored as ``self.conv``.
+    """
+    def __init__(self, original_conv):
+        super().__init__()
+        self.conv = original_conv
+        n_out = original_conv.out_channels
+        shuffle = torch.randperm(n_out)
+        # argsort of a permutation is its inverse: unshuffle[shuffle[i]] == i.
+        unshuffle = torch.argsort(shuffle)
+        self.register_buffer('inv_shuffle_indices', unshuffle)
+        with torch.no_grad():
+            # Reorder output channels (dim 0) of the stored parameters.
+            original_conv.weight.data = original_conv.weight.data[shuffle]
+            bias = original_conv.bias
+            if bias is not None:
+                bias.data = bias.data[shuffle]
+    def forward(self, x):
+        shuffled_out = self.conv(x)
+        # Zero derived from the data tensor (first element * 0.0); adding it
+        # keeps the gather indices data-dependent without changing them.
+        runtime_zero = (shuffled_out.reshape(-1)[0] * 0.0).long()
+        gather_idx = self.inv_shuffle_indices + runtime_zero
+        return shuffled_out[:, gather_idx, :]
+class SelfScrambledLinear(nn.Module):
+    """
+    Linear-layer counterpart of the scrambled Conv1d wrapper.
+    A random permutation is applied in place to the rows of the wrapped
+    layer's weight (its output features) and to its bias. ``forward`` runs
+    the shuffled layer and gathers the last axis through the inverse
+    permutation, so callers see the original feature order.
+    The gather indices have a runtime-derived zero added (first output
+    element times 0.0, cast to long) - the "Dynamic Zero" trick the module
+    docstring describes for defeating ONNX Runtime constant folding.
+    Args:
+        original_linear: nn.Linear to wrap. It is mutated in place and stored
+            as ``self.linear``.
+    """
+    def __init__(self, original_linear):
+        super().__init__()
+        self.linear = original_linear
+        n_out = original_linear.out_features
+        shuffle = torch.randperm(n_out)
+        # argsort of a permutation is its inverse: unshuffle[shuffle[i]] == i.
+        unshuffle = torch.argsort(shuffle)
+        self.register_buffer('inv_shuffle_indices', unshuffle)
+        with torch.no_grad():
+            # Rows of the weight matrix correspond to output features.
+            original_linear.weight.data = original_linear.weight.data[shuffle]
+            bias = original_linear.bias
+            if bias is not None:
+                bias.data = bias.data[shuffle]
+    def forward(self, x):
+        shuffled_out = self.linear(x)
+        # Data-dependent zero; leaves the indices unchanged numerically.
+        runtime_zero = (shuffled_out.reshape(-1)[0] * 0.0).long()
+        gather_idx = self.inv_shuffle_indices + runtime_zero
+        return shuffled_out[..., gather_idx]
+class SelfScrambledEmbedding(nn.Module):
+    """
+    Wrapper that stores an nn.Embedding with its table rows shuffled.
+    A random permutation is applied in place to the rows of the embedding
+    weight (the ``num_embeddings`` axis). ``forward`` translates each incoming
+    index through the inverse permutation before the lookup, so every index
+    still resolves to the vector it had before shuffling.
+    The translation table has a runtime-derived zero added (first input
+    index cast to float, times 0.0, cast back to long). This is the
+    "Dynamic Zero" trick the module docstring describes for keeping ONNX
+    Runtime from constant-folding the remap.
+    Args:
+        original_embedding: nn.Embedding to wrap. It is mutated in place and
+            stored as ``self.embedding``.
+    """
+    def __init__(self, original_embedding):
+        super().__init__()
+        self.embedding = original_embedding
+        n_rows = original_embedding.num_embeddings
+        shuffle = torch.randperm(n_rows)
+        # argsort of a permutation is its inverse: unshuffle[shuffle[i]] == i.
+        unshuffle = torch.argsort(shuffle)
+        self.register_buffer('inv_shuffle_indices', unshuffle)
+        with torch.no_grad():
+            original_embedding.weight.data = original_embedding.weight.data[shuffle]
+    def forward(self, x):
+        # Integer indices are cast to float only to build the zero, which is
+        # then cast back to long so it can be added to the index table.
+        runtime_zero = (x.float().reshape(-1)[0] * 0.0).long()
+        row_lookup = self.inv_shuffle_indices + runtime_zero
+        remapped_ids = row_lookup[x]
+        return self.embedding(remapped_ids)
+def scramble_all_layers(module, scramble_linear=False, scramble_embedding=False, _depth=0):
+    """
+    Walk ``module`` depth-first and swap eligible layers for scrambled wrappers.
+    Replacement rules, checked in this order for every direct child:
+      - nn.Conv1d (subclasses included) with groups == 1 and more than one
+        output channel -> SelfScrambledConv1d
+      - nn.Linear with more than one output feature, only when
+        ``scramble_linear`` is set -> SelfScrambledLinear
+      - nn.Embedding with more than one row, only when ``scramble_embedding``
+        is set -> SelfScrambledEmbedding
+    Any child that matches none of the rules (grouped/depthwise convolutions
+    included) is descended into instead of being wrapped.
+    NOTE(review): a layer instance reachable from two different parents would
+    be wrapped under the first parent only while its weights are already
+    permuted - confirm the exported models do not share submodules.
+    Args:
+        module: root nn.Module; its children are replaced in place.
+        scramble_linear: also wrap nn.Linear layers.
+        scramble_embedding: also wrap nn.Embedding layers.
+        _depth: recursion depth, incremented on each descent; not otherwise
+            used by this function.
+    Returns:
+        Number of layers that were wrapped.
+    """
+    wrapped = 0
+    # Snapshot the children first: setattr below mutates the module registry.
+    for attr_name, layer in list(module.named_children()):
+        if isinstance(layer, nn.Conv1d) and layer.groups == 1 and layer.out_channels > 1:
+            wrapper_cls = SelfScrambledConv1d
+        elif scramble_linear and isinstance(layer, nn.Linear) and layer.out_features > 1:
+            wrapper_cls = SelfScrambledLinear
+        elif scramble_embedding and isinstance(layer, nn.Embedding) and layer.num_embeddings > 1:
+            wrapper_cls = SelfScrambledEmbedding
+        else:
+            wrapper_cls = None
+        if wrapper_cls is None:
+            # Containers and non-eligible layers: keep looking further down.
+            wrapped += scramble_all_layers(
+                layer,
+                scramble_linear=scramble_linear,
+                scramble_embedding=scramble_embedding,
+                _depth=_depth + 1,
+            )
+            continue
+        setattr(module, attr_name, wrapper_cls(layer))
+        wrapped += 1
+    return wrapped
+# =====================================================================
+# Part 2: ONNX Name Obfuscation
+# =====================================================================
+def _random_name(length=12):
+    """Return ``length`` characters drawn at random, with replacement, from ASCII letters and digits."""
+    alphabet = string.ascii_letters + string.digits
+    picked = random.choices(alphabet, k=length)
+    return ''.join(picked)
+def obfuscate_onnx_names(input_path, output_path=None, keep_io_names=True):
+    """
+    Rewrite an ONNX file so that its internal names are random strings.
+    Value-info entries, initializers, graph inputs/outputs, node names and
+    node input/output tensor names are renamed, in the top-level graph and
+    recursively in every subgraph held in a GRAPH/GRAPHS node attribute
+    (If, Loop, ...). One shared mapping is used for the whole model, so a
+    name referenced from several places (including from inside a subgraph)
+    is always rewritten to the same replacement.
+    Generated names are guaranteed to be unique: a fresh random name is
+    redrawn if it matches a preserved I/O name or a name already handed out,
+    so two distinct tensors can never be merged by an unlucky draw.
+    Args:
+        input_path: path of the ONNX model to read.
+        output_path: where to write the result; defaults to ``input_path``
+            (overwrite in place).
+        keep_io_names: when True, the names of the top-level graph inputs and
+            outputs are left unchanged so existing inference code keeps working.
+    Returns:
+        Number of distinct names that were replaced.
+    """
+    if output_path is None:
+        output_path = input_path
+    model = onnx.load(input_path)
+    name_map = {}
+    preserved = set()
+    # Preserve main I/O names so inference doesn't break
+    if keep_io_names:
+        for inp in model.graph.input:
+            preserved.add(inp.name)
+        for out in model.graph.output:
+            preserved.add(out.name)
+    # Every name that must not be produced again: preserved names plus all
+    # replacements issued so far.
+    taken = set(preserved)
+    def remap(old_name):
+        # Empty names are left untouched (ONNX uses "" for omitted optional
+        # inputs/outputs).
+        if old_name == "":
+            return ""
+        if old_name in preserved:
+            return old_name
+        new_name = name_map.get(old_name)
+        if new_name is None:
+            new_name = _random_name()
+            while new_name in taken:
+                new_name = _random_name()
+            taken.add(new_name)
+            name_map[old_name] = new_name
+        return new_name
+    def process_graph(g):
+        """Rename everything in graph ``g`` and in all of its subgraphs."""
+        for vi in g.value_info:
+            vi.name = remap(vi.name)
+        for init in g.initializer:
+            init.name = remap(init.name)
+        # Graph inputs and outputs; for the top-level graph these are in
+        # `preserved` when keep_io_names is set, so remap() returns them as-is.
+        for inp in g.input:
+            inp.name = remap(inp.name)
+        for out in g.output:
+            out.name = remap(out.name)
+        for node in g.node:
+            if node.name:
+                node.name = remap(node.name)
+            for i, n in enumerate(node.input):
+                node.input[i] = remap(n)
+            for i, n in enumerate(node.output):
+                node.output[i] = remap(n)
+            # Control-flow nodes carry whole graphs as attributes; those must
+            # be renamed with the same mapping or their references break.
+            for attr in node.attribute:
+                if attr.type == onnx.AttributeProto.GRAPH:
+                    process_graph(attr.g)
+                elif attr.type == onnx.AttributeProto.GRAPHS:
+                    for sub_g in attr.graphs:
+                        process_graph(sub_g)
+    process_graph(model.graph)
+    onnx.save(model, output_path)
+    return len(name_map)
+# =====================================================================
+# Part 2b: Shuffle Key Extraction (Optimizer-Proof)
+# =====================================================================
+def extract_shuffle_keys(onnx_path):
+    """
+    Turn the stored inverse-shuffle index tensors of a model into graph inputs.
+    Every initializer whose name contains ``"inv_shuffle_indices"`` is read
+    into a NumPy array, removed from the graph's initializer list and, unless
+    an input of that name already exists, declared as a graph input with the
+    array's shape and the initializer's own element type. The modified model
+    is written back to ``onnx_path``. If no such initializer is found the
+    file is left untouched.
+    Motivation (from the original design notes): once the indices are no
+    longer constants inside the file, ONNX Runtime's graph optimizer cannot
+    constant-fold the Gather nodes away, e.g. via
+        sess_options.graph_optimization_level = ORT_ENABLE_ALL
+        sess_options.optimized_model_filepath = "cracked.onnx"
+    The extracted arrays then have to be supplied as inputs at inference time.
+    Args:
+        onnx_path: path of the ONNX model; it is overwritten in place.
+    Returns:
+        dict of {input_name: numpy_array} for the extracted keys (empty when
+        nothing matched).
+    """
+    model = onnx.load(onnx_path)
+    graph = model.graph
+    extracted = {}
+    elem_types = {}
+    to_remove = []
+    for init in graph.initializer:
+        if "inv_shuffle_indices" in init.name:
+            extracted[init.name] = onnx_numpy_helper.to_array(init)
+            # Remember the stored element type so the new graph input is
+            # declared with exactly the dtype the graph was built against,
+            # rather than assuming INT64.
+            elem_types[init.name] = init.data_type
+            to_remove.append(init)
+    if not to_remove:
+        return extracted
+    # Remove from initializers (no longer a constant in the file). Done in a
+    # second pass so the list is not mutated while it is being iterated.
+    for init in to_remove:
+        graph.initializer.remove(init)
+    # Ensure each extracted key is registered as a dynamic graph input
+    existing_inputs = {inp.name for inp in graph.input}
+    for name, arr in extracted.items():
+        if name not in existing_inputs:
+            inp = onnx.helper.make_tensor_value_info(
+                name, elem_types[name], list(arr.shape)
+            )
+            graph.input.append(inp)
+    onnx.save(model, onnx_path)
+    return extracted
+# =====================================================================
+# Part 3: Full Obfuscation Pipeline
+# =====================================================================
+def get_latest_ckpt(dir_path):
+    """
+    Return the path of the most recent checkpoint in ``dir_path``.
+    Files named ``ckpt_step_<N>.pt`` take priority and are ranked by the
+    integer ``<N>``; a file whose step part is not an integer ranks lowest.
+    If no such file exists, the ``*.pt`` file with the newest modification
+    time is returned instead.
+    Args:
+        dir_path: directory to search (not recursive).
+    Returns:
+        Path of the selected checkpoint, or None when the directory contains
+        no ``.pt`` file.
+    """
+    def step_num(p):
+        step_text = os.path.basename(p).split("ckpt_step_")[-1].split(".pt")[0]
+        try:
+            return int(step_text)
+        except ValueError:
+            # Non-numeric step (e.g. "ckpt_step_final.pt"): sort it first.
+            return -1
+    ckpt_step = glob_mod.glob(os.path.join(dir_path, "ckpt_step_*.pt"))
+    if ckpt_step:
+        # Numeric ordering: a plain string sort would put step 99000 after
+        # step 580000.
+        ckpt_step.sort(key=step_num)
+        return ckpt_step[-1]
+    ckpts = glob_mod.glob(os.path.join(dir_path, "*.pt"))
+    return max(ckpts, key=os.path.getmtime) if ckpts else None
+def main():
+    """
+    Command-line entry point: build, scramble, export and obfuscate all models.
+    Steps, in order:
+      1. Parse arguments and load the TTS config.
+      2. Resolve the TTL / AE / DP checkpoint paths and load whichever files
+         exist (missing files are reported with a [WARN] line and skipped).
+      3. For each of the five models (reference encoder, text encoder, vector
+         field estimator, vocoder, duration predictor): instantiate it from
+         the config, load its weights when available, scramble its layers and
+         export it to ONNX with dummy inputs.
+      4. Save the unconditional / conditional key tensors to uncond.npz.
+      5. Optionally move the shuffle indices out of the ONNX files into
+         keys.npz (--extract-keys).
+      6. Unless disabled, randomize the internal ONNX names.
+      7. Print a summary.
+    Returns None; prints an error and returns early if the config is missing.
+    """
+    parser = argparse.ArgumentParser(
+        description="Obfuscate Light-BlueTTS models: weight scrambling + ONNX name randomization"
+    )
+    parser.add_argument("--config", type=str, default="hebrew/tts.json",
+                        help="Path to tts.json config")
+    parser.add_argument("--onnx_dir", type=str, default="onnx_obfuscated",
+                        help="Output directory for obfuscated ONNX models")
+    parser.add_argument("--ckpt_dir", type=str, default=None,
+                        help="Text2Latent checkpoint dir (auto-finds latest ckpt_step_*.pt)")
+    parser.add_argument("--ttl_ckpt", type=str, default=None,
+                        help="Explicit TTL checkpoint file (overrides --ckpt_dir)")
+    parser.add_argument("--ae_ckpt", type=str, default="ae_latest_newer.pt",
+                        help="AutoEncoder checkpoint (.pt)")
+    parser.add_argument("--dp_ckpt", type=str, default="duration_predictor_final.pt",
+                        help="Duration Predictor checkpoint (.pt)")
+    parser.add_argument("--scramble-linear", action="store_true",
+                        help="Also scramble nn.Linear layers (extra obfuscation)")
+    parser.add_argument("--scramble-embedding", action="store_true",
+                        help="Also scramble nn.Embedding layers (token table obfuscation)")
+    parser.add_argument("--scramble-all", action="store_true",
+                        help="Scramble all supported layer types (Conv1d + Linear + Embedding)")
+    parser.add_argument("--no-name-obfuscation", action="store_true",
+                        help="Skip ONNX name randomization (only do weight scrambling)")
+    parser.add_argument("--extract-keys", action="store_true",
+                        help="Also extract shuffle keys to keys.npz (defense-in-depth, not needed with Dynamic Zero)")
+    args = parser.parse_args()
+    device = "cpu"
+    do_name_obfuscation = not args.no_name_obfuscation
+    do_key_extraction = args.extract_keys
+    # Resolve scrambling flags (--scramble-all enables everything)
+    scramble_linear = args.scramble_linear or args.scramble_all
+    scramble_embedding = args.scramble_embedding or args.scramble_all
+    # ---- Load Config ----
+    if not os.path.exists(args.config):
+        print(f"[ERROR] Config not found: {args.config}")
+        return
+    cfg = load_ttl_config(args.config)
+    print(f"[INFO] Loaded config: {args.config}")
+    os.makedirs(args.onnx_dir, exist_ok=True)
+    # ---- Find Checkpoints ----
+    # TTL checkpoint: explicit file > latest in --ckpt_dir > latest in cwd
+    ttl_ckpt = args.ttl_ckpt
+    if ttl_ckpt is None and args.ckpt_dir:
+        ttl_ckpt = get_latest_ckpt(args.ckpt_dir)
+    if ttl_ckpt is None:
+        # Fall back to ckpt_step_*.pt files in the current directory. The
+        # newest one is picked by numeric step via get_latest_ckpt; a plain
+        # string sort would rank ckpt_step_99000.pt above ckpt_step_580000.pt.
+        if glob_mod.glob("ckpt_step_*.pt"):
+            ttl_ckpt = os.path.normpath(get_latest_ckpt(os.curdir))
+    ae_ckpt = args.ae_ckpt
+    dp_ckpt = args.dp_ckpt
+    print(f"[INFO] TTL checkpoint: {ttl_ckpt or '(none - random weights)'}")
+    print(f"[INFO] AE  checkpoint: {ae_ckpt}")
+    print(f"[INFO] DP  checkpoint: {dp_ckpt}")
+    # A checkpoint path that does not exist is tolerated (the corresponding
+    # load below is skipped), but report it instead of staying silent.
+    for ckpt_label, ckpt_path in (("TTL", ttl_ckpt), ("AE", ae_ckpt), ("DP", dp_ckpt)):
+        if ckpt_path and not os.path.exists(ckpt_path):
+            print(f"[WARN] {ckpt_label} checkpoint not found: {ckpt_path}")
+    # ---- Load Checkpoint State Dicts ----
+    # NOTE: torch.load unpickles the file - only use trusted checkpoints.
+    t2l_state = torch.load(ttl_ckpt, map_location=device) if ttl_ckpt and os.path.exists(ttl_ckpt) else {}
+    ae_state = torch.load(ae_ckpt, map_location=device) if os.path.exists(ae_ckpt) else {}
+    dp_state_raw = torch.load(dp_ckpt, map_location=device) if os.path.exists(dp_ckpt) else None
+    # ---- Dimensions from config ----
+    vocab_size = cfg["vocab_size"]
+    compressed_channels = cfg["compressed_channels"]
+    latent_dim = cfg["latent_dim"]
+    chunk_compress_factor = cfg["chunk_compress_factor"]
+    te_d_model = cfg["te_d_model"]
+    se_d_model = cfg["se_d_model"]
+    se_n_style = cfg["se_n_style"]
+    total_scrambled = 0
+    exported_files = []
+    # ==============================================================
+    # 1. Reference Encoder
+    # ==============================================================
+    print("\n[1/5] Reference Encoder")
+    ref_enc = ReferenceEncoder(
+        in_channels=compressed_channels,
+        d_model=se_d_model,
+        hidden_dim=cfg["se_hidden_dim"],
+        num_blocks=cfg["se_num_blocks"],
+        num_tokens=se_n_style,
+        num_heads=cfg["se_n_heads"],
+    ).to(device).eval()
+    if "reference_encoder" in t2l_state:
+        ref_enc.load_state_dict(t2l_state["reference_encoder"], strict=True)
+    _replace_mha_with_safe(ref_enc)
+    n = scramble_all_layers(ref_enc, scramble_linear=scramble_linear, scramble_embedding=scramble_embedding)
+    total_scrambled += n
+    print(f"  Scrambled {n} layers")
+    # Dummy tensor sizes used for tracing; the time axes are exported as dynamic.
+    B, C_lat, T_audio_ref, T_text, T_lat = 1, compressed_channels, 256, 32, 100
+    z_ref = torch.randn(B, C_lat, T_audio_ref, device=device)
+    ref_mask = torch.ones(B, 1, T_audio_ref, device=device)
+    ref_path = os.path.join(args.onnx_dir, "reference_encoder.onnx")
+    export_one(ref_enc, ref_path, (z_ref, ref_mask),
+               input_names=["z_ref", "mask"],
+               output_names=["ref_values", "ref_keys"],
+               dynamic_axes={"z_ref": {2: "T_ref_in"}, "mask": {2: "T_ref_in"}})
+    exported_files.append(ref_path)
+    # ==============================================================
+    # 2. Text Encoder
+    # ==============================================================
+    print("\n[2/5] Text Encoder")
+    text_enc = TextEncoder(
+        vocab_size=vocab_size,
+        d_model=te_d_model,
+        n_conv_layers=cfg["te_convnext_layers"],
+        n_attn_layers=cfg["te_attn_n_layers"],
+        expansion_factor=cfg["te_expansion_factor"],
+        p_dropout=cfg["te_attn_p_dropout"],
+    ).to(device).eval()
+    if "text_encoder" in t2l_state:
+        text_enc.load_state_dict(t2l_state["text_encoder"], strict=True)
+    n = scramble_all_layers(text_enc, scramble_linear=scramble_linear, scramble_embedding=scramble_embedding)
+    total_scrambled += n
+    print(f"  Scrambled {n} layers")
+    text_ids = torch.zeros(B, T_text, dtype=torch.long, device=device)
+    text_mask = torch.ones(B, 1, T_text, device=device)
+    style_ttl = torch.randn(B, se_n_style, se_d_model, device=device)
+    te_path = os.path.join(args.onnx_dir, "text_encoder.onnx")
+    export_one(text_enc, te_path, (text_ids, style_ttl, text_mask),
+               input_names=["text_ids", "style_ttl", "text_mask"],
+               output_names=["text_emb"],
+               dynamic_axes={
+                   "text_ids": {1: "T_text"}, "style_ttl": {1: "T_ref"},
+                   "text_mask": {2: "T_text"}, "text_emb": {2: "T_text"},
+               })
+    exported_files.append(te_path)
+    # ==============================================================
+    # 3. Vector Field Estimator (two variants)
+    # ==============================================================
+    print("\n[3/5] Vector Field Estimator")
+    vf = VectorFieldEstimator(
+        in_channels=compressed_channels,
+        out_channels=compressed_channels,
+        hidden_channels=cfg["vf_hidden"],
+        text_dim=cfg["vf_text_dim"],
+        style_dim=cfg["vf_style_dim"],
+        num_style_tokens=se_n_style,
+        num_superblocks=cfg["vf_n_blocks"],
+        time_embed_dim=cfg["vf_time_dim"],
+        rope_gamma=cfg["vf_rotary_scale"],
+    ).to(device).eval()
+    if "vf_estimator" in t2l_state:
+        vf.load_state_dict(t2l_state["vf_estimator"], strict=False)
+    # Sync baked-in style key with text encoder
+    with torch.no_grad():
+        vf.style_key.copy_(text_enc.ref_keys)
+    n = scramble_all_layers(vf, scramble_linear=scramble_linear, scramble_embedding=scramble_embedding)
+    total_scrambled += n
+    print(f"  Scrambled {n} layers")
+    noisy_latent = torch.randn(B, C_lat, T_lat, device=device)
+    latent_mask = torch.ones(B, 1, T_lat, device=device)
+    # NOTE(review): the dummy text embedding uses se_d_model as its channel
+    # size - confirm this equals the estimator's text_dim (cfg["vf_text_dim"]).
+    text_emb = torch.randn(B, se_d_model, T_text, device=device)
+    current_step = torch.tensor([0.0], device=device)
+    total_step = torch.tensor([1.0], device=device)
+    # Variant A: no style_keys input
+    vf_wrapped = VectorFieldEstimatorWrapper(vf)
+    vf_path = os.path.join(args.onnx_dir, "vector_estimator.onnx")
+    vf_inputs = (noisy_latent, text_emb, style_ttl, latent_mask, text_mask, current_step, total_step)
+    vf_names = ["noisy_latent", "text_emb", "style_ttl", "latent_mask", "text_mask", "current_step", "total_step"]
+    export_one(vf_wrapped, vf_path, vf_inputs,
+               input_names=vf_names, output_names=["denoised_latent"],
+               dynamic_axes={
+                   "noisy_latent": {2: "T_lat"}, "text_emb": {2: "T_text"},
+                   "style_ttl": {1: "T_ref"}, "latent_mask": {2: "T_lat"},
+                   "text_mask": {2: "T_text"}, "denoised_latent": {2: "T_lat"},
+               })
+    exported_files.append(vf_path)
+    # Variant B: with style_keys input (for CFG)
+    style_keys_dummy = text_enc.ref_keys.expand(B, -1, -1).to(device)
+    vf_keys_wrapped = VectorFieldEstimatorKeysWrapper(vf)
+    vfk_path = os.path.join(args.onnx_dir, "vector_estimator_keys.onnx")
+    vfk_inputs = (noisy_latent, text_emb, style_ttl, style_keys_dummy, latent_mask, text_mask, current_step, total_step)
+    vfk_names = ["noisy_latent", "text_emb", "style_ttl", "style_keys", "latent_mask", "text_mask", "current_step", "total_step"]
+    export_one(vf_keys_wrapped, vfk_path, vfk_inputs,
+               input_names=vfk_names, output_names=["denoised_latent"],
+               dynamic_axes={
+                   "noisy_latent": {2: "T_lat"}, "text_emb": {2: "T_text"},
+                   "style_ttl": {1: "T_ref"}, "style_keys": {1: "T_ref"},
+                   "latent_mask": {2: "T_lat"}, "text_mask": {2: "T_text"},
+                   "denoised_latent": {2: "T_lat"},
+               })
+    exported_files.append(vfk_path)
+    # ==============================================================
+    # 4. Vocoder (Latent Decoder)
+    # ==============================================================
+    print("\n[4/5] Vocoder")
+    ae_dec_cfg = cfg["ae_dec_cfg"]
+    vocoder = LatentDecoder1D(cfg=ae_dec_cfg).to(device).eval()
+    if "decoder" in ae_state:
+        vocoder.load_state_dict(ae_state["decoder"], strict=True)
+    n = scramble_all_layers(vocoder, scramble_linear=scramble_linear, scramble_embedding=scramble_embedding)
+    total_scrambled += n
+    print(f"  Scrambled {n} layers")
+    C_dec = latent_dim
+    latent_dec = torch.randn(B, C_dec, T_lat * chunk_compress_factor, device=device)
+    voc_path = os.path.join(args.onnx_dir, "vocoder.onnx")
+    export_one(vocoder, voc_path, (latent_dec,),
+               input_names=["latent"], output_names=["waveform"],
+               dynamic_axes={"latent": {2: "T_dec"}, "waveform": {2: "T_wav"}})
+    exported_files.append(voc_path)
+    # ==============================================================
+    # 5. Duration Predictor (two variants)
+    # ==============================================================
+    print("\n[5/5] Duration Predictor")
+    dp_style_tokens = cfg["dp_style_tokens"]
+    dp_style_dim = cfg["dp_style_dim"]
+    dp = DPNetwork(
+        vocab_size=cfg["dp_vocab_size"],
+        style_tokens=dp_style_tokens,
+        style_dim=dp_style_dim,
+    ).to(device).eval()
+    # Weight source priority: standalone DP checkpoint, then the TTL
+    # checkpoint under "dp_network", then under "dp_model".
+    if dp_state_raw is not None:
+        ds = dp_state_raw
+        if isinstance(ds, dict) and "state_dict" in ds:
+            ds = ds["state_dict"]
+        dp.load_state_dict(ds, strict=False)
+    elif "dp_network" in t2l_state:
+        dp.load_state_dict(t2l_state["dp_network"], strict=True)
+    elif "dp_model" in t2l_state:
+        dp.load_state_dict(t2l_state["dp_model"], strict=True)
+    _replace_mha_with_safe(dp)
+    n = scramble_all_layers(dp, scramble_linear=scramble_linear, scramble_embedding=scramble_embedding)
+    total_scrambled += n
+    print(f"  Scrambled {n} layers")
+    # Standard path (z_ref)
+    dp_path = os.path.join(args.onnx_dir, "duration_predictor.onnx")
+    dp_inputs = (text_ids, z_ref, text_mask, ref_mask)
+    dp_names = ["text_ids", "z_ref", "text_mask", "ref_mask"]
+    export_one(dp, dp_path, dp_inputs,
+               input_names=dp_names, output_names=["duration"],
+               dynamic_axes={
+                   "text_ids": {1: "T_text"}, "text_mask": {2: "T_text"},
+                   "z_ref": {2: "T_ref_audio"}, "ref_mask": {2: "T_ref_audio"},
+               })
+    exported_files.append(dp_path)
+    # Style path (pre-computed style tokens)
+    class DPStyleWrapper(nn.Module):
+        """Wrap DPNetwork for the style_tokens input path (no z_ref)."""
+        def __init__(self, dp_model):
+            super().__init__()
+            self.dp = dp_model
+        def forward(self, text_ids, style_dp, text_mask):
+            return self.dp(text_ids, text_mask=text_mask, style_tokens=style_dp)
+    dp_style_wrapper = DPStyleWrapper(dp).eval()
+    style_dp_dummy = torch.randn(B, dp_style_tokens, dp_style_dim, device=device)
+    dp_style_path = os.path.join(args.onnx_dir, "duration_predictor_style.onnx")
+    dp_style_inputs = (text_ids, style_dp_dummy, text_mask)
+    dp_style_names = ["text_ids", "style_dp", "text_mask"]
+    export_one(dp_style_wrapper, dp_style_path, dp_style_inputs,
+               input_names=dp_style_names, output_names=["duration"],
+               dynamic_axes={"text_ids": {1: "T_text"}, "text_mask": {2: "T_text"}})
+    exported_files.append(dp_style_path)
+    # ==============================================================
+    # Unconditional Tokens (for CFG)
+    # ==============================================================
+    print("\nExporting uncond.npz...")
+    uncond_data = {}
+    for key in ("u_text", "u_ref", "u_keys"):
+        if key in t2l_state:
+            uncond_data[key] = t2l_state[key].cpu().numpy()
+    with torch.no_grad():
+        uncond_data["cond_keys"] = text_enc.ref_keys.cpu().numpy()
+    if uncond_data:
+        np.savez(os.path.join(args.onnx_dir, "uncond.npz"), **uncond_data)
+        print("[OK] Saved uncond.npz")
+    # ==============================================================
+    # Extract Shuffle Keys (Optimizer-Proof)
+    # ==============================================================
+    # Runs before name obfuscation: extraction locates the index tensors by
+    # their original "inv_shuffle_indices" names.
+    total_keys_extracted = 0
+    if do_key_extraction:
+        print("\nExtracting shuffle keys from models...")
+        all_extracted_keys = {}
+        for fpath in exported_files:
+            model_name = os.path.splitext(os.path.basename(fpath))[0]
+            keys = extract_shuffle_keys(fpath)
+            # Prefix with the model name so keys of different models cannot clash.
+            for input_name, arr in keys.items():
+                all_extracted_keys[f"{model_name}/{input_name}"] = arr
+            if keys:
+                print(f"  {model_name}: {len(keys)} key arrays extracted")
+            total_keys_extracted += len(keys)
+        if all_extracted_keys:
+            keys_path = os.path.join(args.onnx_dir, "keys.npz")
+            np.savez(keys_path, **all_extracted_keys)
+            print(f"  Saved {total_keys_extracted} total keys to keys.npz")
+    else:
+        print("\nSkipping shuffle key extraction (pass --extract-keys to enable).")
+    # ==============================================================
+    # Apply ONNX Name Obfuscation
+    # ==============================================================
+    total_names_randomized = 0
+    if do_name_obfuscation:
+        print("\nApplying ONNX name obfuscation...")
+        for fpath in exported_files:
+            n_names = obfuscate_onnx_names(fpath, fpath, keep_io_names=True)
+            total_names_randomized += n_names
+            print(f"  {os.path.basename(fpath)}: {n_names} names randomized")
+    # ==============================================================
+    # Summary
+    # ==============================================================
+    print("\n" + "=" * 60)
+    print("OBFUSCATION COMPLETE")
+    print("=" * 60)
+    print(f"  Layers scrambled         : {total_scrambled}")
+    print(f"    (Conv1d always, Linear={'ON' if scramble_linear else 'OFF'}, Embedding={'ON' if scramble_embedding else 'OFF'})")
+    print(f"  ONNX files exported      : {len(exported_files)}")
+    if do_key_extraction:
+        print(f"  Shuffle keys extracted   : {total_keys_extracted}")
+    if do_name_obfuscation:
+        print(f"  Internal names randomized: {total_names_randomized}")
+    print(f"  Output directory         : {args.onnx_dir}/")
+    print()
+    print("Weight poisoning: Layer weights are permanently permuted.")
+    print("Gather injection: Every scrambled layer has an inverse-Gather node.")
+    if do_key_extraction:
+        print("Key extraction : Shuffle indices moved to keys.npz (optimizer-proof).")
+        print("                 Models REQUIRE keys.npz at inference time.")
+    else:
+        print("WARNING: Shuffle keys are embedded as constants (vulnerable to ORT optimizer).")
+    if do_name_obfuscation:
+        print("Name obfuscation: All internal node/tensor names are random strings.")
+    print("I/O tensor names are preserved for inference compatibility.")
+    print("=" * 60)
+# Run the export pipeline only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()

reference_encoder.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:775ed3896b688411d37934edc9b827dc20e676c11fae78baa77ad29bb1f1dbdb
+size 24416182

stats.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcb4f3cf96356860dea9e161fe4f5704b19d76467826611e3548891ea58986c2
+size 1920

text_encoder.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c70b6d4983b0e96e157ac4f4672834c7034991dfdda40d1d55b3ed26edd55ed3
+size 27745455

uncond.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3681ac7a4959a1217fd9af26a93fe653df5c3d7a74261d843cb2ebcd0fbef79c
+size 155626

vocoder.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1df1cee4f4205ed5d10accec49d295a3521cdd7c04fcf0db3bfd926c78bd96d
+size 101638298