| { | |
| "model_type": "wav2vec2-a2e", | |
| "task": "audio-to-expression", | |
| "framework": "onnx", | |
| "opset_version": 14, | |
| "min_ort_version": "1.17.0", | |
| "sample_rate": 16000, | |
| "input_samples": 16000, | |
| "output_fps": 30, | |
| "num_blendshapes": 52, | |
| "blendshape_standard": "ARKit", | |
| "parameters": 100528020, | |
| "upstream": { | |
| "repo": "https://github.com/aigc3d/LAM_Audio2Expression", | |
| "paper": "LAM: Large Avatar Model for One-Shot Animatable Gaussian Head", | |
| "venue": "SIGGRAPH 2025", | |
| "license": "Apache-2.0" | |
| }, | |
| "inputs": { | |
| "audio": { | |
| "shape": [ | |
| "batch", | |
| "samples" | |
| ], | |
| "dtype": "float32", | |
| "description": "Raw audio at 16kHz. Use 16000 samples (1s) to get 30 output frames (30fps)." | |
| }, | |
| "identity": { | |
| "shape": [ | |
| "batch", | |
| 12 | |
| ], | |
| "dtype": "float32", | |
| "description": "One-hot identity vector. 12 classes, use [1,0,...,0] for neutral." | |
| } | |
| }, | |
| "outputs": { | |
| "blendshapes": { | |
| "shape": [ | |
| "batch", | |
| 30, | |
| 52 | |
| ], | |
| "dtype": "float32", | |
| "description": "ARKit blendshape weights at 30fps" | |
| }, | |
| "asr_logits": { | |
| "shape": [ | |
| "batch", | |
| 49, | |
| 32 | |
| ], | |
| "dtype": "float32", | |
| "description": "CTC ASR logits at ~50fps (49 frames per 1s input; ~24K params auxiliary head)" | |
| } | |
| }, | |
| "recommended": { | |
| "path": "model_fp16.onnx", | |
| "format": "external_data", | |
| "precision": "float16_surgical", | |
| "graph_size_kb": 385, | |
| "weights_size_mb": 192, | |
| "conversion": "Surgical fp16: decomposed LayerNorm subgraphs kept in fp32", | |
| "fidelity": "cosine >0.9999 vs fp32, magnitude ratio 0.998-1.002", | |
| "backends": [ | |
| "webgpu", | |
| "wasm" | |
| ] | |
| }, | |
| "variants": { | |
| "fp16_surgical": { | |
| "path": "model_fp16.onnx", | |
| "format": "external_data", | |
| "size_mb": 192, | |
| "precision": "float16", | |
| "note": "Recommended. Decomposed LayerNorm preserved in fp32.", | |
| "backends": [ | |
| "webgpu", | |
| "wasm" | |
| ] | |
| }, | |
| "fp32": { | |
| "path": "fp32/model.onnx", | |
| "format": "external_data", | |
| "size_mb": 384, | |
| "precision": "float32", | |
| "backends": [ | |
| "webgpu", | |
| "wasm" | |
| ] | |
| }, | |
| "fp32_single_file": { | |
| "path": "model.onnx", | |
| "format": "single_file", | |
| "size_mb": 384, | |
| "precision": "float32", | |
| "note": "Legacy single-file export kept for backwards compatibility. Prefer the external-data variants.", | |
| "backends": [ | |
| "webgpu", | |
| "wasm" | |
| ] | |
| }, | |
| "fp16_naive": { | |
| "path": "fp16/model.onnx", | |
| "format": "external_data", | |
| "size_mb": 192, | |
| "precision": "float16", | |
| "note": "Superseded by root model_fp16.onnx (surgical conversion).", | |
| "backends": [ | |
| "webgpu", | |
| "wasm" | |
| ] | |
| }, | |
| "int8": { | |
| "path": "int8/model.onnx", | |
| "format": "external_data", | |
| "size_mb": 97, | |
| "precision": "int8_dynamic", | |
| "note": "NOT RECOMMENDED. Visibly degraded output. Wav2Vec2 weights too sensitive for int8.", | |
| "backends": [ | |
| "wasm" | |
| ] | |
| } | |
| }, | |
| "blendshape_names": [ | |
| "eyeBlinkLeft", | |
| "eyeLookDownLeft", | |
| "eyeLookInLeft", | |
| "eyeLookOutLeft", | |
| "eyeLookUpLeft", | |
| "eyeSquintLeft", | |
| "eyeWideLeft", | |
| "eyeBlinkRight", | |
| "eyeLookDownRight", | |
| "eyeLookInRight", | |
| "eyeLookOutRight", | |
| "eyeLookUpRight", | |
| "eyeSquintRight", | |
| "eyeWideRight", | |
| "jawForward", | |
| "jawLeft", | |
| "jawRight", | |
| "jawOpen", | |
| "mouthClose", | |
| "mouthFunnel", | |
| "mouthPucker", | |
| "mouthLeft", | |
| "mouthRight", | |
| "mouthSmileLeft", | |
| "mouthSmileRight", | |
| "mouthFrownLeft", | |
| "mouthFrownRight", | |
| "mouthDimpleLeft", | |
| "mouthDimpleRight", | |
| "mouthStretchLeft", | |
| "mouthStretchRight", | |
| "mouthRollLower", | |
| "mouthRollUpper", | |
| "mouthShrugLower", | |
| "mouthShrugUpper", | |
| "mouthPressLeft", | |
| "mouthPressRight", | |
| "mouthLowerDownLeft", | |
| "mouthLowerDownRight", | |
| "mouthUpperUpLeft", | |
| "mouthUpperUpRight", | |
| "browDownLeft", | |
| "browDownRight", | |
| "browInnerUp", | |
| "browOuterUpLeft", | |
| "browOuterUpRight", | |
| "cheekPuff", | |
| "cheekSquintLeft", | |
| "cheekSquintRight", | |
| "noseSneerLeft", | |
| "noseSneerRight", | |
| "tongueOut" | |
| ] | |
| } | |