AEmotionStudio committed on
Commit
64e6ccd
·
verified ·
1 Parent(s): dd6a5d7

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +92 -0
config.json ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "in_channels": 768,
3
+ "audio_codec": {
4
+ "encoder_dim": 64,
5
+ "encoder_rates": [
6
+ 2,
7
+ 8,
8
+ 10,
9
+ 12
10
+ ],
11
+ "latent_dim": 1024,
12
+ "decoder_dim": 1536,
13
+ "decoder_rates": [
14
+ 12,
15
+ 10,
16
+ 8,
17
+ 2
18
+ ],
19
+ "n_codebooks": 16,
20
+ "codebook_size": 1024,
21
+ "codebook_dim": 128,
22
+ "quantizer_dropout": false,
23
+ "sample_rate": 48000,
24
+ "mean": 0.0,
25
+ "std": 1.0
26
+ },
27
+ "text_encoder": {
28
+ "dim": 768,
29
+ "name": "t5-base",
30
+ "max_length": 512,
31
+ "pad_mode": "longest"
32
+ },
33
+ "vision_encoder": {
34
+ "dim": 1024,
35
+ "batch_size": 300,
36
+ "name": "PE-Core-L14-336",
37
+ "normalize_feature": true,
38
+ "interpolation_mode": "BICUBIC",
39
+ "image_size": 336
40
+ },
41
+ "transformer": {
42
+ "dim": 2816,
43
+ "n_heads": 22,
44
+ "n_layers": 22,
45
+ "dropout": 0.1,
46
+ "norm_eps": 1e-05,
47
+ "qk_norm": true,
48
+ "fc_bias": false,
49
+ "ffn_exp": 4,
50
+ "ffn_dim_multiplier": 1,
51
+ "multiple_of": 64,
52
+ "non_linearity": "swiglu",
53
+ "use_rope": true,
54
+ "max_positions": 10000,
55
+ "frequency_embedding_dim": 256,
56
+ "timestep_non_linearity": "swiglu",
57
+ "t_block_non_linearity": "silu",
58
+ "t_block_bias": true,
59
+ "context_dim": 2816,
60
+ "context_non_linearity": "swiglu",
61
+ "context_embedder_dropout": 0.0,
62
+ "context_norm": false,
63
+ "out_channels": 256,
64
+ "in_channels": null
65
+ },
66
+ "num_anchors": 3,
67
+ "anchor_embedding_dim": 128,
68
+ "visual_ranker": {
69
+ "checkpoint": null,
70
+ "kind": "imagebind"
71
+ },
72
+ "text_ranker": {
73
+ "rankers": {
74
+ "clap": [
75
+ {
76
+ "checkpoint": null,
77
+ "kind": "clap"
78
+ },
79
+ 5.0
80
+ ],
81
+ "judge": [
82
+ {
83
+ "checkpoint_or_model_id": "facebook/sam-audio-judge",
84
+ "kind": "judge"
85
+ },
86
+ 1.0
87
+ ]
88
+ },
89
+ "kind": "ensemble"
90
+ },
91
+ "span_predictor": "pe-a-frame-large"
92
+ }