File size: 2,430 Bytes
28e2d77
d5cfa8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from typing import Optional

from transformers import PretrainedConfig


class AuroraConfig(PretrainedConfig):
    """Configuration container for models registered as ``model_type = "aurora"``.

    Each named constructor argument is stored as an instance attribute of the
    same name; any additional keyword arguments are forwarded untouched to
    ``PretrainedConfig.__init__``.

    NOTE(review): the descriptions below are inferred from the argument names
    alone -- the model code that consumes this config is not visible from this
    file, so confirm them against that code.

    Args:
        token_len: Presumably the length of a single input token/patch.
        hidden_size: Presumably the width of the hidden representation.
        intermediate_size: Presumably the width of the feed-forward layers.
        num_enc_layers: Presumably the number of encoder layers.
        num_dec_layers: Presumably the number of decoder layers.
        num_attention_heads: Presumably the number of attention heads.
        hidden_act: Name of the activation function (default ``"silu"``).
        rope_theta: Presumably the base used for rotary position embeddings.
        dropout_rate: Presumably the dropout probability.
        max_position_embeddings: Presumably the largest supported position.
        num_sampling_steps: Presumably the number of steps used at sampling.
        flow_loss_depth: Presumably the depth of the flow-loss network.
        diffusion_batch_mul: Presumably a batch multiplier for the
            diffusion/flow loss.
        threshold_ratio: List of ratio values. ``None`` (the default) selects
            ``[0.2, 0.3, 0.4, 0.5]``; a new list is created for every
            instance so configs never share the same list object.
        mask_ratio: Presumably the fraction of input that is masked.
        norm_mode: Normalisation mode name (default ``'batch'``).
        num_prototypes: Presumably the number of prototypes.
        num_retriever_enc_layers: Presumably the retriever's encoder depth.
        num_retriever_dec_layers: Presumably the retriever's decoder depth.
        num_text_cross_layers: Presumably the number of text cross layers.
        num_vision_cross_layers: Presumably the number of vision cross layers.
        num_text_connect_layers: Presumably the number of text connector
            layers.
        num_vision_connect_layers: Presumably the number of vision connector
            layers.
        num_distill: Presumably a distillation-related count.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "aurora"

    def __init__(
            self,
            token_len: int = 48,
            hidden_size: int = 512,
            intermediate_size: int = 1024,
            num_enc_layers: int = 12,
            num_dec_layers: int = 12,
            num_attention_heads: int = 8,
            hidden_act: str = "silu",
            rope_theta: int = 10000,
            dropout_rate: float = 0.2,
            max_position_embeddings: int = 10000,
            num_sampling_steps: int = 50,
            flow_loss_depth: int = 3,
            diffusion_batch_mul: int = 4,
            threshold_ratio: Optional[list[float]] = None,
            mask_ratio: float = 0.5,
            norm_mode: str = 'batch',
            num_prototypes: int = 1024,
            num_retriever_enc_layers: int = 1,
            num_retriever_dec_layers: int = 1,
            num_text_cross_layers: int = 1,
            num_vision_cross_layers: int = 1,
            num_text_connect_layers: int = 1,
            num_vision_connect_layers: int = 1,
            num_distill: int = 10,
            **kwargs,
    ) -> None:
        # A list literal used directly as a default argument is created once
        # and shared by every call. Building the default here gives each
        # config its own list, so in-place edits on one instance cannot leak
        # into other instances or into later default-constructed configs.
        if threshold_ratio is None:
            threshold_ratio = [0.2, 0.3, 0.4, 0.5]

        self.token_len = token_len
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_enc_layers = num_enc_layers
        self.num_dec_layers = num_dec_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.rope_theta = rope_theta
        self.dropout_rate = dropout_rate
        self.max_position_embeddings = max_position_embeddings
        self.num_sampling_steps = num_sampling_steps
        self.flow_loss_depth = flow_loss_depth
        self.diffusion_batch_mul = diffusion_batch_mul
        self.threshold_ratio = threshold_ratio
        self.mask_ratio = mask_ratio
        self.norm_mode = norm_mode
        self.num_prototypes = num_prototypes
        self.num_retriever_enc_layers = num_retriever_enc_layers
        self.num_retriever_dec_layers = num_retriever_dec_layers
        self.num_text_cross_layers = num_text_cross_layers
        self.num_vision_cross_layers = num_vision_cross_layers
        self.num_text_connect_layers = num_text_connect_layers
        self.num_vision_connect_layers = num_vision_connect_layers
        self.num_distill = num_distill

        # Remaining keyword arguments are handled by the base class.
        super().__init__(
            **kwargs,
        )