AaronCIH committed on
Commit
02c6a81
·
verified ·
1 Parent(s): c02d8b6

Upload folder using huggingface_hub

Browse files
RAR/checkpoints/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "/home/work/shared-fi-datasets-01/users/hsiang.chen/Project/ModelZoo/LLM/vicuna/vicuna-7b-v1.5",
4
+ "architectures": [
5
+ "LlamaForCausalLM"
6
+ ],
7
+ "attention_bias": false,
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 1,
10
+ "eos_token_id": 2,
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 11008,
16
+ "max_position_embeddings": 4096,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 32,
20
+ "num_hidden_layers": 32,
21
+ "num_key_value_heads": 32,
22
+ "pad_token_id": 0,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 10000.0,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.49.0",
30
+ "use_cache": true,
31
+ "vocab_size": 32000
32
+ }
RAR/checkpoints/epoch_5_step_60506_weight.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964dbb146117044040fad4f15b35149909279ec99228b6c0017de269df55a2d7
3
+ size 4824028258
RAR/checkpoints/epoch_6_step_65000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46f3ef2e3be6be302a2989175ea699112c1abe3b4fe22bd0eedf53792c2c7b0c
3
+ size 28780817146
RAR/checkpoints/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46f3ef2e3be6be302a2989175ea699112c1abe3b4fe22bd0eedf53792c2c7b0c
3
+ size 28780817146
RAR/checkpoints/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
RAR/checkpoints/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
RAR/checkpoints/tokenizer_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 4096,
37
+ "pad_token": "</s>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "spaces_between_special_tokens": false,
41
+ "tokenizer_class": "LlamaTokenizer",
42
+ "unk_token": "<unk>",
43
+ "use_default_system_prompt": false,
44
+ "use_fast": false
45
+ }
RAR/config.yaml ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ data_dir:
3
+ - None
4
+ caption_proportion:
5
+ prompt: 1
6
+ external_caption_suffixes: []
7
+ external_clipscore_suffixes: []
8
+ clip_thr_temperature: 0.1
9
+ clip_thr: 25.0
10
+ del_img_clip_thr: 0.0
11
+ sort_dataset: false
12
+ load_text_feat: false
13
+ load_vae_feat: false
14
+ transform: default_train
15
+ type: IRImgDataset
16
+ image_size: 256
17
+ hq_only: false
18
+ valid_num: 0
19
+ data: null
20
+ extra: null
21
+ dset: train_brief
22
+ max_samples: null
23
+ use_tokens: ALL
24
+ max_tokens: 400
25
+ caption_type: question
26
+ return_meta: true
27
+ need_resize: true
28
+ synthetic_composite: true
29
+ model:
30
+ model: SD35M_D2C
31
+ model_pretrained: /home/CORP/hsiang.chen/Projects/RAR/Models/ModelZoo/stable-diffusion-3.5-medium/sd3.5_medium.safetensors
32
+ shift: 3.0
33
+ teacher: null
34
+ input_channel: 16
35
+ image_size: 256
36
+ mixed_precision: bf16
37
+ fp32_attention: true
38
+ load_from: /home/CORP/hsiang.chen/Projects/RAR/Models/ModelZoo/backbone/epoch_100_step_302601.pth
39
+ discriminator_model: null
40
+ teacher_model: null
41
+ teacher_model_weight_dtype: null
42
+ resume_from: null
43
+ aspect_ratio_type: ASPECT_RATIO_1024
44
+ multi_scale: false
45
+ pe_interpolation: 1.0
46
+ micro_condition: false
47
+ attn_type: linear
48
+ autocast_linear_attn: false
49
+ ffn_type: glumbconv
50
+ mlp_acts:
51
+ - silu
52
+ - silu
53
+ - null
54
+ mlp_ratio: 2.5
55
+ use_pe: false
56
+ pos_embed_type: sincos
57
+ qk_norm: false
58
+ class_dropout_prob: 0.1
59
+ linear_head_dim: 32
60
+ cross_norm: false
61
+ cross_attn_type: flash
62
+ logvar: false
63
+ cfg_scale: 4
64
+ cfg_embed: false
65
+ cfg_embed_scale: 1.0
66
+ guidance_type: classifier-free
67
+ pag_applied_layers:
68
+ - 8
69
+ ladd_multi_scale: true
70
+ head_block_ids: null
71
+ extra: null
72
+ vae:
73
+ vae_type: SDVAE
74
+ vae_pretrained: /home/CORP/hsiang.chen/Projects/RAR/Models/ModelZoo/stable-diffusion-3.5-medium/sd3.5_medium.safetensors
75
+ weight_dtype: float32
76
+ scale_factor: 0.41407
77
+ vae_latent_dim: 16
78
+ vae_downsample_rate: 8
79
+ sample_posterior: true
80
+ extra: null
81
+ text_encoder:
82
+ text_encoder_name: empty
83
+ text_encoder_pretrained: /home/CORP/hsiang.chen/Projects/RAR/Models/ModelZoo/stable-diffusion-3.5-medium/text_encoders
84
+ caption_channels: 4096
85
+ y_norm: true
86
+ y_norm_scale_factor: 0.01
87
+ model_max_length: 300
88
+ chi_prompt:
89
+ - a photo of a cat
90
+ - Convenience store entrance at night. On the glass door, a vinyl decal reads
91
+ 'OPEN FOR QUALITY'. Inside, shelves and fluorescent lights; outside, a cyclist
92
+ passing by
93
+ - Sunrise beach, shallow tide washing over smooth sand. A piece of weathered driftwood
94
+ lies near the shoreline with a subtle branded text [SOS] on its surface; wet
95
+ sand reflections, micro-ripples, sun flare at horizon.
96
+ extra: null
97
+ connector:
98
+ model: QFormer
99
+ model_pretrained: None
100
+ input_channel: 4096
101
+ num_input_tokens: 400
102
+ num_query_tokens: 77
103
+ hidden_dim: 1024
104
+ layers: 8
105
+ heads: 16
106
+ resume_from: None
107
+ load_from: /home/CORP/hsiang.chen/Projects/RAR/Models/ModelZoo/connector/epoch_100_step_19001.pth
108
+ assessment:
109
+ model: SDQA
110
+ model_config: ./iqa/config.yaml
111
+ scheduler:
112
+ train_sampling_steps: 1000
113
+ predict_flow_v: true
114
+ noise_schedule: linear_flow
115
+ pred_sigma: false
116
+ learn_sigma: true
117
+ vis_sampler: flow_dpm-solver
118
+ flow_shift: 3.0
119
+ weighting_scheme: logit_normal
120
+ weighting_scheme_discriminator: logit_normal_trigflow
121
+ add_noise_timesteps:
122
+ - 1.5708
123
+ logit_mean: 0.0
124
+ logit_std: 1.0
125
+ logit_mean_discriminator: 0.0
126
+ logit_std_discriminator: 1.0
127
+ sigma_data: 0.5
128
+ timestep_norm_scale_factor: 1.0
129
+ extra: null
130
+ recur_round: 4
131
+ recur_truncated: false
132
+ recur_loss_weights:
133
+ - 0.2
134
+ - 0.4
135
+ - 0.6
136
+ - 1.0
137
+ update_image: true
138
+ train:
139
+ train_type: bf16
140
+ resume_from: None
141
+ num_workers: 10
142
+ seed: 1229
143
+ train_batch_size: 1
144
+ num_epochs: 100
145
+ gradient_accumulation_steps: 16
146
+ grad_checkpointing: true
147
+ gradient_clip: 0.1
148
+ gc_step: 1
149
+ optimizer:
150
+ betas:
151
+ - 0.9
152
+ - 0.999
153
+ - 0.9999
154
+ eps:
155
+ - 1.0e-30
156
+ - 1.0e-16
157
+ lr: 2.0e-06
158
+ type: CAMEWrapper
159
+ weight_decay: 0.0
160
+ optimizer_D:
161
+ eps: 1.0e-10
162
+ lr: 0.0001
163
+ type: AdamW
164
+ weight_decay: 0.03
165
+ load_from_optimizer: false
166
+ load_from_lr_scheduler: false
167
+ resume_lr_scheduler: true
168
+ lr_schedule: cosine
169
+ lr_schedule_args:
170
+ num_warmup_steps: 2000
171
+ auto_lr:
172
+ rule: sqrt
173
+ eval_batch_size: 16
174
+ use_fsdp: false
175
+ use_flash_attn: false
176
+ eval_sampling_steps: 1000
177
+ lora_rank: 4
178
+ log_interval: 100
179
+ mask_type: 'null'
180
+ mask_loss_coef: 0.0
181
+ load_mask_index: false
182
+ snr_loss: false
183
+ real_prompt_ratio: 1.0
184
+ early_stop_hours: 10000.0
185
+ save_image_epochs: 1
186
+ save_model_epochs: 1
187
+ save_model_steps: 5000
188
+ visualize: true
189
+ null_embed_root: output/pretrained_models/
190
+ valid_prompt_embed_root: output/tmp_embed/
191
+ validation_prompts:
192
+ - dog
193
+ - portrait photo of a girl, photograph, highly detailed face, depth of field
194
+ - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k
195
+ - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k
196
+ - A photo of beautiful mountain with realistic sunset and blue lake, highly detailed,
197
+ masterpiece
198
+ local_save_vis: true
199
+ deterministic_validation: true
200
+ online_metric: false
201
+ eval_metric_step: 2000
202
+ online_metric_dir: metric_helper
203
+ work_dir: output_c2c/sd35m_d2c_multi
204
+ skip_step: 0
205
+ loss_type: huber
206
+ huber_c: 0.001
207
+ num_ddim_timesteps: 50
208
+ ema_decay: 0.95
209
+ debug_nan: false
210
+ ema_update: false
211
+ ema_rate: 0.9999
212
+ tangent_warmup_steps: 10000
213
+ scm_cfg_scale:
214
+ - 1.0
215
+ cfg_interval: null
216
+ scm_logvar_loss: true
217
+ norm_invariant_to_spatial_dim: true
218
+ norm_same_as_512_scale: false
219
+ g_norm_constant: 0.1
220
+ g_norm_r: 1.0
221
+ show_gradient: false
222
+ lr_scale: null
223
+ adv_lambda: 1.0
224
+ scm_loss: true
225
+ scm_lambda: 1.0
226
+ loss_scale: 1.0
227
+ r1_penalty: false
228
+ r1_penalty_weight: 1.0e-05
229
+ diff_timesteps_D: true
230
+ suffix_checkpoints: disc
231
+ misaligned_pairs_D: false
232
+ discriminator_loss: cross entropy
233
+ largest_timestep: 1.5708
234
+ train_largest_timestep: false
235
+ largest_timestep_prob: 0.5
236
+ extra: null
237
+ controlnet: null
238
+ model_growth: null
239
+ work_dir: output_composite/sd35m_d2c_multi_bf16_lr2e6
240
+ resume_from: null
241
+ load_from: null
242
+ debug: true
243
+ caching: false
244
+ report_to: tensorboard
245
+ tracker_project_name: sana-baseline
246
+ name: tmp
247
+ loss_report_name: loss
RAR/logs/sana-baseline/events.out.tfevents.1761237723.main1.2421250.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aa76105a458f56decff22888f7cb696ad675901cb8c11014a53d11331e5daf6
3
+ size 58201510
RAR/logs/tb_2025-10-23_17:42:03/events.out.tfevents.1761237723.main1.2421250.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:582306f4ebfecf4f0a20941ae4e3758ff7e52e04d01c417b3b738ceea4021bc8
3
+ size 58201510
RAR/train_log.log ADDED
The diff for this file is too large to render. See raw diff