KangLiao committed on
Commit
20c1475
·
1 Parent(s): 0e03f9f
configs/models/qwen2_5_1_5b_radio_sd3_dynamic_puffin copy.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from src.models.puffin.model import Qwen2p5RadioStableDiffusion3HFDynamic
3
+ from src.models.stable_diffusion3.transformer_sd3_dynamic import SD3Transformer2DModel
4
+ from src.models.radiov3.hf_model import RADIOModel
5
+ from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+
8
+ llm_name_or_path = 'Qwen/Qwen2.5-1.5B-Instruct'
9
+ sd3_model_name_or_path = "stabilityai/stable-diffusion-3-medium-diffusers"
10
+
11
+ prompt_template = dict(
12
+ SYSTEM=('<|im_start|>system\n{system}<|im_end|>\n'),
13
+ INSTRUCTION=('<|im_start|>user\n{input}<|im_end|>\n'
14
+ '<|im_start|>assistant\n'),
15
+ SUFFIX='<|im_end|>',
16
+ IMG_START_TOKEN='<|vision_start|>',
17
+ IMG_END_TOKEN='<|vision_end|>',
18
+ IMG_CONTEXT_TOKEN='<|image_pad|>',
19
+ GENERATION='Generate an image: {input}',
20
+ GENERATION_CROSS='Generate a target image given an initial view: {input}',
21
+ SUFFIX_AS_EOS=True,
22
+ SEP='\n',
23
+ STOP_WORDS=['<|im_end|>', '<|endoftext|>']
24
+ )
25
+
26
+ model = dict(type=Qwen2p5RadioStableDiffusion3HFDynamic,
27
+ num_queries=64,
28
+ connector_1=dict(
29
+ hidden_size=1024,
30
+ intermediate_size=4096,
31
+ num_hidden_layers=6,
32
+ #_attn_implementation='flash_attention_2',
33
+ num_attention_heads=16, ),
34
+ connector_2=dict(
35
+ hidden_size=1024,
36
+ intermediate_size=4096,
37
+ num_hidden_layers=6,
38
+ #_attn_implementation='flash_attention_2',
39
+ num_attention_heads=16,
40
+ ),
41
+ transformer=dict(
42
+ type=SD3Transformer2DModel.from_pretrained,
43
+ pretrained_model_name_or_path=sd3_model_name_or_path,
44
+ subfolder="transformer",
45
+ torch_dtype=torch.bfloat16,
46
+ #local_files_only=True,
47
+ ),
48
+ test_scheduler=dict(
49
+ type=FlowMatchEulerDiscreteScheduler.from_pretrained,
50
+ pretrained_model_name_or_path=sd3_model_name_or_path,
51
+ subfolder="scheduler",
52
+ #local_files_only=True,
53
+ ),
54
+ train_scheduler=dict(
55
+ type=FlowMatchEulerDiscreteScheduler.from_pretrained,
56
+ pretrained_model_name_or_path=sd3_model_name_or_path,
57
+ subfolder="scheduler",
58
+ #local_files_only=True,
59
+ ),
60
+ vae=dict(
61
+ type=AutoencoderKL.from_pretrained,
62
+ pretrained_model_name_or_path=sd3_model_name_or_path,
63
+ subfolder="vae",
64
+ torch_dtype=torch.bfloat16,
65
+ #local_files_only=True,
66
+ ),
67
+ freeze_visual_encoder=True,
68
+ freeze_llm=True,
69
+ llm=dict(
70
+ type=AutoModelForCausalLM.from_pretrained,
71
+ pretrained_model_name_or_path=llm_name_or_path,
72
+ torch_dtype=torch.bfloat16,
73
+ #local_files_only=True,
74
+ #attn_implementation='flash_attention_2',
75
+ ),
76
+ tokenizer=dict(
77
+ type=AutoTokenizer.from_pretrained,
78
+ pretrained_model_name_or_path=llm_name_or_path,
79
+ #local_files_only=True,
80
+ ),
81
+ prompt_template=prompt_template,
82
+ pretrained_pth=None,
83
+ use_activation_checkpointing=False,
84
+ visual_encoder=dict(
85
+ type=RADIOModel.from_pretrained,
86
+ pretrained_model_name_or_path="nvidia/C-RADIOv3-H",
87
+ torch_dtype=torch.bfloat16,
88
+ #local_files_only=True,
89
+ ),
90
+ )
configs/models/qwen2_5_1_5b_radio_sd3_dynamic_puffin.py CHANGED
@@ -38,13 +38,7 @@ model = dict(type=Qwen2p5RadioStableDiffusion3HFDynamic,
38
  #_attn_implementation='flash_attention_2',
39
  num_attention_heads=16,
40
  ),
41
- transformer=dict(
42
- type=SD3Transformer2DModel.from_pretrained,
43
- pretrained_model_name_or_path=sd3_model_name_or_path,
44
- subfolder="transformer",
45
- torch_dtype=torch.bfloat16,
46
- #local_files_only=True,
47
- ),
48
  test_scheduler=dict(
49
  type=FlowMatchEulerDiscreteScheduler.from_pretrained,
50
  pretrained_model_name_or_path=sd3_model_name_or_path,
 
38
  #_attn_implementation='flash_attention_2',
39
  num_attention_heads=16,
40
  ),
41
+ transformer=SD3Transformer2DModel,
 
 
 
 
 
 
42
  test_scheduler=dict(
43
  type=FlowMatchEulerDiscreteScheduler.from_pretrained,
44
  pretrained_model_name_or_path=sd3_model_name_or_path,