stmdit-anon committed on
Commit
6e1812e
·
verified ·
1 Parent(s): 64b581f

Add xattn-perceiver-p05 (XAttn-Perceiver-p05)

Browse files
xattn-perceiver-p05/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - histopathology
5
+ - diffusion
6
+ - spatial-transcriptomics
7
+ - icml-2026-sd4h-workshop
8
+ ---
9
+
10
+ # XAttn-Perceiver-p05
11
+
12
+ EMA-only inference weights for the **XAttn-Perceiver-p05** row reported in the
13
+ ICML 2026 SD4H workshop submission *Transcriptomics-Conditioned Virtual Tissue
14
+ Synthesis via Diffusion Transformers*.
15
+
16
+ - **Source checkpoint**: `step_2323000_ema.pt`
17
+ - **Architecture**: see `training_config.yaml` in this folder.
18
+ - **License**: Apache-2.0.
19
+
20
+ See the umbrella repo README at `stmdit-anon/stmdit-checkpoints` for usage.
xattn-perceiver-p05/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85be3206cf0ce4625056a2f006d8c5767bbb941b8d70093516cd83849add75c8
3
+ size 715792736
xattn-perceiver-p05/training_config.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training Configuration - PixArtGEXAttn-B with Perceiver Token Source
2
+ # Conditioning dropout p=0.5 per modality (UNI, GE) — equal weight across all 4 conditioning regimes.
3
+ # bf16-mixed + torch.compile for faster training.
4
+
5
+ output_dir: "/cluster/work/grlab/projects/projects2025-virtual-tissue-gen/scratch/10x_TuPro/PixCell-GE/training/pixart-ge-cf-B-xattn-perceiver-p05"
6
+ device: "cuda"
7
+
8
+ # ============================================================================
9
+ # MODEL
10
+ # ============================================================================
11
+
12
+ model:
13
+ type: "pixart_ge_xattn"
14
+ variant: "B" # 130M params: depth=12, hidden=768, heads=12
15
+ ge_encoder_type: "cancerfoundation"
16
+ ge_hidden_dim: 512
17
+ cf_model_dir: "/cluster/home/pvlachas/leomed-home/pretrained_model_weights/cancer-foundation"
18
+ cf_freeze_backbone: true
19
+ ge_token_source: "perceiver"
20
+ ge_num_tokens: 32
21
+ ge_perceiver_depth: 4
22
+ ge_xattn_fusion: "xattn_only"
23
+
24
+ # ============================================================================
25
+ # DATA
26
+ # ============================================================================
27
+
28
+ data:
29
+ features_dir: "/cluster/work/grlab/projects/projects2025-virtual-tissue-gen/scratch/10x_TuPro/feat-extraction/features_train"
30
+ load_gene_expression: true
31
+ load_gene_expression_binned: true
32
+ num_workers: 8
33
+ pin_memory: true
34
+ val_split: 0.1
35
+
36
+ # ============================================================================
37
+ # DIFFUSION
38
+ # ============================================================================
39
+
40
+ diffusion:
41
+ timesteps: 1000
42
+ beta_schedule: "linear"
43
+ image_size: 256
44
+ latent_size: 32
45
+
46
+ # ============================================================================
47
+ # TRAINING
48
+ # ============================================================================
49
+
50
+ training:
51
+ batch_size: 32
52
+ batch_size_val: 32
53
+ gradient_accumulation_steps: 4 # effective batch = 128
54
+ num_epochs: 1000
55
+ seed: 42
56
+ gradient_clip: 0.01
57
+ ema_rate: 0.9999
58
+
59
+ optimizer:
60
+ lr: 2e-5
61
+ weight_decay: 0.01
62
+ betas: [0.9, 0.999]
63
+
64
+ scheduler:
65
+ warmup_steps: 1000
66
+ min_lr_ratio: 0.1
67
+
68
+ classifier_free_guidance:
69
+ conditioning_schedule:
70
+ - mask: [uni, ge] # full conditioning (UNI + GE active)
71
+ weight: 25
72
+ - mask: [ge] # GE only (UNI dropped)
73
+ weight: 25
74
+ - mask: [uni] # UNI only (GE dropped)
75
+ weight: 25
76
+ - mask: [] # unconditional (both dropped)
77
+ weight: 25
78
+
79
+ modality_monitor:
80
+ enabled: true
81
+ diagnostic_freq: 10
82
+ diagnostic_batch_size: 64
83
+
84
+ convergence:
85
+ monitor_timestep_range: [900, 1000]
86
+ patience: 50
87
+ min_epochs: 300
88
+
89
+ # ============================================================================
90
+ # DISTRIBUTED
91
+ # ============================================================================
92
+
93
+ distributed:
94
+ precision: "bf16-mixed"
95
+ compile_model: true
96
+
97
+ # ============================================================================
98
+ # CHECKPOINT
99
+ # ============================================================================
100
+
101
+ checkpoint:
102
+ save_every: 1000
103
+ resume: null
104
+
105
+ # ============================================================================
106
+ # LOGGING
107
+ # ============================================================================
108
+
109
+ logging:
110
+ log_every: 100
111
+ validate_every: 0
112
+ gpu_monitor: true
113
+ gpu_monitor_interval: 60.0
114
+ sample_every_epochs: 10
115
+ sample_every_steps: 0
116
+ num_samples: 16
117
+ sample_guidance_scale: 3.0
118
+ sample_num_steps: 20
119
+ sample_vae_path: "/cluster/home/pvlachas/leomed-home/pretrained_model_weights/stability-ai-stable-diffusion-3-5-large/models--stabilityai--stable-diffusion-3.5-large/snapshots/ceddf0a7fdf2064ea28e2213e3b84e4afa170a0f/vae"