dmolino committed on
Commit
4c97d38
·
verified ·
1 Parent(s): 31e659b

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,3 +1,71 @@
1
  ---
2
  license: apache-2.0
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ datasets:
4
+ - ibrahimhamamci/CT-RATE
5
+ language:
6
+ - en
7
+ pipeline_tag: text-to-3d
8
+ tags:
9
+ - medical
10
+ - ct
11
+ - diffusion
12
+ - controlnet
13
+ - retrieval-augmented-generation
14
  ---
15
+
16
+ # RAGText2CT Weights
17
+
18
+ Weights for **RAGText2CT: Retrieval-Augmented Anatomical Guidance for Text-to-CT Generation**.
19
+
20
+ This release is independent of `dmolino/text2ct-weights` and contains the full checkpoint set needed by the `RAGText2CT-Release` codebase.
21
+
22
+ ## Included Files
23
+
24
+ Under `models/`:
25
+
26
+ - `autoencoder_epoch273.pt`
27
+ - `unet_rflow_200ep.pt`
28
+ - `CLIP3D_Finding_Impression_30ep.pt`
29
+ - `controlnet_rag_best.pt`
30
+
31
+ Under `configs/`:
32
+
33
+ - `config_rag_rflow.json`
34
+
35
+ ## What Each Weight Does
36
+
37
+ - `autoencoder_epoch273.pt`: 3D VAE for latent compression and decoding.
38
+ - `unet_rflow_200ep.pt`: text-conditioned latent diffusion UNet from the Text2CT backbone.
39
+ - `CLIP3D_Finding_Impression_30ep.pt`: CLIP3D report encoder checkpoint.
40
+ - `controlnet_rag_best.pt`: retrieval-guided anatomical ControlNet checkpoint for RAGText2CT.
41
+
42
+ ## Intended Use
43
+
44
+ These checkpoints are intended for research on text-conditioned 3D CT generation and retrieval-augmented anatomical guidance.
45
+
46
+ They are not intended for clinical use or diagnostic decision-making.
47
+
48
+ ## Code
49
+
50
+ Use these weights with the companion repository:
51
+
52
+ - `RAGText2CT-Release`
53
+
54
+ The code release expects the checkpoint files to live under `models/` with the names listed above.
55
+
56
+ ## Notes
57
+
58
+ - The first three checkpoints are shared with the original Text2CT pipeline.
59
+ - `controlnet_rag_best.pt` is the additional checkpoint specific to the retrieval-augmented extension.
60
+ - Retrieval-bank artifacts such as `impression_embeddings.npy` and `impression_paths.json` are not included in this weights repo.
61
+
62
+ ## Citation
63
+
64
+ ```bibtex
65
+ @article{Molino2026RAGText2CT,
66
+ title={Retrieval-Augmented Anatomical Guidance for Text-to-CT Generation},
67
+ author={Molino, Daniele and Caruso, Camillo Maria and Soda, Paolo and Guarrasi, Valerio},
68
+ year={2026},
69
+ journal={arXiv preprint arXiv:2603.08305}
70
+ }
71
+ ```
config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "LDM+VAE",
3
+ "task": "Text Conditioned CT Generation",
4
+ "framework": "monai",
5
+ "modality": "ct"
6
+ }
configs/config_rag_rflow.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "spatial_dims": 3,
3
+ "image_channels": 1,
4
+ "latent_channels": 4,
5
+ "include_body_region": false,
6
+ "autoencoder_def": {
7
+ "_target_": "monai.apps.generation.maisi.networks.autoencoderkl_maisi.AutoencoderKlMaisi",
8
+ "spatial_dims": "@spatial_dims",
9
+ "in_channels": "@image_channels",
10
+ "out_channels": "@image_channels",
11
+ "latent_channels": "@latent_channels",
12
+ "num_channels": [64, 128, 256],
13
+ "num_res_blocks": [2, 2, 2],
14
+ "norm_num_groups": 32,
15
+ "norm_eps": 1e-06,
16
+ "attention_levels": [false, false, false],
17
+ "with_encoder_nonlocal_attn": false,
18
+ "with_decoder_nonlocal_attn": false,
19
+ "use_checkpointing": false,
20
+ "use_convtranspose": false,
21
+ "norm_float16": true,
22
+ "num_splits": 4,
23
+ "dim_split": 1
24
+ },
25
+ "diffusion_unet_def": {
26
+ "_target_": "monai.apps.generation.maisi.networks.diffusion_model_unet_maisi.DiffusionModelUNetMaisi",
27
+ "with_conditioning": true,
28
+ "cross_attention_dim": 768,
29
+ "spatial_dims": "@spatial_dims",
30
+ "in_channels": "@latent_channels",
31
+ "out_channels": "@latent_channels",
32
+ "num_channels": [64, 128, 256, 512],
33
+ "attention_levels": [false, false, true, true],
34
+ "num_head_channels": [0, 0, 32, 32],
35
+ "num_res_blocks": 2,
36
+ "use_flash_attention": true,
37
+ "include_top_region_index_input": "@include_body_region",
38
+ "include_bottom_region_index_input": "@include_body_region",
39
+ "include_spacing_input": true,
40
+ "num_class_embeds": 128,
41
+ "resblock_updown": true,
42
+ "include_fc": true
43
+ },
44
+ "controlnet_def": {
45
+ "_target_": "monai.apps.generation.maisi.networks.controlnet_maisi.ControlNetMaisi",
46
+ "spatial_dims": "@spatial_dims",
47
+ "in_channels": "@latent_channels",
48
+ "num_channels": [64, 128, 256, 512],
49
+ "attention_levels": [false, false, true, true],
50
+ "num_head_channels": [0, 0, 32, 32],
51
+ "num_res_blocks": 2,
52
+ "use_flash_attention": true,
53
+ "conditioning_embedding_in_channels": 8,
54
+ "conditioning_embedding_num_channels": [8, 32, 64],
55
+ "num_class_embeds": 128,
56
+ "resblock_updown": true,
57
+ "include_fc": true,
58
+ "with_conditioning": true,
59
+ "cross_attention_dim": 768
60
+ },
61
+ "noise_scheduler": {
62
+ "_target_": "monai.networks.schedulers.rectified_flow.RFlowScheduler",
63
+ "num_train_timesteps": 1000,
64
+ "use_discrete_timesteps": false,
65
+ "use_timestep_transform": true,
66
+ "sample_method": "uniform",
67
+ "scale": 1.4
68
+ }
69
+ }
models/CLIP3D_Finding_Impression_30ep.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d737b62e2ad8ed2758426fa3562db65ca0840b33416f982c45d8d3bd7fea7130
3
+ size 3143438178
models/autoencoder_epoch273.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f8a7a056d0ebc00486edc43c26768bf1c12eaa6df9dd172e34598003be95eb3
3
+ size 83831868
models/controlnet_rag_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907ceef649c1f165253ee3aba54311e6b971ad12357acfc1eea219fb448db4ff
3
+ size 359767731
models/unet_rflow_200ep.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:674f99fd5436d4f857fe6c830ef50a97fa5ae6c181ed51160cf8229a7cc297c3
3
+ size 870898552