Upload folder using huggingface_hub
Browse files
- README.md +68 -0
- config.json +6 -0
- configs/config_rag_rflow.json +69 -0
- models/CLIP3D_Finding_Impression_30ep.pt +3 -0
- models/autoencoder_epoch273.pt +3 -0
- models/controlnet_rag_best.pt +3 -0
- models/unet_rflow_200ep.pt +3 -0
README.md
CHANGED
|
@@ -1,3 +1,71 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
+
datasets:
|
| 4 |
+
- ibrahimhamamci/CT-RATE
|
| 5 |
+
language:
|
| 6 |
+
- en
|
| 7 |
+
pipeline_tag: text-to-3d
|
| 8 |
+
tags:
|
| 9 |
+
- medical
|
| 10 |
+
- ct
|
| 11 |
+
- diffusion
|
| 12 |
+
- controlnet
|
| 13 |
+
- retrieval-augmented-generation
|
| 14 |
---
|
| 15 |
+
|
| 16 |
+
# RAGText2CT Weights
|
| 17 |
+
|
| 18 |
+
Weights for **RAGText2CT: Retrieval-Augmented Anatomical Guidance for Text-to-CT Generation**.
|
| 19 |
+
|
| 20 |
+
This release is independent of `dmolino/text2ct-weights` and contains the full checkpoint set needed by the `RAGText2CT-Release` codebase.
|
| 21 |
+
|
| 22 |
+
## Included Files
|
| 23 |
+
|
| 24 |
+
Under `models/`:
|
| 25 |
+
|
| 26 |
+
- `autoencoder_epoch273.pt`
|
| 27 |
+
- `unet_rflow_200ep.pt`
|
| 28 |
+
- `CLIP3D_Finding_Impression_30ep.pt`
|
| 29 |
+
- `controlnet_rag_best.pt`
|
| 30 |
+
|
| 31 |
+
Under `configs/`:
|
| 32 |
+
|
| 33 |
+
- `config_rag_rflow.json`
|
| 34 |
+
|
| 35 |
+
## What Each Weight Does
|
| 36 |
+
|
| 37 |
+
- `autoencoder_epoch273.pt`: 3D VAE for latent compression and decoding.
|
| 38 |
+
- `unet_rflow_200ep.pt`: text-conditioned latent diffusion UNet from the Text2CT backbone.
|
| 39 |
+
- `CLIP3D_Finding_Impression_30ep.pt`: CLIP3D report encoder checkpoint.
|
| 40 |
+
- `controlnet_rag_best.pt`: retrieval-guided anatomical ControlNet checkpoint for RAGText2CT.
|
| 41 |
+
|
| 42 |
+
## Intended Use
|
| 43 |
+
|
| 44 |
+
These checkpoints are intended for research on text-conditioned 3D CT generation and retrieval-augmented anatomical guidance.
|
| 45 |
+
|
| 46 |
+
They are not intended for clinical use or diagnostic decision-making.
|
| 47 |
+
|
| 48 |
+
## Code
|
| 49 |
+
|
| 50 |
+
Use these weights with the companion repository:
|
| 51 |
+
|
| 52 |
+
- `RAGText2CT-Release`
|
| 53 |
+
|
| 54 |
+
The code release expects the checkpoint files to live under `models/` with the names listed above.
|
| 55 |
+
|
| 56 |
+
## Notes
|
| 57 |
+
|
| 58 |
+
- The first three checkpoints (`autoencoder_epoch273.pt`, `unet_rflow_200ep.pt`, and `CLIP3D_Finding_Impression_30ep.pt`) are shared with the original Text2CT pipeline.
|
| 59 |
+
- `controlnet_rag_best.pt` is the additional checkpoint specific to the retrieval-augmented extension.
|
| 60 |
+
- Retrieval-bank artifacts such as `impression_embeddings.npy` and `impression_paths.json` are not included in this weights repo.
|
| 61 |
+
|
| 62 |
+
## Citation
|
| 63 |
+
|
| 64 |
+
```bibtex
|
| 65 |
+
@article{Molino2026RAGText2CT,
|
| 66 |
+
title={Retrieval-Augmented Anatomical Guidance for Text-to-CT Generation},
|
| 67 |
+
author={Molino, Daniele and Caruso, Camillo Maria and Soda, Paolo and Guarrasi, Valerio},
|
| 68 |
+
year={2026},
|
| 69 |
+
journal={arXiv preprint arXiv:2603.08305}
|
| 70 |
+
}
|
| 71 |
+
```
|
config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "LDM+VAE",
|
| 3 |
+
"task": "Text Conditioned CT Generation",
|
| 4 |
+
"framework": "monai",
|
| 5 |
+
"modality": "ct"
|
| 6 |
+
}
|
configs/config_rag_rflow.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"spatial_dims": 3,
|
| 3 |
+
"image_channels": 1,
|
| 4 |
+
"latent_channels": 4,
|
| 5 |
+
"include_body_region": false,
|
| 6 |
+
"autoencoder_def": {
|
| 7 |
+
"_target_": "monai.apps.generation.maisi.networks.autoencoderkl_maisi.AutoencoderKlMaisi",
|
| 8 |
+
"spatial_dims": "@spatial_dims",
|
| 9 |
+
"in_channels": "@image_channels",
|
| 10 |
+
"out_channels": "@image_channels",
|
| 11 |
+
"latent_channels": "@latent_channels",
|
| 12 |
+
"num_channels": [64, 128, 256],
|
| 13 |
+
"num_res_blocks": [2, 2, 2],
|
| 14 |
+
"norm_num_groups": 32,
|
| 15 |
+
"norm_eps": 1e-06,
|
| 16 |
+
"attention_levels": [false, false, false],
|
| 17 |
+
"with_encoder_nonlocal_attn": false,
|
| 18 |
+
"with_decoder_nonlocal_attn": false,
|
| 19 |
+
"use_checkpointing": false,
|
| 20 |
+
"use_convtranspose": false,
|
| 21 |
+
"norm_float16": true,
|
| 22 |
+
"num_splits": 4,
|
| 23 |
+
"dim_split": 1
|
| 24 |
+
},
|
| 25 |
+
"diffusion_unet_def": {
|
| 26 |
+
"_target_": "monai.apps.generation.maisi.networks.diffusion_model_unet_maisi.DiffusionModelUNetMaisi",
|
| 27 |
+
"with_conditioning": true,
|
| 28 |
+
"cross_attention_dim": 768,
|
| 29 |
+
"spatial_dims": "@spatial_dims",
|
| 30 |
+
"in_channels": "@latent_channels",
|
| 31 |
+
"out_channels": "@latent_channels",
|
| 32 |
+
"num_channels": [64, 128, 256, 512],
|
| 33 |
+
"attention_levels": [false, false, true, true],
|
| 34 |
+
"num_head_channels": [0, 0, 32, 32],
|
| 35 |
+
"num_res_blocks": 2,
|
| 36 |
+
"use_flash_attention": true,
|
| 37 |
+
"include_top_region_index_input": "@include_body_region",
|
| 38 |
+
"include_bottom_region_index_input": "@include_body_region",
|
| 39 |
+
"include_spacing_input": true,
|
| 40 |
+
"num_class_embeds": 128,
|
| 41 |
+
"resblock_updown": true,
|
| 42 |
+
"include_fc": true
|
| 43 |
+
},
|
| 44 |
+
"controlnet_def": {
|
| 45 |
+
"_target_": "monai.apps.generation.maisi.networks.controlnet_maisi.ControlNetMaisi",
|
| 46 |
+
"spatial_dims": "@spatial_dims",
|
| 47 |
+
"in_channels": "@latent_channels",
|
| 48 |
+
"num_channels": [64, 128, 256, 512],
|
| 49 |
+
"attention_levels": [false, false, true, true],
|
| 50 |
+
"num_head_channels": [0, 0, 32, 32],
|
| 51 |
+
"num_res_blocks": 2,
|
| 52 |
+
"use_flash_attention": true,
|
| 53 |
+
"conditioning_embedding_in_channels": 8,
|
| 54 |
+
"conditioning_embedding_num_channels": [8, 32, 64],
|
| 55 |
+
"num_class_embeds": 128,
|
| 56 |
+
"resblock_updown": true,
|
| 57 |
+
"include_fc": true,
|
| 58 |
+
"with_conditioning": true,
|
| 59 |
+
"cross_attention_dim": 768
|
| 60 |
+
},
|
| 61 |
+
"noise_scheduler": {
|
| 62 |
+
"_target_": "monai.networks.schedulers.rectified_flow.RFlowScheduler",
|
| 63 |
+
"num_train_timesteps": 1000,
|
| 64 |
+
"use_discrete_timesteps": false,
|
| 65 |
+
"use_timestep_transform": true,
|
| 66 |
+
"sample_method": "uniform",
|
| 67 |
+
"scale": 1.4
|
| 68 |
+
}
|
| 69 |
+
}
|
models/CLIP3D_Finding_Impression_30ep.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d737b62e2ad8ed2758426fa3562db65ca0840b33416f982c45d8d3bd7fea7130
|
| 3 |
+
size 3143438178
|
models/autoencoder_epoch273.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f8a7a056d0ebc00486edc43c26768bf1c12eaa6df9dd172e34598003be95eb3
|
| 3 |
+
size 83831868
|
models/controlnet_rag_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:907ceef649c1f165253ee3aba54311e6b971ad12357acfc1eea219fb448db4ff
|
| 3 |
+
size 359767731
|
models/unet_rflow_200ep.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:674f99fd5436d4f857fe6c830ef50a97fa5ae6c181ed51160cf8229a7cc297c3
|
| 3 |
+
size 870898552
|