Add library_name and pipeline_tag to metadata

#1
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +22 -8
README.md CHANGED
@@ -1,11 +1,13 @@
1
  ---
2
- license: mit
 
3
  datasets:
4
  - internlm/EndoCoT-Data
5
  language:
6
  - en
7
- base_model:
8
- - Qwen/Qwen-Image-Edit-2511
 
9
  ---
10
 
11
  <p align="center"> <img src="fig/banner.svg" alt="EndoCoT" width="900"/> </p>
@@ -30,6 +32,8 @@ base_model:
30
 
31
  # EndoCoT: Scaling Endogenous Chain-of-Thought Reasoning in Diffusion Models
32
 
 
 
33
  ## 📝TODO
34
 
35
  - [x] Open source the training code
@@ -102,7 +106,7 @@ pip install -r requirements.txt
102
 
103
  1. Download the datasets & metadata.csv
104
 
105
- - You may find our training data at: [**EndoCoT dataset**](https://huggingface.co/datasets/InternLM/EndoCoT)
106
 
107
  > Since the metadata uses relative paths, please ensure the dataset files are placed in the same directory as `metadata.csv`
108
 
@@ -130,7 +134,12 @@ def encode_prompt_edit(self, pipe: QwenImagePipeline, prompt, edit_image, is_fin
130
 
131
  drop_idx = 64
132
  if type(prompt[0])==str:
133
- template = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"
 
 
 
 
 
134
  txt = template.format(prompt[0])
135
  model_inputs = pipe.processor(text=txt, images=edit_image, padding=True, return_tensors="pt").to(pipe.device)
136
  embedding_layers = pipe.text_encoder.model.language_model.get_input_embeddings()
@@ -194,10 +203,15 @@ def encode_prompt_edit(self, pipe: QwenImagePipeline, prompt, edit_image, is_fin
194
 
195
  ## 📖 Citation
196
 
197
- ```
198
- Coming Soon
 
 
 
 
 
199
  ```
200
 
201
  ## ⚖️ License
202
 
203
- ![Code License](https://img.shields.io/badge/Code%20License-MIT-green.svg) ![Data License](https://img.shields.io/badge/Data%20License-CC%20By%20NC%204.0-red.svg)
 
1
  ---
2
+ base_model:
3
+ - Qwen/Qwen-Image-Edit-2511
4
  datasets:
5
  - internlm/EndoCoT-Data
6
  language:
7
  - en
8
+ license: mit
9
+ library_name: diffusers
10
+ pipeline_tag: image-to-image
11
  ---
12
 
13
  <p align="center"> <img src="fig/banner.svg" alt="EndoCoT" width="900"/> </p>
 
32
 
33
  # EndoCoT: Scaling Endogenous Chain-of-Thought Reasoning in Diffusion Models
34
 
35
+ This repository contains the official model checkpoints for **EndoCoT**, as presented in the paper [EndoCoT: Scaling Endogenous Chain-of-Thought Reasoning in Diffusion Models](https://huggingface.co/papers/2603.12252).
36
+
37
  ## 📝TODO
38
 
39
  - [x] Open source the training code
 
106
 
107
  1. Download the datasets & metadata.csv
108
 
109
+ - You may find our training data at: [**EndoCoT dataset**](https://huggingface.co/datasets/internlm/EndoCoT-Data)
110
 
111
  > Since the metadata uses relative paths, please ensure the dataset files are placed in the same directory as `metadata.csv`
112
 
 
134
 
135
  drop_idx = 64
136
  if type(prompt[0])==str:
137
+ template = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"
143
  txt = template.format(prompt[0])
144
  model_inputs = pipe.processor(text=txt, images=edit_image, padding=True, return_tensors="pt").to(pipe.device)
145
  embedding_layers = pipe.text_encoder.model.language_model.get_input_embeddings()
 
203
 
204
  ## 📖 Citation
205
 
206
+ ```bibtex
207
+ @article{dai2026endocot,
208
+ title={EndoCoT: Scaling Endogenous Chain-of-Thought Reasoning in Diffusion Models},
209
+ author={Dai, Xuanlang and Zhou, Yujie and Xing, Long and Bu, Jiazi and Wei, Xilin and Liu, Yuhong and Zhang, Beichen and Chen, Kai and Zang, Yuhang},
210
+ journal={arXiv preprint arXiv:2603.12252},
211
+ year={2026}
212
+ }
213
  ```
214
 
215
  ## ⚖️ License
216
 
217
+ ![Code License](https://img.shields.io/badge/Code%20License-MIT-green.svg) ![Data License](https://img.shields.io/badge/Data%20License-CC%20By%20NC%204.0-red.svg)