Update README.md
Browse files
README.md
CHANGED
|
@@ -5,8 +5,12 @@
|
|
| 5 |
|
| 6 |
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# 1. Introduction 📚
|
| 9 |
-
**TL;DR: ChatRex is
|
| 10 |
|
| 11 |
ChatRex is a Multimodal Large Language Model (MLLM) designed to seamlessly integrate fine-grained object perception and robust language understanding. By adopting a decoupled architecture with a retrieval-based approach for object detection and leveraging high-resolution visual inputs, ChatRex addresses key challenges in perception tasks. It is powered by the Rexverse-2M dataset with diverse image-region-text annotations. ChatRex can be applied to various scenarios requiring fine-grained perception, such as object detection, grounded conversation, grounded image captioning and region
|
| 12 |
understanding.
|
|
@@ -29,7 +33,7 @@ pip install -v -e .
|
|
| 29 |
|
| 30 |
## 2.1 Download Pre-trained Models
|
| 31 |
We provide model checkpoints for both the ***Universal Proposal Network (UPN)*** and the ***ChatRex model***. You can download the pre-trained models from the following links:
|
| 32 |
-
- [UPN Checkpoint](https://
|
| 33 |
- [ChatRex-7B Checkpoint](https://huggingface.co/IDEA-Research/ChatRex-7B)
|
| 34 |
|
| 35 |
Alternatively, you can use the following commands to download the pre-trained models:
|
|
@@ -37,7 +41,7 @@ Or you can also using the following command to download the pre-trained models:
|
|
| 37 |
mkdir checkpoints
|
| 38 |
mkdir checkpoints/upn
|
| 39 |
# download UPN checkpoint
|
| 40 |
-
wget -O checkpoints/upn/upn_large.pth https://
|
| 41 |
# download ChatRex checkpoint from huggingface IDEA-Research/ChatRex-7B
|
| 42 |
# Download ChatRex checkpoint from Hugging Face
|
| 43 |
git lfs install
|
|
@@ -174,7 +178,7 @@ from chatrex.upn import UPNWrapper
|
|
| 174 |
if __name__ == "__main__":
|
| 175 |
# load the processor
|
| 176 |
processor = AutoProcessor.from_pretrained(
|
| 177 |
-
"
|
| 178 |
trust_remote_code=True,
|
| 179 |
device_map="cuda",
|
| 180 |
)
|
|
@@ -182,7 +186,7 @@ if __name__ == "__main__":
|
|
| 182 |
print(f"loading chatrex model...")
|
| 183 |
# load chatrex model
|
| 184 |
model = AutoModelForCausalLM.from_pretrained(
|
| 185 |
-
"
|
| 186 |
trust_remote_code=True,
|
| 187 |
use_safetensors=True,
|
| 188 |
).to("cuda")
|
|
@@ -292,7 +296,7 @@ from chatrex.upn import UPNWrapper
|
|
| 292 |
if __name__ == "__main__":
|
| 293 |
# load the processor
|
| 294 |
processor = AutoProcessor.from_pretrained(
|
| 295 |
-
"
|
| 296 |
trust_remote_code=True,
|
| 297 |
device_map="cuda",
|
| 298 |
)
|
|
@@ -300,7 +304,7 @@ if __name__ == "__main__":
|
|
| 300 |
print(f"loading chatrex model...")
|
| 301 |
# load chatrex model
|
| 302 |
model = AutoModelForCausalLM.from_pretrained(
|
| 303 |
-
"
|
| 304 |
trust_remote_code=True,
|
| 305 |
use_safetensors=True,
|
| 306 |
).to("cuda")
|
|
@@ -386,7 +390,7 @@ from chatrex.upn import UPNWrapper
|
|
| 386 |
if __name__ == "__main__":
|
| 387 |
# load the processor
|
| 388 |
processor = AutoProcessor.from_pretrained(
|
| 389 |
-
"
|
| 390 |
trust_remote_code=True,
|
| 391 |
device_map="cuda",
|
| 392 |
)
|
|
@@ -394,7 +398,7 @@ if __name__ == "__main__":
|
|
| 394 |
print(f"loading chatrex model...")
|
| 395 |
# load chatrex model
|
| 396 |
model = AutoModelForCausalLM.from_pretrained(
|
| 397 |
-
"
|
| 398 |
trust_remote_code=True,
|
| 399 |
use_safetensors=True,
|
| 400 |
).to("cuda")
|
|
@@ -490,7 +494,7 @@ from chatrex.upn import UPNWrapper
|
|
| 490 |
if __name__ == "__main__":
|
| 491 |
# load the processor
|
| 492 |
processor = AutoProcessor.from_pretrained(
|
| 493 |
-
"
|
| 494 |
trust_remote_code=True,
|
| 495 |
device_map="cuda",
|
| 496 |
)
|
|
@@ -498,7 +502,7 @@ if __name__ == "__main__":
|
|
| 498 |
print(f"loading chatrex model...")
|
| 499 |
# load chatrex model
|
| 500 |
model = AutoModelForCausalLM.from_pretrained(
|
| 501 |
-
"
|
| 502 |
trust_remote_code=True,
|
| 503 |
use_safetensors=True,
|
| 504 |
).to("cuda")
|
|
@@ -572,35 +576,6 @@ The visualization of the output is like:
|
|
| 572 |
|
| 573 |
----
|
| 574 |
|
| 575 |
-
# 4. Gradio Demos 🎨
|
| 576 |
-
## 4.1 Gradio Demo for UPN
|
| 577 |
-
We provide a gradio demo for UPN to visualize the object proposals generated by UPN. You can run the following command to start the gradio demo:
|
| 578 |
-
```bash
|
| 579 |
-
python gradio_demos/upn_demo.py
|
| 580 |
-
# if there is permission error, please run the following command
|
| 581 |
-
mkdir tmp
|
| 582 |
-
TMPDIR='/tmp' python gradio_demos/upn_demo.py
|
| 583 |
-
```
|
| 584 |
-
|
| 585 |
-
<div align=center>
|
| 586 |
-
<img src="assets/upn_gradio.jpg" width=600 >
|
| 587 |
-
</div>
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
## 4.2 Gradio Demo for ChatRex
|
| 591 |
-
We also provide a gradio demo for ChatRex.
|
| 592 |
-
```bash
|
| 593 |
-
python gradio_demos/chatrex_demo.py
|
| 594 |
-
# if there is permission error, please run the following command
|
| 595 |
-
mkdir tmp
|
| 596 |
-
TMPDIR='/tmp' python gradio_demos/upn_demo.py
|
| 597 |
-
```
|
| 598 |
-
|
| 599 |
-
<div align=center>
|
| 600 |
-
<img src="assets/chatrex_gradio.jpg" width=600 >
|
| 601 |
-
</div>
|
| 602 |
-
|
| 603 |
-
|
| 604 |
|
| 605 |
# 5. LICENSE
|
| 606 |
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
|
| 8 |
+
<div align=center>
|
| 9 |
+
|
| 10 |
+
----
|
| 11 |
+
|
| 12 |
# 1. Introduction 📚
|
| 13 |
+
**TL;DR: ChatRex is an MLLM skilled in perception that can respond to questions while simultaneously grounding its answers to the referenced objects.**
|
| 14 |
|
| 15 |
ChatRex is a Multimodal Large Language Model (MLLM) designed to seamlessly integrate fine-grained object perception and robust language understanding. By adopting a decoupled architecture with a retrieval-based approach for object detection and leveraging high-resolution visual inputs, ChatRex addresses key challenges in perception tasks. It is powered by the Rexverse-2M dataset with diverse image-region-text annotations. ChatRex can be applied to various scenarios requiring fine-grained perception, such as object detection, grounded conversation, grounded image captioning and region
|
| 16 |
understanding.
|
|
|
|
| 33 |
|
| 34 |
## 2.1 Download Pre-trained Models
|
| 35 |
We provide model checkpoints for both the ***Universal Proposal Network (UPN)*** and the ***ChatRex model***. You can download the pre-trained models from the following links:
|
| 36 |
+
- [UPN Checkpoint](https://github.com/IDEA-Research/ChatRex/releases/download/upn-large/upn_large.pth)
|
| 37 |
- [ChatRex-7B Checkpoint](https://huggingface.co/IDEA-Research/ChatRex-7B)
|
| 38 |
|
| 39 |
Alternatively, you can use the following commands to download the pre-trained models:
|
|
|
|
| 41 |
mkdir checkpoints
|
| 42 |
mkdir checkpoints/upn
|
| 43 |
# download UPN checkpoint
|
| 44 |
+
wget -O checkpoints/upn/upn_large.pth https://github.com/IDEA-Research/ChatRex/releases/download/upn-large/upn_large.pth
|
| 45 |
# download ChatRex checkpoint from huggingface IDEA-Research/ChatRex-7B
|
| 46 |
# Download ChatRex checkpoint from Hugging Face
|
| 47 |
git lfs install
|
|
|
|
| 178 |
if __name__ == "__main__":
|
| 179 |
# load the processor
|
| 180 |
processor = AutoProcessor.from_pretrained(
|
| 181 |
+
"IDEA-Research/ChatRex-7B",
|
| 182 |
trust_remote_code=True,
|
| 183 |
device_map="cuda",
|
| 184 |
)
|
|
|
|
| 186 |
print(f"loading chatrex model...")
|
| 187 |
# load chatrex model
|
| 188 |
model = AutoModelForCausalLM.from_pretrained(
|
| 189 |
+
"IDEA-Research/ChatRex-7B",
|
| 190 |
trust_remote_code=True,
|
| 191 |
use_safetensors=True,
|
| 192 |
).to("cuda")
|
|
|
|
| 296 |
if __name__ == "__main__":
|
| 297 |
# load the processor
|
| 298 |
processor = AutoProcessor.from_pretrained(
|
| 299 |
+
"IDEA-Research/ChatRex-7B",
|
| 300 |
trust_remote_code=True,
|
| 301 |
device_map="cuda",
|
| 302 |
)
|
|
|
|
| 304 |
print(f"loading chatrex model...")
|
| 305 |
# load chatrex model
|
| 306 |
model = AutoModelForCausalLM.from_pretrained(
|
| 307 |
+
"IDEA-Research/ChatRex-7B",
|
| 308 |
trust_remote_code=True,
|
| 309 |
use_safetensors=True,
|
| 310 |
).to("cuda")
|
|
|
|
| 390 |
if __name__ == "__main__":
|
| 391 |
# load the processor
|
| 392 |
processor = AutoProcessor.from_pretrained(
|
| 393 |
+
"IDEA-Research/ChatRex-7B",
|
| 394 |
trust_remote_code=True,
|
| 395 |
device_map="cuda",
|
| 396 |
)
|
|
|
|
| 398 |
print(f"loading chatrex model...")
|
| 399 |
# load chatrex model
|
| 400 |
model = AutoModelForCausalLM.from_pretrained(
|
| 401 |
+
"IDEA-Research/ChatRex-7B",
|
| 402 |
trust_remote_code=True,
|
| 403 |
use_safetensors=True,
|
| 404 |
).to("cuda")
|
|
|
|
| 494 |
if __name__ == "__main__":
|
| 495 |
# load the processor
|
| 496 |
processor = AutoProcessor.from_pretrained(
|
| 497 |
+
"IDEA-Research/ChatRex-7B",
|
| 498 |
trust_remote_code=True,
|
| 499 |
device_map="cuda",
|
| 500 |
)
|
|
|
|
| 502 |
print(f"loading chatrex model...")
|
| 503 |
# load chatrex model
|
| 504 |
model = AutoModelForCausalLM.from_pretrained(
|
| 505 |
+
"IDEA-Research/ChatRex-7B",
|
| 506 |
trust_remote_code=True,
|
| 507 |
use_safetensors=True,
|
| 508 |
).to("cuda")
|
|
|
|
| 576 |
|
| 577 |
----
|
| 578 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 579 |
|
| 580 |
# 5. LICENSE
|
| 581 |
|