Use transformers as the library name
#2
by
ariG23498
HF Staff
- opened
- README.md +6 -6
- llm/config.json +1 -1
- sound_mm_projector/config.json +1 -1
README.md
CHANGED
|
@@ -4,7 +4,7 @@ library_name: transformers
|
|
| 4 |
---
|
| 5 |
# <span style="background: linear-gradient(45deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #f5576c 75%, #4facfe 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-weight: bold; font-size: 1.1em;">**OmniVinci: Enhancing Architecture and Data for Omni-Modal Understanding LLM**</span> <br />
|
| 6 |
|
| 7 |
-
[](
|
| 8 |
[](https://github.com/NVlabs/OmniVinci)
|
| 9 |
[](https://huggingface.co/nvidia/omnivinci)
|
| 10 |
[](https://nvlabs.github.io/OmniVinci)
|
|
@@ -103,10 +103,10 @@ The model is released under the [NVIDIA OneWay Noncommercial License](asset/NVID
|
|
| 103 |
Please consider citing our paper and this framework if they are helpful in your research.
|
| 104 |
|
| 105 |
```bibtex
|
| 106 |
-
@article{
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
}
|
| 112 |
```
|
|
|
|
| 4 |
---
|
| 5 |
# <span style="background: linear-gradient(45deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #f5576c 75%, #4facfe 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-weight: bold; font-size: 1.1em;">**OmniVinci: Enhancing Architecture and Data for Omni-Modal Understanding LLM**</span> <br />
|
| 6 |
|
| 7 |
+
[](https://arxiv.org/abs/2510.15870)
|
| 8 |
[](https://github.com/NVlabs/OmniVinci)
|
| 9 |
[](https://huggingface.co/nvidia/omnivinci)
|
| 10 |
[](https://nvlabs.github.io/OmniVinci)
|
|
|
|
| 103 |
Please consider citing our paper and this framework if they are helpful in your research.
|
| 104 |
|
| 105 |
```bibtex
|
| 106 |
+
@article{omnivinci2025,
|
| 107 |
+
title={OmniVinci: Enhancing Architecture and Data for Omni-Modal Understanding LLM},
|
| 108 |
+
author={Hanrong Ye and Chao-Han Huck Yang and Arushi Goel and Wei Huang and Ligeng Zhu and Yuanhang Su and Sean Lin and An-Chieh Cheng and Zhen Wan and Jinchuan Tian and Yuming Lou and Dong Yang and Zhijian Liu and Yukang Chen and Ambrish Dantrey and Ehsan Jahangiri and Sreyan Ghosh and Daguang Xu and Ehsan Hosseini-Asl and Danial Mohseni Taheri and Vidya Murali and Sifei Liu and Jason Lu and Oluwatobi Olabiyi and Frank Wang and Rafael Valle and Bryan Catanzaro and Andrew Tao and Song Han and Jan Kautz and Hongxu Yin and Pavlo Molchanov},
|
| 109 |
+
journal={arXiv preprint arXiv:2510.15870},
|
| 110 |
+
year={2025},
|
| 111 |
}
|
| 112 |
```
|
llm/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "",
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2ForCausalLM"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/home/hanrongy/user_path/project/vila/VILA-Internal/../exp_log/nvomni-8b-video-0d1-trope128_omniTwds_ras_audfilter_boost_lr5e6_demoonly_n1_bs128_ga8_mstep-1_j20250923/outputs/model/llm",
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2ForCausalLM"
|
| 5 |
],
|
sound_mm_projector/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "",
|
| 3 |
"architectures": [
|
| 4 |
"SoundMultimodalProjector"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/lustre/fs12/portfolios/llmservice/projects/llmservice_fm_vision/users/hanrongy/project/vila/VILA-Internal/../exp_log/nvomni-8b-video-0d1-trope128_omniT_ras_n16_bs2048_ga8_mstep-1_j20250718/outputs/model/sound_mm_projector",
|
| 3 |
"architectures": [
|
| 4 |
"SoundMultimodalProjector"
|
| 5 |
],
|