Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -329,7 +329,33 @@ def synthesize_video_with_audio(video_file, caption):
|
|
| 329 |
|
| 330 |
# Gradio界面
|
| 331 |
with gr.Blocks() as demo:
|
| 332 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
with gr.Row():
|
| 334 |
video_input = gr.Video(label="upload video")
|
| 335 |
caption_input = gr.Textbox(label="caption(optional)", placeholder="can be empty", lines=1)
|
|
|
|
| 329 |
|
| 330 |
# Gradio界面
|
| 331 |
with gr.Blocks() as demo:
|
| 332 |
+
gr.Markdown(
|
| 333 |
+
"""
|
| 334 |
+
# ThinkSound\n
|
| 335 |
+
ThinkSound is a unified Any2Audio generation framework with flow matching guided by Chain-of-Thought (CoT) reasoning.
|
| 336 |
+
|
| 337 |
+
Upload video and caption (optional), and get video with audio!
|
| 338 |
+
|
| 339 |
+
[Project page is here](https://huggingface.co/spaces/FunAudioLLM/ThinkSound)
|
| 340 |
+
[Model weights is here](https://huggingface.co/liuhuadai/ThinkSound)
|
| 341 |
+
|
| 342 |
+
## Citation
|
| 343 |
+
|
| 344 |
+
If you find our work useful, please cite our paper:
|
| 345 |
+
|
| 346 |
+
```bibtex
|
| 347 |
+
@misc{liu2025thinksoundchainofthoughtreasoningmultimodal,
|
| 348 |
+
title={ThinkSound: Chain-of-Thought Reasoning in Multimodal Large Language Models for Audio Generation and Editing},
|
| 349 |
+
author={Huadai Liu and Jialei Wang and Kaicheng Luo and Wen Wang and Qian Chen and Zhou Zhao and Wei Xue},
|
| 350 |
+
year={2025},
|
| 351 |
+
eprint={2506.21448},
|
| 352 |
+
archivePrefix={arXiv},
|
| 353 |
+
primaryClass={eess.AS},
|
| 354 |
+
url={https://arxiv.org/abs/2506.21448},
|
| 355 |
+
}
|
| 356 |
+
```
|
| 357 |
+
"""
|
| 358 |
+
)
|
| 359 |
with gr.Row():
|
| 360 |
video_input = gr.Video(label="upload video")
|
| 361 |
caption_input = gr.Textbox(label="caption(optional)", placeholder="can be empty", lines=1)
|