Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -48,11 +48,14 @@ BADGE = """
|
|
| 48 |
<a href="https://github.com/yeliudev/VideoMind/blob/main/README.md" target="_blank">
|
| 49 |
<img src="https://img.shields.io/badge/License-BSD--3--Clause-purple">
|
| 50 |
</a>
|
|
|
|
|
|
|
|
|
|
| 51 |
</div>
|
| 52 |
"""
|
| 53 |
|
| 54 |
LOGO = '<p align="center"><img width="350" src="https://raw.githubusercontent.com/yeliudev/VideoMind/refs/heads/main/.github/logo.png"></p>'
|
| 55 |
-
DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*.' # noqa
|
| 56 |
|
| 57 |
# yapf:disable
|
| 58 |
EXAMPLES = [
|
|
@@ -562,7 +565,7 @@ def main(video, prompt, role, temperature, max_new_tokens):
|
|
| 562 |
def build_demo():
|
| 563 |
chat = gr.Chatbot(
|
| 564 |
type='messages',
|
| 565 |
-
height='
|
| 566 |
avatar_images=[f'{PATH}/assets/user.png', f'{PATH}/assets/bot.png'],
|
| 567 |
placeholder='A conversation with VideoMind',
|
| 568 |
label='VideoMind')
|
|
@@ -570,7 +573,7 @@ def build_demo():
|
|
| 570 |
prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')
|
| 571 |
|
| 572 |
with gr.Blocks(title=TITLE) as demo:
|
| 573 |
-
gr.
|
| 574 |
gr.HTML(BADGE)
|
| 575 |
gr.Markdown(DISC)
|
| 576 |
|
|
@@ -606,11 +609,7 @@ def build_demo():
|
|
| 606 |
label='Max Output Tokens',
|
| 607 |
info='The maximum number of output tokens for each role (Default: 256)')
|
| 608 |
|
| 609 |
-
|
| 610 |
-
prompt.render()
|
| 611 |
-
|
| 612 |
-
with gr.Accordion(label='Examples', open=False):
|
| 613 |
-
gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)
|
| 614 |
|
| 615 |
with gr.Row():
|
| 616 |
random_btn = gr.Button(value='🔮 Random')
|
|
@@ -624,7 +623,7 @@ def build_demo():
|
|
| 624 |
submit_ctx = submit_ctx.then(main, [video, prompt, role, temperature, max_new_tokens], chat)
|
| 625 |
submit_ctx.then(enable_btns, None, [random_btn, reset_btn, submit_btn])
|
| 626 |
|
| 627 |
-
gr.
|
| 628 |
|
| 629 |
with gr.Column(scale=5):
|
| 630 |
chat.render()
|
|
|
|
| 48 |
<a href="https://github.com/yeliudev/VideoMind/blob/main/README.md" target="_blank">
|
| 49 |
<img src="https://img.shields.io/badge/License-BSD--3--Clause-purple">
|
| 50 |
</a>
|
| 51 |
+
<a href="https://github.com/yeliudev/VideoMind" target="_blank">
|
| 52 |
+
<img src="https://img.shields.io/github/stars/yeliudev/VideoMind">
|
| 53 |
+
</a>
|
| 54 |
</div>
|
| 55 |
"""
|
| 56 |
|
| 57 |
LOGO = '<p align="center"><img width="350" src="https://raw.githubusercontent.com/yeliudev/VideoMind/refs/heads/main/.github/logo.png"></p>'
|
| 58 |
+
DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*. This demo showcases how VideoMind-2B handles video-language tasks. Please open an <a href="https://github.com/yeliudev/VideoMind/issues/new" target="_blank">issue</a> if you meet any problems.' # noqa
|
| 59 |
|
| 60 |
# yapf:disable
|
| 61 |
EXAMPLES = [
|
|
|
|
| 565 |
def build_demo():
|
| 566 |
chat = gr.Chatbot(
|
| 567 |
type='messages',
|
| 568 |
+
height='70em',
|
| 569 |
avatar_images=[f'{PATH}/assets/user.png', f'{PATH}/assets/bot.png'],
|
| 570 |
placeholder='A conversation with VideoMind',
|
| 571 |
label='VideoMind')
|
|
|
|
| 573 |
prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')
|
| 574 |
|
| 575 |
with gr.Blocks(title=TITLE) as demo:
|
| 576 |
+
gr.HTML(LOGO)
|
| 577 |
gr.HTML(BADGE)
|
| 578 |
gr.Markdown(DISC)
|
| 579 |
|
|
|
|
| 609 |
label='Max Output Tokens',
|
| 610 |
info='The maximum number of output tokens for each role (Default: 256)')
|
| 611 |
|
| 612 |
+
prompt.render()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
|
| 614 |
with gr.Row():
|
| 615 |
random_btn = gr.Button(value='🔮 Random')
|
|
|
|
| 623 |
submit_ctx = submit_ctx.then(main, [video, prompt, role, temperature, max_new_tokens], chat)
|
| 624 |
submit_ctx.then(enable_btns, None, [random_btn, reset_btn, submit_btn])
|
| 625 |
|
| 626 |
+
gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)
|
| 627 |
|
| 628 |
with gr.Column(scale=5):
|
| 629 |
chat.render()
|