Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,11 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
from kokoro import KPipeline
|
|
@@ -18,8 +25,8 @@ class GradioBackend:
|
|
| 18 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
| 19 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
| 20 |
|
| 21 |
-
gradio_backend = None
|
| 22 |
-
|
| 23 |
with gr.Blocks() as demo:
|
| 24 |
gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
|
| 25 |
gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
|
|
@@ -40,7 +47,6 @@ with gr.Blocks() as demo:
|
|
| 40 |
visible=True,
|
| 41 |
sources=['upload'],
|
| 42 |
autoplay=True,
|
| 43 |
-
include_audio=False,
|
| 44 |
width=720,
|
| 45 |
height=480
|
| 46 |
)
|
|
@@ -57,34 +63,37 @@ with gr.Blocks() as demo:
|
|
| 57 |
with gr.Row():
|
| 58 |
gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
|
| 59 |
|
| 60 |
-
@spaces.GPU
|
| 61 |
def gr_chatinterface_fn(message, history, state, video_path, mode):
|
| 62 |
global gradio_backend
|
| 63 |
-
yield '(initializing model, thanks for waiting...)'
|
| 64 |
if gradio_backend is None:
|
|
|
|
| 65 |
gradio_backend = GradioBackend()
|
|
|
|
| 66 |
state['video_path'] = video_path
|
| 67 |
-
yield '(finished initialization, responding...)'
|
| 68 |
if mode != 'Conversation':
|
| 69 |
yield 'waiting video input...'
|
| 70 |
-
response, state = gradio_backend(message=message, history=history, state=state, mode=mode)
|
| 71 |
-
yield response
|
| 72 |
|
| 73 |
def gr_chatinterface_chatbot_clear_fn():
|
| 74 |
return {}, {}, 0, 0
|
| 75 |
gr_chatinterface = gr.ChatInterface(
|
| 76 |
fn=gr_chatinterface_fn,
|
| 77 |
type="messages",
|
| 78 |
-
additional_inputs=[gr_state, gr_video, gr_radio_mode]
|
|
|
|
| 79 |
)
|
| 80 |
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 81 |
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 82 |
|
| 83 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
| 88 |
state.update(video_state)
|
| 89 |
query, assistant_waiting_message = None, None
|
| 90 |
for message in history[::-1]:
|
|
@@ -100,7 +109,7 @@ with gr.Blocks() as demo:
|
|
| 100 |
elif message['content'] == GradioBackend.waiting_video_response:
|
| 101 |
assistant_waiting_message = message
|
| 102 |
|
| 103 |
-
for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode):
|
| 104 |
if start_timestamp >= 0:
|
| 105 |
response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
|
| 106 |
if assistant_waiting_message is None:
|
|
@@ -109,7 +118,10 @@ with gr.Blocks() as demo:
|
|
| 109 |
assistant_waiting_message['content'] = response_with_timestamp
|
| 110 |
assistant_waiting_message = None
|
| 111 |
yield history, state, dynamic_trigger
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
js_video_timestamp_fetcher = """
|
| 115 |
(state, video_state) => {
|
|
|
|
| 1 |
+
hf_spaces = False
|
| 2 |
+
js_monitor = False # if False, the actual video timestamp in the front end is ignored. Suitable for environments with unavoidable latency (e.g. hf spaces)
|
| 3 |
+
if hf_spaces:
|
| 4 |
+
try:
|
| 5 |
+
import spaces
|
| 6 |
+
except Exception as e:
|
| 7 |
+
print(e)
|
| 8 |
+
import os
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
from kokoro import KPipeline
|
|
|
|
| 25 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
| 26 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
| 27 |
|
| 28 |
+
gradio_backend = None if hf_spaces else GradioBackend()
|
| 29 |
+
|
| 30 |
with gr.Blocks() as demo:
|
| 31 |
gr.Markdown("## LiveCC Conversation and Real-Time Commentary - Gradio Demo")
|
| 32 |
gr.Markdown("### [LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale (CVPR 2025)](https://showlab.github.io/livecc/)")
|
|
|
|
| 47 |
visible=True,
|
| 48 |
sources=['upload'],
|
| 49 |
autoplay=True,
|
|
|
|
| 50 |
width=720,
|
| 51 |
height=480
|
| 52 |
)
|
|
|
|
| 63 |
with gr.Row():
|
| 64 |
gr_radio_mode = gr.Radio(label="Select Mode", choices=["Real-Time Commentary", "Conversation"], elem_id="gr_radio_mode", value='Real-Time Commentary', interactive=True)
|
| 65 |
|
| 66 |
+
# @spaces.GPU
|
| 67 |
def gr_chatinterface_fn(message, history, state, video_path, mode):
|
| 68 |
global gradio_backend
|
|
|
|
| 69 |
if gradio_backend is None:
|
| 70 |
+
yield '(ZeroGPU needs to initialize model under @spaces.GPU, thanks for waiting...)', state
|
| 71 |
gradio_backend = GradioBackend()
|
| 72 |
+
yield '(finished initialization, responding...)', state
|
| 73 |
state['video_path'] = video_path
|
|
|
|
| 74 |
if mode != 'Conversation':
|
| 75 |
yield 'waiting video input...'
|
| 76 |
+
response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
|
| 77 |
+
yield response, state
|
| 78 |
|
| 79 |
def gr_chatinterface_chatbot_clear_fn():
|
| 80 |
return {}, {}, 0, 0
|
| 81 |
gr_chatinterface = gr.ChatInterface(
|
| 82 |
fn=gr_chatinterface_fn,
|
| 83 |
type="messages",
|
| 84 |
+
additional_inputs=[gr_state, gr_video, gr_radio_mode],
|
| 85 |
+
additional_outputs=[gr_state]
|
| 86 |
)
|
| 87 |
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 88 |
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 89 |
|
| 90 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
| 91 |
+
if static_trigger == 0:
|
| 92 |
+
yield [], {}, dynamic_trigger
|
| 93 |
+
return
|
| 94 |
+
yield history + [gr.ChatMessage(role="assistant", content='Loading video... thanks for waiting...')], state, dynamic_trigger
|
| 95 |
+
if not js_monitor:
|
| 96 |
+
video_state['video_timestamp'] = 19260817 # 👓
|
| 97 |
state.update(video_state)
|
| 98 |
query, assistant_waiting_message = None, None
|
| 99 |
for message in history[::-1]:
|
|
|
|
| 109 |
elif message['content'] == GradioBackend.waiting_video_response:
|
| 110 |
assistant_waiting_message = message
|
| 111 |
|
| 112 |
+
for (start_timestamp, stop_timestamp), response, state in gradio_backend(query=query, state=state, mode=mode, hf_spaces=hf_spaces):
|
| 113 |
if start_timestamp >= 0:
|
| 114 |
response_with_timestamp = f'{start_timestamp:.1f}s-{stop_timestamp:.1f}s: {response}'
|
| 115 |
if assistant_waiting_message is None:
|
|
|
|
| 118 |
assistant_waiting_message['content'] = response_with_timestamp
|
| 119 |
assistant_waiting_message = None
|
| 120 |
yield history, state, dynamic_trigger
|
| 121 |
+
if js_monitor:
|
| 122 |
+
yield history, state, 1 - dynamic_trigger
|
| 123 |
+
else:
|
| 124 |
+
yield history, state, dynamic_trigger
|
| 125 |
|
| 126 |
js_video_timestamp_fetcher = """
|
| 127 |
(state, video_state) => {
|