Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,9 +6,9 @@ if hf_spaces:
|
|
| 6 |
except Exception as e:
|
| 7 |
print(e)
|
| 8 |
import os
|
|
|
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
-
from kokoro import KPipeline
|
| 12 |
from demo.infer import LiveCCDemoInfer
|
| 13 |
|
| 14 |
class GradioBackend:
|
|
@@ -20,7 +20,6 @@ class GradioBackend:
|
|
| 20 |
}
|
| 21 |
def __init__(self, model_path: str = 'chenjoya/LiveCC-7B-Instruct'):
|
| 22 |
self.infer = LiveCCDemoInfer(model_path)
|
| 23 |
-
self.audio_pipeline = KPipeline(lang_code='a')
|
| 24 |
|
| 25 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
| 26 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
|
@@ -33,7 +32,7 @@ with gr.Blocks() as demo:
|
|
| 33 |
gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
|
| 34 |
gr.Markdown("2️⃣🅰️ **Real-Time Commentary: Input a query (optional) -> Click or upload a video**.")
|
| 35 |
gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
|
| 36 |
-
gr.Markdown("*
|
| 37 |
gr_state = gr.State({}, render=False) # control all useful state, including kv cache
|
| 38 |
gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
|
| 39 |
gr_static_trigger = gr.Number(value=0, visible=False) # control start streaming or stop
|
|
@@ -53,7 +52,10 @@ with gr.Blocks() as demo:
|
|
| 53 |
gr_examples = gr.Examples(
|
| 54 |
examples=[
|
| 55 |
'demo/sources/howto_fix_laptop_mute_1080p.mp4',
|
| 56 |
-
'demo/sources/writing_mute_1080p.mp4'
|
|
|
|
|
|
|
|
|
|
| 57 |
],
|
| 58 |
inputs=[gr_video],
|
| 59 |
)
|
|
@@ -76,22 +78,29 @@ with gr.Blocks() as demo:
|
|
| 76 |
response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
|
| 77 |
yield response, state
|
| 78 |
|
| 79 |
-
def gr_chatinterface_chatbot_clear_fn():
|
| 80 |
-
return {}, {}, 0,
|
| 81 |
gr_chatinterface = gr.ChatInterface(
|
| 82 |
fn=gr_chatinterface_fn,
|
| 83 |
type="messages",
|
| 84 |
additional_inputs=[gr_state, gr_video, gr_radio_mode],
|
| 85 |
additional_outputs=[gr_state]
|
| 86 |
)
|
| 87 |
-
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 88 |
-
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 89 |
-
|
|
|
|
| 90 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
| 91 |
if static_trigger == 0:
|
| 92 |
yield [], {}, dynamic_trigger
|
| 93 |
return
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
if not js_monitor:
|
| 96 |
video_state['video_timestamp'] = 19260817 # 👓
|
| 97 |
state.update(video_state)
|
|
@@ -141,6 +150,7 @@ with gr.Blocks() as demo:
|
|
| 141 |
inputs=[gr_radio_mode],
|
| 142 |
outputs=[gr_static_trigger, gr_dynamic_trigger]
|
| 143 |
)
|
|
|
|
| 144 |
gr_dynamic_trigger.change(
|
| 145 |
fn=gr_get_video_state,
|
| 146 |
inputs=[gr_video_state],
|
|
@@ -151,28 +161,6 @@ with gr.Blocks() as demo:
|
|
| 151 |
inputs=[gr_chatinterface.chatbot, gr_video_state, gr_state, gr_radio_mode, gr_static_trigger, gr_dynamic_trigger],
|
| 152 |
outputs=[gr_chatinterface.chatbot, gr_state, gr_dynamic_trigger],
|
| 153 |
)
|
| 154 |
-
|
| 155 |
demo.queue(max_size=5, default_concurrency_limit=5)
|
| 156 |
-
demo.launch(share=True)
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
# --- for streaming ---
|
| 160 |
-
|
| 161 |
-
# gr_tts = gr.Audio(visible=False, elem_id="gr_tts", streaming=True, autoplay=True)
|
| 162 |
-
# def tts():
|
| 163 |
-
# while True:
|
| 164 |
-
# contents = ''
|
| 165 |
-
# while not gradio_backend.contents.empty():
|
| 166 |
-
# content = gradio_backend.contents.get()
|
| 167 |
-
# contents += ' ' + content.rstrip(' ...')
|
| 168 |
-
# contents = contents.strip()
|
| 169 |
-
# if contents:
|
| 170 |
-
# generator = gradio_backend.audio_pipeline(contents, voice='af_heart', speed=1.2)
|
| 171 |
-
# for _, _, audio_torch in generator:
|
| 172 |
-
# audio_np = audio_torch.cpu().numpy()
|
| 173 |
-
# max_val = np.max(np.abs(audio_np))
|
| 174 |
-
# if max_val > 0:
|
| 175 |
-
# audio_np = audio_np / max_val
|
| 176 |
-
# audio_int16 = (audio_np * 32767).astype(np.int16)
|
| 177 |
-
# yield (24000, audio_int16)
|
| 178 |
-
# gr_video.change(fn=tts, outputs=[gr_tts])
|
|
|
|
| 6 |
except Exception as e:
|
| 7 |
print(e)
|
| 8 |
import os
|
| 9 |
+
import numpy as np
|
| 10 |
import gradio as gr
|
| 11 |
|
|
|
|
| 12 |
from demo.infer import LiveCCDemoInfer
|
| 13 |
|
| 14 |
class GradioBackend:
|
|
|
|
| 20 |
}
|
| 21 |
def __init__(self, model_path: str = 'chenjoya/LiveCC-7B-Instruct'):
|
| 22 |
self.infer = LiveCCDemoInfer(model_path)
|
|
|
|
| 23 |
|
| 24 |
def __call__(self, message: str = None, history: list[str] = None, state: dict = {}, mode: str = 'Real-Time Commentary', **kwargs):
|
| 25 |
return getattr(self.infer, self.mode2api[mode])(message=message, history=history, state=state, **kwargs)
|
|
|
|
| 32 |
gr.Markdown("1️⃣ Select Mode, Real-Time Commentary (LiveCC) or Conversation (Common QA)")
|
| 33 |
gr.Markdown("2️⃣🅰️ **Real-Time Commentary: Input a query (optional) -> Click or upload a video**.")
|
| 34 |
gr.Markdown("2️⃣🅱️ **Conversation: Click or upload a video -> Input a query**. But as the past_key_values support in ZeroGPU is not good, multi-turn conversation could be slower.")
|
| 35 |
+
gr.Markdown("*HF Space Gradio has unsolvable latency (10s~20s). If you want to enjoy the very real-time experience, please deploy locally https://github.com/showlab/livecc*")
|
| 36 |
gr_state = gr.State({}, render=False) # control all useful state, including kv cache
|
| 37 |
gr_video_state = gr.JSON({}, visible=False) # only record video state, belong to gr_state but lightweight
|
| 38 |
gr_static_trigger = gr.Number(value=0, visible=False) # control start streaming or stop
|
|
|
|
| 52 |
gr_examples = gr.Examples(
|
| 53 |
examples=[
|
| 54 |
'demo/sources/howto_fix_laptop_mute_1080p.mp4',
|
| 55 |
+
'demo/sources/writing_mute_1080p.mp4',
|
| 56 |
+
'demo/sources/spacex_falcon9_mute_1080p.mp4',
|
| 57 |
+
'demo/sources/warriors_vs_rockets_2025wcr1_mute_1080p.mp4',
|
| 58 |
+
'demo/sources/dota2_facelessvoid_mute_1080p.mp4'
|
| 59 |
],
|
| 60 |
inputs=[gr_video],
|
| 61 |
)
|
|
|
|
| 78 |
response, state = gradio_backend(message=message, history=history, state=state, mode=mode, hf_spaces=hf_spaces)
|
| 79 |
yield response, state
|
| 80 |
|
| 81 |
+
def gr_chatinterface_chatbot_clear_fn(gr_dynamic_trigger):
|
| 82 |
+
return {}, {}, 0, gr_dynamic_trigger
|
| 83 |
gr_chatinterface = gr.ChatInterface(
|
| 84 |
fn=gr_chatinterface_fn,
|
| 85 |
type="messages",
|
| 86 |
additional_inputs=[gr_state, gr_video, gr_radio_mode],
|
| 87 |
additional_outputs=[gr_state]
|
| 88 |
)
|
| 89 |
+
gr_chatinterface.chatbot.clear(fn=gr_chatinterface_chatbot_clear_fn, inputs=[gr_dynamic_trigger], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 90 |
+
gr_clean_button.click(fn=lambda :[[], *gr_chatinterface_chatbot_clear_fn()], inputs=[gr_dynamic_trigger], outputs=[gr_video_state, gr_state, gr_static_trigger, gr_dynamic_trigger])
|
| 91 |
+
|
| 92 |
+
@spaces.GPU
|
| 93 |
def gr_for_streaming(history: list[gr.ChatMessage], video_state: dict, state: dict, mode: str, static_trigger: int, dynamic_trigger: int):
|
| 94 |
if static_trigger == 0:
|
| 95 |
yield [], {}, dynamic_trigger
|
| 96 |
return
|
| 97 |
+
global gradio_backend
|
| 98 |
+
if gradio_backend is None:
|
| 99 |
+
yield '(ZeroGPU needs to initialize model under @spaces.GPU, thanks for waiting...)', state
|
| 100 |
+
gradio_backend = GradioBackend()
|
| 101 |
+
yield '(finished initialization, responding...)', state
|
| 102 |
+
waiting_prompt = 'Loading video now... thanks for waiting...'
|
| 103 |
+
yield history + [gr.ChatMessage(role="assistant", content=waiting_prompt)], state, dynamic_trigger
|
| 104 |
if not js_monitor:
|
| 105 |
video_state['video_timestamp'] = 19260817 # 👓
|
| 106 |
state.update(video_state)
|
|
|
|
| 150 |
inputs=[gr_radio_mode],
|
| 151 |
outputs=[gr_static_trigger, gr_dynamic_trigger]
|
| 152 |
)
|
| 153 |
+
|
| 154 |
gr_dynamic_trigger.change(
|
| 155 |
fn=gr_get_video_state,
|
| 156 |
inputs=[gr_video_state],
|
|
|
|
| 161 |
inputs=[gr_chatinterface.chatbot, gr_video_state, gr_state, gr_radio_mode, gr_static_trigger, gr_dynamic_trigger],
|
| 162 |
outputs=[gr_chatinterface.chatbot, gr_state, gr_dynamic_trigger],
|
| 163 |
)
|
| 164 |
+
|
| 165 |
demo.queue(max_size=5, default_concurrency_limit=5)
|
| 166 |
+
demo.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|