Fix plot
Browse files
app.py
CHANGED
|
@@ -15,14 +15,14 @@ from nncore.engine import load_checkpoint
|
|
| 15 |
from nncore.nn import build_model
|
| 16 |
|
| 17 |
TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding' # noqa
|
| 18 |
-
DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to
|
| 19 |
|
| 20 |
CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
|
| 21 |
WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth' # noqa
|
| 22 |
|
| 23 |
|
| 24 |
def convert_time(seconds):
|
| 25 |
-
minutes, seconds = divmod(round(seconds), 60)
|
| 26 |
return f'{minutes:02d}:{seconds:02d}'
|
| 27 |
|
| 28 |
|
|
@@ -59,7 +59,7 @@ def init_model(config, checkpoint):
|
|
| 59 |
return model, cfg
|
| 60 |
|
| 61 |
|
| 62 |
-
def main(video, query, model, cfg):
|
| 63 |
if len(query) == 0:
|
| 64 |
raise gr.Error('Text query can not be empty.')
|
| 65 |
|
|
@@ -82,23 +82,24 @@ def main(video, query, model, cfg):
|
|
| 82 |
hd = pred['_out']['saliency'].cpu()
|
| 83 |
hd = ((hd - hd.min()) / (hd.max() - hd.min())).tolist()
|
| 84 |
|
| 85 |
-
|
| 86 |
ax.plot(range(0, len(hd) * 2, 2), hd)
|
| 87 |
|
| 88 |
ax.set_xlabel('Time (s)', fontsize=15)
|
| 89 |
ax.set_ylabel('Saliency Score', fontsize=15)
|
| 90 |
|
| 91 |
ax.tick_params(labelsize=14)
|
| 92 |
-
plt.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
|
| 93 |
|
| 94 |
return mr, fig
|
| 95 |
|
| 96 |
|
| 97 |
model, cfg = init_model(CONFIG, WEIGHT)
|
| 98 |
-
|
|
|
|
|
|
|
| 99 |
|
| 100 |
demo = gr.Interface(
|
| 101 |
-
fn=main,
|
| 102 |
inputs=[gr.Video(label='Video'),
|
| 103 |
gr.Textbox(label='Text Query')],
|
| 104 |
outputs=[
|
|
|
|
| 15 |
from nncore.nn import build_model
|
| 16 |
|
| 17 |
# Heading text for the demo.
# NOTE(review): presumably passed to gr.Interface as the page title; the call
# is cut off in this view, so confirm against the full file.
TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding' # noqa
|
| 18 |
+
# HTML-formatted blurb with links to the paper and repository plus a
# three-step user guide.
# NOTE(review): presumably rendered as the demo description; confirm.
DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to write a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.' # noqa
|
| 19 |
|
| 20 |
# Config file path; passed as the first argument to init_model().
CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
|
| 21 |
# Checkpoint URL; passed as the second (checkpoint) argument to init_model().
WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth' # noqa
|
| 22 |
|
| 23 |
|
| 24 |
def convert_time(seconds):
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            clamped to zero before rounding.

    Returns:
        str: The duration as ``'MM:SS'``. Minutes are not wrapped into
        hours, so 100 minutes or more simply uses more minute digits.
    """
    # Clamp before rounding so a slightly negative value cannot produce
    # negative minutes through divmod (divmod(-1, 60) == (-1, 59)).
    total = round(max(seconds, 0))
    minutes, secs = divmod(total, 60)
    return f'{minutes:02d}:{secs:02d}'
|
| 27 |
|
| 28 |
|
|
|
|
| 59 |
return model, cfg
|
| 60 |
|
| 61 |
|
| 62 |
+
def main(video, query, model, cfg, fig, ax):
|
| 63 |
if len(query) == 0:
|
| 64 |
raise gr.Error('Text query can not be empty.')
|
| 65 |
|
|
|
|
| 82 |
hd = pred['_out']['saliency'].cpu()
|
| 83 |
hd = ((hd - hd.min()) / (hd.max() - hd.min())).tolist()
|
| 84 |
|
| 85 |
+
ax.cla()
|
| 86 |
ax.plot(range(0, len(hd) * 2, 2), hd)
|
| 87 |
|
| 88 |
ax.set_xlabel('Time (s)', fontsize=15)
|
| 89 |
ax.set_ylabel('Saliency Score', fontsize=15)
|
| 90 |
|
| 91 |
ax.tick_params(labelsize=14)
|
|
|
|
| 92 |
|
| 93 |
return mr, fig
|
| 94 |
|
| 95 |
|
| 96 |
# Build the model once at module load; the same objects are bound into the
# request handler below.
model, cfg = init_model(CONFIG, WEIGHT)

# Create the figure BEFORE applying the layout. A pyplot-level
# tight_layout() acts on the current figure, so calling it ahead of
# plt.subplots() would adjust an implicit stray figure instead of this one.
fig, ax = plt.subplots(figsize=(10, 5.5))
fig.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
|
| 100 |
|
| 101 |
demo = gr.Interface(
|
| 102 |
+
fn=partial(main, model=model, cfg=cfg, fig=fig, ax=ax),
|
| 103 |
inputs=[gr.Video(label='Video'),
|
| 104 |
gr.Textbox(label='Text Query')],
|
| 105 |
outputs=[
|