Spaces:
Build error
Build error
WIP: Making the demo work on hf spaces
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
"""Streamlit demo to visualize auto-annotated Foley segments from movie clips."""
|
| 2 |
import os
|
| 3 |
-
from os.path import join, exists, dirname, abspath
|
| 4 |
import json
|
|
|
|
| 5 |
|
| 6 |
from tqdm import tqdm
|
| 7 |
import numpy as np
|
|
@@ -147,78 +148,72 @@ if __name__ == "__main__":
|
|
| 147 |
"**Instructions**: Click the **Reload** button to see segments from a new clip. "\
|
| 148 |
"Reloading the page is not necessary."
|
| 149 |
)
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
|
| 170 |
reload_button = st.button("Reload")
|
| 171 |
-
|
| 172 |
-
index = np.random.randint(0, len(st.session_state.subdf))
|
| 173 |
if reload_button:
|
| 174 |
-
index = np.random.randint(0, len(st.session_state.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
row = st.session_state.subdf.iloc[index].to_dict()
|
| 177 |
-
if use_local:
|
| 178 |
-
clip_paths, labels, segments, durations = process_sample(row)
|
| 179 |
-
else:
|
| 180 |
-
annot = load_json(row["annot_filtered"])
|
| 181 |
-
seg_indices = [i for i, flag in enumerate(annot["keep_status"]) if flag]
|
| 182 |
-
keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
|
| 183 |
-
for k in keys:
|
| 184 |
-
annot[k] = [x for i, x in enumerate(annot[k]) if i in seg_indices]
|
| 185 |
-
del annot["keep_status"]
|
| 186 |
-
labels = [
|
| 187 |
-
summarize_classification_probs(
|
| 188 |
-
annot["silence_prob"][i], annot["audiomae_on_audioset"][i]
|
| 189 |
-
) for i in range(len(annot["non_speech_segments"]))
|
| 190 |
-
]
|
| 191 |
-
segments, durations = annot["non_speech_segments"], annot["duration"]
|
| 192 |
-
clip_paths = [f"https://www.youtube.com/watch?v={row['videoid']}"] * len(segments)
|
| 193 |
-
|
| 194 |
-
# Make a grid of videos and captions in streamlit
|
| 195 |
-
videos = clip_paths
|
| 196 |
-
video_id = row["videoid"]
|
| 197 |
-
movie = row["title"]
|
| 198 |
st.markdown(f"Showing Foley segments from a clip in movie: **{movie}**")
|
| 199 |
|
| 200 |
# Create a grid of videos
|
| 201 |
grid = make_grid(3, 3)
|
| 202 |
|
| 203 |
# Add videos to the grid
|
| 204 |
-
for idx in range(0, min(len(
|
| 205 |
i, j = idx // 3, idx % 3
|
| 206 |
|
| 207 |
start, end = segments[idx]
|
| 208 |
duration = durations[idx]
|
| 209 |
|
| 210 |
grid[i][j].caption(f"Segment duration: {duration}")
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
grid[i][j].markdown(html_code, unsafe_allow_html=True)
|
| 217 |
-
else:
|
| 218 |
-
grid[i][j].video(videos[idx])
|
| 219 |
grid[i][j].caption(f"{labels[idx]}")
|
| 220 |
-
|
| 221 |
|
| 222 |
st.markdown("##### Some stats")
|
| 223 |
-
st.write(f"Total number of unique clips: {len(st.session_state.
|
| 224 |
-
st.write(
|
|
|
|
| 1 |
"""Streamlit demo to visualize auto-annotated Foley segments from movie clips."""
|
| 2 |
import os
|
| 3 |
+
from os.path import join, exists, dirname, abspath, basename
|
| 4 |
import json
|
| 5 |
+
from glob import glob
|
| 6 |
|
| 7 |
from tqdm import tqdm
|
| 8 |
import numpy as np
|
|
|
|
| 148 |
"**Instructions**: Click the **Reload** button to see segments from a new clip. "\
|
| 149 |
"Reloading the page is not necessary."
|
| 150 |
)
|
| 151 |
+
|
| 152 |
+
# Locations of the inputs, relative to the current working directory.
csv_path = "./clips.csv"
# NOTE(review): this pattern looks inside a directory literally named
# "annotations_", while the annotation files below are searched under
# directories matching "annotations_*" -- confirm which one is intended.
ann_dirs = glob(join(".", "annotations_", "*"))
# One "<name>_filtered.json" annotation file is expected per clip.
annot_paths = glob(join(".", "annotations_*", "*_filtered.json"))
print(f"Total number of clips: {len(annot_paths)}")
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
if "data" not in st.session_state:
    # Load every annotation file once and cache the result in the session
    # state, so later runs of the script skip the file reads.

    # The video id is the annotation file name without "_filtered.json".
    video_ids = [basename(x).split("_filtered.json")[0] for x in annot_paths]

    # Load annotation data.
    data = [load_json(p) for p in annot_paths]

    # Count the segments flagged in "keep_status" for each clip.
    num_foley_per_clip = [sum(d["keep_status"]) for d in data]
    num_foley_segments = np.sum(num_foley_per_clip)

    # Drop clips without any kept segment. Ids and annotations are filtered
    # together so that video_ids[i] always belongs to data[i]; filtering only
    # `data` would shift the two lists against each other.
    kept = [
        (vid, d)
        for vid, d, n in zip(video_ids, data, num_foley_per_clip)
        if n > 0
    ]
    video_ids = [vid for vid, _ in kept]
    data = [d for _, d in kept]

    # Store variables.
    st.session_state.video_ids = video_ids
    st.session_state.data = data
    st.session_state.num_foley_segments = num_foley_segments
|
| 172 |
|
| 173 |
|
| 174 |
reload_button = st.button("Reload")
|
| 175 |
+
# Without any clip there is nothing to sample from, and
# np.random.randint(0, 0) would raise a ValueError.
if not st.session_state.data:
    st.error("No annotated clips with Foley segments were found.")
    st.stop()

# A fresh clip index is drawn on every execution of this script, so pressing
# the Reload button (which triggers a new execution) needs no separate draw.
index = np.random.randint(0, len(st.session_state.data))

# Gather data.
cached_annot = st.session_state.data[index]
video_id = st.session_state.video_ids[index]
keep_status = cached_annot["keep_status"]

# Build a filtered copy instead of editing the cached annotation in place:
# the cached dict is reused whenever the same index is drawn again, and it
# must still hold "keep_status" and the unfiltered per-segment lists then.
annot = {k: v for k, v in cached_annot.items() if k != "keep_status"}
keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
for k in keys:
    annot[k] = [x for x, flag in zip(cached_annot[k], keep_status) if flag]

# One label per kept segment.
labels = [
    summarize_classification_probs(silence_prob, class_probs)
    for silence_prob, class_probs in zip(
        annot["silence_prob"], annot["audiomae_on_audioset"]
    )
]
segments, durations = annot["non_speech_segments"], annot["duration"]
movie = annot["title"]
|
| 194 |
+
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
st.markdown(f"Showing Foley segments from a clip in movie: **{movie}**")

# Segments are laid out on a 3 x 3 grid, so at most nine are shown.
grid = make_grid(3, 3)

num_shown = min(len(segments), 9)
for idx in range(num_shown):
    # Fill the grid row by row.
    i, j = divmod(idx, 3)
    cell = grid[i][j]

    start, end = segments[idx]
    duration = durations[idx]
    cell.caption(f"Segment duration: {duration}")

    # Embed the YouTube player restricted to the segment; the start and end
    # offsets are truncated to integers before being put into the URL.
    url = f"https://www.youtube.com/embed/{video_id}?start={int(start)}&end={int(end)}"
    html_code = f"""
<iframe height="320" width="420" src="{url}" frameborder="0" allowfullscreen></iframe>
"""
    cell.markdown(html_code, unsafe_allow_html=True)
    cell.caption(f"{labels[idx]}")
|
| 215 |
+
|
| 216 |
|
| 217 |
# Footer with the totals cached in the session state.
st.markdown("##### Some stats")
st.write("Total number of unique clips: {}".format(len(st.session_state.data)))
st.write(f"Total number of foley segments: {st.session_state.num_foley_segments}")
|