Spaces:
Runtime error
Runtime error
Vincent Claes
committed on
Commit
·
5c823d7
1
Parent(s):
a861406
add examples
Browse files
app.py
CHANGED
|
@@ -21,6 +21,30 @@ examples = [
|
|
| 21 |
"movies/bathroom.mp4",
|
| 22 |
ROOMS,
|
| 23 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
]
|
| 25 |
|
| 26 |
|
|
@@ -45,28 +69,12 @@ def get_num_total_frames(file_path: str):
|
|
| 45 |
return len(videoreader)
|
| 46 |
|
| 47 |
|
| 48 |
-
# def convert_frames_to_gif(frames, save_path: str = "frames.gif"):
|
| 49 |
-
# converted_frames = frames.astype(np.uint8)
|
| 50 |
-
# Path(save_path).parent.mkdir(parents=True, exist_ok=True)
|
| 51 |
-
# imageio.mimsave(save_path, converted_frames, fps=8)
|
| 52 |
-
# return save_path
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
# def create_gif_from_video_file(
|
| 56 |
-
# file_path: str,
|
| 57 |
-
# num_frames: int = 16,
|
| 58 |
-
# frame_sampling_rate: int = 1,
|
| 59 |
-
# save_path: str = "frames.gif",
|
| 60 |
-
# ):
|
| 61 |
-
# frames = sample_frames_from_video_file(file_path, num_frames, frame_sampling_rate)
|
| 62 |
-
# return convert_frames_to_gif(frames, save_path)
|
| 63 |
-
|
| 64 |
-
|
| 65 |
def select_model(model_name):
|
| 66 |
global processor, model
|
| 67 |
processor = AutoProcessor.from_pretrained(model_name)
|
| 68 |
model = AutoModel.from_pretrained(model_name)
|
| 69 |
|
|
|
|
| 70 |
def get_frame_sampling_rate(video_path, num_model_input_frames):
|
| 71 |
# rearrange sampling rate based on video length and model input length
|
| 72 |
num_total_frames = get_num_total_frames(video_path)
|
|
@@ -76,6 +84,7 @@ def get_frame_sampling_rate(video_path, num_model_input_frames):
|
|
| 76 |
frame_sampling_rate = FRAME_SAMPLING_RATE
|
| 77 |
return frame_sampling_rate
|
| 78 |
|
|
|
|
| 79 |
def predict(video_path, labels_text):
|
| 80 |
labels = labels_text.split(",")
|
| 81 |
num_model_input_frames = model.config.vision_config.num_frames
|
|
@@ -83,8 +92,6 @@ def predict(video_path, labels_text):
|
|
| 83 |
frames = sample_frames_from_video_file(
|
| 84 |
video_path, num_model_input_frames, frame_sampling_rate
|
| 85 |
)
|
| 86 |
-
# gif_path = convert_frames_to_gif(frames, save_path="video.gif")
|
| 87 |
-
|
| 88 |
inputs = processor(
|
| 89 |
text=labels, videos=list(frames), return_tensors="pt", padding=True
|
| 90 |
)
|
|
@@ -103,49 +110,33 @@ def predict(video_path, labels_text):
|
|
| 103 |
|
| 104 |
app = gr.Blocks()
|
| 105 |
with app:
|
|
|
|
| 106 |
gr.Markdown(
|
| 107 |
-
"
|
| 108 |
-
)
|
| 109 |
-
gr.Markdown(
|
| 110 |
-
"### **<p align='center'>Upload a video of a room and provide a list of type of rooms the model should select from.</p>**"
|
| 111 |
-
|
| 112 |
)
|
| 113 |
|
| 114 |
with gr.Row():
|
| 115 |
with gr.Column():
|
| 116 |
video_file = gr.Video(label="Video File:", show_label=True)
|
| 117 |
-
local_video_labels_text = gr.Textbox(
|
| 118 |
-
label="Labels Text:", show_label=True
|
| 119 |
-
)
|
| 120 |
submit_button = gr.Button(value="Predict")
|
| 121 |
-
# with gr.Column():
|
| 122 |
-
# video_gif = gr.Image(
|
| 123 |
-
# label="Input Clip",
|
| 124 |
-
# show_label=True,
|
| 125 |
-
# )
|
| 126 |
with gr.Column():
|
| 127 |
predictions = gr.Label(label="Predictions:", show_label=True)
|
| 128 |
|
| 129 |
gr.Markdown("**Examples:**")
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
submit_button.click(
|
| 139 |
predict,
|
| 140 |
inputs=[video_file, local_video_labels_text],
|
| 141 |
-
# outputs=[predictions, video_gif],
|
| 142 |
outputs=predictions,
|
| 143 |
)
|
| 144 |
-
|
| 145 |
-
# """
|
| 146 |
-
# \n Created by: Vincent Claes, <a href=\"https://www.meet-drift.ai/\">Drift</a>.
|
| 147 |
-
# \n Inspired by: <a href=\"https://huggingface.co/spaces/fcakyon/zero-shot-video-classification\">fcakyon</a>.
|
| 148 |
-
# """
|
| 149 |
-
# )
|
| 150 |
|
| 151 |
app.launch()
|
|
|
|
| 21 |
"movies/bathroom.mp4",
|
| 22 |
ROOMS,
|
| 23 |
],
|
| 24 |
+
[
|
| 25 |
+
"movies/bedroom.mp4",
|
| 26 |
+
ROOMS,
|
| 27 |
+
],
|
| 28 |
+
[
|
| 29 |
+
"movies/dressing.mp4",
|
| 30 |
+
ROOMS,
|
| 31 |
+
],
|
| 32 |
+
[
|
| 33 |
+
"movies/home-office.mp4",
|
| 34 |
+
ROOMS,
|
| 35 |
+
],
|
| 36 |
+
[
|
| 37 |
+
"movies/kitchen.mp4",
|
| 38 |
+
ROOMS,
|
| 39 |
+
],
|
| 40 |
+
[
|
| 41 |
+
"movies/living-room.mp4",
|
| 42 |
+
ROOMS,
|
| 43 |
+
],
|
| 44 |
+
[
|
| 45 |
+
"movies/toilet.mp4",
|
| 46 |
+
ROOMS,
|
| 47 |
+
],
|
| 48 |
]
|
| 49 |
|
| 50 |
|
|
|
|
| 69 |
return len(videoreader)
|
| 70 |
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
def select_model(model_name):
|
| 73 |
global processor, model
|
| 74 |
processor = AutoProcessor.from_pretrained(model_name)
|
| 75 |
model = AutoModel.from_pretrained(model_name)
|
| 76 |
|
| 77 |
+
|
| 78 |
def get_frame_sampling_rate(video_path, num_model_input_frames):
|
| 79 |
# rearrange sampling rate based on video length and model input length
|
| 80 |
num_total_frames = get_num_total_frames(video_path)
|
|
|
|
| 84 |
frame_sampling_rate = FRAME_SAMPLING_RATE
|
| 85 |
return frame_sampling_rate
|
| 86 |
|
| 87 |
+
|
| 88 |
def predict(video_path, labels_text):
|
| 89 |
labels = labels_text.split(",")
|
| 90 |
num_model_input_frames = model.config.vision_config.num_frames
|
|
|
|
| 92 |
frames = sample_frames_from_video_file(
|
| 93 |
video_path, num_model_input_frames, frame_sampling_rate
|
| 94 |
)
|
|
|
|
|
|
|
| 95 |
inputs = processor(
|
| 96 |
text=labels, videos=list(frames), return_tensors="pt", padding=True
|
| 97 |
)
|
|
|
|
| 110 |
|
| 111 |
app = gr.Blocks()
|
| 112 |
with app:
|
| 113 |
+
gr.Markdown("# **<p align='center'>Classification of Rooms</p>**")
|
| 114 |
gr.Markdown(
|
| 115 |
+
"#### **<p align='center'>Upload a video (mp4) of a room and provide a list of type of rooms the model should select from.</p>**"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
)
|
| 117 |
|
| 118 |
with gr.Row():
|
| 119 |
with gr.Column():
|
| 120 |
video_file = gr.Video(label="Video File:", show_label=True)
|
| 121 |
+
local_video_labels_text = gr.Textbox(label="Labels Text:", show_label=True)
|
|
|
|
|
|
|
| 122 |
submit_button = gr.Button(value="Predict")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
with gr.Column():
|
| 124 |
predictions = gr.Label(label="Predictions:", show_label=True)
|
| 125 |
|
| 126 |
gr.Markdown("**Examples:**")
|
| 127 |
+
gr.Examples(
|
| 128 |
+
examples,
|
| 129 |
+
[video_file, local_video_labels_text],
|
| 130 |
+
predictions,
|
| 131 |
+
fn=predict,
|
| 132 |
+
cache_examples=True,
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
submit_button.click(
|
| 136 |
predict,
|
| 137 |
inputs=[video_file, local_video_labels_text],
|
|
|
|
| 138 |
outputs=predictions,
|
| 139 |
)
|
| 140 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
app.launch()
|