Spaces:
Build error
Build error
Iskaj
committed on
Commit
·
3b3290d
1
Parent(s):
7f2c8f8
added comments, added data aggregation for decision making
Browse files- Matching Exploration.ipynb +0 -0
- app.py +51 -17
Matching Exploration.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app.py
CHANGED
|
@@ -4,15 +4,20 @@ import logging
|
|
| 4 |
import os
|
| 5 |
import hashlib
|
| 6 |
import datetime
|
|
|
|
| 7 |
|
| 8 |
import pandas
|
| 9 |
import gradio as gr
|
| 10 |
from moviepy.editor import VideoFileClip
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
import imagehash
|
| 13 |
from PIL import Image
|
| 14 |
|
| 15 |
import numpy as np
|
|
|
|
| 16 |
import faiss
|
| 17 |
|
| 18 |
FPS = 5
|
|
@@ -26,6 +31,8 @@ def download_video_from_url(url):
|
|
| 26 |
with (urllib.request.urlopen(url)) as f, open(filename, 'wb') as fileout:
|
| 27 |
fileout.write(f.read())
|
| 28 |
logging.info(f"Downloaded video from {url} to {filename}.")
|
|
|
|
|
|
|
| 29 |
return filename
|
| 30 |
|
| 31 |
def change_ffmpeg_fps(clip, fps=FPS):
|
|
@@ -51,13 +58,19 @@ def binary_array_to_uint8s(arr):
|
|
| 51 |
|
| 52 |
def compute_hashes(clip, fps=FPS):
    """Lazily yield one hash record per frame of ``clip`` resampled to ``fps``.

    Each yielded record is a dict with two keys:
      * ``"frame"``: ``1 + index * fps`` where ``index`` is the position of
        the frame in the resampled stream.
      * ``"hash"``: a ``uint8`` NumPy array built from the frame's hash bits
        via ``binary_array_to_uint8s``.
    """
    resampled = change_ffmpeg_fps(clip, fps)
    for position, image in enumerate(resampled.iter_frames()):
        packed_bits = binary_array_to_uint8s(compute_hash(image).hash)
        yield {
            "frame": 1 + position * fps,
            "hash": np.array(packed_bits, dtype='uint8'),
        }
|
| 56 |
|
| 57 |
def index_hashes_for_video(url):
|
| 58 |
filename = download_video_from_url(url)
|
| 59 |
if os.path.exists(f'{filename}.index'):
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
hash_vectors = np.array([x['hash'] for x in compute_hashes(VideoFileClip(filename))])
|
| 63 |
logging.info(f"Computed hashes for {hash_vectors.shape} frames.")
|
|
@@ -87,33 +100,54 @@ def compare_videos(url, target, MIN_DISTANCE = 3):
|
|
| 87 |
"""
|
| 88 |
# TODO: Fix crash if no matches are found
|
| 89 |
|
|
|
|
| 90 |
video_index = index_hashes_for_video(url)
|
| 91 |
-
target_indices = [index_hashes_for_video(x) for x in [target]]
|
| 92 |
-
|
| 93 |
video_index.make_direct_map() # Make sure the index is indexable
|
| 94 |
hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
|
| 95 |
|
|
|
|
|
|
|
|
|
|
| 96 |
# The results are returned as a triplet of 1D arrays
|
| 97 |
# lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
|
| 98 |
# (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
|
| 99 |
-
|
| 100 |
lims, D, I = target_indices[0].range_search(hash_vectors, MIN_DISTANCE)
|
| 101 |
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
x = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
|
| 105 |
-
x = [
|
| 106 |
-
y = [
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
plt.
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
|
| 119 |
"https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
|
|
|
|
| 4 |
import os
|
| 5 |
import hashlib
|
| 6 |
import datetime
|
| 7 |
+
import time
|
| 8 |
|
| 9 |
import pandas
|
| 10 |
import gradio as gr
|
| 11 |
from moviepy.editor import VideoFileClip
|
| 12 |
|
| 13 |
+
import seaborn as sns
|
| 14 |
+
import matplotlib.pyplot as plt
|
| 15 |
+
|
| 16 |
import imagehash
|
| 17 |
from PIL import Image
|
| 18 |
|
| 19 |
import numpy as np
|
| 20 |
+
import pandas as pd
|
| 21 |
import faiss
|
| 22 |
|
| 23 |
FPS = 5
|
|
|
|
| 31 |
with (urllib.request.urlopen(url)) as f, open(filename, 'wb') as fileout:
|
| 32 |
fileout.write(f.read())
|
| 33 |
logging.info(f"Downloaded video from {url} to {filename}.")
|
| 34 |
+
else:
|
| 35 |
+
logging.info(f"Skipping downloading from {url} because {filename} already exists.")
|
| 36 |
return filename
|
| 37 |
|
| 38 |
def change_ffmpeg_fps(clip, fps=FPS):
|
|
|
|
| 58 |
|
| 59 |
def compute_hashes(clip, fps=FPS):
    """Lazily yield one hash record per frame of ``clip`` resampled to ``fps``.

    Each yielded record is a dict with two keys:
      * ``"frame"``: ``1 + index * fps`` where ``index`` is the position of
        the frame in the resampled stream.
      * ``"hash"``: a ``uint8`` NumPy array built from the frame's hash bits
        via ``binary_array_to_uint8s``.
    """
    resampled = change_ffmpeg_fps(clip, fps)
    for position, image in enumerate(resampled.iter_frames()):
        # Per the original author's notes (not verifiable from this block):
        #   - each frame is an RGB array of shape (height, width, 3);
        #   - the hash itself has shape (hash_size, hash_size);
        #   - the packed uint8 form is one-dimensional.
        # NOTE(review): the original comment gives the packed length as
        # hash_size * highfreq_factor -- confirm against compute_hash.
        packed_bits = binary_array_to_uint8s(compute_hash(image).hash)
        yield {
            "frame": 1 + position * fps,
            "hash": np.array(packed_bits, dtype='uint8'),
        }
|
| 66 |
|
| 67 |
def index_hashes_for_video(url):
|
| 68 |
filename = download_video_from_url(url)
|
| 69 |
if os.path.exists(f'{filename}.index'):
|
| 70 |
+
logging.info(f"Loading indexed hashes from {filename}.index")
|
| 71 |
+
binary_index = faiss.read_index_binary(f'{filename}.index')
|
| 72 |
+
logging.info(f"Index {filename}.index has in total {binary_index.ntotal} frames")
|
| 73 |
+
return binary_index
|
| 74 |
|
| 75 |
hash_vectors = np.array([x['hash'] for x in compute_hashes(VideoFileClip(filename))])
|
| 76 |
logging.info(f"Computed hashes for {hash_vectors.shape} frames.")
|
|
|
|
| 100 |
"""
|
| 101 |
# TODO: Fix crash if no matches are found
|
| 102 |
|
| 103 |
+
# Url (short video)
|
| 104 |
video_index = index_hashes_for_video(url)
|
|
|
|
|
|
|
| 105 |
video_index.make_direct_map() # Make sure the index is indexable
|
| 106 |
hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
|
| 107 |
|
| 108 |
+
# Target video (long video)
|
| 109 |
+
target_indices = [index_hashes_for_video(x) for x in [target]]
|
| 110 |
+
|
| 111 |
# The results are returned as a triplet of 1D arrays
|
| 112 |
# lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
|
| 113 |
# (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
|
|
|
|
| 114 |
lims, D, I = target_indices[0].range_search(hash_vectors, MIN_DISTANCE)
|
| 115 |
|
| 116 |
+
return plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
|
| 117 |
+
|
| 118 |
+
def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
    """Scatter-plot the frame matches between a source and a target video.

    The first three arguments are the triplet returned by a range search:
    the matches for query frame ``i`` are ``I[lims[i]:lims[i+1]]`` with
    distances ``D[lims[i]:lims[i+1]]``.

    Args:
        lims: Offsets delimiting each query frame's slice of ``I`` / ``D``.
        D: Distance of every match, parallel to ``I``.
        I: Index of the matched frame in the target video for every match.
        hash_vectors: Query hashes, one row per source frame. Only the
            number of rows is used here.
        MIN_DISTANCE: Radius that was used for the range search; it scales
            marker size and opacity so that closer matches stand out.

    Returns:
        The matplotlib figure holding the plot. Both axes are expressed as
        H:M:S timestamps (frame index divided by the module-level ``FPS``).
    """
    sns.set_theme()

    n_queries = hash_vectors.shape[0]
    # Cast to a signed integer type first: the offsets may arrive as unsigned
    # integers, which np.repeat refuses to use as repeat counts.
    offsets = np.asarray(lims[:n_queries + 1], dtype=np.int64)
    matches_per_frame = np.diff(offsets)

    # One (x, y) point per match: x is the source frame, y the target frame,
    # both converted from frame index to seconds.
    x = np.repeat(np.arange(n_queries), matches_per_frame) / FPS
    y = np.asarray(I, dtype=float) / FPS
    distances = np.asarray(D, dtype=float)

    # Create figure and dataframe to plot with sns
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    if len(x) > 0:
        df = pd.DataFrame({'X': x, 'Y': y})
        sns.scatterplot(
            data=df,
            x='X',
            y='Y',
            s=2 * (1 - distances / (MIN_DISTANCE + 1)),
            alpha=1 - distances / MIN_DISTANCE,
            ax=ax,
        )
    else:
        # Nothing to draw: return an empty, labelled plot instead of handing
        # empty size/alpha arrays to the plotting backend.
        logging.warning("No matching frames found; returning an empty plot.")

    def to_clock(seconds):
        """Format a non-negative number of seconds as HH:MM:SS."""
        return time.strftime('%H:%M:%S', time.gmtime(seconds))

    # Set x-labels to be more readable. Negative auto-generated ticks are
    # dropped: they are not valid timestamps and time.gmtime may reject them
    # on some platforms.
    x_locs = [loc for loc in ax.get_xticks() if loc >= 0]
    ax.set_xticks(x_locs)
    ax.set_xticklabels([to_clock(loc) for loc in x_locs], rotation=90)
    ax.set_xlabel('Time in source video (H:M:S)')
    ax.set_xlim(0, None)

    # Set y-labels to be more readable (same treatment as the x axis)
    y_locs = [loc for loc in ax.get_yticks() if loc >= 0]
    ax.set_yticks(y_locs)
    ax.set_yticklabels([to_clock(loc) for loc in y_locs])
    ax.set_ylabel('Time in target video (H:M:S)')

    # Adjust padding to fit gradio
    fig.subplots_adjust(bottom=0.25, left=0.20)
    return fig
|
| 148 |
+
|
| 149 |
+
logging.basicConfig()
|
| 150 |
+
logging.getLogger().setLevel(logging.DEBUG)
|
| 151 |
|
| 152 |
video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
|
| 153 |
"https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
|