Spaces:
Build error
Build error
Move plotting code to plot.py
Browse files
Co-authored-by: iskaj <iskaj@users.noreply.github.com>
Co-authored-by: prajaktashouche <prajaktashouche@users.noreply.github.com>
- app.py +23 -118
- plot.py +58 -0
- videomatch.py +60 -1
app.py
CHANGED
|
@@ -1,131 +1,16 @@
|
|
| 1 |
import logging
|
| 2 |
-
import time
|
| 3 |
|
| 4 |
-
import pandas
|
| 5 |
import gradio as gr
|
| 6 |
|
| 7 |
-
import seaborn as sns
|
| 8 |
-
import matplotlib.pyplot as plt
|
| 9 |
-
|
| 10 |
-
import numpy as np
|
| 11 |
-
import pandas as pd
|
| 12 |
-
|
| 13 |
from config import *
|
| 14 |
from videomatch import index_hashes_for_video, get_decent_distance, \
|
| 15 |
-
get_video_indices, compare_videos, get_change_points
|
| 16 |
-
|
| 17 |
|
| 18 |
logging.basicConfig()
|
| 19 |
logging.getLogger().setLevel(logging.INFO)
|
| 20 |
|
| 21 |
|
| 22 |
-
def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
|
| 23 |
-
sns.set_theme()
|
| 24 |
-
|
| 25 |
-
x = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
|
| 26 |
-
x = [i/FPS for j in x for i in j]
|
| 27 |
-
y = [i/FPS for i in I]
|
| 28 |
-
|
| 29 |
-
# Create figure and dataframe to plot with sns
|
| 30 |
-
fig = plt.figure()
|
| 31 |
-
# plt.tight_layout()
|
| 32 |
-
df = pd.DataFrame(zip(x, y), columns = ['X', 'Y'])
|
| 33 |
-
g = sns.scatterplot(data=df, x='X', y='Y', s=2*(1-D/(MIN_DISTANCE+1)), alpha=1-D/MIN_DISTANCE)
|
| 34 |
-
|
| 35 |
-
# Set x-labels to be more readable
|
| 36 |
-
x_locs, x_labels = plt.xticks() # Get original locations and labels for x ticks
|
| 37 |
-
x_labels = [time.strftime('%H:%M:%S', time.gmtime(x)) for x in x_locs]
|
| 38 |
-
plt.xticks(x_locs, x_labels)
|
| 39 |
-
plt.xticks(rotation=90)
|
| 40 |
-
plt.xlabel('Time in source video (H:M:S)')
|
| 41 |
-
plt.xlim(0, None)
|
| 42 |
-
|
| 43 |
-
# Set y-labels to be more readable
|
| 44 |
-
y_locs, y_labels = plt.yticks() # Get original locations and labels for x ticks
|
| 45 |
-
y_labels = [time.strftime('%H:%M:%S', time.gmtime(y)) for y in y_locs]
|
| 46 |
-
plt.yticks(y_locs, y_labels)
|
| 47 |
-
plt.ylabel('Time in target video (H:M:S)')
|
| 48 |
-
|
| 49 |
-
# Adjust padding to fit gradio
|
| 50 |
-
plt.subplots_adjust(bottom=0.25, left=0.20)
|
| 51 |
-
return fig
|
| 52 |
-
|
| 53 |
-
def plot_multi_comparison(df, change_points):
|
| 54 |
-
""" From the dataframe plot the current set of plots, where the bottom right is most indicative """
|
| 55 |
-
fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)
|
| 56 |
-
sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0,0])
|
| 57 |
-
sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
|
| 58 |
-
sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[1,0])
|
| 59 |
-
sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,1])
|
| 60 |
-
|
| 61 |
-
# Plot change point as lines
|
| 62 |
-
sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[2,1])
|
| 63 |
-
for x in change_points:
|
| 64 |
-
cp_time = x.start_time
|
| 65 |
-
plt.vlines(x=cp_time, ymin=np.min(df['OFFSET_LIP']), ymax=np.max(df['OFFSET_LIP']), colors='red', lw=2)
|
| 66 |
-
rand_y_pos = np.random.uniform(low=np.min(df['OFFSET_LIP']), high=np.max(df['OFFSET_LIP']), size=None)
|
| 67 |
-
plt.text(x=cp_time, y=rand_y_pos, s=str(np.round(x.confidence, 2)), color='r', rotation=-0.0, fontsize=14)
|
| 68 |
-
plt.xticks(rotation=90)
|
| 69 |
-
return fig
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
|
| 73 |
-
distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
|
| 74 |
-
video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
|
| 75 |
-
lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)
|
| 76 |
-
|
| 77 |
-
target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
|
| 78 |
-
target_s = [i/FPS for j in target for i in j]
|
| 79 |
-
source_s = [i/FPS for i in I]
|
| 80 |
-
|
| 81 |
-
# Make df
|
| 82 |
-
df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
|
| 83 |
-
if vanilla_df:
|
| 84 |
-
return df
|
| 85 |
-
|
| 86 |
-
# Minimum distance dataframe ----
|
| 87 |
-
# Group by X so for every second/x there will be 1 value of Y in the end
|
| 88 |
-
# index_min_distance = df.groupby('TARGET_S')['DISTANCE'].idxmin()
|
| 89 |
-
# df_min = df.loc[index_min_distance]
|
| 90 |
-
# df_min
|
| 91 |
-
# -------------------------------
|
| 92 |
-
|
| 93 |
-
df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
|
| 94 |
-
df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
|
| 95 |
-
|
| 96 |
-
# Group by X so for every second/x there will be 1 value of Y in the end
|
| 97 |
-
grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
|
| 98 |
-
grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']
|
| 99 |
-
|
| 100 |
-
# Remake the dataframe
|
| 101 |
-
df = grouped_X.reset_index()
|
| 102 |
-
df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
|
| 103 |
-
df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')
|
| 104 |
-
|
| 105 |
-
# Add NAN to "missing" x values (base it off hash vector, not target_s)
|
| 106 |
-
step_size = 1/FPS
|
| 107 |
-
x_complete = np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust
|
| 108 |
-
df['TARGET_S'] = np.round(df['TARGET_S'], 1)
|
| 109 |
-
df_complete = pd.DataFrame(x_complete, columns=['TARGET_S'])
|
| 110 |
-
|
| 111 |
-
# Merge dataframes to get NAN values for every missing SOURCE_S
|
| 112 |
-
df = df_complete.merge(df, on='TARGET_S', how='left')
|
| 113 |
-
|
| 114 |
-
# Interpolate between frames since there are missing values
|
| 115 |
-
df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)
|
| 116 |
-
|
| 117 |
-
# Add timeshift col and timeshift col with Linearly Interpolated Values
|
| 118 |
-
df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
|
| 119 |
-
df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']
|
| 120 |
-
|
| 121 |
-
# Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
|
| 122 |
-
df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
|
| 123 |
-
df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
|
| 124 |
-
|
| 125 |
-
# Add time column for plotting
|
| 126 |
-
df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
|
| 127 |
-
return df
|
| 128 |
-
|
| 129 |
def get_comparison(url, target, MIN_DISTANCE = 4):
|
| 130 |
""" Function for Gradio to combine all helper functions"""
|
| 131 |
video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
|
|
@@ -147,6 +32,26 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
|
|
| 147 |
fig = plot_multi_comparison(df, change_points)
|
| 148 |
return fig
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
|
| 152 |
video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
|
|
@@ -166,7 +71,7 @@ compare_iface = gr.Interface(fn=get_comparison,
|
|
| 166 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
| 167 |
|
| 168 |
auto_compare_iface = gr.Interface(fn=get_auto_comparison,
|
| 169 |
-
inputs=["text", "text", gr.Slider(1, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="
|
| 170 |
outputs="plot",
|
| 171 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
| 172 |
|
|
|
|
| 1 |
import logging
|
|
|
|
| 2 |
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from config import *
|
| 6 |
from videomatch import index_hashes_for_video, get_decent_distance, \
|
| 7 |
+
get_video_indices, compare_videos, get_change_points, get_videomatch_df
|
| 8 |
+
from plot import plot_comparison, plot_multi_comparison
|
| 9 |
|
| 10 |
logging.basicConfig()
|
| 11 |
logging.getLogger().setLevel(logging.INFO)
|
| 12 |
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def get_comparison(url, target, MIN_DISTANCE = 4):
|
| 15 |
""" Function for Gradio to combine all helper functions"""
|
| 16 |
video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
|
|
|
|
| 32 |
fig = plot_multi_comparison(df, change_points)
|
| 33 |
return fig
|
| 34 |
|
| 35 |
+
def get_auto_edit_decision(url, target, smoothing_window_size=10):
    """Gradio callback: detect change points between two videos and plot them.

    Args:
        url: First video reference, forwarded unchanged to the videomatch
            helpers (presumably a downloadable video URL -- confirm).
        target: Second video reference, forwarded the same way.
        smoothing_window_size: Forwarded to ``get_change_points``.

    Returns:
        The figure returned by ``plot_multi_comparison``.

    Raises:
        gr.Error: When ``get_decent_distance`` returns ``None``.
    """
    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
    if distance is None:
        # Report the failure through Gradio rather than silently handing an
        # empty output back to the interface.
        raise gr.Error("No matches found!")

    # get_videomatch_df runs get_video_indices/compare_videos itself, so the
    # indices and matches are not recomputed here.
    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST")

    # One human-readable line per detected change point. The callback still
    # returns only the figure, so the decisions are surfaced through the log.
    edit_decision_list = [
        f"Video at time {cp.start_time} returns {cp.metric}" for cp in change_points
    ]
    for decision in edit_decision_list:
        logging.info("%s", decision)

    fig = plot_multi_comparison(df, change_points)
    return fig
|
| 54 |
+
|
| 55 |
|
| 56 |
|
| 57 |
video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
|
|
|
|
| 71 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
| 72 |
|
| 73 |
auto_compare_iface = gr.Interface(fn=get_auto_comparison,
|
| 74 |
+
inputs=["text", "text", gr.Slider(1, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="Robust")],
|
| 75 |
outputs="plot",
|
| 76 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
| 77 |
|
plot.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import seaborn as sns
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
|
| 8 |
+
from config import FPS
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
    """Scatter all matches between two videos on a time-versus-time figure.

    Args:
        lims: Offsets such that ``lims[i+1] - lims[i]`` is the number of
            matches belonging to hash vector ``i``.
        D: Distance of every match; scales marker size and transparency.
            Assumed to be array-like supporting arithmetic -- TODO confirm.
        I: Matched index of every match; divided by ``FPS`` for the y axis.
        hash_vectors: Only ``hash_vectors.shape[0]`` is read (number of
            hash vectors that were looked up).
        MIN_DISTANCE: Distance scale used to normalise ``D``.

    Returns:
        The matplotlib figure the scatter plot was drawn on.
    """
    sns.set_theme()

    def _as_clock(seconds):
        # Render a tick position given in seconds as H:M:S.
        return time.strftime('%H:%M:%S', time.gmtime(seconds))

    # Each hash vector contributes one x value per match it produced.
    xs = [
        vec_idx / FPS
        for vec_idx in range(hash_vectors.shape[0])
        for _ in range(lims[vec_idx + 1] - lims[vec_idx])
    ]
    ys = [match_idx / FPS for match_idx in I]

    # Closer matches (small D) are drawn larger and more opaque.
    marker_sizes = 2*(1-D/(MIN_DISTANCE+1))
    marker_alpha = 1-D/MIN_DISTANCE

    fig = plt.figure()
    points = pd.DataFrame(zip(xs, ys), columns = ['X', 'Y'])
    sns.scatterplot(data=points, x='X', y='Y', s=marker_sizes, alpha=marker_alpha)

    # NOTE(review): x comes from the hash-vector index and y from I, which
    # get_videomatch_df names TARGET_S and SOURCE_S respectively -- the two
    # axis labels below may be swapped; confirm before changing them.

    # Relabel the automatically chosen x ticks as H:M:S.
    tick_locs, _ = plt.xticks()
    plt.xticks(tick_locs, [_as_clock(loc) for loc in tick_locs])
    plt.xticks(rotation=90)
    plt.xlabel('Time in source video (H:M:S)')
    plt.xlim(0, None)

    # Relabel the automatically chosen y ticks as H:M:S.
    tick_locs, _ = plt.yticks()
    plt.yticks(tick_locs, [_as_clock(loc) for loc in tick_locs])
    plt.ylabel('Time in target video (H:M:S)')

    # Leave room for the rotated tick labels inside the gradio plot area.
    plt.subplots_adjust(bottom=0.25, left=0.20)
    return fig
|
| 41 |
+
|
| 42 |
+
def plot_multi_comparison(df, change_points):
    """Plot the match dataframe on a 3x2 grid; the bottom-right panel is most indicative.

    Args:
        df: Dataframe with the columns ``time``, ``SOURCE_S``,
            ``SOURCE_LIP_S``, ``OFFSET`` and ``OFFSET_LIP``.
        change_points: Iterable of objects exposing ``start_time`` and
            ``confidence``; each one is drawn as a red vertical line with
            its rounded confidence next to it.

    Returns:
        The matplotlib figure holding all panels.
    """
    fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)
    sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0,0])
    sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
    sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[1,0])
    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,1])

    # Draw the change points on the bottom-right panel explicitly instead of
    # relying on pyplot's implicit "current axes" state.
    cp_ax = ax_arr[2,1]
    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=cp_ax)

    # The vertical extent is the same for every change point, so compute it once.
    offset_min = np.min(df['OFFSET_LIP'])
    offset_max = np.max(df['OFFSET_LIP'])
    for change_point in change_points:
        cp_time = change_point.start_time
        cp_ax.vlines(x=cp_time, ymin=offset_min, ymax=offset_max, colors='red', lw=2)
        # Random label height so labels of neighbouring change points are less likely to overlap.
        rand_y_pos = np.random.uniform(low=offset_min, high=offset_max, size=None)
        cp_ax.text(x=cp_time, y=rand_y_pos, s=str(np.round(change_point.confidence, 2)), color='r', rotation=-0.0, fontsize=14)
    cp_ax.tick_params(axis='x', labelrotation=90)
    return fig
|
videomatch.py
CHANGED
|
@@ -7,9 +7,11 @@ from kats.detectors.cusum_detection import CUSUMDetector
|
|
| 7 |
from kats.detectors.robust_stat_detection import RobustStatDetector
|
| 8 |
from kats.consts import TimeSeriesData
|
| 9 |
|
| 10 |
-
import numpy as np
|
|
|
|
| 11 |
|
| 12 |
from videohash import compute_hashes, filepath_from_url
|
|
|
|
| 13 |
|
| 14 |
def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
|
| 15 |
""" Compute hashes of a video and index the video using faiss indices and return the index. """
|
|
@@ -98,3 +100,60 @@ def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
|
|
| 98 |
jump_s = mean_offset_postchange - mean_offset_prechange
|
| 99 |
print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
|
| 100 |
return change_points
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from kats.detectors.robust_stat_detection import RobustStatDetector
|
| 8 |
from kats.consts import TimeSeriesData
|
| 9 |
|
| 10 |
+
import numpy as np
|
| 11 |
+
import pandas as pd
|
| 12 |
|
| 13 |
from videohash import compute_hashes, filepath_from_url
|
| 14 |
+
from config import FPS, MIN_DISTANCE, MAX_DISTANCE
|
| 15 |
|
| 16 |
def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
|
| 17 |
""" Compute hashes of a video and index the video using faiss indices and return the index. """
|
|
|
|
| 100 |
jump_s = mean_offset_postchange - mean_offset_prechange
|
| 101 |
print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
|
| 102 |
return change_points
|
| 103 |
+
|
| 104 |
+
def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
    """Match two videos and tabulate the matches over time.

    Args:
        url: First video reference, forwarded to the matching helpers.
        target: Second video reference, forwarded to the matching helpers.
        min_distance: Lower bound handed to ``get_decent_distance``.
        vanilla_df: If true, return the raw per-match dataframe with the
            columns ``TARGET_S``, ``SOURCE_S``, ``DISTANCE`` and ``INDICES``.

    Returns:
        With ``vanilla_df`` the raw match table. Otherwise one row per
        ``1/FPS`` step of ``TARGET_S`` with the columns ``TARGET_S``,
        ``SOURCE_S``, ``SOURCE_LIP_S``, ``TIMESHIFT``, ``TIMESHIFT_LIP``,
        ``OFFSET``, ``OFFSET_LIP`` and ``time``.

    NOTE(review): a ``None`` result from ``get_decent_distance`` is not
    handled here; callers appear to be expected to check first -- confirm.
    """
    distance = get_decent_distance(url, target, min_distance, MAX_DISTANCE)
    video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)

    # Hash vector i owns lims[i+1]-lims[i] matches; repeat its index that often
    # so every match carries the position of the vector it belongs to.
    target_frames = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
    target_s = [i/FPS for j in target_frames for i in j]
    source_s = [i/FPS for i in I]

    # Make df
    df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
    if vanilla_df:
        return df

    df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
    # Weight every SOURCE_S by its match quality so the per-TARGET_S aggregate is a less noisy estimate
    df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT']

    # Group by TARGET_S so every time step ends up with a single weighted-average SOURCE_S
    grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
    grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']

    # Remake the dataframe with just TARGET_S and the aggregated SOURCE_S
    df = grouped_X.reset_index()
    df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
    df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')

    # Add NAN to "missing" x values (base it off hash vector, not target_s).
    # Both sides are rounded to one decimal so the float merge keys compare equal.
    # NOTE(review): one decimal presumably assumes 1/FPS is a multiple of 0.1 -- confirm.
    step_size = 1/FPS
    x_complete = np.round(np.arange(start=0.0, stop = df['TARGET_S'].max()+step_size, step = step_size), 1)
    df['TARGET_S'] = np.round(df['TARGET_S'], 1)
    df_complete = pd.DataFrame(x_complete, columns=['TARGET_S'])

    # Merge dataframes to get NAN values for every missing SOURCE_S
    df = df_complete.merge(df, on='TARGET_S', how='left')

    # Interpolate between frames since there are missing values
    df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)

    # Add timeshift col and timeshift col with Linearly Interpolated Values
    df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
    df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']

    # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
    df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
    df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])

    # Add time column for plotting
    df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
    return df
|