Spaces:
Build error
Build error
| import numpy as np | |
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from scipy import stats as st | |
def change_points_to_segments(df, change_points):
    """Turn detected change points into an ordered list of segment boundaries.

    Args:
        df (DataFrame): Match information between videos. The 'TARGET_S' value
            of its last row is interpreted as seconds and used as the closing
            boundary.
        change_points ([TimeSeriesChangePoint]): Change point objects; only
            their ``start_time`` attribute is read.

    Returns:
        A list that starts with time zero (as numpy.datetime64), continues with
        the ``start_time`` of every change point in the given order, and ends
        with the last 'TARGET_S' value converted to numpy.datetime64, so that
        consecutive pairs delimit the segments of the video.
    """
    # Opening boundary: zero seconds.
    boundaries = [pd.to_datetime(0.0, unit='s').to_datetime64()]

    # Interior boundaries: one per detected change point.
    for change_point in change_points:
        boundaries.append(change_point.start_time)

    # Closing boundary: the target time found in the last row of the dataframe.
    last_target_seconds = df.iloc[-1]['TARGET_S']
    boundaries.append(pd.to_datetime(last_target_seconds, unit='s').to_datetime64())
    return boundaries
def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
    """Add or subtract a number of seconds to a numpy.datetime64 object.

    The offset is applied with millisecond resolution. It is rounded to the
    nearest millisecond rather than truncated, so binary floating point error
    (e.g. ``2.3 % 1.0 == 0.2999999999999998``) cannot drop a millisecond.

    Args:
        datetime64 (numpy.datetime64): Datetime object to shift.
        seconds (float): Amount of seconds to add or subtract (may be negative).
        subtract (bool): If True the offset is subtracted, otherwise it is added.

    Returns:
        A numpy.datetime64 object shifted by the requested amount.

    Raises:
        ValueError: If ``seconds`` is NaN (it cannot be converted to an integer
            number of milliseconds).
    """
    # int() guarantees a plain Python integer even when `seconds` is a numpy scalar.
    total_milliseconds = int(round(float(seconds) * 1000))
    delta = np.timedelta64(total_milliseconds, 'ms')
    if subtract:
        return datetime64 - delta
    return datetime64 + delta
def plot_segment_comparison(df, change_points, video_mp4="Placeholder.mp4", video_id="Placeholder.videoID", threshold_diff=1.5):
    """Decide where each segment belongs in time and plot how that was decided.

    Based on the dataframe and detected change points this does two things:
        1. Make a decision on where each segment belongs in time and return
           that info as a list of dicts.
        2. Plot how this decision got made as an informative four-panel figure.

    Args:
        df (DataFrame): Match information between videos. The columns 'time',
            'SOURCE_S', 'OFFSET', 'OFFSET_LIP', 'ROLL_OFFSET_MODE' and
            'TARGET_S' are read.
        change_points ([TimeSeriesChangePoint]): Array of time series change point objects.
        video_mp4 (str): Name of the source video to return as extra info.
        video_id (str): The unique identifier for the video currently being compared.
        threshold_diff (float): A segment whose median absolute deviation from
            its mode is not below this value is drawn in red (likely a bad match).

    Returns:
        fig (Figure): Figure that shows the comparison between two videos.
        segment_decisions (list[dict]): JSON-style list with one dictionary per
            segment containing the decision information. Segments without any
            non-NaN metric value are skipped and produce no entry.
    """
    metric = 'ROLL_OFFSET_MODE'  # 'OFFSET'

    fig, ax_arr = plt.subplots(4, 1, figsize=(16, 6), dpi=300, sharex=True)
    source_ax, offset_ax, interp_ax, segment_ax = ax_arr

    source_ax.set_title(video_id)
    sns.scatterplot(data=df, x='time', y='SOURCE_S', ax=source_ax, label="SOURCE_S", color='blue', alpha=1.0)

    # Original datapoints without linear interpolation, offset by target video time
    sns.scatterplot(data=df, x='time', y='OFFSET', ax=offset_ax, label="OFFSET", color='orange', alpha=1.0)

    # Linearly interpolated values next to the metric values
    sns.lineplot(data=df, x='time', y='OFFSET_LIP', ax=interp_ax, label="OFFSET_LIP", color='orange')
    sns.scatterplot(data=df, x='time', y=metric, ax=interp_ax, label=metric, alpha=0.5)

    # Detected change points drawn as vertical lines which delimit the segments.
    # Everything below draws on the bottom axes explicitly instead of relying
    # on pyplot's implicit "current axes".
    sns.scatterplot(data=df, x='time', y=metric, ax=segment_ax, label=metric, s=20)
    timestamps = change_points_to_segments(df, change_points)
    metric_min = np.min(df[metric])
    metric_max = np.max(df[metric])
    for boundary in timestamps:
        segment_ax.vlines(x=boundary, ymin=metric_min, ymax=metric_max, colors='black', lw=2, alpha=0.5)

    # Time to add to each origin time to get the correct time back since it is
    # offset by add_offset. It does not depend on the segment, so compute it once.
    add_offset = np.min(df['SOURCE_S'])

    # To store "decisions" about segments
    segment_decisions = []
    for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):
        # Cut out the rows strictly between the two segment lines and drop NaNs
        segment = df[(df['time'] > start_time) & (df['time'] < end_time)]
        segment_no_nan = segment[~np.isnan(segment[metric])]
        if segment_no_nan.empty:
            # No usable data between these change points: a mode cannot be
            # computed, so no decision can be made for this segment.
            continue

        # Summary statistic of the segment (mode). Depending on the SciPy
        # version the mode of 1-D input is a length-1 array or a scalar;
        # np.atleast_1d handles both.
        seg_sum_stat = np.atleast_1d(st.mode(segment_no_nan[metric].to_numpy())[0])[0]

        # Median absolute difference to the mode tells whether the segment is a "straight line"
        average_diff = np.median(np.abs(segment_no_nan['OFFSET_LIP'] - seg_sum_stat))
        average_offset = np.mean(segment_no_nan['OFFSET_LIP'])

        # Written as a negated "<" so that a NaN difference still counts as noisy
        noisy = not (average_diff < threshold_diff)

        origin_start_time = add_seconds_to_datetime64(start_time, seg_sum_stat + add_offset)
        origin_end_time = add_seconds_to_datetime64(end_time, seg_sum_stat + add_offset)

        # Green for a confident prediction (straight line), red otherwise
        line_color = 'red' if noisy else 'green'
        segment_ax.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color=line_color, lw=5, alpha=0.5)
        segment_ax.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color=line_color, rotation=-0.0, fontsize=14)

        # Decisions about segments
        segment_decisions.append({
            "Target Start Time": pd.to_datetime(start_time).strftime('%H:%M:%S'),
            "Target End Time": pd.to_datetime(end_time).strftime('%H:%M:%S'),
            "Source Start Time": pd.to_datetime(origin_start_time).strftime('%H:%M:%S'),
            "Source End Time": pd.to_datetime(origin_end_time).strftime('%H:%M:%S'),
            "Source Video ID": video_id,
            "Source Video .mp4": video_mp4,
            "Uncertainty": np.round(average_diff, 3),
            "Average Offset in Seconds": np.round(average_offset, 3),
        })

    # Rotate the tick labels of the shared time axis (shown on the bottom panel)
    segment_ax.tick_params(axis='x', labelrotation=90)
    return fig, segment_decisions