File size: 3,239 Bytes
c456c14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import argparse
import os.path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from sklearn.manifold import TSNE

from uvd.decomp.decomp import (
    embedding_decomp,
)
from uvd.models.preprocessors import *
import uvd.utils as U

from decord import VideoReader


def vis_2d_tsne(embeddings: np.ndarray, labels: list, title: "str | None" = None):
    """Project embeddings to 2-D with t-SNE and show a scatter plot colored by label.

    Args:
        embeddings: Array with one row per sample; handed to
            ``TSNE.fit_transform``.
        labels: One label per row of ``embeddings``; used as the scatter hue.
        title: Plot title. When omitted, the class name of a module-level
            ``preprocessor`` object is used if one exists (the script entry
            point of this file defines it); otherwise the title is empty.

    Raises:
        ValueError: If ``labels`` and ``embeddings`` differ in length.
    """
    n_samples = len(embeddings)
    # Fail fast: otherwise the mismatch only surfaces in the DataFrame
    # constructor, after the (slow) t-SNE fit has already run.
    if len(labels) != n_samples:
        raise ValueError(
            f"labels has {len(labels)} entries but embeddings has {n_samples} rows"
        )

    # scikit-learn requires perplexity < n_samples and defaults to 30.0, so
    # short clips would be rejected. Only override when the default cannot work.
    tsne_kwargs = {}
    if 1 < n_samples <= 30:
        tsne_kwargs["perplexity"] = float(n_samples - 1)
    tsne = TSNE(n_components=2, **tsne_kwargs)
    tsne_result = tsne.fit_transform(embeddings)
    tsne_result_df = pd.DataFrame(
        {"tsne_1": tsne_result[:, 0], "tsne_2": tsne_result[:, 1], "label": labels}
    )

    if title is None:
        # Backward compatibility: previously the title was always read from a
        # global `preprocessor`, which raised NameError when this function was
        # used outside the script entry point.
        fallback = globals().get("preprocessor")
        title = f"{fallback.__class__.__name__}" if fallback is not None else ""

    fig, ax = plt.subplots(1)
    sns.scatterplot(x="tsne_1", y="tsne_2", hue="label", data=tsne_result_df, ax=ax, s=120)
    # Same padded limits on both axes so the equal-aspect plot is square.
    lim = (tsne_result.min() - 5, tsne_result.max() + 5)
    ax.set_xlim(lim)
    ax.set_ylim(lim)
    ax.set_aspect("equal")
    ax.set_title(title)
    plt.show()


def vis_3d_tsne(embeddings: np.ndarray, labels: list, title: "str | None" = None):
    """Project embeddings to 3-D with t-SNE and show a 3-D scatter plot per label.

    Args:
        embeddings: Array with one row per sample; handed to
            ``TSNE.fit_transform``.
        labels: One label per row of ``embeddings``; each distinct label is
            drawn as its own scatter series with its own color.
        title: Plot title. When omitted, the class name of a module-level
            ``preprocessor`` object is used if one exists (the script entry
            point of this file defines it); otherwise the title is empty.

    Raises:
        ValueError: If ``labels`` and ``embeddings`` differ in length.
    """
    n_samples = len(embeddings)
    # Fail fast: otherwise the mismatch only surfaces in the DataFrame
    # constructor, after the (slow) t-SNE fit has already run.
    if len(labels) != n_samples:
        raise ValueError(
            f"labels has {len(labels)} entries but embeddings has {n_samples} rows"
        )

    # scikit-learn requires perplexity < n_samples and defaults to 30.0, so
    # short clips would be rejected. Only override when the default cannot work.
    tsne_kwargs = {}
    if 1 < n_samples <= 30:
        tsne_kwargs["perplexity"] = float(n_samples - 1)
    tsne = TSNE(n_components=3, **tsne_kwargs)
    tsne_result = tsne.fit_transform(embeddings)
    tsne_result_df = pd.DataFrame(
        {
            "tsne_1": tsne_result[:, 0],
            "tsne_2": tsne_result[:, 1],
            "tsne_3": tsne_result[:, 2],
            "label": labels,
        }
    )

    if title is None:
        # Backward compatibility: previously the title was always read from a
        # global `preprocessor`, which raised NameError when this function was
        # used outside the script entry point.
        fallback = globals().get("preprocessor")
        title = f"{fallback.__class__.__name__}" if fallback is not None else ""

    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")

    # One evenly spaced viridis color per distinct label.
    palette = sns.color_palette("viridis", as_cmap=True)
    unique_labels = tsne_result_df["label"].unique()
    colors = palette(np.linspace(0, 1, len(unique_labels)))
    color_dict = dict(zip(unique_labels, colors))

    for label in unique_labels:
        subset = tsne_result_df[tsne_result_df["label"] == label]
        ax.scatter(
            subset["tsne_1"],
            subset["tsne_2"],
            subset["tsne_3"],
            c=[color_dict[label]],  # wrapped in a list: one RGBA row for the series
            label=label,
            s=120,
        )
    # The per-series `label=` values are only visible once a legend is drawn;
    # this mirrors the legend seaborn adds automatically in the 2-D plot.
    ax.legend(title="label")
    ax.set_title(title)
    plt.show()


if __name__ == "__main__":
    # Default clip path is resolved relative to this script's directory.
    default_video = U.f_join(
        os.path.dirname(__file__),
        "examples/microwave-bottom_burner-light_switch-slide_cabinet.mp4",
    )

    parser = argparse.ArgumentParser()
    parser.add_argument("--video_file", default=default_video)
    parser.add_argument("--preprocessor_name", default="vip")
    args = parser.parse_args()

    use_gpu = torch.cuda.is_available()
    if not use_gpu:
        print("NO GPU FOUND")
    device = "cuda" if use_gpu else None

    # Decode every frame at 224x224 into a single numpy array.
    frames = VideoReader(args.video_file, height=224, width=224)[:].asnumpy()

    # NOTE: `preprocessor` must stay a module-level name; the plotting
    # functions above read it to build their titles.
    preprocessor = get_preprocessor(args.preprocessor_name, device=device)
    embeddings = preprocessor.process(frames, return_numpy=True)

    decomp_options = dict(
        fill_embeddings=False,
        return_intermediate_curves=False,
        normalize_curve=False,
        min_interval=20,
        smooth_method="kernel",
        gamma=0.1,
    )
    _, decomp_meta = embedding_decomp(embeddings=embeddings, **decomp_options)
    milestone_indices = decomp_meta.milestone_indices
    milestone_rgbs = frames[milestone_indices]

    # Each milestone contributes as many copies of its position in
    # `milestone_indices` as the gap to the previous milestone (the first one
    # contributes its own value).
    labels = []
    for segment_id, milestone in enumerate(milestone_indices):
        if segment_id > 0:
            span = milestone - milestone_indices[segment_id - 1]
        else:
            span = milestone
        labels.extend(segment_id for _ in range(span))
    # Duplicate the first label at the front; presumably this yields one label
    # per frame when the last milestone is the final frame index — TODO confirm.
    labels = [labels[0]] + labels

    vis_2d_tsne(embeddings, labels)
    vis_3d_tsne(embeddings, labels)