File size: 3,346 Bytes
f6ffda2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import os

import pandas as pd
import torch
import glob

from eval_func.bleu.bleu import Bleu
from eval_func.rouge.rouge import Rouge
from eval_func.cider.cider import Cider


def get_eval_score(ref, hypo):
    """Score hypotheses against references with BLEU-1..4, ROUGE-L and CIDEr.

    Each scorer's ``compute_score(ref, hypo)`` is called and only the first
    element of the returned pair is kept. A list result contributes one
    entry per element; any other result contributes a single entry.

    Args:
        ref: References, passed unchanged to every scorer.
        hypo: Hypotheses, passed unchanged to every scorer.

    Returns:
        dict pairing metric names with scores, in scorer order.
    """
    metrics = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]

    values = []
    names = []
    for scorer, label in metrics:
        # The second element of the pair is not used here.
        overall, _ = scorer.compute_score(ref, hypo)

        if isinstance(overall, list):
            values.extend(overall)
        else:
            values.append(overall)

        if isinstance(label, list):
            names.extend(label)
        else:
            names.append(label)

    # Names and values are paired positionally.
    return dict(zip(names, values))


def get_vocabulary(in_path, out_file):
    """Pick the vocabulary builder that matches the dataset named in a path.

    The lower-cased ``in_path`` is searched for ``'levir'``, ``'dubai'`` and
    ``'clevr'``, in that order; the first keyword found selects the builder.

    Args:
        in_path: Dataset path; also forwarded to the selected builder.
        out_file: Output path forwarded to the selected builder.

    Returns:
        The selected builder's result, or ``None`` when no keyword matches.
    """
    lowered = in_path.lower()
    # Lambdas keep the builders un-called until a keyword actually matches.
    builders = (
        ('levir', lambda: get_levir_vocabulary(in_path, out_file)),
        ('dubai', lambda: get_dubai_vocabulary(in_path, out_file)),
        ('clevr', lambda: get_clevr_vocabulary(in_path, out_file)),
    )
    for keyword, build in builders:
        if keyword in lowered:
            return build()
    return None


def get_levir_vocabulary(in_path, out_file, count_threshold=5):
    """Build a token-to-index vocabulary from one caption JSON file.

    The file is read as JSON holding an ``'images'`` list; each image has a
    ``'sentences'`` list and each sentence a ``'tokens'`` list. Tokens that
    occur strictly more than ``count_threshold`` times are kept and numbered
    from 4 upwards in ``value_counts`` order (most frequent first). Indices
    0-3 are reserved for ``PAD``, ``START``, ``UNK`` and ``END``.

    Args:
        in_path: Path of the caption JSON file (read as UTF-8).
        out_file: Path the vocabulary is written to as JSON.
        count_threshold: A token is kept only if it occurs more than this
            many times. Defaults to 5.

    Returns:
        dict mapping every kept token and the four reserved tokens to an
        integer index.
    """
    reserved = {'PAD': 0, 'START': 1, 'UNK': 2, 'END': 3}

    # Explicit encoding so decoding does not depend on the platform locale.
    with open(in_path, encoding='utf-8') as fin:
        data = json.load(fin)['images']

    tokens = [
        token
        for image in data
        for sentence in image['sentences']
        for token in sentence['tokens']
    ]
    # dtype=object keeps tokens as plain Python objects, also for empty input.
    occurencies = pd.Series(tokens, dtype=object).value_counts()
    selected = occurencies[occurencies > count_threshold]

    # A corpus token spelled like a reserved one must not consume an index:
    # it would be overwritten below and leave a hole in the index range.
    words = [word for word in selected.index if word not in reserved]
    vocab = {word: i + len(reserved) for i, word in enumerate(words)}
    vocab.update(reserved)

    with open(out_file, 'w', encoding='utf-8') as fout:
        json.dump(vocab, fout)

    return vocab


def get_dubai_vocabulary(in_path, out_file):
    """Build a token-to-index vocabulary from every ``*.json`` file in a folder.

    Each file is read as JSON holding an ``'images'`` list; each image has a
    ``'sentences'`` list and each sentence a ``'tokens'`` list. Every token
    is kept (no frequency cut-off) and numbered from 4 upwards in
    ``value_counts`` order (most frequent first). Indices 0-3 are reserved
    for ``PAD``, ``START``, ``UNK`` and ``END``.

    Args:
        in_path: Directory containing the caption JSON files (read as UTF-8).
        out_file: Path the vocabulary is written to as JSON.

    Returns:
        dict mapping every token and the four reserved tokens to an integer
        index.
    """
    reserved = {'PAD': 0, 'START': 1, 'UNK': 2, 'END': 3}

    data = []
    # glob gives no ordering guarantee; sorting makes the order in which the
    # files are read reproducible across machines.
    for path in sorted(glob.glob(in_path + '/*.json')):
        # Explicit encoding so decoding does not depend on the platform locale.
        with open(path, encoding='utf-8') as fin:
            data.extend(json.load(fin)['images'])

    tokens = [
        token
        for image in data
        for sentence in image['sentences']
        for token in sentence['tokens']
    ]
    # dtype=object keeps tokens as plain Python objects, also for empty input.
    selected = pd.Series(tokens, dtype=object).value_counts()

    # A corpus token spelled like a reserved one must not consume an index:
    # it would be overwritten below and leave a hole in the index range.
    words = [word for word in selected.index if word not in reserved]
    vocab = {word: i + len(reserved) for i, word in enumerate(words)}
    vocab.update(reserved)

    with open(out_file, 'w', encoding='utf-8') as fout:
        json.dump(vocab, fout)

    return vocab


def get_clevr_vocabulary(in_path, out_file):
    """Build a token-to-index vocabulary from the two caption files in a folder.

    ``change_captions.json`` and ``no_change_captions.json`` are read from
    ``in_path``. Each is a JSON mapping whose values are lists of caption
    strings. Captions are split on single spaces; every resulting token is
    kept and numbered from 4 upwards in ``value_counts`` order (most
    frequent first). Indices 0-3 are reserved for ``PAD``, ``START``,
    ``UNK`` and ``END``.

    Args:
        in_path: Directory containing the two caption files.
        out_file: Path the vocabulary is written to as JSON.

    Returns:
        dict mapping every token and the four reserved tokens to an integer
        index.
    """
    reserved = {'PAD': 0, 'START': 1, 'UNK': 2, 'END': 3}

    sents = []
    for file_name in ('change_captions.json', 'no_change_captions.json'):
        with open(os.path.join(in_path, file_name), 'r', encoding='utf-8') as fin:
            captions = json.load(fin)
        sents.extend(sent for key in captions for sent in captions[key])

    tokens = [token for sent in sents for token in sent.split(' ')]
    # dtype=object keeps tokens as plain Python objects, also for empty input.
    occurencies = pd.Series(tokens, dtype=object).value_counts()

    # A corpus token spelled like a reserved one must not consume an index:
    # it would be overwritten below and leave a hole in the index range.
    words = [word for word in occurencies.index if word not in reserved]
    vocab = {word: i + len(reserved) for i, word in enumerate(words)}
    vocab.update(reserved)

    with open(out_file, 'w', encoding='utf-8') as fout:
        json.dump(vocab, fout)

    return vocab


def unormalize(tensor, mean=None, std=None):
    """Undo a per-channel normalisation, or min-max rescale a batch.

    When both ``mean`` and ``std`` are given, channel ``i`` is mapped to
    ``x * std[i] + mean[i]`` and the result is clipped to ``[0, 1]``. A 4-D
    input is treated as a batch and every item is processed channel-wise;
    any other input has its first dimension treated as the channel axis.

    Otherwise the input must be 4-D, and each item along the first
    dimension is independently rescaled so its minimum becomes 0 and its
    maximum 1. An item whose values are all equal is returned as zeros.

    The input tensor is never modified; a new tensor is returned.

    Args:
        tensor: Tensor to rescale.
        mean: Per-channel means, or ``None``.
        std: Per-channel standard deviations, or ``None``.

    Returns:
        A tensor of the same shape as ``tensor``.
    """
    if mean is not None and std is not None:
        # Work on a copy so the caller's tensor keeps its normalised values.
        out = tensor.clone()
        # For a batch, step into each item first so that mean/std are paired
        # with channels rather than with batch entries.
        images = out if out.dim() == 4 else out.unsqueeze(0)
        for image in images:
            for channel, m, s in zip(image, mean, std):
                channel.mul_(s).add_(m)
        return torch.clip(out, min=0, max=1)

    b, c, h, w = tensor.shape
    # reshape (unlike view) also accepts non-contiguous input.
    flat = tensor.reshape(b, -1)
    flat = flat - flat.min(1, keepdim=True)[0]
    span = flat.max(1, keepdim=True)[0]
    # A constant item has zero span; divide by 1 instead to avoid 0/0 = NaN.
    span = torch.where(span == 0, torch.ones_like(span), span)
    return (flat / span).reshape(b, c, h, w)