ImAMJayKIM committed on
Commit
c1596ac
·
verified ·
1 Parent(s): 7d4e3cc

Upload 96 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +486 -0
  2. outputs/captioning/swin-transformer_final_best.pt +3 -0
  3. outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth +3 -0
  4. params.yaml +242 -0
  5. requirements.txt +342 -0
  6. src/caption/check_clip_score.py +440 -0
  7. src/caption/generate_captions_blip.py +220 -0
  8. src/caption/generate_captions_florence2.py +345 -0
  9. src/caption/generate_captions_git.py +600 -0
  10. src/caption/generate_captions_vit_gpt2.py +457 -0
  11. src/collection/check_class_counts.py +81 -0
  12. src/collection/collect_filtering_images.py +228 -0
  13. src/collection/count_label_hf.py +115 -0
  14. src/collection/download_dataset_hf.py +187 -0
  15. src/collection/download_dataset_kg.py +56 -0
  16. src/collection/download_dataset_us.py +233 -0
  17. src/collection/get_label_list_hf.py +98 -0
  18. src/collection/select_60_images.py +115 -0
  19. src/collection/unzip_data_kg.py +45 -0
  20. src/dataset/.ipynb_checkpoints/captioning_dataset-checkpoint.py +124 -0
  21. src/dataset/.ipynb_checkpoints/train_sub_tokenizer-checkpoint.py +55 -0
  22. src/dataset/__pycache__/build_vocab.cpython-310.pyc +0 -0
  23. src/dataset/__pycache__/captioning_dataset.cpython-310.pyc +0 -0
  24. src/dataset/__pycache__/classification_dataset.cpython-310.pyc +0 -0
  25. src/dataset/__pycache__/collate_caption.cpython-310.pyc +0 -0
  26. src/dataset/build_vocab.py +62 -0
  27. src/dataset/captioning_dataset.py +124 -0
  28. src/dataset/classification_dataset.py +69 -0
  29. src/dataset/collate_caption.py +14 -0
  30. src/dataset/sub_tokenizer1000.model +3 -0
  31. src/dataset/sub_tokenizer1000.vocab +1000 -0
  32. src/dataset/sub_tokenizer1500.model +3 -0
  33. src/dataset/sub_tokenizer1500.vocab +1500 -0
  34. src/dataset/sub_tokenizer2000.model +3 -0
  35. src/dataset/sub_tokenizer2000.vocab +2000 -0
  36. src/dataset/sub_tokenizing_captions.txt +0 -0
  37. src/dataset/train_sub_tokenizer.py +55 -0
  38. src/debug/test_forward.py +208 -0
  39. src/engines/__pycache__/captioning_trainer.cpython-310.pyc +0 -0
  40. src/engines/__pycache__/captioning_validator.cpython-310.pyc +0 -0
  41. src/engines/__pycache__/classification_trainer.cpython-310.pyc +0 -0
  42. src/engines/__pycache__/classification_validator.cpython-310.pyc +0 -0
  43. src/engines/__pycache__/resnet18_decoder_trainer.cpython-310.pyc +0 -0
  44. src/engines/__pycache__/resnet18_decoder_validator.cpython-310.pyc +0 -0
  45. src/engines/captioning_trainer.py +43 -0
  46. src/engines/captioning_validator.py +36 -0
  47. src/engines/classification_trainer.py +70 -0
  48. src/engines/classification_validator.py +90 -0
  49. src/metrics/.ipynb_checkpoints/evaluate_caption-checkpoint.py +35 -0
  50. src/metrics/.ipynb_checkpoints/make_show_all_caption-checkpoint.py +105 -0
app.py ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ import gradio as gr
7
+ import numpy as np
8
+ import torch
9
+ import yaml
10
+ from PIL import Image
11
+ from pytorch_grad_cam import GradCAM
12
+ from pytorch_grad_cam.utils.image import show_cam_on_image
13
+
14
+
15
def _resolve_workspace_root():
    """Return the directory that is expected to hold ``params.yaml`` and ``src/``.

    Resolution order:
      1. The ``WORKSPACE_ROOT`` environment variable, when it is set.
      2. The parent of this script's directory (the original default).
      3. This script's own directory, but only when the original default
         has no ``params.yaml`` and the script directory does. This covers
         a layout where ``app.py`` sits next to ``params.yaml`` and ``src/``.
    """
    override = os.environ.get("WORKSPACE_ROOT")
    if override is not None:
        return Path(override)

    script_dir = Path(__file__).resolve().parent
    legacy_root = script_dir.parent

    legacy_has_params = (legacy_root / "params.yaml").is_file()
    if not legacy_has_params and (script_dir / "params.yaml").is_file():
        return script_dir

    return legacy_root


WORKSPACE_ROOT = _resolve_workspace_root()
SRC_DIR = WORKSPACE_ROOT / "src"

# The project packages (models, transforms, utils, visualization) are imported
# as top-level modules below, so ``src`` has to be on the import path first.
sys.path.insert(0, str(SRC_DIR))
20
+
21
+ from models.swin import EncoderSwinTiny
22
+ from transforms.image_transform import get_classification_valid_transform
23
+ from utils.captioning_inference import build_caption_runtime, decode_tokens
24
+ from visualization.generate_gradcam import (
25
+ SwinClassifierWrapper,
26
+ reshape_transform,
27
+ )
28
+
29
+
30
# Lazily populated runtime caches. Each stays ``None`` until the matching
# ``get_*_runtime()`` function builds the runtime on first use.
CLASSIFICATION_STATE = None
CAPTIONING_STATE = None
32
+
33
+
34
def load_params():
    """Read ``params.yaml`` from the workspace root and return its parsed content.

    The file supplies the demo, model and checkpoint settings used by the
    rest of this module. It is parsed with ``yaml.safe_load``.
    """
    params_path = WORKSPACE_ROOT / "params.yaml"
    with open(params_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
38
+
39
def load_class_names(params):
    """Return the class-name list stored under ``demo.class_names``.

    Args:
        params: Parsed ``params.yaml`` content (a mapping).

    Returns:
        The list of class-name strings, exactly as configured.

    Raises:
        ValueError: If ``demo.class_names`` is not a list of strings, or if
            it is missing or empty.
    """
    # An empty ``demo:`` section is parsed as None; treat it like a missing one
    # so the caller gets the ValueError below instead of an AttributeError.
    demo_config = params.get("demo") or {}
    class_names = demo_config.get("class_names", [])

    is_string_list = isinstance(class_names, list) and all(
        isinstance(class_name, str) for class_name in class_names
    )
    if not is_string_list:
        raise ValueError("demo.class_names must be a list of class name strings.")

    if not class_names:
        raise ValueError("No class names found in params.yaml demo.class_names.")

    return class_names
53
+
54
def get_device(params):
    """Pick the torch device for inference.

    Args:
        params: Parsed ``params.yaml`` content. ``train.device`` is read and
            defaults to ``"cuda"`` when absent.

    Returns:
        ``torch.device("cuda")`` only when the configured device is ``"cuda"``
        and CUDA is actually available; ``torch.device("cpu")`` otherwise.
    """
    # An empty ``train:`` section is parsed as None; fall back to the default.
    train_config = params.get("train") or {}
    device_name = train_config.get("device", "cuda")

    if device_name == "cuda" and torch.cuda.is_available():
        return torch.device("cuda")

    return torch.device("cpu")
63
+
64
+
65
def load_classification_checkpoint(model, checkpoint_path, device):
    """Load classifier weights from ``checkpoint_path`` into ``model`` in place.

    Args:
        model: Module whose ``load_state_dict`` receives the weights.
        checkpoint_path: Path of the checkpoint file.
        device: Passed to ``torch.load`` as ``map_location``.

    The file may contain either a bare state dict or a dict that wraps it
    under the ``"model_state_dict"`` key; in the latter case only the inner
    state dict is used.
    """
    # NOTE(review): torch.load unpickles the file, so only checkpoints from a
    # trusted source should be loaded here. Consider ``weights_only=True`` if
    # the installed torch version supports it and the checkpoint allows it.
    checkpoint = torch.load(checkpoint_path, map_location=device)

    wrapped = isinstance(checkpoint, dict) and "model_state_dict" in checkpoint
    state_dict = checkpoint["model_state_dict"] if wrapped else checkpoint

    model.load_state_dict(state_dict)
77
+
78
+
79
def build_classification_runtime():
    """Build the runtime state needed by the classification tab.

    Returns:
        A dict with the keys ``params``, ``model``, ``model_name``, ``device``,
        ``class_names``, ``transform`` and ``checkpoint_path``.

    Raises:
        ValueError: If ``classification.model_name`` is not ``"swin_t"``, or
            if the class names are invalid (see ``load_class_names``).
        FileNotFoundError: If the configured checkpoint file does not exist.
    """
    params = load_params()
    model_name = params["classification"]["model_name"]

    # The model construction and the Grad-CAM wrapper used by this demo are
    # Swin-T specific, so any other model is rejected explicitly.
    if model_name != "swin_t":
        raise ValueError(
            "The combined Gradio demo currently supports only swin_t "
            f"for classification, got: {model_name}"
        )

    class_names = load_class_names(params)
    device = get_device(params)

    checkpoint_path = WORKSPACE_ROOT / params["classification"]["final_checkpoint"]
    # Fail before building the model, with a message that names the path.
    if not checkpoint_path.is_file():
        raise FileNotFoundError(
            f"Classification checkpoint not found: {checkpoint_path}"
        )

    model = EncoderSwinTiny(num_classes=len(class_names)).to(device)
    load_classification_checkpoint(model, checkpoint_path, device)
    model.eval()

    return {
        "params": params,
        "model": model,
        "model_name": model_name,
        "device": device,
        "class_names": class_names,
        "transform": get_classification_valid_transform(),
        "checkpoint_path": checkpoint_path,
    }
115
+
116
+
117
def get_classification_runtime():
    """Return the classification runtime, building it on the first call.

    The result of ``build_classification_runtime`` is stored in the
    module-level ``CLASSIFICATION_STATE`` and reused afterwards, so the model
    is not loaded until the first prediction is requested.
    """
    global CLASSIFICATION_STATE

    if CLASSIFICATION_STATE is None:
        CLASSIFICATION_STATE = build_classification_runtime()

    return CLASSIFICATION_STATE
126
+
127
+
128
def get_caption_checkpoint_path(params):
    """Return the captioning checkpoint path derived from ``params``.

    Args:
        params: Parsed ``params.yaml`` content.

    Returns:
        ``<WORKSPACE_ROOT>/<save_dir>/<final_checkpoint>`` when
        ``captioning.checkpoint.final_checkpoint`` is set; otherwise
        ``<WORKSPACE_ROOT>/<save_dir>/<encoder>-<decoder>_<version>_best.pt``
        built from the ``captioning`` section.
    """
    caption_config = params["captioning"]
    checkpoint_config = caption_config["checkpoint"]
    final_checkpoint = checkpoint_config.get("final_checkpoint")
    save_dir = WORKSPACE_ROOT / checkpoint_config["save_dir"]

    # An explicitly configured file name takes priority.
    if final_checkpoint:
        return save_dir / final_checkpoint

    # Otherwise fall back to the "<encoder>-<decoder>_<version>_best.pt" naming rule.
    encoder_name = caption_config["encoder"]
    decoder_name = caption_config["decoder"]
    version = caption_config["version"]
    return save_dir / f"{encoder_name}-{decoder_name}_{version}_best.pt"
146
+
147
+
148
def get_captioning_runtime():
    """Return the captioning runtime, building it on the first call.

    The runtime comes from ``build_caption_runtime`` and is cached in the
    module-level ``CAPTIONING_STATE``, so the captioning models are not loaded
    until the captioning tab is actually used.
    """
    global CAPTIONING_STATE

    if CAPTIONING_STATE is None:
        params = load_params()
        CAPTIONING_STATE = build_caption_runtime(
            WORKSPACE_ROOT,
            checkpoint_path=get_caption_checkpoint_path(params),
        )

    return CAPTIONING_STATE
160
+
161
+
162
def make_gradcam_overlay(model, image, tensor, device):
    """Create a Grad-CAM overlay for the classifier's last Swin block.

    Args:
        model: Classifier exposing ``backbone`` and ``classifier`` submodules.
        image: RGB PIL image that the heatmap is blended onto.
        tensor: Preprocessed input batch passed to Grad-CAM.
        device: Device the Grad-CAM wrapper is moved to.

    Returns:
        A PIL image with the heatmap drawn over the resized input image.
    """
    # Grad-CAM needs gradients, so frozen parameters are enabled temporarily.
    # The previous flags are restored afterwards because the model is cached
    # and reused by later requests.
    toggled_params = [
        *model.backbone.parameters(),
        *model.classifier.parameters(),
    ]
    previous_flags = [param.requires_grad for param in toggled_params]
    for param in toggled_params:
        param.requires_grad = True

    try:
        gradcam_model = SwinClassifierWrapper(model).to(device)
        gradcam_model.eval()

        target_layer = model.backbone.features[-1][-1].norm2

        with GradCAM(
            model=gradcam_model,
            target_layers=[target_layer],
            reshape_transform=reshape_transform,
        ) as cam:
            grayscale_cam = cam(input_tensor=tensor)[0]
    finally:
        for param, was_trainable in zip(toggled_params, previous_flags):
            param.requires_grad = was_trainable
            if not was_trainable:
                # Drop any gradient buffer created while the flag was enabled.
                param.grad = None

    # The background image must match the CAM size, so it follows the input
    # tensor's height/width (224x224 with the original hard-coded value).
    height, width = tensor.shape[-2:]
    resized_image = image.resize((width, height))
    image_np = np.array(resized_image).astype(np.float32) / 255.0

    overlay = show_cam_on_image(
        image_np,
        grayscale_cam,
        use_rgb=True,
    )

    return Image.fromarray(overlay)
192
+
193
+
194
def predict_classification(image, show_gradcam):
    """Classify an uploaded image and optionally produce a Grad-CAM overlay.

    Args:
        image: PIL image from the Gradio input, or ``None`` when nothing was
            uploaded.
        show_gradcam: When truthy, a Grad-CAM overlay is generated as well.

    Returns:
        A 4-tuple ``(gradcam_image, summary, confidences, table)`` matching
        the four Gradio outputs: the overlay (or ``None``), a one-line summary
        string, a ``{class_name: probability}`` dict for the top-k classes,
        and table rows of ``[rank, class_name, "xx.xx%"]``.
    """
    # Without an image, return one placeholder per Gradio output.
    if image is None:
        return None, "Please upload an image.", {}, []

    runtime = get_classification_runtime()
    params = runtime["params"]
    model = runtime["model"]
    device = runtime["device"]
    class_names = runtime["class_names"]
    transform = runtime["transform"]

    image = image.convert("RGB")
    tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(tensor)
        probs = torch.softmax(logits, dim=1)[0]

    # Keep k within [1, number of classes].
    requested_top_k = int(params["demo"].get("top_k", 5))
    top_k = max(1, min(requested_top_k, len(class_names)))
    top_probs, top_indices = torch.topk(probs, k=top_k)

    top_probs = top_probs.detach().cpu().tolist()
    top_indices = top_indices.detach().cpu().tolist()
    ranked = list(zip(top_indices, top_probs))

    confidences = {class_names[idx]: float(prob) for idx, prob in ranked}

    predicted_idx = top_indices[0]
    predicted_label = class_names[predicted_idx]
    predicted_confidence = top_probs[0]
    summary = (
        f"Prediction: {predicted_label} "
        f"({predicted_confidence * 100:.2f}%)"
    )

    table = [
        [rank, class_names[idx], f"{prob * 100:.2f}%"]
        for rank, (idx, prob) in enumerate(ranked, start=1)
    ]

    gradcam_image = None

    # Grad-CAM is only computed when the checkbox is enabled.
    if show_gradcam:
        gradcam_image = make_gradcam_overlay(
            model,
            image,
            tensor,
            device,
        )

    return gradcam_image, summary, confidences, table
266
+
267
+
268
def caption_token_labels(generated_tokens, runtime, caption):
    """Build the token labels used as titles for the attention heatmaps.

    Args:
        generated_tokens: Sequence of generated token ids. Either a plain
            sequence of ints or an object with ``tolist()`` (such as a tensor).
        runtime: Mapping providing ``"w2i"`` (token -> id) and ``"i2w"``
            (id -> token).
        caption: Decoded caption string, used only as a fallback.

    Returns:
        One label per generated token, excluding ``<pad>``, ``<sos>`` and
        ``<eos>``; ids missing from ``i2w`` become ``"<unk>"``. If no label
        remains, the caption split on whitespace is returned instead.
    """
    word_to_id = runtime["w2i"]
    id_to_word = runtime["i2w"]
    special_ids = {
        word_to_id.get("<pad>"),
        word_to_id.get("<sos>"),
        word_to_id.get("<eos>"),
    }

    # Tensor elements hash by identity, so they would never match the ids in
    # ``special_ids`` or the keys of ``i2w``. Convert everything to plain ints.
    if hasattr(generated_tokens, "tolist"):
        generated_tokens = generated_tokens.tolist()
    token_ids = [int(token) for token in generated_tokens]

    labels = [
        id_to_word.get(token_id, "<unk>")
        for token_id in token_ids
        if token_id not in special_ids
    ]

    # Labels derived from token ids are preferred when there are any.
    if labels:
        return labels

    # Otherwise fall back to the words of the decoded caption.
    return caption.split()
287
+
288
+
289
@torch.no_grad()
def predict_captioning(image):
    """Generate a caption and per-layer cross-attention heatmaps for an image.

    Args:
        image: PIL image from the Gradio input, or ``None`` when nothing was
            uploaded.

    Returns:
        A 2-tuple ``(caption, heatmap_images)`` where ``heatmap_images`` is a
        list of ``(image_path, "Layer N")`` tuples, one per decoder layer.
    """
    # Without an image, return one placeholder per Gradio output.
    if image is None:
        return "Please upload an image.", []

    runtime = get_captioning_runtime()
    params = runtime["params"]
    device = runtime["device"]
    decoder = runtime["decoder"]
    eos_id = runtime["w2i"]["<eos>"]

    image = image.convert("RGB")
    image_tensor = runtime["transform"](image)
    image_tensor = image_tensor.unsqueeze(0).to(device)

    features = runtime["encoder"](
        image_tensor,
        return_features=True,
    )
    start_token = torch.full(
        (features.size(0),),
        runtime["w2i"]["<sos>"],
        dtype=torch.long,
        device=device,
    )

    beam_config = params["captioning"]["beam_search"]
    use_beam_search = beam_config.get("use_beam_search", True)
    beam_size = beam_config.get("beam_size", 3)

    if use_beam_search:
        # Beam search is enabled in params.yaml.
        generated_tokens, _, enc_dec_atten = decoder.generate_beam(
            features,
            start_token,
            eos_id,
            beam_size,
        )
    else:
        # Beam search is disabled: use the decoder's plain generate().
        generated_tokens, _, enc_dec_atten = decoder.generate(
            features,
            start_token,
            eos_id,
        )

    caption = decode_tokens(
        generated_tokens[0],
        runtime["w2i"],
        runtime["i2w"],
        params["captioning"]["tokenizer"]["use_subword"],
        sp_model_path=runtime["sp_model_path"],
    )

    caption_tokens = caption_token_labels(
        generated_tokens[0],
        runtime,
        caption,
    )

    # NOTE(review): a new temporary directory is created per request and never
    # removed, so heatmap files accumulate while the app runs. The paths are
    # returned to Gradio, so they cannot be deleted before returning.
    tmp_dir = Path(tempfile.mkdtemp(prefix="combined_captioning_gradio_"))
    input_image = image_tensor.squeeze(0).detach().cpu()
    n_layers = len(decoder.layers)
    heatmap_images = []

    # One heatmap image per decoder layer, shown in the Gallery.
    for layer in range(1, n_layers + 1):
        cross_atten_path = str(tmp_dir / f"cross_attention_layer_{layer}.jpg")
        decoder.show_cross_atten(
            enc_dec_atten[0],
            caption_tokens,
            layer,
            input_image,
            cross_atten_path,
        )
        heatmap_images.append((cross_atten_path, f"Layer {layer}"))

    return caption, heatmap_images
369
+
370
+
371
def create_demo():
    """Build the Gradio Blocks app with a Classification and a Captioning tab.

    Only configuration is read here; the models are loaded when a button
    handler first runs.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    params = load_params()
    top_k = max(1, int(params["demo"].get("top_k", 5)))
    classification_checkpoint = (
        WORKSPACE_ROOT / params["classification"]["final_checkpoint"]
    )
    caption_checkpoint = get_caption_checkpoint_path(params)

    with gr.Blocks(title="ImageNet Classification and Captioning Demo") as demo:
        gr.Markdown("# ImageNet Classification and Captioning Demo")

        with gr.Tabs():
            with gr.Tab("Classification"):
                gr.Markdown(
                    "Upload an image and classify it with the final checkpoint."
                )
                gr.Markdown(f"checkpoint: {classification_checkpoint}")

                with gr.Row():
                    with gr.Column():
                        classification_image_input = gr.Image(
                            type="pil",
                            label="Input Image",
                        )
                        gradcam_checkbox = gr.Checkbox(
                            value=bool(params["demo"].get("show_gradcam", True)),
                            label="Show Grad-CAM",
                        )
                        classification_button = gr.Button(
                            "Predict",
                            variant="primary",
                        )

                    with gr.Column():
                        gradcam_output = gr.Image(
                            type="pil",
                            label="Grad-CAM",
                        )
                        classification_summary_output = gr.Textbox(
                            label="Prediction",
                        )
                        confidence_output = gr.Label(
                            label="Top Prediction Scores",
                            num_top_classes=top_k,
                        )
                        table_output = gr.Dataframe(
                            headers=["Rank", "Class", "Confidence"],
                            datatype=["number", "str", "str"],
                            label=f"Top-{top_k}",
                            interactive=False,
                        )

                # Output order must match the tuple returned by
                # predict_classification.
                classification_button.click(
                    fn=predict_classification,
                    inputs=[
                        classification_image_input,
                        gradcam_checkbox,
                    ],
                    outputs=[
                        gradcam_output,
                        classification_summary_output,
                        confidence_output,
                        table_output,
                    ],
                )

            with gr.Tab("Captioning"):
                gr.Markdown(
                    "Upload an image and generate a caption with cross-attention heatmaps."
                )
                gr.Markdown(f"checkpoint: {caption_checkpoint}")

                with gr.Row():
                    with gr.Column():
                        captioning_image_input = gr.Image(
                            type="pil",
                            label="Input Image",
                        )
                        captioning_button = gr.Button(
                            "Generate Caption",
                            variant="primary",
                        )

                    with gr.Column():
                        caption_output = gr.Textbox(
                            label="Generated Caption",
                            lines=4,
                        )
                        cross_atten_output = gr.Gallery(
                            label="Cross Attention Heatmaps",
                            columns=2,
                            object_fit="contain",
                            height="auto",
                        )

                # Output order must match the tuple returned by
                # predict_captioning.
                captioning_button.click(
                    fn=predict_captioning,
                    inputs=[captioning_image_input],
                    outputs=[
                        caption_output,
                        cross_atten_output,
                    ],
                )

    return demo
476
+
477
+
478
if __name__ == "__main__":
    # Script entry point: build the app and serve it with the host, port and
    # share settings from the ``demo`` section of params.yaml.
    params = load_params()
    launch_config = params["demo"]

    demo = create_demo()
    # Keys that are absent are passed as None, which lets Gradio apply its own
    # defaults instead of failing with a KeyError.
    demo.launch(
        server_name=launch_config.get("host"),
        server_port=launch_config.get("port"),
        share=launch_config.get("share"),
    )
outputs/captioning/swin-transformer_final_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9897bcdee87a9b20241c9c742c27feb0f1204cf2d65456f3d892300a23b59adc
3
+ size 468449515
outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec30dab609101cc6a58722968771d66a81d44cd3098e79975c4c4ab59141b1b2
3
+ size 112163475
params.yaml ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project:
2
+ name: imagenet-project
3
+
4
+ data:
5
+ raw_dir: ./data/raw/
6
+ captions_file: ./data/captioning/annotations/train.json
7
+ dataset_version: cls_raw-20260525-v2
8
+ # dataset_version: raw-20260509-v1
9
+
10
+ split:
11
+ train_ratio: 0.7
12
+ val_ratio: 0.15
13
+ test_ratio: 0.15
14
+
15
+ train:
16
+ seed: 42
17
+ # repeated experiment
18
+ # seed: 7
19
+ # seed: 21
20
+ epochs: 20
21
+ batch_size: 32
22
+ num_workers: 4
23
+ device: cuda
24
+ optimizer: adam
25
+
26
+ preprocess:
27
+ image_size: 224
28
+ normalize: true
29
+
30
+ loss:
31
+ name: cross_entropy
32
+ ignore_index: pad_token
33
+
34
+ evaluate:
35
+ batch_size: 32
36
+ metrics:
37
+ - bleu
38
+ - rouge_l
39
+ - meteor
40
+
41
+ logging:
42
+ use_wandb: true
43
+ project_name: imagenet-project
44
+ log_interval: 10
45
+
46
+ outputs:
47
+ base_dir: outputs
48
+
49
+ demo:
50
+ host: 0.0.0.0
51
+ port: 7860
52
+ share: false
53
+ top_k: 5
54
+ show_gradcam: true
55
+ class_names : [airplane, apple, aster, banana, bicycle, bracelet, bulldog, bus, butterfly, car, carrot, cucumber, cup-cake, daisy, dandelion, dumpling, earrings, elephant, glasses, golden-retriever, hamburger, horse, iris, lavender, lily, marigold, motorcycle, necklace, orange, orchid, pants, pasta, penguin, persian-cat, pizza, rose, salad, sandwich, sheep, siamese-cat, sneakers, squirrel, steak, strawberry, sunflower, sushi, tomato, t-shirt, tulip, waffle]
56
+
57
+ cnn:
58
+ backbone: resnet18
59
+ pretrained: true
60
+ freeze: true
61
+ output_dim: 512
62
+ dropout: 0.3
63
+ pooling: avg
64
+
65
+ captioning:
66
+
67
+ # encoder: resnet18
68
+ encoder: swin
69
+ # encoder: vit
70
+ decoder: transformer
71
+ # decoder: lstm
72
+ # decoder: gru
73
+ version: final
74
+
75
+ epochs: 25
76
+ learning_rate: 0.0001
77
+ batch_size: 32
78
+ optimizer: adamw
79
+ max_caption_length: 30
80
+ train_num_caption: 2
81
+
82
+ debug: False
83
+
84
+ lstm:
85
+ embed_dim: 256
86
+ hidden_dim: 512
87
+ num_layers: 1
88
+
89
+ gru:
90
+ embed_dim: 256
91
+ hidden_dim: 512
92
+ num_layers: 1
93
+
94
+ transformer:
95
+ n_layers: 6
96
+ nhead: 8
97
+ d_model: 512
98
+ drop_p: 0.3
99
+ label_smoothing: 0
100
+ weight_decay: 0.001
101
+
102
+ data:
103
+ dataset_version: cap_raw-20260524-v1
104
+ train_img: ./data/captioning/raw/train/
105
+ train_caption: ./data/captioning/annotations/train.json
106
+ val_img: ./data/captioning/raw/val/
107
+ val_caption: ./data/captioning/annotations/val.json
108
+ test_img: ./data/captioning/raw/test/
109
+ test_caption: ./data/captioning/annotations/test.json
110
+
111
+ tokenizer:
112
+ min_freq: 3
113
+ max_vocab_size: 10000
114
+ sp_vocab_size: 2000
115
+ use_subword: False
116
+ sp_model_path: ./src/dataset/sub_tokenizer2000.model
117
+
118
+ checkpoint:
119
+ save_dir: ./outputs/captioning
120
+ final_checkpoint: swin-transformer_final_best.pt
121
+ resume: False
122
+
123
+ heatmap:
124
+ dec_atten_dir: /workspace/outputs/captioning/heatmap/
125
+ enc_dec_atten_dir: /workspace/outputs/captioning/heatmap/
126
+ layer: 6 # 몇번째 층
127
+ sample: [0, 410, 820, 1230, 1640] # caption & heatmap 몇번째 샘플(batch)
128
+
129
+ scheduler:
130
+ use_scheduler: False
131
+ warmup_step: 500
132
+ lr_scale: 0.5
133
+
134
+ beam_search:
135
+ use_beam_search: True
136
+ beam_size: 3
137
+
138
+
139
+
140
+ classification:
141
+
142
+ # model_name: resnet18
143
+ # model_name: efficientnet_b0
144
+ # model_name: convnext_tiny
145
+ # model_name: mobilenet_v3_small
146
+ # model_name: vit_b_16
147
+ model_name: swin_t
148
+ # model_name: deit_tiny_patch16_224
149
+
150
+ final_checkpoint: ./outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth
151
+
152
+ epochs: 50
153
+
154
+ learning_rate:
155
+
156
+ # baseline
157
+ cnn: 0.001
158
+ transformer: 0.0005
159
+
160
+ # hyperparameter tuning
161
+ # cnn: 0.0005
162
+ # transformer: 0.0001
163
+
164
+ # optimizer: adam
165
+ # optimizer: sgd
166
+ optimizer: adamw
167
+
168
+ # default
169
+ # weight_decay: 0.01
170
+
171
+ # tuning
172
+ weight_decay: 0.05
173
+
174
+ scheduler:
175
+ use: false
176
+
177
+ # use: true
178
+ # name: cosineannealinglr
179
+
180
+ augmentation:
181
+
182
+ # baseline
183
+ use_aug: false
184
+ type: none
185
+
186
+ # mixup
187
+ # use_aug: true
188
+ # type: mixup
189
+
190
+ # cutmix
191
+ # use_aug: true
192
+ # type: cutmix
193
+
194
+ label_smoothing: 0.0
195
+
196
+ # label smoothing experiment
197
+ # label_smoothing: 0.05
198
+ # label_smoothing: 0.1
199
+
200
+
201
+ metrics:
202
+
203
+ train:
204
+ - loss
205
+ - accuracy
206
+
207
+ validation:
208
+ - loss
209
+ - accuracy
210
+ - macro_f1
211
+
212
+ final_test:
213
+ - accuracy
214
+ - macro_f1
215
+ - precision
216
+ - recall
217
+ - confusion_matrix
218
+
219
+ checkpoint:
220
+ save_dir: /workspace/outputs/classification
221
+
222
+
223
+ latent_space:
224
+ data_dir: /workspace/data/raw
225
+ checkpoint: /workspace/outputs/classification/cls_swin-t_base_cls_raw-20260525-v2_lr-0005_bs-32_adamw_none_wdc-0.05_ls-0.0_best.pth
226
+ output_dir: /workspace/outputs/latent_space
227
+ output_umap_npy: cls_swin-t_best_umap_2d_test_nb10_md05
228
+ output_umap_png: cls_swin-t_best_umap_plt_test_nb10_md05
229
+ output_meta_csv: cls_swin-t_best_metadata_test_nb10_md05
230
+ split: test
231
+ batch_size: 32
232
+ num_workers: 4
233
+ device: cuda
234
+ seed: 42
235
+ save_meta: true
236
+ use_wandb: true
237
+ wandb_name: latent_space_umap
238
+
239
+ umap:
240
+ n_neighbors: 10
241
+ min_dist: 0.5
242
+ metric: cosine
requirements.txt ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.4.0
2
+ accelerate==0.27.2
3
+ aiobotocore==3.5.0
4
+ aiohappyeyeballs==2.6.1
5
+ aiohttp==3.13.5
6
+ aiohttp-retry==2.9.1
7
+ aioitertools==0.13.0
8
+ aiosignal==1.4.0
9
+ albumentations==1.4.7
10
+ amqp==5.3.1
11
+ annotated-doc==0.0.4
12
+ annotated-types==0.7.0
13
+ antlr4-python3-runtime==4.9.3
14
+ anyio==4.13.0
15
+ appdirs==1.4.4
16
+ archspec @ file:///croot/archspec_1697725767277/work
17
+ argon2-cffi==25.1.0
18
+ argon2-cffi-bindings==25.1.0
19
+ arrow==1.4.0
20
+ asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work
21
+ astunparse==1.6.3
22
+ async-lru==2.3.0
23
+ async-timeout==5.0.1
24
+ asyncssh==2.22.0
25
+ atpublic==7.0.0
26
+ attrs==26.1.0
27
+ babel==2.18.0
28
+ backoff==2.2.1
29
+ beautifulsoup4 @ file:///croot/beautifulsoup4-split_1681493039619/work
30
+ billiard==4.2.4
31
+ bleach==6.3.0
32
+ boltons @ file:///croot/boltons_1677628692245/work
33
+ boto3==1.42.91
34
+ botocore==1.42.91
35
+ Brotli @ file:///tmp/abs_ecyw11_7ze/croots/recipe/brotli-split_1659616059936/work
36
+ celery==5.6.3
37
+ certifi==2026.4.22
38
+ cffi==2.0.0
39
+ chardet @ file:///home/builder/ci_310/chardet_1640804867535/work
40
+ charset-normalizer==3.4.7
41
+ click==8.3.3
42
+ click-didyoumean==0.3.1
43
+ click-plugins==1.1.1.2
44
+ click-repl==0.3.0
45
+ colorama==0.4.6
46
+ comm==0.2.3
47
+ conda @ file:///croot/conda_1696257509808/work
48
+ conda-build @ file:///croot/conda-build_1708025865815/work
49
+ conda-content-trust @ file:///croot/conda-content-trust_1693490622020/work
50
+ conda-libmamba-solver @ file:///croot/conda-libmamba-solver_1691418897561/work/src
51
+ conda-package-handling @ file:///croot/conda-package-handling_1690999929514/work
52
+ conda_index @ file:///croot/conda-index_1706633791028/work
53
+ conda_package_streaming @ file:///croot/conda-package-streaming_1690987966409/work
54
+ configobj==5.0.9
55
+ contourpy==1.3.2
56
+ cryptography @ file:///croot/cryptography_1707523700518/work
57
+ cuda-bindings==12.9.4
58
+ cuda-pathfinder==1.5.4
59
+ cuda-toolkit==12.8.1
60
+ cycler==0.12.1
61
+ dacite==1.6.0
62
+ dagshub==0.7.0
63
+ dagshub-annotation-converter==0.2.0
64
+ dataclasses-json==0.6.7
65
+ datasets==2.18.0
66
+ debugpy==1.8.20
67
+ decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work
68
+ defusedxml==0.7.1
69
+ dictdiffer==0.9.0
70
+ dill==0.3.8
71
+ diskcache==5.6.3
72
+ distro @ file:///croot/distro_1701455004953/work
73
+ dnspython==2.6.1
74
+ docker-pycreds==0.4.0
75
+ dpath==2.2.0
76
+ dulwich==1.2.0
77
+ dvc==3.67.1
78
+ dvc-data==3.18.3
79
+ dvc-http==2.32.0
80
+ dvc-objects==5.2.0
81
+ dvc-render==1.0.2
82
+ dvc-s3==3.3.0
83
+ dvc-studio-client==0.22.0
84
+ dvc-task==0.40.2
85
+ entrypoints==0.4
86
+ exceptiongroup==1.3.1
87
+ executing @ file:///opt/conda/conda-bld/executing_1646925071911/work
88
+ expecttest==0.2.1
89
+ fastjsonschema==2.21.2
90
+ filelock==3.29.0
91
+ flatten-dict==0.4.2
92
+ flufl.lock==9.0.0
93
+ fonttools==4.62.1
94
+ fqdn==1.5.1
95
+ frozenlist==1.8.0
96
+ fsspec==2024.2.0
97
+ ftfy==6.3.1
98
+ funcy==2.0
99
+ gitdb==4.0.12
100
+ GitPython==3.1.49
101
+ gmpy2 @ file:///tmp/build/80754af9/gmpy2_1645455533097/work
102
+ gql==4.0.0
103
+ grandalf==0.8
104
+ graphql-core==3.2.8
105
+ gto==1.9.0
106
+ h11==0.16.0
107
+ hf-xet==1.4.3
108
+ httpcore==1.0.9
109
+ httpx==0.28.1
110
+ huggingface-hub==0.20.3
111
+ hydra-core==1.3.2
112
+ hypothesis==6.98.10
113
+ idna==3.13
114
+ ImageHash==4.3.1
115
+ ImageIO==2.37.3
116
+ ipykernel==7.2.0
117
+ ipython @ file:///croot/ipython_1704833016303/work
118
+ isoduration==20.11.0
119
+ iterative-telemetry==0.0.10
120
+ jedi @ file:///tmp/build/80754af9/jedi_1644315229345/work
121
+ Jinja2==3.1.6
122
+ jmespath==1.1.0
123
+ joblib==1.5.3
124
+ json5==0.14.0
125
+ jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work
126
+ jsonpointer==2.1
127
+ jsonschema @ file:///croot/jsonschema_1699041609003/work
128
+ jsonschema-specifications @ file:///croot/jsonschema-specifications_1699032386549/work
129
+ jupyter-events==0.12.1
130
+ jupyter-lsp==2.3.1
131
+ jupyter_client==8.8.0
132
+ jupyter_core==5.9.1
133
+ jupyter_server==2.18.2
134
+ jupyter_server_terminals==0.5.4
135
+ jupyterlab==4.5.6
136
+ jupyterlab_pygments==0.3.0
137
+ jupyterlab_server==2.28.0
138
+ kiwisolver==1.5.0
139
+ kombu==5.6.2
140
+ lazy-loader==0.5
141
+ libarchive-c @ file:///tmp/build/80754af9/python-libarchive-c_1617780486945/work
142
+ libmambapy @ file:///croot/mamba-split_1698782620632/work/libmambapy
143
+ lightning-utilities==0.15.3
144
+ lxml==6.1.0
145
+ markdown-it-py==4.0.0
146
+ MarkupSafe==3.0.3
147
+ marshmallow==3.26.2
148
+ matplotlib==3.8.3
149
+ matplotlib-inline @ file:///opt/conda/conda-bld/matplotlib-inline_1662014470464/work
150
+ mdurl==0.1.2
151
+ menuinst @ file:///croot/menuinst_1706732933928/work
152
+ mistune==3.2.1
153
+ mkl-fft @ file:///croot/mkl_fft_1695058164594/work
154
+ mkl-random @ file:///croot/mkl_random_1695059800811/work
155
+ mkl-service==2.4.0
156
+ more-itertools @ file:///croot/more-itertools_1700662129964/work
157
+ mpmath==1.3.0
158
+ multidict==6.7.1
159
+ multiprocess==0.70.16
160
+ mypy_extensions==1.1.0
161
+ nbclient==0.10.4
162
+ nbconvert==7.17.1
163
+ nbformat==5.10.4
164
+ nest-asyncio==1.6.0
165
+ networkx==3.3
166
+ nltk==3.8.1
167
+ notebook_shim==0.2.4
168
+ numpy==1.26.4
169
+ nvidia-cublas==13.1.0.3
170
+ nvidia-cublas-cu12==12.8.4.1
171
+ nvidia-cuda-cupti==13.0.85
172
+ nvidia-cuda-cupti-cu12==12.8.90
173
+ nvidia-cuda-nvrtc==13.0.88
174
+ nvidia-cuda-nvrtc-cu12==12.8.93
175
+ nvidia-cuda-runtime==13.0.96
176
+ nvidia-cuda-runtime-cu12==12.8.90
177
+ nvidia-cudnn-cu12==9.19.0.56
178
+ nvidia-cudnn-cu13==9.19.0.56
179
+ nvidia-cufft==12.0.0.61
180
+ nvidia-cufft-cu12==11.3.3.83
181
+ nvidia-cufile==1.15.1.6
182
+ nvidia-cufile-cu12==1.13.1.3
183
+ nvidia-curand==10.4.0.35
184
+ nvidia-curand-cu12==10.3.9.90
185
+ nvidia-cusolver==12.0.4.66
186
+ nvidia-cusolver-cu12==11.7.3.90
187
+ nvidia-cusparse==12.6.3.3
188
+ nvidia-cusparse-cu12==12.5.8.93
189
+ nvidia-cusparselt-cu12==0.7.1
190
+ nvidia-cusparselt-cu13==0.8.0
191
+ nvidia-nccl-cu12==2.28.9
192
+ nvidia-nccl-cu13==2.28.9
193
+ nvidia-nvjitlink==13.0.88
194
+ nvidia-nvjitlink-cu12==12.8.93
195
+ nvidia-nvshmem-cu12==3.4.5
196
+ nvidia-nvshmem-cu13==3.4.5
197
+ nvidia-nvtx==13.0.85
198
+ nvidia-nvtx-cu12==12.8.90
199
+ omegaconf==2.3.0
200
+ open-clip-torch==2.24.0
201
+ opencv-python-headless==4.9.0.80
202
+ optree==0.10.0
203
+ orjson==3.11.8
204
+ overrides==7.7.0
205
+ packaging==26.2
206
+ pandas==2.1.4
207
+ pandocfilters==1.5.1
208
+ parso @ file:///opt/conda/conda-bld/parso_1641458642106/work
209
+ pathspec==1.1.1
210
+ pathvalidate==3.3.1
211
+ pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work
212
+ pillow==10.3.0
213
+ pkginfo @ file:///croot/pkginfo_1679431160147/work
214
+ platformdirs @ file:///croot/platformdirs_1692205439124/work
215
+ pluggy @ file:///tmp/build/80754af9/pluggy_1648024709248/work
216
+ portalocker==3.2.0
217
+ prometheus_client==0.25.0
218
+ prompt-toolkit @ file:///croot/prompt-toolkit_1704404351921/work
219
+ propcache==0.4.1
220
+ protobuf==4.25.9
221
+ psutil==7.2.2
222
+ ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
223
+ pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work
224
+ pyarrow==24.0.0
225
+ pyarrow-hotfix==0.7
226
+ pycocoevalcap==1.2
227
+ pycocotools==2.0.11
228
+ pycosat @ file:///croot/pycosat_1696536503704/work
229
+ pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
230
+ pydantic==2.9.2
231
+ pydantic-settings==2.14.0
232
+ pydantic_core==2.23.4
233
+ pydot==4.0.1
234
+ pygit2==1.18.2
235
+ Pygments @ file:///croot/pygments_1684279966437/work
236
+ pygtrie==2.5.0
237
+ pyOpenSSL @ file:///croot/pyopenssl_1708380408460/work
238
+ pyparsing==3.3.2
239
+ PySocks @ file:///home/builder/ci_310/pysocks_1640793678128/work
240
+ python-dateutil==2.9.0.post0
241
+ python-dotenv==1.2.2
242
+ python-etcd==0.4.5
243
+ python-json-logger==4.1.0
244
+ pytz==2026.1.post1
245
+ PyWavelets==1.8.0
246
+ PyYAML==6.0.1
247
+ pyzmq==27.1.0
248
+ referencing @ file:///croot/referencing_1699012038513/work
249
+ regex==2026.4.4
250
+ requests==2.32.3
251
+ requests-toolbelt==1.0.0
252
+ rfc3339-validator==0.1.4
253
+ rfc3986-validator==0.1.1
254
+ rich==15.0.0
255
+ rouge_score==0.1.2
256
+ rpds-py @ file:///croot/rpds-py_1698945930462/work
257
+ ruamel.yaml @ file:///croot/ruamel.yaml_1666304550667/work
258
+ ruamel.yaml.clib @ file:///croot/ruamel.yaml.clib_1666302247304/work
259
+ s3fs==2026.3.0
260
+ s3transfer==0.16.1
261
+ sacrebleu==2.4.0
262
+ safetensors==0.4.2
263
+ scikit-image==0.25.2
264
+ scikit-learn==1.7.2
265
+ scipy==1.15.3
266
+ scmrepo==3.6.2
267
+ semver==3.0.4
268
+ Send2Trash==2.1.0
269
+ sentence-transformers==2.7.0
270
+ sentencepiece==0.2.0
271
+ sentry-sdk==2.58.0
272
+ setproctitle==1.3.7
273
+ shellingham==1.5.4
274
+ shortuuid==1.0.13
275
+ shtab==1.8.0
276
+ six==1.17.0
277
+ smmap==5.0.3
278
+ sortedcontainers==2.4.0
279
+ soupsieve @ file:///croot/soupsieve_1696347547217/work
280
+ sqltrie==0.11.2
281
+ stack-data @ file:///opt/conda/conda-bld/stack_data_1646927590127/work
282
+ sympy==1.14.0
283
+ tabulate==0.10.0
284
+ tenacity==9.1.4
285
+ terminado==0.18.1
286
+ threadpoolctl==3.6.0
287
+ tifffile==2025.5.10
288
+ timm==0.9.12
289
+ tinycss2==1.4.0
290
+ tokenizers==0.15.2
291
+ tomli @ file:///opt/conda/conda-bld/tomli_1657175507142/work
292
+ tomlkit==0.14.0
293
+ toolz @ file:///croot/toolz_1667464077321/work
294
+ torch==2.11.0+cu128
295
+ torchaudio==2.11.0+cu128
296
+ torchelastic==0.2.2
297
+ torchmetrics==1.9.0
298
+ torchvision==0.26.0+cu128
299
+ tornado==6.5.5
300
+ tqdm==4.66.2
301
+ traitlets @ file:///croot/traitlets_1671143879854/work
302
+ transformers==4.38.2
303
+ treelib==1.8.0
304
+ triton==3.6.0
305
+ truststore @ file:///croot/truststore_1695244293384/work
306
+ typer==0.25.0
307
+ types-dataclasses==0.6.6
308
+ typing-inspect==0.9.0
309
+ typing-inspection==0.4.2
310
+ typing_extensions==4.10.0
311
+ tzdata==2026.2
312
+ tzlocal==5.3.1
313
+ uri-template==1.3.0
314
+ urllib3==2.6.3
315
+ vine==5.1.0
316
+ voluptuous==0.16.0
317
+ wandb==0.26.1
318
+ wcwidth==0.7.0
319
+ webcolors==25.10.0
320
+ webencodings==0.5.1
321
+ websocket-client==1.9.0
322
+ wrapt==2.1.2
323
+ xxhash==3.7.0
324
+ yarl==1.23.0
325
+ zc.lockfile==4.0
326
+ zstandard @ file:///croot/zstandard_1677013143055/work
327
+
328
+ # 5/25 추가필요
329
+ notebook==7.5.0
330
+ einops==0.8.2
331
+
332
+
333
+ # 5/30 추가
334
+ umap-learn==0.5.12
335
+
336
+ # 5/31 가인 (추가 필요)
337
+ grad-cam==1.5.5
338
+ opencv-python==4.11.0.86
339
+ ttach==0.0.3
340
+ gradio==4.44.1
341
+ fastapi==0.112.4
342
+ starlette==0.38.6
src/caption/check_clip_score.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from PIL import Image
9
+ from tqdm import tqdm
10
+ from transformers import CLIPModel, CLIPProcessor
11
+ import os
12
+ from dotenv import load_dotenv
13
+
14
+ # ============================================================
15
+ # 설정값
16
+ # ============================================================
17
+
18
+ load_dotenv()
19
+
20
+ # .env 안의 HF_TOKEN 읽기
21
+ hf_token = os.getenv("HF_TOKEN")
22
+
23
+ # 전체 클래스를 검수하려면 True
24
+ # 특정 클래스만 검수하려면 False
25
+ CHECK_ALL_CLASSES = True
26
+
27
+ # 전체 클래스 검수 시 기준이 되는 raw 데이터 루트
28
+ DATA_RAW_ROOT_DIR = Path("data/raw")
29
+
30
+ # 특정 클래스만 검수할 때 사용할 클래스 폴더 경로
31
+ # CHECK_ALL_CLASSES = False 일 때만 사용됨
32
+ TARGET_CLASS_DIR = Path("data/raw")
33
+
34
+ # 입력 JSON 파일
35
+ INPUT_JSON_PATH = Path("data/annotations/captions_flo_all.json")
36
+
37
+ # 출력 JSON 파일
38
+ OUTPUT_JSON_PATH = Path("data/annotations/clip_checked_flo_all.json")
39
+
40
+ # 사용할 CLIP 모델
41
+ MODEL_NAME = "openai/clip-vit-base-patch32"
42
+
43
+ # 한 번에 처리할 이미지-캡션 쌍 개수
44
+ BATCH_SIZE = 32
45
+
46
+ # 하위 몇 %를 fail / review로 볼지
47
+ FAIL_BOTTOM_PERCENT = 10
48
+ REVIEW_BOTTOM_PERCENT = 20
49
+
50
+ print("경로 : " , INPUT_JSON_PATH)
51
+
52
+ # ============================================================
53
+ # JSON 입출력
54
+ # ============================================================
55
+
56
def load_json(path: Path) -> list[dict[str, Any]]:
    """Read a UTF-8 JSON file and return its top-level array.

    Args:
        path: Location of the JSON document to read.

    Returns:
        The decoded list of annotation records.

    Raises:
        ValueError: If the decoded document is not a JSON array.
    """
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if isinstance(payload, list):
        return payload

    raise ValueError("입력 JSON은 반드시 배열 형태여야 합니다.")
64
+
65
+
66
def save_json(data: list[dict[str, Any]], path: Path) -> None:
    """Write ``data`` to ``path`` as indented, non-ASCII-escaped UTF-8 JSON.

    Missing parent directories are created first.
    """
    output_dir = path.parent
    output_dir.mkdir(parents=True, exist_ok=True)

    with path.open("w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=4, ensure_ascii=False)
71
+
72
+
73
+ # ============================================================
74
+ # 클래스 / 경로 처리
75
+ # ============================================================
76
+
77
def get_target_class_name() -> str:
    """Return the last path component of TARGET_CLASS_DIR.

    Example: TARGET_CLASS_DIR = data/raw/airplane -> "airplane".
    """
    class_dir = TARGET_CLASS_DIR
    return class_dir.name
82
+
83
+
84
def get_class_name_from_image_value(image_value: str) -> str:
    """Return the first path component of a JSON ``image`` value.

    Example: "airplane/hf_airplane_001.jpg" -> "airplane". Backslashes are
    treated as path separators. A value with fewer than two path components
    yields an empty string.
    """
    components = Path(image_value.replace("\\", "/")).parts
    return components[0] if len(components) >= 2 else ""
95
+
96
+
97
def is_target_item(item: dict[str, Any]) -> bool:
    """Decide whether an annotation record should be checked.

    With CHECK_ALL_CLASSES enabled every record qualifies. Otherwise a record
    qualifies only when the first folder of its ``image`` value equals the
    name of TARGET_CLASS_DIR.
    """
    if not CHECK_ALL_CLASSES:
        item_class = get_class_name_from_image_value(str(item.get("image", "")))
        return item_class == get_target_class_name()

    return True
112
+
113
+
114
def resolve_image_path(image_value: str) -> Path:
    """Map a JSON ``image`` value to a path on disk.

    For JSON ``"image": "airplane/hf_airplane_001.jpg"``:

    * all-classes mode: DATA_RAW_ROOT_DIR / image
      -> data/raw/airplane/hf_airplane_001.jpg
    * single-class mode: TARGET_CLASS_DIR / file name only
      -> data/raw/airplane/hf_airplane_001.jpg
    """
    relative = Path(image_value.replace("\\", "/"))

    if not CHECK_ALL_CLASSES:
        # Single-class mode ignores the folder part stored in the JSON.
        return TARGET_CLASS_DIR / relative.name

    return DATA_RAW_ROOT_DIR / relative
134
+
135
+
136
def load_image(image_path: Path) -> Image.Image | None:
    """Open an image, convert it to RGB and return a detached copy.

    Returns None when the file cannot be opened or converted for any reason,
    so callers can record the failure instead of aborting.
    """
    try:
        with Image.open(image_path) as source:
            rgb = source.convert("RGB")
            return rgb.copy()
    except Exception:
        # Best-effort by design: any failure is reported to the caller as None.
        return None
142
+
143
+
144
+ # ============================================================
145
+ # 캡션 펼치기
146
+ # ============================================================
147
+
148
def flatten_caption_items(data: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Expand each selected record into one entry per caption.

    A record with three captions becomes three image-caption pairs.

    Returns:
        ``(target_data, flat_items)``. ``target_data`` holds the records
        accepted by ``is_target_item``. Each entry of ``flat_items`` carries
        ``item_index``, the position of its record inside ``target_data``.
    """
    target_data: list[dict[str, Any]] = []
    flat_items: list[dict[str, Any]] = []

    for record in data:
        if not is_target_item(record):
            continue

        position = len(target_data)
        target_data.append(record)

        image_value = str(record.get("image", ""))
        raw_captions = record.get("captions", [])
        # A non-list "captions" value is treated as "no captions".
        captions = raw_captions if isinstance(raw_captions, list) else []

        flat_items.extend(
            {
                "item_index": position,
                "caption_index": caption_index,
                "image": image_value,
                "class": record.get("class", ""),
                "split": record.get("split", ""),
                "caption": str(caption).strip(),
            }
            for caption_index, caption in enumerate(captions)
        )

    return target_data, flat_items
180
+
181
+
182
+ # ============================================================
183
+ # CLIP Score 계산
184
+ # ============================================================
185
+
186
@torch.no_grad()
def compute_clip_scores(
    flat_items: list[dict[str, Any]],
    model: CLIPModel,
    processor: CLIPProcessor,
    device: torch.device
) -> list[dict[str, Any]]:
    """Score every image-caption pair with CLIP, BATCH_SIZE pairs at a time.

    Pairs whose image cannot be loaded or whose caption is empty are reported
    with ``clip_status`` "missing_image" / "empty_caption" and no score.
    Scored pairs get ``clip_cosine`` (cosine similarity of the L2-normalised
    image and text embeddings) and ``clip_score`` = 2.5 * max(cosine, 0),
    with ``clip_status`` "pending".

    Returns:
        One result dict per input pair. Within each batch, unscorable pairs
        are emitted before the scored ones.
    """
    scored: list[dict[str, Any]] = []

    for offset in tqdm(range(0, len(flat_items), BATCH_SIZE), desc="computing CLIP scores"):
        pending = []
        pil_images = []
        caption_texts = []

        for entry in flat_items[offset:offset + BATCH_SIZE]:
            image_path = resolve_image_path(entry["image"])
            image = load_image(image_path)

            if image is None:
                status = "missing_image"
                reason = f"image file could not be opened: {image_path}"
            elif not entry["caption"]:
                status = "empty_caption"
                reason = "caption is empty"
            else:
                pending.append({
                    **entry,
                    "resolved_image_path": str(image_path).replace("\\", "/")
                })
                pil_images.append(image)
                caption_texts.append(entry["caption"])
                continue

            # Unscorable pair: record why and move on.
            scored.append({
                **entry,
                "resolved_image_path": str(image_path).replace("\\", "/"),
                "clip_cosine": None,
                "clip_score": None,
                "clip_status": status,
                "clip_reason": reason
            })

        if not pending:
            continue

        encoded = processor(
            text=caption_texts,
            images=pil_images,
            return_tensors="pt",
            padding=True,
            truncation=True
        )
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

        outputs = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            pixel_values=encoded["pixel_values"]
        )

        image_vectors = F.normalize(outputs.image_embeds, p=2, dim=1)
        text_vectors = F.normalize(outputs.text_embeds, p=2, dim=1)

        # Row-wise dot product of unit vectors = cosine similarity per pair.
        cosines = (image_vectors * text_vectors).sum(dim=1)

        for entry, cosine in zip(pending, cosines):
            cosine_value = float(cosine.detach().cpu().item())
            clip_score = 2.5 * max(cosine_value, 0.0)

            scored.append({
                **entry,
                "clip_cosine": round(cosine_value, 6),
                "clip_score": round(clip_score, 6),
                "clip_status": "pending",
                "clip_reason": ""
            })

    return scored
281
+
282
+
283
+ # ============================================================
284
+ # pass / review / fail 판정
285
+ # ============================================================
286
+
287
def assign_clip_status(results: list[dict[str, Any]]) -> None:
    """Label scored results in place as "fail", "review" or "pass".

    Thresholds are the FAIL_BOTTOM_PERCENT and REVIEW_BOTTOM_PERCENT
    percentiles of all float scores. Results without a score are left
    untouched. Nothing happens when no result has a float score.
    """
    scores = [
        record["clip_score"]
        for record in results
        if isinstance(record.get("clip_score"), float)
    ]

    if not scores:
        return

    fail_threshold = np.percentile(scores, FAIL_BOTTOM_PERCENT)
    review_threshold = np.percentile(scores, REVIEW_BOTTOM_PERCENT)

    for record in results:
        score = record.get("clip_score")

        if score is None:
            continue

        if score <= fail_threshold:
            verdict = ("fail", f"clip score is in the bottom {FAIL_BOTTOM_PERCENT}%")
        elif score <= review_threshold:
            verdict = ("review", f"clip score is in the bottom {REVIEW_BOTTOM_PERCENT}%")
        else:
            verdict = ("pass", "clip score is acceptable")

        record["clip_status"], record["clip_reason"] = verdict
315
+
316
+
317
+ # ============================================================
318
+ # 결과를 원래 JSON 구조에 붙이기
319
+ # ============================================================
320
+
321
def attach_results_to_data(
    target_data: list[dict[str, Any]],
    results: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Attach per-caption check results to their source records.

    Each record in ``target_data`` gets a fresh ``caption_checks`` list,
    filled in (item_index, caption_index) order. ``target_data`` is modified
    in place and also returned.
    """
    for record in target_data:
        record["caption_checks"] = []

    ordered = sorted(
        results,
        key=lambda entry: (entry["item_index"], entry["caption_index"])
    )

    optional_fields = ("resolved_image_path", "clip_cosine", "clip_score", "clip_status")

    for entry in ordered:
        owner_index = entry["item_index"]

        check = {
            "caption_index": entry["caption_index"],
            "caption": entry["caption"],
        }
        for field in optional_fields:
            check[field] = entry.get(field)
        check["clip_reason"] = entry.get("clip_reason", "")

        target_data[owner_index]["caption_checks"].append(check)

    return target_data
350
+
351
+
352
+ # ============================================================
353
+ # 요약 출력
354
+ # ============================================================
355
+
356
def print_summary(
    target_data: list[dict[str, Any]],
    flat_items: list[dict[str, Any]],
    results: list[dict[str, Any]]
) -> None:
    """Print run settings, per-status counts and score statistics to stdout."""
    status_count = {}
    for record in results:
        label = record.get("clip_status", "unknown")
        status_count[label] = status_count.get(label, 0) + 1

    valid_scores = [
        record["clip_score"]
        for record in results
        if isinstance(record.get("clip_score"), float)
    ]

    print("\n===== CLIP Score Summary =====")
    print(f"check all classes: {CHECK_ALL_CLASSES}")

    if not CHECK_ALL_CLASSES:
        print(f"target class dir: {TARGET_CLASS_DIR}")
        print(f"target class name: {get_target_class_name()}")
    else:
        print(f"data raw root dir: {DATA_RAW_ROOT_DIR}")

    print(f"target images: {len(target_data)}")
    print(f"target image-caption pairs: {len(flat_items)}")
    print(f"status count: {status_count}")

    if not valid_scores:
        return

    print(f"min score: {min(valid_scores):.4f}")
    print(f"max score: {max(valid_scores):.4f}")
    print(f"mean score: {np.mean(valid_scores):.4f}")
    print(f"bottom {FAIL_BOTTOM_PERCENT}% threshold: {np.percentile(valid_scores, FAIL_BOTTOM_PERCENT):.4f}")
    print(f"bottom {REVIEW_BOTTOM_PERCENT}% threshold: {np.percentile(valid_scores, REVIEW_BOTTOM_PERCENT):.4f}")
391
+
392
+
393
+ # ============================================================
394
+ # 실행
395
+ # ============================================================
396
+
397
def main():
    """Run the CLIP check end to end: validate paths, score, label, save, report."""
    if not INPUT_JSON_PATH.exists():
        raise FileNotFoundError(f"input file not found: {INPUT_JSON_PATH}")

    # Only the directory relevant to the selected mode has to exist.
    if CHECK_ALL_CLASSES and not DATA_RAW_ROOT_DIR.exists():
        raise FileNotFoundError(f"data raw root directory not found: {DATA_RAW_ROOT_DIR}")
    if not CHECK_ALL_CLASSES and not TARGET_CLASS_DIR.exists():
        raise FileNotFoundError(f"target class directory not found: {TARGET_CLASS_DIR}")

    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    print(f"device: {device}")

    print(f"loading model: {MODEL_NAME}")
    model = CLIPModel.from_pretrained(MODEL_NAME, token=hf_token).to(device)
    processor = CLIPProcessor.from_pretrained(MODEL_NAME, token=hf_token)
    model.eval()

    target_data, flat_items = flatten_caption_items(load_json(INPUT_JSON_PATH))

    if not target_data:
        raise ValueError("검수 대상 데이터가 없습니다. CHECK_ALL_CLASSES 또는 TARGET_CLASS_DIR 설정을 확인하세요.")

    results = compute_clip_scores(
        flat_items=flat_items,
        model=model,
        processor=processor,
        device=device
    )
    assign_clip_status(results)

    checked_data = attach_results_to_data(target_data, results)
    save_json(checked_data, OUTPUT_JSON_PATH)

    print_summary(target_data, flat_items, results)
    print(f"\nsaved: {OUTPUT_JSON_PATH}")
437
+
438
+
439
+ if __name__ == "__main__":
440
+ main()
src/caption/generate_captions_blip.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os # 파일/폴더 탐색
2
+ import json # JSON 저장
3
+ import random # 데이터 섞기
4
+ import torch # GPU 사용
5
+ import re # 정규식 (문장 필터링)
6
+ from collections import defaultdict # 클래스별 그룹화
7
+ from PIL import Image # 이미지 로드
8
+ from transformers import BlipProcessor, BlipForConditionalGeneration # BLIP
9
+ from sentence_transformers import SentenceTransformer, util # SBERT
10
+
11
+ # ----------------------
12
+ # 1. 설정
13
+ # ----------------------
14
+ ROOT_DIR = "data/raw" # 이미지 루트 폴더 (raw/클래스/이미지)
15
+ OUTPUT_JSON = "annotation.json" # 결과 JSON 파일 이름
16
+
17
+ TARGET_CAPTIONS = 3 # 이미지당 캡션 개수 (3 또는 5 추천)
18
+ SIM_THRESHOLD = 0.85 # 문장 유사도 기준 (높을수록 엄격)
19
+ MIN_WORDS = 3 # 최소 단어 수 (짧은 문장 제거)
20
+ MAX_ATTEMPTS = 10 # 캡션 생성 최대 반복 횟수
21
+
22
+ TRAIN_RATIO = 0.7 # train 비율
23
+ VAL_RATIO = 0.15 # val 비율
24
+ TEST_RATIO = 0.15 # test 비율
25
+
26
+ device = "cuda" if torch.cuda.is_available() else "cpu" # GPU 사용 여부
27
+ print("device : ", device)
28
+ # ----------------------
29
+ # 2. 모델 로드
30
+ # ----------------------
31
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
32
+ # 이미지 → 토큰 변환
33
+
34
+ blip_model = BlipForConditionalGeneration.from_pretrained(
35
+ "Salesforce/blip-image-captioning-base"
36
+ ).to(device)
37
+ # 캡션 생성 모델
38
+
39
+ embedder = SentenceTransformer("all-MiniLM-L6-v2", device=device)
40
+ # 문장 → 벡터 (유사도 계산용)
41
+
42
+ # ----------------------
43
+ # 3. 캡션 생성 함수
44
+ # ----------------------
45
def generate_captions(image, n):
    """Sample ``n`` candidate captions for ``image`` with the BLIP model.

    Returns the decoded captions, stripped and lower-cased.
    """
    # Image preprocessing.
    encoded = processor(images=image, return_tensors="pt").to(device)

    # Sampling (rather than greedy decoding) is used to get varied captions.
    sampling_options = {
        "do_sample": True,
        "top_k": 50,
        "top_p": 0.95,
        "temperature": 0.9,
        "num_return_sequences": n,
        "max_length": 30,
    }
    sequences = blip_model.generate(**encoded, **sampling_options)

    # Token ids -> text.
    captions = []
    for sequence in sequences:
        text = processor.decode(sequence, skip_special_tokens=True)
        captions.append(text.strip().lower())
    return captions
63
+
64
+ # ----------------------
65
+ # 4. 기본 품질 필터
66
+ # ----------------------
67
def basic_filter(captions):
    """Keep only captions that pass three cheap quality checks.

    A caption is dropped when it has fewer than MIN_WORDS words, when fewer
    than 60% of its words are distinct, or when it contains any character
    other than lowercase letters, digits and whitespace.
    """
    def is_acceptable(text):
        tokens = text.split()
        if len(tokens) < MIN_WORDS:  # too short
            return False
        if len(set(tokens)) < len(tokens) * 0.6:  # too repetitive
            return False
        return re.search(r"[^a-z0-9\s]", text) is None  # unexpected characters

    return [caption for caption in captions if is_acceptable(caption)]
85
+
86
+ # ----------------------
87
+ # 5. 키워드 추출
88
+ # ----------------------
89
def extract_keywords(caption):
    """Return the set of whitespace-separated words that are not stopwords."""
    stopwords = {"a", "the", "on", "in", "at", "with", "and", "of", "to", "is", "are"}
    return {word for word in caption.split() if word not in stopwords}
92
+
93
+ # ----------------------
94
+ # 6. 유사도 + 키워드 필터
95
+ # ----------------------
96
def advanced_filter(captions):
    """Greedily keep captions that differ enough from those already kept.

    Captions are visited in order. One is rejected when, against any kept
    caption, its embedding cosine similarity exceeds SIM_THRESHOLD or more
    than 70% of its own keywords are shared.
    """
    if not captions:
        return []

    vectors = embedder.encode(captions, convert_to_tensor=True)  # sentence -> vector
    kept_indices = []

    for index, candidate in enumerate(captions):
        candidate_keywords = extract_keywords(candidate)

        for kept in kept_indices:
            similarity = util.cos_sim(vectors[index], vectors[kept]).item()
            if similarity > SIM_THRESHOLD:  # meaning is too close
                break

            shared = candidate_keywords & extract_keywords(captions[kept])
            if len(shared) / max(len(candidate_keywords), 1) > 0.7:  # keywords overlap too much
                break
        else:
            # No kept caption rejected this one.
            kept_indices.append(index)

    return [captions[index] for index in kept_indices]
128
+
129
+ # ----------------------
130
+ # 7. 캡션 생성 루프
131
+ # ----------------------
132
def get_captions(image):
    """Collect up to TARGET_CAPTIONS mutually distinct captions for ``image``.

    Candidates are generated repeatedly (at most MAX_ATTEMPTS rounds), passed
    through ``basic_filter`` and merged with the captions accepted so far,
    then re-filtered with ``advanced_filter``.

    Returns:
        A list of at most TARGET_CAPTIONS captions. It may be shorter when
        the attempts run out.
    """
    final_caps = []
    attempts = 0

    while len(final_caps) < TARGET_CAPTIONS and attempts < MAX_ATTEMPTS:
        needed = TARGET_CAPTIONS - len(final_caps)

        # Generate more than the shortfall because filtering discards many.
        new_caps = basic_filter(generate_captions(image, needed * 3))

        # Order-preserving de-duplication. advanced_filter is greedy in input
        # order, so already accepted captions must stay first; a plain set()
        # would shuffle them arbitrarily and make results vary between runs.
        combined = list(dict.fromkeys(final_caps + new_caps))

        final_caps = advanced_filter(combined)[:TARGET_CAPTIONS]
        attempts += 1

    return final_caps
149
+
150
+ # ----------------------
151
+ # 8. 데이터 수집
152
+ # ----------------------
153
# Walk ROOT_DIR/<class>/<image> and caption every supported image.
dataset = []

# sorted() makes the processing order deterministic across runs and machines.
for class_name in sorted(os.listdir(ROOT_DIR)):
    class_path = os.path.join(ROOT_DIR, class_name)

    if not os.path.isdir(class_path):
        continue

    for filename in sorted(os.listdir(class_path)):
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        path = os.path.join(class_path, filename)

        try:
            # The context manager closes the file handle; convert() returns
            # an in-memory image that stays valid afterwards.
            with Image.open(path) as source:
                image = source.convert("RGB")
        except OSError as error:
            # One unreadable file must not abort a long captioning run.
            print(f"\nskipped unreadable image: {path} ({error})")
            continue

        captions = get_captions(image)

        dataset.append({
            "image": f"{class_name}/{filename}",  # path relative to ROOT_DIR
            "class": class_name,                  # class label
            "captions": captions                  # generated captions
        })

        print(f"\n{class_name}/{filename}")
        for i, c in enumerate(captions):
            print(f"{i+1}. {c}")
179
+
180
+ # ----------------------
181
+ # 9. Stratified Split
182
+ # ----------------------
183
# Group the records by class so each class is split independently.
class_groups = defaultdict(list)
for record in dataset:
    class_groups[record["class"]].append(record)

train_set, val_set, test_set = [], [], []

for members in class_groups.values():
    random.shuffle(members)  # shuffle within the class

    total = len(members)

    # max(1, ...) gives train and val at least one slot each;
    # test receives whatever remains.
    train_end = max(1, int(total * TRAIN_RATIO))
    val_end = train_end + max(1, int(total * VAL_RATIO))

    train_set.extend(members[:train_end])
    val_set.extend(members[train_end:val_end])
    test_set.extend(members[val_end:])

# Tag every record with the split it landed in.
for split_name, subset in (("train", train_set), ("val", val_set), ("test", test_set)):
    for record in subset:
        record["split"] = split_name

dataset = train_set + val_set + test_set  # merge back into a single list

# Write the annotation file.
with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
    json.dump(dataset, f, indent=4, ensure_ascii=False)

print(f"\n완료: {OUTPUT_JSON} 생성됨")
src/caption/generate_captions_florence2.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import random
4
+ from pathlib import Path
5
+
6
+ import torch
7
+ from PIL import Image
8
+ from tqdm import tqdm
9
+ from dotenv import load_dotenv
10
+ from transformers import AutoProcessor, Florence2ForConditionalGeneration
11
+
12
+
13
+ # =========================================================
14
+ # 1. 설정값
15
+ # =========================================================
16
+
17
+ # 전체 클래스 캡셔닝: "data/raw"
18
+ # 특정 클래스만 캡셔닝: "data/raw/apple"
19
+ INPUT_IMAGE_DIR = "data/raw"
20
+
21
+ # image 값을 "pizza/hf_pizza_001.jpg" 형태로 만들기 위한 기준 경로
22
+ DATA_RAW_ROOT = "data/raw"
23
+
24
+ # 결과 JSON 저장 경로
25
+ OUTPUT_JSON_PATH = "data/annotations/captions_flo_all.json"
26
+
27
+ # transformers 5.7.0에서는 florence-community 모델 사용 권장
28
+ # base-ft: 가볍고 다운스트림 task에 fine-tuning된 모델
29
+ # large-ft: 더 무겁지만 품질이 더 좋을 수 있음
30
+ MODEL_ID = "florence-community/Florence-2-base-ft"
31
+ # MODEL_ID = "florence-community/Florence-2-large-ft"
32
+
33
+ # .env 파일에서 읽을 Hugging Face 토큰 이름
34
+ # 공개 모델이면 없어도 동작할 수 있지만, 토큰을 넣어두는 편이 안정적입니다.
35
+ HF_TOKEN_ENV_NAME = "HF_TOKEN"
36
+
37
+ # split 비율: 기본 7 : 1.5 : 1.5
38
+ TRAIN_RATIO = 0.7
39
+ VAL_RATIO = 0.15
40
+ TEST_RATIO = 0.15
41
+
42
+ # split 재현을 위한 seed
43
+ RANDOM_SEED = 42
44
+
45
+ # 이미지당 캡션 3개 생성
46
+ # Florence-2 문서에서 지원하는 caption task입니다.
47
+ CAPTION_TASKS = [
48
+ "<CAPTION>",
49
+ "<DETAILED_CAPTION>",
50
+ "<MORE_DETAILED_CAPTION>",
51
+ ]
52
+
53
+ # 생성 옵션
54
+ NUM_BEAMS = 3
55
+ MAX_NEW_TOKENS = 64
56
+
57
+ # 몇 장마다 중간 저장할지
58
+ SAVE_EVERY = 220
59
+
60
+ # 이미 JSON에 있는 이미지는 건너뛸지 여부
61
+ SKIP_ALREADY_DONE = True
62
+
63
+ # 허용 이미지 확장자
64
+ IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".webp", ".bmp"]
65
+
66
+
67
+ # =========================================================
68
+ # 2. 이미지 목록 가져오기
69
+ # =========================================================
70
+
71
def get_image_list():
    """List every image under INPUT_IMAGE_DIR, in sorted path order.

    Returns:
        A list of dicts with ``path`` (the path found on disk), ``image``
        (POSIX path relative to DATA_RAW_ROOT, e.g. "pizza/hf_pizza_001.jpg")
        and ``class`` (first component of ``image``, e.g. "pizza").

    Raises:
        FileNotFoundError: If INPUT_IMAGE_DIR does not exist.
    """
    input_dir = Path(INPUT_IMAGE_DIR).resolve()
    raw_root = Path(DATA_RAW_ROOT).resolve()

    if not input_dir.exists():
        raise FileNotFoundError(f"입력 경로가 없습니다: {input_dir}")

    def describe(image_path):
        # /workspace/data/raw/pizza/hf_pizza_001.jpg -> pizza/hf_pizza_001.jpg
        relative = image_path.resolve().relative_to(raw_root).as_posix()
        return {
            "path": image_path,
            "image": relative,
            # pizza/hf_pizza_001.jpg -> pizza
            "class": relative.split("/")[0],
        }

    return [
        describe(candidate)
        for candidate in sorted(input_dir.rglob("*"))
        if candidate.suffix.lower() in IMAGE_EXTENSIONS
    ]
101
+
102
+
103
+ # =========================================================
104
+ # 3. train / val / test 나누기
105
+ # =========================================================
106
+
107
def add_split(image_list):
    """Assign a reproducible train / val / test split within each class.

    Every item dict gets a ``split`` key, set in place. Per class, the items
    are shuffled with a generator seeded by RANDOM_SEED. The first
    ``round(n * TRAIN_RATIO / total_ratio)`` items become "train", the next
    ``round(n * VAL_RATIO / total_ratio)`` become "val", the rest "test".

    Returns:
        A new list with all items, grouped by class in first-seen order.
    """
    # A private generator yields the same shuffles as random.seed(RANDOM_SEED)
    # followed by random.shuffle, without reseeding the process-wide RNG.
    rng = random.Random(RANDOM_SEED)

    total_ratio = TRAIN_RATIO + VAL_RATIO + TEST_RATIO

    # Collect the images of each class.
    class_map = {}
    for item in image_list:
        class_map.setdefault(item["class"], []).append(item)

    result = []

    # Split each class independently.
    for items in class_map.values():
        rng.shuffle(items)

        total_count = len(items)
        train_count = round(total_count * TRAIN_RATIO / total_ratio)
        val_count = round(total_count * VAL_RATIO / total_ratio)

        for index, item in enumerate(items):
            if index < train_count:
                item["split"] = "train"
            elif index < train_count + val_count:
                item["split"] = "val"
            else:
                item["split"] = "test"

            result.append(item)

    return result
146
+
147
+
148
+ # =========================================================
149
+ # 4. Florence-2 모델 준비
150
+ # =========================================================
151
+
152
def load_model():
    """Load the Florence-2 processor and model onto the best available device.

    Reads the Hugging Face token from the environment (after loading .env).
    Uses bfloat16 on CUDA when supported, else float16; float32 on CPU.

    Returns:
        ``(model, processor, device, torch_dtype)`` with the model in eval mode.
    """
    load_dotenv()
    hf_token = os.getenv(HF_TOKEN_ENV_NAME)

    if not torch.cuda.is_available():
        device = "cpu"
        torch_dtype = torch.float32
    else:
        device = "cuda"
        # Prefer bfloat16 when the GPU supports it, otherwise use float16.
        torch_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

    print(f"device: {device}")
    print(f"dtype: {torch_dtype}")
    print(f"model: {MODEL_ID}")

    processor = AutoProcessor.from_pretrained(MODEL_ID, token=hf_token)

    model = Florence2ForConditionalGeneration.from_pretrained(
        MODEL_ID,
        dtype=torch_dtype,
        token=hf_token,
    )
    model = model.to(device)
    model.eval()

    return model, processor, device, torch_dtype
188
+
189
+
190
+ # =========================================================
191
+ # 5. 이미지 1장 캡셔닝
192
+ # =========================================================
193
+
194
def make_caption(image, task, model, processor, device, torch_dtype):
    """Generate one caption for ``image`` using the Florence-2 ``task`` prompt.

    Decoding uses beam search (NUM_BEAMS beams, no sampling) limited to
    MAX_NEW_TOKENS new tokens. The raw text is parsed with the processor's
    ``post_process_generation``, and the entry stored under ``task`` is
    returned as a stripped string ("" when absent).
    """
    inputs = processor(text=task, images=image, return_tensors="pt").to(device, torch_dtype)

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            num_beams=NUM_BEAMS,
            do_sample=False,
        )

    # Special tokens are kept here; the post-processing step receives the raw text.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=False)

    parsed_result = processor.post_process_generation(
        decoded[0],
        task=task,
        image_size=image.size,
    )

    caption = parsed_result.get(task, "")
    text = caption if isinstance(caption, str) else str(caption)
    return text.strip()
228
+
229
+
230
def make_three_captions(image_path, model, processor, device, torch_dtype):
    """Caption one image once per entry of CAPTION_TASKS.

    Returns:
        The captions in CAPTION_TASKS order.
    """
    # Close the file handle deterministically; convert() returns an in-memory
    # RGB image that remains usable after the file is closed.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    return [
        make_caption(
            image=image,
            task=task,
            model=model,
            processor=processor,
            device=device,
            torch_dtype=torch_dtype,
        )
        for task in CAPTION_TASKS
    ]
248
+
249
+
250
+ # =========================================================
251
+ # 6. 기존 JSON 읽기 / 저장하기
252
+ # =========================================================
253
+
254
def load_existing_result():
    """Load previously saved records from OUTPUT_JSON_PATH, keyed by ``image``.

    Returns an empty dict when the output file does not exist yet.
    """
    output_path = Path(OUTPUT_JSON_PATH)

    if output_path.exists():
        with output_path.open("r", encoding="utf-8") as handle:
            records = json.load(handle)
        # If an image key repeats, the later record wins.
        return {record["image"]: record for record in records}

    return {}
269
+
270
+
271
def save_result(result_map):
    """Write all records to OUTPUT_JSON_PATH, sorted by their ``image`` key.

    The data is written to a sibling temporary file and then moved over the
    target with ``os.replace``. This function is the periodic checkpoint, so
    an interruption mid-write must not truncate the only file a resumed run
    can read.
    """
    output_path = Path(OUTPUT_JSON_PATH)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    result_list = sorted(result_map.values(), key=lambda x: x["image"])

    temp_path = output_path.with_name(output_path.name + ".tmp")

    with temp_path.open("w", encoding="utf-8") as f:
        json.dump(result_list, f, ensure_ascii=False, indent=4)

    # Swap in the fully written file in one step.
    os.replace(temp_path, output_path)
280
+
281
+
282
+ # =========================================================
283
+ # 7. 실행
284
+ # =========================================================
285
+
286
def main():
    """Caption every listed image, checkpointing to JSON as it goes.

    Images already present in the saved results are skipped when
    SKIP_ALREADY_DONE is set. A failure on one image is reported and counted,
    and processing continues with the next image.
    """
    print("이미지 목록을 읽는 중입니다.")

    image_list = add_split(get_image_list())

    print(f"총 이미지 수: {len(image_list)}")

    result_map = load_existing_result()

    model, processor, device, torch_dtype = load_model()

    new_count = skip_count = fail_count = 0

    for entry in tqdm(image_list):
        image_key = entry["image"]

        if SKIP_ALREADY_DONE and image_key in result_map:
            skip_count += 1
            continue

        try:
            captions = make_three_captions(
                image_path=entry["path"],
                model=model,
                processor=processor,
                device=device,
                torch_dtype=torch_dtype,
            )

            result_map[image_key] = {
                "image": entry["image"],
                "class": entry["class"],
                "captions": captions,
                "split": entry["split"],
            }
            new_count += 1

            # Periodic checkpoint every SAVE_EVERY newly captioned images.
            if new_count % SAVE_EVERY == 0:
                save_result(result_map)

        except Exception as e:
            fail_count += 1
            print(f"\n실패한 이미지: {entry['path']}")
            print(f"에러 내용: {e}")

    save_result(result_map)

    print("\n캡셔닝 완료")
    print(f"새로 처리한 이미지 수: {new_count}")
    print(f"건너뛴 이미지 수: {skip_count}")
    print(f"실패한 이미지 수: {fail_count}")
    print(f"저장 위치: {OUTPUT_JSON_PATH}")
342
+
343
+
344
+ if __name__ == "__main__":
345
+ main()
src/caption/generate_captions_git.py ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import random
4
+ import re
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Dict, List, Tuple
9
+
10
+ import os
11
+ import torch
12
+ from dotenv import load_dotenv
13
+ from PIL import Image
14
+ from tqdm import tqdm
15
+ from transformers import AutoModelForCausalLM, AutoProcessor
16
+
17
+
18
+ # ============================================================
19
+ # 1. 설정값
20
+ # ============================================================
21
+
22
+ load_dotenv()
23
+
24
+ # .env 안의 HF_TOKEN 읽기
25
+ hf_token = os.getenv("HF_TOKEN")
26
+
27
+ # 이미지 원본 루트 경로
28
+ # 예:
29
+ # - 전체 클래스 캡셔닝: "data/raw"
30
+ # - 특정 클래스만 캡셔닝: "data/raw/apple"
31
+ INPUT_IMAGE_PATH = "data/raw/airplane"
32
+
33
+ # image 필드를 만들 때 기준이 되는 root
34
+ # JSON에는 "pizza/hf_pizza_001.jpg" 형태로 저장됨
35
+ DATA_RAW_ROOT = "data/raw"
36
+
37
+ # 결과 저장 경로
38
+ OUTPUT_JSON_PATH = "data/annotations/captions_git.json"
39
+
40
+ # 에러 이미지 목록 저장 경로
41
+ ERROR_JSON_PATH = "data/annotations/caption_git_errors.json"
42
+
43
+ # GIT 모델
44
+ # 기본 추천: microsoft/git-base-coco
45
+ # 더 큰 모델을 쓰고 싶으면: microsoft/git-large-coco
46
+ MODEL_NAME = "microsoft/git-large-coco"
47
+
48
+ # 이미지당 생성할 캡션 개수
49
+ CAPTIONS_PER_IMAGE = 3
50
+
51
+ # split 비율
52
+ # 기본 7 : 1.5 : 1.5
53
+ SPLIT_RATIO = {
54
+ "train": 0.7,
55
+ "val": 0.15,
56
+ "test": 0.15,
57
+ }
58
+
59
+ # split 재현성을 위한 seed
60
+ RANDOM_SEED = 42
61
+
62
+ # 추론 배치 크기
63
+ # GPU 메모리가 부족하면 8 -> 4 -> 2 -> 1 순서로 줄이기
64
+ BATCH_SIZE = 8
65
+
66
+ # 장치 설정
67
+ # "auto": CUDA 가능하면 GPU, 아니면 CPU
68
+ # 직접 지정 가능: "cuda", "cpu"
69
+ DEVICE = "auto"
70
+
71
+ # dtype 설정
72
+ # "auto": CUDA면 float16, CPU면 float32
73
+ # 직접 지정 가능: "float32", "float16", "bfloat16"
74
+ TORCH_DTYPE = "auto"
75
+
76
+ # 중간 저장 간격
77
+ # 이미지가 많을 때 중간에 오류가 나도 일부 결과를 보존하기 위한 설정
78
+ SAVE_EVERY_N_IMAGES = 100
79
+
80
+ # 기존 OUTPUT_JSON_PATH가 있으면 이미 캡셔닝된 이미지는 건너뛸지 여부
81
+ RESUME_FROM_EXISTING_JSON = True
82
+
83
+ # 지원 이미지 확장자
84
+ SUPPORTED_EXTENSIONS = {
85
+ ".jpg", ".jpeg", ".png", ".webp", ".bmp"
86
+ }
87
+
88
+ # 캡션 생성 설정
89
+ # num_beams >= num_return_sequences 여야 함
90
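+ # NOTE: num_beam_groups is not set below, so this is plain beam search; the returned captions are the top beams and can be near-duplicates (the sampling fallback further down compensates for that).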
91
+ GENERATION_CONFIG = {
92
+ "max_length": 40,
93
+ "num_beams": 5,
94
+ "num_return_sequences": CAPTIONS_PER_IMAGE,
95
+ "early_stopping": True,
96
+ "no_repeat_ngram_size": 2,
97
+ }
98
+
99
+ # beam search 결과가 너무 중복될 때 추가 샘플링으로 보완할지 여부
100
+ ENABLE_SAMPLING_FALLBACK = True
101
+
102
+ SAMPLING_FALLBACK_CONFIG = {
103
+ "max_length": 40,
104
+ "do_sample": True,
105
+ "top_p": 0.9,
106
+ "temperature": 0.8,
107
+ "num_return_sequences": CAPTIONS_PER_IMAGE,
108
+ "no_repeat_ngram_size": 2,
109
+ }
110
+
111
+
112
+ # ============================================================
113
+ # 2. 데이터 구조
114
+ # ============================================================
115
+
116
@dataclass
class ImageItem:
    """A single image file together with the metadata stored for it."""

    # Location of the image file on disk.
    path: Path
    # Value written to the "image" key of the result JSON.
    image_field: str
    # Value written to the "class" key of the result JSON.
    class_name: str
    # Dataset split name; empty until one has been assigned.
    split: str = ""
122
+
123
+
124
+ # ============================================================
125
+ # 3. 유틸 함수
126
+ # ============================================================
127
+
128
def resolve_device() -> torch.device:
    """Return the torch device selected by the DEVICE setting.

    "auto" resolves to CUDA when ``torch.cuda.is_available()`` reports it and
    to CPU otherwise; any other value is handed to ``torch.device`` as-is.
    """
    if DEVICE != "auto":
        return torch.device(DEVICE)

    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(device_name)
132
+
133
+
134
def resolve_dtype(device: torch.device) -> torch.dtype:
    """Return the torch dtype selected by the TORCH_DTYPE setting.

    "auto" yields float16 on a CUDA device and float32 on anything else.
    An explicit "float16"/"bfloat16" request on a CPU device is overridden
    with float32 after printing a warning.

    Raises:
        ValueError: TORCH_DTYPE is neither "auto" nor a supported dtype name.
    """
    if TORCH_DTYPE == "auto":
        return torch.float32 if device.type != "cuda" else torch.float16

    supported = {
        "float32": torch.float32,
        "float16": torch.float16,
        "bfloat16": torch.bfloat16,
    }

    if TORCH_DTYPE not in supported:
        raise ValueError(f"지원하지 않는 TORCH_DTYPE입니다: {TORCH_DTYPE}")

    half_precision_requested = TORCH_DTYPE in {"float16", "bfloat16"}
    if half_precision_requested and device.type == "cpu":
        print("[WARN] CPU에서는 float16/bfloat16이 불안정할 수 있어 float32로 변경합니다.")
        return torch.float32

    return supported[TORCH_DTYPE]
152
+
153
+
154
def normalize_caption(text: str) -> str:
    """Trim ``text`` and collapse each run of whitespace into one space."""
    trimmed = text.strip()
    return re.sub(r"\s+", " ", trimmed)
158
+
159
+
160
def deduplicate_captions(captions: List[str]) -> List[str]:
    """Normalize ``captions`` and drop empty and repeated entries.

    Two captions count as the same when they match case-insensitively after
    normalization. The first occurrence is kept, in its original order.
    """
    first_seen: Dict[str, str] = {}

    for raw in captions:
        cleaned = normalize_caption(raw)
        if cleaned:
            # setdefault keeps the earliest spelling for each lowercase key.
            first_seen.setdefault(cleaned.lower(), cleaned)

    return list(first_seen.values())
179
+
180
+
181
def ensure_caption_count(captions: List[str], target_count: int) -> List[str]:
    """Return exactly ``target_count`` captions built from ``captions``.

    The input is de-duplicated first. Extra captions are cut off; when too
    few remain, the last one is repeated. With no usable caption at all the
    result is ``target_count`` empty strings.
    """
    unique = deduplicate_captions(captions)

    if len(unique) >= target_count:
        return unique[:target_count]

    if not unique:
        return [""] * target_count

    shortfall = target_count - len(unique)
    return unique + [unique[-1]] * shortfall
194
+
195
+
196
def save_json(path: Path, data: List[dict]) -> None:
    """Serialize ``data`` to ``path`` as UTF-8 JSON without risking the old file.

    The JSON is written to a sibling temporary file first and then moved over
    ``path``. If the write is interrupted or serialization fails, the file
    that was at ``path`` before stays intact, so a checkpoint used for
    resuming is never left truncated.

    Args:
        path: Destination file; missing parent directories are created.
        data: JSON-serializable list of records.

    Raises:
        TypeError: ``data`` contains a value ``json`` cannot serialize.
        OSError: The file cannot be written or moved into place.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    tmp_path = path.with_name(path.name + ".tmp")

    try:
        with tmp_path.open("w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

        # Path.replace overwrites the destination in a single rename.
        tmp_path.replace(path)
    finally:
        # Only present here when something above failed; do not leave it around.
        if tmp_path.exists():
            tmp_path.unlink()
201
+
202
+
203
def load_existing_json(path: Path) -> Dict[str, dict]:
    """Load previously saved results and index them by their "image" value.

    Returns an empty dict when ``path`` does not exist. Records without a
    non-empty "image" value are ignored; when the same image appears more
    than once, the later record wins.
    """
    if not path.exists():
        return {}

    with path.open("r", encoding="utf-8") as f:
        records = json.load(f)

    return {
        record["image"]: record
        for record in records
        if record.get("image")
    }
219
+
220
+
221
+ # ============================================================
222
+ # 4. 이미지 수집
223
+ # ============================================================
224
+
225
def collect_images(input_path: Path, data_raw_root: Path) -> List[ImageItem]:
    """Find every supported image under ``input_path`` and describe it.

    Files are searched recursively and returned in sorted path order. For
    each file the path relative to ``data_raw_root`` becomes ``image_field``
    (POSIX form) and its first component becomes ``class_name``.

    Raises:
        FileNotFoundError: ``input_path`` does not exist.
        RuntimeError: No supported image file was found.
        ValueError: An image is not below ``data_raw_root`` or is not inside
            a class folder.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"입력 경로가 존재하지 않습니다: {input_path}")

    def _is_supported_image(candidate: Path) -> bool:
        return candidate.is_file() and candidate.suffix.lower() in SUPPORTED_EXTENSIONS

    found_files = sorted(filter(_is_supported_image, input_path.rglob("*")))

    if not found_files:
        raise RuntimeError(f"이미지를 찾지 못했습니다: {input_path}")

    collected = []

    for file_path in found_files:
        try:
            rel = file_path.relative_to(data_raw_root)
        except ValueError:
            raise ValueError(
                f"이미지 경로가 DATA_RAW_ROOT 하위에 있어야 합니다.\n"
                f"image_path={file_path}\n"
                f"DATA_RAW_ROOT={data_raw_root}"
            )

        # A file directly under the root has no class folder to take a name from.
        if len(rel.parts) < 2:
            raise ValueError(
                f"이미지는 클래스 폴더 하위에 있어야 합니다: {file_path}\n"
                f"예: data/raw/pizza/hf_pizza_001.jpg"
            )

        collected.append(
            ImageItem(
                path=file_path,
                image_field=rel.as_posix(),
                class_name=rel.parts[0],
            )
        )

    return collected
268
+
269
+
270
+ # ============================================================
271
+ # 5. split 분리
272
+ # ============================================================
273
+
274
def calculate_split_counts(total_count: int) -> Dict[str, int]:
    """Divide ``total_count`` among the splits in SPLIT_RATIO.

    Each split first receives the floor of its proportional share. The
    items left over are then handed out one at a time, starting with the
    split whose share had the largest fractional part.
    """
    ratio_total = sum(SPLIT_RATIO.values())

    exact_shares = {
        name: total_count * share / ratio_total
        for name, share in SPLIT_RATIO.items()
    }
    counts = {name: int(math.floor(value)) for name, value in exact_shares.items()}

    leftover = total_count - sum(counts.values())

    def _fractional_part(name: str) -> float:
        return exact_shares[name] - counts[name]

    by_largest_fraction = sorted(exact_shares, key=_fractional_part, reverse=True)

    for name in by_largest_fraction[:leftover]:
        counts[name] += 1

    return counts
300
+
301
+
302
def assign_splits(items: List[ImageItem]) -> List[ImageItem]:
    """Set ``split`` on every item, splitting each class separately.

    Items are grouped by ``class_name``; each group is shuffled with a
    generator seeded from RANDOM_SEED and then cut into consecutive
    train / val / test slices sized by ``calculate_split_counts``.
    The items are modified in place and the same list is returned.
    """
    rng = random.Random(RANDOM_SEED)

    grouped = defaultdict(list)
    for entry in items:
        grouped[entry.class_name].append(entry)

    for members in grouped.values():
        rng.shuffle(members)
        quotas = calculate_split_counts(len(members))

        cursor = 0
        for split_name in ("train", "val", "test"):
            upper = cursor + quotas.get(split_name, 0)

            for member in members[cursor:upper]:
                member.split = split_name

            cursor = upper

    return items
326
+
327
+
328
+ # ============================================================
329
+ # 6. 모델 로드
330
+ # ============================================================
331
+
332
def load_model():
    """Load the processor and model named by MODEL_NAME.

    The device and dtype come from ``resolve_device`` / ``resolve_dtype``.
    The model is moved to the device and switched to eval mode.

    Returns:
        ``(model, processor, device, torch_dtype)``.
    """
    device = resolve_device()
    torch_dtype = resolve_dtype(device)

    for label, value in (("device", device), ("dtype", torch_dtype), ("model", MODEL_NAME)):
        print(f"[INFO] {label}={value}")

    processor = AutoProcessor.from_pretrained(MODEL_NAME, token=hf_token)

    load_kwargs = {"dtype": torch_dtype, "token": hf_token}
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)

    model.to(device)
    model.eval()

    return model, processor, device, torch_dtype
351
+
352
+
353
+ # ============================================================
354
+ # 7. 캡셔닝
355
+ # ============================================================
356
+
357
def load_batch_images(batch_items: List[ImageItem]) -> Tuple[List[Image.Image], List[ImageItem], List[dict]]:
    """Open the images of one batch as RGB, collecting failures separately.

    Returns:
        ``(images, valid_items, errors)``. ``images`` and ``valid_items``
        are index-aligned and cover only the files that opened; ``errors``
        holds one record (image, class, split, error text) per failed file.
    """
    opened = []
    failures = []

    for entry in batch_items:
        try:
            with Image.open(entry.path) as source:
                rgb_image = source.convert("RGB")
        except Exception as e:
            # A single unreadable file must not abort the batch.
            failures.append({
                "image": entry.image_field,
                "class": entry.class_name,
                "split": entry.split,
                "error": str(e),
            })
        else:
            opened.append((rgb_image, entry))

    images = [pair[0] for pair in opened]
    valid_items = [pair[1] for pair in opened]

    return images, valid_items, failures
376
+
377
+
378
@torch.inference_mode()
def generate_batch_captions(
    model,
    processor,
    device: torch.device,
    torch_dtype: torch.dtype,
    images: List[Image.Image],
) -> List[List[str]]:
    """Generate captions for a batch of images with GENERATION_CONFIG.

    Returns one list per input image, in input order. Each list holds that
    image's de-duplicated captions, so it may contain fewer than
    CAPTIONS_PER_IMAGE entries.
    """
    encoded = processor(images=images, return_tensors="pt")

    model_inputs = {}
    for name, tensor in encoded.items():
        model_inputs[name] = tensor.to(device)

    # Pixel data has to match the dtype the model was loaded with.
    if "pixel_values" in model_inputs:
        model_inputs["pixel_values"] = model_inputs["pixel_values"].to(dtype=torch_dtype)

    generated_ids = model.generate(**model_inputs, **GENERATION_CONFIG)
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)

    # The decoded sequences come back flat: CAPTIONS_PER_IMAGE per image.
    return [
        deduplicate_captions(
            decoded[index * CAPTIONS_PER_IMAGE:(index + 1) * CAPTIONS_PER_IMAGE]
        )
        for index in range(len(images))
    ]
418
+
419
+
420
@torch.inference_mode()
def generate_sampling_fallback_captions(
    model,
    processor,
    device: torch.device,
    torch_dtype: torch.dtype,
    image: Image.Image,
) -> List[str]:
    """Generate captions for one image with SAMPLING_FALLBACK_CONFIG.

    Returns the decoded captions after de-duplication.
    """
    encoded = processor(images=[image], return_tensors="pt")

    model_inputs = {}
    for name, tensor in encoded.items():
        model_inputs[name] = tensor.to(device)

    # Pixel data has to match the dtype the model was loaded with.
    if "pixel_values" in model_inputs:
        model_inputs["pixel_values"] = model_inputs["pixel_values"].to(dtype=torch_dtype)

    generated_ids = model.generate(**model_inputs, **SAMPLING_FALLBACK_CONFIG)
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)

    return deduplicate_captions(decoded)
449
+
450
+
451
def make_result_item(item: ImageItem, captions: List[str]) -> dict:
    """Build the JSON record for ``item`` (keys: image, class, captions, split)."""
    record = {"image": item.image_field, "class": item.class_name}
    record["captions"] = captions
    record["split"] = item.split
    return record
458
+
459
+
460
def _merge_results_in_order(result_map: Dict[str, dict], items: List[ImageItem]) -> List[dict]:
    """Return every stored result: those for ``items`` first (in that order), then the rest."""
    ordered = []
    written = set()

    for item in items:
        key = item.image_field
        if key in result_map and key not in written:
            ordered.append(result_map[key])
            written.add(key)

    # Results loaded from an earlier run for images outside ``items`` are kept.
    ordered.extend(
        entry
        for key, entry in result_map.items()
        if key not in written
    )

    return ordered


def caption_images(
    model,
    processor,
    device: torch.device,
    torch_dtype: torch.dtype,
    items: List[ImageItem],
    existing_result_map: Dict[str, dict],
) -> Tuple[Dict[str, dict], List[dict]]:
    """Caption every item that has no stored result yet.

    Items are processed in batches of BATCH_SIZE. Images whose beam-search
    captions de-duplicate to fewer than CAPTIONS_PER_IMAGE get extra sampled
    captions when ENABLE_SAMPLING_FALLBACK is set, and every stored record
    ends up with exactly CAPTIONS_PER_IMAGE captions.

    Checkpoints are written to OUTPUT_JSON_PATH / ERROR_JSON_PATH once at
    least SAVE_EVERY_N_IMAGES images have been captioned since the previous
    checkpoint. A checkpoint contains every stored result, including those
    loaded from ``existing_result_map`` for images not in ``items``.

    Args:
        model, processor: Objects returned by ``load_model``.
        device, torch_dtype: Where and in which dtype inference runs.
        items: Images to consider.
        existing_result_map: Earlier results keyed by image field; not modified.

    Returns:
        ``(result_map, error_list)``: all results keyed by image field, and
        one error record per image that could not be loaded or captioned.
    """
    result_map = dict(existing_result_map)
    error_list = []

    target_items = [
        item
        for item in items
        if item.image_field not in result_map
    ]

    print(f"[INFO] 전체 이미지 수: {len(items)}")
    print(f"[INFO] 기존 결과 수: {len(existing_result_map)}")
    print(f"[INFO] 새로 캡셔닝할 이미지 수: {len(target_items)}")

    processed_count = 0
    last_saved_count = 0

    for batch_start in tqdm(range(0, len(target_items), BATCH_SIZE), desc="Captioning"):
        batch_items = target_items[batch_start:batch_start + BATCH_SIZE]

        images, valid_items, errors = load_batch_images(batch_items)
        error_list.extend(errors)

        if not images:
            continue

        try:
            batch_captions = generate_batch_captions(
                model=model,
                processor=processor,
                device=device,
                torch_dtype=torch_dtype,
                images=images,
            )

            for image, item, captions in zip(images, valid_items, batch_captions):
                if ENABLE_SAMPLING_FALLBACK and len(captions) < CAPTIONS_PER_IMAGE:
                    fallback_captions = generate_sampling_fallback_captions(
                        model=model,
                        processor=processor,
                        device=device,
                        torch_dtype=torch_dtype,
                        image=image,
                    )

                    captions = deduplicate_captions(captions + fallback_captions)

                captions = ensure_caption_count(
                    captions=captions,
                    target_count=CAPTIONS_PER_IMAGE,
                )

                result_map[item.image_field] = make_result_item(
                    item=item,
                    captions=captions,
                )

                processed_count += 1

        except Exception as e:
            # Best effort: report the batch and continue with the next one.
            print("[ERROR] 배치 캡셔닝 실패")
            print(f"[ERROR] {type(e).__name__}: {e}")

            for item in valid_items:
                # Images stored before the failure are results, not errors.
                if item.image_field in result_map:
                    continue

                error_list.append({
                    "image": item.image_field,
                    "class": item.class_name,
                    "split": item.split,
                    "error": str(e),
                })

        # processed_count grows by up to BATCH_SIZE per iteration, so an exact
        # "multiple of SAVE_EVERY_N_IMAGES" test would skip most checkpoints.
        if SAVE_EVERY_N_IMAGES > 0 and processed_count - last_saved_count >= SAVE_EVERY_N_IMAGES:
            save_json(Path(OUTPUT_JSON_PATH), _merge_results_in_order(result_map, items))
            save_json(Path(ERROR_JSON_PATH), error_list)
            last_saved_count = processed_count

    return result_map, error_list
548
+
549
+
550
+ # ============================================================
551
+ # 8. main
552
+ # ============================================================
553
+
554
def main():
    """Run the captioning pipeline and write the result and error JSON files.

    Steps: collect images under INPUT_IMAGE_PATH, assign splits, optionally
    load earlier results (RESUME_FROM_EXISTING_JSON), caption what is
    missing, then save.

    The saved file contains every result held in memory. Results loaded from
    an earlier run for images outside INPUT_IMAGE_PATH (for example other
    class folders) are kept instead of being dropped when only one class
    folder is processed.
    """
    input_path = Path(INPUT_IMAGE_PATH).resolve()
    data_raw_root = Path(DATA_RAW_ROOT).resolve()
    output_json_path = Path(OUTPUT_JSON_PATH)
    error_json_path = Path(ERROR_JSON_PATH)

    items = collect_images(
        input_path=input_path,
        data_raw_root=data_raw_root,
    )

    items = assign_splits(items)

    existing_result_map = {}

    if RESUME_FROM_EXISTING_JSON:
        existing_result_map = load_existing_json(output_json_path)

    model, processor, device, torch_dtype = load_model()

    result_map, error_list = caption_images(
        model=model,
        processor=processor,
        device=device,
        torch_dtype=torch_dtype,
        items=items,
        existing_result_map=existing_result_map,
    )

    # Results for the current images come first, in collection order.
    final_results = []
    written = set()

    for item in items:
        key = item.image_field
        if key in result_map and key not in written:
            final_results.append(result_map[key])
            written.add(key)

    # Then everything else that was loaded from the existing JSON.
    final_results.extend(
        entry
        for key, entry in result_map.items()
        if key not in written
    )

    save_json(output_json_path, final_results)
    save_json(error_json_path, error_list)

    print("[DONE] 캡셔닝 완료")
    print(f"[DONE] 결과 저장: {output_json_path}")
    print(f"[DONE] 에러 저장: {error_json_path}")
    print(f"[DONE] 정상 결과 수: {len(final_results)}")
    print(f"[DONE] 에러 수: {len(error_list)}")
597
+
598
+
599
+ if __name__ == "__main__":
600
+ main()
src/caption/generate_captions_vit_gpt2.py ADDED
@@ -0,0 +1,457 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import random
3
+ from pathlib import Path
4
+ from collections import defaultdict
5
+
6
+ import torch
7
+ from PIL import Image, UnidentifiedImageError
8
+ from tqdm import tqdm
9
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
10
+
11
+
12
+ # =========================================================
13
+ # 1. 설정값
14
+ # =========================================================
15
+
16
+ # 전체 클래스 캡셔닝:
17
+ # INPUT_IMAGE_DIR = "/workspace/data/raw"
18
+ #
19
+ # 특정 클래스만 캡셔닝:
20
+ # INPUT_IMAGE_DIR = "/workspace/data/raw/apple"
21
+ INPUT_IMAGE_DIR = "/workspace/data/raw/airplane"
22
+
23
+ OUTPUT_JSON_PATH = "/workspace/data/annotations/annotation.json"
24
+
25
+ MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"
26
+
27
+ CAPTIONS_PER_IMAGE = 3
28
+
29
+ SPLIT_RATIO = {
30
+ "train": 0.7,
31
+ "val": 0.15,
32
+ "test": 0.15,
33
+ }
34
+
35
+ RANDOM_SEED = 42
36
+
37
+ BATCH_SIZE = 8
38
+
39
+ IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".webp", ".bmp"]
40
+
41
+ # "auto": data/raw 입력 시 전체 클래스, data/raw/apple 입력 시 apple 클래스만 자동 판단
42
+ # "raw": INPUT_IMAGE_DIR 아래를 전체 raw 폴더로 간주
43
+ # "class": INPUT_IMAGE_DIR 자체를 하나의 클래스 폴더로 간주
44
+ INPUT_MODE = "auto"
45
+
46
+ # 캡션 문장 끝의 마침표 제거 여부
47
+ REMOVE_TRAILING_PERIOD = True
48
+
49
+ # beam search 설정
50
+ GENERATION_CONFIG = {
51
+ "max_new_tokens": 32,
52
+ "num_beams": 8,
53
+ "num_return_sequences": CAPTIONS_PER_IMAGE,
54
+ "early_stopping": True,
55
+ "no_repeat_ngram_size": 2,
56
+ "repetition_penalty": 1.1,
57
+ "length_penalty": 0.8,
58
+ }
59
+
60
+ # beam search 결과가 중복될 때 샘플링으로 보충
61
+ ENABLE_SAMPLING_FALLBACK = True
62
+
63
+ SAMPLING_FALLBACK_CONFIG = {
64
+ "max_new_tokens": 32,
65
+ "do_sample": True,
66
+ "top_p": 0.9,
67
+ "temperature": 0.8,
68
+ "num_return_sequences": CAPTIONS_PER_IMAGE * 2,
69
+ "no_repeat_ngram_size": 2,
70
+ "repetition_penalty": 1.1,
71
+ }
72
+
73
+ MAX_FALLBACK_ROUNDS = 3
74
+
75
+ # 그래도 3개를 못 채우면 중복을 허용해서라도 3개를 맞출지 여부
76
+ FILL_WITH_DUPLICATES_IF_NEEDED = True
77
+
78
+
79
+ # =========================================================
80
+ # 2. 기본 유틸 함수
81
+ # =========================================================
82
+
83
def validate_config():
    """Check that the module-level settings are consistent with each other.

    Raises:
        ValueError: SPLIT_RATIO does not sum to 1 (within 1e-6), num_beams is
            smaller than CAPTIONS_PER_IMAGE, or num_return_sequences differs
            from CAPTIONS_PER_IMAGE.
    """
    ratio_sum = sum(SPLIT_RATIO.values())
    if abs(ratio_sum - 1.0) > 1e-6:
        raise ValueError(f"SPLIT_RATIO의 합은 1이어야 합니다. 현재 합: {ratio_sum}")

    beam_count = GENERATION_CONFIG["num_beams"]
    if beam_count < CAPTIONS_PER_IMAGE:
        raise ValueError("num_beams는 CAPTIONS_PER_IMAGE보다 크거나 같아야 합니다.")

    returned_per_image = GENERATION_CONFIG["num_return_sequences"]
    if returned_per_image != CAPTIONS_PER_IMAGE:
        raise ValueError("GENERATION_CONFIG의 num_return_sequences는 CAPTIONS_PER_IMAGE와 같아야 합니다.")
94
+
95
+
96
def is_image_file(path: Path) -> bool:
    """Return True when the lower-cased suffix of ``path`` is in IMAGE_EXTENSIONS."""
    extension = path.suffix.lower()
    return extension in IMAGE_EXTENSIONS
98
+
99
+
100
def clean_caption(text: str) -> str:
    """Normalize the whitespace of a caption and optionally drop its final period.

    Leading/trailing whitespace is removed and inner whitespace runs become
    single spaces. When REMOVE_TRAILING_PERIOD is set, trailing periods are
    removed together with any space in front of them, so "a cat ." becomes
    "a cat" rather than "a cat " (a leftover space would also make otherwise
    equal captions look different during de-duplication).
    """
    caption = " ".join(text.strip().split())

    if REMOVE_TRAILING_PERIOD:
        # Strip any trailing mix of periods and spaces, e.g. "a cat ." or "a cat. .".
        caption = caption.rstrip(". ")

    return caption
107
+
108
+
109
def unique_captions(captions):
    """Clean ``captions`` and drop empty and repeated entries.

    Captions are compared case-insensitively after ``clean_caption``; the
    first occurrence of each is kept, in input order.
    """
    first_seen = {}

    for raw in captions:
        cleaned = clean_caption(raw)
        if cleaned:
            # setdefault keeps the earliest spelling for each lowercase key.
            first_seen.setdefault(cleaned.lower(), cleaned)

    return list(first_seen.values())
122
+
123
+
124
def load_image(image_path: Path):
    """Open ``image_path`` and return it as an RGB image, or None on failure.

    The source file is opened in a ``with`` block so its handle is closed as
    soon as the RGB copy has been made, instead of staying open until the
    intermediate image object is garbage-collected.

    Returns:
        The converted image, or None (after printing a [SKIP] message) when
        the file cannot be identified or read.
    """
    try:
        with Image.open(image_path) as source:
            return source.convert("RGB")
    except (UnidentifiedImageError, OSError) as e:
        print(f"[SKIP] 이미지를 열 수 없습니다: {image_path} / error: {e}")
        return None
130
+
131
+
132
+ # =========================================================
133
+ # 3. 이미지 목록 수집
134
+ # =========================================================
135
+
136
def has_direct_images(input_dir: Path) -> bool:
    """Return True when ``input_dir`` itself (not a subfolder) contains an image file."""
    return any(
        entry.is_file() and is_image_file(entry)
        for entry in input_dir.iterdir()
    )
141
+
142
+
143
def get_relative_base_dir(input_dir: Path) -> Path:
    """Pick the directory that image paths are made relative to.

    The goal is for the JSON "image" value to read "class_folder/file_name".

    Example 1:
        INPUT_IMAGE_DIR = /workspace/data/raw
        image file      = /workspace/data/raw/pizza/hf_pizza_001.jpg
        relative base   = /workspace/data/raw
        result          = pizza/hf_pizza_001.jpg

    Example 2:
        INPUT_IMAGE_DIR = /workspace/data/raw/apple
        image file      = /workspace/data/raw/apple/hf_apple_001.jpg
        relative base   = /workspace/data/raw
        result          = apple/hf_apple_001.jpg

    INPUT_MODE "raw" returns ``input_dir``, "class" returns its parent, and
    "auto" returns the parent only when ``input_dir`` directly contains images.

    Raises:
        ValueError: INPUT_MODE is not "auto", "raw" or "class".
    """
    if INPUT_MODE not in ("auto", "raw", "class"):
        raise ValueError("INPUT_MODE은 'auto', 'raw', 'class' 중 하나여야 합니다.")

    if INPUT_MODE == "raw":
        return input_dir

    if INPUT_MODE == "class":
        return input_dir.parent

    # "auto": images directly inside input_dir mean it is a class folder.
    return input_dir.parent if has_direct_images(input_dir) else input_dir
172
+
173
+
174
def collect_image_records(input_dir: str):
    """Collect one record per image file found under ``input_dir``.

    Files are searched recursively in sorted path order. Each record has the
    keys "path" (the file path), "image" (POSIX path relative to the base
    chosen by ``get_relative_base_dir``) and "class" (first component of
    that relative path).

    Raises:
        FileNotFoundError: ``input_dir`` does not exist.
        ValueError: No image file was found.
    """
    input_path = Path(input_dir)

    if not input_path.exists():
        # The first word of this message was mis-encoded; restored to "이미지".
        raise FileNotFoundError(f"이미지 경로가 존재하지 않습니다: {input_path}")

    relative_base_dir = get_relative_base_dir(input_path)

    records = []

    for image_path in sorted(input_path.rglob("*")):
        if not image_path.is_file():
            continue

        if not is_image_file(image_path):
            continue

        relative_path = image_path.relative_to(relative_base_dir)
        relative_path_str = relative_path.as_posix()

        # For an image value of apple/xxx.jpg the class is "apple".
        class_name = relative_path.parts[0]

        records.append({
            "path": image_path,
            "image": relative_path_str,
            "class": class_name,
        })

    if not records:
        raise ValueError(f"캡셔닝할 이미지가 없습니다: {input_path}")

    return records
207
+
208
+
209
+ # =========================================================
210
+ # 4. train / val / test split 배정
211
+ # =========================================================
212
+
213
def assign_split(records):
    """Add a "split" key (train / val / test) to every record, per class.

    Records are grouped by "class" and each group is shuffled. The first
    ``int(total * SPLIT_RATIO["train"])`` records of a group become "train",
    the next ``int(total * SPLIT_RATIO["val"])`` become "val" and the
    remainder "test".

    A private ``random.Random(RANDOM_SEED)`` is used instead of reseeding the
    module-level generator. The shuffles are the same as with
    ``random.seed(RANDOM_SEED)``, but the process-wide random state is left
    untouched.

    Returns:
        The records (modified in place), sorted by their "image" value.
    """
    rng = random.Random(RANDOM_SEED)

    class_map = defaultdict(list)

    for record in records:
        class_map[record["class"]].append(record)

    result = []

    for items in class_map.values():
        rng.shuffle(items)

        total = len(items)
        train_count = int(total * SPLIT_RATIO["train"])
        val_count = int(total * SPLIT_RATIO["val"])

        for idx, item in enumerate(items):
            if idx < train_count:
                item["split"] = "train"
            elif idx < train_count + val_count:
                item["split"] = "val"
            else:
                item["split"] = "test"

            result.append(item)

    result.sort(key=lambda x: x["image"])

    return result
243
+
244
+
245
+ # =========================================================
246
+ # 5. 모델 로드
247
+ # =========================================================
248
+
249
def load_model():
    """Load the model, image processor and tokenizer named by MODEL_NAME.

    CUDA is used when available, CPU otherwise. When the tokenizer has no
    pad token, its EOS token is used as one, and the model config's
    ``pad_token_id`` is set from the tokenizer. The model is moved to the
    device and put in eval mode.

    Returns:
        ``(model, processor, tokenizer, device)``.
    """
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)

    print(f"[INFO] device: {device}")
    print(f"[INFO] model: {MODEL_NAME}")

    model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
    processor = ViTImageProcessor.from_pretrained(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    no_pad_token = tokenizer.pad_token is None
    if no_pad_token:
        tokenizer.pad_token = tokenizer.eos_token

    model.config.pad_token_id = tokenizer.pad_token_id
    model.to(device)
    model.eval()

    return model, processor, tokenizer, device
267
+
268
+
269
+ # =========================================================
270
+ # 6. 캡션 생성
271
+ # =========================================================
272
+
273
def decode_output_ids(output_ids, tokenizer):
    """Decode generated ids to text (special tokens skipped) and clean each caption."""
    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return list(map(clean_caption, decoded))
276
+
277
+
278
@torch.no_grad()
def generate_by_beam_search(images, model, processor, tokenizer, device):
    """Generate captions for a batch of images with GENERATION_CONFIG.

    Returns one list per input image, in input order, each holding that
    image's slice of CAPTIONS_PER_IMAGE decoded captions.
    """
    encoded = processor(images=images, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    output_ids = model.generate(pixel_values, **GENERATION_CONFIG)
    flat_captions = decode_output_ids(output_ids, tokenizer)

    # The decoded captions are flat: CAPTIONS_PER_IMAGE consecutive entries per image.
    return [
        flat_captions[index * CAPTIONS_PER_IMAGE:(index + 1) * CAPTIONS_PER_IMAGE]
        for index in range(len(images))
    ]
301
+
302
+
303
@torch.no_grad()
def generate_by_sampling(image, model, processor, tokenizer, device):
    """Generate captions for one image with SAMPLING_FALLBACK_CONFIG and return them decoded."""
    encoded = processor(images=[image], return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    sampled_ids = model.generate(pixel_values, **SAMPLING_FALLBACK_CONFIG)

    return decode_output_ids(sampled_ids, tokenizer)
316
+
317
+
318
def complete_caption_count(captions, original_candidates):
    """Return at most CAPTIONS_PER_IMAGE captions, preferring distinct ones.

    The captions are de-duplicated first. If that leaves too few and
    FILL_WITH_DUPLICATES_IF_NEEDED is set, cleaned entries from
    ``original_candidates`` are appended (duplicates allowed) until the
    target count is reached or the candidates run out. Without that
    setting, the shorter de-duplicated list is returned unchanged.
    """
    selected = unique_captions(captions)
    is_short = len(selected) < CAPTIONS_PER_IMAGE

    if is_short and not FILL_WITH_DUPLICATES_IF_NEEDED:
        return selected

    if is_short:
        for candidate in original_candidates:
            cleaned = clean_caption(candidate)

            if cleaned:
                selected.append(cleaned)

            if len(selected) >= CAPTIONS_PER_IMAGE:
                break

    return selected[:CAPTIONS_PER_IMAGE]
345
+
346
+
347
def generate_captions_for_batch(batch_records, model, processor, tokenizer, device):
    """Caption every loadable image of one batch.

    Records whose image cannot be loaded are left out. Beam search runs once
    for the whole batch. Any image whose beam captions are not all distinct
    gets up to MAX_FALLBACK_ROUNDS rounds of sampled captions (when
    ENABLE_SAMPLING_FALLBACK is set) before ``complete_caption_count``
    finalizes its list.

    Returns:
        A list of dicts with the keys image, class, captions and split;
        empty when no image in the batch could be loaded.
    """
    loaded = []

    for record in batch_records:
        image = load_image(record["path"])
        if image is not None:
            loaded.append((record, image))

    if not loaded:
        return []

    valid_records = [pair[0] for pair in loaded]
    images = [pair[1] for pair in loaded]

    beam_caption_groups = generate_by_beam_search(
        images=images,
        model=model,
        processor=processor,
        tokenizer=tokenizer,
        device=device
    )

    results = []

    for record, image, beam_captions in zip(valid_records, images, beam_caption_groups):
        all_candidates = list(beam_captions)
        captions = unique_captions(beam_captions)

        if ENABLE_SAMPLING_FALLBACK:
            for _ in range(MAX_FALLBACK_ROUNDS):
                if len(captions) >= CAPTIONS_PER_IMAGE:
                    break

                sampled_captions = generate_by_sampling(
                    image=image,
                    model=model,
                    processor=processor,
                    tokenizer=tokenizer,
                    device=device
                )

                all_candidates.extend(sampled_captions)
                captions = unique_captions(captions + sampled_captions)

        final_captions = complete_caption_count(
            captions=captions,
            original_candidates=all_candidates
        )

        results.append({
            "image": record["image"],
            "class": record["class"],
            "captions": final_captions,
            "split": record["split"],
        })

    return results
406
+
407
+
408
+ # =========================================================
409
+ # 7. JSON 저장
410
+ # =========================================================
411
+
412
def save_json(data, output_path: str):
    """Write ``data`` to ``output_path`` as indented UTF-8 JSON and print a summary.

    Missing parent directories are created first.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open("w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print(f"[DONE] JSON 저장 완료: {destination}")
    print(f"[DONE] 총 이미지 수: {len(data)}")
421
+
422
+
423
+ # =========================================================
424
+ # 8. 실행
425
+ # =========================================================
426
+
427
def main():
    """Validate the settings, caption all images in batches and save the JSON.

    Images are collected from INPUT_IMAGE_DIR, assigned a split, captioned
    BATCH_SIZE at a time and written to OUTPUT_JSON_PATH once at the end.
    """
    validate_config()

    records = assign_split(collect_image_records(INPUT_IMAGE_DIR))

    print(f"[INFO] 캡셔닝 대상 이미지 수: {len(records)}")

    model, processor, tokenizer, device = load_model()

    results = []
    batch_starts = range(0, len(records), BATCH_SIZE)

    for start in tqdm(batch_starts, desc="captioning"):
        batch_records = records[start:start + BATCH_SIZE]

        results.extend(
            generate_captions_for_batch(
                batch_records=batch_records,
                model=model,
                processor=processor,
                tokenizer=tokenizer,
                device=device
            )
        )

    save_json(results, OUTPUT_JSON_PATH)
454
+
455
+
456
+ if __name__ == "__main__":
457
+ main()
src/collection/check_class_counts.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
# ================================
# 0. Settings
# ================================
TARGET_COUNT = 60
MIN_RES = 256  # resolution 256
PREFIX = "kg"
BASE_DIR = "./data/raw"

# ================================
# 1. Paths
# ================================
HOME = os.path.expanduser("~")

DATA_DIR = os.path.join(
    HOME,
    "Desktop",
    "raw_kg"
)

# A class is reported when it has fewer than THRESHOLD files.
THRESHOLD = TARGET_COUNT

# ================================
# 2. Class list
# ================================
# NOTE(review): collect_filtering_images.py keys two of these classes as
# "siamese-cat" / "persian-cat" (hyphen); confirm which spelling the folders
# under DATA_DIR use, otherwise those classes are always reported as 0.
CLASS_LIST = [
    # Food and ingredients
    "pizza", "hamburger", "sushi", "pasta", "salad",
    "steak", "cup_cake", "sandwich", "waffle", "dumpling",

    # Animals
    "golden-retriever", "bulldog", "siamese_cat",
    "persian_cat", "elephant", "sheep", "horse",
    "penguin", "butterfly", "squirrel",

    # Flowers
    "rose", "sunflower", "daisy", "tulip", "dandelion",
    "lily", "lavender", "orchid", "iris", "marigold", "aster",

    # Fruit
    "apple", "banana", "strawberry", "orange",
    "carrot", "tomato", "cucumber",

    # Vehicles
    "car", "bicycle", "motorcycle", "airplane", "bus",

    # Fashion and accessories
    "t-shirt", "sneakers", "earrings", "glasses",
    "pants", "bracelet", "necklace"
]

# The header says "fewer than" (미만) so that it matches the `<` test below
# and the summary line; it previously said "at most" (이하).
print(f"{THRESHOLD}장 미만 클래스 목록 (0장 포함)\n")

low_classes = []

# ================================
# 3. Count files per class
# ================================
for cls in sorted(CLASS_LIST):

    cls_path = os.path.join(DATA_DIR, cls)

    # A missing folder (or a stray file with the class name) counts as 0;
    # os.listdir would raise on a path that is not a directory.
    if not os.path.isdir(cls_path):
        count = 0

    else:
        count = len([
            f for f in os.listdir(cls_path)
            if os.path.isfile(os.path.join(cls_path, f))
        ])

    if count < THRESHOLD:
        print(f"{cls}: {count}장")
        low_classes.append((cls, count))

# ================================
# 4. Summary
# ================================
print("\n요약")
print(f"{THRESHOLD}장 미만 클래스 수: {len(low_classes)}개")
src/collection/collect_filtering_images.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import hashlib
3
+ from PIL import Image
4
+
5
+ # ================================
6
+ # 0. 설정
7
+ # ================================
8
+ TARGET_COUNT = 60
9
+ MIN_RES = 128 # 해상도 128
10
+ PREFIX = "kg"
11
+ BASE_DIR = "./data/raw"
12
+
13
+ # ================================
14
+ # 1. 클래스 + 유사어 매핑
15
+ # ================================
16
+ CLASS_MAP = {
17
+ # 음식
18
+ "pizza": ["pizza"],
19
+ "hamburger": ["hamburger"],
20
+ "sushi": ["sushi"],
21
+ "pasta": ["pasta", "spaghetti"],
22
+ "salad": ["salad"],
23
+ "steak": ["steak"],
24
+ "cup_cake": ["cup_cake", "cup cake"],
25
+ "sandwich": ["sandwich"],
26
+ "waffle": ["waffle"],
27
+ "dumpling": ["dumpling"],
28
+
29
+ # 동물
30
+ "golden-retriever": ["golden retriever"],
31
+ "bulldog": ["bulldog"],
32
+ "siamese-cat": ["siamese"],
33
+ "persian-cat": ["persian"],
34
+ "elephant": ["elephant"],
35
+ "sheep": ["sheep"],
36
+ "horse": ["horse"],
37
+ "penguin": ["penguin"],
38
+ "butterfly": ["butterfly"],
39
+ "squirrel": ["squirrel"],
40
+
41
+ # 꽃
42
+ "rose": ["rose"],
43
+ "sunflower": ["sunflower"],
44
+ "daisy": ["daisy"],
45
+ "tulip": ["tulip"],
46
+ "dandelion": ["dandelion"],
47
+ "lily": ["lily"],
48
+ "lavender": ["lavender"],
49
+ "orchid": ["orchid"],
50
+ "iris": ["iris"],
51
+ "marigold": ["marigold"],
52
+ "aster": ["aster"],
53
+
54
+ # 과일
55
+ "apple": ["apple"],
56
+ "banana": ["banana"],
57
+ "strawberry": ["strawberry"],
58
+ "orange": ["orange"],
59
+ "carrot": ["carrot"],
60
+ "tomato": ["tomato"],
61
+ "cucumber": ["cucumber"],
62
+
63
+ # 탈것
64
+ "car": ["car"],
65
+ "bicycle": ["bicycle"],
66
+ "motorcycle": ["motorcycle"],
67
+ "airplane": ["airplane"],
68
+ "bus": ["bus"],
69
+
70
+ # 패션 및 잡화
71
+ "t-shirt": ["t shirt", "t-shirt"],
72
+ "sneakers": ["sneakers"],
73
+ "earrings": ["earring", "earrings"],
74
+ "glasses": ["glasses"],
75
+ "pants": ["pants"],
76
+ "bracelet": ["bracelet"],
77
+ "necklace": ["necklace"]
78
+ }
79
+
80
+ # ================================
81
+ # 2. 경로
82
+ # ================================
83
+ HOME = os.path.expanduser("~")
84
+
85
+ SRC_ROOT = os.path.join(
86
+ HOME,
87
+ "Desktop",
88
+ "raw_full_kg",
89
+ "extracted"
90
+ )
91
+
92
+ DST_ROOT = os.path.join(
93
+ HOME,
94
+ "Desktop",
95
+ "raw_kg"
96
+ )
97
+
98
+ os.makedirs(DST_ROOT, exist_ok=True)
99
+
100
+ # ================================
101
+ # 3. 해상도 필터
102
+ # ================================
103
def is_valid_image(path, min_size=MIN_RES):
    """Return True when the image at *path* is at least *min_size* px on both sides.

    Any file that cannot be opened or parsed as an image is treated as
    invalid: the function returns False instead of raising.
    """
    try:
        with Image.open(path) as img:
            width, height = img.size
            return width >= min_size and height >= min_size
    except Exception:
        # Best-effort filter: unreadable files are rejected. Catching
        # Exception rather than using a bare except lets Ctrl+C and
        # SystemExit stop the script.
        return False
110
+
111
+ # ================================
112
+ # 4. 중복 제거
113
+ # ================================
114
def get_hash(path):
    """Return the MD5 hex digest of the file at *path*, or None if unreadable.

    The digest is used only to detect byte-identical duplicates, not for
    security. The file is hashed in fixed-size chunks so large images are
    never loaded into memory in one piece.
    """
    digest = hashlib.md5()
    try:
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
    except (OSError, ValueError):
        # Missing/unreadable file or an invalid path string.
        return None
    return digest.hexdigest()
120
+
121
+ seen_hashes = set()
122
+
123
+ # ================================
124
+ # 5. 클래스 매칭
125
+ # ================================
126
def match_class(folder_name, class_map=None):
    """Map a source folder name to a target class, or return None.

    The folder name and every keyword are normalised the same way
    (lower-cased, with "-" and "_" treated as spaces) and split into words.
    A keyword matches when all of its words occur among the folder's words,
    so "car" does not match "carrot". Normalising the keywords as well makes
    entries such as "cup_cake" or "t-shirt" usable; previously they could
    never match because the folder name had its separators replaced but the
    keyword had not.

    Args:
        folder_name: Directory name to classify.
        class_map: Mapping of target class -> list of keywords. Defaults to
            the module-level CLASS_MAP.

    Returns:
        The first target class (in mapping order) with a matching keyword,
        or None when nothing matches.
    """
    if class_map is None:
        class_map = CLASS_MAP

    def _words(text):
        return text.lower().replace("-", " ").replace("_", " ").split()

    folder_words = set(_words(folder_name))

    for target, keywords in class_map.items():
        for kw in keywords:
            kw_words = _words(kw)
            # An empty keyword would vacuously match every folder; skip it.
            if kw_words and all(word in folder_words for word in kw_words):
                return target

    return None
140
+
141
+ # ================================
142
+ # 6. 메인 로직
143
+ # ================================
144
# Next sequence number to use in the output file name, per class (1-based).
class_counter = {cls: 1 for cls in CLASS_MAP}

copied = 0    # images written to DST_ROOT
skipped = 0   # files rejected (corrupt, too small, duplicate, save failure)
no_match = 0  # directories whose name matched no class

for root, dirs, _files in os.walk(SRC_ROOT):

    for d in dirs:
        matched_class = match_class(d)

        if matched_class is None:
            no_match += 1
            continue

        src_path = os.path.join(root, d)
        dst_path = os.path.join(DST_ROOT, matched_class)

        # Only files directly inside the matched directory are considered;
        # nested directories are visited on their own by os.walk.
        for img in os.listdir(src_path):

            src_file = os.path.join(src_path, img)

            if not os.path.isfile(src_file):
                continue

            # Integrity check. Exception (not a bare except) is caught so
            # that Ctrl+C can still interrupt a long run.
            try:
                with Image.open(src_file) as im:
                    im.verify()
            except Exception:
                skipped += 1
                continue

            # Resolution filter
            if not is_valid_image(src_file):
                skipped += 1
                continue

            # Duplicate filter (byte-identical files across all classes)
            img_hash = get_hash(src_file)

            if img_hash is None or img_hash in seen_hashes:
                skipped += 1
                continue

            seen_hashes.add(img_hash)

            os.makedirs(dst_path, exist_ok=True)

            number = str(class_counter[matched_class]).zfill(3)
            class_name_for_file = matched_class.replace("_", "-")
            new_name = f"{PREFIX}_{class_name_for_file}_{number}.jpg"
            dst_file = os.path.join(dst_path, new_name)

            # The file is reopened here because the handle used for
            # verify() above was closed with its context manager.
            try:
                with Image.open(src_file) as im:
                    im.convert("RGB").save(dst_file, "JPEG")
            except Exception:
                skipped += 1
                continue

            class_counter[matched_class] += 1
            copied += 1

            if copied % 100 == 0:
                print(f"{copied}장 처리 중...")

print("\n완료!")
print(f"복사: {copied}")
print(f"스킵: {skipped}")
print(f"매칭 실패 폴더: {no_match}")
src/collection/count_label_hf.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from collections import Counter
3
+ from dotenv import load_dotenv
4
+ import os
5
+
6
+ # ============================================================
7
+ # [설정 부분]
8
+ # ============================================================
9
+ load_dotenv()
10
+ HF_TOKEN = os.environ.get("HF_TOKEN")
11
+
12
+ # 확인할 Hugging Face 데이터셋 이름
13
+ DATASET_NAME = "jbarat/plant_species" # 예: "uran66/animals"
14
+
15
+ # 확인할 split 이름
16
+ SPLIT_NAME = "train"
17
+
18
+ # 라벨 필드명
19
+ LABEL_FIELD_NAME = "label"
20
+
21
+ # streaming 사용 여부
22
+ # True : 전체 데이터셋을 미리 다운로드하지 않고 하나씩 읽으면서 확인
23
+ # False : 로컬 캐시에 데이터셋을 다운로드한 뒤 확인
24
+ USE_STREAMING = True
25
+
26
+ # 문자열 라벨 데이터셋일 경우 전체 데이터를 훑어야 정확한 개수를 알 수 있다.
27
+ # None이면 전체 확인, 숫자를 넣으면 일부 샘플만 확인한다.
28
+ MAX_SCAN_ITEMS = None
29
+
30
+ # ============================================================
31
+
32
+
33
def get_label_name(dataset, label_value):
    """Return the label of one item as a string.

    When the dataset's label feature offers ``int2str`` and the value is an
    integer, the integer is converted to its label name; any other value is
    passed through ``str``.
    """
    feature = dataset.features[LABEL_FIELD_NAME]

    is_class_index = isinstance(label_value, int) and hasattr(feature, "int2str")
    if not is_class_index:
        # Already a textual (or otherwise non-integer) label.
        return str(label_value)

    return feature.int2str(label_value)
43
+
44
+
45
def get_unique_labels_with_counts():
    """Print every label of the configured dataset with its item count.

    The split is iterated (up to MAX_SCAN_ITEMS items when that is set) and
    items are tallied per label name. If the label feature exposes ``names``
    the report follows that order and shows 0 for labels never seen;
    otherwise the observed label names are reported in sorted order.

    Returns:
        collections.Counter mapping label name -> number of items.
    """
    print(f"[{DATASET_NAME}] 데이터셋 로드 중...")

    dataset = load_dataset(
        DATASET_NAME,
        split=SPLIT_NAME,
        streaming=USE_STREAMING,
        token=HF_TOKEN
    )
    label_feature = dataset.features[LABEL_FIELD_NAME]
    label_counter = Counter()

    print("\n클래스별 이미지 개수 집계 중...")

    scan_limit = MAX_SCAN_ITEMS
    for position, record in enumerate(dataset):
        if scan_limit is not None and position >= scan_limit:
            break

        raw_label = record.get(LABEL_FIELD_NAME)
        if raw_label is not None:
            # Items without a label are ignored.
            label_counter[get_label_name(dataset, raw_label)] += 1

    print("\n라벨 목록 및 클래스별 이미지 개수")
    print("-" * 60)

    declared_names = getattr(label_feature, "names", None)
    if declared_names is not None:
        # Feature declares its label names: keep the dataset's own order.
        rows = [(name, label_counter.get(name, 0)) for name in declared_names]
    else:
        # Plain labels: report what was observed, alphabetically.
        rows = [(name, label_counter[name]) for name in sorted(label_counter.keys())]

    for idx, (label_name, count) in enumerate(rows):
        print(f"{idx}: {label_name} - {count} 장")

    print("-" * 60)
    print(f"총 라벨 개수: {len(label_counter)}")
    print(f"총 이미지 개수: {sum(label_counter.values())}")

    return label_counter


if __name__ == "__main__":
    get_unique_labels_with_counts()
src/collection/download_dataset_hf.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import glob
4
+ from dotenv import load_dotenv
5
+ from datasets import load_dataset
6
+ from PIL import Image
7
+
8
+ # =====================================================================
9
+ # [설정 부분]
10
+ # =====================================================================
11
# Hugging Face access token, read from the environment (.env is supported).
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN")

# Never echo the secret itself (it would end up in terminals and logs);
# only report whether a token was found.
print(f"HF_TOKEN 설정 여부: {'설정됨' if HF_TOKEN else '없음'}")
16
+
17
+ # 수집할 데이터셋
18
+ DATASET_NAME = "KrushiJethe/fashion_data"
19
+ # 데이터셋 내의 이미지 데이터가 있는 필드명
20
+ IMAGE_FIELD_NAME = "image"
21
+ # 데이터셋 내의 라벨 데이터가 있는 필드명
22
+ LABEL_FIELD_NAME = "articleType"
23
+
24
+ # 여러 라벨을 하나의 대표 클래스로 묶는 매핑 딕셔너리
25
+ CLASS_MAPPING = {
26
+ "t-shirt": ["Tshirts", "Tops"],
27
+ "sneakers":["Casual Shoes"],
28
+ #"umbrella":["Umbrellas"],
29
+ "glasses":["Sunglasses"],
30
+ "pants":["Jeans"],
31
+ }
32
+
33
+ # 클래스별로 수집할 이미지의 최대 개수
34
+ NUM_IMAGES_PER_CLASS = 100
35
+ # 저장할 이미지의 해상도 (width, height)
36
+ TARGET_RESOLUTION = 256
37
+ # 이미지를 저장할 최상위 디렉토리명
38
+ BASE_SAVE_DIR = "./dataset_output"
39
+ # 수집할 데이터셋의 split 이름 (예: "train", "validation", "test")
40
+ SPLIT_NAME = "train"
41
+
42
+ # 컨테이너를 실행한 상태에서는 컨테이너에 캐시 저장됨
43
+ # 캐시 확인 -> ls -lah ~/.cache/huggingface
44
+ # 캐시 삭제 -> rm -rf ~/.cache/huggingface
45
+ USE_STREAMING = False
46
+ # =====================================================================
47
+
48
+ # 클래스 명명 규칙 적용
49
def format_class_name(class_name: str) -> str:
    """Return the folder/file form of a class name.

    The value is converted with ``str``, lower-cased, and every underscore
    or space is replaced by "-".
    """
    separators_to_dash = str.maketrans({"_": "-", " ": "-"})
    return str(class_name).lower().translate(separators_to_dash)
54
+
55
+ # 마지막 이미지의 번호 + 1
56
def get_next_image_index(save_dir: str, formatted_class_name: str) -> int:
    """Return the next free image number for a class folder.

    Scans *save_dir* for files named ``hf_<class>_<digits>.jpg`` or
    ``.jpeg`` and returns the highest number found plus one, so collection
    can resume without overwriting earlier files. Returns 1 when the folder
    does not exist or holds no matching file.

    The class name is escaped before being placed in the pattern, so names
    containing regex metacharacters (for example "+" or ".") are matched
    literally instead of silently failing to match and restarting at 1.
    """
    if not os.path.isdir(save_dir):
        return 1

    pattern = re.compile(
        rf"hf_{re.escape(formatted_class_name)}_(\d+)\.jpe?g"
    )

    matches = (pattern.fullmatch(name) for name in os.listdir(save_dir))
    highest = max((int(m.group(1)) for m in matches if m), default=0)

    return highest + 1
83
+
84
def collect_hf_images():
    """Collect images for each representative class from the HF dataset.

    Items are read from DATASET_NAME / SPLIT_NAME. An item is kept when its
    label maps to a class in CLASS_MAPPING, that class still needs images,
    and the image is at least TARGET_RESOLUTION px on both sides. Kept
    images are saved as ``hf_<class>_<NNN>.jpg`` under
    ``BASE_SAVE_DIR/<class>/``, continuing from the highest number already
    present in that folder. Up to NUM_IMAGES_PER_CLASS images are saved per
    class in one run; a failure on a single image is reported and skipped.
    """
    # Reverse lookup: raw dataset label -> representative class.
    label_to_rep_class = {
        label: rep_class
        for rep_class, labels in CLASS_MAPPING.items()
        for label in labels
    }

    print(f"[{DATASET_NAME}] 데이터셋 스트리밍 로드 시작...")
    dataset = load_dataset(
        DATASET_NAME,
        split=SPLIT_NAME,
        streaming=USE_STREAMING,
        token=HF_TOKEN,
    )

    # Per-class bookkeeping: folder name, save path, images saved in this
    # run, and the next file number to use.
    class_info = {}
    for label in CLASS_MAPPING:
        formatted_name = format_class_name(label)
        save_path = os.path.join(BASE_SAVE_DIR, formatted_name)
        os.makedirs(save_path, exist_ok=True)

        class_info[label] = {
            "formatted_name": formatted_name,
            "save_path": save_path,
            "collected_count": 0,
            "current_idx": get_next_image_index(save_path, formatted_name),
        }

    print("데이터 수집을 시작합니다...")

    # Classes that still need images. Tracking them in a set avoids
    # re-scanning every class for every dataset item.
    pending = {
        rep_class
        for rep_class, info in class_info.items()
        if info["collected_count"] < NUM_IMAGES_PER_CLASS
    }

    for item in dataset:
        if not pending:
            break

        current_label = item.get(LABEL_FIELD_NAME)
        rep_class = label_to_rep_class.get(current_label)
        if rep_class not in pending:
            # Unmapped label, or a class that already reached its quota.
            continue

        image = item.get(IMAGE_FIELD_NAME)
        if image is None:
            continue

        target_info = class_info[rep_class]

        try:
            # Keep only images of at least TARGET_RESOLUTION px per side.
            if image.width < TARGET_RESOLUTION or image.height < TARGET_RESOLUTION:
                continue

            # JPEG has no alpha/palette support, so normalise to RGB.
            if image.mode != "RGB":
                image = image.convert("RGB")

            file_name = (
                f"hf_{target_info['formatted_name']}_"
                f"{target_info['current_idx']:03d}.jpg"
            )
            file_path = os.path.join(target_info["save_path"], file_name)
            image.save(file_path, "JPEG", quality=95)
        except Exception as e:
            # One bad image must not abort the whole collection run.
            print(f"이미지 저장 중 오류 발생 (Label: {current_label}): {e}")
            continue

        target_info["collected_count"] += 1
        target_info["current_idx"] += 1
        print(f"Saved: {file_path} ({target_info['collected_count']}/{NUM_IMAGES_PER_CLASS})")

        if target_info["collected_count"] >= NUM_IMAGES_PER_CLASS:
            pending.discard(rep_class)

    if pending:
        # The dataset ran out before every class reached its quota.
        print(f"목표 수량 미달 클래스: {sorted(pending)}")
    else:
        print("모든 클래스의 이미지 수집이 완료되었습니다.")


if __name__ == "__main__":
    collect_hf_images()
src/collection/download_dataset_kg.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # ================================
4
+ # 0. 설정
5
+ # ================================
6
+ TARGET_COUNT = 60
7
+ MIN_RES = 128 # 해상도 128
8
+ PREFIX = "kg"
9
+ BASE_DIR = "./data/raw"
10
+
11
+ # ================================
12
+ # 1. 다운로드 경로
13
+ # ================================
14
+ DOWNLOAD_PATH = "data/raw_full_kg"
15
+
16
+ os.makedirs(DOWNLOAD_PATH, exist_ok=True)
17
+
18
+ # ================================
19
+ # 2. 사용할 Kaggle 데이터셋 (slug 기준)
20
+ # ================================
21
+ DATASETS = [
22
+ # 음식 및 식재료
23
+ "kmader/food41",
24
+
25
+ # 동물
26
+ "alessiocorrado99/animals10",
27
+ "gpiosenka/100-bird-species",
28
+
29
+ # 꽃
30
+ "alxmamaev/flowers-recognition",
31
+
32
+ # 과일
33
+ "moltean/fruits",
34
+ "yihfeng/strawberry-maturity",
35
+
36
+ # 탈것
37
+ "sshikamaru/car-object-detection",
38
+ "jessicali9530/stanford-cars-dataset",
39
+ "dataclusterlabs/vehicle-detection-image-dataset",
40
+ "meowmeowmeowmeowmeow/vehicle-type-recognition",
41
+
42
+ # 패션 및 잡화
43
+ "promptcloudhq/jewelry-text-to-image-dataset",
44
+ "ashwingupta3012/glasses-dataset",
45
+ "agrigorev/clothing-dataset-full",
46
+ "paramaggarwal/fashion-product-images-small"
47
+ ]
48
+
49
# ================================
# 3. Run the downloads
# ================================
# Imported here because this script's import block only brings in os.
import subprocess

failed = []  # dataset slugs whose download did not succeed

for ds in DATASETS:
    print(f"\nDownloading {ds} ...")
    try:
        # Argument list instead of a shell string: no shell parsing of the
        # slug or path, and the exit status can be inspected.
        result = subprocess.run(
            ["kaggle", "datasets", "download", "-d", ds, "-p", DOWNLOAD_PATH],
            check=False,
        )
    except FileNotFoundError:
        print("kaggle CLI를 찾을 수 없습니다. 설치 및 PATH 설정을 확인하세요.")
        failed.append(ds)
        break

    if result.returncode != 0:
        failed.append(ds)

if failed:
    print(f"\n다운로드 실패: {failed}")
else:
    print("\n모든 데이터셋 다운로드 완료!")
src/collection/download_dataset_us.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ from PIL import Image
5
+ from io import BytesIO
6
+ import time
7
+ from dotenv import load_dotenv
8
+
9
+ # =========================
10
+ # 1. 설정 (여기만 수정하면 됨)
11
+ # =========================
12
load_dotenv()
US_TOKEN = os.environ.get("US_TOKEN")

# The Unsplash "Client-ID" header needs the token value itself. The previous
# assignment used the literal string "US_TOKEN", so every API call was sent
# with an invalid key.
ACCESS_KEY = US_TOKEN
if not ACCESS_KEY:
    # Without a key every request fails, so stop before doing any work.
    raise SystemExit("US_TOKEN 환경 변수가 설정되지 않았습니다 (.env 확인 필요).")
16
+
17
+ TARGET_COUNT = 100
18
+ MIN_WIDTH = 256
19
+ MIN_HEIGHT = 256
20
+ SLEEP_TIME = 2
21
+
22
+ BASE_DIR = "un_images"
23
+
24
+ MASTER_CLASSES = [
25
+ "pizza","hamburger","sushi","pasta","salad","steak","cake","sandwich","fried_chicken","bread",
26
+ "apple","banana","strawberry","orange","carrot",
27
+ "golden_retriever","bulldog","siamese_cat","persian_cat","eagle","owl","lion","elephant","zebra","giraffe",
28
+ "rose","sunflower","daisy","tulip","palm_tree","pine_tree","maple_tree","bamboo",
29
+ "laptop","watch","camera","chair","clock","microwave","refrigerator",
30
+ "car","bicycle","motorcycle","airplane","bus",
31
+ "backpack","sneakers","umbrella","glasses","hat"
32
+ ]
33
+
34
+ SIMPLE_CLASSES = [
35
+ "pizza","burger","sushi","pasta","salad","steak","cake","sandwich","fried_chicken","bread",
36
+ "apple","banana","strawberry","orange","carrot",
37
+ "golden_retriever","bulldog","siamese_cat","persian_cat","eagle","owl","lion","elephant","zebra","giraffe",
38
+ "rose","sunflower","daisy","tulip","palm_tree","pine_tree","maple_tree","bamboo",
39
+ "laptop","wristwatch","camera","chair","wall_clock","microwave","refrigerator",
40
+ "car","bicycle","motorcycle","airplane","bus",
41
+ "backpack","sneakers","umbrella","glasses","hat"
42
+ ]
43
+
44
+ os.makedirs(BASE_DIR, exist_ok=True)
45
+
46
+ # =========================
47
+ # 2. 유틸
48
+ # =========================
49
def format_name(name):
    """Return *name* with every underscore replaced by a hyphen."""
    return "-".join(name.split("_"))
51
+
52
def get_start_index(folder, simple_cls):
    """Return the next free image number for ``un_<simple_cls>_<NNN>.jpg``.

    Scans *folder* and returns the highest existing number plus one, or 1
    when no file matches. The class name is escaped and the whole file name
    must match, so names containing regex metacharacters work and files
    such as ``un_cat_099.jpg.tmp`` are not counted.

    Raises:
        OSError: if *folder* cannot be listed (for example, it is missing).
    """
    pattern = re.compile(rf"un_{re.escape(simple_cls)}_(\d+)\.jpg")

    matches = (pattern.fullmatch(f) for f in os.listdir(folder))
    max_idx = max((int(m.group(1)) for m in matches if m), default=0)

    return max_idx + 1
63
+
64
+ # =========================
65
+ # 3. API
66
+ # =========================
67
def search_images(query, page):
    """Return the "regular" image URLs of one Unsplash search result page.

    Underscores in *query* are sent as spaces. On HTTP 429 the function
    waits 60 seconds and retries the same page (up to three attempts)
    instead of returning an empty list straight away, because callers treat
    an empty list as "no more images" and stop collecting the class.

    Returns:
        A list of URL strings; empty on any API or network error, or when
        the page has no results.
    """
    url = "https://api.unsplash.com/search/photos"

    headers = {
        "Authorization": f"Client-ID {ACCESS_KEY}"
    }

    params = {
        "query": query.replace("_", " "),
        "per_page": 30,
        "page": page
    }

    max_attempts = 3
    for _attempt in range(max_attempts):
        try:
            # A timeout keeps a stalled connection from hanging the script.
            res = requests.get(url, headers=headers, params=params, timeout=10)
        except requests.RequestException as e:
            print("API ERROR:", e)
            return []

        if res.status_code == 429:
            print("⏳ Rate limit → 60초 대기")
            time.sleep(60)
            continue

        if res.status_code != 200:
            print("API ERROR:", res.text)
            return []

        return [item["urls"]["regular"] for item in res.json().get("results", [])]

    # Still rate-limited after all attempts.
    return []
92
+
93
def download_image(url):
    """Download *url* and return its raw bytes if it is a large-enough image.

    Returns None when the request fails, the response is not HTTP 200, the
    payload cannot be opened as an image, or either side is smaller than
    MIN_WIDTH / MIN_HEIGHT. The bytes are returned exactly as received; the
    image is not re-encoded here.
    """
    try:
        res = requests.get(url, timeout=10)
        if res.status_code != 200:
            return None

        # Open only to read the dimensions, then release the image object.
        with Image.open(BytesIO(res.content)) as img:
            w, h = img.size

        if w < MIN_WIDTH or h < MIN_HEIGHT:
            return None

        return res.content
    except Exception:
        # Best-effort: any network or decode problem means "skip this URL".
        # Exception (not a bare except) lets Ctrl+C stop the script.
        return None
108
+
109
+ # =========================
110
+ # 4. 메인
111
+ # =========================
112
# MASTER_CLASSES names the folder, SIMPLE_CLASSES (same position) supplies
# the search term and the file-name stem.
for master_cls, raw_simple in zip(MASTER_CLASSES, SIMPLE_CLASSES):
    simple_cls = format_name(raw_simple)

    print(f"\n[START] {master_cls}")

    class_dir = os.path.join(BASE_DIR, master_cls)
    os.makedirs(class_dir, exist_ok=True)

    # Resume after the highest file number already present in the folder.
    count = get_start_index(class_dir, simple_cls) - 1
    page = 1
    seen = set()

    while count < TARGET_COUNT:
        urls = search_images(simple_cls, page)
        if not urls:
            print("이미지 없음")
            break

        for url in urls:
            if count >= TARGET_COUNT:
                break

            if url not in seen:
                seen.add(url)
                img_data = download_image(url)

                if img_data is not None:
                    count += 1
                    path = os.path.join(
                        class_dir, f"un_{simple_cls}_{count:03d}.jpg"
                    )
                    with open(path, "wb") as f:
                        f.write(img_data)
                    print(f"Saved: {path}")

        page += 1
        # Pause between result pages.
        time.sleep(SLEEP_TIME)

    print(f"[DONE] {master_cls} -> {count}/{TARGET_COUNT}")
161
+
162
+ # =========================
163
+ # 5. 검증 + 부족분 자동 보충
164
+ # =========================
165
print("\n[검증 시작]\n")

for master_cls, raw_simple in zip(MASTER_CLASSES, SIMPLE_CLASSES):
    simple_cls = format_name(raw_simple)
    class_dir = os.path.join(BASE_DIR, master_cls)

    # Pass 1: delete every .jpg that cannot be opened or is too small, and
    # count the ones that remain.
    count = 0
    for f in os.listdir(class_dir):
        if not f.endswith(".jpg"):
            continue

        path = os.path.join(class_dir, f)
        try:
            # The context manager closes the file before any os.remove()
            # below; an open handle makes deletion fail on Windows.
            with Image.open(path) as img:
                w, h = img.size
            is_ok = w >= MIN_WIDTH and h >= MIN_HEIGHT
        except Exception:
            is_ok = False

        if is_ok:
            count += 1
        else:
            os.remove(path)

    print(f"{master_cls}: {count}/{TARGET_COUNT}")

    # Pass 2: download replacements until the class reaches TARGET_COUNT or
    # the search returns nothing.
    if count < TARGET_COUNT:
        print(f"→ 부족분 재수집 시작")

        page = 1
        seen = set()
        start_idx = get_start_index(class_dir, simple_cls)

        while count < TARGET_COUNT:
            urls = search_images(simple_cls, page)

            if not urls:
                break

            for url in urls:
                if count >= TARGET_COUNT:
                    break

                if url in seen:
                    continue
                seen.add(url)

                img_data = download_image(url)
                if img_data is None:
                    continue

                file_name = f"un_{simple_cls}_{start_idx:03d}.jpg"
                path = os.path.join(class_dir, file_name)

                with open(path, "wb") as f:
                    f.write(img_data)

                print(f"ReSaved: {path}")

                start_idx += 1
                count += 1

            page += 1
            time.sleep(SLEEP_TIME)

print("\n[완료]")
src/collection/get_label_list_hf.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+
3
+
4
+ # ============================================================
5
+ # [설정 부분]
6
+ # ============================================================
7
+
8
+ # 확인할 Hugging Face 데이터셋 이름
9
+ DATASET_NAME = "KrushiJethe/fashion_data" #uran66/animals
10
+
11
+ # 확인할 split 이름
12
+ SPLIT_NAME = "train"
13
+
14
+ # 라벨 필드명
15
+ LABEL_FIELD_NAME = "articleType"
16
+
17
+ # streaming 사용 여부
18
+ # 라벨 구조만 확인할 때는 streaming=True로 해도 된다.
19
+ USE_STREAMING = True
20
+
21
+ # 문자열 라벨 데이터셋일 경우 전체 데이터를 훑어야 할 수 있다.
22
+ # None이면 전체 확인, 숫자를 넣으면 일부 샘플만 확인한다.
23
+ MAX_SCAN_ITEMS = None
24
+
25
+ # ============================================================
26
+
27
+
28
def get_unique_labels():
    """Print and return the distinct labels of the configured dataset.

    If the label feature exposes ``names``, that list is used directly and
    the data is not iterated. Otherwise the split is scanned (up to
    MAX_SCAN_ITEMS items when that is set), non-None label values are
    collected as strings, and the result is returned sorted.
    """
    print(f"[{DATASET_NAME}] 데이터셋 로드 중...")

    dataset = load_dataset(
        DATASET_NAME,
        split=SPLIT_NAME,
        streaming=USE_STREAMING,
    )
    label_feature = dataset.features[LABEL_FIELD_NAME]

    label_names = getattr(label_feature, "names", None)

    if label_names is None:
        # No declared name list: gather the labels from the data itself.
        print("\n라벨 필드가 ClassLabel 타입이 아니므로 데이터를 순회합니다...")

        unique_labels = set()
        scan_limit = MAX_SCAN_ITEMS
        for position, record in enumerate(dataset):
            if scan_limit is not None and position >= scan_limit:
                break

            raw_label = record.get(LABEL_FIELD_NAME)
            if raw_label is not None:
                unique_labels.add(str(raw_label))

        label_names = sorted(unique_labels)

    print("\n라벨 목록")
    print("-" * 50)

    for idx, label_name in enumerate(label_names):
        print(f"{idx}: {label_name}")

    print("-" * 50)
    print(f"총 라벨 개수: {len(label_names)}")

    return label_names


if __name__ == "__main__":
    get_unique_labels()
src/collection/select_60_images.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import shutil
4
+
5
+ # ================================
6
+ # 0. 설정
7
+ # ================================
8
+ TARGET_COUNT = 60
9
+ MIN_RES = 128 # 해상도 128
10
+ PREFIX = "kg"
11
+ BASE_DIR = "./data/raw"
12
+
13
+ # ================================
14
+ # 1. 경로
15
+ # ================================
16
# Resolve the folder from the current user's home directory instead of a
# path hard-coded to one machine's Windows profile; this is the same
# location the other raw_kg scripts use.
DATA_DIR = os.path.join(os.path.expanduser("~"), "Desktop", "raw_kg")
17
+
18
+ # ================================
19
+ # 2. 클래스 목록
20
+ # ================================
21
+ CLASS_LIST = [
22
+ # 음식 및 식재료
23
+ "pizza","hamburger","sushi","pasta","salad",
24
+ "steak","cup_cake","sandwich","waffle","dumpling",
25
+
26
+ # 동물
27
+ "golden-retriever","bulldog","siamese-cat",
28
+ "persian-cat","elephant","sheep","horse",
29
+ "penguin","butterfly","squirrel",
30
+
31
+ # 꽃
32
+ "rose","sunflower","daisy","tulip","dandelion",
33
+ "lily","lavender","orchid","iris","marigold","aster",
34
+
35
+ # 과일
36
+ "apple","banana","strawberry","orange",
37
+ "carrot","tomato","cucumber",
38
+
39
+ # 탈것
40
+ "car","bicycle","motorcycle","airplane","bus",
41
+
42
+ # 패션 및 잡화
43
+ "t-shirt","sneakers","earrings","glasses",
44
+ "pants","bracelet","necklace"
45
+ ]
46
+
47
+ print("클래스별 이미지 60장 맞추기 시작\n")
48
+
49
+ # ================================
50
+ # 3. 메인 로직
51
+ # ================================
52
for cls in CLASS_LIST:

    cls_path = os.path.join(DATA_DIR, cls)

    if not os.path.exists(cls_path):
        print(f"{cls}: 폴더 없음 (skip)")
        continue

    # Every regular file in the class folder is counted.
    images = [
        f for f in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, f))
    ]

    current_count = len(images)

    print(f"{cls}: 현재 {current_count}장 → 목표 {TARGET_COUNT}장")

    if current_count > TARGET_COUNT:
        # Too many: delete a random surplus.
        delete_count = current_count - TARGET_COUNT
        deleted = 0

        for file in random.sample(images, delete_count):
            try:
                os.remove(os.path.join(cls_path, file))
            except OSError as e:
                # Report the failure instead of silently ignoring it, so
                # the count printed below reflects what really happened.
                print(f" → 삭제 실패: {file} ({e})")
                continue
            deleted += 1

        print(f" → {deleted}장 삭제 완료")

    elif current_count < TARGET_COUNT:
        # Too few: only report how many are missing.
        need_count = TARGET_COUNT - current_count
        print(f" → {need_count}장 부족 (추가 수집 필요)")

    else:
        print(" → 이미 60장 완료")

print("\n전체 정리 완료!")
src/collection/unzip_data_kg.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+
4
+ # ================================
5
+ # 0. 설정
6
+ # ================================
7
+ TARGET_COUNT = 60
8
+ MIN_RES = 128 # 해상도 128
9
+ PREFIX = "kg"
10
+ BASE_DIR = "./data/raw"
11
+
12
+ # ================================
13
+ # 1. 경로
14
+ # ================================
15
+ ZIP_DIR = "data/raw_full_kg"
16
+
17
+ # 압축 해제 위치
18
+ EXTRACT_DIR = os.path.join(ZIP_DIR, "extracted")
19
+
20
+ os.makedirs(EXTRACT_DIR, exist_ok=True)
21
+
22
+ # ================================
23
+ # 2. zip 파일 목록
24
+ # ================================
25
# Archives to unpack: every entry of ZIP_DIR whose name ends in ".zip".
zip_files = list(
    filter(lambda entry: entry.endswith(".zip"), os.listdir(ZIP_DIR))
)

# ================================
# 3. Extract
# ================================
for zip_file in zip_files:
    print(f"{zip_file} 압축 해제 중...")

    try:
        # All archives are unpacked into the same EXTRACT_DIR.
        with zipfile.ZipFile(os.path.join(ZIP_DIR, zip_file), "r") as archive:
            archive.extractall(EXTRACT_DIR)
    except Exception as e:
        # A broken archive is reported and the remaining ones still run.
        print(f"오류 발생: {zip_file} → {e}")

print("모든 압축 해제 완료!")
src/dataset/.ipynb_checkpoints/captioning_dataset-checkpoint.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import random
4
+ from PIL import Image
5
+ from torch.utils.data import Dataset
6
+ import torch
7
+
8
+
9
class CaptionDataset(Dataset):
    """Image/caption dataset that yields images with fixed-length token ids.

    The annotation file is a JSON sequence of records, each providing a
    ``"file_name"`` and a list of ``"captions"``. Captions are converted to
    ids through ``w2i`` using either the word-level ``tokenizer`` callable or
    a SentencePiece model, depending on ``use_subword``.
    """

    def __init__(
        self,
        json_path,
        image_dir,
        w2i,
        tokenizer: callable,
        split='train',
        transform=None,
        max_len=30,
        train_num_caption=1,
        debug=False,
        use_subword=False,
        sp_model_path="tokenizer.model"
    ):
        """Load the annotations and store the encoding configuration.

        Args:
            json_path: Path of the JSON annotation file.
            image_dir: Directory that ``file_name`` entries are relative to.
            w2i: Mapping from token string to id; must contain ``<pad>``,
                ``<sos>``, ``<eos>`` and ``<unk>``.
            tokenizer: Callable turning a caption into a list of tokens
                (used when ``use_subword`` is false).
            split: ``"val"`` selects validation behaviour; any other value
                selects training behaviour.
            transform: Optional callable applied to the loaded RGB image.
            max_len: Exact length of every encoded caption.
            train_num_caption: Number of captions sampled per image in
                training mode.
            debug: When true, only the first 10 records are kept.
            use_subword: When true, captions are split with SentencePiece.
            sp_model_path: SentencePiece model file (only read when
                ``use_subword`` is true).
        """
        with open(json_path, 'r') as f:
            self.data = json.load(f)

        # Debugging aid: work on a tiny subset
        if debug:
            self.data = self.data[:10]

        self.is_val = split == "val"

        self.image_dir = image_dir
        self.w2i = w2i
        self.transform = transform
        self.max_len = max_len
        self.tokenizer = tokenizer
        self.train_num_caption = train_num_caption
        self.use_subword = use_subword
        if self.use_subword:
            # Imported lazily so word-level usage does not need sentencepiece
            import sentencepiece as spm

            self.sp = spm.SentencePieceProcessor()
            self.sp.load(sp_model_path)

    def __len__(self):
        """Return the number of annotation records."""
        return len(self.data)

    def encode_caption(self, caption):
        """Encode one caption as a ``torch.long`` tensor of ``max_len`` ids.

        The sequence is ``<sos>``, the token ids (``<unk>`` for tokens missing
        from ``w2i``), ``<eos>``; it is then truncated (keeping a final
        ``<eos>``) or right-padded with ``<pad>`` to ``max_len``.
        """
        if self.use_subword:
            # The SentencePiece model is trained on lower-cased captions, so
            # the text must be lower-cased here as well; otherwise capitalised
            # words are split into pieces the model never saw.
            words = self.sp.encode(caption.lower(), out_type=str)
        else:
            words = self.tokenizer(caption)

        tokens = (
            [self.w2i["<sos>"]] +
            [self.w2i.get(w, self.w2i["<unk>"]) for w in words] +
            [self.w2i["<eos>"]]
        )

        # Truncation / padding to the fixed length
        if len(tokens) > self.max_len:
            tokens = tokens[:self.max_len - 1]
            tokens.append(self.w2i["<eos>"])
        else:
            tokens += [self.w2i["<pad>"]] * (self.max_len - len(tokens))

        return torch.tensor(tokens, dtype=torch.long)

    def __getitem__(self, index):
        """Return the sample at ``index``.

        Validation mode returns ``(image, tokens, captions, file_name)`` where
        ``tokens`` encodes one randomly chosen caption and ``captions`` is the
        list of 5 reference captions. Training mode returns
        ``(images, tokens)``: the image repeated once per sampled caption and
        the matching encoded captions, both stacked along a new first axis.
        """
        data = self.data[index]
        file_name = data["file_name"]

        image_path = os.path.join(self.image_dir, file_name)

        image = Image.open(image_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        captions = data["captions"]

        # Use at most 5 captions (the slice is a copy, the record is untouched)
        captions = captions[:5]

        # With fewer than 5 captions, repeat the last one until there are 5
        while len(captions) < 5:
            captions.append(captions[-1])

        # validation
        if self.is_val:
            caption = random.choice(captions)

            tokens = self.encode_caption(caption)

            return image, tokens, captions, file_name

        # train
        selected_captions = random.sample(captions, k=self.train_num_caption)

        images = []
        token_list = []
        for caption in selected_captions:
            images.append(image)
            token_list.append(self.encode_caption(caption))

        images = torch.stack(images)
        tokens = torch.stack(token_list)

        return images, tokens
124
+
src/dataset/.ipynb_checkpoints/train_sub_tokenizer-checkpoint.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import sentencepiece as spm
3
+ import yaml
4
+
5
# Project parameters, read once when the module is loaded
with open("/workspace/params.yaml", "r", encoding="utf-8") as params_file:
    params = yaml.safe_load(params_file)
8
+
9
+
10
def train_sentencepiece(
    json_path,
    model_prefix="sub_tokenizer",
    vocab_size=500,
    model_type="unigram",
    txt_path="/workspace/src/dataset/sub_tokenizing_captions.txt",
):
    """Train a SentencePiece model on the captions of an annotation file.

    Every caption found under the ``"captions"`` key of each record is
    lower-cased and written, one per line, to ``txt_path``; that text file is
    then used as the training corpus.

    Args:
        json_path: Path of the JSON annotation file (a sequence of records
            that each hold a ``"captions"`` list).
        model_prefix: Output prefix handed to the SentencePiece trainer.
        vocab_size: Target vocabulary size.
        model_type: SentencePiece model type.
        txt_path: Where the intermediate caption corpus is written. Defaults
            to the location that used to be hard-coded.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default,
    # the corpus below is written as UTF-8 as well.
    with open(json_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    with open(txt_path, "w", encoding="utf-8") as f:
        f.writelines(
            caption.lower() + "\n"
            for item in data
            for caption in item["captions"]
        )

    spm.SentencePieceTrainer.train(
        input=txt_path,
        model_prefix=model_prefix,
        vocab_size=vocab_size,
        model_type=model_type,

        # Special pieces and their fixed ids
        pad_piece="<pad>",
        bos_piece="<sos>",
        eos_piece="<eos>",
        unk_piece="<unk>",

        pad_id=0,
        bos_id=1,
        eos_id=2,
        unk_id=3
    )

    print("tokenizer training done")
47
+
48
+
49
if __name__ == "__main__":
    # Vocabulary size comes from the captioning section of the parameter file
    sp_vocab_size = params["captioning"]["tokenizer"]["sp_vocab_size"]

    train_sentencepiece(
        json_path="/workspace/data/captioning/annotations/train.json",
        model_prefix="/workspace/src/dataset/sub_tokenizer",
        vocab_size=sp_vocab_size,
        model_type="unigram",
    )
src/dataset/__pycache__/build_vocab.cpython-310.pyc ADDED
Binary file (1.62 kB). View file
 
src/dataset/__pycache__/captioning_dataset.cpython-310.pyc ADDED
Binary file (2.74 kB). View file
 
src/dataset/__pycache__/classification_dataset.cpython-310.pyc ADDED
Binary file (1.53 kB). View file
 
src/dataset/__pycache__/collate_caption.cpython-310.pyc ADDED
Binary file (410 Bytes). View file
 
src/dataset/build_vocab.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from collections import Counter
3
+ import re
4
+ import sentencepiece as spm
5
+
6
def tokenizer(captions):
    """Split a caption string into lower-case word and punctuation tokens.

    The characters ``. , ! ?`` are surrounded with spaces so that each becomes
    a token of its own; the text is then split on whitespace.
    """
    spaced = re.sub(r"([.,!?])", r" \1 ", captions.lower())
    return spaced.split()
12
+
13
def sub_tokenizer(caption, sp):
    """Return the string pieces ``sp.encode`` produces for ``caption``."""
    return sp.encode(caption, out_type=str)
17
+
18
+
19
def build_vocab(json_path, min_freq=3, max_size=10000, use_subword=False, sp_model_path="/workspace/src/dataset/sub_tokenizer.model"):
    """Build the token/id lookup tables for captioning.

    Args:
        json_path: JSON annotation file (records with a ``"captions"`` list);
            only read when ``use_subword`` is false.
        min_freq: Minimum number of occurrences for a word to be kept.
        max_size: Upper bound on the vocabulary size, special tokens included.
        use_subword: When true, the vocabulary is copied from the
            SentencePiece model at ``sp_model_path`` instead of being counted.
        sp_model_path: SentencePiece model file.

    Returns:
        ``(w2i, i2w, voca_size)``: token-to-id dict, id-to-token dict and the
        number of entries.
    """
    w2i = dict()
    i2w = dict()

    # ==================================================
    # SentencePiece vocabulary
    # ==================================================
    if use_subword:

        sp = spm.SentencePieceProcessor()
        sp.load(sp_model_path)

        voca_size = sp.get_piece_size()

        for i in range(voca_size):
            token = sp.id_to_piece(i)

            w2i[token] = i
            i2w[i] = token
    else:
        with open(json_path, 'r', encoding="utf-8") as f:
            data = json.load(f)

        counter = Counter()

        for item in data:
            for caption in item["captions"]:
                counter.update(tokenizer(caption))

        special_tokens = ["<pad>", "<sos>", "<eos>", "<unk>"]

        # A caption token that happens to equal a special token must not be
        # added a second time: it would overwrite the reserved id in w2i and
        # make voca_size larger than the number of distinct tokens.
        words = [
            w for w, freq in counter.most_common()
            if freq >= min_freq and w not in special_tokens
        ]

        # Clamp at 0 so a tiny max_size cannot become a negative slice bound.
        word_budget = max(max_size - len(special_tokens), 0)

        voca = special_tokens + words[:word_budget]
        voca_size = len(voca)

        for i, w in enumerate(voca):
            w2i[w] = i
            i2w[i] = w

    print(voca_size)

    return w2i, i2w, voca_size
src/dataset/captioning_dataset.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import random
4
+ from PIL import Image
5
+ from torch.utils.data import Dataset
6
+ import torch
7
+
8
+
9
class CaptionDataset(Dataset):
    """Dataset pairing images with captions encoded as fixed-length id tensors.

    Annotation records come from a JSON file and provide ``"file_name"`` and
    ``"captions"``. Depending on ``use_subword`` captions are split with a
    SentencePiece model or with the supplied ``tokenizer`` callable and mapped
    to ids through ``w2i``.
    """

    def __init__(
        self,
        json_path,
        image_dir,
        w2i,
        tokenizer: callable,
        split='train',
        transform=None,
        max_len=30,
        train_num_caption=1,
        debug=False,
        use_subword=False,
        sp_model_path="tokenizer.model"
    ):
        """Read the annotation file and remember the encoding settings.

        ``split == "val"`` switches ``__getitem__`` to validation output;
        ``debug`` restricts the data to the first 10 records; the
        SentencePiece model at ``sp_model_path`` is only loaded when
        ``use_subword`` is true.
        """
        with open(json_path, 'r') as annotation_file:
            records = json.load(annotation_file)

        # Debug runs only look at a handful of records
        self.data = records[:10] if debug else records

        self.is_val = split == "val"

        self.image_dir = image_dir
        self.w2i = w2i
        self.transform = transform
        self.max_len = max_len
        self.tokenizer = tokenizer
        self.train_num_caption = train_num_caption
        self.use_subword = use_subword

        if use_subword:
            import sentencepiece as spm

            processor = spm.SentencePieceProcessor()
            processor.load(sp_model_path)
            self.sp = processor

    def __len__(self):
        """Number of annotation records."""
        return len(self.data)

    def encode_caption(self, caption):
        """Turn a caption into a ``torch.long`` tensor of exactly ``max_len`` ids.

        Layout: ``<sos>``, one id per token (``<unk>`` for unknown tokens),
        ``<eos>``. Longer sequences are cut and closed with ``<eos>``; shorter
        ones are right-padded with ``<pad>``.
        """
        vocab = self.w2i

        if self.use_subword:
            pieces = self.sp.encode(caption.lower(), out_type=str)
        else:
            pieces = self.tokenizer(caption)

        ids = [vocab["<sos>"]]
        ids.extend(vocab.get(piece, vocab["<unk>"]) for piece in pieces)
        ids.append(vocab["<eos>"])

        # Positive overflow -> truncate, otherwise pad by the shortfall
        overflow = len(ids) - self.max_len
        if overflow > 0:
            ids = ids[:self.max_len - 1]
            ids.append(vocab["<eos>"])
        else:
            ids.extend([vocab["<pad>"]] * -overflow)

        return torch.tensor(ids, dtype=torch.long)

    def __getitem__(self, index):
        """Load the image of record ``index`` and encode its caption(s).

        Returns ``(image, tokens, captions, file_name)`` in validation mode
        (one random caption encoded, all 5 reference captions returned) and
        ``(images, tokens)`` in training mode, where the image is repeated for
        each of the ``train_num_caption`` sampled captions and both outputs
        are stacked.
        """
        record = self.data[index]
        file_name = record["file_name"]

        image = Image.open(os.path.join(self.image_dir, file_name)).convert('RGB')
        if self.transform:
            image = self.transform(image)

        # Keep at most 5 captions; the slice copies, so the record is not modified
        captions = record["captions"][:5]

        # Fewer than 5 captions: repeat the last one to fill up
        missing = 5 - len(captions)
        if missing > 0:
            captions.extend([captions[-1]] * missing)

        if self.is_val:
            chosen = random.choice(captions)
            return image, self.encode_caption(chosen), captions, file_name

        # Training: sample captions without replacement and pair each with the image
        sampled = random.sample(captions, k=self.train_num_caption)
        encoded = [self.encode_caption(text) for text in sampled]

        images = torch.stack([image for _ in sampled])
        tokens = torch.stack(encoded)

        return images, tokens
124
+
src/dataset/classification_dataset.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image
3
+ from torch.utils.data import Dataset
4
+
5
+
6
class ClassificationDataset(Dataset):
    """Folder-per-class image dataset with a deterministic per-class split.

    ``root_dir`` holds one sub-directory per class. The files of every class
    are sorted by name and cut into consecutive train / val / test slices
    according to ``split_ratio``, so the three splits never overlap.
    """

    def __init__(
        self,
        root_dir,
        class_to_idx,
        split="train",
        transform=None,
        split_ratio=(0.7, 0.15, 0.15)
    ):
        """Collect the ``(image_path, label)`` pairs of the requested split.

        Args:
            root_dir: Directory containing one sub-directory per class.
            class_to_idx: Mapping from class directory name to integer label.
            split: ``"train"`` or ``"val"``; any other value selects the
                remaining (test) slice.
            transform: Optional callable applied to each loaded RGB image.
            split_ratio: Fractions for train and val; only the first two
                entries are used, the test slice is whatever remains.

        Raises:
            KeyError: If a (non-hidden) class directory is missing from
                ``class_to_idx``.
        """
        self.transform = transform
        self.samples = []

        for class_name in sorted(os.listdir(root_dir)):
            # Hidden entries such as ``.ipynb_checkpoints`` are tool artefacts,
            # not classes; previously they ended in a KeyError below.
            if class_name.startswith("."):
                continue

            class_path = os.path.join(root_dir, class_name)

            if not os.path.isdir(class_path):
                continue

            # Only visible regular files count as images, so stray entries
            # (``.DS_Store``, nested folders) neither become samples nor shift
            # the split boundaries.
            images = sorted(
                name for name in os.listdir(class_path)
                if not name.startswith(".")
                and os.path.isfile(os.path.join(class_path, name))
            )

            total = len(images)

            train_end = int(total * split_ratio[0])
            val_end = train_end + int(total * split_ratio[1])

            if split == "train":
                split_images = images[:train_end]
            elif split == "val":
                split_images = images[train_end:val_end]
            else:
                split_images = images[val_end:]

            label = class_to_idx[class_name]

            self.samples.extend(
                (os.path.join(class_path, image_name), label)
                for image_name in split_images
            )

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.samples)

    def __getitem__(self, index):
        """Return ``(image, label, image_path)`` for the sample at ``index``."""
        image_path, label = self.samples[index]
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label, image_path
src/dataset/collate_caption.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
def collate_caption(batch):
    """Merge ``(images, tokens)`` samples into two batch tensors.

    Each sample contributes tensors that already carry a leading axis; the
    samples are concatenated along that axis (``dim=0``) rather than stacked.
    """
    pairs = [(image, token) for image, token in batch]

    images = torch.cat([pair[0] for pair in pairs], dim=0)
    tokens = torch.cat([pair[1] for pair in pairs], dim=0)

    return images, tokens
src/dataset/sub_tokenizer1000.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f24d6fdba624cc28650fc27f6ef8bd2002d3a3b652205a2e0e8af2aa21ea7be
3
+ size 254104
src/dataset/sub_tokenizer1000.vocab ADDED
@@ -0,0 +1,1000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad> 0
2
+ <sos> 0
3
+ <eos> 0
4
+ <unk> 0
5
+ ▁a -1.98322
6
+ . -2.9407
7
+ s -3.42779
8
+ ▁of -3.61005
9
+ ▁on -3.6172
10
+ ▁in -3.86704
11
+ ▁the -3.87608
12
+ nd -3.93335
13
+ ▁with -3.9459
14
+ ing -4.4554
15
+ ▁is -4.4607
16
+ ▁ -4.56396
17
+ ▁to -4.59365
18
+ e -4.65398
19
+ ▁man -4.85533
20
+ ▁sitting -4.95263
21
+ , -5.03648
22
+ ed -5.04331
23
+ y -5.11805
24
+ ▁an -5.13757
25
+ ▁next -5.20493
26
+ ▁two -5.22965
27
+ d -5.2952
28
+ ▁white -5.36199
29
+ n -5.36722
30
+ a -5.37838
31
+ i -5.38911
32
+ ▁are -5.39818
33
+ ▁holding -5.40034
34
+ ▁standing -5.40128
35
+ o -5.40533
36
+ ▁table -5.46676
37
+ p -5.47577
38
+ t -5.48538
39
+ ▁it -5.49508
40
+ r -5.51872
41
+ le -5.59946
42
+ ▁plate -5.6039
43
+ ▁woman -5.63334
44
+ ▁that -5.6784
45
+ ▁up -5.68785
46
+ ▁at -5.71356
47
+ er -5.7243
48
+ ▁top -5.76056
49
+ ▁people -5.76236
50
+ ▁some -5.79632
51
+ ▁person -5.81136
52
+ m -5.8687
53
+ ▁black -5.88285
54
+ ▁large -5.88415
55
+ ▁street -5.92398
56
+ es -5.93578
57
+ ▁dog -5.93954
58
+ ▁red -5.97105
59
+ ▁s -6.00218
60
+ ▁f -6.02117
61
+ ▁his -6.02288
62
+ ▁small -6.0281
63
+ ▁near -6.04428
64
+ l -6.05039
65
+ u -6.10616
66
+ ▁front -6.11685
67
+ g -6.13449
68
+ b -6.16149
69
+ ▁sign -6.19078
70
+ f -6.22392
71
+ ▁cat -6.23333
72
+ ▁bear -6.2501
73
+ ▁by -6.25248
74
+ ▁group -6.27012
75
+ ▁field -6.28847
76
+ ▁blue -6.29729
77
+ ▁has -6.30494
78
+ ▁food -6.30817
79
+ ▁down -6.34169
80
+ ▁green -6.34463
81
+ ar -6.34581
82
+ ▁young -6.36549
83
+ in -6.3685
84
+ ▁tennis -6.38354
85
+ ▁snow -6.3845
86
+ ▁other -6.39216
87
+ ▁close -6.39402
88
+ c -6.40273
89
+ ▁whi -6.40908
90
+ k -6.41607
91
+ ▁water -6.41957
92
+ ▁grass -6.42378
93
+ ▁there -6.42571
94
+ st -6.42804
95
+ al -6.42888
96
+ w -6.44482
97
+ h -6.46665
98
+ ▁side -6.47196
99
+ ll -6.4794
100
+ ▁train -6.51638
101
+ ▁computer -6.52006
102
+ ▁lay -6.53283
103
+ ch -6.54474
104
+ ▁stop -6.56702
105
+ ▁board -6.56877
106
+ ▁for -6.56976
107
+ ▁bus -6.57341
108
+ ▁baseball -6.58612
109
+ ▁phone -6.58992
110
+ ▁parked -6.59899
111
+ ▁walking -6.60393
112
+ ▁her -6.614
113
+ ▁b -6.6172
114
+ ▁riding -6.62086
115
+ ▁sits -6.62201
116
+ ▁bowl -6.62631
117
+ ▁clock -6.63455
118
+ ▁parking -6.63607
119
+ ▁kitchen -6.64843
120
+ ▁light -6.65729
121
+ or -6.68298
122
+ ▁wearing -6.69127
123
+ ▁picture -6.69418
124
+ ▁boy -6.69421
125
+ ▁looking -6.69659
126
+ ▁building -6.69858
127
+ en -6.71042
128
+ ▁out -6.71159
129
+ ▁girl -6.71339
130
+ ▁vase -6.73144
131
+ ▁fire -6.73145
132
+ ▁hand -6.73375
133
+ ▁pizza -6.7406
134
+ an -6.75261
135
+ ' -6.75293
136
+ ▁thre -6.76074
137
+ ▁t -6.76468
138
+ ▁couple -6.77331
139
+ at -6.7805
140
+ ▁be -6.78194
141
+ ▁yellow -6.78446
142
+ ▁bathroom -6.78927
143
+ ▁glass -6.79439
144
+ ▁woode -6.79814
145
+ ▁toilet -6.80878
146
+ ▁meter -6.80878
147
+ ▁from -6.82199
148
+ ▁m -6.82553
149
+ ▁several -6.82698
150
+ ▁bat -6.82956
151
+ ▁skateboard -6.83116
152
+ ▁boat -6.84214
153
+ ▁brown -6.84553
154
+ on -6.84954
155
+ ▁vegetables -6.85327
156
+ ▁hydrant -6.86097
157
+ ▁over -6.87842
158
+ ▁beach -6.88194
159
+ ▁scissors -6.88724
160
+ ▁sink -6.89616
161
+ ▁their -6.89832
162
+ ▁room -6.89974
163
+ ▁filled -6.90334
164
+ el -6.9077
165
+ ▁umbrella -6.93262
166
+ re -6.93408
167
+ v -6.93453
168
+ ▁desk -6.93802
169
+ ▁wine -6.94951
170
+ ly -6.95091
171
+ ▁elephant -6.95316
172
+ ▁horse -6.95723
173
+ ▁motorcycle -6.95896
174
+ ▁hot -6.96056
175
+ ▁road -6.96278
176
+ ▁open -6.96662
177
+ ▁st -6.97948
178
+ ▁pair -6.98018
179
+ ro -6.98102
180
+ ▁d -6.98209
181
+ it -6.9932
182
+ ▁tie -6.99493
183
+ ▁bench -6.99587
184
+ ur -6.99703
185
+ ▁g -7.0009
186
+ ▁piece -7.0058
187
+ ▁cake -7.01609
188
+ ▁pa -7.02046
189
+ ▁keyboard -7.02191
190
+ ▁each -7.02191
191
+ ▁thi -7.02415
192
+ ▁wall -7.03008
193
+ ▁sheep -7.03431
194
+ ▁orange -7.03899
195
+ ▁ball -7.03925
196
+ ▁different -7.04036
197
+ ▁frisbee -7.04036
198
+ ▁counter -7.04036
199
+ ▁flying -7.04119
200
+ ▁flowers -7.04797
201
+ ▁traffic -7.04866
202
+ ▁giraffe -7.05405
203
+ ▁laptop -7.05704
204
+ ▁car -7.06113
205
+ ▁tree -7.06714
206
+ ▁eating -7.06818
207
+ ▁teddy -7.07615
208
+ ▁its -7.07656
209
+ ▁bunch -7.07657
210
+ ▁around -7.0783
211
+ ▁mouse -7.08046
212
+ li -7.08325
213
+ ▁covered -7.08612
214
+ il -7.09208
215
+ ▁bike -7.10221
216
+ ▁broccoli -7.10883
217
+ ▁cell -7.11314
218
+ ▁through -7.11997
219
+ ▁inside -7.12178
220
+ la -7.12697
221
+ ▁fruit -7.12824
222
+ ▁one -7.13683
223
+ ▁remote -7.14262
224
+ ▁c -7.14674
225
+ ▁zebra -7.14708
226
+ ▁outside -7.15234
227
+ very -7.15784
228
+ ▁little -7.15879
229
+ ▁microwave -7.16345
230
+ ▁truck -7.1658
231
+ ▁area -7.16815
232
+ ▁hold -7.16833
233
+ ting -7.16983
234
+ et -7.17782
235
+ ▁city -7.18235
236
+ ▁old -7.18386
237
+ ter -7.18462
238
+ ▁lot -7.18947
239
+ ▁pe -7.19085
240
+ ▁back -7.19676
241
+ ▁bed -7.19905
242
+ ▁surfboard -7.19922
243
+ ▁sandwich -7.20647
244
+ ▁w -7.20741
245
+ ▁together -7.21876
246
+ ▁air -7.2197
247
+ ▁co -7.22215
248
+ ▁playing -7.22223
249
+ ▁sit -7.24205
250
+ ▁player -7.2433
251
+ ▁book -7.25398
252
+ ▁under -7.25907
253
+ ▁head -7.25926
254
+ ▁racket -7.25933
255
+ ▁behind -7.26428
256
+ ▁fence -7.26947
257
+ ▁bananas -7.27123
258
+ pped -7.27309
259
+ ▁la -7.27649
260
+ ▁bird -7.27907
261
+ ▁ground -7.27992
262
+ ▁men -7.28063
263
+ ▁po -7.28079
264
+ ▁background -7.28255
265
+ ▁snowboard -7.28561
266
+ ▁glasses -7.2927
267
+ ▁child -7.30655
268
+ ers -7.30667
269
+ ▁do -7.31758
270
+ ▁carrots -7.31859
271
+ ▁display -7.31909
272
+ ▁stuff -7.32014
273
+ ▁bag -7.32028
274
+ ic -7.32392
275
+ ous -7.32735
276
+ ol -7.32786
277
+ ▁cup -7.33429
278
+ th -7.33652
279
+ ck -7.33703
280
+ ▁baby -7.34507
281
+ ▁full -7.35071
282
+ ▁cut -7.35128
283
+ ▁stands -7.3625
284
+ ▁into -7.37317
285
+ ▁luggage -7.37353
286
+ id -7.37802
287
+ ion -7.38241
288
+ ▁being -7.38631
289
+ ▁oven -7.39099
290
+ ▁re -7.39201
291
+ ▁sh -7.39833
292
+ ▁beside -7.40047
293
+ ▁photo -7.40281
294
+ ▁sidewalk -7.41477
295
+ ve -7.41626
296
+ ▁shirt -7.42155
297
+ ▁paper -7.42387
298
+ ▁di -7.42522
299
+ ▁floor -7.42687
300
+ ▁window -7.42687
301
+ ▁dirt -7.42994
302
+ ▁bottle -7.43605
303
+ ▁knife -7.43605
304
+ ▁cutt -7.43621
305
+ ▁pink -7.43913
306
+ ▁ski -7.44157
307
+ ra -7.44446
308
+ z -7.44525
309
+ ▁cow -7.44585
310
+ ▁trees -7.45224
311
+ ▁game -7.45465
312
+ ▁bicycle -7.46093
313
+ ▁wood -7.46571
314
+ ▁look -7.47155
315
+ ▁pole -7.47177
316
+ ▁big -7.48325
317
+ ▁skis -7.48476
318
+ way -7.48747
319
+ ▁sp -7.49752
320
+ ▁chair -7.50274
321
+ ct -7.50429
322
+ ut -7.50672
323
+ ▁view -7.51264
324
+ ▁kite -7.51347
325
+ ▁pile -7.51817
326
+ ▁suitcase -7.51929
327
+ ▁face -7.52049
328
+ ▁airplane -7.52599
329
+ ▁walk -7.52786
330
+ ▁oranges -7.53837
331
+ ▁mouth -7.54637
332
+ ▁toothbrush -7.54683
333
+ ▁k -7.55424
334
+ ce -7.5561
335
+ ▁image -7.55671
336
+ ▁apples -7.55932
337
+ ▁zebras -7.56039
338
+ ▁n -7.56135
339
+ ▁sky -7.56722
340
+ ▁plane -7.57067
341
+ ti -7.574
342
+ ▁box -7.5742
343
+ ▁stand -7.57636
344
+ ▁kites -7.5947
345
+ ▁sa -7.59849
346
+ ▁ta -7.60838
347
+ ▁meat -7.62116
348
+ ▁apple -7.62208
349
+ ▁ca -7.62627
350
+ j -7.63227
351
+ ▁another -7.63229
352
+ ▁park -7.63289
353
+ ▁refrigerator -7.65497
354
+ ▁rock -7.65499
355
+ ent -7.66018
356
+ age -7.66522
357
+ ▁vari -7.68129
358
+ ▁cr -7.68348
359
+ ▁he -7.68909
360
+ ▁grassy -7.69146
361
+ ▁ma -7.70065
362
+ ▁four -7.70599
363
+ ▁slice -7.7131
364
+ ▁colorful -7.71405
365
+ ▁bears -7.71567
366
+ ▁line -7.72202
367
+ ▁hat -7.72466
368
+ ▁lo -7.72602
369
+ ▁someone -7.72644
370
+ ▁donuts -7.74847
371
+ ▁tall -7.74884
372
+ ▁ha -7.75025
373
+ ▁women -7.75124
374
+ ▁fork -7.75241
375
+ ▁banana -7.75259
376
+ us -7.75503
377
+ ▁cows -7.75741
378
+ ▁cheese -7.7597
379
+ ▁stove -7.75971
380
+ ▁control -7.76392
381
+ ▁h -7.76772
382
+ ▁them -7.76867
383
+ um -7.76899
384
+ ▁coffee -7.77252
385
+ ▁ra -7.77705
386
+ ▁station -7.77735
387
+ ▁mirror -7.79426
388
+ ▁along -7.79426
389
+ ▁ready -7.79866
390
+ ▁can -7.7987
391
+ ▁we -7.80183
392
+ ▁no -7.80262
393
+ ▁com -7.80472
394
+ ▁pan -7.80668
395
+ te -7.81235
396
+ ▁herd -7.81529
397
+ ▁pot -7.81626
398
+ ▁tak -7.81771
399
+ ▁camera -7.82099
400
+ ▁dish -7.82169
401
+ ▁pi -7.82775
402
+ ▁hanging -7.83368
403
+ ▁items -7.83463
404
+ ▁off -7.83819
405
+ ▁mo -7.84324
406
+ ▁tray -7.84383
407
+ ▁cook -7.84846
408
+ to -7.85812
409
+ ▁above -7.86718
410
+ ▁giraffes -7.87394
411
+ ▁spoon -7.87669
412
+ ▁middle -7.88628
413
+ ▁long -7.88667
414
+ ▁wii -7.88846
415
+ ▁suit -7.89112
416
+ ▁half -7.90085
417
+ ▁elephants -7.90111
418
+ ▁posing -7.90154
419
+ ▁metal -7.90575
420
+ ▁silver -7.90575
421
+ ▁get -7.90787
422
+ ▁ne -7.90954
423
+ ▁past -7.91062
424
+ ▁grazing -7.91067
425
+ un -7.91793
426
+ mp -7.92552
427
+ ▁bar -7.92721
428
+ ▁stick -7.93568
429
+ ▁ru -7.94538
430
+ ▁day -7.94592
431
+ ▁sand -7.94793
432
+ ▁smiling -7.95098
433
+ ▁like -7.95098
434
+ gain -7.95197
435
+ ▁mountain -7.95613
436
+ ▁carry -7.95619
437
+ ▁brush -7.96003
438
+ qu -7.96664
439
+ ▁fruits -7.96821
440
+ ▁de -7.96922
441
+ ot -7.97654
442
+ ▁plant -7.97702
443
+ ▁living -7.98231
444
+ ate -7.98572
445
+ ▁court -7.98764
446
+ ▁surf -7.9939
447
+ ▁basket -7.99835
448
+ ▁drink -8.00293
449
+ ive -8.0033
450
+ ▁ho -8.00457
451
+ ▁door -8.00632
452
+ ▁hill -8.00921
453
+ ▁animals -8.01801
454
+ side -8.02104
455
+ ies -8.02683
456
+ ▁signs -8.03339
457
+ ▁crowd -8.03682
458
+ ▁hair -8.03684
459
+ ▁pull -8.03822
460
+ ▁cars -8.03923
461
+ x -8.05366
462
+ ▁row -8.05382
463
+ ping -8.05595
464
+ ng -8.05924
465
+ ▁airport -8.05948
466
+ ▁across -8.05948
467
+ ▁bread -8.05949
468
+ ▁animal -8.06748
469
+ how -8.06865
470
+ ▁lean -8.07115
471
+ ▁swing -8.07118
472
+ ▁plastic -8.07682
473
+ ▁cross -8.07683
474
+ ▁who -8.07684
475
+ ▁couch -8.09448
476
+ ▁trick -8.09464
477
+ ▁tracks -8.11186
478
+ ▁horses -8.1119
479
+ led -8.11552
480
+ ▁donut -8.11644
481
+ ▁screen -8.1185
482
+ ▁set -8.11926
483
+ ▁prepar -8.1246
484
+ ▁ride -8.12466
485
+ ▁bu -8.13179
486
+ up -8.13602
487
+ ▁skate -8.13679
488
+ ▁birds -8.1389
489
+ ▁type -8.14312
490
+ ▁store -8.14344
491
+ ment -8.14676
492
+ ▁driving -8.14936
493
+ ▁empty -8.14936
494
+ ard -8.15133
495
+ ▁un -8.15811
496
+ lying -8.16103
497
+ ew -8.16374
498
+ ▁pu -8.1642
499
+ ▁chocolate -8.16835
500
+ ▁enclosure -8.16835
501
+ ▁color -8.16981
502
+ ▁something -8.17484
503
+ ▁hands -8.17591
504
+ ▁pen -8.17919
505
+ ▁market -8.1814
506
+ ▁kid -8.18247
507
+ ▁seat -8.19436
508
+ ▁purple -8.20082
509
+ ▁television -8.20082
510
+ ▁using -8.20104
511
+ ▁displayed -8.20337
512
+ ▁snowboarder -8.20711
513
+ ▁house -8.20748
514
+ ake -8.21209
515
+ ▁includ -8.21411
516
+ ▁slope -8.21411
517
+ ▁video -8.21411
518
+ ▁hit -8.21564
519
+ ▁con -8.21607
520
+ ▁or -8.21672
521
+ ▁skier -8.21749
522
+ ▁controller -8.22088
523
+ ▁shown -8.22145
524
+ own -8.22178
525
+ ▁container -8.22238
526
+ ▁pro -8.23454
527
+ ▁multi -8.24123
528
+ ▁case -8.24262
529
+ ▁tooth -8.24826
530
+ ▁gra -8.25037
531
+ ▁flower -8.25042
532
+ ▁outdoor -8.25507
533
+ uring -8.2612
534
+ ▁number -8.26206
535
+ ▁jet -8.26208
536
+ co -8.26604
537
+ ▁tv -8.26658
538
+ ish -8.27151
539
+ ▁double -8.2762
540
+ ▁hotdog -8.2762
541
+ ▁monitor -8.2762
542
+ ▁salad -8.27621
543
+ ▁lady -8.27628
544
+ ▁doughnuts -8.28027
545
+ ▁attached -8.28334
546
+ ▁contain -8.28887
547
+ ▁teeth -8.29054
548
+ ▁she -8.29109
549
+ ▁jump -8.29778
550
+ ▁resting -8.29782
551
+ ▁guy -8.30513
552
+ ▁made -8.30785
553
+ ▁ocean -8.31244
554
+ ▁work -8.31244
555
+ ▁tower -8.31265
556
+ ▁bun -8.31679
557
+ ▁corner -8.31984
558
+ ▁meal -8.31991
559
+ ling -8.32351
560
+ ▁mak -8.32645
561
+ ▁passenger -8.3273
562
+ ▁take -8.33001
563
+ ▁racquet -8.33482
564
+ ▁pose -8.33792
565
+ ▁backpack -8.35003
566
+ ▁high -8.35773
567
+ ▁surround -8.36548
568
+ ▁office -8.36662
569
+ ▁zoo -8.37329
570
+ ▁brick -8.37353
571
+ ation -8.37577
572
+ ▁neck -8.3822
573
+ light -8.38662
574
+ ▁restaurant -8.3891
575
+ ▁dry -8.3892
576
+ ▁cellphone -8.38966
577
+ ▁photograph -8.3971
578
+ ▁fresh -8.3971
579
+ ▁surface -8.3971
580
+ ▁shelf -8.39711
581
+ ween -8.39718
582
+ ▁jacket -8.40516
583
+ ▁sauce -8.40517
584
+ ▁wave -8.40517
585
+ ▁adult -8.41329
586
+ ▁statue -8.41329
587
+ ▁sc -8.41573
588
+ ▁waiting -8.41797
589
+ ▁branch -8.42149
590
+ ▁cabinet -8.42149
591
+ ▁kind -8.42173
592
+ ▁watching -8.42253
593
+ ▁painted -8.42488
594
+ ▁play -8.42802
595
+ ▁post -8.42841
596
+ ▁polar -8.42979
597
+ ▁track -8.43055
598
+ ▁cloth -8.43809
599
+ colored -8.44458
600
+ ▁runway -8.4465
601
+ ▁glove -8.45497
602
+ ▁time -8.45497
603
+ ▁clear -8.45499
604
+ ▁showing -8.45502
605
+ ▁sun -8.45814
606
+ top -8.45927
607
+ ▁toothbrushes -8.46235
608
+ ▁helmet -8.46351
609
+ ▁sleeping -8.46351
610
+ ▁chicken -8.46352
611
+ ▁rice -8.46352
612
+ ▁dark -8.46353
613
+ ▁children -8.46355
614
+ ▁but -8.46491
615
+ ▁arm -8.4712
616
+ ▁electronic -8.47214
617
+ ▁body -8.47214
618
+ ▁eaten -8.47334
619
+ ▁dressed -8.4777
620
+ ▁skiing -8.47879
621
+ ▁bet -8.48197
622
+ ▁going -8.48353
623
+ ▁tile -8.48614
624
+ ight -8.48895
625
+ ead -8.503
626
+ ▁use -8.50637
627
+ ▁talking -8.50738
628
+ ▁smile -8.51639
629
+ ▁pack -8.51932
630
+ ▁doughnut -8.52028
631
+ fri -8.52173
632
+ ant -8.52342
633
+ ▁go -8.52519
634
+ ▁bright -8.52549
635
+ ▁grey -8.52551
636
+ ▁dock -8.52768
637
+ ▁sliced -8.52772
638
+ ▁eat -8.53152
639
+ ▁wire -8.53602
640
+ ▁rail -8.53658
641
+ ▁cart -8.54105
642
+ ▁place -8.5424
643
+ ▁about -8.54392
644
+ ▁ex -8.54408
645
+ ▁gray -8.552
646
+ ▁left -8.55326
647
+ ▁fries -8.55331
648
+ ▁strip -8.5535
649
+ ▁toaster -8.5552
650
+ ▁carrot -8.55864
651
+ ▁star -8.57154
652
+ ▁intersection -8.57222
653
+ ▁toward -8.58186
654
+ ▁rack -8.5825
655
+ ▁pick -8.58321
656
+ ▁have -8.58323
657
+ ▁drinking -8.59303
658
+ ▁device -8.60135
659
+ ▁soup -8.60137
660
+ ▁tub -8.61128
661
+ ▁brushing -8.61371
662
+ ▁beer -8.61576
663
+ ▁blanket -8.62125
664
+ ▁shower -8.62126
665
+ ▁rain -8.62579
666
+ ▁skateboarder -8.62651
667
+ ▁leaves -8.63135
668
+ ▁trunk -8.63135
669
+ eep -8.63335
670
+ ▁variet -8.63348
671
+ ▁stra -8.63487
672
+ ▁dress -8.63501
673
+ ▁watch -8.65057
674
+ ▁male -8.6518
675
+ ▁potatoes -8.65186
676
+ ▁cover -8.65653
677
+ ▁wi -8.66218
678
+ ▁toppings -8.6625
679
+ ▁night -8.66297
680
+ ver -8.66416
681
+ ▁home -8.67288
682
+ ▁signal -8.68183
683
+ ▁eye -8.68385
684
+ ▁vehicle -8.69419
685
+ ▁shop -8.69747
686
+ ▁batter -8.69925
687
+ ▁decker -8.70507
688
+ ▁was -8.70587
689
+ ▁closeup -8.70815
690
+ ▁dessert -8.71605
691
+ ▁graffiti -8.71605
692
+ ▁older -8.72031
693
+ line -8.72433
694
+ ▁decorated -8.72717
695
+ ▁traveling -8.73226
696
+ ▁uniform -8.7384
697
+ ▁not -8.7476
698
+ ▁chi -8.7524
699
+ ▁stacked -8.75395
700
+ ▁hug -8.76127
701
+ ▁shaped -8.76411
702
+ phone -8.7734
703
+ ▁rest -8.7802
704
+ ▁river -8.78465
705
+ ▁motor -8.78468
706
+ ▁roll -8.78475
707
+ ▁lit -8.7856
708
+ able -8.78876
709
+ ▁egg -8.79656
710
+ ical -8.79692
711
+ placed -8.79851
712
+ ▁shot -8.81013
713
+ ▁setting -8.81027
714
+ ▁cloud -8.8208
715
+ ▁assortment -8.82108
716
+ lic -8.82399
717
+ ▁served -8.83315
718
+ ▁onions -8.83693
719
+ ▁ice -8.85444
720
+ ▁appliances -8.8583
721
+ ▁giant -8.85831
722
+ lush -8.85832
723
+ ▁underneath -8.85857
724
+ ▁vegetable -8.86435
725
+ ▁assort -8.87084
726
+ ▁machine -8.87112
727
+ ▁produc -8.87113
728
+ ▁gear -8.87123
729
+ ▁forest -8.8714
730
+ ▁smart -8.88411
731
+ ▁electric -8.88411
732
+ ▁reading -8.88411
733
+ ▁purse -8.88412
734
+ ▁mitt -8.88414
735
+ ▁moving -8.88414
736
+ ▁seen -8.88435
737
+ ▁van -8.88446
738
+ ▁fish -8.88869
739
+ unny -8.89689
740
+ ▁edge -8.8973
741
+ ▁feet -8.89741
742
+ ▁part -8.89746
743
+ ▁surfer -8.90804
744
+ ▁tea -8.91376
745
+ ▁fridge -8.93781
746
+ ▁perched -8.93781
747
+ ▁running -8.93782
748
+ ▁size -8.93785
749
+ ▁yard -8.93796
750
+ ▁mount -8.9517
751
+ ▁female -8.9517
752
+ ▁mustard -8.9517
753
+ ▁curb -8.95172
754
+ ▁turn -8.95174
755
+ ▁him -8.95175
756
+ ▁scene -8.95183
757
+ ▁ramp -8.95295
758
+ ▁log -8.95909
759
+ ▁object -8.96579
760
+ ▁draw -8.9658
761
+ ▁drive -8.9658
762
+ ▁cattle -8.96602
763
+ ▁cla -8.9711
764
+ ▁wheel -8.98007
765
+ ▁towel -8.98022
766
+ ▁point -8.9804
767
+ ▁putt -8.98076
768
+ ▁tomatoes -8.98212
769
+ ▁beautiful -8.99457
770
+ ▁mother -8.99457
771
+ ▁among -8.99458
772
+ ▁gold -8.99461
773
+ ▁hang -8.99878
774
+ intend -9.00038
775
+ ▁pasture -9.00931
776
+ ▁cage -9.01343
777
+ ▁bridge -9.0242
778
+ ▁clean -9.02423
779
+ ▁frost -9.02425
780
+ ▁leg -9.0249
781
+ ▁reflect -9.03935
782
+ ▁sausage -9.03935
783
+ ▁right -9.03942
784
+ ▁stone -9.03967
785
+ ▁candle -9.03972
786
+ ▁wrappe -9.03975
787
+ ▁single -9.04037
788
+ ▁goat -9.04131
789
+ ▁arranged -9.05473
790
+ ▁writ -9.05476
791
+ ▁blow -9.05488
792
+ ▁desktop -9.05653
793
+ board -9.05754
794
+ ▁equipment -9.07036
795
+ ▁cream -9.07036
796
+ ▁trail -9.07036
797
+ ▁mug -9.0704
798
+ ▁wild -9.0704
799
+ ▁sub -9.07047
800
+ ▁beans -9.07074
801
+ ▁paint -9.08035
802
+ ▁below -9.08623
803
+ ▁featur -9.08623
804
+ ▁pitch -9.08625
805
+ ▁perform -9.10236
806
+ ▁toothpaste -9.10236
807
+ ▁held -9.10243
808
+ ▁float -9.11875
809
+ ▁broken -9.11875
810
+ ▁glazed -9.11875
811
+ ▁ketchup -9.11875
812
+ ▁check -9.11876
813
+ ▁dinner -9.11876
814
+ ▁grill -9.11876
815
+ ▁police -9.11877
816
+ ▁pre -9.11932
817
+ ▁design -9.13542
818
+ ▁flock -9.13542
819
+ ▁gather -9.13542
820
+ ▁platform -9.13542
821
+ ▁trash -9.13542
822
+ ▁veggie -9.13542
823
+ ▁short -9.13545
824
+ ▁foot -9.13546
825
+ ▁flip -9.14075
826
+ ▁birthday -9.15237
827
+ ▁pretty -9.15237
828
+ ▁soda -9.15237
829
+ ▁reach -9.15241
830
+ ▁nice -9.15242
831
+ ▁public -9.16961
832
+ ▁round -9.16961
833
+ having -9.17397
834
+ ▁alone -9.18719
835
+ ▁bushes -9.18722
836
+ ▁lie -9.19505
837
+ ▁stack -9.19848
838
+ just -9.20508
839
+ ▁lake -9.20519
840
+ ▁stopped -9.20543
841
+ ▁fac -9.23775
842
+ ▁engine -9.24171
843
+ ▁tarmac -9.24171
844
+ ▁garden -9.24172
845
+ ▁vin -9.2419
846
+ tuck -9.24618
847
+ ▁toast -9.25758
848
+ ▁flat -9.26074
849
+ ▁try -9.26088
850
+ ▁french -9.27981
851
+ ▁beverage -9.27981
852
+ ▁shore -9.27993
853
+ ▁grow -9.28019
854
+ ▁fall -9.28069
855
+ ▁supplies -9.29942
856
+ ▁farm -9.29955
857
+ ▁mushroom -9.31942
858
+ ▁mix -9.31942
859
+ ▁chain -9.31942
860
+ ▁throw -9.31942
861
+ ▁good -9.31947
862
+ loaded -9.31949
863
+ ▁steel -9.31954
864
+ ▁wait -9.32815
865
+ ▁shape -9.33477
866
+ ▁advertis -9.33983
867
+ ▁lunch -9.33983
868
+ ▁modern -9.33983
869
+ ▁square -9.33983
870
+ ▁smo -9.34026
871
+ ▁tomato -9.35768
872
+ ▁enjoy -9.36066
873
+ ▁shoulder -9.36066
874
+ ▁lemon -9.36066
875
+ ▁pastries -9.36067
876
+ ▁milk -9.36068
877
+ ▁lamb -9.36071
878
+ ▁match -9.36071
879
+ ▁chew -9.36072
880
+ ▁rose -9.36084
881
+ ▁style -9.36108
882
+ ▁appear -9.38194
883
+ ▁breakfast -9.38194
884
+ ▁foreground -9.38194
885
+ ▁napkin -9.38194
886
+ ▁platter -9.38194
887
+ ▁ripe -9.38194
888
+ ▁strawberries -9.38194
889
+ ▁sunglasses -9.38194
890
+ ▁chopped -9.38194
891
+ ▁desert -9.38194
892
+ ▁blender -9.38194
893
+ ▁leaf -9.38194
894
+ ▁shade -9.38197
895
+ ▁lawn -9.382
896
+ ▁bri -9.39481
897
+ ▁decoration -9.40368
898
+ ▁pedestrian -9.40368
899
+ ▁baked -9.4037
900
+ ▁kneel -9.4259
901
+ ▁bottom -9.4259
902
+ ▁christmas -9.4259
903
+ ▁country -9.4259
904
+ ▁decorative -9.4259
905
+ ▁scooter -9.4259
906
+ ▁sculpture -9.4259
907
+ ▁sprinkles -9.4259
908
+ ▁things -9.42606
909
+ ▁chips -9.42617
910
+ ▁catcher -9.42642
911
+ ball -9.43836
912
+ ▁serving -9.44863
913
+ ▁cub -9.44867
914
+ ▁horn -9.44874
915
+ ▁bathtub -9.47188
916
+ ▁concrete -9.47188
917
+ ▁distance -9.47188
918
+ where -9.47193
919
+ ▁balloon -9.47194
920
+ ▁tri -9.47568
921
+ bow -9.50266
922
+ ▁bacon -9.52008
923
+ ▁bedroom -9.52008
924
+ ▁carriage -9.52008
925
+ ▁kitten -9.52008
926
+ ▁stainless -9.52008
927
+ ▁reads -9.52009
928
+ ▁bite -9.52042
929
+ ▁graze -9.52045
930
+ ▁carrie -9.52051
931
+ ▁juice -9.54508
932
+ ▁lettuce -9.54508
933
+ ▁partially -9.54508
934
+ ▁swimming -9.54508
935
+ ▁position -9.54508
936
+ ▁carpet -9.54512
937
+ ▁sort -9.54545
938
+ ▁travel -9.55899
939
+ ▁blurr -9.57072
940
+ ▁pigeons -9.57072
941
+ ▁what -9.57074
942
+ ▁cement -9.57078
943
+ ▁word -9.57079
944
+ ▁same -9.57092
945
+ ▁ship -9.58422
946
+ ▁climb -9.59704
947
+ ▁arrangement -9.59704
948
+ ▁collecti -9.59704
949
+ ▁shadow -9.59704
950
+ ▁parade -9.59704
951
+ ▁bucket -9.59704
952
+ ▁lift -9.59704
953
+ ▁center -9.59707
954
+ berry -9.5971
955
+ ▁flag -9.5971
956
+ ▁lead -9.59712
957
+ ▁giv -9.61391
958
+ ▁you -9.62387
959
+ ▁family -9.62407
960
+ ▁military -9.62407
961
+ ▁picnic -9.62407
962
+ ▁soccer -9.62407
963
+ ▁pavement -9.62407
964
+ ▁peanut -9.62407
965
+ ▁space -9.62409
966
+ ▁mark -9.65112
967
+ ▁curtain -9.65184
968
+ ▁himself -9.65184
969
+ ▁railroad -9.65184
970
+ ▁ledge -9.65184
971
+ ddler -9.65185
972
+ ▁duck -9.65187
973
+ ▁model -9.65188
974
+ rcial -9.65191
975
+ ▁base -9.65213
976
+ arrow -9.65254
977
+ made -9.6699
978
+ ▁propeller -9.68041
979
+ ▁school -9.68041
980
+ ▁puppy -9.68042
981
+ ▁cupcake -9.68042
982
+ ▁built -9.68043
983
+ ▁block -9.70983
984
+ ▁event -9.70983
985
+ ▁spread -9.70983
986
+ ▁winter -9.70985
987
+ ▁sport -9.70997
988
+ ▁antique -9.74013
989
+ ▁pattern -9.74013
990
+ ▁professional -9.74013
991
+ ▁balanc -9.74013
992
+ ▁consist -9.74013
993
+ ▁spray -9.74016
994
+ ough -9.74019
995
+ ▁figure -9.77138
996
+ ▁furniture -9.77138
997
+ ▁notebook -9.77138
998
+ ▁parrot -9.77138
999
+ ▁sofa -9.77138
1000
+ q -10.8674
src/dataset/sub_tokenizer1500.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33febb00379c559196d197de44fa529e8e3cfe5853e9a778dee369f6f46aa4fe
3
+ size 262628
src/dataset/sub_tokenizer1500.vocab ADDED
@@ -0,0 +1,1500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad> 0
2
+ <sos> 0
3
+ <eos> 0
4
+ <unk> 0
5
+ ▁a -1.93118
6
+ . -2.88169
7
+ ▁of -3.55221
8
+ ▁on -3.56188
9
+ s -3.62067
10
+ ▁in -3.81444
11
+ ▁the -3.81817
12
+ ▁with -3.88689
13
+ nd -3.89226
14
+ ▁is -4.40178
15
+ ing -4.52367
16
+ ▁to -4.57599
17
+ ▁ -4.76533
18
+ ▁man -4.79839
19
+ ▁sitting -4.89362
20
+ , -4.97747
21
+ ▁an -5.06532
22
+ ed -5.14206
23
+ ▁next -5.14591
24
+ e -5.16728
25
+ ▁two -5.17064
26
+ ▁white -5.30298
27
+ y -5.31383
28
+ ▁are -5.32805
29
+ ▁holding -5.34089
30
+ ▁standing -5.34217
31
+ ▁table -5.41202
32
+ ▁it -5.45231
33
+ d -5.5279
34
+ ▁plate -5.54488
35
+ ▁woman -5.57433
36
+ ▁at -5.59347
37
+ n -5.59711
38
+ ▁that -5.61939
39
+ ▁up -5.62864
40
+ ▁top -5.69587
41
+ ▁people -5.70335
42
+ ▁some -5.73733
43
+ ▁person -5.75235
44
+ ▁black -5.82383
45
+ ▁large -5.82513
46
+ ▁street -5.86497
47
+ le -5.9101
48
+ ▁red -5.91133
49
+ a -5.94493
50
+ ▁his -5.96388
51
+ ▁small -5.96909
52
+ t -5.98331
53
+ r -5.98939
54
+ ▁dog -6.01085
55
+ ▁near -6.01845
56
+ o -6.04639
57
+ ▁front -6.05784
58
+ p -6.09572
59
+ ▁sign -6.13869
60
+ er -6.16551
61
+ ▁cat -6.17326
62
+ m -6.1807
63
+ ▁by -6.19106
64
+ ▁bear -6.19634
65
+ ▁group -6.2111
66
+ ▁field -6.22948
67
+ ▁has -6.24171
68
+ ▁food -6.24916
69
+ ▁blue -6.25015
70
+ ▁green -6.28562
71
+ ▁down -6.295
72
+ ▁young -6.30647
73
+ ▁tennis -6.32453
74
+ ▁snow -6.32548
75
+ ▁other -6.33314
76
+ ▁close -6.335
77
+ ▁whi -6.35017
78
+ ▁water -6.36056
79
+ ▁there -6.36118
80
+ ▁grass -6.36577
81
+ ▁side -6.41189
82
+ ▁train -6.45737
83
+ ▁computer -6.46105
84
+ ▁lay -6.47228
85
+ ▁board -6.50962
86
+ ▁baseball -6.52711
87
+ ▁phone -6.53028
88
+ ▁parked -6.53973
89
+ ▁for -6.54057
90
+ ▁walking -6.54314
91
+ b -6.54853
92
+ ▁her -6.5544
93
+ g -6.55726
94
+ ▁sits -6.55971
95
+ ▁riding -6.56185
96
+ ▁bowl -6.5673
97
+ i -6.56754
98
+ ▁clock -6.57554
99
+ ▁stop -6.57563
100
+ ▁parking -6.57676
101
+ ▁kitchen -6.58942
102
+ ▁wearing -6.63272
103
+ ▁picture -6.63517
104
+ ▁boy -6.63583
105
+ ▁looking -6.63721
106
+ ▁bus -6.64201
107
+ ▁girl -6.65437
108
+ ▁vase -6.67243
109
+ ▁pizza -6.68159
110
+ u -6.68776
111
+ es -6.68791
112
+ ▁fire -6.68928
113
+ ▁f -6.6895
114
+ ' -6.69392
115
+ ▁thre -6.70171
116
+ ▁out -6.70226
117
+ ▁woode -6.70691
118
+ ▁couple -6.7143
119
+ ▁yellow -6.72545
120
+ ▁bathroom -6.73026
121
+ ▁glass -6.73696
122
+ ▁toilet -6.74976
123
+ al -6.75068
124
+ ▁from -6.76297
125
+ ▁several -6.76797
126
+ ▁skateboard -6.77339
127
+ ▁building -6.78257
128
+ ▁brown -6.78652
129
+ ▁vegetables -6.79345
130
+ ▁light -6.79579
131
+ ▁hydrant -6.80196
132
+ ▁hand -6.80907
133
+ l -6.82187
134
+ ▁beach -6.82292
135
+ ▁scissors -6.82898
136
+ ▁sink -6.83714
137
+ st -6.83938
138
+ ▁room -6.84073
139
+ ▁be -6.84238
140
+ ▁their -6.84242
141
+ ▁filled -6.84433
142
+ k -6.85956
143
+ ▁over -6.86437
144
+ w -6.87453
145
+ ▁desk -6.87899
146
+ in -6.88124
147
+ ▁wine -6.89047
148
+ ▁bat -6.89335
149
+ ▁elephant -6.90038
150
+ ▁hot -6.90048
151
+ ▁road -6.90377
152
+ ▁open -6.90761
153
+ ▁meter -6.92037
154
+ ▁pair -6.92121
155
+ ▁tie -6.93415
156
+ ▁bench -6.93685
157
+ ▁horse -6.94242
158
+ ▁one -6.95584
159
+ ▁s -6.95612
160
+ ▁cake -6.95696
161
+ ▁thi -6.96189
162
+ ▁each -6.9629
163
+ ▁keyboard -6.96291
164
+ ▁wall -6.97106
165
+ ▁sheep -6.9752
166
+ ▁ball -6.98006
167
+ ▁counter -6.98135
168
+ ▁different -6.98135
169
+ ▁frisbee -6.98135
170
+ ch -6.98225
171
+ ▁flying -6.98315
172
+ ▁orange -6.98616
173
+ ▁flowers -6.98714
174
+ ▁traffic -6.98965
175
+ ▁d -6.99022
176
+ c -6.99385
177
+ ▁laptop -6.99806
178
+ ▁giraffe -7.00126
179
+ ▁eating -7.00898
180
+ en -7.0135
181
+ ▁tree -7.01429
182
+ ▁car -7.01644
183
+ ▁teddy -7.01714
184
+ ▁bunch -7.01741
185
+ ▁around -7.01929
186
+ ▁mouse -7.02144
187
+ ▁covered -7.02696
188
+ ▁its -7.04127
189
+ ▁broccoli -7.04982
190
+ ▁boat -7.05254
191
+ ▁cell -7.05417
192
+ ▁inside -7.05999
193
+ ▁through -7.06096
194
+ ▁fruit -7.07545
195
+ ▁remote -7.08361
196
+ ▁motorcycle -7.08501
197
+ ▁outside -7.09315
198
+ on -7.09401
199
+ ▁zebra -7.09424
200
+ ▁area -7.0966
201
+ very -7.09841
202
+ ▁little -7.09978
203
+ ▁microwave -7.10444
204
+ ▁truck -7.10678
205
+ ar -7.11999
206
+ ▁city -7.12334
207
+ ly -7.12514
208
+ ▁old -7.12601
209
+ ▁lot -7.12874
210
+ v -7.13036
211
+ ▁bed -7.13144
212
+ ▁surfboard -7.14022
213
+ ▁umbrella -7.15312
214
+ ▁together -7.15975
215
+ ▁playing -7.16295
216
+ or -7.17949
217
+ ▁back -7.17972
218
+ ▁player -7.18388
219
+ ▁sit -7.18649
220
+ ▁under -7.20005
221
+ ▁racket -7.20024
222
+ h -7.20133
223
+ el -7.20277
224
+ ▁behind -7.20527
225
+ ▁bananas -7.20846
226
+ ting -7.21381
227
+ ▁men -7.22024
228
+ ▁ground -7.22091
229
+ ▁background -7.22354
230
+ ▁b -7.22428
231
+ ▁bird -7.22629
232
+ ▁snowboard -7.22802
233
+ ▁bike -7.22918
234
+ ▁glasses -7.23108
235
+ ▁piece -7.23459
236
+ f -7.24452
237
+ ▁child -7.24754
238
+ ▁carrots -7.25782
239
+ ▁air -7.25869
240
+ ▁display -7.26076
241
+ ▁stuff -7.26113
242
+ ▁head -7.2612
243
+ ▁sandwich -7.26248
244
+ ▁cup -7.27509
245
+ pped -7.27733
246
+ ▁baby -7.28606
247
+ ▁full -7.2917
248
+ ▁hold -7.2918
249
+ an -7.29489
250
+ ▁stands -7.29853
251
+ ous -7.30242
252
+ ▁luggage -7.31451
253
+ ▁into -7.31494
254
+ ▁being -7.32332
255
+ ▁oven -7.33198
256
+ ▁beside -7.3412
257
+ ll -7.3429
258
+ ▁photo -7.3438
259
+ it -7.34923
260
+ ▁sidewalk -7.35576
261
+ ▁cutt -7.35913
262
+ ▁shirt -7.36241
263
+ ▁paper -7.36492
264
+ ▁floor -7.36792
265
+ ▁dirt -7.37091
266
+ ▁knife -7.37703
267
+ ▁g -7.37863
268
+ ▁pink -7.38024
269
+ ▁trees -7.38423
270
+ ▁fence -7.38543
271
+ ▁cow -7.3919
272
+ ▁game -7.39564
273
+ ▁bicycle -7.40192
274
+ ▁window -7.40507
275
+ ▁pole -7.41001
276
+ ▁look -7.41334
277
+ ▁skis -7.41535
278
+ re -7.42092
279
+ ▁big -7.42447
280
+ ▁t -7.42848
281
+ ▁m -7.4504
282
+ ▁pile -7.45319
283
+ ▁view -7.45363
284
+ ▁face -7.4605
285
+ ▁kite -7.4606
286
+ il -7.46525
287
+ ▁airplane -7.46698
288
+ ▁oranges -7.46926
289
+ ▁wood -7.47265
290
+ ro -7.47918
291
+ ▁mouth -7.48736
292
+ ▁do -7.48758
293
+ ▁toothbrush -7.48941
294
+ ▁zebras -7.49212
295
+ ▁apples -7.49458
296
+ ▁image -7.4977
297
+ ▁plane -7.51166
298
+ ur -7.51727
299
+ ▁stand -7.52431
300
+ ers -7.52482
301
+ ▁kites -7.52906
302
+ ▁sky -7.54382
303
+ ▁ski -7.54406
304
+ ▁meat -7.56227
305
+ ▁cut -7.56842
306
+ ▁apple -7.56932
307
+ ▁another -7.57328
308
+ ▁park -7.57555
309
+ ▁refrigerator -7.59596
310
+ la -7.60542
311
+ ▁vari -7.6226
312
+ ▁st -7.62544
313
+ ▁co -7.6269
314
+ ▁grassy -7.62858
315
+ ▁bears -7.62972
316
+ ▁bag -7.63261
317
+ et -7.63472
318
+ ▁four -7.64696
319
+ ▁colorful -7.65507
320
+ ▁hat -7.66174
321
+ ▁someone -7.66735
322
+ ▁them -7.66998
323
+ ▁book -7.67032
324
+ ▁donuts -7.68524
325
+ ▁tall -7.68824
326
+ ▁cows -7.68997
327
+ ▁bottle -7.69113
328
+ ▁fork -7.69134
329
+ ▁women -7.69223
330
+ ▁banana -7.69969
331
+ ▁stove -7.70069
332
+ ▁off -7.70246
333
+ ▁n -7.70318
334
+ ▁box -7.70362
335
+ ▁control -7.70493
336
+ ▁coffee -7.71351
337
+ ▁station -7.71806
338
+ ▁chair -7.72184
339
+ at -7.72473
340
+ id -7.73245
341
+ ter -7.73264
342
+ ▁mirror -7.73525
343
+ ▁along -7.73525
344
+ ▁ready -7.73965
345
+ ▁herd -7.75265
346
+ ▁cr -7.75359
347
+ ▁camera -7.76197
348
+ li -7.76254
349
+ ▁suitcase -7.76274
350
+ ▁w -7.7628
351
+ ▁c -7.76326
352
+ ck -7.76521
353
+ to -7.76797
354
+ ▁cheese -7.77104
355
+ ▁hanging -7.77442
356
+ ic -7.77562
357
+ ▁items -7.77855
358
+ ▁la -7.78103
359
+ ▁line -7.78226
360
+ ▁tray -7.78482
361
+ ▁giraffes -7.80095
362
+ ▁above -7.80817
363
+ ent -7.80837
364
+ ▁he -7.81318
365
+ ▁spoon -7.81767
366
+ ▁can -7.82528
367
+ ▁elephants -7.82618
368
+ ▁middle -7.82727
369
+ ▁long -7.82739
370
+ ▁wii -7.82816
371
+ ion -7.83194
372
+ ▁suit -7.8321
373
+ ▁re -7.83355
374
+ ▁po -7.83624
375
+ ▁half -7.84183
376
+ ▁posing -7.84212
377
+ ra -7.84547
378
+ ▁metal -7.84673
379
+ ▁pa -7.84758
380
+ ▁tak -7.84771
381
+ ▁grazing -7.85166
382
+ ▁get -7.85233
383
+ ve -7.85276
384
+ ▁bar -7.86318
385
+ th -7.86829
386
+ ▁sand -7.87105
387
+ way -7.87305
388
+ ▁stick -7.87666
389
+ ▁walk -7.8805
390
+ ut -7.88539
391
+ ▁day -7.88687
392
+ ▁smiling -7.89196
393
+ ▁like -7.89197
394
+ gain -7.89252
395
+ ▁fruits -7.89493
396
+ ce -7.89643
397
+ ▁mountain -7.89712
398
+ ▁carry -7.89715
399
+ ▁sh -7.90562
400
+ ▁living -7.9233
401
+ ▁pe -7.92486
402
+ ▁court -7.92862
403
+ ▁all -7.92962
404
+ ▁signs -7.93469
405
+ ▁surf -7.93646
406
+ ▁basket -7.93934
407
+ ol -7.94429
408
+ ▁silver -7.94475
409
+ ▁drink -7.94482
410
+ ct -7.94505
411
+ ▁door -7.94597
412
+ ▁animals -7.95318
413
+ ▁ma -7.95935
414
+ ▁cars -7.96219
415
+ ▁hair -7.97801
416
+ ▁pull -7.9781
417
+ ▁pan -7.98197
418
+ ▁dogs -7.98348
419
+ ▁row -7.99477
420
+ ▁h -7.99801
421
+ ▁across -8.00047
422
+ ▁airport -8.00047
423
+ ▁bread -8.00047
424
+ ▁lean -8.01205
425
+ ▁animal -8.01461
426
+ ▁plastic -8.01781
427
+ ▁who -8.01782
428
+ te -8.02911
429
+ ▁horses -8.03068
430
+ ▁trick -8.03557
431
+ ▁couch -8.03559
432
+ ▁no -8.03695
433
+ ▁com -8.03871
434
+ ▁dish -8.04053
435
+ z -8.04088
436
+ ▁tracks -8.04841
437
+ ▁mo -8.05569
438
+ ▁set -8.05572
439
+ ▁ca -8.05817
440
+ ▁screen -8.05949
441
+ ▁slice -8.05954
442
+ ▁donut -8.06354
443
+ ▁rock -8.06384
444
+ ▁birds -8.06533
445
+ ▁hands -8.06726
446
+ ▁ra -8.06906
447
+ ▁skate -8.07777
448
+ ▁store -8.0842
449
+ ▁driving -8.09035
450
+ ▁empty -8.09035
451
+ ▁un -8.09227
452
+ lying -8.09266
453
+ ▁pot -8.09951
454
+ up -8.1032
455
+ ▁chocolate -8.10934
456
+ ▁enclosure -8.10934
457
+ ▁something -8.1158
458
+ ▁market -8.12232
459
+ ▁seat -8.14076
460
+ ▁purple -8.14181
461
+ ▁television -8.14181
462
+ ▁using -8.14191
463
+ ▁displayed -8.1427
464
+ ▁snowboarder -8.1446
465
+ ▁house -8.14844
466
+ ▁skier -8.15227
467
+ ▁slope -8.1551
468
+ ▁video -8.1551
469
+ ▁color -8.15566
470
+ ▁hit -8.15587
471
+ ▁controller -8.16183
472
+ ▁container -8.16206
473
+ ▁shown -8.16321
474
+ ▁sp -8.16484
475
+ ies -8.16508
476
+ us -8.1671
477
+ ▁hill -8.16834
478
+ ▁di -8.18087
479
+ ▁multi -8.18222
480
+ uring -8.1826
481
+ ▁case -8.18262
482
+ ▁tooth -8.18916
483
+ ▁outdoor -8.19606
484
+ ▁flower -8.1975
485
+ ▁number -8.20305
486
+ ▁tv -8.20468
487
+ ▁brush -8.20488
488
+ ping -8.21358
489
+ ▁doughnuts -8.21646
490
+ ▁double -8.21719
491
+ ▁hotdog -8.21719
492
+ ▁monitor -8.21719
493
+ ▁salad -8.21719
494
+ ▁lady -8.21722
495
+ ▁pen -8.22209
496
+ ▁attached -8.22433
497
+ is -8.22731
498
+ ▁boats -8.23018
499
+ um -8.23022
500
+ ▁contain -8.23126
501
+ ▁teeth -8.23161
502
+ ▁k -8.23797
503
+ ▁resting -8.23827
504
+ ▁ha -8.24136
505
+ ▁bun -8.24273
506
+ ▁guy -8.24609
507
+ ▁made -8.24831
508
+ ot -8.25082
509
+ ▁de -8.25114
510
+ ▁ocean -8.25342
511
+ ▁tower -8.25351
512
+ ew -8.25769
513
+ ▁meal -8.26085
514
+ ▁corner -8.26138
515
+ ▁passenger -8.26829
516
+ ▁racquet -8.27581
517
+ ▁crowd -8.27749
518
+ un -8.28344
519
+ ▁umbrellas -8.28864
520
+ ▁backpack -8.29102
521
+ ▁high -8.29871
522
+ ▁toy -8.30503
523
+ ▁surround -8.30647
524
+ ▁zoo -8.31428
525
+ ▁brick -8.3144
526
+ ▁lights -8.31599
527
+ colored -8.32935
528
+ ▁restaurant -8.33009
529
+ ▁cellphone -8.33042
530
+ ng -8.3362
531
+ ▁pieces -8.33696
532
+ led -8.33796
533
+ ▁fresh -8.33809
534
+ ▁photograph -8.33809
535
+ ▁shelf -8.33809
536
+ ▁surface -8.33809
537
+ ween -8.33811
538
+ ▁jacket -8.34615
539
+ ▁sauce -8.34615
540
+ ▁wave -8.34616
541
+ ▁ride -8.35186
542
+ ▁statue -8.35428
543
+ ▁adult -8.35428
544
+ ▁waiting -8.3586
545
+ ▁watching -8.35954
546
+ ▁plant -8.36065
547
+ ▁branch -8.36248
548
+ ▁jet -8.36291
549
+ ▁post -8.36497
550
+ ▁painted -8.3655
551
+ ▁books -8.36582
552
+ ie -8.36742
553
+ ▁past -8.37075
554
+ ▁polar -8.37096
555
+ ▁play -8.37126
556
+ ▁so -8.37503
557
+ ▁track -8.37767
558
+ ▁runway -8.38762
559
+ side -8.38829
560
+ ▁glove -8.39596
561
+ ▁clear -8.39596
562
+ ▁time -8.39597
563
+ ▁toothbrushes -8.39938
564
+ ▁showing -8.40062
565
+ ▁chicken -8.4045
566
+ ▁helmet -8.4045
567
+ ▁dark -8.40451
568
+ ▁children -8.40452
569
+ ▁rice -8.40464
570
+ ▁sleeping -8.4071
571
+ ▁arm -8.40782
572
+ ▁bet -8.40891
573
+ ▁skiing -8.41273
574
+ ▁electronic -8.41312
575
+ ▁body -8.41314
576
+ ▁eaten -8.41397
577
+ ation -8.41581
578
+ top -8.41755
579
+ ▁dressed -8.4181
580
+ ▁tile -8.42344
581
+ ▁going -8.42404
582
+ ▁prepar -8.43043
583
+ co -8.43223
584
+ ple -8.43542
585
+ if -8.43911
586
+ ▁go -8.43936
587
+ ▁talking -8.45039
588
+ ling -8.45079
589
+ ive -8.45333
590
+ ▁or -8.46099
591
+ ▁sliced -8.46327
592
+ ▁bright -8.46647
593
+ ▁grey -8.46648
594
+ ▁doughnut -8.46741
595
+ ▁dock -8.46803
596
+ ▁lo -8.47278
597
+ ▁wire -8.47628
598
+ ▁rail -8.47653
599
+ ▁eat -8.47835
600
+ ▁sun -8.47862
601
+ ▁types -8.47913
602
+ ig -8.48178
603
+ ▁ex -8.48495
604
+ ▁about -8.48501
605
+ ▁gray -8.4893
606
+ ▁left -8.49425
607
+ ▁fries -8.49426
608
+ ▁strip -8.49429
609
+ ▁toaster -8.49558
610
+ age -8.49645
611
+ ▁includ -8.50229
612
+ ▁neck -8.5041
613
+ ▁carrot -8.5057
614
+ ▁star -8.50571
615
+ ▁intersection -8.51321
616
+ ▁ne -8.51381
617
+ mp -8.51572
618
+ ▁toward -8.52283
619
+ ▁rack -8.523
620
+ ▁have -8.52335
621
+ ti -8.52431
622
+ ▁drinking -8.53239
623
+ ▁device -8.54233
624
+ ▁soup -8.5427
625
+ ▁beer -8.54704
626
+ ▁brushing -8.54769
627
+ ▁tub -8.55224
628
+ ▁skateboarder -8.5601
629
+ ▁blanket -8.56224
630
+ ▁shower -8.56331
631
+ ▁ho -8.56386
632
+ ▁rain -8.56402
633
+ ▁leaves -8.57234
634
+ ▁trunk -8.57234
635
+ ▁variet -8.57362
636
+ ▁walks -8.57371
637
+ ▁stra -8.57436
638
+ ▁dress -8.57668
639
+ ▁bags -8.57846
640
+ ▁rocks -8.5855
641
+ ▁male -8.58767
642
+ ▁potatoes -8.59314
643
+ ▁place -8.59391
644
+ ▁take -8.597
645
+ ▁cover -8.59788
646
+ ▁gra -8.59976
647
+ ▁swing -8.60294
648
+ x -8.60304
649
+ ▁toppings -8.60342
650
+ ▁night -8.60375
651
+ ight -8.60386
652
+ ian -8.60506
653
+ ▁meters -8.60745
654
+ placed -8.61271
655
+ ▁cross -8.6135
656
+ ▁home -8.61381
657
+ ment -8.61483
658
+ ▁cart -8.61607
659
+ ▁signal -8.61711
660
+ ▁bo -8.6237
661
+ ▁shows -8.62811
662
+ ▁kid -8.63083
663
+ ▁vehicle -8.63518
664
+ ▁shop -8.63771
665
+ ▁batter -8.63776
666
+ ▁office -8.64358
667
+ ▁decker -8.64688
668
+ ▁closeup -8.64917
669
+ ▁sa -8.6532
670
+ ▁she -8.6543
671
+ ▁older -8.65575
672
+ ten -8.6566
673
+ ▁dessert -8.65704
674
+ ▁graffiti -8.65704
675
+ ▁buildings -8.66063
676
+ ▁decorated -8.66817
677
+ ▁traveling -8.67289
678
+ ▁uniform -8.67939
679
+ ▁motorcycles -8.68378
680
+ ▁not -8.68576
681
+ ke -8.68938
682
+ ca -8.68955
683
+ ish -8.69322
684
+ ▁bottles -8.69376
685
+ ▁stacked -8.6945
686
+ ▁hug -8.70225
687
+ ▁shaped -8.70448
688
+ ir -8.70499
689
+ ake -8.70533
690
+ ▁work -8.71475
691
+ ▁ru -8.71526
692
+ ate -8.71737
693
+ ▁rest -8.71757
694
+ im -8.72285
695
+ ▁se -8.72481
696
+ ▁motor -8.72565
697
+ ▁roll -8.72566
698
+ able -8.72568
699
+ ▁lit -8.72596
700
+ ▁river -8.72599
701
+ ▁egg -8.73755
702
+ op -8.74301
703
+ ta -8.74331
704
+ ▁cabinets -8.74989
705
+ ▁shot -8.75016
706
+ ▁setting -8.75117
707
+ ▁dry -8.75973
708
+ ▁cloud -8.76179
709
+ ▁assortment -8.76196
710
+ ver -8.76214
711
+ ▁slices -8.76601
712
+ ▁cooking -8.77002
713
+ ▁served -8.77589
714
+ ▁onions -8.77609
715
+ ▁way -8.78289
716
+ qu -8.78524
717
+ ▁cute -8.78994
718
+ ▁ice -8.7915
719
+ ▁we -8.79685
720
+ ▁cloth -8.7979
721
+ ▁bu -8.79794
722
+ ▁appliances -8.79929
723
+ lush -8.79929
724
+ ▁giant -8.79929
725
+ ▁underneath -8.79957
726
+ ▁pu -8.80217
727
+ ▁suitcases -8.80993
728
+ ▁vegetable -8.81141
729
+ ▁assort -8.81193
730
+ ▁machine -8.81211
731
+ ▁gear -8.81213
732
+ ▁forest -8.81221
733
+ ▁pack -8.813
734
+ ard -8.81466
735
+ ▁mak -8.81679
736
+ ▁jumping -8.82059
737
+ ▁bikes -8.82337
738
+ ▁electric -8.8251
739
+ ▁purse -8.8251
740
+ ▁reading -8.8251
741
+ ▁smart -8.8251
742
+ ▁mitt -8.82511
743
+ ▁moving -8.82512
744
+ ▁swinging -8.8256
745
+ ▁fish -8.82654
746
+ ▁seen -8.82973
747
+ ▁edge -8.83827
748
+ ▁feet -8.83828
749
+ unny -8.84257
750
+ ▁surfer -8.84536
751
+ ▁part -8.84568
752
+ ha -8.85162
753
+ ▁tea -8.85311
754
+ ▁ta -8.85313
755
+ ul -8.85659
756
+ ▁pi -8.8584
757
+ ▁con -8.86021
758
+ ▁see -8.86344
759
+ ▁chairs -8.86605
760
+ tra -8.86699
761
+ light -8.86717
762
+ ant -8.87635
763
+ ical -8.87771
764
+ ▁fridge -8.8788
765
+ ▁perched -8.8788
766
+ ▁running -8.87883
767
+ ▁yard -8.87885
768
+ ▁din -8.87895
769
+ ▁pro -8.88325
770
+ ▁cooked -8.88379
771
+ ▁but -8.88445
772
+ ▁cap -8.88554
773
+ ▁end -8.8862
774
+ ▁hay -8.88776
775
+ ▁mount -8.89269
776
+ ▁female -8.89269
777
+ ▁mustard -8.89269
778
+ ▁curb -8.89269
779
+ ▁turn -8.8927
780
+ ▁scene -8.89274
781
+ ▁him -8.89311
782
+ ▁ramp -8.89321
783
+ ▁log -8.89489
784
+ ▁wi -8.90234
785
+ ▁pie -8.90312
786
+ ▁object -8.90678
787
+ ▁draw -8.90678
788
+ ▁drive -8.90678
789
+ ▁cattle -8.90688
790
+ ▁cla -8.91037
791
+ lic -8.91151
792
+ ▁sale -8.91173
793
+ ead -8.91193
794
+ ▁wheel -8.92106
795
+ ▁towel -8.92112
796
+ ▁point -8.92116
797
+ ▁tomatoes -8.92201
798
+ ▁size -8.92222
799
+ ▁holder -8.92301
800
+ fri -8.92629
801
+ ▁bath -8.92676
802
+ tage -8.92972
803
+ ▁poses -8.93077
804
+ ▁shoes -8.93101
805
+ per -8.93464
806
+ ▁buses -8.93471
807
+ ▁beautiful -8.93555
808
+ ▁mother -8.93556
809
+ ▁among -8.93556
810
+ ▁gold -8.93557
811
+ ▁hang -8.93983
812
+ intend -8.9417
813
+ ▁pasture -8.95027
814
+ ▁cage -8.9514
815
+ ▁plants -8.95357
816
+ ▁bridge -8.96519
817
+ ▁clean -8.96519
818
+ ▁frost -8.9652
819
+ ▁leg -8.96545
820
+ ▁smiles -8.96762
821
+ ff -8.96829
822
+ ▁sc -8.96887
823
+ ▁pick -8.97804
824
+ ▁putt -8.97847
825
+ ▁sausage -8.98034
826
+ ▁right -8.98037
827
+ ▁stone -8.98045
828
+ ▁candle -8.98051
829
+ ▁wrappe -8.98052
830
+ ▁single -8.98062
831
+ ▁crossing -8.98077
832
+ ▁goat -8.98108
833
+ ▁lap -8.98755
834
+ ▁arranged -8.99572
835
+ ▁writ -8.99573
836
+ ▁blow -8.99574
837
+ ▁van -8.99579
838
+ ▁desktop -8.99754
839
+ ▁war -8.99867
840
+ board -9.00012
841
+ ▁handle -9.00077
842
+ ated -9.00415
843
+ ▁cream -9.01135
844
+ ▁equipment -9.01135
845
+ ▁mug -9.01136
846
+ ▁beans -9.01151
847
+ ▁pose -9.01615
848
+ ▁kinds -9.01994
849
+ ▁paint -9.02154
850
+ ▁below -9.02722
851
+ ▁pitch -9.02722
852
+ ▁fly -9.04102
853
+ ▁cook -9.04289
854
+ ▁toothpaste -9.04335
855
+ ▁perform -9.04335
856
+ ▁held -9.04337
857
+ eat -9.04504
858
+ ▁fenced -9.0479
859
+ ▁kids -9.05097
860
+ ▁peppers -9.05207
861
+ ▁make -9.05829
862
+ ▁broken -9.05974
863
+ ▁check -9.05974
864
+ ▁glazed -9.05974
865
+ ▁grill -9.05974
866
+ ▁ketchup -9.05974
867
+ ▁police -9.05975
868
+ ▁dinner -9.05975
869
+ ▁pre -9.05987
870
+ ▁float -9.05998
871
+ lar -9.06452
872
+ ▁jump -9.06548
873
+ ▁coat -9.06712
874
+ ▁flip -9.07263
875
+ ▁eyes -9.07455
876
+ ▁design -9.07641
877
+ ▁platform -9.07641
878
+ ▁trash -9.07641
879
+ ▁veggie -9.07641
880
+ ▁short -9.07642
881
+ ▁foot -9.07642
882
+ ▁gather -9.07647
883
+ ▁flock -9.0766
884
+ less -9.07765
885
+ ▁watch -9.08105
886
+ ▁use -9.09076
887
+ ▁birthday -9.09336
888
+ ▁pretty -9.09336
889
+ ▁reach -9.09337
890
+ ▁nice -9.09337
891
+ ▁soda -9.09338
892
+ ▁five -9.09576
893
+ ▁kind -9.10127
894
+ ial -9.10589
895
+ ▁public -9.1106
896
+ ▁round -9.1106
897
+ having -9.11272
898
+ ▁alone -9.12815
899
+ ▁bushes -9.12816
900
+ ▁lie -9.13024
901
+ ▁taken -9.13789
902
+ ▁stack -9.14013
903
+ just -9.14601
904
+ ▁stopped -9.14618
905
+ ▁lake -9.14671
906
+ ▁new -9.14915
907
+ ▁sea -9.15017
908
+ out -9.15305
909
+ ▁pet -9.15388
910
+ eep -9.16366
911
+ ▁produce -9.16723
912
+ ▁used -9.16863
913
+ tic -9.17655
914
+ ▁fac -9.18216
915
+ ▁engine -9.1827
916
+ ▁tarmac -9.1827
917
+ ▁garden -9.1827
918
+ ▁wild -9.18272
919
+ ▁vin -9.18277
920
+ tuck -9.18447
921
+ tro -9.18453
922
+ ▁mid -9.18527
923
+ ▁dishes -9.18707
924
+ ▁toast -9.1992
925
+ ▁flat -9.20159
926
+ ▁try -9.20164
927
+ ▁both -9.20226
928
+ ▁jar -9.20273
929
+ own -9.20329
930
+ ▁hole -9.20671
931
+ ▁wet -9.20733
932
+ ▁feeding -9.20948
933
+ ▁type -9.2135
934
+ j -9.21763
935
+ ▁french -9.2208
936
+ ▁beverage -9.2208
937
+ ▁shore -9.22082
938
+ ▁grow -9.22087
939
+ ▁fall -9.22102
940
+ ▁put -9.22688
941
+ ▁show -9.23742
942
+ ▁supplies -9.24041
943
+ ▁farm -9.24043
944
+ ▁pool -9.24189
945
+ ▁stoplight -9.24247
946
+ ▁working -9.2589
947
+ ▁mix -9.2604
948
+ ▁mushroom -9.26041
949
+ ▁chain -9.26041
950
+ ▁throw -9.26041
951
+ loaded -9.26042
952
+ ▁good -9.26043
953
+ ▁steel -9.26043
954
+ ▁prepare -9.26078
955
+ ▁brushes -9.2644
956
+ ▁wait -9.27001
957
+ ▁lin -9.27581
958
+ ▁shape -9.27684
959
+ ▁lunch -9.28082
960
+ ▁modern -9.28082
961
+ ▁square -9.28082
962
+ ▁smo -9.28088
963
+ ▁ear -9.28125
964
+ ▁wash -9.28147
965
+ ▁boxes -9.28756
966
+ phone -9.29231
967
+ ▁pad -9.29713
968
+ ock -9.29772
969
+ ▁tomato -9.30026
970
+ ▁enjoy -9.30165
971
+ ▁shoulder -9.30165
972
+ ▁lemon -9.30165
973
+ ▁pastries -9.30165
974
+ ▁milk -9.30165
975
+ ▁match -9.30166
976
+ ▁lamb -9.30166
977
+ ▁chew -9.30167
978
+ ▁rose -9.30169
979
+ ▁style -9.30179
980
+ ▁well -9.30274
981
+ ▁appear -9.32293
982
+ ▁breakfast -9.32293
983
+ ▁desert -9.32293
984
+ ▁foreground -9.32293
985
+ ▁napkin -9.32293
986
+ ▁platter -9.32293
987
+ ▁strawberries -9.32293
988
+ ▁sunglasses -9.32293
989
+ ▁blender -9.32293
990
+ ▁leaf -9.32293
991
+ ▁chopped -9.32293
992
+ ▁shade -9.32294
993
+ ▁lawn -9.32294
994
+ ▁ripe -9.32297
995
+ ▁sail -9.32409
996
+ ▁doll -9.32734
997
+ ▁gate -9.33599
998
+ ▁crowded -9.3398
999
+ ▁pedestrian -9.34466
1000
+ ▁baked -9.34467
1001
+ ▁decoration -9.34474
1002
+ ▁mess -9.34492
1003
+ ▁pasta -9.3516
1004
+ line -9.3603
1005
+ ▁bottom -9.36689
1006
+ ▁christmas -9.36689
1007
+ ▁country -9.36689
1008
+ ▁decorative -9.36689
1009
+ ▁kneel -9.36689
1010
+ ▁scooter -9.36689
1011
+ ▁sculpture -9.36689
1012
+ ▁sprinkles -9.36689
1013
+ ▁chips -9.36694
1014
+ ▁things -9.36697
1015
+ ▁catcher -9.36718
1016
+ ▁butter -9.36763
1017
+ ▁sandwiches -9.37834
1018
+ ▁potte -9.3786
1019
+ uch -9.37978
1020
+ ball -9.38093
1021
+ ▁smile -9.38591
1022
+ ▁serving -9.38961
1023
+ ▁horn -9.38965
1024
+ ▁says -9.3899
1025
+ ▁cub -9.39066
1026
+ ▁includes -9.393
1027
+ ▁dryer -9.39362
1028
+ ▁skies -9.39838
1029
+ ▁was -9.41263
1030
+ ▁bathtub -9.41287
1031
+ ▁concrete -9.41287
1032
+ ▁distance -9.41287
1033
+ where -9.41288
1034
+ ▁balloon -9.41289
1035
+ ▁nearby -9.41308
1036
+ ▁spot -9.41374
1037
+ ▁lamp -9.41404
1038
+ ▁tri -9.41457
1039
+ ▁path -9.41472
1040
+ ▁chi -9.42199
1041
+ ▁par -9.42369
1042
+ ▁ri -9.42577
1043
+ air -9.43144
1044
+ ▁nose -9.43838
1045
+ ular -9.44005
1046
+ ▁em -9.44072
1047
+ ▁run -9.44797
1048
+ ▁gl -9.45232
1049
+ ▁bacon -9.46107
1050
+ ▁bedroom -9.46107
1051
+ ▁carriage -9.46107
1052
+ ▁kitten -9.46107
1053
+ ▁stainless -9.46107
1054
+ ▁reads -9.46107
1055
+ ▁graze -9.46125
1056
+ ▁carrie -9.46131
1057
+ ▁bull -9.46233
1058
+ ▁race -9.46325
1059
+ ▁clothes -9.46378
1060
+ ▁low -9.46384
1061
+ ▁rider -9.46863
1062
+ ▁bite -9.4693
1063
+ ▁juice -9.48607
1064
+ ▁lettuce -9.48607
1065
+ ▁partially -9.48607
1066
+ ▁position -9.48607
1067
+ ▁swimming -9.48607
1068
+ ▁carpet -9.48608
1069
+ ▁sort -9.48618
1070
+ ▁plain -9.48756
1071
+ ▁paw -9.49126
1072
+ ▁travel -9.50079
1073
+ ▁cabinet -9.51108
1074
+ ▁blurr -9.51171
1075
+ ▁fashion -9.51171
1076
+ ▁pigeons -9.51171
1077
+ ▁what -9.51171
1078
+ ▁cement -9.51173
1079
+ ▁word -9.51173
1080
+ ▁same -9.51174
1081
+ ▁reflection -9.51199
1082
+ ▁after -9.51234
1083
+ ▁tiny -9.51597
1084
+ ▁pin -9.51823
1085
+ ▁ship -9.51836
1086
+ ▁feed -9.52763
1087
+ ▁arrangement -9.53803
1088
+ ▁bucket -9.53803
1089
+ ▁climb -9.53803
1090
+ ▁collecti -9.53803
1091
+ ▁shadow -9.53803
1092
+ ▁lift -9.53803
1093
+ ▁parade -9.53803
1094
+ ▁center -9.53804
1095
+ ▁flag -9.53804
1096
+ berry -9.53804
1097
+ ▁lead -9.53805
1098
+ ▁pears -9.53813
1099
+ ▁sheet -9.5393
1100
+ ▁tape -9.54224
1101
+ ▁pickle -9.5429
1102
+ ▁giv -9.54341
1103
+ ▁bri -9.55714
1104
+ ▁you -9.56488
1105
+ ▁cluttered -9.56505
1106
+ ▁family -9.56505
1107
+ ▁military -9.56505
1108
+ ▁pavement -9.56505
1109
+ ▁picnic -9.56505
1110
+ ▁soccer -9.56505
1111
+ ▁peanut -9.56505
1112
+ ▁space -9.56506
1113
+ ▁pastry -9.56509
1114
+ ▁lone -9.56512
1115
+ ▁finger -9.56707
1116
+ ▁watches -9.56732
1117
+ most -9.56742
1118
+ bow -9.57068
1119
+ ▁officer -9.57132
1120
+ ful -9.57246
1121
+ ▁serve -9.58911
1122
+ ▁mark -9.59236
1123
+ ▁deck -9.59241
1124
+ ▁curtain -9.59283
1125
+ ▁himself -9.59283
1126
+ ▁ledge -9.59283
1127
+ ▁railroad -9.59283
1128
+ ���duck -9.59284
1129
+ ▁base -9.59284
1130
+ ▁model -9.59285
1131
+ rcial -9.59286
1132
+ ▁comme -9.59289
1133
+ arrow -9.59302
1134
+ ▁hillside -9.59378
1135
+ ▁tools -9.5943
1136
+ ▁flor -9.59538
1137
+ ci -9.6108
1138
+ made -9.6127
1139
+ ddler -9.61507
1140
+ fore -9.61889
1141
+ ▁puppy -9.6214
1142
+ ▁school -9.6214
1143
+ ▁propeller -9.6214
1144
+ ▁cupcake -9.6214
1145
+ ▁built -9.62141
1146
+ ▁mini -9.62144
1147
+ ▁step -9.62151
1148
+ ▁string -9.6216
1149
+ ▁panda -9.62177
1150
+ ▁port -9.62248
1151
+ ▁pipe -9.62495
1152
+ ▁qui -9.62497
1153
+ ▁spi -9.64844
1154
+ ▁event -9.65082
1155
+ ▁block -9.65082
1156
+ ▁spread -9.65082
1157
+ ▁winter -9.65082
1158
+ ▁flies -9.65083
1159
+ ▁still -9.65083
1160
+ ▁sport -9.65085
1161
+ ▁fried -9.65092
1162
+ ▁direction -9.65107
1163
+ cal -9.65183
1164
+ ▁landing -9.65229
1165
+ ▁trailer -9.65376
1166
+ ▁eye -9.65481
1167
+ ▁bit -9.6791
1168
+ ▁sub -9.68077
1169
+ ▁balanc -9.68112
1170
+ ▁pattern -9.68112
1171
+ ▁professional -9.68112
1172
+ ▁consist -9.68112
1173
+ ▁grapes -9.68113
1174
+ ▁spray -9.68113
1175
+ ▁antique -9.68113
1176
+ ough -9.68114
1177
+ ▁stall -9.68115
1178
+ ▁package -9.68161
1179
+ ▁corn -9.68243
1180
+ ▁town -9.6827
1181
+ ▁tag -9.68465
1182
+ ▁tin -9.68496
1183
+ ▁figure -9.71237
1184
+ ▁furniture -9.71237
1185
+ ▁notebook -9.71237
1186
+ ▁lime -9.71237
1187
+ ▁parrot -9.71237
1188
+ ▁sofa -9.71237
1189
+ ▁outfit -9.71238
1190
+ ▁power -9.71239
1191
+ ▁disc -9.71248
1192
+ ▁fry -9.71262
1193
+ ▁wide -9.71277
1194
+ ▁chili -9.71294
1195
+ ▁hard -9.71328
1196
+ ▁features -9.71373
1197
+ ▁tail -9.71384
1198
+ ▁featur -9.74332
1199
+ ▁contents -9.74463
1200
+ ▁delicious -9.74463
1201
+ ▁expired -9.74463
1202
+ ▁guitar -9.74463
1203
+ ▁leash -9.74463
1204
+ ▁snack -9.74463
1205
+ ▁steak -9.74463
1206
+ ▁name -9.74463
1207
+ ▁push -9.74468
1208
+ ▁touching -9.74479
1209
+ ▁subway -9.74504
1210
+ ▁stir -9.74607
1211
+ ▁wear -9.76934
1212
+ ▁trail -9.77464
1213
+ neath -9.7773
1214
+ ▁berries -9.77796
1215
+ ▁ceramic -9.77796
1216
+ ▁condiments -9.77796
1217
+ ▁fabric -9.77796
1218
+ ▁fancy -9.77796
1219
+ ▁blond -9.77796
1220
+ ▁stairs -9.77798
1221
+ ▁pants -9.77835
1222
+ uzz -9.77851
1223
+ ▁tee -9.79758
1224
+ ▁craft -9.81244
1225
+ ▁visible -9.81244
1226
+ ▁chop -9.81246
1227
+ ▁rope -9.81249
1228
+ ▁beef -9.81252
1229
+ ▁key -9.81259
1230
+ ▁rais -9.8143
1231
+ ▁sleep -9.83725
1232
+ bby -9.84671
1233
+ ▁bouquet -9.84816
1234
+ ▁museum -9.84816
1235
+ ▁restroom -9.84816
1236
+ ▁shelves -9.84816
1237
+ ▁advertisement -9.84816
1238
+ ▁flown -9.84816
1239
+ ▁tank -9.84818
1240
+ ▁vest -9.84824
1241
+ ature -9.84836
1242
+ soft -9.84883
1243
+ ▁icing -9.84891
1244
+ ▁flo -9.88084
1245
+ band -9.8828
1246
+ ▁reflect -9.88479
1247
+ ▁amount -9.88519
1248
+ ▁owl -9.88519
1249
+ ▁steam -9.88519
1250
+ ▁tongue -9.88519
1251
+ ▁business -9.88519
1252
+ ▁costume -9.88519
1253
+ ▁heart -9.88521
1254
+ ▁calf -9.88521
1255
+ ▁worn -9.88522
1256
+ ▁sill -9.88598
1257
+ ▁propp -9.91136
1258
+ ized -9.92239
1259
+ how -9.92279
1260
+ ▁happy -9.92365
1261
+ ▁harbor -9.92365
1262
+ ▁pillow -9.92365
1263
+ ▁roof -9.92365
1264
+ ▁sugar -9.92365
1265
+ ▁airliner -9.92366
1266
+ ▁ornate -9.92366
1267
+ ▁indoor -9.92366
1268
+ ▁frame -9.92366
1269
+ ▁itself -9.92367
1270
+ ▁residen -9.92367
1271
+ ▁rusted -9.92368
1272
+ ▁selling -9.92368
1273
+ ▁ja -9.96136
1274
+ ▁figurine -9.96365
1275
+ ▁freezer -9.96365
1276
+ ▁garbage -9.96365
1277
+ ▁goggles -9.96365
1278
+ ▁waffle -9.96365
1279
+ ▁overhead -9.96366
1280
+ ▁section -9.96366
1281
+ ▁patio -9.96366
1282
+ ▁tasty -9.96367
1283
+ ▁frog -9.96368
1284
+ ▁mud -9.96372
1285
+ ▁belt -9.96374
1286
+ ▁fast -9.96384
1287
+ ▁curl -9.96391
1288
+ ▁item -9.97951
1289
+ shirt -9.99666
1290
+ ▁celery -10.0053
1291
+ ▁faucet -10.0053
1292
+ ▁kept -10.0053
1293
+ ▁leather -10.0053
1294
+ ▁structure -10.0053
1295
+ ▁loading -10.0053
1296
+ ▁relax -10.0053
1297
+ ▁scatter -10.0053
1298
+ ▁numer -10.0053
1299
+ ▁six -10.0053
1300
+ ▁asleep -10.0053
1301
+ ixture -10.0053
1302
+ ▁iron -10.0053
1303
+ ▁hood -10.0054
1304
+ ▁owner -10.0055
1305
+ ▁also -10.0056
1306
+ rban -10.0135
1307
+ ▁produc -10.0415
1308
+ thered -10.0476
1309
+ ▁accessories -10.0488
1310
+ ▁bakery -10.0488
1311
+ ▁cereal -10.0488
1312
+ ▁champagne -10.0488
1313
+ ▁commuter -10.0488
1314
+ ▁individual -10.0488
1315
+ ▁practic -10.0488
1316
+ ▁sweater -10.0488
1317
+ ▁ceiling -10.0488
1318
+ ▁text -10.0488
1319
+ ▁neatly -10.0488
1320
+ ▁rusty -10.0488
1321
+ ▁headphones -10.0488
1322
+ ▁cord -10.0489
1323
+ ▁lid -10.0492
1324
+ ▁je -10.0523
1325
+ ▁direct -10.0939
1326
+ ▁circle -10.0943
1327
+ ▁crochet -10.0943
1328
+ ▁magazine -10.0943
1329
+ ▁marble -10.0943
1330
+ ▁marina -10.0943
1331
+ ▁measur -10.0943
1332
+ ▁monkey -10.0943
1333
+ ▁roman -10.0943
1334
+ ▁urinal -10.0943
1335
+ ▁garage -10.0943
1336
+ ▁speak -10.0943
1337
+ ▁shoe -10.109
1338
+ form -10.1219
1339
+ ▁talk -10.131
1340
+ ▁potato -10.1405
1341
+ ▁advertis -10.1419
1342
+ ▁approach -10.1419
1343
+ ▁burger -10.1419
1344
+ ▁character -10.1419
1345
+ ▁depict -10.1419
1346
+ ▁jockey -10.1419
1347
+ ▁kiwi -10.1419
1348
+ ▁stream -10.1419
1349
+ ▁terminal -10.1419
1350
+ ▁attempt -10.1419
1351
+ ▁jetliner -10.1419
1352
+ ▁vendor -10.1419
1353
+ ▁stunt -10.1419
1354
+ ▁collar -10.1419
1355
+ foam -10.1419
1356
+ ▁palm -10.1419
1357
+ ▁necktie -10.1419
1358
+ ▁indicat -10.1419
1359
+ ▁foil -10.1419
1360
+ ▁than -10.1419
1361
+ ▁burn -10.1419
1362
+ ▁help -10.1421
1363
+ ▁tire -10.1427
1364
+ ▁jo -10.1432
1365
+ ▁pepper -10.1649
1366
+ ▁shak -10.1781
1367
+ ▁laughing -10.1919
1368
+ ▁ribbon -10.1919
1369
+ ▁shrimp -10.1919
1370
+ ▁sniff -10.1919
1371
+ ▁custom -10.1919
1372
+ ▁pepperoni -10.1919
1373
+ ▁missing -10.1919
1374
+ ▁rubb -10.1919
1375
+ ▁wool -10.1919
1376
+ ▁cartoon -10.1919
1377
+ ique -10.1919
1378
+ ▁slid -10.1919
1379
+ ▁needle -10.192
1380
+ ▁canoe -10.1922
1381
+ ▁paddle -10.2032
1382
+ ▁bikini -10.2445
1383
+ ▁connect -10.2445
1384
+ ▁focus -10.2445
1385
+ ▁furry -10.2445
1386
+ ▁garnish -10.2445
1387
+ ▁grizzl -10.2445
1388
+ ▁horseback -10.2445
1389
+ ▁jersey -10.2445
1390
+ ▁liquid -10.2445
1391
+ ▁motorbike -10.2445
1392
+ ▁newspaper -10.2445
1393
+ ▁opposite -10.2445
1394
+ ▁ostrich -10.2445
1395
+ ▁powder -10.2445
1396
+ ▁selection -10.2445
1397
+ ▁silverware -10.2445
1398
+ ▁america -10.2445
1399
+ ▁money -10.2445
1400
+ ▁process -10.2445
1401
+ ▁pocket -10.2445
1402
+ ▁relish -10.2445
1403
+ ▁jelly -10.2445
1404
+ ▁odd -10.2445
1405
+ ▁santa -10.2445
1406
+ ▁fighter -10.2445
1407
+ ▁patch -10.2445
1408
+ ▁typing -10.2445
1409
+ ▁cold -10.2446
1410
+ ▁tasting -10.2446
1411
+ lumin -10.2456
1412
+ ador -10.2834
1413
+ ▁aircraft -10.3001
1414
+ ▁bookshelf -10.3001
1415
+ ▁cigarette -10.3001
1416
+ ▁digital -10.3001
1417
+ ▁exhibit -10.3001
1418
+ ▁interesting -10.3001
1419
+ ▁meadow -10.3001
1420
+ ▁muffin -10.3001
1421
+ ▁natural -10.3001
1422
+ ▁organiz -10.3001
1423
+ ▁overlook -10.3001
1424
+ ▁sweet -10.3001
1425
+ plug -10.3001
1426
+ ▁baking -10.3001
1427
+ ▁celebrat -10.3001
1428
+ ▁remov -10.3001
1429
+ ▁wedding -10.3001
1430
+ ▁dozen -10.3001
1431
+ ▁forward -10.3001
1432
+ ▁jeans -10.3001
1433
+ ▁dust -10.3002
1434
+ guard -10.3004
1435
+ ▁beak -10.3005
1436
+ ield -10.3036
1437
+ book -10.3502
1438
+ ▁avocado -10.3589
1439
+ ▁construction -10.3589
1440
+ ▁grapefruit -10.3589
1441
+ ▁ingredients -10.3589
1442
+ ▁instruction -10.3589
1443
+ ▁ipod -10.3589
1444
+ ▁knives -10.3589
1445
+ ▁learning -10.3589
1446
+ ▁liquor -10.3589
1447
+ ▁ornament -10.3589
1448
+ ▁pencils -10.3589
1449
+ ▁pineapple -10.3589
1450
+ ▁shallow -10.3589
1451
+ ▁tourist -10.3589
1452
+ ▁transport -10.3589
1453
+ ▁trolley -10.3589
1454
+ ▁tulips -10.3589
1455
+ ▁knitt -10.3589
1456
+ ▁magnet -10.3589
1457
+ ▁clown -10.3589
1458
+ ▁loung -10.3589
1459
+ ▁finish -10.3589
1460
+ ▁first -10.3589
1461
+ ▁operat -10.3589
1462
+ ▁backyard -10.3589
1463
+ ▁semi -10.359
1464
+ ▁medi -10.372
1465
+ ▁scissor -10.3945
1466
+ ▁plai -10.4176
1467
+ ▁calculator -10.4214
1468
+ ▁chrome -10.4214
1469
+ ▁church -10.4214
1470
+ ▁extreme -10.4214
1471
+ ▁fixing -10.4214
1472
+ ▁flavor -10.4214
1473
+ ▁fluffy -10.4214
1474
+ ▁motorcyclist -10.4214
1475
+ ▁nokia -10.4214
1476
+ ▁project -10.4214
1477
+ ▁selfie -10.4214
1478
+ ▁skating -10.4214
1479
+ ▁sneakers -10.4214
1480
+ ▁spinach -10.4214
1481
+ ▁stretch -10.4214
1482
+ ▁transit -10.4214
1483
+ ▁vegetation -10.4214
1484
+ ▁console -10.4214
1485
+ ▁material -10.4214
1486
+ ▁roast -10.4214
1487
+ ▁cheesecake -10.4214
1488
+ ▁crouch -10.4214
1489
+ ▁hung -10.4214
1490
+ ▁taxi -10.4214
1491
+ ▁weather -10.4214
1492
+ ▁swan -10.4214
1493
+ ▁crib -10.4214
1494
+ ▁safe -10.4221
1495
+ ▁decor -10.4877
1496
+ ▁antelope -10.4881
1497
+ ▁bamboo -10.4881
1498
+ ▁blood -10.4881
1499
+ ▁circu -10.4881
1500
+ q -10.8094
src/dataset/sub_tokenizer2000.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c60fef05dfb996f1a074bc5a252a0bd87eba570e346493a778d47dd3ee929f95
3
+ size 271282
src/dataset/sub_tokenizer2000.vocab ADDED
@@ -0,0 +1,2000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad> 0
2
+ <sos> 0
3
+ <eos> 0
4
+ <unk> 0
5
+ ▁a -1.90379
6
+ . -2.84988
7
+ ▁of -3.52043
8
+ ▁on -3.53193
9
+ ▁in -3.7858
10
+ ▁the -3.78741
11
+ s -3.81805
12
+ ▁with -3.85508
13
+ nd -3.87567
14
+ ▁is -4.37004
15
+ ▁to -4.55105
16
+ ing -4.63649
17
+ ▁man -4.76743
18
+ ▁ -4.8183
19
+ ▁sitting -4.86181
20
+ , -4.94566
21
+ ▁an -5.02722
22
+ ▁next -5.11411
23
+ ▁two -5.13883
24
+ ed -5.23179
25
+ ▁white -5.27115
26
+ ▁are -5.29061
27
+ ▁holding -5.3089
28
+ ▁standing -5.31022
29
+ ▁table -5.3802
30
+ ▁it -5.42532
31
+ y -5.47436
32
+ e -5.47748
33
+ ▁at -5.5327
34
+ ▁woman -5.54252
35
+ ▁plate -5.56616
36
+ ▁that -5.58758
37
+ ▁up -5.61664
38
+ ▁top -5.64691
39
+ ▁people -5.67154
40
+ d -5.70035
41
+ ▁some -5.70552
42
+ n -5.71926
43
+ ▁person -5.72054
44
+ ▁black -5.79203
45
+ ▁large -5.79333
46
+ ▁street -5.83316
47
+ ▁red -5.87919
48
+ ▁his -5.93696
49
+ ▁small -5.93728
50
+ ▁dog -5.98374
51
+ ▁near -5.98661
52
+ ▁front -6.02607
53
+ le -6.03631
54
+ ▁sign -6.112
55
+ ▁by -6.15826
56
+ ▁bear -6.16926
57
+ ▁cat -6.16953
58
+ ▁group -6.1793
59
+ ▁field -6.19767
60
+ ▁has -6.20777
61
+ ▁food -6.21735
62
+ ▁blue -6.21834
63
+ ▁green -6.25381
64
+ ▁down -6.26316
65
+ ▁young -6.27466
66
+ ▁tennis -6.29272
67
+ ▁snow -6.29366
68
+ ▁other -6.30134
69
+ ▁close -6.3032
70
+ ▁whi -6.31842
71
+ ▁there -6.32654
72
+ ▁grass -6.33462
73
+ ▁water -6.33544
74
+ ▁side -6.39473
75
+ ▁train -6.42556
76
+ ▁computer -6.42924
77
+ t -6.44999
78
+ a -6.46645
79
+ ▁baseball -6.4953
80
+ ▁parked -6.50776
81
+ ▁walking -6.51064
82
+ ▁board -6.51169
83
+ ▁sits -6.52527
84
+ ▁riding -6.53004
85
+ o -6.53714
86
+ ▁stop -6.54352
87
+ ▁parking -6.5445
88
+ ▁for -6.55358
89
+ ▁kitchen -6.55761
90
+ r -6.58109
91
+ ▁phone -6.58369
92
+ ▁her -6.59094
93
+ ▁wearing -6.60086
94
+ ▁boy -6.60371
95
+ ▁looking -6.60483
96
+ er -6.60786
97
+ ▁clock -6.61485
98
+ ▁bowl -6.62845
99
+ ▁bus -6.64378
100
+ ▁pizza -6.64978
101
+ ▁fire -6.6575
102
+ ▁woode -6.65812
103
+ ' -6.66211
104
+ ▁out -6.6706
105
+ m -6.67177
106
+ ▁couple -6.68249
107
+ p -6.68396
108
+ ▁picture -6.68538
109
+ ▁thre -6.69363
110
+ ▁yellow -6.69364
111
+ ▁bathroom -6.69845
112
+ ▁glass -6.70559
113
+ ▁girl -6.70816
114
+ ▁from -6.73116
115
+ ▁toilet -6.73449
116
+ ▁several -6.73616
117
+ ▁skateboard -6.74251
118
+ ▁vase -6.74527
119
+ ▁brown -6.75471
120
+ ▁building -6.75554
121
+ ▁vegetables -6.76098
122
+ ▁light -6.76845
123
+ ▁hydrant -6.77015
124
+ ▁hand -6.79109
125
+ ▁beach -6.79111
126
+ ▁scissors -6.79703
127
+ ▁sink -6.80533
128
+ ▁room -6.80892
129
+ ▁their -6.80978
130
+ ▁filled -6.81271
131
+ g -6.8148
132
+ ▁over -6.83258
133
+ ▁desk -6.84733
134
+ ▁wine -6.85868
135
+ ▁bat -6.86169
136
+ ▁road -6.87196
137
+ ▁elephant -6.87345
138
+ ▁open -6.8758
139
+ ▁one -6.88385
140
+ ▁pair -6.88936
141
+ ▁meter -6.89336
142
+ ▁be -6.91028
143
+ ▁horse -6.91547
144
+ ▁hot -6.92288
145
+ ▁cake -6.92509
146
+ ▁thi -6.92907
147
+ ▁each -6.93109
148
+ ▁keyboard -6.9311
149
+ ▁wall -6.93925
150
+ ▁sheep -6.94337
151
+ ▁different -6.94954
152
+ ▁frisbee -6.9496
153
+ ▁flying -6.95108
154
+ ▁flowers -6.95386
155
+ b -6.95499
156
+ ▁traffic -6.95785
157
+ ▁orange -6.95925
158
+ ▁laptop -6.96623
159
+ ▁giraffe -6.97434
160
+ ▁eating -6.97688
161
+ ▁bench -6.9801
162
+ ▁counter -6.98527
163
+ ▁teddy -6.98533
164
+ ▁its -6.98622
165
+ ▁f -6.98731
166
+ ▁tree -6.98738
167
+ ▁around -6.98749
168
+ ▁mouse -6.98963
169
+ ▁covered -6.99463
170
+ ▁tie -7.00138
171
+ st -7.00458
172
+ ▁lay -7.00734
173
+ u -7.01767
174
+ ▁broccoli -7.01801
175
+ ▁cell -7.02242
176
+ ▁inside -7.0251
177
+ ▁boat -7.02576
178
+ ▁through -7.02915
179
+ ▁fruit -7.04853
180
+ ▁remote -7.05181
181
+ ▁ball -7.0541
182
+ ▁bunch -7.05562
183
+ ▁motorcycle -7.05802
184
+ ▁area -7.05888
185
+ ▁outside -7.06116
186
+ very -7.06637
187
+ al -7.06708
188
+ ▁zebra -7.06733
189
+ ▁little -7.06797
190
+ ▁microwave -7.07264
191
+ ▁truck -7.07512
192
+ es -7.08244
193
+ ▁city -7.09155
194
+ ▁old -7.09496
195
+ ▁lot -7.09663
196
+ ▁bed -7.11485
197
+ ▁car -7.11886
198
+ ▁umbrella -7.12617
199
+ ▁together -7.12794
200
+ ▁playing -7.12974
201
+ ▁back -7.14791
202
+ ly -7.14982
203
+ ▁sit -7.15914
204
+ ▁under -7.16824
205
+ ▁behind -7.17346
206
+ ▁bananas -7.17365
207
+ ting -7.18579
208
+ ▁men -7.1879
209
+ en -7.18889
210
+ ▁ground -7.18911
211
+ ▁s -7.19022
212
+ ▁background -7.19173
213
+ ▁glasses -7.19856
214
+ ▁bird -7.19888
215
+ ▁bike -7.20221
216
+ l -7.20434
217
+ ▁piece -7.20762
218
+ ▁child -7.21573
219
+ ▁carrots -7.2246
220
+ ▁stuff -7.22932
221
+ ▁head -7.22934
222
+ ▁display -7.22943
223
+ ▁sandwich -7.23111
224
+ ▁air -7.23995
225
+ ▁surfboard -7.24968
226
+ ▁baby -7.25425
227
+ ▁full -7.25989
228
+ ▁stands -7.2628
229
+ ▁hold -7.2628
230
+ ▁into -7.2794
231
+ ▁luggage -7.28271
232
+ ▁being -7.28796
233
+ ▁oven -7.30017
234
+ w -7.3046
235
+ ▁player -7.30562
236
+ ▁beside -7.30918
237
+ ▁photo -7.31199
238
+ ous -7.31987
239
+ ▁sidewalk -7.32395
240
+ ▁shirt -7.33037
241
+ laying -7.33119
242
+ ▁paper -7.33304
243
+ ▁cutt -7.33384
244
+ ▁racket -7.33603
245
+ ▁floor -7.33609
246
+ ▁dirt -7.3391
247
+ ▁knife -7.34523
248
+ ▁trees -7.34536
249
+ ▁snowboard -7.34713
250
+ ▁pink -7.34836
251
+ ▁fence -7.35427
252
+ ▁game -7.36383
253
+ ▁cup -7.36772
254
+ pped -7.37284
255
+ ▁window -7.37327
256
+ i -7.37392
257
+ ▁skis -7.37715
258
+ ▁look -7.38279
259
+ ▁cow -7.3872
260
+ ▁big -7.3926
261
+ ▁pile -7.42014
262
+ ▁view -7.42185
263
+ ▁d -7.42569
264
+ ▁face -7.4275
265
+ ▁oranges -7.42956
266
+ on -7.43198
267
+ ▁kite -7.4337
268
+ ch -7.44104
269
+ ▁zebras -7.45306
270
+ ▁mouth -7.45555
271
+ ▁toothbrush -7.45804
272
+ ▁apples -7.45822
273
+ ▁image -7.46589
274
+ ▁plane -7.47986
275
+ c -7.48139
276
+ ▁kites -7.49201
277
+ ▁stand -7.49869
278
+ ▁sky -7.51201
279
+ ▁cut -7.52986
280
+ ▁meat -7.53038
281
+ h -7.53967
282
+ ▁another -7.54147
283
+ ▁apple -7.54237
284
+ ▁park -7.54538
285
+ ▁refrigerator -7.56415
286
+ ▁do -7.57386
287
+ ���bears -7.57774
288
+ ▁airplane -7.58145
289
+ ▁pole -7.58642
290
+ ▁vari -7.59082
291
+ ▁grassy -7.59426
292
+ k -7.6016
293
+ ▁bicycle -7.6096
294
+ ▁four -7.61515
295
+ ▁them -7.62206
296
+ ▁colorful -7.62325
297
+ f -7.62581
298
+ ▁hat -7.62834
299
+ ▁someone -7.63551
300
+ ▁ski -7.63961
301
+ ▁book -7.64343
302
+ ar -7.64588
303
+ ▁donuts -7.65007
304
+ ▁cows -7.65097
305
+ ▁tall -7.65632
306
+ ▁fork -7.65773
307
+ ▁women -7.66042
308
+ ▁bottle -7.6642
309
+ ▁stove -7.66888
310
+ ▁off -7.66944
311
+ ▁box -7.67222
312
+ ▁banana -7.67282
313
+ ▁control -7.67313
314
+ or -7.67649
315
+ ▁coffee -7.6817
316
+ ▁station -7.68613
317
+ ▁bag -7.6901
318
+ ▁chair -7.69491
319
+ it -7.69954
320
+ ▁wood -7.70204
321
+ ▁mirror -7.70344
322
+ ▁ready -7.70803
323
+ ▁herd -7.71799
324
+ an -7.72101
325
+ ll -7.72672
326
+ ▁camera -7.73017
327
+ ▁suitcase -7.73581
328
+ ▁cheese -7.73945
329
+ ▁hanging -7.74151
330
+ ▁items -7.74619
331
+ ▁tray -7.75301
332
+ ▁line -7.75759
333
+ ▁giraffes -7.75836
334
+ ▁above -7.77636
335
+ ▁elephants -7.78217
336
+ ▁middle -7.79546
337
+ ▁long -7.79554
338
+ ▁wii -7.79565
339
+ v -7.79936
340
+ ▁suit -7.80029
341
+ ▁half -7.81002
342
+ ▁posing -7.81013
343
+ ▁metal -7.81492
344
+ in -7.8172
345
+ ▁grazing -7.81985
346
+ ▁get -7.82
347
+ ▁tak -7.82337
348
+ ▁can -7.82451
349
+ ▁along -7.82967
350
+ ▁sand -7.83648
351
+ ▁t -7.85072
352
+ ▁fruits -7.85213
353
+ ▁day -7.85504
354
+ ▁smiling -7.86016
355
+ ▁like -7.86016
356
+ gain -7.86042
357
+ ▁carry -7.86556
358
+ ▁b -7.8661
359
+ ▁signs -7.87456
360
+ ▁all -7.88738
361
+ ▁living -7.89149
362
+ ▁co -7.89325
363
+ ▁court -7.89681
364
+ ur -7.90155
365
+ ▁surf -7.90671
366
+ ▁basket -7.90753
367
+ ▁cars -7.91145
368
+ ▁silver -7.91294
369
+ ▁animals -7.91676
370
+ ▁dogs -7.91824
371
+ ▁n -7.92221
372
+ ▁m -7.93004
373
+ ▁cr -7.93076
374
+ ▁la -7.93381
375
+ re -7.93666
376
+ ▁he -7.9454
377
+ la -7.94666
378
+ ▁pan -7.94721
379
+ ▁row -7.96295
380
+ il -7.96838
381
+ ▁across -7.96866
382
+ ▁airport -7.96866
383
+ ▁bread -7.96866
384
+ el -7.97626
385
+ et -7.97645
386
+ ▁lean -7.98022
387
+ ers -7.9831
388
+ ▁horses -7.98457
389
+ ▁plastic -7.986
390
+ ▁hair -7.98608
391
+ ▁animal -7.98773
392
+ ck -8.00089
393
+ ▁trick -8.00374
394
+ ▁couch -8.00375
395
+ ▁dish -8.00894
396
+ ▁tracks -8.01307
397
+ ▁walk -8.0142
398
+ ▁hands -8.01734
399
+ ▁spoon -8.01806
400
+ ▁set -8.02221
401
+ ▁birds -8.02343
402
+ to -8.02705
403
+ ▁screen -8.02768
404
+ ve -8.02892
405
+ ▁slice -8.03331
406
+ ▁donut -8.03666
407
+ ▁door -8.04045
408
+ ▁skate -8.04596
409
+ ▁store -8.05233
410
+ ▁g -8.05489
411
+ lying -8.05641
412
+ ▁driving -8.05854
413
+ ▁empty -8.05854
414
+ id -8.06796
415
+ ▁st -8.06843
416
+ ▁chocolate -8.07753
417
+ ▁enclosure -8.07754
418
+ ▁something -8.08397
419
+ ▁pot -8.08501
420
+ ion -8.08685
421
+ ▁market -8.09047
422
+ ol -8.09804
423
+ ▁snowboarder -8.10701
424
+ ▁displayed -8.10975
425
+ ▁purple -8.11
426
+ ▁television -8.11
427
+ ▁using -8.11007
428
+ up -8.11132
429
+ ▁house -8.11663
430
+ te -8.12195
431
+ ▁slope -8.12329
432
+ ▁video -8.12329
433
+ ▁hit -8.12384
434
+ ▁controller -8.13001
435
+ ▁shown -8.13113
436
+ ct -8.13323
437
+ ▁hill -8.13673
438
+ ter -8.13878
439
+ uring -8.14849
440
+ ▁mountain -8.14942
441
+ ▁case -8.15073
442
+ ▁tooth -8.15733
443
+ ce -8.16723
444
+ ▁flower -8.17063
445
+ ▁number -8.17125
446
+ ▁tv -8.1718
447
+ ▁brush -8.17476
448
+ th -8.17897
449
+ ▁doughnuts -8.18083
450
+ ▁boats -8.18276
451
+ ▁who -8.18294
452
+ ▁double -8.18538
453
+ ▁monitor -8.18538
454
+ ▁salad -8.18538
455
+ ▁lady -8.1854
456
+ ▁un -8.19178
457
+ ▁attached -8.19252
458
+ ▁w -8.19366
459
+ ▁teeth -8.19978
460
+ ▁contain -8.20528
461
+ ▁resting -8.20567
462
+ ▁c -8.21269
463
+ ic -8.21423
464
+ ▁guy -8.21427
465
+ ▁made -8.21638
466
+ ▁ocean -8.22162
467
+ ▁tower -8.22165
468
+ ▁pen -8.22303
469
+ ▁pull -8.22371
470
+ ▁rock -8.22534
471
+ ▁meal -8.22903
472
+ ▁corner -8.22938
473
+ ut -8.23325
474
+ ▁drink -8.23517
475
+ ▁passenger -8.23649
476
+ ▁bun -8.23926
477
+ ▁umbrellas -8.24185
478
+ ▁racquet -8.244
479
+ ▁crowd -8.24616
480
+ ▁bar -8.249
481
+ ▁toy -8.24938
482
+ ▁backpack -8.25921
483
+ ▁lights -8.26244
484
+ us -8.27446
485
+ ▁surround -8.27466
486
+ li -8.27745
487
+ ▁zoo -8.28247
488
+ ▁brick -8.28255
489
+ ▁pieces -8.29064
490
+ ▁multi -8.29803
491
+ ▁restaurant -8.29828
492
+ ▁cellphone -8.29839
493
+ ▁fresh -8.30628
494
+ ▁photograph -8.30628
495
+ ▁shelf -8.30628
496
+ ▁surface -8.30628
497
+ ▁jacket -8.31435
498
+ ▁sauce -8.31435
499
+ ra -8.31944
500
+ ▁ride -8.32111
501
+ ▁adult -8.32248
502
+ ▁statue -8.32248
503
+ ▁books -8.3242
504
+ ▁waiting -8.32447
505
+ ▁watching -8.32712
506
+ ▁jet -8.33094
507
+ ▁painted -8.33164
508
+ ▁plant -8.33394
509
+ ▁post -8.33454
510
+ ro -8.33538
511
+ ▁seat -8.33698
512
+ ▁polar -8.33905
513
+ ▁re -8.34023
514
+ ▁de -8.34346
515
+ ween -8.34728
516
+ ▁track -8.3508
517
+ ▁runway -8.35575
518
+ ▁glove -8.36415
519
+ ▁clear -8.36415
520
+ ▁time -8.36415
521
+ ▁toothbrushes -8.36648
522
+ colored -8.36734
523
+ ▁showing -8.36824
524
+ ▁bet -8.37115
525
+ ▁chicken -8.37269
526
+ ▁helmet -8.37269
527
+ ▁dark -8.3727
528
+ ▁children -8.3727
529
+ ▁rice -8.3728
530
+ ▁arm -8.37381
531
+ ▁sleeping -8.37499
532
+ ▁skiing -8.37539
533
+ ▁electronic -8.38132
534
+ ▁body -8.38132
535
+ ▁eaten -8.38205
536
+ ▁dressed -8.38589
537
+ ▁po -8.38869
538
+ z -8.38961
539
+ ▁going -8.39045
540
+ ▁tile -8.39077
541
+ um -8.39819
542
+ ▁high -8.40741
543
+ ▁no -8.41272
544
+ ▁ca -8.41279
545
+ ▁container -8.41439
546
+ ▁talking -8.41835
547
+ ping -8.4213
548
+ ▁sliced -8.43047
549
+ ▁grey -8.43467
550
+ ▁doughnut -8.44054
551
+ ▁rail -8.44409
552
+ ▁wire -8.44417
553
+ ▁types -8.44495
554
+ ▁eat -8.44589
555
+ ▁go -8.4494
556
+ ▁outdoor -8.44991
557
+ way -8.45313
558
+ ▁about -8.45317
559
+ ▁gray -8.45481
560
+ ▁stick -8.45768
561
+ ▁left -8.46244
562
+ ▁fries -8.46244
563
+ ▁strip -8.46332
564
+ ▁toaster -8.46334
565
+ ple -8.46803
566
+ ▁includ -8.47093
567
+ ▁neck -8.47215
568
+ ew -8.47727
569
+ ▁carrot -8.47883
570
+ ���plates -8.48014
571
+ ▁intersection -8.4814
572
+ ies -8.48551
573
+ ▁toward -8.49102
574
+ ▁rack -8.49106
575
+ ▁have -8.49121
576
+ ▁drinking -8.49373
577
+ ▁mo -8.49608
578
+ led -8.49823
579
+ at -8.49873
580
+ is -8.4997
581
+ ▁h -8.50948
582
+ ▁beer -8.50953
583
+ ▁ma -8.51031
584
+ ▁device -8.51053
585
+ ▁soup -8.51077
586
+ ▁brushing -8.51413
587
+ ▁tub -8.52043
588
+ ▁skateboarder -8.52274
589
+ ▁walks -8.52647
590
+ ▁bags -8.53018
591
+ ▁blanket -8.53043
592
+ ▁rain -8.53077
593
+ placed -8.53093
594
+ ▁shower -8.53111
595
+ ▁com -8.53531
596
+ ▁rocks -8.53974
597
+ ▁leaves -8.54053
598
+ ▁trunk -8.54053
599
+ ▁variet -8.54173
600
+ ▁skier -8.54242
601
+ ▁dress -8.54535
602
+ ▁meters -8.55009
603
+ ▁male -8.55324
604
+ se -8.55394
605
+ ▁take -8.55728
606
+ ▁potatoes -8.56124
607
+ ▁branch -8.5705
608
+ ▁toppings -8.57155
609
+ ▁night -8.57165
610
+ ▁swing -8.57239
611
+ ▁color -8.57781
612
+ ▁signal -8.58144
613
+ ▁home -8.58199
614
+ ▁cross -8.58294
615
+ ▁she -8.58596
616
+ ▁hotdog -8.59239
617
+ ▁shows -8.59355
618
+ ▁or -8.59559
619
+ ive -8.59638
620
+ ▁buildings -8.59816
621
+ ▁kid -8.60352
622
+ co -8.60403
623
+ ▁batter -8.60504
624
+ ▁sun -8.60763
625
+ ▁office -8.61285
626
+ ▁lo -8.61329
627
+ ▁decker -8.61477
628
+ un -8.6149
629
+ ▁closeup -8.61732
630
+ ▁past -8.62
631
+ ▁older -8.62047
632
+ ▁dessert -8.62523
633
+ ▁graffiti -8.62523
634
+ ▁motorcycles -8.62848
635
+ ▁decorated -8.63636
636
+ ▁cart -8.63985
637
+ ▁traveling -8.64054
638
+ ▁bright -8.64687
639
+ ▁uniform -8.64758
640
+ ▁wave -8.64843
641
+ ▁bottles -8.64876
642
+ ▁sp -8.65093
643
+ ▁stacked -8.66238
644
+ ation -8.66312
645
+ ▁prepar -8.66412
646
+ ▁hug -8.67044
647
+ ▁shaped -8.67231
648
+ ie -8.67365
649
+ ▁ne -8.68267
650
+ as -8.68526
651
+ ▁rest -8.68555
652
+ ake -8.69332
653
+ ▁motor -8.69384
654
+ ▁roll -8.69384
655
+ ▁lit -8.694
656
+ ▁river -8.69433
657
+ ng -8.69758
658
+ ▁di -8.70117
659
+ mp -8.7023
660
+ ▁cabinets -8.71577
661
+ ▁shot -8.71788
662
+ ian -8.71819
663
+ ped -8.71821
664
+ ▁setting -8.71935
665
+ ▁so -8.7209
666
+ ▁slices -8.72431
667
+ ▁cloud -8.72998
668
+ ▁assortment -8.73008
669
+ ▁dry -8.73142
670
+ ▁cooking -8.7332
671
+ ca -8.73336
672
+ ▁way -8.74097
673
+ ▁onions -8.74355
674
+ ▁served -8.7438
675
+ op -8.74553
676
+ ton -8.74844
677
+ ▁cute -8.75342
678
+ ▁place -8.75415
679
+ ▁ice -8.75825
680
+ ▁suitcases -8.76433
681
+ ▁cloth -8.76653
682
+ ▁giant -8.76748
683
+ ▁ex -8.76751
684
+ lush -8.76765
685
+ ▁underneath -8.76778
686
+ ▁bikes -8.76815
687
+ ▁appliances -8.77018
688
+ x -8.78016
689
+ ▁assort -8.7802
690
+ ▁machine -8.7803
691
+ ▁gear -8.78031
692
+ ▁forest -8.78036
693
+ ard -8.78049
694
+ ▁ra -8.7816
695
+ if -8.78346
696
+ ▁vegetable -8.78455
697
+ ▁jumping -8.7878
698
+ ti -8.78849
699
+ ▁swinging -8.79217
700
+ ▁electric -8.79329
701
+ ▁purse -8.79329
702
+ ▁smart -8.79329
703
+ ▁mitt -8.79329
704
+ ▁moving -8.7933
705
+ ▁fish -8.79417
706
+ ▁reading -8.79453
707
+ ▁mak -8.7957
708
+ ▁seen -8.79576
709
+ ▁play -8.79971
710
+ ent -8.80641
711
+ ▁edge -8.80646
712
+ ▁feet -8.80646
713
+ ▁surfer -8.8085
714
+ unny -8.81131
715
+ ▁ho -8.81569
716
+ able -8.81641
717
+ ▁chairs -8.81908
718
+ ling -8.82076
719
+ ▁part -8.82764
720
+ ▁din -8.83845
721
+ ▁fridge -8.847
722
+ ▁running -8.84701
723
+ ▁yard -8.84703
724
+ ▁perched -8.84816
725
+ ▁cooked -8.84902
726
+ ▁but -8.85047
727
+ ▁hay -8.8505
728
+ ish -8.85124
729
+ ▁end -8.85212
730
+ ▁female -8.86088
731
+ ▁mount -8.86088
732
+ ▁mustard -8.86088
733
+ ▁curb -8.86088
734
+ ▁scene -8.86089
735
+ ▁him -8.86148
736
+ ▁ramp -8.86157
737
+ ▁log -8.86229
738
+ ir -8.86636
739
+ tage -8.87072
740
+ ant -8.87295
741
+ ha -8.87336
742
+ ▁object -8.87497
743
+ ▁draw -8.87497
744
+ ▁cattle -8.87502
745
+ ▁sale -8.87809
746
+ ver -8.87839
747
+ ▁holder -8.88327
748
+ ▁dock -8.8855
749
+ ▁poses -8.88726
750
+ ▁we -8.88797
751
+ ▁wheel -8.88925
752
+ ▁size -8.88927
753
+ ▁towel -8.88927
754
+ ▁tomatoes -8.8899
755
+ ▁bath -8.89331
756
+ ten -8.89694
757
+ ▁shoes -8.89819
758
+ ▁buses -8.89825
759
+ ▁atop -8.90093
760
+ ▁beautiful -8.90375
761
+ ▁among -8.90375
762
+ ▁gold -8.90375
763
+ ▁mother -8.90388
764
+ ▁bu -8.90421
765
+ intend -8.90907
766
+ ▁plants -8.9126
767
+ ▁pasture -8.91856
768
+ ▁cage -8.91891
769
+ ▁pa -8.9214
770
+ ▁bicycles -8.92412
771
+ ▁k -8.92686
772
+ ▁con -8.93124
773
+ ▁smiles -8.93241
774
+ ▁shop -8.93336
775
+ ▁bridge -8.93338
776
+ ▁clean -8.93338
777
+ ▁o -8.94138
778
+ ▁sa -8.94237
779
+ king -8.94412
780
+ ty -8.94564
781
+ ▁pick -8.94631
782
+ ▁crossing -8.94716
783
+ tra -8.94722
784
+ ▁sausage -8.94853
785
+ ▁wrappe -8.94861
786
+ ▁stone -8.94862
787
+ ▁candle -8.94864
788
+ ▁single -8.9487
789
+ ▁right -8.95142
790
+ ▁lap -8.95192
791
+ ig -8.95197
792
+ lic -8.95305
793
+ ▁putt -8.95306
794
+ ▁vases -8.95733
795
+ ▁handle -8.96353
796
+ ▁arranged -8.96391
797
+ ▁blow -8.96392
798
+ ▁desktop -8.96441
799
+ ▁bowls -8.96528
800
+ board -8.96861
801
+ ▁work -8.97108
802
+ ta -8.97472
803
+ ▁cream -8.97954
804
+ ▁mug -8.97954
805
+ ▁equipment -8.97955
806
+ ▁beans -8.97962
807
+ ▁sh -8.97982
808
+ ▁kinds -8.98358
809
+ ate -8.98666
810
+ ▁wi -8.98951
811
+ per -8.99501
812
+ ▁below -8.99541
813
+ ▁phones -9.00677
814
+ ▁fly -9.00787
815
+ ▁toothpaste -9.01154
816
+ ▁perform -9.01154
817
+ ▁held -9.01155
818
+ ▁kids -9.01168
819
+ ▁fenced -9.01268
820
+ ▁ha -9.01676
821
+ ▁peppers -9.01863
822
+ ▁broken -9.02794
823
+ ▁ketchup -9.02794
824
+ ▁police -9.02794
825
+ ▁grill -9.02794
826
+ ▁dinner -9.02794
827
+ ▁flip -9.02803
828
+ ▁float -9.02807
829
+ ▁glazed -9.0284
830
+ lar -9.0287
831
+ ▁make -9.02938
832
+ ▁coat -9.03253
833
+ ▁jump -9.03492
834
+ less -9.03789
835
+ ▁eyes -9.03995
836
+ ▁pe -9.04206
837
+ ▁platform -9.0446
838
+ ▁trash -9.0446
839
+ ▁foot -9.0446
840
+ ▁design -9.0446
841
+ ▁gather -9.04462
842
+ ▁flock -9.04476
843
+ ▁rackets -9.04532
844
+ ▁cover -9.04694
845
+ ot -9.04859
846
+ ▁watch -9.05093
847
+ ▁pro -9.05256
848
+ ▁poles -9.05323
849
+ ▁not -9.05548
850
+ ▁use -9.05927
851
+ ▁birthday -9.06155
852
+ ▁pretty -9.06155
853
+ ▁reach -9.06155
854
+ ▁nice -9.06156
855
+ ▁soda -9.06156
856
+ ▁five -9.06325
857
+ ry -9.06769
858
+ ▁kind -9.07436
859
+ out -9.07513
860
+ im -9.07559
861
+ ▁cap -9.07768
862
+ ▁public -9.07879
863
+ ▁round -9.0788
864
+ having -9.07927
865
+ ke -9.09157
866
+ ▁woods -9.09593
867
+ ▁alone -9.09634
868
+ ▁bushes -9.09672
869
+ ▁sea -9.09702
870
+ ▁lie -9.0974
871
+ ▁pie -9.09898
872
+ ▁players -9.10129
873
+ ▁taken -9.1048
874
+ ated -9.10616
875
+ ▁ta -9.10644
876
+ ▁stack -9.1088
877
+ ▁lake -9.11458
878
+ ▁tea -9.11498
879
+ ▁new -9.11598
880
+ ▁stopped -9.11657
881
+ ▁bo -9.12561
882
+ ▁girls -9.13226
883
+ ▁produce -9.1347
884
+ ▁used -9.13607
885
+ eep -9.13774
886
+ ▁surfboards -9.14452
887
+ ul -9.14582
888
+ ▁engine -9.15089
889
+ ▁garden -9.15089
890
+ ▁tarmac -9.15089
891
+ ▁wild -9.1509
892
+ tuck -9.1517
893
+ ▁mid -9.15203
894
+ ▁vin -9.15343
895
+ ▁dishes -9.15387
896
+ ▁pictures -9.15426
897
+ ▁skiers -9.15512
898
+ light -9.1598
899
+ side -9.16681
900
+ ▁lin -9.16698
901
+ ▁toast -9.16817
902
+ ▁flat -9.16977
903
+ ▁both -9.17007
904
+ ▁try -9.17021
905
+ ▁jar -9.17041
906
+ ▁wet -9.17266
907
+ ▁hole -9.17314
908
+ ▁pet -9.17527
909
+ ▁feeding -9.17672
910
+ ▁hang -9.17911
911
+ ▁put -9.18438
912
+ ▁type -9.18665
913
+ ▁snowboards -9.18809
914
+ ▁beverage -9.18899
915
+ ▁french -9.18899
916
+ ▁shore -9.189
917
+ ▁grow -9.19133
918
+ ment -9.1917
919
+ tro -9.20361
920
+ ▁van -9.20857
921
+ ▁supplies -9.2086
922
+ ▁pre -9.20865
923
+ ▁pool -9.20927
924
+ ical -9.20931
925
+ ▁stoplight -9.21012
926
+ ▁show -9.21323
927
+ ▁working -9.22095
928
+ ▁pack -9.22349
929
+ j -9.22633
930
+ ▁drinks -9.22648
931
+ ▁drive -9.22736
932
+ ▁mix -9.22837
933
+ ▁chain -9.2286
934
+ ▁throw -9.2286
935
+ loaded -9.22861
936
+ ▁steel -9.22861
937
+ ▁good -9.22861
938
+ ▁brushes -9.23132
939
+ ▁clocks -9.2325
940
+ ▁star -9.24495
941
+ ▁shape -9.24568
942
+ qu -9.24648
943
+ ▁egg -9.24781
944
+ tic -9.2479
945
+ age -9.24878
946
+ ▁lunch -9.24901
947
+ ▁modern -9.24901
948
+ ▁square -9.24901
949
+ ▁ear -9.24918
950
+ ▁wash -9.24948
951
+ ther -9.2503
952
+ ▁pad -9.25358
953
+ ▁boxes -9.25361
954
+ ▁balls -9.25453
955
+ ▁ties -9.2653
956
+ ▁sc -9.26645
957
+ ight -9.26724
958
+ ▁tomato -9.2689
959
+ ▁shoulder -9.26984
960
+ ▁enjoy -9.26984
961
+ ▁pastries -9.26984
962
+ ▁milk -9.26984
963
+ ▁lamb -9.26985
964
+ ▁chew -9.26985
965
+ ▁rose -9.26986
966
+ ▁style -9.26992
967
+ ▁match -9.27015
968
+ ▁well -9.27046
969
+ ▁mountains -9.27287
970
+ ▁veggies -9.27527
971
+ ma -9.28982
972
+ ▁vehicles -9.29095
973
+ ▁appear -9.29112
974
+ ▁blender -9.29112
975
+ ▁breakfast -9.29112
976
+ ▁desert -9.29112
977
+ ▁foreground -9.29112
978
+ ▁leaf -9.29112
979
+ ▁napkin -9.29112
980
+ ▁platter -9.29112
981
+ ▁strawberries -9.29112
982
+ ▁sunglasses -9.29112
983
+ ▁shade -9.29112
984
+ ▁lawn -9.29113
985
+ ▁chopped -9.2912
986
+ ▁hotdogs -9.29181
987
+ ▁stra -9.2927
988
+ ▁ripe -9.29287
989
+ ▁doll -9.29379
990
+ ▁cla -9.29632
991
+ ▁dr -9.29828
992
+ ▁potte -9.29836
993
+ ▁gate -9.29845
994
+ ▁see -9.29895
995
+ ff -9.30004
996
+ ▁crowded -9.30661
997
+ ▁ru -9.30863
998
+ ▁pedestrian -9.31286
999
+ ▁baked -9.31286
1000
+ ▁decoration -9.31291
1001
+ ▁mess -9.31294
1002
+ ▁vehicle -9.31303
1003
+ ac -9.31455
1004
+ ▁par -9.31519
1005
+ ▁pasta -9.31593
1006
+ ▁noodles -9.31606
1007
+ ock -9.32467
1008
+ ▁bottom -9.33508
1009
+ ▁christmas -9.33508
1010
+ ▁country -9.33508
1011
+ ▁decorative -9.33508
1012
+ ▁scooter -9.33508
1013
+ ▁sculpture -9.33508
1014
+ ▁sprinkles -9.33508
1015
+ ▁chips -9.3351
1016
+ ▁things -9.33513
1017
+ ▁butter -9.33556
1018
+ ▁war -9.33657
1019
+ ▁kneel -9.33963
1020
+ uch -9.3405
1021
+ ▁catcher -9.34198
1022
+ ▁sandwiches -9.34288
1023
+ ▁pi -9.34733
1024
+ ▁gra -9.35321
1025
+ ▁se -9.35518
1026
+ ▁serving -9.35781
1027
+ ▁horn -9.35782
1028
+ ▁says -9.35794
1029
+ ▁smile -9.35928
1030
+ ▁includes -9.36011
1031
+ ▁dryer -9.36038
1032
+ ▁skies -9.36308
1033
+ ▁i -9.36614
1034
+ line -9.36833
1035
+ ▁spoons -9.37146
1036
+ ▁pulled -9.37477
1037
+ ▁prepare -9.37951
1038
+ own -9.37978
1039
+ ▁colors -9.38069
1040
+ ▁bathtub -9.38106
1041
+ ▁concrete -9.38106
1042
+ ▁distance -9.38106
1043
+ where -9.38106
1044
+ ▁balloon -9.38107
1045
+ ▁nearby -9.38118
1046
+ ▁was -9.38129
1047
+ ▁path -9.38151
1048
+ ▁tri -9.38157
1049
+ ▁lamp -9.38182
1050
+ ▁spot -9.38335
1051
+ ▁chi -9.38405
1052
+ ▁fac -9.38913
1053
+ ▁cups -9.40016
1054
+ ▁check -9.40265
1055
+ ▁nose -9.40615
1056
+ ular -9.40686
1057
+ ▁run -9.40878
1058
+ ger -9.41062
1059
+ ▁passing -9.41115
1060
+ ▁card -9.41367
1061
+ ▁ri -9.42765
1062
+ ▁bacon -9.42926
1063
+ ▁bedroom -9.42926
1064
+ ▁carriage -9.42926
1065
+ ▁kitten -9.42926
1066
+ ▁stainless -9.42926
1067
+ ▁graze -9.42934
1068
+ ▁carrie -9.42936
1069
+ ▁bull -9.43
1070
+ ▁low -9.43078
1071
+ ▁clothes -9.43111
1072
+ ▁fi -9.43248
1073
+ ▁vi -9.4333
1074
+ ▁rider -9.43351
1075
+ ▁painting -9.4345
1076
+ ▁reads -9.4346
1077
+ ▁bite -9.43579
1078
+ ▁pond -9.43614
1079
+ ▁airplanes -9.44209
1080
+ ▁legs -9.44344
1081
+ cal -9.44764
1082
+ ▁turn -9.44882
1083
+ ▁juice -9.45426
1084
+ ▁lettuce -9.45426
1085
+ ▁partially -9.45426
1086
+ ▁position -9.45426
1087
+ ▁carpet -9.45427
1088
+ ▁swimming -9.45427
1089
+ ▁sort -9.45431
1090
+ ▁pose -9.45529
1091
+ ▁paw -9.4554
1092
+ ▁plain -9.45556
1093
+ ▁docked -9.4639
1094
+ ine -9.46494
1095
+ eat -9.46557
1096
+ ▁staring -9.46617
1097
+ ▁travel -9.47023
1098
+ ▁here -9.47823
1099
+ ▁blurr -9.4799
1100
+ ▁pigeons -9.4799
1101
+ ▁what -9.4799
1102
+ ▁cement -9.47991
1103
+ ▁word -9.47991
1104
+ ▁same -9.47991
1105
+ ▁ship -9.47999
1106
+ ▁reflection -9.48012
1107
+ ▁fashion -9.48133
1108
+ ▁tiny -9.48351
1109
+ ▁pin -9.48409
1110
+ ▁cabinet -9.48423
1111
+ ▁spi -9.48493
1112
+ ▁mushrooms -9.48603
1113
+ ci -9.49338
1114
+ ▁point -9.49645
1115
+ ▁feed -9.49684
1116
+ ▁giv -9.49926
1117
+ just -9.50571
1118
+ ▁arrangement -9.50622
1119
+ ▁bucket -9.50622
1120
+ ▁collecti -9.50622
1121
+ ▁lift -9.50622
1122
+ ▁parade -9.50622
1123
+ ▁shadow -9.50622
1124
+ ▁climb -9.50622
1125
+ ▁flag -9.50622
1126
+ ▁lead -9.50623
1127
+ ▁center -9.50623
1128
+ ▁pears -9.50624
1129
+ ▁sheet -9.5071
1130
+ ▁life -9.50858
1131
+ ▁utensils -9.50952
1132
+ ▁gl -9.50966
1133
+ ▁pickle -9.51051
1134
+ ▁writing -9.51316
1135
+ ▁pu -9.51557
1136
+ ▁containers -9.51904
1137
+ ▁tape -9.52638
1138
+ ▁race -9.53101
1139
+ ▁pitch -9.53126
1140
+ ▁you -9.53311
1141
+ ▁family -9.53325
1142
+ ▁military -9.53325
1143
+ ▁pavement -9.53325
1144
+ ▁peanut -9.53325
1145
+ ▁picnic -9.53325
1146
+ ▁soccer -9.53325
1147
+ ▁space -9.53325
1148
+ ▁lone -9.53328
1149
+ ▁pastry -9.53355
1150
+ ▁finger -9.53421
1151
+ ▁cluttered -9.53437
1152
+ ▁watches -9.53479
1153
+ ▁officer -9.53677
1154
+ ful -9.53806
1155
+ ▁upside -9.53856
1156
+ ▁bell -9.54519
1157
+ ▁benches -9.54552
1158
+ man -9.54553
1159
+ ▁covering -9.549
1160
+ ure -9.54966
1161
+ ▁goats -9.55088
1162
+ ▁plays -9.55139
1163
+ ▁serve -9.55787
1164
+ ▁deck -9.56079
1165
+ ▁curtain -9.56102
1166
+ ▁himself -9.56102
1167
+ ▁ledge -9.56102
1168
+ ▁railroad -9.56102
1169
+ ▁duck -9.56103
1170
+ ▁model -9.56103
1171
+ ▁comme -9.56104
1172
+ rcial -9.56105
1173
+ arrow -9.56106
1174
+ ▁base -9.56112
1175
+ ▁hillside -9.56146
1176
+ ▁tools -9.56201
1177
+ ▁mark -9.56362
1178
+ ial -9.56538
1179
+ ▁frosting -9.56948
1180
+ ▁sticks -9.57019
1181
+ ▁outdoors -9.57072
1182
+ ral -9.57163
1183
+ cer -9.57499
1184
+ made -9.5813
1185
+ ddler -9.58536
1186
+ ▁waves -9.58741
1187
+ ▁propeller -9.58959
1188
+ ▁puppy -9.58959
1189
+ ▁school -9.58959
1190
+ ▁built -9.5896
1191
+ ▁cupcake -9.5896
1192
+ ▁mini -9.58962
1193
+ ▁step -9.58962
1194
+ ▁string -9.58966
1195
+ ▁panda -9.58968
1196
+ ▁port -9.59003
1197
+ ▁eggs -9.59129
1198
+ fore -9.59259
1199
+ ▁pipe -9.59269
1200
+ ster -9.59617
1201
+ ead -9.5992
1202
+ ▁flor -9.6181
1203
+ ▁block -9.61901
1204
+ ▁friend -9.61901
1205
+ ▁spread -9.61901
1206
+ ▁winter -9.61901
1207
+ ▁still -9.61901
1208
+ ▁flies -9.61901
1209
+ ▁sport -9.61902
1210
+ ▁fried -9.61904
1211
+ ▁direction -9.61921
1212
+ ▁event -9.61939
1213
+ ▁landing -9.62011
1214
+ ▁cub -9.62039
1215
+ ▁trailer -9.62104
1216
+ ▁cookie -9.62399
1217
+ ▁bi -9.62462
1218
+ ary -9.62694
1219
+ ▁eye -9.62776
1220
+ ▁barn -9.63204
1221
+ ▁prepared -9.63753
1222
+ ball -9.64097
1223
+ ▁comp -9.64222
1224
+ ▁tin -9.64718
1225
+ ▁corn -9.64787
1226
+ ▁dip -9.64922
1227
+ ▁balanc -9.64931
1228
+ ▁pattern -9.64931
1229
+ ▁professional -9.64931
1230
+ ▁consist -9.64931
1231
+ ▁spray -9.64931
1232
+ ▁antique -9.64932
1233
+ ▁stall -9.64932
1234
+ ▁package -9.64945
1235
+ ▁town -9.65018
1236
+ ▁tag -9.6513
1237
+ ▁grapes -9.6552
1238
+ ▁does -9.65696
1239
+ ▁lemons -9.65937
1240
+ ▁note -9.66422
1241
+ ▁stickers -9.66544
1242
+ ▁figure -9.68056
1243
+ ▁furniture -9.68056
1244
+ ▁lime -9.68056
1245
+ ▁parrot -9.68056
1246
+ ▁sofa -9.68056
1247
+ ▁outfit -9.68056
1248
+ ▁power -9.68058
1249
+ ▁disc -9.68059
1250
+ ▁notebook -9.68059
1251
+ ▁wide -9.68085
1252
+ ▁after -9.6809
1253
+ ▁chili -9.68091
1254
+ ▁hard -9.68108
1255
+ ▁tail -9.68122
1256
+ ▁features -9.68149
1257
+ ▁fry -9.68236
1258
+ ▁coin -9.6829
1259
+ ▁bit -9.6863
1260
+ ▁turned -9.68744
1261
+ ▁pointing -9.69232
1262
+ ▁bunches -9.69379
1263
+ ▁seated -9.70181
1264
+ ▁frost -9.70307
1265
+ ▁featur -9.71192
1266
+ ▁contents -9.71282
1267
+ ▁delicious -9.71282
1268
+ ▁expired -9.71282
1269
+ ▁guitar -9.71282
1270
+ ▁leash -9.71282
1271
+ ▁snack -9.71282
1272
+ ▁steak -9.71282
1273
+ ▁name -9.71282
1274
+ ▁push -9.71283
1275
+ ▁touching -9.71297
1276
+ ▁subway -9.71301
1277
+ ▁stir -9.71352
1278
+ ▁catch -9.72328
1279
+ ▁short -9.72397
1280
+ ▁whole -9.72414
1281
+ ▁waits -9.72591
1282
+ ▁wear -9.73765
1283
+ ▁trail -9.74385
1284
+ neath -9.74541
1285
+ ▁berries -9.74615
1286
+ ▁blond -9.74615
1287
+ ▁ceramic -9.74615
1288
+ ▁condiments -9.74615
1289
+ ▁fabric -9.74615
1290
+ ▁fancy -9.74615
1291
+ ▁stairs -9.74616
1292
+ ▁pants -9.74635
1293
+ uzz -9.74642
1294
+ ▁ham -9.7487
1295
+ ▁peeled -9.7509
1296
+ ▁tee -9.75839
1297
+ ▁paste -9.75922
1298
+ ▁goat -9.75928
1299
+ how -9.75951
1300
+ ▁bri -9.76824
1301
+ ▁shorts -9.76884
1302
+ top -9.77115
1303
+ ity -9.77287
1304
+ ▁kn -9.77848
1305
+ cent -9.77932
1306
+ ▁smo -9.78057
1307
+ ▁key -9.78062
1308
+ ▁craft -9.78063
1309
+ ▁visible -9.78063
1310
+ ▁beef -9.78067
1311
+ ▁rope -9.78084
1312
+ ▁flo -9.78113
1313
+ ▁rais -9.78124
1314
+ ▁sail -9.78141
1315
+ ▁net -9.78173
1316
+ ▁bro -9.78227
1317
+ ▁ram -9.78314
1318
+ ▁most -9.78953
1319
+ ▁propp -9.79941
1320
+ ▁rac -9.80259
1321
+ ▁farm -9.80443
1322
+ ▁sleep -9.80669
1323
+ ▁fall -9.81572
1324
+ ▁advertisement -9.81635
1325
+ ▁bouquet -9.81635
1326
+ ▁museum -9.81635
1327
+ ▁restroom -9.81635
1328
+ ▁shelves -9.81635
1329
+ ▁flown -9.81635
1330
+ ▁vest -9.8164
1331
+ ▁icing -9.81676
1332
+ ▁tank -9.81686
1333
+ ▁walkway -9.81766
1334
+ ▁seem -9.81848
1335
+ ▁shiny -9.82033
1336
+ ▁hid -9.8206
1337
+ ▁pier -9.82143
1338
+ ▁hotel -9.82262
1339
+ ▁packed -9.82626
1340
+ band -9.82788
1341
+ vi -9.82908
1342
+ ature -9.8321
1343
+ ▁cook -9.83224
1344
+ ▁tan -9.83303
1345
+ ▁rocky -9.83744
1346
+ ▁writ -9.84364
1347
+ ▁pass -9.84387
1348
+ bby -9.85172
1349
+ ▁bin -9.8518
1350
+ ▁reflect -9.85308
1351
+ ▁sub -9.85318
1352
+ ▁amount -9.85338
1353
+ ▁steam -9.85338
1354
+ ▁tongue -9.85338
1355
+ ▁business -9.85339
1356
+ ▁costume -9.85339
1357
+ ▁owl -9.85339
1358
+ ▁worn -9.85339
1359
+ ▁heart -9.85339
1360
+ ▁calf -9.8534
1361
+ ▁sill -9.85379
1362
+ ▁alongside -9.85419
1363
+ ▁tent -9.85452
1364
+ ▁gro -9.85528
1365
+ ▁print -9.85611
1366
+ air -9.85803
1367
+ min -9.86074
1368
+ ▁leg -9.87016
1369
+ ▁peel -9.88722
1370
+ ▁shin -9.89114
1371
+ ▁happy -9.89185
1372
+ ▁harbor -9.89185
1373
+ ▁pillow -9.89185
1374
+ ▁roof -9.89185
1375
+ ▁sugar -9.89185
1376
+ ▁airliner -9.89185
1377
+ ▁indoor -9.89185
1378
+ ▁ornate -9.89185
1379
+ ▁frame -9.89185
1380
+ ▁itself -9.89185
1381
+ ▁residen -9.89185
1382
+ ▁rusted -9.89186
1383
+ ▁shopping -9.89246
1384
+ ized -9.89285
1385
+ ▁gas -9.8932
1386
+ ator -9.89406
1387
+ ▁pitcher -9.8947
1388
+ ▁branches -9.89544
1389
+ ▁selling -9.89747
1390
+ ▁posed -9.89757
1391
+ ▁boarder -9.8982
1392
+ ▁em -9.90128
1393
+ ▁gr -9.9121
1394
+ ▁son -9.9131
1395
+ ▁hi -9.91573
1396
+ ▁ja -9.93084
1397
+ ▁fun -9.93085
1398
+ ▁figurine -9.93185
1399
+ ▁freezer -9.93185
1400
+ ▁garbage -9.93185
1401
+ ▁goggles -9.93185
1402
+ ▁overhead -9.93185
1403
+ ▁patio -9.93185
1404
+ ▁section -9.93185
1405
+ ▁waffle -9.93185
1406
+ ▁frog -9.93185
1407
+ ▁tasty -9.93186
1408
+ ▁mud -9.93187
1409
+ ▁belt -9.93188
1410
+ ough -9.93191
1411
+ ▁curl -9.93195
1412
+ ▁fast -9.93216
1413
+ ▁rug -9.93392
1414
+ ▁ring -9.93443
1415
+ ▁bra -9.93457
1416
+ ▁mat -9.94022
1417
+ tial -9.94446
1418
+ ▁item -9.95206
1419
+ fri -9.95832
1420
+ ▁scatter -9.97351
1421
+ ▁celery -9.97351
1422
+ ▁faucet -9.97351
1423
+ ▁kept -9.97351
1424
+ ▁leather -9.97351
1425
+ ▁loading -9.97351
1426
+ ▁relax -9.97351
1427
+ ▁structure -9.97351
1428
+ ▁numer -9.97352
1429
+ ▁six -9.97352
1430
+ ▁asleep -9.97352
1431
+ ▁hood -9.97353
1432
+ ▁iron -9.97353
1433
+ ▁owner -9.97359
1434
+ ▁also -9.9736
1435
+ ▁qui -9.97365
1436
+ ▁let -9.97421
1437
+ ▁fallen -9.97444
1438
+ ▁class -9.97447
1439
+ ▁straw -9.97482
1440
+ rban -9.97534
1441
+ ▁bow -9.98064
1442
+ right -10.0112
1443
+ ▁produc -10.0114
1444
+ ▁accessories -10.017
1445
+ ▁bakery -10.017
1446
+ ▁ceiling -10.017
1447
+ ▁cereal -10.017
1448
+ ▁champagne -10.017
1449
+ ▁commuter -10.017
1450
+ ▁individual -10.017
1451
+ ▁sweater -10.017
1452
+ ▁neatly -10.017
1453
+ ▁practic -10.017
1454
+ ▁text -10.017
1455
+ ▁headphones -10.017
1456
+ ▁rusty -10.017
1457
+ ▁cord -10.017
1458
+ ▁shar -10.0172
1459
+ ▁lid -10.0172
1460
+ ▁passes -10.0181
1461
+ thered -10.0262
1462
+ ign -10.0263
1463
+ ▁pay -10.0297
1464
+ ▁farmer -10.0318
1465
+ ▁bush -10.0319
1466
+ ▁move -10.033
1467
+ ok -10.0372
1468
+ ▁sticker -10.047
1469
+ ▁paint -10.0473
1470
+ ador -10.0608
1471
+ ▁direct -10.0621
1472
+ ▁circle -10.0624
1473
+ ▁crochet -10.0624
1474
+ ▁garage -10.0624
1475
+ ▁magazine -10.0624
1476
+ ▁measur -10.0624
1477
+ ▁monkey -10.0624
1478
+ ▁urinal -10.0624
1479
+ ▁marina -10.0624
1480
+ ▁speak -10.0624
1481
+ ▁roman -10.0625
1482
+ ▁marble -10.0625
1483
+ ▁crate -10.0636
1484
+ ▁doorway -10.0641
1485
+ ▁je -10.0645
1486
+ ▁real -10.0673
1487
+ ▁shoe -10.0804
1488
+ ▁talk -10.1004
1489
+ ▁potato -10.1091
1490
+ ▁chop -10.1099
1491
+ ▁advertis -10.1101
1492
+ ▁approach -10.1101
1493
+ ▁attempt -10.1101
1494
+ ▁character -10.1101
1495
+ ▁depict -10.1101
1496
+ ▁jockey -10.1101
1497
+ ▁stream -10.1101
1498
+ ▁terminal -10.1101
1499
+ ▁burger -10.1101
1500
+ ▁jetliner -10.1101
1501
+ ▁kiwi -10.1101
1502
+ ▁vendor -10.1101
1503
+ ▁stunt -10.1101
1504
+ ▁collar -10.1101
1505
+ ▁palm -10.1101
1506
+ foam -10.1101
1507
+ ▁foil -10.1101
1508
+ ▁necktie -10.1101
1509
+ ▁indicat -10.1101
1510
+ ▁burn -10.1101
1511
+ ▁than -10.1101
1512
+ ▁help -10.1102
1513
+ ▁tire -10.1105
1514
+ ▁workers -10.1113
1515
+ ▁jo -10.1113
1516
+ ▁grab -10.1122
1517
+ ▁driver -10.1135
1518
+ ▁brightly -10.1136
1519
+ ▁mar -10.1161
1520
+ ▁lab -10.1183
1521
+ ▁shak -10.1276
1522
+ ound -10.1287
1523
+ ▁pepper -10.138
1524
+ ▁dea -10.1406
1525
+ ▁fill -10.1557
1526
+ ▁laughing -10.1601
1527
+ ▁missing -10.1601
1528
+ ▁pepperoni -10.1601
1529
+ ▁ribbon -10.1601
1530
+ ▁shrimp -10.1601
1531
+ ▁custom -10.1601
1532
+ ▁sniff -10.1601
1533
+ ▁rubb -10.1601
1534
+ ique -10.1601
1535
+ ▁wool -10.1601
1536
+ ▁cartoon -10.1601
1537
+ ▁slid -10.1601
1538
+ ▁needle -10.1601
1539
+ ▁iphone -10.1601
1540
+ ▁speed -10.1602
1541
+ ▁canoe -10.1602
1542
+ berry -10.1603
1543
+ ▁multicolored -10.1616
1544
+ ▁bagel -10.1642
1545
+ ▁bent -10.1671
1546
+ ▁paddle -10.1676
1547
+ ▁sla -10.2031
1548
+ most -10.2043
1549
+ ▁garnish -10.2127
1550
+ ▁america -10.2127
1551
+ ▁bikini -10.2127
1552
+ ▁connect -10.2127
1553
+ ▁focus -10.2127
1554
+ ▁grizzl -10.2127
1555
+ ▁horseback -10.2127
1556
+ ▁jersey -10.2127
1557
+ ▁liquid -10.2127
1558
+ ▁money -10.2127
1559
+ ▁motorbike -10.2127
1560
+ ▁newspaper -10.2127
1561
+ ▁opposite -10.2127
1562
+ ▁ostrich -10.2127
1563
+ ▁powder -10.2127
1564
+ ▁process -10.2127
1565
+ ▁selection -10.2127
1566
+ ▁silverware -10.2127
1567
+ ▁furry -10.2127
1568
+ ▁pocket -10.2127
1569
+ ▁relish -10.2127
1570
+ ▁jelly -10.2127
1571
+ ▁patch -10.2127
1572
+ ▁fighter -10.2127
1573
+ ▁santa -10.2127
1574
+ ▁typing -10.2127
1575
+ ▁cold -10.2127
1576
+ ▁odd -10.2127
1577
+ ▁tasting -10.2128
1578
+ lumin -10.2138
1579
+ ▁sharp -10.2142
1580
+ ▁wings -10.2143
1581
+ ▁stew -10.2146
1582
+ ▁team -10.2165
1583
+ ▁checker -10.2177
1584
+ ▁wait -10.2362
1585
+ form -10.2464
1586
+ bow -10.2598
1587
+ plug -10.2683
1588
+ ▁aircraft -10.2683
1589
+ ▁bookshelf -10.2683
1590
+ ▁cigarette -10.2683
1591
+ ▁digital -10.2683
1592
+ ▁exhibit -10.2683
1593
+ ▁interesting -10.2683
1594
+ ▁meadow -10.2683
1595
+ ▁muffin -10.2683
1596
+ ▁natural -10.2683
1597
+ ▁organiz -10.2683
1598
+ ▁remov -10.2683
1599
+ ▁sweet -10.2683
1600
+ ▁celebrat -10.2683
1601
+ ▁overlook -10.2683
1602
+ ▁baking -10.2683
1603
+ ▁wedding -10.2683
1604
+ ▁dozen -10.2683
1605
+ ▁forward -10.2683
1606
+ ▁jeans -10.2683
1607
+ ▁sailboat -10.2683
1608
+ guard -10.2684
1609
+ ▁dust -10.2684
1610
+ ▁hook -10.2685
1611
+ ▁beak -10.2686
1612
+ ▁action -10.2686
1613
+ ▁adjust -10.2693
1614
+ ield -10.2699
1615
+ ▁more -10.2703
1616
+ tain -10.275
1617
+ tted -10.2755
1618
+ ▁layer -10.2867
1619
+ ▁medi -10.2971
1620
+ ▁start -10.3157
1621
+ book -10.3187
1622
+ ▁avocado -10.3271
1623
+ ▁construction -10.3271
1624
+ ▁grapefruit -10.3271
1625
+ ▁ingredients -10.3271
1626
+ ▁instruction -10.3271
1627
+ ▁ipod -10.3271
1628
+ ▁learning -10.3271
1629
+ ▁liquor -10.3271
1630
+ ▁ornament -10.3271
1631
+ ▁pencils -10.3271
1632
+ ▁shallow -10.3271
1633
+ ▁tourist -10.3271
1634
+ ▁transport -10.3271
1635
+ ▁trolley -10.3271
1636
+ ▁tulips -10.3271
1637
+ ▁clown -10.3271
1638
+ ▁magnet -10.3271
1639
+ ▁pineapple -10.3271
1640
+ ▁loung -10.3271
1641
+ ▁finish -10.3271
1642
+ ▁first -10.3271
1643
+ ▁knives -10.3271
1644
+ ▁backyard -10.3271
1645
+ ▁operat -10.3271
1646
+ ▁knitt -10.3271
1647
+ ▁skirt -10.3271
1648
+ ▁semi -10.3271
1649
+ ▁formal -10.3271
1650
+ ▁apartment -10.3272
1651
+ ▁mask -10.3274
1652
+ soft -10.3274
1653
+ ▁peas -10.3278
1654
+ ▁highway -10.3286
1655
+ ▁countertop -10.3288
1656
+ ▁flight -10.3301
1657
+ ▁scissor -10.3677
1658
+ shirt -10.3813
1659
+ phone -10.3817
1660
+ ▁perch -10.3842
1661
+ ▁plai -10.3863
1662
+ ▁calculator -10.3896
1663
+ ▁chrome -10.3896
1664
+ ▁church -10.3896
1665
+ ▁console -10.3896
1666
+ ▁flavor -10.3896
1667
+ ▁fluffy -10.3896
1668
+ ▁motorcyclist -10.3896
1669
+ ▁nokia -10.3896
1670
+ ▁project -10.3896
1671
+ ▁selfie -10.3896
1672
+ ▁skating -10.3896
1673
+ ▁sneakers -10.3896
1674
+ ▁spinach -10.3896
1675
+ ▁stretch -10.3896
1676
+ ▁transit -10.3896
1677
+ ▁vegetation -10.3896
1678
+ ▁cheesecake -10.3896
1679
+ ▁extreme -10.3896
1680
+ ▁material -10.3896
1681
+ ▁roast -10.3896
1682
+ ▁crouch -10.3896
1683
+ ▁hung -10.3896
1684
+ ▁fixing -10.3896
1685
+ ▁taxi -10.3896
1686
+ ▁crib -10.3896
1687
+ ▁swan -10.3896
1688
+ ▁boots -10.3896
1689
+ ▁weather -10.3896
1690
+ ▁safe -10.3896
1691
+ ▁sunset -10.3896
1692
+ ▁strawberry -10.3896
1693
+ ▁disk -10.3896
1694
+ ▁great -10.3897
1695
+ ▁competition -10.3898
1696
+ ▁alley -10.3899
1697
+ ▁label -10.3901
1698
+ ▁hangar -10.3912
1699
+ ▁bundle -10.3928
1700
+ ither -10.3942
1701
+ ould -10.3944
1702
+ ▁knee -10.395
1703
+ ▁page -10.4205
1704
+ ▁lemon -10.4341
1705
+ ▁sell -10.4478
1706
+ amon -10.4558
1707
+ ▁decor -10.456
1708
+ ▁antelope -10.4562
1709
+ ▁bamboo -10.4562
1710
+ ▁blood -10.4562
1711
+ ▁circu -10.4562
1712
+ ▁dishwasher -10.4562
1713
+ ▁habitat -10.4562
1714
+ ▁library -10.4562
1715
+ ▁located -10.4562
1716
+ ▁pancakes -10.4562
1717
+ ▁separate -10.4562
1718
+ ▁special -10.4562
1719
+ ▁tissue -10.4562
1720
+ ▁wagon -10.4562
1721
+ ▁basin -10.4562
1722
+ ▁elaborate -10.4562
1723
+ ▁follow -10.4562
1724
+ ▁garlic -10.4562
1725
+ ▁gravel -10.4562
1726
+ ▁mobile -10.4562
1727
+ ▁roam -10.4562
1728
+ ▁baggage -10.4562
1729
+ ▁bank -10.4562
1730
+ ▁complete -10.4562
1731
+ ▁gravy -10.4562
1732
+ ▁attire -10.4563
1733
+ ▁dried -10.4563
1734
+ ▁ruler -10.4563
1735
+ ▁cafe -10.4563
1736
+ ▁nap -10.4563
1737
+ ▁sold -10.4563
1738
+ ▁saying -10.4564
1739
+ ▁thread -10.4565
1740
+ ▁smoke -10.4565
1741
+ ▁candy -10.4571
1742
+ shells -10.4622
1743
+ ▁enclos -10.5276
1744
+ ▁artistic -10.5277
1745
+ ▁cauliflower -10.5277
1746
+ ▁charger -10.5277
1747
+ ▁chinese -10.5277
1748
+ ▁cushion -10.5277
1749
+ ▁dispenser -10.5277
1750
+ ▁grip -10.5277
1751
+ ▁illuminated -10.5277
1752
+ ▁industr -10.5277
1753
+ ▁kissing -10.5277
1754
+ ▁limb -10.5277
1755
+ ▁locomotive -10.5277
1756
+ ▁monument -10.5277
1757
+ ▁price -10.5277
1758
+ ▁splash -10.5277
1759
+ ▁system -10.5277
1760
+ ▁telephone -10.5277
1761
+ ▁turkey -10.5277
1762
+ ▁tusks -10.5277
1763
+ ▁wetsuit -10.5277
1764
+ ▁elder -10.5277
1765
+ ▁repair -10.5277
1766
+ ▁scarf -10.5277
1767
+ terior -10.5277
1768
+ ▁wax -10.5277
1769
+ ▁groom -10.5277
1770
+ ▁punch -10.5277
1771
+ ▁storm -10.5277
1772
+ ▁barrel -10.5277
1773
+ ▁pudd -10.5277
1774
+ ▁extend -10.5277
1775
+ ▁dump -10.5277
1776
+ ▁trop -10.5277
1777
+ ▁chas -10.5278
1778
+ ▁enter -10.5278
1779
+ ▁heav -10.5279
1780
+ ▁vanity -10.5279
1781
+ ▁lick -10.528
1782
+ ▁wok -10.5282
1783
+ ixture -10.5285
1784
+ boat -10.5962
1785
+ work -10.5963
1786
+ ▁apart -10.6045
1787
+ ▁micro -10.6046
1788
+ ▁donkey -10.6046
1789
+ ▁honey -10.6046
1790
+ ▁huddle -10.6046
1791
+ ▁jumbo -10.6046
1792
+ ▁pedestal -10.6046
1793
+ ▁protect -10.6046
1794
+ ▁symbol -10.6046
1795
+ ▁wrapping -10.6046
1796
+ ▁bloom -10.6046
1797
+ ▁break -10.6046
1798
+ ▁chest -10.6046
1799
+ ▁cruise -10.6046
1800
+ ▁gaze -10.6046
1801
+ ▁polish -10.6046
1802
+ ▁serious -10.6046
1803
+ ▁wrist -10.6046
1804
+ ▁booth -10.6046
1805
+ ▁soldier -10.6046
1806
+ usual -10.6046
1807
+ ▁nail -10.6046
1808
+ ▁coach -10.6046
1809
+ ▁formation -10.6046
1810
+ ▁fighting -10.6046
1811
+ ▁dvd -10.6046
1812
+ ▁bald -10.6046
1813
+ ▁sewing -10.6047
1814
+ ▁warning -10.6047
1815
+ ▁sideways -10.6049
1816
+ ▁range -10.6062
1817
+ ▁clip -10.6085
1818
+ ▁veggie -10.6658
1819
+ ▁swim -10.6879
1820
+ ▁cluster -10.6879
1821
+ ▁mattress -10.6879
1822
+ ▁suspende -10.6879
1823
+ ▁blueberries -10.6879
1824
+ ▁brocolli -10.6879
1825
+ ▁conveyor -10.6879
1826
+ ▁cucumber -10.6879
1827
+ ▁downhill -10.6879
1828
+ ▁festival -10.6879
1829
+ ▁fountain -10.6879
1830
+ ▁frozen -10.6879
1831
+ ▁handbag -10.6879
1832
+ ▁kayak -10.6879
1833
+ ▁language -10.6879
1834
+ ▁league -10.6879
1835
+ ▁mustache -10.6879
1836
+ ▁sleek -10.6879
1837
+ ▁vandalized -10.6879
1838
+ ▁windowsill -10.6879
1839
+ ▁yogurt -10.6879
1840
+ ▁twig -10.6879
1841
+ ▁strange -10.6879
1842
+ ▁peach -10.6879
1843
+ ▁drain -10.6879
1844
+ ▁root -10.6879
1845
+ ▁soap -10.6879
1846
+ ▁grind -10.688
1847
+ ▁microphone -10.688
1848
+ ▁melt -10.6881
1849
+ ▁peek -10.6882
1850
+ ▁tip -10.7229
1851
+ mote -10.7542
1852
+ ▁mushroom -10.7567
1853
+ dog -10.7698
1854
+ white -10.7706
1855
+ life -10.7736
1856
+ q -10.7779
1857
+ ▁navy -10.7788
1858
+ tangerines -10.7788
1859
+ ▁alcohol -10.7788
1860
+ ▁artwork -10.7788
1861
+ ▁asphalt -10.7788
1862
+ ▁belong -10.7788
1863
+ ▁bronze -10.7788
1864
+ ▁casserole -10.7788
1865
+ ▁charging -10.7788
1866
+ ▁chef -10.7788
1867
+ ▁cherries -10.7788
1868
+ ▁citrus -10.7788
1869
+ ▁crosswalk -10.7788
1870
+ ▁factory -10.7788
1871
+ ▁fireplace -10.7788
1872
+ ▁gentlem -10.7788
1873
+ ▁herself -10.7788
1874
+ ▁junk -10.7788
1875
+ ▁kettle -10.7788
1876
+ ▁krisp -10.7788
1877
+ ▁landscape -10.7788
1878
+ ▁leaving -10.7788
1879
+ ▁macaroni -10.7788
1880
+ ▁squash -10.7788
1881
+ ▁thumb -10.7788
1882
+ ▁interact -10.7788
1883
+ ▁softball -10.7788
1884
+ ▁storage -10.7788
1885
+ ▁depot -10.7788
1886
+ ▁human -10.7788
1887
+ ▁quilt -10.7788
1888
+ ▁wicker -10.7788
1889
+ ▁second -10.7789
1890
+ ▁stalk -10.7789
1891
+ ▁twin -10.7789
1892
+ ▁order -10.7789
1893
+ ▁steer -10.7789
1894
+ ▁music -10.7789
1895
+ ▁mash -10.779
1896
+ ▁crane -10.779
1897
+ ▁heels -10.779
1898
+ eagle -10.779
1899
+ ▁carving -10.779
1900
+ ▁appli -10.7793
1901
+ ▁swis -10.7796
1902
+ ▁ferr -10.78
1903
+ dual -10.7817
1904
+ pokes -10.7818
1905
+ ▁lu -10.7963
1906
+ ello -10.7972
1907
+ ▁read -10.8426
1908
+ ▁appliance -10.8568
1909
+ ripe -10.8612
1910
+ wood -10.8706
1911
+ ▁barrier -10.8788
1912
+ ▁bicyclist -10.8788
1913
+ ▁briefcase -10.8788
1914
+ ▁change -10.8788
1915
+ ▁convention -10.8788
1916
+ ▁conversation -10.8788
1917
+ ▁expos -10.8788
1918
+ ▁kreme -10.8788
1919
+ ▁mannequin -10.8788
1920
+ ▁plunger -10.8788
1921
+ ▁polka -10.8788
1922
+ ▁resembl -10.8788
1923
+ ▁savannah -10.8788
1924
+ ▁seafood -10.8788
1925
+ ▁service -10.8788
1926
+ ▁shaggy -10.8788
1927
+ ▁skyscraper -10.8788
1928
+ ▁spectators -10.8788
1929
+ ▁sunflower -10.8788
1930
+ ▁tablecloth -10.8788
1931
+ ▁wilderness -10.8788
1932
+ ▁zone -10.8788
1933
+ ▁elev -10.8788
1934
+ ▁hallway -10.8788
1935
+ ▁pepsi -10.8788
1936
+ ▁retriev -10.8788
1937
+ ▁sparse -10.8788
1938
+ ▁squat -10.8788
1939
+ ▁student -10.8788
1940
+ ▁cubicle -10.8788
1941
+ ▁freight -10.8788
1942
+ ▁bubbl -10.8788
1943
+ ▁duff -10.8788
1944
+ ▁skillet -10.8788
1945
+ ▁curve -10.8788
1946
+ ▁german -10.8788
1947
+ ▁frown -10.8788
1948
+ ▁mural -10.8788
1949
+ ▁mesh -10.8788
1950
+ ▁porch -10.8788
1951
+ ▁broad -10.8789
1952
+ ▁present -10.8789
1953
+ ▁toiletries -10.8789
1954
+ ▁mice -10.8789
1955
+ ▁fixtures -10.8789
1956
+ ▁stem -10.8789
1957
+ were -10.9497
1958
+ ▁grape -10.9678
1959
+ print -10.9815
1960
+ waiting -10.9816
1961
+ ▁chees -10.9844
1962
+ ▁glaze -10.9866
1963
+ ▁even -10.9884
1964
+ ploye -10.9899
1965
+ ▁audience -10.9899
1966
+ ▁bristles -10.9899
1967
+ ▁coleslaw -10.9899
1968
+ ▁commode -10.9899
1969
+ ▁consumption -10.9899
1970
+ ▁damage -10.9899
1971
+ ▁elegant -10.9899
1972
+ ▁entree -10.9899
1973
+ ▁environment -10.9899
1974
+ ▁inspect -10.9899
1975
+ ▁jungle -10.9899
1976
+ ▁radio -10.9899
1977
+ ▁receipt -10.9899
1978
+ ▁return -10.9899
1979
+ ▁scratch -10.9899
1980
+ ▁slow -10.9899
1981
+ ▁spaghetti -10.9899
1982
+ ▁surprised -10.9899
1983
+ ▁world -10.9899
1984
+ ▁describ -10.9899
1985
+ ▁pilot -10.9899
1986
+ ▁roost -10.9899
1987
+ ▁shoot -10.9899
1988
+ ▁smell -10.9899
1989
+ ▁wedge -10.9899
1990
+ ▁hawk -10.99
1991
+ ▁placemat -10.99
1992
+ ▁sauerk -10.99
1993
+ ▁peace -10.99
1994
+ ▁loaf -10.99
1995
+ ▁dairy -10.99
1996
+ ▁worm -10.99
1997
+ ▁carousel -10.99
1998
+ ▁apron -10.99
1999
+ ▁kick -10.99
2000
+ ▁begin -10.99
src/dataset/sub_tokenizing_captions.txt ADDED
The diff for this file is too large to render. See raw diff
 
src/dataset/train_sub_tokenizer.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import sentencepiece as spm
4
+ import yaml
5
+
6
# Load the project-wide hyperparameters once, at import time. The
# ``__main__`` entry point of this script reads the SentencePiece vocabulary
# size from this mapping.
# NOTE(review): the absolute /workspace path presumably matches the training
# container layout - confirm before running this script elsewhere.
with open("/workspace/params.yaml", "r", encoding="utf-8") as f:
    params = yaml.safe_load(f)
9
+
10
+
11
def train_sentencepiece(
    json_path,
    model_prefix="sub_tokenizer",
    vocab_size=500,
    model_type="unigram",
    txt_path="/workspace/src/dataset/sub_tokenizing_captions.txt",
):
    """Train a SentencePiece sub-word tokenizer on the captions of a JSON file.

    The annotation file is flattened into a plain-text corpus (one lowercased
    caption per line) and SentencePiece is trained on that corpus.

    Args:
        json_path: Path to a JSON file holding an iterable of items, each with
            a ``"captions"`` entry that is an iterable of caption strings.
        model_prefix: Output prefix handed to SentencePiece, which writes
            ``<model_prefix>.model`` and ``<model_prefix>.vocab``.
        vocab_size: Target vocabulary size, special pieces included.
        model_type: SentencePiece model type (for example ``"unigram"``).
        txt_path: Where the intermediate one-caption-per-line corpus is
            written. Defaults to the location this script has always used.

    Side effects:
        Overwrites ``txt_path``, writes the model/vocab files and prints a
        completion message.
    """
    # Read explicitly as UTF-8 so non-ASCII captions decode the same way on
    # every platform; the corpus below is written as UTF-8 as well.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    with open(txt_path, "w", encoding="utf-8") as f:
        f.writelines(
            caption.lower() + "\n"
            for item in data
            for caption in item["captions"]
        )

    spm.SentencePieceTrainer.train(
        input=txt_path,
        model_prefix=model_prefix,
        vocab_size=vocab_size,
        model_type=model_type,

        # Special pieces and their fixed ids.
        pad_piece="<pad>",
        bos_piece="<sos>",
        eos_piece="<eos>",
        unk_piece="<unk>",

        pad_id=0,
        bos_id=1,
        eos_id=2,
        unk_id=3
    )

    print("tokenizer training done")
48
+
49
if __name__ == "__main__":
    # Name the output after the configured vocabulary size so that the files
    # on disk (sub_tokenizer<N>.model / .vocab) always describe their real
    # contents instead of being labelled "2000" regardless of the setting.
    sp_vocab_size = params["captioning"]["tokenizer"]["sp_vocab_size"]

    train_sentencepiece(
        json_path="/workspace/data/captioning/annotations/train.json",
        model_prefix=f"/workspace/src/dataset/sub_tokenizer{sp_vocab_size}",
        vocab_size=sp_vocab_size,
        model_type="unigram"
    )
src/debug/test_forward.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append("/workspace/src/models")
3
+ import torch
4
+
5
+
6
+ # model imports
7
+ from lstm import DecoderLSTM
8
+ from gru import DecoderGRU
9
+ from transformer import DecoderTransformer
10
+ # from transformer_scratch import DecoderTransformer
11
+
12
+ from resnet18 import EncoderResnet18
13
+ from efficientnet import EncoderEfficientNetB0
14
+ from convnext import EncoderConvNextTiny
15
+ from mobilenet import EncoderMobileNetV3Small
16
+ from vit import EncoderViTB16
17
+ from swin import EncoderSwinTiny
18
+ from deit import EncoderDeiTTiny
19
+
20
+
21
+
22
# Smoke test: build every decoder/encoder once, push a dummy batch through it
# and print the resulting shapes.

# device
device = torch.device(
    "cuda" if torch.cuda.is_available() else "cpu"
)

print(f"device: {device}")


# caption model dummy input: one feature vector and one token-id sequence
feature = torch.randn(1, 512).to(device)
# alternative input that was tried: torch.randn(1, 49, 512).to(device)

caption = torch.tensor(
    [[0, 1, 2, 3, 4]]
).to(device)


### LSTM Forward ###
lstm_model = DecoderLSTM().to(device)
lstm_out = lstm_model(feature, caption)
print(f"LSTM: {lstm_out.shape}")


### GRU Forward ###
gru_model = DecoderGRU().to(device)
gru_out = gru_model(feature, caption)
print(f"GRU: {gru_out.shape}")


### Transformer Forward ###
transformer_model = DecoderTransformer().to(device)

# NOTE(review): this decoder is called as (caption, feature, 0), unlike the
# recurrent decoders above; the meaning of the third argument is not visible
# from this script - confirm against DecoderTransformer.forward.
# The two extra return values are discarded with `_` so that the builtin
# `map` is not shadowed.
transformer_out, _, _ = transformer_model(
    caption,
    feature,
    0
)

print(f"Transformer: {transformer_out.shape}")


### ResNet18 Forward ###
NUM_CLASSES = 50

resnet18_model = EncoderResnet18(
    num_classes=NUM_CLASSES
).to(device)

dummy_images = torch.randn(
    8, 3, 224, 224
).to(device)

# This script unpacks two values (logits and features) from the ResNet18
# encoder, whereas the encoders below are treated as returning one tensor.
logits, features = resnet18_model(
    dummy_images
)

print(f"ResNet18 logits: {logits.shape}")
print(f"ResNet18 features: {features.shape}")


### Remaining encoders ###
# Each of these is built with the same class count and fed the same dummy
# batch. expected for every entry: torch.Size([8, 50])
single_output_encoders = (
    ("EfficientNet-B0", EncoderEfficientNetB0),
    ("ConvNeXt-Tiny", EncoderConvNextTiny),
    ("MobileNetV3 Small", EncoderMobileNetV3Small),
    ("ViT-B/16", EncoderViTB16),
    ("Swin-T", EncoderSwinTiny),
    ("DeiT-Tiny", EncoderDeiTTiny),
)

for label, encoder_cls in single_output_encoders:
    encoder_model = encoder_cls(
        num_classes=NUM_CLASSES
    ).to(device)

    encoder_out = encoder_model(
        dummy_images
    )

    print(f"{label}: {encoder_out.shape}")
src/engines/__pycache__/captioning_trainer.cpython-310.pyc ADDED
Binary file (765 Bytes). View file
 
src/engines/__pycache__/captioning_validator.cpython-310.pyc ADDED
Binary file (734 Bytes). View file
 
src/engines/__pycache__/classification_trainer.cpython-310.pyc ADDED
Binary file (1.07 kB). View file
 
src/engines/__pycache__/classification_validator.cpython-310.pyc ADDED
Binary file (972 Bytes). View file
 
src/engines/__pycache__/resnet18_decoder_trainer.cpython-310.pyc ADDED
Binary file (731 Bytes). View file
 
src/engines/__pycache__/resnet18_decoder_validator.cpython-310.pyc ADDED
Binary file (1.07 kB). View file
 
src/engines/captioning_trainer.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
def train_one_epoch(
    encoder,
    decoder,
    loader,
    criterion,
    optimizer,
    device,
    scheduler=None
):
    """Run one teacher-forced training epoch for an encoder/decoder pair.

    Args:
        encoder: Image encoder, called as ``encoder(images, return_features=True)``.
        decoder: Caption decoder, called as ``decoder(feature, input_caption)``.
        loader: Iterable of ``(images, captions)`` batches that supports ``len()``.
        criterion: Loss called with flattened logits and flattened targets.
        optimizer: Optimizer updating the trainable parameters.
        device: Device the batch tensors are moved to.
        scheduler: Optional learning-rate scheduler, stepped once per batch.

    Returns:
        The mean per-batch loss over the epoch as a Python float.

    Raises:
        ZeroDivisionError: If ``loader`` is empty.
    """

    encoder.train()
    decoder.train()

    total_loss = 0
    for images, captions in loader:
        images = images.to(device)
        captions = captions.to(device)

        feature = encoder(images, return_features=True)

        # Teacher forcing: feed every token but the last and predict every
        # token but the first.
        input_caption = captions[:, :-1]
        target_caption = captions[:, 1:]

        outputs = decoder(feature, input_caption)

        loss = criterion(
            outputs.reshape(-1, outputs.shape[-1]),
            target_caption.reshape(-1)
        )

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The scheduler must advance only after the optimizer has stepped;
        # stepping it first skips the first value of the schedule.
        if scheduler is not None:
            scheduler.step()

        total_loss += loss.item()

    return total_loss / len(loader)
src/engines/captioning_validator.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
def validation_one_epoch(
    encoder,
    decoder,
    loader,
    criterion,
    device,
):
    """Compute the mean teacher-forced caption loss over a validation loader.

    Both modules are switched to eval mode and no gradients are recorded.

    Args:
        encoder: Image encoder, called as ``encoder(images, return_features=True)``.
        decoder: Caption decoder, called as ``decoder(feature, input_caption)``.
        loader: Iterable of 4-item batches supporting ``len()``; only the first
            two items (images, captions) are used.
        criterion: Loss called with flattened logits and flattened targets.
        device: Device the batch tensors are moved to.

    Returns:
        The mean per-batch loss as a Python float.
    """
    for module in (encoder, decoder):
        module.eval()

    batch_losses = []

    with torch.no_grad():
        for batch in loader:
            # The last two entries of each batch are not needed for the loss.
            images, captions, _unused_a, _unused_b = batch

            images = images.to(device)
            captions = captions.to(device)

            feature = encoder(images, return_features=True)

            # Shifted views of the same sequence: inputs drop the last token,
            # targets drop the first one.
            decoder_inputs = captions[:, :-1]
            decoder_targets = captions[:, 1:]

            outputs = decoder(feature, decoder_inputs)

            # Collapse every leading dimension so each row is one prediction.
            flat_logits = outputs.reshape(-1, outputs.size(-1))
            flat_targets = decoder_targets.flatten()

            batch_losses.append(criterion(flat_logits, flat_targets).item())

    return sum(batch_losses) / len(loader)
src/engines/classification_trainer.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torchmetrics.classification import (
2
+ MulticlassAccuracy
3
+ )
4
+
5
+ from transforms.mixup import mixup_data
6
+ from transforms.cutmix import cutmix_data
7
+
8
+
9
def train_one_epoch(
    model,
    loader,
    criterion,
    optimizer,
    device,
    num_classes,
    augmentation=None
):
    """Train a classifier for one epoch and report loss and accuracy.

    Args:
        model: Classifier whose output is indexed by class along dim 1.
        loader: Iterable of ``(images, labels)`` batches supporting ``len()``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating ``model``.
        device: Device the batch tensors and the metric are moved to.
        num_classes: Number of classes passed to the accuracy metric.
        augmentation: ``"mixup"``, ``"cutmix"`` or anything else for no
            batch-level mixing.

    Returns:
        ``(mean per-batch loss, accuracy)``. Accuracy is always measured
        against the original (unmixed) labels.
    """
    model.train()

    accuracy = MulticlassAccuracy(num_classes=num_classes).to(device)
    use_mixing = augmentation in ("mixup", "cutmix")
    running_loss = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        if use_mixing:
            mix_fn = mixup_data if augmentation == "mixup" else cutmix_data
            images, targets_a, targets_b, lam = mix_fn(images, labels)

        outputs = model(images)

        if use_mixing:
            # Blend the two losses with the same weight used to mix the batch.
            loss = (
                lam * criterion(outputs, targets_a)
                + (1 - lam) * criterion(outputs, targets_b)
            )
        else:
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        accuracy.update(outputs.argmax(dim=1), labels)

    epoch_accuracy = accuracy.compute().item()

    return running_loss / len(loader), epoch_accuracy
src/engines/classification_validator.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from torchmetrics.classification import (
4
+ MulticlassAccuracy,
5
+ MulticlassF1Score,
6
+
7
+ # precision / recall
8
+ MulticlassPrecision,
9
+ MulticlassRecall
10
+ )
11
+
12
+
13
def validation_one_epoch(
    model,
    loader,
    criterion,
    device,
    num_classes
):
    """Evaluate the model for one pass over ``loader`` without gradients.

    Args:
        model: Classifier called as ``model(images)``; switched to eval mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device that batches and metrics are moved to.
        num_classes: Number of classes passed to the torchmetrics metrics.

    Returns:
        Tuple ``(mean_loss, accuracy, macro_f1)`` where ``mean_loss`` is the
        summed batch loss divided by ``len(loader)``.

    Note:
        Macro precision / recall (``MulticlassPrecision`` / ``MulticlassRecall``)
        are imported by the module but are currently not computed or returned.
    """
    model.eval()

    acc_metric = MulticlassAccuracy(num_classes=num_classes).to(device)
    f1_metric = MulticlassF1Score(
        num_classes=num_classes,
        average="macro"
    ).to(device)
    trackers = (acc_metric, f1_metric)

    running_loss = 0

    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            running_loss += criterion(logits, batch_labels).item()

            predictions = logits.argmax(dim=1)
            for tracker in trackers:
                tracker.update(predictions, batch_labels)

    epoch_acc = acc_metric.compute().item()
    epoch_f1 = f1_metric.compute().item()
    mean_loss = running_loss / len(loader)

    return (mean_loss, epoch_acc, epoch_f1)
src/metrics/.ipynb_checkpoints/evaluate_caption-checkpoint.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from pycocoevalcap.bleu.bleu import Bleu
3
+ from pycocoevalcap.cider.cider import Cider
4
+
5
def evaluate_caption(
    all_generated_sentence,
    all_references
):
    """Score generated captions against references with BLEU-1..4 and CIDEr.

    Args:
        all_generated_sentence: Iterable of generated caption strings, one per
            sample.
        all_references: Iterable whose i-th item is an iterable of reference
            captions for sample i.

    Returns:
        Dict with keys ``bleu1``..``bleu4``, ``cider``, plus the ``generated``
        and ``references`` dicts (keyed by sample index) that were scored.
    """
    # Both scorers take {sample_id: [caption, ...]} mappings.
    references_dict = {}
    for sample_id, sentences in enumerate(all_references):
        references_dict[sample_id] = list(sentences)

    generated_dict = {}
    for sample_id, sentence in enumerate(all_generated_sentence):
        generated_dict[sample_id] = [sentence]

    bleu_score, _ = Bleu(4).compute_score(references_dict, generated_dict)
    cider_score, _ = Cider().compute_score(references_dict, generated_dict)

    metric_result = {
        f"bleu{order}": bleu_score[order - 1] for order in range(1, 5)
    }
    metric_result["cider"] = cider_score
    metric_result["generated"] = generated_dict
    metric_result["references"] = references_dict

    return metric_result
src/metrics/.ipynb_checkpoints/make_show_all_caption-checkpoint.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ###### best val loss 지점에서 모든 생성 캡션 출력 및 반환, heatmap 저장 #####
2
+ import torch
3
+ import random
4
+ from utils.checkpoint_manager import load_checkpoint
5
+
6
def make_show_all_caption(
    loader,
    encoder,
    decoder,
    optimizer,
    w2i,
    i2w,
    best_path,
    dec_atten_dir,
    enc_dec_atten_dir,
    SEED,
    heatmap_sample,
    layer,
    device,
    use_subword,
    sp_model_path
):
    """Generate captions with the best checkpoint, plot attention, print samples.

    Restores the checkpoint at ``best_path``, generates a caption for every
    image in ``loader``, decodes the token ids to text, saves decoder
    self-attention and encoder-decoder attention heatmaps for one sample, and
    prints up to five seeded-random captions next to their references.

    Args:
        loader: Iterable yielding ``(images, _, batch_references, file_name)``.
        encoder: Image encoder called as ``encoder(images, return_features=True)``.
        decoder: Caption decoder exposing ``generate``, ``show_dec_atten`` and
            ``show_cross_atten``.
        optimizer: Passed through to ``load_checkpoint``.
        w2i: Token-to-id mapping; must contain ``"<sos>"`` and ``"<eos>"``
            (and ``"<pad>"`` when ``use_subword`` is true).
        i2w: Id-to-token mapping, used when ``use_subword`` is false.
        best_path: Path of the checkpoint to restore.
        dec_atten_dir: Output location handed to ``decoder.show_dec_atten``.
        enc_dec_atten_dir: Output location handed to ``decoder.show_cross_atten``.
        SEED: Seed for choosing which captions to print.
        heatmap_sample: Index of the sample whose attention maps are plotted.
        layer: Decoder layer selector forwarded to the plotting helpers.
        device: Device the images and start tokens are placed on.
        use_subword: Decode with SentencePiece when true, else join ``i2w`` words.
        sp_model_path: SentencePiece model file; read only when ``use_subword``.

    Returns:
        Tuple ``(all_generated_sentence, all_references)``.

    Raises:
        IndexError: If ``heatmap_sample`` is out of range (e.g. empty loader).
    """
    _, best_val_loss = load_checkpoint(
        best_path,
        encoder,
        decoder,
        optimizer,
        device
    )

    sos_id = w2i["<sos>"]
    eos_id = w2i["<eos>"]

    all_references = []
    all_generated_token = []
    all_dec_atten = []
    all_enc_dec_atten = []
    all_images = []
    all_file_name = []

    # Pure inference: without no_grad every stored attention tensor would keep
    # its autograd graph alive for the whole dataset.
    # NOTE(review): encoder/decoder train/eval mode is left untouched here;
    # confirm the caller has already switched them to eval().
    with torch.no_grad():
        for images, _, batch_references, file_name in loader:
            images = images.to(device)

            # Original author's note: features are (B, 49, 512).
            features = encoder(images, return_features=True)

            generated_token, dec_atten, enc_dec_atten = decoder.generate(
                features,
                torch.full((features.size(0),), sos_id, device=device),  # B,
                eos_id,
            )
            # Original author's notes on per-sample attention layout:
            #   dec_atten:     layers, nhead, seq_len, seq_len
            #   enc_dec_atten: layers, nhead, seq_len, 49
            all_dec_atten.extend(dec_atten)
            all_enc_dec_atten.extend(enc_dec_atten)
            all_images.extend(images.cpu())
            # Transpose per-reference tuples into per-sample tuples.
            all_references.extend(list(zip(*batch_references)))
            all_generated_token.extend(generated_token)
            all_file_name.extend(file_name)

    # Load the SentencePiece model once instead of once per sentence.
    sp = None
    special_ids = ()
    if use_subword:
        import sentencepiece as spm

        sp = spm.SentencePieceProcessor()
        sp.load(sp_model_path)
        special_ids = (w2i["<pad>"], sos_id, eos_id)

    all_generated_sentence = []
    for sentence_token in all_generated_token:
        # Cut everything from the first <eos> onwards.
        if eos_id in sentence_token:
            sentence_token = sentence_token[:sentence_token.index(eos_id)]

        if use_subword:
            # Remove special tokens before SentencePiece decoding.
            sentence_token = [
                token for token in sentence_token
                if token not in special_ids
            ]
            sentence = sp.decode(sentence_token)
        else:
            sentence = ' '.join(i2w[i] for i in sentence_token)

        all_generated_sentence.append(sentence)  # one sentence per sample

    heatmap_words = all_generated_sentence[heatmap_sample].split()
    decoder.show_dec_atten(
        all_dec_atten[heatmap_sample],
        heatmap_words,
        layer,
        dec_atten_dir
    )
    decoder.show_cross_atten(
        all_enc_dec_atten[heatmap_sample],
        heatmap_words,
        layer,
        all_images[heatmap_sample],
        enc_dec_atten_dir
    )

    random.seed(SEED)
    all_B = len(all_generated_sentence)
    # Clamp so datasets with fewer than five samples do not raise ValueError.
    sample = random.sample(range(0, all_B), min(5, all_B))
    for i in sample:
        print("-" * 60)
        print(f' {all_file_name[i]}: {all_generated_sentence[i]}')
        print("-" * 60)

        for inx, reference in enumerate(all_references[i], start=1):
            print(f'Reference {inx}: {reference}')
        print("=" * 60)

    print(f'Best Val Loss: {best_val_loss}')

    return all_generated_sentence, all_references
105
+