This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +0 -450
  2. README.md +2 -2
  3. app.py +10 -16
  4. requirements.txt +4 -7
  5. taming-transformers/License.txt +0 -19
  6. taming-transformers/README.md +0 -410
  7. taming-transformers/assets/birddrawnbyachild.png +0 -3
  8. taming-transformers/assets/coco_scene_images_training.svg +0 -2574
  9. taming-transformers/assets/drin.jpg +0 -3
  10. taming-transformers/assets/faceshq.jpg +0 -3
  11. taming-transformers/assets/first_stage_mushrooms.png +0 -3
  12. taming-transformers/assets/first_stage_squirrels.png +0 -3
  13. taming-transformers/assets/imagenet.png +0 -3
  14. taming-transformers/assets/lake_in_the_mountains.png +0 -3
  15. taming-transformers/assets/mountain.jpeg +0 -3
  16. taming-transformers/assets/scene_images_samples.svg +0 -0
  17. taming-transformers/assets/stormy.jpeg +0 -3
  18. taming-transformers/assets/sunset_and_ocean.jpg +0 -3
  19. taming-transformers/assets/teaser.png +0 -3
  20. taming-transformers/configs/coco_cond_stage.yaml +0 -49
  21. taming-transformers/configs/coco_scene_images_transformer.yaml +0 -80
  22. taming-transformers/configs/custom_vqgan.yaml +0 -43
  23. taming-transformers/configs/drin_transformer.yaml +0 -77
  24. taming-transformers/configs/faceshq_transformer.yaml +0 -61
  25. taming-transformers/configs/faceshq_vqgan.yaml +0 -42
  26. taming-transformers/configs/imagenet_vqgan.yaml +0 -42
  27. taming-transformers/configs/imagenetdepth_vqgan.yaml +0 -41
  28. taming-transformers/configs/open_images_scene_images_transformer.yaml +0 -86
  29. taming-transformers/configs/sflckr_cond_stage.yaml +0 -43
  30. taming-transformers/data/ade20k_examples.txt +0 -30
  31. taming-transformers/data/ade20k_images/ADE_val_00000123.jpg +0 -0
  32. taming-transformers/data/ade20k_images/ADE_val_00000125.jpg +0 -0
  33. taming-transformers/data/ade20k_images/ADE_val_00000126.jpg +0 -0
  34. taming-transformers/data/ade20k_images/ADE_val_00000203.jpg +0 -0
  35. taming-transformers/data/ade20k_images/ADE_val_00000262.jpg +0 -0
  36. taming-transformers/data/ade20k_images/ADE_val_00000287.jpg +0 -0
  37. taming-transformers/data/ade20k_images/ADE_val_00000289.jpg +0 -0
  38. taming-transformers/data/ade20k_images/ADE_val_00000303.jpg +0 -0
  39. taming-transformers/data/ade20k_images/ADE_val_00000509.jpg +0 -0
  40. taming-transformers/data/ade20k_images/ADE_val_00000532.jpg +0 -0
  41. taming-transformers/data/ade20k_images/ADE_val_00000573.jpg +0 -0
  42. taming-transformers/data/ade20k_images/ADE_val_00000603.jpg +0 -0
  43. taming-transformers/data/ade20k_images/ADE_val_00000636.jpg +0 -0
  44. taming-transformers/data/ade20k_images/ADE_val_00000734.jpg +0 -0
  45. taming-transformers/data/ade20k_images/ADE_val_00000875.jpg +0 -0
  46. taming-transformers/data/ade20k_images/ADE_val_00000880.jpg +0 -0
  47. taming-transformers/data/ade20k_images/ADE_val_00001177.jpg +0 -0
  48. taming-transformers/data/ade20k_images/ADE_val_00001200.jpg +0 -0
  49. taming-transformers/data/ade20k_images/ADE_val_00001209.jpg +0 -0
  50. taming-transformers/data/ade20k_images/ADE_val_00001388.jpg +0 -0
.gitattributes DELETED
@@ -1,450 +0,0 @@
1
- taming-transformers/assets/birddrawnbyachild.png filter=lfs diff=lfs merge=lfs -text
2
- taming-transformers/assets/drin.jpg filter=lfs diff=lfs merge=lfs -text
3
- taming-transformers/assets/faceshq.jpg filter=lfs diff=lfs merge=lfs -text
4
- taming-transformers/assets/first_stage_mushrooms.png filter=lfs diff=lfs merge=lfs -text
5
- taming-transformers/assets/first_stage_squirrels.png filter=lfs diff=lfs merge=lfs -text
6
- taming-transformers/assets/imagenet.png filter=lfs diff=lfs merge=lfs -text
7
- taming-transformers/assets/lake_in_the_mountains.png filter=lfs diff=lfs merge=lfs -text
8
- taming-transformers/assets/mountain.jpeg filter=lfs diff=lfs merge=lfs -text
9
- taming-transformers/assets/stormy.jpeg filter=lfs diff=lfs merge=lfs -text
10
- taming-transformers/assets/sunset_and_ocean.jpg filter=lfs diff=lfs merge=lfs -text
11
- taming-transformers/assets/teaser.png filter=lfs diff=lfs merge=lfs -text
12
- taming-transformers/data/coco_annotations_100/train2017/000000010005.jpg filter=lfs diff=lfs merge=lfs -text
13
- taming-transformers/data/coco_annotations_100/train2017/000000010014.jpg filter=lfs diff=lfs merge=lfs -text
14
- taming-transformers/data/coco_annotations_100/train2017/000000010015.jpg filter=lfs diff=lfs merge=lfs -text
15
- taming-transformers/data/coco_annotations_100/train2017/000000010023.jpg filter=lfs diff=lfs merge=lfs -text
16
- taming-transformers/data/coco_annotations_100/train2017/000000010024.jpg filter=lfs diff=lfs merge=lfs -text
17
- taming-transformers/data/coco_annotations_100/train2017/000000010037.jpg filter=lfs diff=lfs merge=lfs -text
18
- taming-transformers/data/coco_annotations_100/train2017/000000010039.jpg filter=lfs diff=lfs merge=lfs -text
19
- taming-transformers/data/coco_annotations_100/train2017/000000010040.jpg filter=lfs diff=lfs merge=lfs -text
20
- taming-transformers/data/coco_annotations_100/train2017/000000010041.jpg filter=lfs diff=lfs merge=lfs -text
21
- taming-transformers/data/coco_annotations_100/train2017/000000010046.jpg filter=lfs diff=lfs merge=lfs -text
22
- taming-transformers/data/coco_annotations_100/train2017/000000010056.jpg filter=lfs diff=lfs merge=lfs -text
23
- taming-transformers/data/coco_annotations_100/train2017/000000010058.jpg filter=lfs diff=lfs merge=lfs -text
24
- taming-transformers/data/coco_annotations_100/train2017/000000010069.jpg filter=lfs diff=lfs merge=lfs -text
25
- taming-transformers/data/coco_annotations_100/train2017/000000010073.jpg filter=lfs diff=lfs merge=lfs -text
26
- taming-transformers/data/coco_annotations_100/train2017/000000010077.jpg filter=lfs diff=lfs merge=lfs -text
27
- taming-transformers/data/coco_annotations_100/train2017/000000010082.jpg filter=lfs diff=lfs merge=lfs -text
28
- taming-transformers/data/coco_annotations_100/train2017/000000010083.jpg filter=lfs diff=lfs merge=lfs -text
29
- taming-transformers/data/coco_annotations_100/train2017/000000010084.jpg filter=lfs diff=lfs merge=lfs -text
30
- taming-transformers/data/coco_annotations_100/train2017/000000010094.jpg filter=lfs diff=lfs merge=lfs -text
31
- taming-transformers/data/coco_annotations_100/train2017/000000010097.jpg filter=lfs diff=lfs merge=lfs -text
32
- taming-transformers/data/coco_annotations_100/train2017/000000010104.jpg filter=lfs diff=lfs merge=lfs -text
33
- taming-transformers/data/coco_annotations_100/train2017/000000010114.jpg filter=lfs diff=lfs merge=lfs -text
34
- taming-transformers/data/coco_annotations_100/train2017/000000010115.jpg filter=lfs diff=lfs merge=lfs -text
35
- taming-transformers/data/coco_annotations_100/train2017/000000010123.jpg filter=lfs diff=lfs merge=lfs -text
36
- taming-transformers/data/coco_annotations_100/train2017/000000010125.jpg filter=lfs diff=lfs merge=lfs -text
37
- taming-transformers/data/coco_annotations_100/train2017/000000010130.jpg filter=lfs diff=lfs merge=lfs -text
38
- taming-transformers/data/coco_annotations_100/train2017/000000010136.jpg filter=lfs diff=lfs merge=lfs -text
39
- taming-transformers/data/coco_annotations_100/train2017/000000010138.jpg filter=lfs diff=lfs merge=lfs -text
40
- taming-transformers/data/coco_annotations_100/train2017/000000010142.jpg filter=lfs diff=lfs merge=lfs -text
41
- taming-transformers/data/coco_annotations_100/train2017/000000010145.jpg filter=lfs diff=lfs merge=lfs -text
42
- taming-transformers/data/coco_annotations_100/train2017/000000010149.jpg filter=lfs diff=lfs merge=lfs -text
43
- taming-transformers/data/coco_annotations_100/train2017/000000010161.jpg filter=lfs diff=lfs merge=lfs -text
44
- taming-transformers/data/coco_annotations_100/train2017/000000010166.jpg filter=lfs diff=lfs merge=lfs -text
45
- taming-transformers/data/coco_annotations_100/train2017/000000010175.jpg filter=lfs diff=lfs merge=lfs -text
46
- taming-transformers/data/coco_annotations_100/train2017/000000010176.jpg filter=lfs diff=lfs merge=lfs -text
47
- taming-transformers/data/coco_annotations_100/train2017/000000010179.jpg filter=lfs diff=lfs merge=lfs -text
48
- taming-transformers/data/coco_annotations_100/train2017/000000010192.jpg filter=lfs diff=lfs merge=lfs -text
49
- taming-transformers/data/coco_annotations_100/train2017/000000010196.jpg filter=lfs diff=lfs merge=lfs -text
50
- taming-transformers/data/coco_annotations_100/train2017/000000010211.jpg filter=lfs diff=lfs merge=lfs -text
51
- taming-transformers/data/coco_annotations_100/train2017/000000010216.jpg filter=lfs diff=lfs merge=lfs -text
52
- taming-transformers/data/coco_annotations_100/train2017/000000010217.jpg filter=lfs diff=lfs merge=lfs -text
53
- taming-transformers/data/coco_annotations_100/train2017/000000010219.jpg filter=lfs diff=lfs merge=lfs -text
54
- taming-transformers/data/coco_annotations_100/train2017/000000010229.jpg filter=lfs diff=lfs merge=lfs -text
55
- taming-transformers/data/coco_annotations_100/train2017/000000010230.jpg filter=lfs diff=lfs merge=lfs -text
56
- taming-transformers/data/coco_annotations_100/train2017/000000010232.jpg filter=lfs diff=lfs merge=lfs -text
57
- taming-transformers/data/coco_annotations_100/train2017/000000010239.jpg filter=lfs diff=lfs merge=lfs -text
58
- taming-transformers/data/coco_annotations_100/train2017/000000010241.jpg filter=lfs diff=lfs merge=lfs -text
59
- taming-transformers/data/coco_annotations_100/train2017/000000010244.jpg filter=lfs diff=lfs merge=lfs -text
60
- taming-transformers/data/coco_annotations_100/train2017/000000010245.jpg filter=lfs diff=lfs merge=lfs -text
61
- taming-transformers/data/coco_annotations_100/train2017/000000010248.jpg filter=lfs diff=lfs merge=lfs -text
62
- taming-transformers/data/coco_annotations_100/train2017/000000010249.jpg filter=lfs diff=lfs merge=lfs -text
63
- taming-transformers/data/coco_annotations_100/train2017/000000010256.jpg filter=lfs diff=lfs merge=lfs -text
64
- taming-transformers/data/coco_annotations_100/train2017/000000010263.jpg filter=lfs diff=lfs merge=lfs -text
65
- taming-transformers/data/coco_annotations_100/train2017/000000010275.jpg filter=lfs diff=lfs merge=lfs -text
66
- taming-transformers/data/coco_annotations_100/train2017/000000010276.jpg filter=lfs diff=lfs merge=lfs -text
67
- taming-transformers/data/coco_annotations_100/train2017/000000010281.jpg filter=lfs diff=lfs merge=lfs -text
68
- taming-transformers/data/coco_annotations_100/train2017/000000010290.jpg filter=lfs diff=lfs merge=lfs -text
69
- taming-transformers/data/coco_annotations_100/train2017/000000010303.jpg filter=lfs diff=lfs merge=lfs -text
70
- taming-transformers/data/coco_annotations_100/train2017/000000010318.jpg filter=lfs diff=lfs merge=lfs -text
71
- taming-transformers/data/coco_annotations_100/train2017/000000010319.jpg filter=lfs diff=lfs merge=lfs -text
72
- taming-transformers/data/coco_annotations_100/train2017/000000010321.jpg filter=lfs diff=lfs merge=lfs -text
73
- taming-transformers/data/coco_annotations_100/train2017/000000010324.jpg filter=lfs diff=lfs merge=lfs -text
74
- taming-transformers/data/coco_annotations_100/train2017/000000010327.jpg filter=lfs diff=lfs merge=lfs -text
75
- taming-transformers/data/coco_annotations_100/train2017/000000010337.jpg filter=lfs diff=lfs merge=lfs -text
76
- taming-transformers/data/coco_annotations_100/train2017/000000010342.jpg filter=lfs diff=lfs merge=lfs -text
77
- taming-transformers/data/coco_annotations_100/train2017/000000010343.jpg filter=lfs diff=lfs merge=lfs -text
78
- taming-transformers/data/coco_annotations_100/train2017/000000010346.jpg filter=lfs diff=lfs merge=lfs -text
79
- taming-transformers/data/coco_annotations_100/train2017/000000010358.jpg filter=lfs diff=lfs merge=lfs -text
80
- taming-transformers/data/coco_annotations_100/train2017/000000010369.jpg filter=lfs diff=lfs merge=lfs -text
81
- taming-transformers/data/coco_annotations_100/train2017/000000010386.jpg filter=lfs diff=lfs merge=lfs -text
82
- taming-transformers/data/coco_annotations_100/train2017/000000010393.jpg filter=lfs diff=lfs merge=lfs -text
83
- taming-transformers/data/coco_annotations_100/train2017/000000010395.jpg filter=lfs diff=lfs merge=lfs -text
84
- taming-transformers/data/coco_annotations_100/train2017/000000010400.jpg filter=lfs diff=lfs merge=lfs -text
85
- taming-transformers/data/coco_annotations_100/train2017/000000010403.jpg filter=lfs diff=lfs merge=lfs -text
86
- taming-transformers/data/coco_annotations_100/train2017/000000010405.jpg filter=lfs diff=lfs merge=lfs -text
87
- taming-transformers/data/coco_annotations_100/train2017/000000010407.jpg filter=lfs diff=lfs merge=lfs -text
88
- taming-transformers/data/coco_annotations_100/train2017/000000010414.jpg filter=lfs diff=lfs merge=lfs -text
89
- taming-transformers/data/coco_annotations_100/train2017/000000010420.jpg filter=lfs diff=lfs merge=lfs -text
90
- taming-transformers/data/coco_annotations_100/train2017/000000010421.jpg filter=lfs diff=lfs merge=lfs -text
91
- taming-transformers/data/coco_annotations_100/train2017/000000010428.jpg filter=lfs diff=lfs merge=lfs -text
92
- taming-transformers/data/coco_annotations_100/train2017/000000010430.jpg filter=lfs diff=lfs merge=lfs -text
93
- taming-transformers/data/coco_annotations_100/train2017/000000010432.jpg filter=lfs diff=lfs merge=lfs -text
94
- taming-transformers/data/coco_annotations_100/train2017/000000010434.jpg filter=lfs diff=lfs merge=lfs -text
95
- taming-transformers/data/coco_annotations_100/train2017/000000010442.jpg filter=lfs diff=lfs merge=lfs -text
96
- taming-transformers/data/coco_annotations_100/train2017/000000010445.jpg filter=lfs diff=lfs merge=lfs -text
97
- taming-transformers/data/coco_annotations_100/train2017/000000010449.jpg filter=lfs diff=lfs merge=lfs -text
98
- taming-transformers/data/coco_annotations_100/train2017/000000010463.jpg filter=lfs diff=lfs merge=lfs -text
99
- taming-transformers/data/coco_annotations_100/val2017/000000010092.jpg filter=lfs diff=lfs merge=lfs -text
100
- taming-transformers/data/coco_annotations_100/val2017/000000010583.jpg filter=lfs diff=lfs merge=lfs -text
101
- taming-transformers/data/coco_annotations_100/val2017/000000010707.jpg filter=lfs diff=lfs merge=lfs -text
102
- taming-transformers/data/coco_annotations_100/val2017/000000010764.jpg filter=lfs diff=lfs merge=lfs -text
103
- taming-transformers/data/coco_annotations_100/val2017/000000011122.jpg filter=lfs diff=lfs merge=lfs -text
104
- taming-transformers/data/coco_annotations_100/val2017/000000011149.jpg filter=lfs diff=lfs merge=lfs -text
105
- taming-transformers/data/coco_annotations_100/val2017/000000011197.jpg filter=lfs diff=lfs merge=lfs -text
106
- taming-transformers/data/coco_annotations_100/val2017/000000011511.jpg filter=lfs diff=lfs merge=lfs -text
107
- taming-transformers/data/coco_annotations_100/val2017/000000011615.jpg filter=lfs diff=lfs merge=lfs -text
108
- taming-transformers/data/coco_annotations_100/val2017/000000011699.jpg filter=lfs diff=lfs merge=lfs -text
109
- taming-transformers/data/coco_annotations_100/val2017/000000011760.jpg filter=lfs diff=lfs merge=lfs -text
110
- taming-transformers/data/coco_annotations_100/val2017/000000012062.jpg filter=lfs diff=lfs merge=lfs -text
111
- taming-transformers/data/coco_annotations_100/val2017/000000012120.jpg filter=lfs diff=lfs merge=lfs -text
112
- taming-transformers/data/coco_annotations_100/val2017/000000012280.jpg filter=lfs diff=lfs merge=lfs -text
113
- taming-transformers/data/coco_annotations_100/val2017/000000012576.jpg filter=lfs diff=lfs merge=lfs -text
114
- taming-transformers/data/coco_annotations_100/val2017/000000012639.jpg filter=lfs diff=lfs merge=lfs -text
115
- taming-transformers/data/coco_annotations_100/val2017/000000012670.jpg filter=lfs diff=lfs merge=lfs -text
116
- taming-transformers/data/coco_annotations_100/val2017/000000012748.jpg filter=lfs diff=lfs merge=lfs -text
117
- taming-transformers/data/coco_annotations_100/val2017/000000013004.jpg filter=lfs diff=lfs merge=lfs -text
118
- taming-transformers/data/coco_annotations_100/val2017/000000013177.jpg filter=lfs diff=lfs merge=lfs -text
119
- taming-transformers/data/coco_annotations_100/val2017/000000013201.jpg filter=lfs diff=lfs merge=lfs -text
120
- taming-transformers/data/coco_annotations_100/val2017/000000013291.jpg filter=lfs diff=lfs merge=lfs -text
121
- taming-transformers/data/coco_annotations_100/val2017/000000013348.jpg filter=lfs diff=lfs merge=lfs -text
122
- taming-transformers/data/coco_annotations_100/val2017/000000013546.jpg filter=lfs diff=lfs merge=lfs -text
123
- taming-transformers/data/coco_annotations_100/val2017/000000013659.jpg filter=lfs diff=lfs merge=lfs -text
124
- taming-transformers/data/coco_annotations_100/val2017/000000013729.jpg filter=lfs diff=lfs merge=lfs -text
125
- taming-transformers/data/coco_annotations_100/val2017/000000013774.jpg filter=lfs diff=lfs merge=lfs -text
126
- taming-transformers/data/coco_annotations_100/val2017/000000013923.jpg filter=lfs diff=lfs merge=lfs -text
127
- taming-transformers/data/coco_annotations_100/val2017/000000014007.jpg filter=lfs diff=lfs merge=lfs -text
128
- taming-transformers/data/coco_annotations_100/val2017/000000014038.jpg filter=lfs diff=lfs merge=lfs -text
129
- taming-transformers/data/coco_annotations_100/val2017/000000014226.jpg filter=lfs diff=lfs merge=lfs -text
130
- taming-transformers/data/coco_annotations_100/val2017/000000014380.jpg filter=lfs diff=lfs merge=lfs -text
131
- taming-transformers/data/coco_annotations_100/val2017/000000014439.jpg filter=lfs diff=lfs merge=lfs -text
132
- taming-transformers/data/coco_annotations_100/val2017/000000014473.jpg filter=lfs diff=lfs merge=lfs -text
133
- taming-transformers/data/coco_annotations_100/val2017/000000014831.jpg filter=lfs diff=lfs merge=lfs -text
134
- taming-transformers/data/coco_annotations_100/val2017/000000014888.jpg filter=lfs diff=lfs merge=lfs -text
135
- taming-transformers/data/coco_annotations_100/val2017/000000015079.jpg filter=lfs diff=lfs merge=lfs -text
136
- taming-transformers/data/coco_annotations_100/val2017/000000015254.jpg filter=lfs diff=lfs merge=lfs -text
137
- taming-transformers/data/coco_annotations_100/val2017/000000015272.jpg filter=lfs diff=lfs merge=lfs -text
138
- taming-transformers/data/coco_annotations_100/val2017/000000015278.jpg filter=lfs diff=lfs merge=lfs -text
139
- taming-transformers/data/coco_annotations_100/val2017/000000015335.jpg filter=lfs diff=lfs merge=lfs -text
140
- taming-transformers/data/coco_annotations_100/val2017/000000015338.jpg filter=lfs diff=lfs merge=lfs -text
141
- taming-transformers/data/coco_annotations_100/val2017/000000015440.jpg filter=lfs diff=lfs merge=lfs -text
142
- taming-transformers/data/coco_annotations_100/val2017/000000015517.jpg filter=lfs diff=lfs merge=lfs -text
143
- taming-transformers/data/coco_annotations_100/val2017/000000015597.jpg filter=lfs diff=lfs merge=lfs -text
144
- taming-transformers/data/coco_annotations_100/val2017/000000015660.jpg filter=lfs diff=lfs merge=lfs -text
145
- taming-transformers/data/coco_annotations_100/val2017/000000015746.jpg filter=lfs diff=lfs merge=lfs -text
146
- taming-transformers/data/coco_annotations_100/val2017/000000015751.jpg filter=lfs diff=lfs merge=lfs -text
147
- taming-transformers/data/coco_annotations_100/val2017/000000015956.jpg filter=lfs diff=lfs merge=lfs -text
148
- taming-transformers/data/coco_annotations_100/val2017/000000016010.jpg filter=lfs diff=lfs merge=lfs -text
149
- taming-transformers/data/coco_annotations_100/val2017/000000016228.jpg filter=lfs diff=lfs merge=lfs -text
150
- taming-transformers/data/coco_annotations_100/val2017/000000016249.jpg filter=lfs diff=lfs merge=lfs -text
151
- taming-transformers/data/coco_annotations_100/val2017/000000016439.jpg filter=lfs diff=lfs merge=lfs -text
152
- taming-transformers/data/coco_annotations_100/val2017/000000016451.jpg filter=lfs diff=lfs merge=lfs -text
153
- taming-transformers/data/coco_annotations_100/val2017/000000016598.jpg filter=lfs diff=lfs merge=lfs -text
154
- taming-transformers/data/coco_annotations_100/val2017/000000016958.jpg filter=lfs diff=lfs merge=lfs -text
155
- taming-transformers/data/coco_annotations_100/val2017/000000017029.jpg filter=lfs diff=lfs merge=lfs -text
156
- taming-transformers/data/coco_annotations_100/val2017/000000017031.jpg filter=lfs diff=lfs merge=lfs -text
157
- taming-transformers/data/coco_annotations_100/val2017/000000017115.jpg filter=lfs diff=lfs merge=lfs -text
158
- taming-transformers/data/coco_annotations_100/val2017/000000017178.jpg filter=lfs diff=lfs merge=lfs -text
159
- taming-transformers/data/coco_annotations_100/val2017/000000017182.jpg filter=lfs diff=lfs merge=lfs -text
160
- taming-transformers/data/coco_annotations_100/val2017/000000017207.jpg filter=lfs diff=lfs merge=lfs -text
161
- taming-transformers/data/coco_annotations_100/val2017/000000017379.jpg filter=lfs diff=lfs merge=lfs -text
162
- taming-transformers/data/coco_annotations_100/val2017/000000017436.jpg filter=lfs diff=lfs merge=lfs -text
163
- taming-transformers/data/coco_annotations_100/val2017/000000017627.jpg filter=lfs diff=lfs merge=lfs -text
164
- taming-transformers/data/coco_annotations_100/val2017/000000017714.jpg filter=lfs diff=lfs merge=lfs -text
165
- taming-transformers/data/coco_annotations_100/val2017/000000017899.jpg filter=lfs diff=lfs merge=lfs -text
166
- taming-transformers/data/coco_annotations_100/val2017/000000017905.jpg filter=lfs diff=lfs merge=lfs -text
167
- taming-transformers/data/coco_annotations_100/val2017/000000017959.jpg filter=lfs diff=lfs merge=lfs -text
168
- taming-transformers/data/coco_annotations_100/val2017/000000018150.jpg filter=lfs diff=lfs merge=lfs -text
169
- taming-transformers/data/coco_annotations_100/val2017/000000018193.jpg filter=lfs diff=lfs merge=lfs -text
170
- taming-transformers/data/coco_annotations_100/val2017/000000018380.jpg filter=lfs diff=lfs merge=lfs -text
171
- taming-transformers/data/coco_annotations_100/val2017/000000018491.jpg filter=lfs diff=lfs merge=lfs -text
172
- taming-transformers/data/coco_annotations_100/val2017/000000018519.jpg filter=lfs diff=lfs merge=lfs -text
173
- taming-transformers/data/coco_annotations_100/val2017/000000018575.jpg filter=lfs diff=lfs merge=lfs -text
174
- taming-transformers/data/coco_annotations_100/val2017/000000018737.jpg filter=lfs diff=lfs merge=lfs -text
175
- taming-transformers/data/coco_annotations_100/val2017/000000018837.jpg filter=lfs diff=lfs merge=lfs -text
176
- taming-transformers/data/coco_annotations_100/val2017/000000019042.jpg filter=lfs diff=lfs merge=lfs -text
177
- taming-transformers/data/coco_annotations_100/val2017/000000019109.jpg filter=lfs diff=lfs merge=lfs -text
178
- taming-transformers/data/coco_annotations_100/val2017/000000019221.jpg filter=lfs diff=lfs merge=lfs -text
179
- taming-transformers/data/coco_annotations_100/val2017/000000019402.jpg filter=lfs diff=lfs merge=lfs -text
180
- taming-transformers/data/coco_annotations_100/val2017/000000019432.jpg filter=lfs diff=lfs merge=lfs -text
181
- taming-transformers/data/coco_annotations_100/val2017/000000019924.jpg filter=lfs diff=lfs merge=lfs -text
182
- taming-transformers/data/coco_annotations_100/val2017/000000020059.jpg filter=lfs diff=lfs merge=lfs -text
183
- taming-transformers/data/coco_annotations_100/val2017/000000020107.jpg filter=lfs diff=lfs merge=lfs -text
184
- taming-transformers/data/coco_annotations_100/val2017/000000020247.jpg filter=lfs diff=lfs merge=lfs -text
185
- taming-transformers/data/coco_annotations_100/val2017/000000020333.jpg filter=lfs diff=lfs merge=lfs -text
186
- taming-transformers/data/coco_images/000000018380.jpg filter=lfs diff=lfs merge=lfs -text
187
- taming-transformers/data/coco_images/000000052507.jpg filter=lfs diff=lfs merge=lfs -text
188
- taming-transformers/data/coco_images/000000057672.jpg filter=lfs diff=lfs merge=lfs -text
189
- taming-transformers/data/coco_images/000000064898.jpg filter=lfs diff=lfs merge=lfs -text
190
- taming-transformers/data/coco_images/000000110638.jpg filter=lfs diff=lfs merge=lfs -text
191
- taming-transformers/data/coco_images/000000119445.jpg filter=lfs diff=lfs merge=lfs -text
192
- taming-transformers/data/coco_images/000000128658.jpg filter=lfs diff=lfs merge=lfs -text
193
- taming-transformers/data/coco_images/000000154358.jpg filter=lfs diff=lfs merge=lfs -text
194
- taming-transformers/data/coco_images/000000166259.jpg filter=lfs diff=lfs merge=lfs -text
195
- taming-transformers/data/coco_images/000000166563.jpg filter=lfs diff=lfs merge=lfs -text
196
- taming-transformers/data/coco_images/000000185599.jpg filter=lfs diff=lfs merge=lfs -text
197
- taming-transformers/data/coco_images/000000205834.jpg filter=lfs diff=lfs merge=lfs -text
198
- taming-transformers/data/coco_images/000000231169.jpg filter=lfs diff=lfs merge=lfs -text
199
- taming-transformers/data/coco_images/000000237928.jpg filter=lfs diff=lfs merge=lfs -text
200
- taming-transformers/data/coco_images/000000255824.jpg filter=lfs diff=lfs merge=lfs -text
201
- taming-transformers/data/coco_images/000000256775.jpg filter=lfs diff=lfs merge=lfs -text
202
- taming-transformers/data/coco_images/000000303653.jpg filter=lfs diff=lfs merge=lfs -text
203
- taming-transformers/data/coco_images/000000323895.jpg filter=lfs diff=lfs merge=lfs -text
204
- taming-transformers/data/coco_images/000000335529.jpg filter=lfs diff=lfs merge=lfs -text
205
- taming-transformers/data/coco_images/000000348045.jpg filter=lfs diff=lfs merge=lfs -text
206
- taming-transformers/data/coco_images/000000348481.jpg filter=lfs diff=lfs merge=lfs -text
207
- taming-transformers/data/coco_images/000000356347.jpg filter=lfs diff=lfs merge=lfs -text
208
- taming-transformers/data/coco_images/000000361180.jpg filter=lfs diff=lfs merge=lfs -text
209
- taming-transformers/data/coco_images/000000406997.jpg filter=lfs diff=lfs merge=lfs -text
210
- taming-transformers/data/coco_images/000000491464.jpg filter=lfs diff=lfs merge=lfs -text
211
- taming-transformers/data/coco_images/000000517069.jpg filter=lfs diff=lfs merge=lfs -text
212
- taming-transformers/data/coco_images/000000522393.jpg filter=lfs diff=lfs merge=lfs -text
213
- taming-transformers/data/coco_images/000000569273.jpg filter=lfs diff=lfs merge=lfs -text
214
- taming-transformers/data/drin_depth/n01795545/ILSVRC2012_val_00023344.png filter=lfs diff=lfs merge=lfs -text
215
- taming-transformers/data/drin_depth/n01819313/ILSVRC2012_val_00003068.png filter=lfs diff=lfs merge=lfs -text
216
- taming-transformers/data/drin_depth/n01820546/ILSVRC2012_val_00034784.png filter=lfs diff=lfs merge=lfs -text
217
- taming-transformers/data/drin_depth/n01820546/ILSVRC2012_val_00047491.png filter=lfs diff=lfs merge=lfs -text
218
- taming-transformers/data/drin_depth/n01828970/ILSVRC2012_val_00001336.png filter=lfs diff=lfs merge=lfs -text
219
- taming-transformers/data/drin_depth/n01828970/ILSVRC2012_val_00008236.png filter=lfs diff=lfs merge=lfs -text
220
- taming-transformers/data/drin_depth/n01828970/ILSVRC2012_val_00046802.png filter=lfs diff=lfs merge=lfs -text
221
- taming-transformers/data/drin_depth/n01843065/ILSVRC2012_val_00022439.png filter=lfs diff=lfs merge=lfs -text
222
- taming-transformers/data/drin_depth/n01847000/ILSVRC2012_val_00022364.png filter=lfs diff=lfs merge=lfs -text
223
- taming-transformers/data/drin_depth/n02085782/ILSVRC2012_val_00012298.png filter=lfs diff=lfs merge=lfs -text
224
- taming-transformers/data/drin_depth/n02086646/ILSVRC2012_val_00011473.png filter=lfs diff=lfs merge=lfs -text
225
- taming-transformers/data/drin_depth/n02088466/ILSVRC2012_val_00013651.png filter=lfs diff=lfs merge=lfs -text
226
- taming-transformers/data/drin_depth/n02089973/ILSVRC2012_val_00000028.png filter=lfs diff=lfs merge=lfs -text
227
- taming-transformers/data/drin_depth/n02093256/ILSVRC2012_val_00046547.png filter=lfs diff=lfs merge=lfs -text
228
- taming-transformers/data/drin_depth/n02096294/ILSVRC2012_val_00042133.png filter=lfs diff=lfs merge=lfs -text
229
- taming-transformers/data/drin_depth/n02099601/ILSVRC2012_val_00005697.png filter=lfs diff=lfs merge=lfs -text
230
- taming-transformers/data/drin_depth/n02099712/ILSVRC2012_val_00023471.png filter=lfs diff=lfs merge=lfs -text
231
- taming-transformers/data/drin_depth/n02100877/ILSVRC2012_val_00039863.png filter=lfs diff=lfs merge=lfs -text
232
- taming-transformers/data/drin_depth/n02101006/ILSVRC2012_val_00032333.png filter=lfs diff=lfs merge=lfs -text
233
- taming-transformers/data/drin_depth/n02101006/ILSVRC2012_val_00047325.png filter=lfs diff=lfs merge=lfs -text
234
- taming-transformers/data/drin_depth/n02101556/ILSVRC2012_val_00030540.png filter=lfs diff=lfs merge=lfs -text
235
- taming-transformers/data/drin_depth/n02102318/ILSVRC2012_val_00024691.png filter=lfs diff=lfs merge=lfs -text
236
- taming-transformers/data/drin_depth/n02105505/ILSVRC2012_val_00031252.png filter=lfs diff=lfs merge=lfs -text
237
- taming-transformers/data/drin_depth/n02110627/ILSVRC2012_val_00008310.png filter=lfs diff=lfs merge=lfs -text
238
- taming-transformers/data/drin_depth/n02111889/ILSVRC2012_val_00042625.png filter=lfs diff=lfs merge=lfs -text
239
- taming-transformers/data/drin_images/n01795545/ILSVRC2012_val_00023344.JPEG filter=lfs diff=lfs merge=lfs -text
240
- taming-transformers/data/drin_images/n01819313/ILSVRC2012_val_00003068.JPEG filter=lfs diff=lfs merge=lfs -text
241
- taming-transformers/data/drin_images/n01820546/ILSVRC2012_val_00034784.JPEG filter=lfs diff=lfs merge=lfs -text
242
- taming-transformers/data/drin_images/n01828970/ILSVRC2012_val_00001336.JPEG filter=lfs diff=lfs merge=lfs -text
243
- taming-transformers/data/drin_images/n01828970/ILSVRC2012_val_00008236.JPEG filter=lfs diff=lfs merge=lfs -text
244
- taming-transformers/data/drin_images/n01828970/ILSVRC2012_val_00046802.JPEG filter=lfs diff=lfs merge=lfs -text
245
- taming-transformers/data/drin_images/n01843065/ILSVRC2012_val_00022439.JPEG filter=lfs diff=lfs merge=lfs -text
246
- taming-transformers/data/drin_images/n01847000/ILSVRC2012_val_00022364.JPEG filter=lfs diff=lfs merge=lfs -text
247
- taming-transformers/data/drin_images/n02086646/ILSVRC2012_val_00011473.JPEG filter=lfs diff=lfs merge=lfs -text
248
- taming-transformers/data/drin_images/n02089973/ILSVRC2012_val_00000028.JPEG filter=lfs diff=lfs merge=lfs -text
249
- taming-transformers/data/drin_images/n02096294/ILSVRC2012_val_00042133.JPEG filter=lfs diff=lfs merge=lfs -text
250
- taming-transformers/data/drin_images/n02099601/ILSVRC2012_val_00005697.JPEG filter=lfs diff=lfs merge=lfs -text
251
- taming-transformers/data/drin_images/n02100877/ILSVRC2012_val_00039863.JPEG filter=lfs diff=lfs merge=lfs -text
252
- taming-transformers/data/drin_images/n02101006/ILSVRC2012_val_00032333.JPEG filter=lfs diff=lfs merge=lfs -text
253
- taming-transformers/data/drin_images/n02101006/ILSVRC2012_val_00047325.JPEG filter=lfs diff=lfs merge=lfs -text
254
- taming-transformers/data/drin_images/n02101556/ILSVRC2012_val_00030540.JPEG filter=lfs diff=lfs merge=lfs -text
255
- taming-transformers/data/drin_images/n02102318/ILSVRC2012_val_00024691.JPEG filter=lfs diff=lfs merge=lfs -text
256
- taming-transformers/data/drin_images/n02110627/ILSVRC2012_val_00008310.JPEG filter=lfs diff=lfs merge=lfs -text
257
- taming-transformers/data/open_images_annotations_100/train/000ab31e6be35fed.jpg filter=lfs diff=lfs merge=lfs -text
258
- taming-transformers/data/open_images_annotations_100/train/000ab7bec71cc50a.jpg filter=lfs diff=lfs merge=lfs -text
259
- taming-transformers/data/open_images_annotations_100/train/000ab8c20b3e5b58.jpg filter=lfs diff=lfs merge=lfs -text
260
- taming-transformers/data/open_images_annotations_100/train/000abc075d659122.jpg filter=lfs diff=lfs merge=lfs -text
261
- taming-transformers/data/open_images_annotations_100/train/000abe5eddc5b303.jpg filter=lfs diff=lfs merge=lfs -text
262
- taming-transformers/data/open_images_annotations_100/train/000ac34008b0ba4c.jpg filter=lfs diff=lfs merge=lfs -text
263
- taming-transformers/data/open_images_annotations_100/train/000ac8c676b6077a.jpg filter=lfs diff=lfs merge=lfs -text
264
- taming-transformers/data/open_images_annotations_100/train/000ac95750ac7399.jpg filter=lfs diff=lfs merge=lfs -text
265
- taming-transformers/data/open_images_annotations_100/train/000acf666d991c39.jpg filter=lfs diff=lfs merge=lfs -text
266
- taming-transformers/data/open_images_annotations_100/train/000ad0ecfb21ee63.jpg filter=lfs diff=lfs merge=lfs -text
267
- taming-transformers/data/open_images_annotations_100/train/000ad20b5e452b24.jpg filter=lfs diff=lfs merge=lfs -text
268
- taming-transformers/data/open_images_annotations_100/train/000ad3d42653f5f6.jpg filter=lfs diff=lfs merge=lfs -text
269
- taming-transformers/data/open_images_annotations_100/train/000ad6c520be9ec5.jpg filter=lfs diff=lfs merge=lfs -text
270
- taming-transformers/data/open_images_annotations_100/train/000ad6fa67b5ad96.jpg filter=lfs diff=lfs merge=lfs -text
271
- taming-transformers/data/open_images_annotations_100/train/000adcdd7244ce4a.jpg filter=lfs diff=lfs merge=lfs -text
272
- taming-transformers/data/open_images_annotations_100/train/000adef7197e3118.jpg filter=lfs diff=lfs merge=lfs -text
273
- taming-transformers/data/open_images_annotations_100/train/000adfe5b817011c.jpg filter=lfs diff=lfs merge=lfs -text
274
- taming-transformers/data/open_images_annotations_100/train/000ae235808cc1e8.jpg filter=lfs diff=lfs merge=lfs -text
275
- taming-transformers/data/open_images_annotations_100/train/000ae28755d2d20e.jpg filter=lfs diff=lfs merge=lfs -text
276
- taming-transformers/data/open_images_annotations_100/train/000aecd78b230135.jpg filter=lfs diff=lfs merge=lfs -text
277
- taming-transformers/data/open_images_annotations_100/train/000aee0af66d4237.jpg filter=lfs diff=lfs merge=lfs -text
278
- taming-transformers/data/open_images_annotations_100/train/000af631fb329557.jpg filter=lfs diff=lfs merge=lfs -text
279
- taming-transformers/data/open_images_annotations_100/train/000b06c0eed42a4c.jpg filter=lfs diff=lfs merge=lfs -text
280
- taming-transformers/data/open_images_annotations_100/train/000b093da01e5bfe.jpg filter=lfs diff=lfs merge=lfs -text
281
- taming-transformers/data/open_images_annotations_100/train/000b09d5d3fc821f.jpg filter=lfs diff=lfs merge=lfs -text
282
- taming-transformers/data/open_images_annotations_100/train/000b0f5159f54105.jpg filter=lfs diff=lfs merge=lfs -text
283
- taming-transformers/data/open_images_annotations_100/train/000b168e791f591d.jpg filter=lfs diff=lfs merge=lfs -text
284
- taming-transformers/data/open_images_annotations_100/train/000b1971d8daaeef.jpg filter=lfs diff=lfs merge=lfs -text
285
- taming-transformers/data/open_images_annotations_100/train/000b1b3b85edd850.jpg filter=lfs diff=lfs merge=lfs -text
286
- taming-transformers/data/open_images_annotations_100/train/000b1b92f0800e94.jpg filter=lfs diff=lfs merge=lfs -text
287
- taming-transformers/data/open_images_annotations_100/train/000b260e1f08a32a.jpg filter=lfs diff=lfs merge=lfs -text
288
- taming-transformers/data/open_images_annotations_100/train/000b29496f75c8e5.jpg filter=lfs diff=lfs merge=lfs -text
289
- taming-transformers/data/open_images_annotations_100/train/000b299b5f5ed902.jpg filter=lfs diff=lfs merge=lfs -text
290
- taming-transformers/data/open_images_annotations_100/train/000b2b00065e564a.jpg filter=lfs diff=lfs merge=lfs -text
291
- taming-transformers/data/open_images_annotations_100/train/000b2d1789d5f80d.jpg filter=lfs diff=lfs merge=lfs -text
292
- taming-transformers/data/open_images_annotations_100/train/000b38d9f2f664fe.jpg filter=lfs diff=lfs merge=lfs -text
293
- taming-transformers/data/open_images_annotations_100/train/000b393437134262.jpg filter=lfs diff=lfs merge=lfs -text
294
- taming-transformers/data/open_images_annotations_100/train/000b3940e7d25c03.jpg filter=lfs diff=lfs merge=lfs -text
295
- taming-transformers/data/open_images_annotations_100/train/000b397382b2464a.jpg filter=lfs diff=lfs merge=lfs -text
296
- taming-transformers/data/open_images_annotations_100/train/000b42cae15622e0.jpg filter=lfs diff=lfs merge=lfs -text
297
- taming-transformers/data/open_images_annotations_100/train/000b432ae644b679.jpg filter=lfs diff=lfs merge=lfs -text
298
- taming-transformers/data/open_images_annotations_100/train/000b485cedacbf97.jpg filter=lfs diff=lfs merge=lfs -text
299
- taming-transformers/data/open_images_annotations_100/train/000b4935979bf4b5.jpg filter=lfs diff=lfs merge=lfs -text
300
- taming-transformers/data/open_images_annotations_100/train/000b4fcdf1af3361.jpg filter=lfs diff=lfs merge=lfs -text
301
- taming-transformers/data/open_images_annotations_100/train/000b50bdd1933a36.jpg filter=lfs diff=lfs merge=lfs -text
302
- taming-transformers/data/open_images_annotations_100/train/000b55559b0244d7.jpg filter=lfs diff=lfs merge=lfs -text
303
- taming-transformers/data/open_images_annotations_100/train/000b55e339f0b131.jpg filter=lfs diff=lfs merge=lfs -text
304
- taming-transformers/data/open_images_annotations_100/train/000b567c26dd4e5d.jpg filter=lfs diff=lfs merge=lfs -text
305
- taming-transformers/data/open_images_annotations_100/train/000b59a7822679e6.jpg filter=lfs diff=lfs merge=lfs -text
306
- taming-transformers/data/open_images_annotations_100/train/000b5bc07c0c5df7.jpg filter=lfs diff=lfs merge=lfs -text
307
- taming-transformers/data/open_images_annotations_100/train/000b606e130bdf5e.jpg filter=lfs diff=lfs merge=lfs -text
308
- taming-transformers/data/open_images_annotations_100/train/000b63a1445f53c8.jpg filter=lfs diff=lfs merge=lfs -text
309
- taming-transformers/data/open_images_annotations_100/train/000b65a36ad46f9e.jpg filter=lfs diff=lfs merge=lfs -text
310
- taming-transformers/data/open_images_annotations_100/train/000b70a84aab664b.jpg filter=lfs diff=lfs merge=lfs -text
311
- taming-transformers/data/open_images_annotations_100/train/000b72e1446f8849.jpg filter=lfs diff=lfs merge=lfs -text
312
- taming-transformers/data/open_images_annotations_100/train/000b76a9b80ba43a.jpg filter=lfs diff=lfs merge=lfs -text
313
- taming-transformers/data/open_images_annotations_100/train/000b7dfaa1810a83.jpg filter=lfs diff=lfs merge=lfs -text
314
- taming-transformers/data/open_images_annotations_100/train/000b81b5757963e0.jpg filter=lfs diff=lfs merge=lfs -text
315
- taming-transformers/data/open_images_annotations_100/train/000b825dea3016eb.jpg filter=lfs diff=lfs merge=lfs -text
316
- taming-transformers/data/open_images_annotations_100/train/000b87119cc301cf.jpg filter=lfs diff=lfs merge=lfs -text
317
- taming-transformers/data/open_images_annotations_100/train/000b8d80f7386698.jpg filter=lfs diff=lfs merge=lfs -text
318
- taming-transformers/data/open_images_annotations_100/train/000b9007a01f7405.jpg filter=lfs diff=lfs merge=lfs -text
319
- taming-transformers/data/open_images_annotations_100/train/000b93644609911f.jpg filter=lfs diff=lfs merge=lfs -text
320
- taming-transformers/data/open_images_annotations_100/train/000b9814a07fd974.jpg filter=lfs diff=lfs merge=lfs -text
321
- taming-transformers/data/open_images_annotations_100/train/000b9a97776b3634.jpg filter=lfs diff=lfs merge=lfs -text
322
- taming-transformers/data/open_images_annotations_100/train/000b9b00d7aef8f5.jpg filter=lfs diff=lfs merge=lfs -text
323
- taming-transformers/data/open_images_annotations_100/train/000b9b61afea2cd4.jpg filter=lfs diff=lfs merge=lfs -text
324
- taming-transformers/data/open_images_annotations_100/train/000b9c365c9e307a.jpg filter=lfs diff=lfs merge=lfs -text
325
- taming-transformers/data/open_images_annotations_100/train/000b9d6c0f7d794d.jpg filter=lfs diff=lfs merge=lfs -text
326
- taming-transformers/data/open_images_annotations_100/train/000b9f3ba4891c11.jpg filter=lfs diff=lfs merge=lfs -text
327
- taming-transformers/data/open_images_annotations_100/train/000ba221f70676c6.jpg filter=lfs diff=lfs merge=lfs -text
328
- taming-transformers/data/open_images_annotations_100/train/000ba28d70b1a999.jpg filter=lfs diff=lfs merge=lfs -text
329
- taming-transformers/data/open_images_annotations_100/train/000ba3ca8a2ca955.jpg filter=lfs diff=lfs merge=lfs -text
330
- taming-transformers/data/open_images_annotations_100/train/000ba40bf7a2b458.jpg filter=lfs diff=lfs merge=lfs -text
331
- taming-transformers/data/open_images_annotations_100/train/000baa6f7dae9b79.jpg filter=lfs diff=lfs merge=lfs -text
332
- taming-transformers/data/open_images_annotations_100/train/000bab5b1a67844e.jpg filter=lfs diff=lfs merge=lfs -text
333
- taming-transformers/data/open_images_annotations_100/train/000bb0ae453283b0.jpg filter=lfs diff=lfs merge=lfs -text
334
- taming-transformers/data/open_images_annotations_100/train/000bb81adefe7332.jpg filter=lfs diff=lfs merge=lfs -text
335
- taming-transformers/data/open_images_annotations_100/train/000bb8bd9b1bca65.jpg filter=lfs diff=lfs merge=lfs -text
336
- taming-transformers/data/open_images_annotations_100/train/000bbdf0dc8099d8.jpg filter=lfs diff=lfs merge=lfs -text
337
- taming-transformers/data/open_images_annotations_100/train/000bc1eb7f74adae.jpg filter=lfs diff=lfs merge=lfs -text
338
- taming-transformers/data/open_images_annotations_100/train/000bc33717a6371f.jpg filter=lfs diff=lfs merge=lfs -text
339
- taming-transformers/data/open_images_annotations_100/train/000bc387c731dd97.jpg filter=lfs diff=lfs merge=lfs -text
340
- taming-transformers/data/open_images_annotations_100/train/000bc5006eb7fd98.jpg filter=lfs diff=lfs merge=lfs -text
341
- taming-transformers/data/open_images_annotations_100/train/000bc5ad4cc3ae73.jpg filter=lfs diff=lfs merge=lfs -text
342
- taming-transformers/data/open_images_annotations_100/train/000bc75d38907c78.jpg filter=lfs diff=lfs merge=lfs -text
343
- taming-transformers/data/open_images_annotations_100/train/000bc7b0a1889bcb.jpg filter=lfs diff=lfs merge=lfs -text
344
- taming-transformers/data/open_images_annotations_100/train/000bcd3bcd95cbb3.jpg filter=lfs diff=lfs merge=lfs -text
345
- taming-transformers/data/open_images_annotations_100/train/000bcee5bed5446b.jpg filter=lfs diff=lfs merge=lfs -text
346
- taming-transformers/data/open_images_annotations_100/validation/09c67960e389e4df.jpg filter=lfs diff=lfs merge=lfs -text
347
- taming-transformers/data/open_images_annotations_100/validation/09c6ddd2c210450e.jpg filter=lfs diff=lfs merge=lfs -text
348
- taming-transformers/data/open_images_annotations_100/validation/09c7f89055cf399b.jpg filter=lfs diff=lfs merge=lfs -text
349
- taming-transformers/data/open_images_annotations_100/validation/09c863d76bcf6b00.jpg filter=lfs diff=lfs merge=lfs -text
350
- taming-transformers/data/open_images_annotations_100/validation/09c993afacd01547.jpg filter=lfs diff=lfs merge=lfs -text
351
- taming-transformers/data/open_images_annotations_100/validation/09d2112596d9155b.jpg filter=lfs diff=lfs merge=lfs -text
352
- taming-transformers/data/open_images_annotations_100/validation/09d354dbd3dcc857.jpg filter=lfs diff=lfs merge=lfs -text
353
- taming-transformers/data/open_images_annotations_100/validation/09d45c49c4adbae4.jpg filter=lfs diff=lfs merge=lfs -text
354
- taming-transformers/data/open_images_annotations_100/validation/09d64f43c7111879.jpg filter=lfs diff=lfs merge=lfs -text
355
- taming-transformers/data/open_images_annotations_100/validation/09d8aa2d19ff724d.jpg filter=lfs diff=lfs merge=lfs -text
356
- taming-transformers/data/open_images_annotations_100/validation/09dcb9b52055d40f.jpg filter=lfs diff=lfs merge=lfs -text
357
- taming-transformers/data/open_images_annotations_100/validation/09dd0671cd633432.jpg filter=lfs diff=lfs merge=lfs -text
358
- taming-transformers/data/open_images_annotations_100/validation/09df63bd01367ca3.jpg filter=lfs diff=lfs merge=lfs -text
359
- taming-transformers/data/open_images_annotations_100/validation/09e094375efab7fe.jpg filter=lfs diff=lfs merge=lfs -text
360
- taming-transformers/data/open_images_annotations_100/validation/09e617d9d3120b32.jpg filter=lfs diff=lfs merge=lfs -text
361
- taming-transformers/data/open_images_annotations_100/validation/09ebcee57699eb98.jpg filter=lfs diff=lfs merge=lfs -text
362
- taming-transformers/data/open_images_annotations_100/validation/09f8b77a88f224d9.jpg filter=lfs diff=lfs merge=lfs -text
363
- taming-transformers/data/open_images_annotations_100/validation/09f8e760f60df0da.jpg filter=lfs diff=lfs merge=lfs -text
364
- taming-transformers/data/open_images_annotations_100/validation/09fa093bcd300c1a.jpg filter=lfs diff=lfs merge=lfs -text
365
- taming-transformers/data/open_images_annotations_100/validation/0a02c648d24f39fb.jpg filter=lfs diff=lfs merge=lfs -text
366
- taming-transformers/data/open_images_annotations_100/validation/0a08a4711c728078.jpg filter=lfs diff=lfs merge=lfs -text
367
- taming-transformers/data/open_images_annotations_100/validation/0a13dcaaab9a35e0.jpg filter=lfs diff=lfs merge=lfs -text
368
- taming-transformers/data/open_images_annotations_100/validation/0a1b11867383b13e.jpg filter=lfs diff=lfs merge=lfs -text
369
- taming-transformers/data/open_images_annotations_100/validation/0a23d3f0e7d850f4.jpg filter=lfs diff=lfs merge=lfs -text
370
- taming-transformers/data/open_images_annotations_100/validation/0a278d979b63fc72.jpg filter=lfs diff=lfs merge=lfs -text
371
- taming-transformers/data/open_images_annotations_100/validation/0a2c6ef66896fb92.jpg filter=lfs diff=lfs merge=lfs -text
372
- taming-transformers/data/open_images_annotations_100/validation/0a34d80ee1db201e.jpg filter=lfs diff=lfs merge=lfs -text
373
- taming-transformers/data/open_images_annotations_100/validation/0a37aa0734ac8016.jpg filter=lfs diff=lfs merge=lfs -text
374
- taming-transformers/data/open_images_annotations_100/validation/0a3873442ad329c2.jpg filter=lfs diff=lfs merge=lfs -text
375
- taming-transformers/data/open_images_annotations_100/validation/0a39325e5ad7f5a0.jpg filter=lfs diff=lfs merge=lfs -text
376
- taming-transformers/data/open_images_annotations_100/validation/0a3c01759e77a02d.jpg filter=lfs diff=lfs merge=lfs -text
377
- taming-transformers/data/open_images_annotations_100/validation/0a3f577a327ca7cc.jpg filter=lfs diff=lfs merge=lfs -text
378
- taming-transformers/data/open_images_annotations_100/validation/0a3f9b3d57ef354a.jpg filter=lfs diff=lfs merge=lfs -text
379
- taming-transformers/data/open_images_annotations_100/validation/0a41cda5f44baaf6.jpg filter=lfs diff=lfs merge=lfs -text
380
- taming-transformers/data/open_images_annotations_100/validation/0a47e7d602855f93.jpg filter=lfs diff=lfs merge=lfs -text
381
- taming-transformers/data/open_images_annotations_100/validation/0a4abf0a8071b917.jpg filter=lfs diff=lfs merge=lfs -text
382
- taming-transformers/data/open_images_annotations_100/validation/0a4db5693da70448.jpg filter=lfs diff=lfs merge=lfs -text
383
- taming-transformers/data/open_images_annotations_100/validation/0a556c8163b58fae.jpg filter=lfs diff=lfs merge=lfs -text
384
- taming-transformers/data/open_images_annotations_100/validation/0a563d05ebab4fe3.jpg filter=lfs diff=lfs merge=lfs -text
385
- taming-transformers/data/open_images_annotations_100/validation/0a599940d33b6b2b.jpg filter=lfs diff=lfs merge=lfs -text
386
- taming-transformers/data/open_images_annotations_100/validation/0a600f1148d1023c.jpg filter=lfs diff=lfs merge=lfs -text
387
- taming-transformers/data/open_images_annotations_100/validation/0a6a03c8f23ee744.jpg filter=lfs diff=lfs merge=lfs -text
388
- taming-transformers/data/open_images_annotations_100/validation/0a6bc386b28f2aac.jpg filter=lfs diff=lfs merge=lfs -text
389
- taming-transformers/data/open_images_annotations_100/validation/0a7074a2a5515531.jpg filter=lfs diff=lfs merge=lfs -text
390
- taming-transformers/data/open_images_annotations_100/validation/0a72fef43a51c479.jpg filter=lfs diff=lfs merge=lfs -text
391
- taming-transformers/data/open_images_annotations_100/validation/0a73064c82730ff5.jpg filter=lfs diff=lfs merge=lfs -text
392
- taming-transformers/data/open_images_annotations_100/validation/0a78374f2d3949ae.jpg filter=lfs diff=lfs merge=lfs -text
393
- taming-transformers/data/open_images_annotations_100/validation/0a7be0b883a12966.jpg filter=lfs diff=lfs merge=lfs -text
394
- taming-transformers/data/open_images_annotations_100/validation/0a7c597abf1e90d4.jpg filter=lfs diff=lfs merge=lfs -text
395
- taming-transformers/data/open_images_annotations_100/validation/0a7f13330a5d0023.jpg filter=lfs diff=lfs merge=lfs -text
396
- taming-transformers/data/open_images_annotations_100/validation/0a7f4d9a0ccb9afe.jpg filter=lfs diff=lfs merge=lfs -text
397
- taming-transformers/data/open_images_annotations_100/validation/0a7fbc1d68e4e5ae.jpg filter=lfs diff=lfs merge=lfs -text
398
- taming-transformers/data/open_images_annotations_100/validation/0a82f0443c940816.jpg filter=lfs diff=lfs merge=lfs -text
399
- taming-transformers/data/open_images_annotations_100/validation/0a8657e8b5c9d7bb.jpg filter=lfs diff=lfs merge=lfs -text
400
- taming-transformers/data/open_images_annotations_100/validation/0a877314ca2039d9.jpg filter=lfs diff=lfs merge=lfs -text
401
- taming-transformers/data/open_images_annotations_100/validation/0a917bbca24cf75d.jpg filter=lfs diff=lfs merge=lfs -text
402
- taming-transformers/data/open_images_annotations_100/validation/0a94296ff543a1dc.jpg filter=lfs diff=lfs merge=lfs -text
403
- taming-transformers/data/open_images_annotations_100/validation/0a9f73b3c2557150.jpg filter=lfs diff=lfs merge=lfs -text
404
- taming-transformers/data/open_images_annotations_100/validation/0a9ff75a7897e757.jpg filter=lfs diff=lfs merge=lfs -text
405
- taming-transformers/data/open_images_annotations_100/validation/0aa206fa7ea80036.jpg filter=lfs diff=lfs merge=lfs -text
406
- taming-transformers/data/open_images_annotations_100/validation/0aa3a6c33fca122b.jpg filter=lfs diff=lfs merge=lfs -text
407
- taming-transformers/data/open_images_annotations_100/validation/0aaad833ac61ac9d.jpg filter=lfs diff=lfs merge=lfs -text
408
- taming-transformers/data/open_images_annotations_100/validation/0aacbdb54e853a0a.jpg filter=lfs diff=lfs merge=lfs -text
409
- taming-transformers/data/open_images_annotations_100/validation/0aad9fc79a35bd53.jpg filter=lfs diff=lfs merge=lfs -text
410
- taming-transformers/data/open_images_annotations_100/validation/0aae34863935e33a.jpg filter=lfs diff=lfs merge=lfs -text
411
- taming-transformers/data/open_images_annotations_100/validation/0ab050b51e78acdb.jpg filter=lfs diff=lfs merge=lfs -text
412
- taming-transformers/data/open_images_annotations_100/validation/0ab10a6417ef2301.jpg filter=lfs diff=lfs merge=lfs -text
413
- taming-transformers/data/open_images_annotations_100/validation/0ab2b64f27f8baca.jpg filter=lfs diff=lfs merge=lfs -text
414
- taming-transformers/data/open_images_annotations_100/validation/0ab5c690eebfad95.jpg filter=lfs diff=lfs merge=lfs -text
415
- taming-transformers/data/open_images_annotations_100/validation/0ac166d12e401a98.jpg filter=lfs diff=lfs merge=lfs -text
416
- taming-transformers/data/open_images_annotations_100/validation/0ac2f91a7995aa8b.jpg filter=lfs diff=lfs merge=lfs -text
417
- taming-transformers/data/open_images_annotations_100/validation/0ac3c1db1b3645f2.jpg filter=lfs diff=lfs merge=lfs -text
418
- taming-transformers/data/open_images_annotations_100/validation/0ac51477636a6933.jpg filter=lfs diff=lfs merge=lfs -text
419
- taming-transformers/data/open_images_annotations_100/validation/0ac52440f73b5c80.jpg filter=lfs diff=lfs merge=lfs -text
420
- taming-transformers/data/open_images_annotations_100/validation/0ad7884032419621.jpg filter=lfs diff=lfs merge=lfs -text
421
- taming-transformers/data/open_images_annotations_100/validation/0ad7bad30cd432df.jpg filter=lfs diff=lfs merge=lfs -text
422
- taming-transformers/data/open_images_annotations_100/validation/0ad99d610a9092e6.jpg filter=lfs diff=lfs merge=lfs -text
423
- taming-transformers/data/open_images_annotations_100/validation/0ada35baba28134b.jpg filter=lfs diff=lfs merge=lfs -text
424
- taming-transformers/data/open_images_annotations_100/validation/0adc1330287b2e66.jpg filter=lfs diff=lfs merge=lfs -text
425
- taming-transformers/data/open_images_annotations_100/validation/0adc373e996aadc2.jpg filter=lfs diff=lfs merge=lfs -text
426
- taming-transformers/data/open_images_annotations_100/validation/0add91a2efb3f33d.jpg filter=lfs diff=lfs merge=lfs -text
427
- taming-transformers/data/open_images_annotations_100/validation/0ade7aef439e2102.jpg filter=lfs diff=lfs merge=lfs -text
428
- taming-transformers/data/sflckr_images/alaska_lakes/43259216952_59352d7204_b.jpg filter=lfs diff=lfs merge=lfs -text
429
- taming-transformers/data/sflckr_images/australia/12822389285_a7723081b5_b.jpg filter=lfs diff=lfs merge=lfs -text
430
- taming-transformers/data/sflckr_images/australia/8720651218_ca82a6608e_b.jpg filter=lfs diff=lfs merge=lfs -text
431
- taming-transformers/data/sflckr_images/black_forest/8364557382_c6c9ee2fd6_b.jpg filter=lfs diff=lfs merge=lfs -text
432
- taming-transformers/data/sflckr_images/canada/256743165_9f130ba95b_b.jpg filter=lfs diff=lfs merge=lfs -text
433
- taming-transformers/data/sflckr_images/canada/2883773_881c197107_c.jpg filter=lfs diff=lfs merge=lfs -text
434
- taming-transformers/data/sflckr_images/carribean/14351041152_ef77484a1f_b.jpg filter=lfs diff=lfs merge=lfs -text
435
- taming-transformers/data/sflckr_images/carribean/18176301_c9d27557cf_b.jpg filter=lfs diff=lfs merge=lfs -text
436
- taming-transformers/data/sflckr_images/cliff_ocean/36142796444_45d452f567_b.jpg filter=lfs diff=lfs merge=lfs -text
437
- taming-transformers/data/sflckr_images/desert/4534149722_3cc4f92891_b.jpg filter=lfs diff=lfs merge=lfs -text
438
- taming-transformers/data/sflckr_images/geysir/14996762478_a9bdbf959a_b.jpg filter=lfs diff=lfs merge=lfs -text
439
- taming-transformers/data/sflckr_images/geysir/26320755536_7c769b6218_b.jpg filter=lfs diff=lfs merge=lfs -text
440
- taming-transformers/data/sflckr_images/geysir/4748115806_7219c2b3be_b.jpg filter=lfs diff=lfs merge=lfs -text
441
- taming-transformers/data/sflckr_images/ireland/15570753471_74db396d14_b.jpg filter=lfs diff=lfs merge=lfs -text
442
- taming-transformers/data/sflckr_images/lakes/39933489595_f0e5d85b6d_b.jpg filter=lfs diff=lfs merge=lfs -text
443
- taming-transformers/data/sflckr_images/meadow/18864473291_844325caab_b.jpg filter=lfs diff=lfs merge=lfs -text
444
- taming-transformers/data/sflckr_images/mongolia/6076373946_e9ea2aee32_b.jpg filter=lfs diff=lfs merge=lfs -text
445
- taming-transformers/data/sflckr_images/newzealand_np/7942812194_9348729b93_b.jpg filter=lfs diff=lfs merge=lfs -text
446
- taming-transformers/data/sflckr_images/norway/20099378793_cc2df820af_b.jpg filter=lfs diff=lfs merge=lfs -text
447
- taming-transformers/data/sflckr_images/norway/25735082181_999927fe5a_b.jpg filter=lfs diff=lfs merge=lfs -text
448
- taming-transformers/data/sflckr_images/swiss_mountains/33509672006_bf4c416afd_b.jpg filter=lfs diff=lfs merge=lfs -text
449
- taming-transformers/data/sflckr_images/volcano/50254383883_27ed6ea93a_b.jpg filter=lfs diff=lfs merge=lfs -text
450
- taming-transformers/scripts/reconstruction_usage.ipynb filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -4,8 +4,8 @@ emoji: 👁
4
  colorFrom: pink
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.38.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- ---
 
4
  colorFrom: pink
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 2.9.4
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ ---
app.py CHANGED
@@ -9,7 +9,6 @@ sys.path.append('./latent-diffusion')
9
  from taming.models import vqgan
10
  from ldm.util import instantiate_from_config
11
  from huggingface_hub import hf_hub_download
12
- import spaces
13
 
14
  model_path_e = hf_hub_download(repo_id="multimodalart/compvis-latent-diffusion-text2img-large", filename="txt2img-f8-large.ckpt")
15
 
@@ -101,7 +100,6 @@ model = model.to(device)
101
  safety_model = load_safety_model("ViT-B/32")
102
  clip_model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
103
 
104
- @spaces.GPU
105
  def run(prompt, steps, width, height, images, scale):
106
  opt = argparse.Namespace(
107
  prompt = prompt,
@@ -181,26 +179,22 @@ def run(prompt, steps, width, height, images, scale):
181
  grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
182
 
183
  Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'{prompt.replace(" ", "-")}.png'))
184
- return(all_samples_images,Image.fromarray(grid.astype(np.uint8)),None)
185
 
186
- image = gr.Image(type="pil", label="Image Grid")
187
  css = ".output-image{height: 528px !important} .output-carousel .output-image{height:272px !important} a{text-decoration: underline}"
188
  iface = gr.Interface(fn=run, inputs=[
189
- gr.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",placeholder="chalk pastel drawing of a dog wearing a funny hat"),
190
- gr.Slider(label="Steps - more steps can increase quality but will take longer to generate",value=45,maximum=50,minimum=1,step=1),
191
- gr.Radio(label="Width", choices=[32,64,128,256],value=256),
192
- gr.Radio(label="Height", choices=[32,64,128,256],value=256),
193
- gr.Slider(label="Images - How many images you wish to generate", value=2, step=1, minimum=1, maximum=4),
194
- gr.Slider(label="Diversity scale - How different from one another you wish the images to be",value=5.0, minimum=1.0, maximum=15.0),
195
  #gr.inputs.Slider(label="ETA - between 0 and 1. Lower values can provide better quality, higher values can be more diverse",default=0.0,minimum=0.0, maximum=1.0,step=0.1),
196
  ],
197
- outputs=[
198
- gr.Gallery(label="Individual images"),
199
- image,
200
- gr.Textbox(label="Error")
201
- ],
202
  css=css,
203
  title="Generate images from text with Latent Diffusion LAION-400M",
204
  description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",
205
  article="<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware to the fact that this model may output content that reinforces or exarcbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on an unfiltered version the LAION-400M dataset, which scrapped non-curated image-text-pairs from the internet (the exception being the the removal of illegal content) and is meant to be used for research purposes, such as this one. <a href='https://laion.ai/laion-400-open-dataset/' target='_blank'>You can read more on LAION's website</a></div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definetly not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>")
206
- iface.launch()
 
9
  from taming.models import vqgan
10
  from ldm.util import instantiate_from_config
11
  from huggingface_hub import hf_hub_download
 
12
 
13
  model_path_e = hf_hub_download(repo_id="multimodalart/compvis-latent-diffusion-text2img-large", filename="txt2img-f8-large.ckpt")
14
 
 
100
  safety_model = load_safety_model("ViT-B/32")
101
  clip_model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
102
 
 
103
  def run(prompt, steps, width, height, images, scale):
104
  opt = argparse.Namespace(
105
  prompt = prompt,
 
179
  grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
180
 
181
  Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'{prompt.replace(" ", "-")}.png'))
182
+ return(Image.fromarray(grid.astype(np.uint8)),all_samples_images,None)
183
 
184
+ image = gr.outputs.Image(type="pil", label="Your result")
185
  css = ".output-image{height: 528px !important} .output-carousel .output-image{height:272px !important} a{text-decoration: underline}"
186
  iface = gr.Interface(fn=run, inputs=[
187
+ gr.inputs.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",default="chalk pastel drawing of a dog wearing a funny hat"),
188
+ gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1),
189
+ gr.inputs.Radio(label="Width", choices=[32,64,128,256],default=256),
190
+ gr.inputs.Radio(label="Height", choices=[32,64,128,256],default=256),
191
+ gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4),
192
+ gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0),
193
  #gr.inputs.Slider(label="ETA - between 0 and 1. Lower values can provide better quality, higher values can be more diverse",default=0.0,minimum=0.0, maximum=1.0,step=0.1),
194
  ],
195
+ outputs=[image,gr.outputs.Carousel(label="Individual images",components=["image"]),gr.outputs.Textbox(label="Error")],
 
 
 
 
196
  css=css,
197
  title="Generate images from text with Latent Diffusion LAION-400M",
198
  description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",
199
  article="<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware to the fact that this model may output content that reinforces or exarcbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on an unfiltered version the LAION-400M dataset, which scrapped non-curated image-text-pairs from the internet (the exception being the the removal of illegal content) and is meant to be used for research purposes, such as this one. <a href='https://laion.ai/laion-400-open-dataset/' target='_blank'>You can read more on LAION's website</a></div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definetly not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>")
200
+ iface.launch(enable_queue=True)
requirements.txt CHANGED
@@ -3,19 +3,16 @@ ftfy
3
  regex
4
  tqdm
5
  omegaconf
6
- pytorch-lightning==1.9.0
7
  torch-fidelity
8
  transformers
9
  einops
10
  gradio
11
- torch==1.13.1
12
  open_clip_torch
13
  numpy
14
  tqdm
15
  torchvision
16
  Pillow
17
- autokeras==1.0.20
18
- huggingface_hub
19
- deepspeed>=0.8.2
20
- torchmetrics==0.11.4
21
- tensorflow==2.12.1
 
3
  regex
4
  tqdm
5
  omegaconf
6
+ pytorch-lightning
7
  torch-fidelity
8
  transformers
9
  einops
10
  gradio
11
+ torch
12
  open_clip_torch
13
  numpy
14
  tqdm
15
  torchvision
16
  Pillow
17
+ autokeras
18
+ huggingface_hub
 
 
 
taming-transformers/License.txt DELETED
@@ -1,19 +0,0 @@
1
- Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer
2
-
3
- Permission is hereby granted, free of charge, to any person obtaining a copy
4
- of this software and associated documentation files (the "Software"), to deal
5
- in the Software without restriction, including without limitation the rights
6
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
- copies of the Software, and to permit persons to whom the Software is
8
- furnished to do so, subject to the following conditions:
9
-
10
- The above copyright notice and this permission notice shall be included in all
11
- copies or substantial portions of the Software.
12
-
13
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
15
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17
- DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18
- OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19
- OR OTHER DEALINGS IN THE SOFTWARE./
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/README.md DELETED
@@ -1,410 +0,0 @@
1
- # Taming Transformers for High-Resolution Image Synthesis
2
- ##### CVPR 2021 (Oral)
3
- ![teaser](assets/mountain.jpeg)
4
-
5
- [**Taming Transformers for High-Resolution Image Synthesis**](https://compvis.github.io/taming-transformers/)<br/>
6
- [Patrick Esser](https://github.com/pesser)\*,
7
- [Robin Rombach](https://github.com/rromb)\*,
8
- [Björn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)<br/>
9
- \* equal contribution
10
-
11
- **tl;dr** We combine the efficiency of convolutional approaches with the expressivity of transformers by introducing a convolutional VQGAN, which learns a codebook of context-rich visual parts, whose composition is modeled with an autoregressive transformer.
12
-
13
- ![teaser](assets/teaser.png)
14
- [arXiv](https://arxiv.org/abs/2012.09841) | [BibTeX](#bibtex) | [Project Page](https://compvis.github.io/taming-transformers/)
15
-
16
-
17
- ### News
18
- #### 2022
19
- - More pretrained VQGANs (e.g. a f8-model with only 256 codebook entries) are available in our new work on [Latent Diffusion Models](https://github.com/CompVis/latent-diffusion).
20
- - Added scene synthesis models as proposed in the paper [High-Resolution Complex Scene Synthesis with Transformers](https://arxiv.org/abs/2105.06458), see [this section](#scene-image-synthesis).
21
- #### 2021
22
- - Thanks to [rom1504](https://github.com/rom1504) it is now easy to [train a VQGAN on your own datasets](#training-on-custom-data).
23
- - Included a bugfix for the quantizer. For backward compatibility it is
24
- disabled by default (which corresponds to always training with `beta=1.0`).
25
- Use `legacy=False` in the quantizer config to enable it.
26
- Thanks [richcmwang](https://github.com/richcmwang) and [wcshin-git](https://github.com/wcshin-git)!
27
- - Our paper received an update: See https://arxiv.org/abs/2012.09841v3 and the corresponding changelog.
28
- - Added a pretrained, [1.4B transformer model](https://k00.fr/s511rwcv) trained for class-conditional ImageNet synthesis, which obtains state-of-the-art FID scores among autoregressive approaches and outperforms BigGAN.
29
- - Added pretrained, unconditional models on [FFHQ](https://k00.fr/yndvfu95) and [CelebA-HQ](https://k00.fr/2xkmielf).
30
- - Added accelerated sampling via caching of keys/values in the self-attention operation, used in `scripts/sample_fast.py`.
31
- - Added a checkpoint of a [VQGAN](https://heibox.uni-heidelberg.de/d/2e5662443a6b4307b470/) trained with f8 compression and Gumbel-Quantization.
32
- See also our updated [reconstruction notebook](https://colab.research.google.com/github/CompVis/taming-transformers/blob/master/scripts/reconstruction_usage.ipynb).
33
- - We added a [colab notebook](https://colab.research.google.com/github/CompVis/taming-transformers/blob/master/scripts/reconstruction_usage.ipynb) which compares two VQGANs and OpenAI's [DALL-E](https://github.com/openai/DALL-E). See also [this section](#more-resources).
34
- - We now include an overview of pretrained models in [Tab.1](#overview-of-pretrained-models). We added models for [COCO](#coco) and [ADE20k](#ade20k).
35
- - The streamlit demo now supports image completions.
36
- - We now include a couple of examples from the D-RIN dataset so you can run the
37
- [D-RIN demo](#d-rin) without preparing the dataset first.
38
- - You can now jump right into sampling with our [Colab quickstart notebook](https://colab.research.google.com/github/CompVis/taming-transformers/blob/master/scripts/taming-transformers.ipynb).
39
-
40
- ## Requirements
41
- A suitable [conda](https://conda.io/) environment named `taming` can be created
42
- and activated with:
43
-
44
- ```
45
- conda env create -f environment.yaml
46
- conda activate taming
47
- ```
48
- ## Overview of pretrained models
49
- The following table provides an overview of all models that are currently available.
50
- FID scores were evaluated using [torch-fidelity](https://github.com/toshas/torch-fidelity).
51
- For reference, we also include a link to the recently released autoencoder of the [DALL-E](https://github.com/openai/DALL-E) model.
52
- See the corresponding [colab
53
- notebook](https://colab.research.google.com/github/CompVis/taming-transformers/blob/master/scripts/reconstruction_usage.ipynb)
54
- for a comparison and discussion of reconstruction capabilities.
55
-
56
- | Dataset | FID vs train | FID vs val | Link | Samples (256x256) | Comments
57
- | ------------- | ------------- | ------------- |------------- | ------------- |------------- |
58
- | FFHQ (f=16) | 9.6 | -- | [ffhq_transformer](https://k00.fr/yndvfu95) | [ffhq_samples](https://k00.fr/j626x093) |
59
- | CelebA-HQ (f=16) | 10.2 | -- | [celebahq_transformer](https://k00.fr/2xkmielf) | [celebahq_samples](https://k00.fr/j626x093) |
60
- | ADE20K (f=16) | -- | 35.5 | [ade20k_transformer](https://k00.fr/ot46cksa) | [ade20k_samples.zip](https://heibox.uni-heidelberg.de/f/70bb78cbaf844501b8fb/) [2k] | evaluated on val split (2k images)
61
- | COCO-Stuff (f=16) | -- | 20.4 | [coco_transformer](https://k00.fr/2zz6i2ce) | [coco_samples.zip](https://heibox.uni-heidelberg.de/f/a395a9be612f4a7a8054/) [5k] | evaluated on val split (5k images)
62
- | ImageNet (cIN) (f=16) | 15.98/15.78/6.59/5.88/5.20 | -- | [cin_transformer](https://k00.fr/s511rwcv) | [cin_samples](https://k00.fr/j626x093) | different decoding hyperparameters |
63
- | | | | || |
64
- | FacesHQ (f=16) | -- | -- | [faceshq_transformer](https://k00.fr/qqfl2do8)
65
- | S-FLCKR (f=16) | -- | -- | [sflckr](https://heibox.uni-heidelberg.de/d/73487ab6e5314cb5adba/)
66
- | D-RIN (f=16) | -- | -- | [drin_transformer](https://k00.fr/39jcugc5)
67
- | | | | | || |
68
- | VQGAN ImageNet (f=16), 1024 | 10.54 | 7.94 | [vqgan_imagenet_f16_1024](https://heibox.uni-heidelberg.de/d/8088892a516d4e3baf92/) | [reconstructions](https://k00.fr/j626x093) | Reconstruction-FIDs.
69
- | VQGAN ImageNet (f=16), 16384 | 7.41 | 4.98 |[vqgan_imagenet_f16_16384](https://heibox.uni-heidelberg.de/d/a7530b09fed84f80a887/) | [reconstructions](https://k00.fr/j626x093) | Reconstruction-FIDs.
70
- | VQGAN OpenImages (f=8), 256 | -- | 1.49 |https://ommer-lab.com/files/latent-diffusion/vq-f8-n256.zip | --- | Reconstruction-FIDs. Available via [latent diffusion](https://github.com/CompVis/latent-diffusion).
71
- | VQGAN OpenImages (f=8), 16384 | -- | 1.14 |https://ommer-lab.com/files/latent-diffusion/vq-f8.zip | --- | Reconstruction-FIDs. Available via [latent diffusion](https://github.com/CompVis/latent-diffusion)
72
- | VQGAN OpenImages (f=8), 8192, GumbelQuantization | 3.24 | 1.49 |[vqgan_gumbel_f8](https://heibox.uni-heidelberg.de/d/2e5662443a6b4307b470/) | --- | Reconstruction-FIDs.
73
- | | | | | || |
74
- | DALL-E dVAE (f=8), 8192, GumbelQuantization | 33.88 | 32.01 | https://github.com/openai/DALL-E | [reconstructions](https://k00.fr/j626x093) | Reconstruction-FIDs.
75
-
76
-
77
- ## Running pretrained models
78
-
79
- The commands below will start a streamlit demo which supports sampling at
80
- different resolutions and image completions. To run a non-interactive version
81
- of the sampling process, replace `streamlit run scripts/sample_conditional.py --`
82
- by `python scripts/make_samples.py --outdir <path_to_write_samples_to>` and
83
- keep the remaining command line arguments.
84
-
85
- To sample from unconditional or class-conditional models,
86
- run `python scripts/sample_fast.py -r <path/to/config_and_checkpoint>`.
87
- We describe below how to use this script to sample from the ImageNet, FFHQ, and CelebA-HQ models,
88
- respectively.
89
-
90
- ### S-FLCKR
91
- ![teaser](assets/sunset_and_ocean.jpg)
92
-
93
- You can also [run this model in a Colab
94
- notebook](https://colab.research.google.com/github/CompVis/taming-transformers/blob/master/scripts/taming-transformers.ipynb),
95
- which includes all necessary steps to start sampling.
96
-
97
- Download the
98
- [2020-11-09T13-31-51_sflckr](https://heibox.uni-heidelberg.de/d/73487ab6e5314cb5adba/)
99
- folder and place it into `logs`. Then, run
100
- ```
101
- streamlit run scripts/sample_conditional.py -- -r logs/2020-11-09T13-31-51_sflckr/
102
- ```
103
-
104
- ### ImageNet
105
- ![teaser](assets/imagenet.png)
106
-
107
- Download the [2021-04-03T19-39-50_cin_transformer](https://k00.fr/s511rwcv)
108
- folder and place it into logs. Sampling from the class-conditional ImageNet
109
- model does not require any data preparation. To produce 50 samples for each of
110
- the 1000 classes of ImageNet, with k=600 for top-k sampling, p=0.92 for nucleus
111
- sampling and temperature t=1.0, run
112
-
113
- ```
114
- python scripts/sample_fast.py -r logs/2021-04-03T19-39-50_cin_transformer/ -n 50 -k 600 -t 1.0 -p 0.92 --batch_size 25
115
- ```
116
-
117
- To restrict the model to certain classes, provide them via the `--classes` argument, separated by
118
- commas. For example, to sample 50 *ostriches*, *border collies* and *whiskey jugs*, run
119
-
120
- ```
121
- python scripts/sample_fast.py -r logs/2021-04-03T19-39-50_cin_transformer/ -n 50 -k 600 -t 1.0 -p 0.92 --batch_size 25 --classes 9,232,901
122
- ```
123
- We recommend experimenting with the autoregressive decoding parameters (top-k, top-p and temperature) for best results.
124
-
125
- ### FFHQ/CelebA-HQ
126
-
127
- Download the [2021-04-23T18-19-01_ffhq_transformer](https://k00.fr/yndvfu95) and
128
- [2021-04-23T18-11-19_celebahq_transformer](https://k00.fr/2xkmielf)
129
- folders and place them into logs.
130
- Again, sampling from these unconditional models does not require any data preparation.
131
- To produce 50000 samples, with k=250 for top-k sampling,
132
- p=1.0 for nucleus sampling and temperature t=1.0, run
133
-
134
- ```
135
- python scripts/sample_fast.py -r logs/2021-04-23T18-19-01_ffhq_transformer/
136
- ```
137
- for FFHQ and
138
-
139
- ```
140
- python scripts/sample_fast.py -r logs/2021-04-23T18-11-19_celebahq_transformer/
141
- ```
142
- to sample from the CelebA-HQ model.
143
- For both models it can be advantageous to vary the top-k/top-p parameters for sampling.
144
-
145
- ### FacesHQ
146
- ![teaser](assets/faceshq.jpg)
147
-
148
- Download [2020-11-13T21-41-45_faceshq_transformer](https://k00.fr/qqfl2do8) and
149
- place it into `logs`. Follow the data preparation steps for
150
- [CelebA-HQ](#celeba-hq) and [FFHQ](#ffhq). Run
151
- ```
152
- streamlit run scripts/sample_conditional.py -- -r logs/2020-11-13T21-41-45_faceshq_transformer/
153
- ```
154
-
155
- ### D-RIN
156
- ![teaser](assets/drin.jpg)
157
-
158
- Download [2020-11-20T12-54-32_drin_transformer](https://k00.fr/39jcugc5) and
159
- place it into `logs`. To run the demo on a couple of example depth maps
160
- included in the repository, run
161
-
162
- ```
163
- streamlit run scripts/sample_conditional.py -- -r logs/2020-11-20T12-54-32_drin_transformer/ --ignore_base_data data="{target: main.DataModuleFromConfig, params: {batch_size: 1, validation: {target: taming.data.imagenet.DRINExamples}}}"
164
- ```
165
-
166
- To run the demo on the complete validation set, first follow the data preparation steps for
167
- [ImageNet](#imagenet) and then run
168
- ```
169
- streamlit run scripts/sample_conditional.py -- -r logs/2020-11-20T12-54-32_drin_transformer/
170
- ```
171
-
172
- ### COCO
173
- Download [2021-01-20T16-04-20_coco_transformer](https://k00.fr/2zz6i2ce) and
174
- place it into `logs`. To run the demo on a couple of example segmentation maps
175
- included in the repository, run
176
-
177
- ```
178
- streamlit run scripts/sample_conditional.py -- -r logs/2021-01-20T16-04-20_coco_transformer/ --ignore_base_data data="{target: main.DataModuleFromConfig, params: {batch_size: 1, validation: {target: taming.data.coco.Examples}}}"
179
- ```
180
-
181
- ### ADE20k
182
- Download [2020-11-20T21-45-44_ade20k_transformer](https://k00.fr/ot46cksa) and
183
- place it into `logs`. To run the demo on a couple of example segmentation maps
184
- included in the repository, run
185
-
186
- ```
187
- streamlit run scripts/sample_conditional.py -- -r logs/2020-11-20T21-45-44_ade20k_transformer/ --ignore_base_data data="{target: main.DataModuleFromConfig, params: {batch_size: 1, validation: {target: taming.data.ade20k.Examples}}}"
188
- ```
189
-
190
- ## Scene Image Synthesis
191
- ![teaser](assets/scene_images_samples.svg)
192
- Scene image generation based on bounding box conditionals as done in our CVPR2021 AI4CC workshop paper [High-Resolution Complex Scene Synthesis with Transformers](https://arxiv.org/abs/2105.06458) (see talk on [workshop page](https://visual.cs.brown.edu/workshops/aicc2021/#awards)). Supporting the datasets COCO and Open Images.
193
-
194
- ### Training
195
- Download first-stage models [COCO-8k-VQGAN](https://heibox.uni-heidelberg.de/f/78dea9589974474c97c1/) for COCO or [COCO/Open-Images-8k-VQGAN](https://heibox.uni-heidelberg.de/f/461d9a9f4fcf48ab84f4/) for Open Images.
196
- Change `ckpt_path` in `data/coco_scene_images_transformer.yaml` and `data/open_images_scene_images_transformer.yaml` to point to the downloaded first-stage models.
197
- Download the full COCO/OI datasets and adapt `data_path` in the same files, unless working with the 100 files provided for training and validation suits your needs already.
198
-
199
- Code can be run with
200
- `python main.py --base configs/coco_scene_images_transformer.yaml -t True --gpus 0,`
201
- or
202
- `python main.py --base configs/open_images_scene_images_transformer.yaml -t True --gpus 0,`
203
-
204
- ### Sampling
205
- Train a model as described above or download a pre-trained model:
206
- - [Open Images 1 billion parameter model](https://drive.google.com/file/d/1FEK-Z7hyWJBvFWQF50pzSK9y1W_CJEig/view?usp=sharing), trained for 100 epochs. On 256x256 pixels, FID 41.48±0.21, SceneFID 14.60±0.15, Inception Score 18.47±0.27. The model was trained with 2d crops of images and is thus well-prepared for the task of generating high-resolution images, e.g. 512x512.
207
- - [Open Images distilled version of the above model with 125 million parameters](https://drive.google.com/file/d/1xf89g0mc78J3d8Bx5YhbK4tNRNlOoYaO) allows for sampling on smaller GPUs (4 GB is enough for sampling 256x256 px images). Model was trained for 60 epochs with 10% soft loss, 90% hard loss. On 256x256 pixels, FID 43.07±0.40, SceneFID 15.93±0.19, Inception Score 17.23±0.11.
208
- - [COCO 30 epochs](https://heibox.uni-heidelberg.de/f/0d0b2594e9074c7e9a33/)
209
- - [COCO 60 epochs](https://drive.google.com/file/d/1bInd49g2YulTJBjU32Awyt5qnzxxG5U9/) (find model statistics for both COCO versions in `assets/coco_scene_images_training.svg`)
210
-
211
- When downloading a pre-trained model, remember to change `ckpt_path` in `configs/*project.yaml` to point to your downloaded first-stage model (see ->Training).
212
-
213
- Scene image generation can be run with
214
- `python scripts/make_scene_samples.py --outdir=/some/outdir -r /path/to/pretrained/model --resolution=512,512`
215
-
216
-
217
- ## Training on custom data
218
-
219
- Training on your own dataset can be beneficial to get better tokens and hence better images for your domain.
220
- Those are the steps to follow to make this work:
221
- 1. install the repo with `conda env create -f environment.yaml`, `conda activate taming` and `pip install -e .`
222
- 1. put your .jpg files in a folder `your_folder`
223
- 2. create 2 text files, `xx_train.txt` and `xx_test.txt`, that point to the files in your training and test set respectively (for example `find $(pwd)/your_folder -name "*.jpg" > train.txt`)
224
- 3. adapt `configs/custom_vqgan.yaml` to point to these 2 files
225
- 4. run `python main.py --base configs/custom_vqgan.yaml -t True --gpus 0,1` to
226
- train on two GPUs. Use `--gpus 0,` (with a trailing comma) to train on a single GPU.
227
-
228
- ## Data Preparation
229
-
230
- ### ImageNet
231
- The code will try to download (through [Academic
232
- Torrents](http://academictorrents.com/)) and prepare ImageNet the first time it
233
- is used. However, since ImageNet is quite large, this requires a lot of disk
234
- space and time. If you already have ImageNet on your disk, you can speed things
235
- up by putting the data into
236
- `${XDG_CACHE}/autoencoders/data/ILSVRC2012_{split}/data/` (which defaults to
237
- `~/.cache/autoencoders/data/ILSVRC2012_{split}/data/`), where `{split}` is one
238
- of `train`/`validation`. It should have the following structure:
239
-
240
- ```
241
- ${XDG_CACHE}/autoencoders/data/ILSVRC2012_{split}/data/
242
- ├── n01440764
243
- │ ├── n01440764_10026.JPEG
244
- │ ├── n01440764_10027.JPEG
245
- │ ├── ...
246
- ├── n01443537
247
- │ ├── n01443537_10007.JPEG
248
- │ ├── n01443537_10014.JPEG
249
- │ ├── ...
250
- ├── ...
251
- ```
252
-
253
- If you haven't extracted the data, you can also place
254
- `ILSVRC2012_img_train.tar`/`ILSVRC2012_img_val.tar` (or symlinks to them) into
255
- `${XDG_CACHE}/autoencoders/data/ILSVRC2012_train/` /
256
- `${XDG_CACHE}/autoencoders/data/ILSVRC2012_validation/`, which will then be
257
- extracted into above structure without downloading it again. Note that this
258
- will only happen if neither a folder
259
- `${XDG_CACHE}/autoencoders/data/ILSVRC2012_{split}/data/` nor a file
260
- `${XDG_CACHE}/autoencoders/data/ILSVRC2012_{split}/.ready` exist. Remove them
261
- if you want to force running the dataset preparation again.
262
-
263
- You will then need to prepare the depth data using
264
- [MiDaS](https://github.com/intel-isl/MiDaS). Create a symlink
265
- `data/imagenet_depth` pointing to a folder with two subfolders `train` and
266
- `val`, each mirroring the structure of the corresponding ImageNet folder
267
- described above and containing a `png` file for each of ImageNet's `JPEG`
268
- files. The `png` encodes `float32` depth values obtained from MiDaS as RGBA
269
- images. We provide the script `scripts/extract_depth.py` to generate this data.
270
- **Please note** that this script uses [MiDaS via PyTorch
271
- Hub](https://pytorch.org/hub/intelisl_midas_v2/). When we prepared the data,
272
- the hub provided the [MiDaS
273
- v2.0](https://github.com/intel-isl/MiDaS/releases/tag/v2) version, but now it
274
- provides a v2.1 version. We haven't tested our models with depth maps obtained
275
- via v2.1 and if you want to make sure that things work as expected, you must
276
- adjust the script to make sure it explicitly uses
277
- [v2.0](https://github.com/intel-isl/MiDaS/releases/tag/v2)!
278
-
279
- ### CelebA-HQ
280
- Create a symlink `data/celebahq` pointing to a folder containing the `.npy`
281
- files of CelebA-HQ (instructions to obtain them can be found in the [PGGAN
282
- repository](https://github.com/tkarras/progressive_growing_of_gans)).
283
-
284
- ### FFHQ
285
- Create a symlink `data/ffhq` pointing to the `images1024x1024` folder obtained
286
- from the [FFHQ repository](https://github.com/NVlabs/ffhq-dataset).
287
-
288
- ### S-FLCKR
289
- Unfortunately, we are not allowed to distribute the images we collected for the
290
- S-FLCKR dataset and can therefore only give a description of how it was produced.
291
- There are many resources on [collecting images from the
292
- web](https://github.com/adrianmrit/flickrdatasets) to get started.
293
- We collected sufficiently large images from [flickr](https://www.flickr.com)
294
- (see `data/flickr_tags.txt` for a full list of tags used to find images)
295
- and various [subreddits](https://www.reddit.com/r/sfwpornnetwork/wiki/network)
296
- (see `data/subreddits.txt` for all subreddits that were used).
297
- Overall, we collected 107625 images, and split them randomly into 96861
298
- training images and 10764 validation images. We then obtained segmentation
299
- masks for each image using [DeepLab v2](https://arxiv.org/abs/1606.00915)
300
- trained on [COCO-Stuff](https://arxiv.org/abs/1612.03716). We used a [PyTorch
301
- reimplementation](https://github.com/kazuto1011/deeplab-pytorch) and include an
302
- example script for this process in `scripts/extract_segmentation.py`.
303
-
304
- ### COCO
305
- Create a symlink `data/coco` containing the images from the 2017 split in
306
- `train2017` and `val2017`, and their annotations in `annotations`. Files can be
307
- obtained from the [COCO webpage](https://cocodataset.org/). In addition, we use
308
- the [Stuff+thing PNG-style annotations on COCO 2017
309
- trainval](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip)
310
- annotations from [COCO-Stuff](https://github.com/nightrome/cocostuff), which
311
- should be placed under `data/cocostuffthings`.
312
-
313
- ### ADE20k
314
- Create a symlink `data/ade20k_root` containing the contents of
315
- [ADEChallengeData2016.zip](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip)
316
- from the [MIT Scene Parsing Benchmark](http://sceneparsing.csail.mit.edu/).
317
-
318
- ## Training models
319
-
320
- ### FacesHQ
321
-
322
- Train a VQGAN with
323
- ```
324
- python main.py --base configs/faceshq_vqgan.yaml -t True --gpus 0,
325
- ```
326
-
327
- Then, adjust the checkpoint path of the config key
328
- `model.params.first_stage_config.params.ckpt_path` in
329
- `configs/faceshq_transformer.yaml` (or download
330
- [2020-11-09T13-33-36_faceshq_vqgan](https://k00.fr/uxy5usa9) and place into `logs`, which
331
- corresponds to the preconfigured checkpoint path), then run
332
- ```
333
- python main.py --base configs/faceshq_transformer.yaml -t True --gpus 0,
334
- ```
335
-
336
- ### D-RIN
337
-
338
- Train a VQGAN on ImageNet with
339
- ```
340
- python main.py --base configs/imagenet_vqgan.yaml -t True --gpus 0,
341
- ```
342
-
343
- or download a pretrained one from [2020-09-23T17-56-33_imagenet_vqgan](https://k00.fr/u0j2dtac)
344
- and place under `logs`. If you trained your own, adjust the path in the config
345
- key `model.params.first_stage_config.params.ckpt_path` of
346
- `configs/drin_transformer.yaml`.
347
-
348
- Train a VQGAN on Depth Maps of ImageNet with
349
- ```
350
- python main.py --base configs/imagenetdepth_vqgan.yaml -t True --gpus 0,
351
- ```
352
-
353
- or download a pretrained one from [2020-11-03T15-34-24_imagenetdepth_vqgan](https://k00.fr/55rlxs6i)
354
- and place under `logs`. If you trained your own, adjust the path in the config
355
- key `model.params.cond_stage_config.params.ckpt_path` of
356
- `configs/drin_transformer.yaml`.
357
-
358
- To train the transformer, run
359
- ```
360
- python main.py --base configs/drin_transformer.yaml -t True --gpus 0,
361
- ```
362
-
363
- ## More Resources
364
- ### Comparing Different First Stage Models
365
- The reconstruction and compression capabilities of different first stage models can be analyzed in this [colab notebook](https://colab.research.google.com/github/CompVis/taming-transformers/blob/master/scripts/reconstruction_usage.ipynb).
366
- In particular, the notebook compares two VQGANs with a downsampling factor of f=16 for each and codebook dimensionality of 1024 and 16384,
367
- a VQGAN with f=8 and 8192 codebook entries and the discrete autoencoder of OpenAI's [DALL-E](https://github.com/openai/DALL-E) (which has f=8 and 8192
368
- codebook entries).
369
- ![firststages1](assets/first_stage_squirrels.png)
370
- ![firststages2](assets/first_stage_mushrooms.png)
371
-
372
- ### Other
373
- - A [video summary](https://www.youtube.com/watch?v=o7dqGcLDf0A&feature=emb_imp_woyt) by [Two Minute Papers](https://www.youtube.com/channel/UCbfYPyITQ-7l4upoX8nvctg).
374
- - A [video summary](https://www.youtube.com/watch?v=-wDSDtIAyWQ) by [Gradient Dude](https://www.youtube.com/c/GradientDude/about).
375
- - A [weights and biases report summarizing the paper](https://wandb.ai/ayush-thakur/taming-transformer/reports/-Overview-Taming-Transformers-for-High-Resolution-Image-Synthesis---Vmlldzo0NjEyMTY)
376
- by [ayulockin](https://github.com/ayulockin).
377
- - A [video summary](https://www.youtube.com/watch?v=JfUTd8fjtX8&feature=emb_imp_woyt) by [What's AI](https://www.youtube.com/channel/UCUzGQrN-lyyc0BWTYoJM_Sg).
378
- - Take a look at [ak9250's notebook](https://github.com/ak9250/taming-transformers/blob/master/tamingtransformerscolab.ipynb) if you want to run the streamlit demos on Colab.
379
-
380
- ### Text-to-Image Optimization via CLIP
381
- VQGAN has been successfully used as an image generator guided by the [CLIP](https://github.com/openai/CLIP) model, both for pure image generation
382
- from scratch and image-to-image translation. We recommend the following notebooks/videos/resources:
383
-
384
- - [Advadnouns](https://twitter.com/advadnoun/status/1389316507134357506) Patreon and corresponding LatentVision notebooks: https://www.patreon.com/patronizeme
385
- - The [notebook]( https://colab.research.google.com/drive/1L8oL-vLJXVcRzCFbPwOoMkPKJ8-aYdPN) of [Rivers Have Wings](https://twitter.com/RiversHaveWings).
386
- - A [video](https://www.youtube.com/watch?v=90QDe6DQXF4&t=12s) explanation by [Dot CSV](https://www.youtube.com/channel/UCy5znSnfMsDwaLlROnZ7Qbg) (in Spanish, but English subtitles are available)
387
-
388
- ![txt2img](assets/birddrawnbyachild.png)
389
-
390
- Text prompt: *'A bird drawn by a child'*
391
-
392
- ## Shout-outs
393
- Thanks to everyone who makes their code and models available. In particular,
394
-
395
- - The architecture of our VQGAN is inspired by [Denoising Diffusion Probabilistic Models](https://github.com/hojonathanho/diffusion)
396
- - The very hackable transformer implementation [minGPT](https://github.com/karpathy/minGPT)
397
- - The good ol' [PatchGAN](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) and [Learned Perceptual Similarity (LPIPS)](https://github.com/richzhang/PerceptualSimilarity)
398
-
399
- ## BibTeX
400
-
401
- ```
402
- @misc{esser2020taming,
403
- title={Taming Transformers for High-Resolution Image Synthesis},
404
- author={Patrick Esser and Robin Rombach and Björn Ommer},
405
- year={2020},
406
- eprint={2012.09841},
407
- archivePrefix={arXiv},
408
- primaryClass={cs.CV}
409
- }
410
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/assets/birddrawnbyachild.png DELETED

Git LFS Details

  • SHA256: 165778bb85e86f8aaaed38eee4d33f62ab1ef237d890229cfa2e0685f5064127
  • Pointer size: 132 Bytes
  • Size of remote file: 1.61 MB
taming-transformers/assets/coco_scene_images_training.svg DELETED
taming-transformers/assets/drin.jpg DELETED

Git LFS Details

  • SHA256: 83652380049c45af8c1b75216ded141b3d064cca8154eb2875337b4d5182152b
  • Pointer size: 131 Bytes
  • Size of remote file: 286 kB
taming-transformers/assets/faceshq.jpg DELETED

Git LFS Details

  • SHA256: 6f20c66b935086464db0bad4b5dd90fadb3fb1d20373cb02c415ec4a9cfb989c
  • Pointer size: 131 Bytes
  • Size of remote file: 307 kB
taming-transformers/assets/first_stage_mushrooms.png DELETED

Git LFS Details

  • SHA256: 425218621d5e01ea30c9e51fa0969ad36c22063a405dc6f6ccb6dd8db64000a0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.35 MB
taming-transformers/assets/first_stage_squirrels.png DELETED

Git LFS Details

  • SHA256: b5f234ee1566d6c537339a7110a1a1df088d527812097c19ac61f01b335cd6ae
  • Pointer size: 132 Bytes
  • Size of remote file: 1.42 MB
taming-transformers/assets/imagenet.png DELETED

Git LFS Details

  • SHA256: 2057d65399435ba17f265ad7ff421a9aabfb6051dec00bec5a37383dfccb2e54
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
taming-transformers/assets/lake_in_the_mountains.png DELETED

Git LFS Details

  • SHA256: 9d0fa79e39e09c1eb398b1643cf3c5ee2cc94cc6f394771d20cb907838b36852
  • Pointer size: 131 Bytes
  • Size of remote file: 565 kB
taming-transformers/assets/mountain.jpeg DELETED

Git LFS Details

  • SHA256: 22859310b39f5011abc78e36970fdb0f3d62a33817d9301bde3d1252a11bc0bc
  • Pointer size: 131 Bytes
  • Size of remote file: 436 kB
taming-transformers/assets/scene_images_samples.svg DELETED
taming-transformers/assets/stormy.jpeg DELETED

Git LFS Details

  • SHA256: 13b9cde8e62c3fb145c4dd3d13c0d450e023f2405824f0a74b4e3f06411ce884
  • Pointer size: 131 Bytes
  • Size of remote file: 718 kB
taming-transformers/assets/sunset_and_ocean.jpg DELETED

Git LFS Details

  • SHA256: 0c967b3073a56221eda2cc5418efb8535a85d87f4b40cd487d42abae8135b341
  • Pointer size: 131 Bytes
  • Size of remote file: 322 kB
taming-transformers/assets/teaser.png DELETED

Git LFS Details

  • SHA256: 988481993d7911b41b38a86341e016a47729807552ce667f5713bca1118a7b11
  • Pointer size: 131 Bytes
  • Size of remote file: 359 kB
taming-transformers/configs/coco_cond_stage.yaml DELETED
@@ -1,49 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-06
3
- target: taming.models.vqgan.VQSegmentationModel
4
- params:
5
- embed_dim: 256
6
- n_embed: 1024
7
- image_key: "segmentation"
8
- n_labels: 183
9
- ddconfig:
10
- double_z: false
11
- z_channels: 256
12
- resolution: 256
13
- in_channels: 183
14
- out_ch: 183
15
- ch: 128
16
- ch_mult:
17
- - 1
18
- - 1
19
- - 2
20
- - 2
21
- - 4
22
- num_res_blocks: 2
23
- attn_resolutions:
24
- - 16
25
- dropout: 0.0
26
-
27
- lossconfig:
28
- target: taming.modules.losses.segmentation.BCELossWithQuant
29
- params:
30
- codebook_weight: 1.0
31
-
32
- data:
33
- target: main.DataModuleFromConfig
34
- params:
35
- batch_size: 12
36
- train:
37
- target: taming.data.coco.CocoImagesAndCaptionsTrain
38
- params:
39
- size: 296
40
- crop_size: 256
41
- onehot_segmentation: true
42
- use_stuffthing: true
43
- validation:
44
- target: taming.data.coco.CocoImagesAndCaptionsValidation
45
- params:
46
- size: 256
47
- crop_size: 256
48
- onehot_segmentation: true
49
- use_stuffthing: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/coco_scene_images_transformer.yaml DELETED
@@ -1,80 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-06
3
- target: taming.models.cond_transformer.Net2NetTransformer
4
- params:
5
- cond_stage_key: objects_bbox
6
- transformer_config:
7
- target: taming.modules.transformer.mingpt.GPT
8
- params:
9
- vocab_size: 8192
10
- block_size: 348 # = 256 + 92 = dim(vqgan_latent_space,16x16) + dim(conditional_builder.embedding_dim)
11
- n_layer: 40
12
- n_head: 16
13
- n_embd: 1408
14
- embd_pdrop: 0.1
15
- resid_pdrop: 0.1
16
- attn_pdrop: 0.1
17
- first_stage_config:
18
- target: taming.models.vqgan.VQModel
19
- params:
20
- ckpt_path: /path/to/coco_epoch117.ckpt # https://heibox.uni-heidelberg.de/f/78dea9589974474c97c1/
21
- embed_dim: 256
22
- n_embed: 8192
23
- ddconfig:
24
- double_z: false
25
- z_channels: 256
26
- resolution: 256
27
- in_channels: 3
28
- out_ch: 3
29
- ch: 128
30
- ch_mult:
31
- - 1
32
- - 1
33
- - 2
34
- - 2
35
- - 4
36
- num_res_blocks: 2
37
- attn_resolutions:
38
- - 16
39
- dropout: 0.0
40
- lossconfig:
41
- target: taming.modules.losses.DummyLoss
42
- cond_stage_config:
43
- target: taming.models.dummy_cond_stage.DummyCondStage
44
- params:
45
- conditional_key: objects_bbox
46
-
47
- data:
48
- target: main.DataModuleFromConfig
49
- params:
50
- batch_size: 6
51
- train:
52
- target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
53
- params:
54
- data_path: data/coco_annotations_100 # substitute with path to full dataset
55
- split: train
56
- keys: [image, objects_bbox, file_name, annotations]
57
- no_tokens: 8192
58
- target_image_size: 256
59
- min_object_area: 0.00001
60
- min_objects_per_image: 2
61
- max_objects_per_image: 30
62
- crop_method: random-1d
63
- random_flip: true
64
- use_group_parameter: true
65
- encode_crop: true
66
- validation:
67
- target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
68
- params:
69
- data_path: data/coco_annotations_100 # substitute with path to full dataset
70
- split: validation
71
- keys: [image, objects_bbox, file_name, annotations]
72
- no_tokens: 8192
73
- target_image_size: 256
74
- min_object_area: 0.00001
75
- min_objects_per_image: 2
76
- max_objects_per_image: 30
77
- crop_method: center
78
- random_flip: false
79
- use_group_parameter: true
80
- encode_crop: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/custom_vqgan.yaml DELETED
@@ -1,43 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-6
3
- target: taming.models.vqgan.VQModel
4
- params:
5
- embed_dim: 256
6
- n_embed: 1024
7
- ddconfig:
8
- double_z: False
9
- z_channels: 256
10
- resolution: 256
11
- in_channels: 3
12
- out_ch: 3
13
- ch: 128
14
- ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1
15
- num_res_blocks: 2
16
- attn_resolutions: [16]
17
- dropout: 0.0
18
-
19
- lossconfig:
20
- target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
21
- params:
22
- disc_conditional: False
23
- disc_in_channels: 3
24
- disc_start: 10000
25
- disc_weight: 0.8
26
- codebook_weight: 1.0
27
-
28
- data:
29
- target: main.DataModuleFromConfig
30
- params:
31
- batch_size: 5
32
- num_workers: 8
33
- train:
34
- target: taming.data.custom.CustomTrain
35
- params:
36
- training_images_list_file: some/training.txt
37
- size: 256
38
- validation:
39
- target: taming.data.custom.CustomTest
40
- params:
41
- test_images_list_file: some/test.txt
42
- size: 256
43
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/drin_transformer.yaml DELETED
@@ -1,77 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-06
3
- target: taming.models.cond_transformer.Net2NetTransformer
4
- params:
5
- cond_stage_key: depth
6
- transformer_config:
7
- target: taming.modules.transformer.mingpt.GPT
8
- params:
9
- vocab_size: 1024
10
- block_size: 512
11
- n_layer: 24
12
- n_head: 16
13
- n_embd: 1024
14
- first_stage_config:
15
- target: taming.models.vqgan.VQModel
16
- params:
17
- ckpt_path: logs/2020-09-23T17-56-33_imagenet_vqgan/checkpoints/last.ckpt
18
- embed_dim: 256
19
- n_embed: 1024
20
- ddconfig:
21
- double_z: false
22
- z_channels: 256
23
- resolution: 256
24
- in_channels: 3
25
- out_ch: 3
26
- ch: 128
27
- ch_mult:
28
- - 1
29
- - 1
30
- - 2
31
- - 2
32
- - 4
33
- num_res_blocks: 2
34
- attn_resolutions:
35
- - 16
36
- dropout: 0.0
37
- lossconfig:
38
- target: taming.modules.losses.DummyLoss
39
- cond_stage_config:
40
- target: taming.models.vqgan.VQModel
41
- params:
42
- ckpt_path: logs/2020-11-03T15-34-24_imagenetdepth_vqgan/checkpoints/last.ckpt
43
- embed_dim: 256
44
- n_embed: 1024
45
- ddconfig:
46
- double_z: false
47
- z_channels: 256
48
- resolution: 256
49
- in_channels: 1
50
- out_ch: 1
51
- ch: 128
52
- ch_mult:
53
- - 1
54
- - 1
55
- - 2
56
- - 2
57
- - 4
58
- num_res_blocks: 2
59
- attn_resolutions:
60
- - 16
61
- dropout: 0.0
62
- lossconfig:
63
- target: taming.modules.losses.DummyLoss
64
-
65
- data:
66
- target: main.DataModuleFromConfig
67
- params:
68
- batch_size: 2
69
- num_workers: 8
70
- train:
71
- target: taming.data.imagenet.RINTrainWithDepth
72
- params:
73
- size: 256
74
- validation:
75
- target: taming.data.imagenet.RINValidationWithDepth
76
- params:
77
- size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/faceshq_transformer.yaml DELETED
@@ -1,61 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-06
3
- target: taming.models.cond_transformer.Net2NetTransformer
4
- params:
5
- cond_stage_key: coord
6
- transformer_config:
7
- target: taming.modules.transformer.mingpt.GPT
8
- params:
9
- vocab_size: 1024
10
- block_size: 512
11
- n_layer: 24
12
- n_head: 16
13
- n_embd: 1024
14
- first_stage_config:
15
- target: taming.models.vqgan.VQModel
16
- params:
17
- ckpt_path: logs/2020-11-09T13-33-36_faceshq_vqgan/checkpoints/last.ckpt
18
- embed_dim: 256
19
- n_embed: 1024
20
- ddconfig:
21
- double_z: false
22
- z_channels: 256
23
- resolution: 256
24
- in_channels: 3
25
- out_ch: 3
26
- ch: 128
27
- ch_mult:
28
- - 1
29
- - 1
30
- - 2
31
- - 2
32
- - 4
33
- num_res_blocks: 2
34
- attn_resolutions:
35
- - 16
36
- dropout: 0.0
37
- lossconfig:
38
- target: taming.modules.losses.DummyLoss
39
- cond_stage_config:
40
- target: taming.modules.misc.coord.CoordStage
41
- params:
42
- n_embed: 1024
43
- down_factor: 16
44
-
45
- data:
46
- target: main.DataModuleFromConfig
47
- params:
48
- batch_size: 2
49
- num_workers: 8
50
- train:
51
- target: taming.data.faceshq.FacesHQTrain
52
- params:
53
- size: 256
54
- crop_size: 256
55
- coord: True
56
- validation:
57
- target: taming.data.faceshq.FacesHQValidation
58
- params:
59
- size: 256
60
- crop_size: 256
61
- coord: True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/faceshq_vqgan.yaml DELETED
@@ -1,42 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-6
3
- target: taming.models.vqgan.VQModel
4
- params:
5
- embed_dim: 256
6
- n_embed: 1024
7
- ddconfig:
8
- double_z: False
9
- z_channels: 256
10
- resolution: 256
11
- in_channels: 3
12
- out_ch: 3
13
- ch: 128
14
- ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1
15
- num_res_blocks: 2
16
- attn_resolutions: [16]
17
- dropout: 0.0
18
-
19
- lossconfig:
20
- target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
21
- params:
22
- disc_conditional: False
23
- disc_in_channels: 3
24
- disc_start: 30001
25
- disc_weight: 0.8
26
- codebook_weight: 1.0
27
-
28
- data:
29
- target: main.DataModuleFromConfig
30
- params:
31
- batch_size: 3
32
- num_workers: 8
33
- train:
34
- target: taming.data.faceshq.FacesHQTrain
35
- params:
36
- size: 256
37
- crop_size: 256
38
- validation:
39
- target: taming.data.faceshq.FacesHQValidation
40
- params:
41
- size: 256
42
- crop_size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/imagenet_vqgan.yaml DELETED
@@ -1,42 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-6
3
- target: taming.models.vqgan.VQModel
4
- params:
5
- embed_dim: 256
6
- n_embed: 1024
7
- ddconfig:
8
- double_z: False
9
- z_channels: 256
10
- resolution: 256
11
- in_channels: 3
12
- out_ch: 3
13
- ch: 128
14
- ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1
15
- num_res_blocks: 2
16
- attn_resolutions: [16]
17
- dropout: 0.0
18
-
19
- lossconfig:
20
- target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
21
- params:
22
- disc_conditional: False
23
- disc_in_channels: 3
24
- disc_start: 250001
25
- disc_weight: 0.8
26
- codebook_weight: 1.0
27
-
28
- data:
29
- target: main.DataModuleFromConfig
30
- params:
31
- batch_size: 12
32
- num_workers: 24
33
- train:
34
- target: taming.data.imagenet.ImageNetTrain
35
- params:
36
- config:
37
- size: 256
38
- validation:
39
- target: taming.data.imagenet.ImageNetValidation
40
- params:
41
- config:
42
- size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/imagenetdepth_vqgan.yaml DELETED
@@ -1,41 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-6
3
- target: taming.models.vqgan.VQModel
4
- params:
5
- embed_dim: 256
6
- n_embed: 1024
7
- image_key: depth
8
- ddconfig:
9
- double_z: False
10
- z_channels: 256
11
- resolution: 256
12
- in_channels: 1
13
- out_ch: 1
14
- ch: 128
15
- ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1
16
- num_res_blocks: 2
17
- attn_resolutions: [16]
18
- dropout: 0.0
19
-
20
- lossconfig:
21
- target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
22
- params:
23
- disc_conditional: False
24
- disc_in_channels: 1
25
- disc_start: 50001
26
- disc_weight: 0.75
27
- codebook_weight: 1.0
28
-
29
- data:
30
- target: main.DataModuleFromConfig
31
- params:
32
- batch_size: 3
33
- num_workers: 8
34
- train:
35
- target: taming.data.imagenet.ImageNetTrainWithDepth
36
- params:
37
- size: 256
38
- validation:
39
- target: taming.data.imagenet.ImageNetValidationWithDepth
40
- params:
41
- size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/open_images_scene_images_transformer.yaml DELETED
@@ -1,86 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-06
3
- target: taming.models.cond_transformer.Net2NetTransformer
4
- params:
5
- cond_stage_key: objects_bbox
6
- transformer_config:
7
- target: taming.modules.transformer.mingpt.GPT
8
- params:
9
- vocab_size: 8192
10
- block_size: 348 # = 256 + 92 = dim(vqgan_latent_space,16x16) + dim(conditional_builder.embedding_dim)
11
- n_layer: 36
12
- n_head: 16
13
- n_embd: 1536
14
- embd_pdrop: 0.1
15
- resid_pdrop: 0.1
16
- attn_pdrop: 0.1
17
- first_stage_config:
18
- target: taming.models.vqgan.VQModel
19
- params:
20
- ckpt_path: /path/to/coco_oi_epoch12.ckpt # https://heibox.uni-heidelberg.de/f/461d9a9f4fcf48ab84f4/
21
- embed_dim: 256
22
- n_embed: 8192
23
- ddconfig:
24
- double_z: false
25
- z_channels: 256
26
- resolution: 256
27
- in_channels: 3
28
- out_ch: 3
29
- ch: 128
30
- ch_mult:
31
- - 1
32
- - 1
33
- - 2
34
- - 2
35
- - 4
36
- num_res_blocks: 2
37
- attn_resolutions:
38
- - 16
39
- dropout: 0.0
40
- lossconfig:
41
- target: taming.modules.losses.DummyLoss
42
- cond_stage_config:
43
- target: taming.models.dummy_cond_stage.DummyCondStage
44
- params:
45
- conditional_key: objects_bbox
46
-
47
- data:
48
- target: main.DataModuleFromConfig
49
- params:
50
- batch_size: 6
51
- train:
52
- target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
53
- params:
54
- data_path: data/open_images_annotations_100 # substitute with path to full dataset
55
- split: train
56
- keys: [image, objects_bbox, file_name, annotations]
57
- no_tokens: 8192
58
- target_image_size: 256
59
- category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility
60
- category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco
61
- min_object_area: 0.0001
62
- min_objects_per_image: 2
63
- max_objects_per_image: 30
64
- crop_method: random-2d
65
- random_flip: true
66
- use_group_parameter: true
67
- use_additional_parameters: true
68
- encode_crop: true
69
- validation:
70
- target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
71
- params:
72
- data_path: data/open_images_annotations_100 # substitute with path to full dataset
73
- split: validation
74
- keys: [image, objects_bbox, file_name, annotations]
75
- no_tokens: 8192
76
- target_image_size: 256
77
- category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility
78
- category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco
79
- min_object_area: 0.0001
80
- min_objects_per_image: 2
81
- max_objects_per_image: 30
82
- crop_method: center
83
- random_flip: false
84
- use_group_parameter: true
85
- use_additional_parameters: true
86
- encode_crop: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/configs/sflckr_cond_stage.yaml DELETED
@@ -1,43 +0,0 @@
1
- model:
2
- base_learning_rate: 4.5e-06
3
- target: taming.models.vqgan.VQSegmentationModel
4
- params:
5
- embed_dim: 256
6
- n_embed: 1024
7
- image_key: "segmentation"
8
- n_labels: 182
9
- ddconfig:
10
- double_z: false
11
- z_channels: 256
12
- resolution: 256
13
- in_channels: 182
14
- out_ch: 182
15
- ch: 128
16
- ch_mult:
17
- - 1
18
- - 1
19
- - 2
20
- - 2
21
- - 4
22
- num_res_blocks: 2
23
- attn_resolutions:
24
- - 16
25
- dropout: 0.0
26
-
27
- lossconfig:
28
- target: taming.modules.losses.segmentation.BCELossWithQuant
29
- params:
30
- codebook_weight: 1.0
31
-
32
- data:
33
- target: cutlit.DataModuleFromConfig
34
- params:
35
- batch_size: 12
36
- train:
37
- target: taming.data.sflckr.Examples # adjust
38
- params:
39
- size: 256
40
- validation:
41
- target: taming.data.sflckr.Examples # adjust
42
- params:
43
- size: 256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/data/ade20k_examples.txt DELETED
@@ -1,30 +0,0 @@
1
- ADE_val_00000636.jpg
2
- ADE_val_00000126.jpg
3
- ADE_val_00001412.jpg
4
- ADE_val_00001845.jpg
5
- ADE_val_00001200.jpg
6
- ADE_val_00001578.jpg
7
- ADE_val_00000880.jpg
8
- ADE_val_00000875.jpg
9
- ADE_val_00000123.jpg
10
- ADE_val_00001209.jpg
11
- ADE_val_00000203.jpg
12
- ADE_val_00001851.jpg
13
- ADE_val_00001583.jpg
14
- ADE_val_00000287.jpg
15
- ADE_val_00001947.jpg
16
- ADE_val_00000262.jpg
17
- ADE_val_00000603.jpg
18
- ADE_val_00000125.jpg
19
- ADE_val_00001698.jpg
20
- ADE_val_00001966.jpg
21
- ADE_val_00000532.jpg
22
- ADE_val_00001177.jpg
23
- ADE_val_00000734.jpg
24
- ADE_val_00001498.jpg
25
- ADE_val_00001766.jpg
26
- ADE_val_00000303.jpg
27
- ADE_val_00000509.jpg
28
- ADE_val_00000573.jpg
29
- ADE_val_00000289.jpg
30
- ADE_val_00001388.jpg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
taming-transformers/data/ade20k_images/ADE_val_00000123.jpg DELETED
Binary file (8.73 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000125.jpg DELETED
Binary file (41.2 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000126.jpg DELETED
Binary file (60.6 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000203.jpg DELETED
Binary file (16 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000262.jpg DELETED
Binary file (12.7 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000287.jpg DELETED
Binary file (51.6 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000289.jpg DELETED
Binary file (13.6 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000303.jpg DELETED
Binary file (45.2 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000509.jpg DELETED
Binary file (68.3 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000532.jpg DELETED
Binary file (52.2 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000573.jpg DELETED
Binary file (39.6 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000603.jpg DELETED
Binary file (17.5 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000636.jpg DELETED
Binary file (48.5 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000734.jpg DELETED
Binary file (17.9 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000875.jpg DELETED
Binary file (40.3 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00000880.jpg DELETED
Binary file (53.7 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00001177.jpg DELETED
Binary file (22.7 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00001200.jpg DELETED
Binary file (74.7 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00001209.jpg DELETED
Binary file (16.6 kB)
 
taming-transformers/data/ade20k_images/ADE_val_00001388.jpg DELETED
Binary file (55.1 kB)