tungmtp committed
Commit bbfbbcc · verified · 1 parent: 4de9a94

Delete Florence-2-Flux-Large

Florence-2-Flux-Large/.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
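These are the stock Hugging Face LFS rules: any file matching one of the patterns (here, `model.safetensors`) is stored as a Git LFS pointer rather than a regular blob. A minimal sketch for checking which files of a repo ended up LFS-tracked, using `huggingface_hub` and assuming the `gokaygokay/Florence-2-Flux-Large` repo id taken from the deleted README below:

```python
from huggingface_hub import HfApi

# Repo id assumed from the README below, not from this .gitattributes file.
info = HfApi().model_info("gokaygokay/Florence-2-Flux-Large", files_metadata=True)
for sibling in info.siblings:
    if sibling.lfs is not None:  # set only for files covered by the LFS rules
        print(sibling.rfilename, sibling.lfs)
```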
 
Florence-2-Flux-Large/README.md DELETED
@@ -1,59 +0,0 @@
1
- ---
2
- license: apache-2.0
3
- language:
4
- - en
5
- library_name: transformers
6
- pipeline_tag: image-text-to-text
7
- tags:
8
- - art
9
- base_model: microsoft/Florence-2-large
10
- datasets:
11
- - kadirnar/fluxdev_controlnet_16k
12
- ---
13
-
14
- ```
15
- pip install -q datasets flash_attn timm einops
16
- ```
17
-
18
- ```python
19
-
20
- from transformers import AutoModelForCausalLM, AutoProcessor
21
- import torch
22
-
23
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24
-
25
- model = AutoModelForCausalLM.from_pretrained("gokaygokay/Florence-2-Flux-Large", trust_remote_code=True).to(device).eval()
26
- processor = AutoProcessor.from_pretrained("gokaygokay/Florence-2-Flux-Large", trust_remote_code=True)
27
-
28
- # Function to run the model on an example
29
- def run_example(task_prompt, text_input, image):
30
- prompt = task_prompt + text_input
31
-
32
- # Ensure the image is in RGB mode
33
- if image.mode != "RGB":
34
- image = image.convert("RGB")
35
-
36
- inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
37
- generated_ids = model.generate(
38
- input_ids=inputs["input_ids"],
39
- pixel_values=inputs["pixel_values"],
40
- max_new_tokens=1024,
41
- num_beams=3,
42
- repetition_penalty=1.10,
43
- )
44
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
45
- parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
46
- return parsed_answer
47
-
48
- from PIL import Image
49
- import requests
51
-
52
- url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
53
- image = Image.open(requests.get(url, stream=True).raw)
54
- answer = run_example("<DESCRIPTION>", "Describe this image in great detail.", image)
55
-
56
- final_answer = answer["<DESCRIPTION>"]
57
- print(final_answer)
58
-
59
- ```
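The snippet above loads the checkpoint in full precision. Since `config.json` below declares `"torch_dtype": "float16"` and the safetensors file weighs in at ~3.3 GB, a half-precision load is a natural variant; a minimal sketch, assuming the same repo id and a CUDA device with fp16 support:

```python
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    "gokaygokay/Florence-2-Flux-Large",
    torch_dtype=dtype,
    trust_remote_code=True,
).to(device).eval()
processor = AutoProcessor.from_pretrained(
    "gokaygokay/Florence-2-Flux-Large", trust_remote_code=True
)
# Cast pixel values to the model dtype before calling generate(),
# e.g. inputs["pixel_values"].to(device, dtype).
```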
 
Florence-2-Flux-Large/added_tokens.json DELETED
@@ -1,1026 +0,0 @@
1
- {
2
- "</cap>": 51270,
3
- "</dcap>": 51274,
4
- "</grounding>": 51276,
5
- "</ncap>": 51272,
6
- "</ocr>": 50268,
7
- "</od>": 50266,
8
- "</poly>": 51287,
9
- "</proposal>": 51285,
10
- "</region_cap>": 51281,
11
- "</region_to_desciption>": 51283,
12
- "</seg>": 51278,
13
- "<and>": 51288,
14
- "<cap>": 51269,
15
- "<dcap>": 51273,
16
- "<grounding>": 51275,
17
- "<loc_0>": 50269,
18
- "<loc_100>": 50369,
19
- "<loc_101>": 50370,
20
- "<loc_102>": 50371,
21
- "<loc_103>": 50372,
22
- "<loc_104>": 50373,
23
- "<loc_105>": 50374,
24
- "<loc_106>": 50375,
25
- "<loc_107>": 50376,
26
- "<loc_108>": 50377,
27
- "<loc_109>": 50378,
28
- "<loc_10>": 50279,
29
- "<loc_110>": 50379,
30
- "<loc_111>": 50380,
31
- "<loc_112>": 50381,
32
- "<loc_113>": 50382,
33
- "<loc_114>": 50383,
34
- "<loc_115>": 50384,
35
- "<loc_116>": 50385,
36
- "<loc_117>": 50386,
37
- "<loc_118>": 50387,
38
- "<loc_119>": 50388,
39
- "<loc_11>": 50280,
40
- "<loc_120>": 50389,
41
- "<loc_121>": 50390,
42
- "<loc_122>": 50391,
43
- "<loc_123>": 50392,
44
- "<loc_124>": 50393,
45
- "<loc_125>": 50394,
46
- "<loc_126>": 50395,
47
- "<loc_127>": 50396,
48
- "<loc_128>": 50397,
49
- "<loc_129>": 50398,
50
- "<loc_12>": 50281,
51
- "<loc_130>": 50399,
52
- "<loc_131>": 50400,
53
- "<loc_132>": 50401,
54
- "<loc_133>": 50402,
55
- "<loc_134>": 50403,
56
- "<loc_135>": 50404,
57
- "<loc_136>": 50405,
58
- "<loc_137>": 50406,
59
- "<loc_138>": 50407,
60
- "<loc_139>": 50408,
61
- "<loc_13>": 50282,
62
- "<loc_140>": 50409,
63
- "<loc_141>": 50410,
64
- "<loc_142>": 50411,
65
- "<loc_143>": 50412,
66
- "<loc_144>": 50413,
67
- "<loc_145>": 50414,
68
- "<loc_146>": 50415,
69
- "<loc_147>": 50416,
70
- "<loc_148>": 50417,
71
- "<loc_149>": 50418,
72
- "<loc_14>": 50283,
73
- "<loc_150>": 50419,
74
- "<loc_151>": 50420,
75
- "<loc_152>": 50421,
76
- "<loc_153>": 50422,
77
- "<loc_154>": 50423,
78
- "<loc_155>": 50424,
79
- "<loc_156>": 50425,
80
- "<loc_157>": 50426,
81
- "<loc_158>": 50427,
82
- "<loc_159>": 50428,
83
- "<loc_15>": 50284,
84
- "<loc_160>": 50429,
85
- "<loc_161>": 50430,
86
- "<loc_162>": 50431,
87
- "<loc_163>": 50432,
88
- "<loc_164>": 50433,
89
- "<loc_165>": 50434,
90
- "<loc_166>": 50435,
91
- "<loc_167>": 50436,
92
- "<loc_168>": 50437,
93
- "<loc_169>": 50438,
94
- "<loc_16>": 50285,
95
- "<loc_170>": 50439,
96
- "<loc_171>": 50440,
97
- "<loc_172>": 50441,
98
- "<loc_173>": 50442,
99
- "<loc_174>": 50443,
100
- "<loc_175>": 50444,
101
- "<loc_176>": 50445,
102
- "<loc_177>": 50446,
103
- "<loc_178>": 50447,
104
- "<loc_179>": 50448,
105
- "<loc_17>": 50286,
106
- "<loc_180>": 50449,
107
- "<loc_181>": 50450,
108
- "<loc_182>": 50451,
109
- "<loc_183>": 50452,
110
- "<loc_184>": 50453,
111
- "<loc_185>": 50454,
112
- "<loc_186>": 50455,
113
- "<loc_187>": 50456,
114
- "<loc_188>": 50457,
115
- "<loc_189>": 50458,
116
- "<loc_18>": 50287,
117
- "<loc_190>": 50459,
118
- "<loc_191>": 50460,
119
- "<loc_192>": 50461,
120
- "<loc_193>": 50462,
121
- "<loc_194>": 50463,
122
- "<loc_195>": 50464,
123
- "<loc_196>": 50465,
124
- "<loc_197>": 50466,
125
- "<loc_198>": 50467,
126
- "<loc_199>": 50468,
127
- "<loc_19>": 50288,
128
- "<loc_1>": 50270,
129
- "<loc_200>": 50469,
130
- "<loc_201>": 50470,
131
- "<loc_202>": 50471,
132
- "<loc_203>": 50472,
133
- "<loc_204>": 50473,
134
- "<loc_205>": 50474,
135
- "<loc_206>": 50475,
136
- "<loc_207>": 50476,
137
- "<loc_208>": 50477,
138
- "<loc_209>": 50478,
139
- "<loc_20>": 50289,
140
- "<loc_210>": 50479,
141
- "<loc_211>": 50480,
142
- "<loc_212>": 50481,
143
- "<loc_213>": 50482,
144
- "<loc_214>": 50483,
145
- "<loc_215>": 50484,
146
- "<loc_216>": 50485,
147
- "<loc_217>": 50486,
148
- "<loc_218>": 50487,
149
- "<loc_219>": 50488,
150
- "<loc_21>": 50290,
151
- "<loc_220>": 50489,
152
- "<loc_221>": 50490,
153
- "<loc_222>": 50491,
154
- "<loc_223>": 50492,
155
- "<loc_224>": 50493,
156
- "<loc_225>": 50494,
157
- "<loc_226>": 50495,
158
- "<loc_227>": 50496,
159
- "<loc_228>": 50497,
160
- "<loc_229>": 50498,
161
- "<loc_22>": 50291,
162
- "<loc_230>": 50499,
163
- "<loc_231>": 50500,
164
- "<loc_232>": 50501,
165
- "<loc_233>": 50502,
166
- "<loc_234>": 50503,
167
- "<loc_235>": 50504,
168
- "<loc_236>": 50505,
169
- "<loc_237>": 50506,
170
- "<loc_238>": 50507,
171
- "<loc_239>": 50508,
172
- "<loc_23>": 50292,
173
- "<loc_240>": 50509,
174
- "<loc_241>": 50510,
175
- "<loc_242>": 50511,
176
- "<loc_243>": 50512,
177
- "<loc_244>": 50513,
178
- "<loc_245>": 50514,
179
- "<loc_246>": 50515,
180
- "<loc_247>": 50516,
181
- "<loc_248>": 50517,
182
- "<loc_249>": 50518,
183
- "<loc_24>": 50293,
184
- "<loc_250>": 50519,
185
- "<loc_251>": 50520,
186
- "<loc_252>": 50521,
187
- "<loc_253>": 50522,
188
- "<loc_254>": 50523,
189
- "<loc_255>": 50524,
190
- "<loc_256>": 50525,
191
- "<loc_257>": 50526,
192
- "<loc_258>": 50527,
193
- "<loc_259>": 50528,
194
- "<loc_25>": 50294,
195
- "<loc_260>": 50529,
196
- "<loc_261>": 50530,
197
- "<loc_262>": 50531,
198
- "<loc_263>": 50532,
199
- "<loc_264>": 50533,
200
- "<loc_265>": 50534,
201
- "<loc_266>": 50535,
202
- "<loc_267>": 50536,
203
- "<loc_268>": 50537,
204
- "<loc_269>": 50538,
205
- "<loc_26>": 50295,
206
- "<loc_270>": 50539,
207
- "<loc_271>": 50540,
208
- "<loc_272>": 50541,
209
- "<loc_273>": 50542,
210
- "<loc_274>": 50543,
211
- "<loc_275>": 50544,
212
- "<loc_276>": 50545,
213
- "<loc_277>": 50546,
214
- "<loc_278>": 50547,
215
- "<loc_279>": 50548,
216
- "<loc_27>": 50296,
217
- "<loc_280>": 50549,
218
- "<loc_281>": 50550,
219
- "<loc_282>": 50551,
220
- "<loc_283>": 50552,
221
- "<loc_284>": 50553,
222
- "<loc_285>": 50554,
223
- "<loc_286>": 50555,
224
- "<loc_287>": 50556,
225
- "<loc_288>": 50557,
226
- "<loc_289>": 50558,
227
- "<loc_28>": 50297,
228
- "<loc_290>": 50559,
229
- "<loc_291>": 50560,
230
- "<loc_292>": 50561,
231
- "<loc_293>": 50562,
232
- "<loc_294>": 50563,
233
- "<loc_295>": 50564,
234
- "<loc_296>": 50565,
235
- "<loc_297>": 50566,
236
- "<loc_298>": 50567,
237
- "<loc_299>": 50568,
238
- "<loc_29>": 50298,
239
- "<loc_2>": 50271,
240
- "<loc_300>": 50569,
241
- "<loc_301>": 50570,
242
- "<loc_302>": 50571,
243
- "<loc_303>": 50572,
244
- "<loc_304>": 50573,
245
- "<loc_305>": 50574,
246
- "<loc_306>": 50575,
247
- "<loc_307>": 50576,
248
- "<loc_308>": 50577,
249
- "<loc_309>": 50578,
250
- "<loc_30>": 50299,
251
- "<loc_310>": 50579,
252
- "<loc_311>": 50580,
253
- "<loc_312>": 50581,
254
- "<loc_313>": 50582,
255
- "<loc_314>": 50583,
256
- "<loc_315>": 50584,
257
- "<loc_316>": 50585,
258
- "<loc_317>": 50586,
259
- "<loc_318>": 50587,
260
- "<loc_319>": 50588,
261
- "<loc_31>": 50300,
262
- "<loc_320>": 50589,
263
- "<loc_321>": 50590,
264
- "<loc_322>": 50591,
265
- "<loc_323>": 50592,
266
- "<loc_324>": 50593,
267
- "<loc_325>": 50594,
268
- "<loc_326>": 50595,
269
- "<loc_327>": 50596,
270
- "<loc_328>": 50597,
271
- "<loc_329>": 50598,
272
- "<loc_32>": 50301,
273
- "<loc_330>": 50599,
274
- "<loc_331>": 50600,
275
- "<loc_332>": 50601,
276
- "<loc_333>": 50602,
277
- "<loc_334>": 50603,
278
- "<loc_335>": 50604,
279
- "<loc_336>": 50605,
280
- "<loc_337>": 50606,
281
- "<loc_338>": 50607,
282
- "<loc_339>": 50608,
283
- "<loc_33>": 50302,
284
- "<loc_340>": 50609,
285
- "<loc_341>": 50610,
286
- "<loc_342>": 50611,
287
- "<loc_343>": 50612,
288
- "<loc_344>": 50613,
289
- "<loc_345>": 50614,
290
- "<loc_346>": 50615,
291
- "<loc_347>": 50616,
292
- "<loc_348>": 50617,
293
- "<loc_349>": 50618,
294
- "<loc_34>": 50303,
295
- "<loc_350>": 50619,
296
- "<loc_351>": 50620,
297
- "<loc_352>": 50621,
298
- "<loc_353>": 50622,
299
- "<loc_354>": 50623,
300
- "<loc_355>": 50624,
301
- "<loc_356>": 50625,
302
- "<loc_357>": 50626,
303
- "<loc_358>": 50627,
304
- "<loc_359>": 50628,
305
- "<loc_35>": 50304,
306
- "<loc_360>": 50629,
307
- "<loc_361>": 50630,
308
- "<loc_362>": 50631,
309
- "<loc_363>": 50632,
310
- "<loc_364>": 50633,
311
- "<loc_365>": 50634,
312
- "<loc_366>": 50635,
313
- "<loc_367>": 50636,
314
- "<loc_368>": 50637,
315
- "<loc_369>": 50638,
316
- "<loc_36>": 50305,
317
- "<loc_370>": 50639,
318
- "<loc_371>": 50640,
319
- "<loc_372>": 50641,
320
- "<loc_373>": 50642,
321
- "<loc_374>": 50643,
322
- "<loc_375>": 50644,
323
- "<loc_376>": 50645,
324
- "<loc_377>": 50646,
325
- "<loc_378>": 50647,
326
- "<loc_379>": 50648,
327
- "<loc_37>": 50306,
328
- "<loc_380>": 50649,
329
- "<loc_381>": 50650,
330
- "<loc_382>": 50651,
331
- "<loc_383>": 50652,
332
- "<loc_384>": 50653,
333
- "<loc_385>": 50654,
334
- "<loc_386>": 50655,
335
- "<loc_387>": 50656,
336
- "<loc_388>": 50657,
337
- "<loc_389>": 50658,
338
- "<loc_38>": 50307,
339
- "<loc_390>": 50659,
340
- "<loc_391>": 50660,
341
- "<loc_392>": 50661,
342
- "<loc_393>": 50662,
343
- "<loc_394>": 50663,
344
- "<loc_395>": 50664,
345
- "<loc_396>": 50665,
346
- "<loc_397>": 50666,
347
- "<loc_398>": 50667,
348
- "<loc_399>": 50668,
349
- "<loc_39>": 50308,
350
- "<loc_3>": 50272,
351
- "<loc_400>": 50669,
352
- "<loc_401>": 50670,
353
- "<loc_402>": 50671,
354
- "<loc_403>": 50672,
355
- "<loc_404>": 50673,
356
- "<loc_405>": 50674,
357
- "<loc_406>": 50675,
358
- "<loc_407>": 50676,
359
- "<loc_408>": 50677,
360
- "<loc_409>": 50678,
361
- "<loc_40>": 50309,
362
- "<loc_410>": 50679,
363
- "<loc_411>": 50680,
364
- "<loc_412>": 50681,
365
- "<loc_413>": 50682,
366
- "<loc_414>": 50683,
367
- "<loc_415>": 50684,
368
- "<loc_416>": 50685,
369
- "<loc_417>": 50686,
370
- "<loc_418>": 50687,
371
- "<loc_419>": 50688,
372
- "<loc_41>": 50310,
373
- "<loc_420>": 50689,
374
- "<loc_421>": 50690,
375
- "<loc_422>": 50691,
376
- "<loc_423>": 50692,
377
- "<loc_424>": 50693,
378
- "<loc_425>": 50694,
379
- "<loc_426>": 50695,
380
- "<loc_427>": 50696,
381
- "<loc_428>": 50697,
382
- "<loc_429>": 50698,
383
- "<loc_42>": 50311,
384
- "<loc_430>": 50699,
385
- "<loc_431>": 50700,
386
- "<loc_432>": 50701,
387
- "<loc_433>": 50702,
388
- "<loc_434>": 50703,
389
- "<loc_435>": 50704,
390
- "<loc_436>": 50705,
391
- "<loc_437>": 50706,
392
- "<loc_438>": 50707,
393
- "<loc_439>": 50708,
394
- "<loc_43>": 50312,
395
- "<loc_440>": 50709,
396
- "<loc_441>": 50710,
397
- "<loc_442>": 50711,
398
- "<loc_443>": 50712,
399
- "<loc_444>": 50713,
400
- "<loc_445>": 50714,
401
- "<loc_446>": 50715,
402
- "<loc_447>": 50716,
403
- "<loc_448>": 50717,
404
- "<loc_449>": 50718,
405
- "<loc_44>": 50313,
406
- "<loc_450>": 50719,
407
- "<loc_451>": 50720,
408
- "<loc_452>": 50721,
409
- "<loc_453>": 50722,
410
- "<loc_454>": 50723,
411
- "<loc_455>": 50724,
412
- "<loc_456>": 50725,
413
- "<loc_457>": 50726,
414
- "<loc_458>": 50727,
415
- "<loc_459>": 50728,
416
- "<loc_45>": 50314,
417
- "<loc_460>": 50729,
418
- "<loc_461>": 50730,
419
- "<loc_462>": 50731,
420
- "<loc_463>": 50732,
421
- "<loc_464>": 50733,
422
- "<loc_465>": 50734,
423
- "<loc_466>": 50735,
424
- "<loc_467>": 50736,
425
- "<loc_468>": 50737,
426
- "<loc_469>": 50738,
427
- "<loc_46>": 50315,
428
- "<loc_470>": 50739,
429
- "<loc_471>": 50740,
430
- "<loc_472>": 50741,
431
- "<loc_473>": 50742,
432
- "<loc_474>": 50743,
433
- "<loc_475>": 50744,
434
- "<loc_476>": 50745,
435
- "<loc_477>": 50746,
436
- "<loc_478>": 50747,
437
- "<loc_479>": 50748,
438
- "<loc_47>": 50316,
439
- "<loc_480>": 50749,
440
- "<loc_481>": 50750,
441
- "<loc_482>": 50751,
442
- "<loc_483>": 50752,
443
- "<loc_484>": 50753,
444
- "<loc_485>": 50754,
445
- "<loc_486>": 50755,
446
- "<loc_487>": 50756,
447
- "<loc_488>": 50757,
448
- "<loc_489>": 50758,
449
- "<loc_48>": 50317,
450
- "<loc_490>": 50759,
451
- "<loc_491>": 50760,
452
- "<loc_492>": 50761,
453
- "<loc_493>": 50762,
454
- "<loc_494>": 50763,
455
- "<loc_495>": 50764,
456
- "<loc_496>": 50765,
457
- "<loc_497>": 50766,
458
- "<loc_498>": 50767,
459
- "<loc_499>": 50768,
460
- "<loc_49>": 50318,
461
- "<loc_4>": 50273,
462
- "<loc_500>": 50769,
463
- "<loc_501>": 50770,
464
- "<loc_502>": 50771,
465
- "<loc_503>": 50772,
466
- "<loc_504>": 50773,
467
- "<loc_505>": 50774,
468
- "<loc_506>": 50775,
469
- "<loc_507>": 50776,
470
- "<loc_508>": 50777,
471
- "<loc_509>": 50778,
472
- "<loc_50>": 50319,
473
- "<loc_510>": 50779,
474
- "<loc_511>": 50780,
475
- "<loc_512>": 50781,
476
- "<loc_513>": 50782,
477
- "<loc_514>": 50783,
478
- "<loc_515>": 50784,
479
- "<loc_516>": 50785,
480
- "<loc_517>": 50786,
481
- "<loc_518>": 50787,
482
- "<loc_519>": 50788,
483
- "<loc_51>": 50320,
484
- "<loc_520>": 50789,
485
- "<loc_521>": 50790,
486
- "<loc_522>": 50791,
487
- "<loc_523>": 50792,
488
- "<loc_524>": 50793,
489
- "<loc_525>": 50794,
490
- "<loc_526>": 50795,
491
- "<loc_527>": 50796,
492
- "<loc_528>": 50797,
493
- "<loc_529>": 50798,
494
- "<loc_52>": 50321,
495
- "<loc_530>": 50799,
496
- "<loc_531>": 50800,
497
- "<loc_532>": 50801,
498
- "<loc_533>": 50802,
499
- "<loc_534>": 50803,
500
- "<loc_535>": 50804,
501
- "<loc_536>": 50805,
502
- "<loc_537>": 50806,
503
- "<loc_538>": 50807,
504
- "<loc_539>": 50808,
505
- "<loc_53>": 50322,
506
- "<loc_540>": 50809,
507
- "<loc_541>": 50810,
508
- "<loc_542>": 50811,
509
- "<loc_543>": 50812,
510
- "<loc_544>": 50813,
511
- "<loc_545>": 50814,
512
- "<loc_546>": 50815,
513
- "<loc_547>": 50816,
514
- "<loc_548>": 50817,
515
- "<loc_549>": 50818,
516
- "<loc_54>": 50323,
517
- "<loc_550>": 50819,
518
- "<loc_551>": 50820,
519
- "<loc_552>": 50821,
520
- "<loc_553>": 50822,
521
- "<loc_554>": 50823,
522
- "<loc_555>": 50824,
523
- "<loc_556>": 50825,
524
- "<loc_557>": 50826,
525
- "<loc_558>": 50827,
526
- "<loc_559>": 50828,
527
- "<loc_55>": 50324,
528
- "<loc_560>": 50829,
529
- "<loc_561>": 50830,
530
- "<loc_562>": 50831,
531
- "<loc_563>": 50832,
532
- "<loc_564>": 50833,
533
- "<loc_565>": 50834,
534
- "<loc_566>": 50835,
535
- "<loc_567>": 50836,
536
- "<loc_568>": 50837,
537
- "<loc_569>": 50838,
538
- "<loc_56>": 50325,
539
- "<loc_570>": 50839,
540
- "<loc_571>": 50840,
541
- "<loc_572>": 50841,
542
- "<loc_573>": 50842,
543
- "<loc_574>": 50843,
544
- "<loc_575>": 50844,
545
- "<loc_576>": 50845,
546
- "<loc_577>": 50846,
547
- "<loc_578>": 50847,
548
- "<loc_579>": 50848,
549
- "<loc_57>": 50326,
550
- "<loc_580>": 50849,
551
- "<loc_581>": 50850,
552
- "<loc_582>": 50851,
553
- "<loc_583>": 50852,
554
- "<loc_584>": 50853,
555
- "<loc_585>": 50854,
556
- "<loc_586>": 50855,
557
- "<loc_587>": 50856,
558
- "<loc_588>": 50857,
559
- "<loc_589>": 50858,
560
- "<loc_58>": 50327,
561
- "<loc_590>": 50859,
562
- "<loc_591>": 50860,
563
- "<loc_592>": 50861,
564
- "<loc_593>": 50862,
565
- "<loc_594>": 50863,
566
- "<loc_595>": 50864,
567
- "<loc_596>": 50865,
568
- "<loc_597>": 50866,
569
- "<loc_598>": 50867,
570
- "<loc_599>": 50868,
571
- "<loc_59>": 50328,
572
- "<loc_5>": 50274,
573
- "<loc_600>": 50869,
574
- "<loc_601>": 50870,
575
- "<loc_602>": 50871,
576
- "<loc_603>": 50872,
577
- "<loc_604>": 50873,
578
- "<loc_605>": 50874,
579
- "<loc_606>": 50875,
580
- "<loc_607>": 50876,
581
- "<loc_608>": 50877,
582
- "<loc_609>": 50878,
583
- "<loc_60>": 50329,
584
- "<loc_610>": 50879,
585
- "<loc_611>": 50880,
586
- "<loc_612>": 50881,
587
- "<loc_613>": 50882,
588
- "<loc_614>": 50883,
589
- "<loc_615>": 50884,
590
- "<loc_616>": 50885,
591
- "<loc_617>": 50886,
592
- "<loc_618>": 50887,
593
- "<loc_619>": 50888,
594
- "<loc_61>": 50330,
595
- "<loc_620>": 50889,
596
- "<loc_621>": 50890,
597
- "<loc_622>": 50891,
598
- "<loc_623>": 50892,
599
- "<loc_624>": 50893,
600
- "<loc_625>": 50894,
601
- "<loc_626>": 50895,
602
- "<loc_627>": 50896,
603
- "<loc_628>": 50897,
604
- "<loc_629>": 50898,
605
- "<loc_62>": 50331,
606
- "<loc_630>": 50899,
607
- "<loc_631>": 50900,
608
- "<loc_632>": 50901,
609
- "<loc_633>": 50902,
610
- "<loc_634>": 50903,
611
- "<loc_635>": 50904,
612
- "<loc_636>": 50905,
613
- "<loc_637>": 50906,
614
- "<loc_638>": 50907,
615
- "<loc_639>": 50908,
616
- "<loc_63>": 50332,
617
- "<loc_640>": 50909,
618
- "<loc_641>": 50910,
619
- "<loc_642>": 50911,
620
- "<loc_643>": 50912,
621
- "<loc_644>": 50913,
622
- "<loc_645>": 50914,
623
- "<loc_646>": 50915,
624
- "<loc_647>": 50916,
625
- "<loc_648>": 50917,
626
- "<loc_649>": 50918,
627
- "<loc_64>": 50333,
628
- "<loc_650>": 50919,
629
- "<loc_651>": 50920,
630
- "<loc_652>": 50921,
631
- "<loc_653>": 50922,
632
- "<loc_654>": 50923,
633
- "<loc_655>": 50924,
634
- "<loc_656>": 50925,
635
- "<loc_657>": 50926,
636
- "<loc_658>": 50927,
637
- "<loc_659>": 50928,
638
- "<loc_65>": 50334,
639
- "<loc_660>": 50929,
640
- "<loc_661>": 50930,
641
- "<loc_662>": 50931,
642
- "<loc_663>": 50932,
643
- "<loc_664>": 50933,
644
- "<loc_665>": 50934,
645
- "<loc_666>": 50935,
646
- "<loc_667>": 50936,
647
- "<loc_668>": 50937,
648
- "<loc_669>": 50938,
649
- "<loc_66>": 50335,
650
- "<loc_670>": 50939,
651
- "<loc_671>": 50940,
652
- "<loc_672>": 50941,
653
- "<loc_673>": 50942,
654
- "<loc_674>": 50943,
655
- "<loc_675>": 50944,
656
- "<loc_676>": 50945,
657
- "<loc_677>": 50946,
658
- "<loc_678>": 50947,
659
- "<loc_679>": 50948,
660
- "<loc_67>": 50336,
661
- "<loc_680>": 50949,
662
- "<loc_681>": 50950,
663
- "<loc_682>": 50951,
664
- "<loc_683>": 50952,
665
- "<loc_684>": 50953,
666
- "<loc_685>": 50954,
667
- "<loc_686>": 50955,
668
- "<loc_687>": 50956,
669
- "<loc_688>": 50957,
670
- "<loc_689>": 50958,
671
- "<loc_68>": 50337,
672
- "<loc_690>": 50959,
673
- "<loc_691>": 50960,
674
- "<loc_692>": 50961,
675
- "<loc_693>": 50962,
676
- "<loc_694>": 50963,
677
- "<loc_695>": 50964,
678
- "<loc_696>": 50965,
679
- "<loc_697>": 50966,
680
- "<loc_698>": 50967,
681
- "<loc_699>": 50968,
682
- "<loc_69>": 50338,
683
- "<loc_6>": 50275,
684
- "<loc_700>": 50969,
685
- "<loc_701>": 50970,
686
- "<loc_702>": 50971,
687
- "<loc_703>": 50972,
688
- "<loc_704>": 50973,
689
- "<loc_705>": 50974,
690
- "<loc_706>": 50975,
691
- "<loc_707>": 50976,
692
- "<loc_708>": 50977,
693
- "<loc_709>": 50978,
694
- "<loc_70>": 50339,
695
- "<loc_710>": 50979,
696
- "<loc_711>": 50980,
697
- "<loc_712>": 50981,
698
- "<loc_713>": 50982,
699
- "<loc_714>": 50983,
700
- "<loc_715>": 50984,
701
- "<loc_716>": 50985,
702
- "<loc_717>": 50986,
703
- "<loc_718>": 50987,
704
- "<loc_719>": 50988,
705
- "<loc_71>": 50340,
706
- "<loc_720>": 50989,
707
- "<loc_721>": 50990,
708
- "<loc_722>": 50991,
709
- "<loc_723>": 50992,
710
- "<loc_724>": 50993,
711
- "<loc_725>": 50994,
712
- "<loc_726>": 50995,
713
- "<loc_727>": 50996,
714
- "<loc_728>": 50997,
715
- "<loc_729>": 50998,
716
- "<loc_72>": 50341,
717
- "<loc_730>": 50999,
718
- "<loc_731>": 51000,
719
- "<loc_732>": 51001,
720
- "<loc_733>": 51002,
721
- "<loc_734>": 51003,
722
- "<loc_735>": 51004,
723
- "<loc_736>": 51005,
724
- "<loc_737>": 51006,
725
- "<loc_738>": 51007,
726
- "<loc_739>": 51008,
727
- "<loc_73>": 50342,
728
- "<loc_740>": 51009,
729
- "<loc_741>": 51010,
730
- "<loc_742>": 51011,
731
- "<loc_743>": 51012,
732
- "<loc_744>": 51013,
733
- "<loc_745>": 51014,
734
- "<loc_746>": 51015,
735
- "<loc_747>": 51016,
736
- "<loc_748>": 51017,
737
- "<loc_749>": 51018,
738
- "<loc_74>": 50343,
739
- "<loc_750>": 51019,
740
- "<loc_751>": 51020,
741
- "<loc_752>": 51021,
742
- "<loc_753>": 51022,
743
- "<loc_754>": 51023,
744
- "<loc_755>": 51024,
745
- "<loc_756>": 51025,
746
- "<loc_757>": 51026,
747
- "<loc_758>": 51027,
748
- "<loc_759>": 51028,
749
- "<loc_75>": 50344,
750
- "<loc_760>": 51029,
751
- "<loc_761>": 51030,
752
- "<loc_762>": 51031,
753
- "<loc_763>": 51032,
754
- "<loc_764>": 51033,
755
- "<loc_765>": 51034,
756
- "<loc_766>": 51035,
757
- "<loc_767>": 51036,
758
- "<loc_768>": 51037,
759
- "<loc_769>": 51038,
760
- "<loc_76>": 50345,
761
- "<loc_770>": 51039,
762
- "<loc_771>": 51040,
763
- "<loc_772>": 51041,
764
- "<loc_773>": 51042,
765
- "<loc_774>": 51043,
766
- "<loc_775>": 51044,
767
- "<loc_776>": 51045,
768
- "<loc_777>": 51046,
769
- "<loc_778>": 51047,
770
- "<loc_779>": 51048,
771
- "<loc_77>": 50346,
772
- "<loc_780>": 51049,
773
- "<loc_781>": 51050,
774
- "<loc_782>": 51051,
775
- "<loc_783>": 51052,
776
- "<loc_784>": 51053,
777
- "<loc_785>": 51054,
778
- "<loc_786>": 51055,
779
- "<loc_787>": 51056,
780
- "<loc_788>": 51057,
781
- "<loc_789>": 51058,
782
- "<loc_78>": 50347,
783
- "<loc_790>": 51059,
784
- "<loc_791>": 51060,
785
- "<loc_792>": 51061,
786
- "<loc_793>": 51062,
787
- "<loc_794>": 51063,
788
- "<loc_795>": 51064,
789
- "<loc_796>": 51065,
790
- "<loc_797>": 51066,
791
- "<loc_798>": 51067,
792
- "<loc_799>": 51068,
793
- "<loc_79>": 50348,
794
- "<loc_7>": 50276,
795
- "<loc_800>": 51069,
796
- "<loc_801>": 51070,
797
- "<loc_802>": 51071,
798
- "<loc_803>": 51072,
799
- "<loc_804>": 51073,
800
- "<loc_805>": 51074,
801
- "<loc_806>": 51075,
802
- "<loc_807>": 51076,
803
- "<loc_808>": 51077,
804
- "<loc_809>": 51078,
805
- "<loc_80>": 50349,
806
- "<loc_810>": 51079,
807
- "<loc_811>": 51080,
808
- "<loc_812>": 51081,
809
- "<loc_813>": 51082,
810
- "<loc_814>": 51083,
811
- "<loc_815>": 51084,
812
- "<loc_816>": 51085,
813
- "<loc_817>": 51086,
814
- "<loc_818>": 51087,
815
- "<loc_819>": 51088,
816
- "<loc_81>": 50350,
817
- "<loc_820>": 51089,
818
- "<loc_821>": 51090,
819
- "<loc_822>": 51091,
820
- "<loc_823>": 51092,
821
- "<loc_824>": 51093,
822
- "<loc_825>": 51094,
823
- "<loc_826>": 51095,
824
- "<loc_827>": 51096,
825
- "<loc_828>": 51097,
826
- "<loc_829>": 51098,
827
- "<loc_82>": 50351,
828
- "<loc_830>": 51099,
829
- "<loc_831>": 51100,
830
- "<loc_832>": 51101,
831
- "<loc_833>": 51102,
832
- "<loc_834>": 51103,
833
- "<loc_835>": 51104,
834
- "<loc_836>": 51105,
835
- "<loc_837>": 51106,
836
- "<loc_838>": 51107,
837
- "<loc_839>": 51108,
838
- "<loc_83>": 50352,
839
- "<loc_840>": 51109,
840
- "<loc_841>": 51110,
841
- "<loc_842>": 51111,
842
- "<loc_843>": 51112,
843
- "<loc_844>": 51113,
844
- "<loc_845>": 51114,
845
- "<loc_846>": 51115,
846
- "<loc_847>": 51116,
847
- "<loc_848>": 51117,
848
- "<loc_849>": 51118,
849
- "<loc_84>": 50353,
850
- "<loc_850>": 51119,
851
- "<loc_851>": 51120,
852
- "<loc_852>": 51121,
853
- "<loc_853>": 51122,
854
- "<loc_854>": 51123,
855
- "<loc_855>": 51124,
856
- "<loc_856>": 51125,
857
- "<loc_857>": 51126,
858
- "<loc_858>": 51127,
859
- "<loc_859>": 51128,
860
- "<loc_85>": 50354,
861
- "<loc_860>": 51129,
862
- "<loc_861>": 51130,
863
- "<loc_862>": 51131,
864
- "<loc_863>": 51132,
865
- "<loc_864>": 51133,
866
- "<loc_865>": 51134,
867
- "<loc_866>": 51135,
868
- "<loc_867>": 51136,
869
- "<loc_868>": 51137,
870
- "<loc_869>": 51138,
871
- "<loc_86>": 50355,
872
- "<loc_870>": 51139,
873
- "<loc_871>": 51140,
874
- "<loc_872>": 51141,
875
- "<loc_873>": 51142,
876
- "<loc_874>": 51143,
877
- "<loc_875>": 51144,
878
- "<loc_876>": 51145,
879
- "<loc_877>": 51146,
880
- "<loc_878>": 51147,
881
- "<loc_879>": 51148,
882
- "<loc_87>": 50356,
883
- "<loc_880>": 51149,
884
- "<loc_881>": 51150,
885
- "<loc_882>": 51151,
886
- "<loc_883>": 51152,
887
- "<loc_884>": 51153,
888
- "<loc_885>": 51154,
889
- "<loc_886>": 51155,
890
- "<loc_887>": 51156,
891
- "<loc_888>": 51157,
892
- "<loc_889>": 51158,
893
- "<loc_88>": 50357,
894
- "<loc_890>": 51159,
895
- "<loc_891>": 51160,
896
- "<loc_892>": 51161,
897
- "<loc_893>": 51162,
898
- "<loc_894>": 51163,
899
- "<loc_895>": 51164,
900
- "<loc_896>": 51165,
901
- "<loc_897>": 51166,
902
- "<loc_898>": 51167,
903
- "<loc_899>": 51168,
904
- "<loc_89>": 50358,
905
- "<loc_8>": 50277,
906
- "<loc_900>": 51169,
907
- "<loc_901>": 51170,
908
- "<loc_902>": 51171,
909
- "<loc_903>": 51172,
910
- "<loc_904>": 51173,
911
- "<loc_905>": 51174,
912
- "<loc_906>": 51175,
913
- "<loc_907>": 51176,
914
- "<loc_908>": 51177,
915
- "<loc_909>": 51178,
916
- "<loc_90>": 50359,
917
- "<loc_910>": 51179,
918
- "<loc_911>": 51180,
919
- "<loc_912>": 51181,
920
- "<loc_913>": 51182,
921
- "<loc_914>": 51183,
922
- "<loc_915>": 51184,
923
- "<loc_916>": 51185,
924
- "<loc_917>": 51186,
925
- "<loc_918>": 51187,
926
- "<loc_919>": 51188,
927
- "<loc_91>": 50360,
928
- "<loc_920>": 51189,
929
- "<loc_921>": 51190,
930
- "<loc_922>": 51191,
931
- "<loc_923>": 51192,
932
- "<loc_924>": 51193,
933
- "<loc_925>": 51194,
934
- "<loc_926>": 51195,
935
- "<loc_927>": 51196,
936
- "<loc_928>": 51197,
937
- "<loc_929>": 51198,
938
- "<loc_92>": 50361,
939
- "<loc_930>": 51199,
940
- "<loc_931>": 51200,
941
- "<loc_932>": 51201,
942
- "<loc_933>": 51202,
943
- "<loc_934>": 51203,
944
- "<loc_935>": 51204,
945
- "<loc_936>": 51205,
946
- "<loc_937>": 51206,
947
- "<loc_938>": 51207,
948
- "<loc_939>": 51208,
949
- "<loc_93>": 50362,
950
- "<loc_940>": 51209,
951
- "<loc_941>": 51210,
952
- "<loc_942>": 51211,
953
- "<loc_943>": 51212,
954
- "<loc_944>": 51213,
955
- "<loc_945>": 51214,
956
- "<loc_946>": 51215,
957
- "<loc_947>": 51216,
958
- "<loc_948>": 51217,
959
- "<loc_949>": 51218,
960
- "<loc_94>": 50363,
961
- "<loc_950>": 51219,
962
- "<loc_951>": 51220,
963
- "<loc_952>": 51221,
964
- "<loc_953>": 51222,
965
- "<loc_954>": 51223,
966
- "<loc_955>": 51224,
967
- "<loc_956>": 51225,
968
- "<loc_957>": 51226,
969
- "<loc_958>": 51227,
970
- "<loc_959>": 51228,
971
- "<loc_95>": 50364,
972
- "<loc_960>": 51229,
973
- "<loc_961>": 51230,
974
- "<loc_962>": 51231,
975
- "<loc_963>": 51232,
976
- "<loc_964>": 51233,
977
- "<loc_965>": 51234,
978
- "<loc_966>": 51235,
979
- "<loc_967>": 51236,
980
- "<loc_968>": 51237,
981
- "<loc_969>": 51238,
982
- "<loc_96>": 50365,
983
- "<loc_970>": 51239,
984
- "<loc_971>": 51240,
985
- "<loc_972>": 51241,
986
- "<loc_973>": 51242,
987
- "<loc_974>": 51243,
988
- "<loc_975>": 51244,
989
- "<loc_976>": 51245,
990
- "<loc_977>": 51246,
991
- "<loc_978>": 51247,
992
- "<loc_979>": 51248,
993
- "<loc_97>": 50366,
994
- "<loc_980>": 51249,
995
- "<loc_981>": 51250,
996
- "<loc_982>": 51251,
997
- "<loc_983>": 51252,
998
- "<loc_984>": 51253,
999
- "<loc_985>": 51254,
1000
- "<loc_986>": 51255,
1001
- "<loc_987>": 51256,
1002
- "<loc_988>": 51257,
1003
- "<loc_989>": 51258,
1004
- "<loc_98>": 50367,
1005
- "<loc_990>": 51259,
1006
- "<loc_991>": 51260,
1007
- "<loc_992>": 51261,
1008
- "<loc_993>": 51262,
1009
- "<loc_994>": 51263,
1010
- "<loc_995>": 51264,
1011
- "<loc_996>": 51265,
1012
- "<loc_997>": 51266,
1013
- "<loc_998>": 51267,
1014
- "<loc_999>": 51268,
1015
- "<loc_99>": 50368,
1016
- "<loc_9>": 50278,
1017
- "<ncap>": 51271,
1018
- "<ocr>": 50267,
1019
- "<od>": 50265,
1020
- "<poly>": 51286,
1021
- "<proposal>": 51284,
1022
- "<region_cap>": 51280,
1023
- "<region_to_desciption>": 51282,
1024
- "<seg>": 51277,
1025
- "<sep>": 51279
1026
- }
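Most of this map is the `<loc_0>`…`<loc_999>` grid (ids 50269–51268) that Florence-2 uses to serialize coordinates: positions are quantized into 1000 bins relative to the image size, so a bounding box becomes four location tokens. A rough sketch of that encoding; the exact binning convention is assumed from the upstream Florence-2 post-processor, not spelled out in this file:

```python
def box_to_loc_tokens(box, image_width, image_height, bins=1000):
    """Quantize an (x1, y1, x2, y2) pixel box into <loc_N> tokens."""
    def quantize(value, size):
        # Map [0, size) onto bin indices 0..bins-1, clamping out-of-range values.
        return min(bins - 1, max(0, int(value / size * bins)))

    x1, y1, x2, y2 = box
    return (
        f"<loc_{quantize(x1, image_width)}><loc_{quantize(y1, image_height)}>"
        f"<loc_{quantize(x2, image_width)}><loc_{quantize(y2, image_height)}>"
    )

print(box_to_loc_tokens((34, 160, 597, 371), 640, 480))
# <loc_53><loc_333><loc_932><loc_772>
```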
 
Florence-2-Flux-Large/config.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "_name_or_path": "florence2",
3
- "architectures": [
4
- "Florence2ForConditionalGeneration"
5
- ],
6
- "auto_map": {
7
- "AutoConfig": "configuration_florence2.Florence2Config",
8
- "AutoModelForCausalLM": "modeling_florence2.Florence2ForConditionalGeneration"
9
- },
10
- "bos_token_id": 0,
11
- "eos_token_id": 2,
12
- "ignore_index": -100,
13
- "model_type": "florence2",
14
- "pad_token_id": 1,
15
- "projection_dim": 1024,
16
- "text_config": {
17
- "vocab_size": 51289,
18
- "activation_dropout": 0.1,
19
- "activation_function": "gelu",
20
- "add_bias_logits": false,
21
- "add_final_layer_norm": false,
22
- "attention_dropout": 0.1,
23
- "bos_token_id": 0,
24
- "classif_dropout": 0.1,
25
- "classifier_dropout": 0.0,
26
- "d_model": 1024,
27
- "decoder_attention_heads": 16,
28
- "decoder_ffn_dim": 4096,
29
- "decoder_layerdrop": 0.0,
30
- "decoder_layers": 12,
31
- "decoder_start_token_id": 2,
32
- "dropout": 0.1,
33
- "early_stopping": true,
34
- "encoder_attention_heads": 16,
35
- "encoder_ffn_dim": 4096,
36
- "encoder_layerdrop": 0.0,
37
- "encoder_layers": 12,
38
- "eos_token_id": 2,
39
- "forced_eos_token_id": 2,
40
- "forced_bos_token_id": 0,
41
- "gradient_checkpointing": false,
42
- "init_std": 0.02,
43
- "is_encoder_decoder": true,
44
- "label2id": {
45
- "LABEL_0": 0,
46
- "LABEL_1": 1,
47
- "LABEL_2": 2
48
- },
49
- "max_position_embeddings": 1024,
50
- "no_repeat_ngram_size": 3,
51
- "normalize_before": false,
52
- "num_hidden_layers": 12,
53
- "pad_token_id": 1,
54
- "scale_embedding": false,
55
- "num_beams": 3
56
- },
57
- "vision_config": {
58
- "model_type": "davit",
59
- "drop_path_rate": 0.1,
60
- "patch_size": [7, 3, 3, 3],
61
- "patch_stride": [4, 2, 2, 2],
62
- "patch_padding": [3, 1, 1, 1],
63
- "patch_prenorm": [false, true, true, true],
64
- "enable_checkpoint": false,
65
- "dim_embed": [256, 512, 1024, 2048],
66
- "num_heads": [8, 16, 32, 64],
67
- "num_groups": [8, 16, 32, 64],
68
- "depths": [1, 1, 9, 1],
69
- "window_size": 12,
70
- "projection_dim": 1024,
71
- "visual_temporal_embedding": {
72
- "type": "COSINE",
73
- "max_temporal_embeddings": 100
74
- },
75
- "image_pos_embed": {
76
- "type": "learned_abs_2d",
77
- "max_pos_embeddings": 50
78
- },
79
- "image_feature_source": ["spatial_avg_pool", "temporal_avg_pool"]
80
- },
81
- "vocab_size": 51289,
82
- "torch_dtype": "float16",
83
- "transformers_version": "4.41.0.dev0",
84
- "is_encoder_decoder": true
85
- }
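The `auto_map` block is why the README passes `trust_remote_code=True`: `AutoConfig` and `AutoModelForCausalLM` resolve to the `configuration_florence2.py` and `modeling_florence2.py` modules shipped in this repo rather than to classes bundled with `transformers`. A minimal sketch of loading and inspecting the config the same way, repo id from the README assumed:

```python
from transformers import AutoConfig

# trust_remote_code is required because of the auto_map entries above.
config = AutoConfig.from_pretrained(
    "gokaygokay/Florence-2-Flux-Large", trust_remote_code=True
)
print(config.model_type)           # "florence2"
print(config.vocab_size)           # 51289
print(config.text_config.d_model)  # 1024
```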
 
Florence-2-Flux-Large/configuration_florence2.py DELETED
@@ -1,340 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """ Florence-2 configuration"""
15
- import warnings
16
-
17
- from typing import Optional
18
-
19
- from transformers import AutoConfig
20
- from transformers.configuration_utils import PretrainedConfig
21
- from transformers.utils import logging
22
-
23
- logger = logging.get_logger(__name__)
24
-
25
- class Florence2VisionConfig(PretrainedConfig):
26
- r"""
27
- This is the configuration class to store the configuration of a [`Florence2VisionModel`]. It is used to instantiate a Florence2VisionModel
28
- according to the specified arguments, defining the model architecture. Instantiating a configuration with the
29
- defaults will yield a similar configuration to that of the Florence2VisionModel architecture.
30
-
31
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
32
- documentation from [`PretrainedConfig`] for more information.
33
-
34
- Args:
35
- drop_path_rate (`float`, *optional*, defaults to 0.1):
36
- The dropout rate of the drop path layer.
37
- patch_size (`List[int]`, *optional*, defaults to [7, 3, 3, 3]):
38
- The patch size of the image.
39
- patch_stride (`List[int]`, *optional*, defaults to [4, 2, 2, 2]):
40
- The patch stride of the image.
41
- patch_padding (`List[int]`, *optional*, defaults to [3, 1, 1, 1]):
42
- The patch padding of the image.
43
- patch_prenorm (`List[bool]`, *optional*, defaults to [False, True, True, True]):
44
- Whether to apply layer normalization before the patch embedding layer.
45
- enable_checkpoint (`bool`, *optional*, defaults to False):
46
- Whether to enable checkpointing.
47
- dim_embed (`List[int]`, *optional*, defaults to [256, 512, 1024, 2048]):
48
- The dimension of the embedding layer.
49
- num_heads (`List[int]`, *optional*, defaults to [8, 16, 32, 64]):
50
- The number of attention heads.
51
- num_groups (`List[int]`, *optional*, defaults to [8, 16, 32, 64]):
52
- The number of groups.
53
- depths (`List[int]`, *optional*, defaults to [1, 1, 9, 1]):
54
- The depth of the model.
55
- window_size (`int`, *optional*, defaults to 12):
56
- The window size of the model.
57
- projection_dim (`int`, *optional*, defaults to 1024):
58
- The dimension of the projection layer.
59
- visual_temporal_embedding (`dict`, *optional*):
60
- The configuration of the visual temporal embedding.
61
- image_pos_embed (`dict`, *optional*):
62
- The configuration of the image position embedding.
63
- image_feature_source (`List[str]`, *optional*, defaults to ["spatial_avg_pool", "temporal_avg_pool"]):
64
- The source of the image feature.
65
- Example:
66
-
67
- ```python
68
- >>> from transformers import Florence2VisionConfig, Florence2VisionModel
69
-
70
- >>> # Initializing a Florence2 Vision style configuration
71
- >>> configuration = Florence2VisionConfig()
72
-
73
- >>> # Initializing a model (with random weights)
74
- >>> model = Florence2VisionModel(configuration)
75
-
76
- >>> # Accessing the model configuration
77
- >>> configuration = model.config
78
- ```"""
79
-
80
- model_type = "florence2_vision"
81
- keys_to_ignore_at_inference = ["past_key_values"]
82
-
83
- def __init__(
84
- self,
85
- drop_path_rate=0.1,
86
- patch_size=[7, 3, 3, 3],
87
- patch_stride=[4, 2, 2, 2],
88
- patch_padding=[3, 1, 1, 1],
89
- patch_prenorm=[False, True, True, True],
90
- enable_checkpoint=False,
91
- dim_embed=[256, 512, 1024, 2048],
92
- num_heads=[8, 16, 32, 64],
93
- num_groups=[8, 16, 32, 64],
94
- depths=[1, 1, 9, 1],
95
- window_size=12,
96
- projection_dim=1024,
97
- visual_temporal_embedding=None,
98
- image_pos_embed=None,
99
- image_feature_source=["spatial_avg_pool", "temporal_avg_pool"],
100
- **kwargs,
101
- ):
102
- self.drop_path_rate = drop_path_rate
103
- self.patch_size = patch_size
104
- self.patch_stride = patch_stride
105
- self.patch_padding = patch_padding
106
- self.patch_prenorm = patch_prenorm
107
- self.enable_checkpoint = enable_checkpoint
108
- self.dim_embed = dim_embed
109
- self.num_heads = num_heads
110
- self.num_groups = num_groups
111
- self.depths = depths
112
- self.window_size = window_size
113
- self.projection_dim = projection_dim
114
- self.visual_temporal_embedding = visual_temporal_embedding
115
- self.image_pos_embed = image_pos_embed
116
- self.image_feature_source = image_feature_source
117
-
118
- super().__init__(**kwargs)
119
-
120
-
121
-
122
- class Florence2LanguageConfig(PretrainedConfig):
123
- r"""
124
- This is the configuration class to store the configuration of a [`Florence2LanguagePreTrainedModel`]. It is used to instantiate a BART
125
- model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
126
- defaults will yield a similar configuration to that of the BART
127
- [facebook/bart-large](https://huggingface.co/facebook/bart-large) architecture.
128
-
129
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
130
- documentation from [`PretrainedConfig`] for more information.
131
-
132
-
133
- Args:
134
- vocab_size (`int`, *optional*, defaults to 51289):
135
- Vocabulary size of the Florence2Language model. Defines the number of different tokens that can be represented by the
136
- `input_ids` passed when calling [`Florence2LanguageModel`].
137
- d_model (`int`, *optional*, defaults to 1024):
138
- Dimensionality of the layers and the pooler layer.
139
- encoder_layers (`int`, *optional*, defaults to 12):
140
- Number of encoder layers.
141
- decoder_layers (`int`, *optional*, defaults to 12):
142
- Number of decoder layers.
143
- encoder_attention_heads (`int`, *optional*, defaults to 16):
144
- Number of attention heads for each attention layer in the Transformer encoder.
145
- decoder_attention_heads (`int`, *optional*, defaults to 16):
146
- Number of attention heads for each attention layer in the Transformer decoder.
147
- decoder_ffn_dim (`int`, *optional*, defaults to 4096):
148
- Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
149
- encoder_ffn_dim (`int`, *optional*, defaults to 4096):
150
- Dimensionality of the "intermediate" (often named feed-forward) layer in encoder.
151
- activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
152
- The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
153
- `"relu"`, `"silu"` and `"gelu_new"` are supported.
154
- dropout (`float`, *optional*, defaults to 0.1):
155
- The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
156
- attention_dropout (`float`, *optional*, defaults to 0.0):
157
- The dropout ratio for the attention probabilities.
158
- activation_dropout (`float`, *optional*, defaults to 0.0):
159
- The dropout ratio for activations inside the fully connected layer.
160
- classifier_dropout (`float`, *optional*, defaults to 0.0):
161
- The dropout ratio for classifier.
162
- max_position_embeddings (`int`, *optional*, defaults to 1024):
163
- The maximum sequence length that this model might ever be used with. Typically set this to something large
164
- just in case (e.g., 512 or 1024 or 2048).
165
- init_std (`float`, *optional*, defaults to 0.02):
166
- The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
167
- encoder_layerdrop (`float`, *optional*, defaults to 0.0):
168
- The LayerDrop probability for the encoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
169
- for more details.
170
- decoder_layerdrop (`float`, *optional*, defaults to 0.0):
171
- The LayerDrop probability for the decoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
172
- for more details.
173
- scale_embedding (`bool`, *optional*, defaults to `False`):
174
- Scale embeddings by dividing by sqrt(d_model).
175
- use_cache (`bool`, *optional*, defaults to `True`):
176
- Whether or not the model should return the last key/values attentions (not used by all models).
177
- num_labels (`int`, *optional*, defaults to 3):
178
- The number of labels to use in [`Florence2LanguageForSequenceClassification`].
179
- forced_eos_token_id (`int`, *optional*, defaults to 2):
180
- The id of the token to force as the last generated token when `max_length` is reached. Usually set to
181
- `eos_token_id`.
182
-
183
- Example:
184
-
185
- ```python
186
- >>> from transformers import Florence2LanguageConfig, Florence2LanguageModel
187
-
188
- >>> # Initializing a Florence2 Language style configuration
189
- >>> configuration = Florence2LanguageConfig()
190
-
191
- >>> # Initializing a model (with random weights)
192
- >>> model = Florence2LanguageModel(configuration)
193
-
194
- >>> # Accessing the model configuration
195
- >>> configuration = model.config
196
- ```"""
197
-
198
- model_type = "florence2_language"
199
- keys_to_ignore_at_inference = ["past_key_values"]
200
- attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
201
-
202
- def __init__(
203
- self,
204
- vocab_size=51289,
205
- max_position_embeddings=1024,
206
- encoder_layers=12,
207
- encoder_ffn_dim=4096,
208
- encoder_attention_heads=16,
209
- decoder_layers=12,
210
- decoder_ffn_dim=4096,
211
- decoder_attention_heads=16,
212
- encoder_layerdrop=0.0,
213
- decoder_layerdrop=0.0,
214
- activation_function="gelu",
215
- d_model=1024,
216
- dropout=0.1,
217
- attention_dropout=0.0,
218
- activation_dropout=0.0,
219
- init_std=0.02,
220
- classifier_dropout=0.0,
221
- scale_embedding=False,
222
- use_cache=True,
223
- num_labels=3,
224
- pad_token_id=1,
225
- bos_token_id=0,
226
- eos_token_id=2,
227
- is_encoder_decoder=True,
228
- decoder_start_token_id=2,
229
- forced_eos_token_id=2,
230
- **kwargs,
231
- ):
232
- self.vocab_size = vocab_size
233
- self.max_position_embeddings = max_position_embeddings
234
- self.d_model = d_model
235
- self.encoder_ffn_dim = encoder_ffn_dim
236
- self.encoder_layers = encoder_layers
237
- self.encoder_attention_heads = encoder_attention_heads
238
- self.decoder_ffn_dim = decoder_ffn_dim
239
- self.decoder_layers = decoder_layers
240
- self.decoder_attention_heads = decoder_attention_heads
241
- self.dropout = dropout
242
- self.attention_dropout = attention_dropout
243
- self.activation_dropout = activation_dropout
244
- self.activation_function = activation_function
245
- self.init_std = init_std
246
- self.encoder_layerdrop = encoder_layerdrop
247
- self.decoder_layerdrop = decoder_layerdrop
248
- self.classifier_dropout = classifier_dropout
249
- self.use_cache = use_cache
250
- self.num_hidden_layers = encoder_layers
251
- self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True
252
-
253
- super().__init__(
254
- num_labels=num_labels,
255
- pad_token_id=pad_token_id,
256
- bos_token_id=bos_token_id,
257
- eos_token_id=eos_token_id,
258
- is_encoder_decoder=is_encoder_decoder,
259
- decoder_start_token_id=decoder_start_token_id,
260
- forced_eos_token_id=forced_eos_token_id,
261
- **kwargs,
262
- )
263
-
264
- # ensure backward compatibility for BART CNN models
265
- if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
266
- self.forced_bos_token_id = self.bos_token_id
267
- warnings.warn(
268
- f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. "
269
- "The config can simply be saved and uploaded again to be fixed."
270
- )
271
-
272
- class Florence2Config(PretrainedConfig):
273
- r"""
274
- This is the configuration class to store the configuration of a [`Florence2ForConditionalGeneration`]. It is used to instantiate an
275
- Florence-2 model according to the specified arguments, defining the model architecture.
276
-
277
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
278
- documentation from [`PretrainedConfig`] for more information.
279
-
280
- Args:
281
- vision_config (`Florence2VisionConfig`, *optional*):
282
- Custom vision config or dict
283
- text_config (`Union[AutoConfig, dict]`, *optional*):
284
- The config object of the text backbone.
285
- ignore_index (`int`, *optional*, defaults to -100):
286
- The ignore index for the loss function.
287
- vocab_size (`int`, *optional*, defaults to 51289):
288
- Vocabulary size of the Florence2 model. Defines the number of different tokens that can be represented by the
289
- `input_ids` passed when calling [`~Florence2ForConditionalGeneration`].
290
- projection_dim (`int`, *optional*, defaults to 1024):
291
- Dimension of the multimodal projection space.
292
-
293
- Example:
294
-
295
- ```python
296
- >>> from transformers import Florence2ForConditionalGeneration, Florence2Config, CLIPVisionConfig, BartConfig
297
-
298
- >>> # Initializing a clip-like vision config
299
- >>> vision_config = CLIPVisionConfig()
300
-
301
- >>> # Initializing a Bart config
302
- >>> text_config = BartConfig()
303
-
304
- >>> # Initializing a Florence-2 configuration
305
- >>> configuration = Florence2Config(vision_config, text_config)
306
-
307
- >>> # Initializing a model from the florence-2 configuration
308
- >>> model = Florence2ForConditionalGeneration(configuration)
309
-
310
- >>> # Accessing the model configuration
311
- >>> configuration = model.config
312
- ```"""
313
-
314
- model_type = "florence2"
315
- is_composition = False
316
-
317
- def __init__(
318
- self,
319
- vision_config=None,
320
- text_config=None,
321
- ignore_index=-100,
322
- vocab_size=51289,
323
- projection_dim=1024,
324
- **kwargs,
325
- ):
326
- self.ignore_index = ignore_index
327
- self.vocab_size = vocab_size
328
- self.projection_dim = projection_dim
329
- if vision_config is not None:
330
- vision_config = PretrainedConfig(**vision_config)
331
- self.vision_config = vision_config
333
-
334
- self.text_config = text_config
335
- if text_config is not None:
336
- self.text_config = Florence2LanguageConfig(**text_config)
337
-
338
-
339
- super().__init__(**kwargs)
340
-
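`Florence2Config.__init__` wraps plain dicts on the fly: `vision_config` becomes a generic `PretrainedConfig` and `text_config` a `Florence2LanguageConfig`, which is exactly how the nested objects in `config.json` above are materialized. A short sketch, assuming this module is importable as `configuration_florence2` (values abbreviated):

```python
# Assumes the deleted module is available on the path as configuration_florence2.
from configuration_florence2 import Florence2Config, Florence2LanguageConfig

config = Florence2Config(
    vision_config={"model_type": "davit", "window_size": 12},
    text_config={"vocab_size": 51289, "d_model": 1024},
)
assert isinstance(config.text_config, Florence2LanguageConfig)
print(config.vision_config.window_size)  # 12, via the generic PretrainedConfig wrapper
```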
 
Florence-2-Flux-Large/generation_config.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "num_beams": 3,
3
- "early_stopping": false
4
- }
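These values act as fallback defaults for `model.generate()`; explicit arguments, like the `num_beams=3` in the README, take precedence. A tiny sketch of reading them back, repo id again assumed:

```python
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("gokaygokay/Florence-2-Flux-Large")
print(gen_cfg.num_beams)       # 3
print(gen_cfg.early_stopping)  # False
```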
 
Florence-2-Flux-Large/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
Florence-2-Flux-Large/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:82d0f8da156f27d64c31abef8281b1c4cb646ec4edfab2debe5f64a78d208946
3
- size 3291921348
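Git itself stores only this LFS pointer: the spec version, the SHA-256 of the actual weight file, and its size (3291921348 bytes, ~3.3 GB). A sketch for verifying a downloaded copy against the pointer, assuming a local `model.safetensors` path:

```python
import hashlib
import os

EXPECTED_SHA256 = "82d0f8da156f27d64c31abef8281b1c4cb646ec4edfab2debe5f64a78d208946"
EXPECTED_SIZE = 3291921348

path = "model.safetensors"  # assumed local path
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_SHA256, "checksum mismatch"
```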
 
Florence-2-Flux-Large/modeling_florence2.py DELETED
The diff for this file is too large to render. See raw diff
 
Florence-2-Flux-Large/preprocessor_config.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "auto_map": {
3
- "AutoProcessor": "processing_florence2.Florence2Processor"
4
- },
5
- "_valid_processor_keys": [
6
- "images",
7
- "do_resize",
8
- "size",
9
- "resample",
10
- "do_rescale",
11
- "rescale_factor",
12
- "do_normalize",
13
- "image_mean",
14
- "image_std",
15
- "return_tensors",
16
- "data_format",
17
- "input_data_format",
18
- "do_convert_rgb"
19
- ],
20
- "do_convert_rgb": null,
21
- "do_normalize": true,
22
- "do_rescale": true,
23
- "do_resize": true,
24
- "do_center_crop": false,
25
- "image_processor_type": "CLIPImageProcessor",
26
- "image_seq_length": 577,
27
- "image_mean": [0.485, 0.456, 0.406],
28
- "image_std": [0.229, 0.224, 0.225],
29
- "processor_class": "Florence2Processor",
30
- "resample": 3,
31
- "size": {
32
- "height": 768,
33
- "width":768
34
- },
35
- "crop_size": {
36
- "height": 768,
37
- "width": 768
38
- }
39
- }
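In effect the settings say: bicubic resize (`"resample": 3` is PIL's BICUBIC) to 768×768, rescale pixel values to [0, 1], then normalize with the ImageNet mean/std. A torchvision sketch that mirrors them; an independent re-implementation for illustration, not the processor's actual code path:

```python
from torchvision import transforms

florence_preprocess = transforms.Compose([
    transforms.Resize((768, 768), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),  # implements do_rescale with factor 1/255
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
```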
 
Florence-2-Flux-Large/processing_florence2.py DELETED
@@ -1,1088 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2024 Microsoft and The HuggingFace Inc. team.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- """
16
- Processor class for Florence-2.
17
- """
18
-
19
- import re
20
- import logging
21
- from typing import List, Optional, Union
22
- import numpy as np
23
-
24
- import torch
25
-
26
- from transformers.feature_extraction_utils import BatchFeature
27
- from transformers.image_utils import ImageInput, is_valid_image
28
- from transformers.processing_utils import ProcessorMixin
29
- from transformers.tokenization_utils_base import (
30
- PaddingStrategy,
31
- PreTokenizedInput,
32
- TextInput,
33
- TruncationStrategy,
34
- )
35
- from transformers.utils import TensorType
36
-
37
-
38
- logger = logging.getLogger(__name__)
39
-
40
- # Copied from transformers.models.idefics2.processing_idefics2.is_url
41
- def is_url(val) -> bool:
42
- return isinstance(val, str) and val.startswith("http")
43
-
44
- # Copied from transformers.models.idefics2.processing_idefics2.is_image_or_image_url
45
- def is_image_or_image_url(elem):
46
- return is_url(elem) or is_valid_image(elem)
47
-
48
-
49
- def _is_str_or_image(elem):
50
- return isinstance(elem, (str)) or is_image_or_image_url(elem)
51
-
52
-
53
- class Florence2Processor(ProcessorMixin):
54
- r"""
55
- Constructs a Florence2 processor which wraps a Florence2 image processor and a Florence2 tokenizer into a single processor.
56
-
57
- [`Florence2Processor`] offers all the functionalities of [`CLIPImageProcessor`] and [`BartTokenizerFast`]. See the
58
- [`~Florence2Processor.__call__`] and [`~Florence2Processor.decode`] for more information.
59
-
60
- Args:
61
- image_processor ([`CLIPImageProcessor`], *optional*):
62
- The image processor is a required input.
63
- tokenizer ([`BartTokenizerFast`], *optional*):
64
- The tokenizer is a required input.
65
- """
66
-
67
- attributes = ["image_processor", "tokenizer"]
68
- image_processor_class = "CLIPImageProcessor"
69
- tokenizer_class = ("BartTokenizer", "BartTokenizerFast")
70
-
71
- def __init__(
72
- self,
73
- image_processor=None,
74
- tokenizer=None,
75
- ):
76
- if image_processor is None:
77
- raise ValueError("You need to specify an `image_processor`.")
78
- if tokenizer is None:
79
- raise ValueError("You need to specify a `tokenizer`.")
80
- if not hasattr(image_processor, "image_seq_length"):
81
- raise ValueError("Image processor is missing an `image_seq_length` attribute.")
82
-
83
- self.image_seq_length = image_processor.image_seq_length
84
-
85
- tokens_to_add = {
86
- 'additional_special_tokens': \
87
- tokenizer.additional_special_tokens + \
88
- ['<od>', '</od>', '<ocr>', '</ocr>'] + \
89
- [f'<loc_{x}>' for x in range(1000)] + \
90
- ['<cap>', '</cap>', '<ncap>', '</ncap>','<dcap>', '</dcap>', '<grounding>', '</grounding>', '<seg>', '</seg>', '<sep>', '<region_cap>', '</region_cap>', '<region_to_desciption>', '</region_to_desciption>', '<proposal>', '</proposal>', '<poly>', '</poly>', '<and>']
91
- }
92
- tokenizer.add_special_tokens(tokens_to_add)
93
-
94
- self.tasks_answer_post_processing_type = {
95
- '<OCR>': 'pure_text',
96
- '<OCR_WITH_REGION>': 'ocr',
97
- '<CAPTION>': 'pure_text',
98
- '<DETAILED_CAPTION>': 'pure_text',
99
- '<MORE_DETAILED_CAPTION>': 'pure_text',
100
- '<OD>': 'description_with_bboxes',
101
- '<DENSE_REGION_CAPTION>': 'description_with_bboxes',
102
- '<CAPTION_TO_PHRASE_GROUNDING>': "phrase_grounding",
103
- '<REFERRING_EXPRESSION_SEGMENTATION>': 'polygons',
104
- '<REGION_TO_SEGMENTATION>': 'polygons',
105
- '<OPEN_VOCABULARY_DETECTION>': 'description_with_bboxes_or_polygons',
106
- '<REGION_TO_CATEGORY>': 'pure_text',
107
- '<REGION_TO_DESCRIPTION>': 'pure_text',
108
- '<REGION_TO_OCR>': 'pure_text',
109
- '<REGION_PROPOSAL>': 'bboxes'
110
- }
111
-
112
- self.task_prompts_without_inputs = {
113
- '<OCR>': 'What is the text in the image?',
114
- '<OCR_WITH_REGION>': 'What is the text in the image, with regions?',
115
- '<CAPTION>': 'What does the image describe?',
116
- '<DETAILED_CAPTION>': 'Describe in detail what is shown in the image.',
117
- '<MORE_DETAILED_CAPTION>': 'Describe with a paragraph what is shown in the image.',
118
- '<OD>': 'Locate the objects with category name in the image.',
119
- '<DENSE_REGION_CAPTION>': 'Locate the objects in the image, with their descriptions.',
120
- '<REGION_PROPOSAL>': 'Locate the region proposals in the image.'
121
- }
122
-
123
- self.task_prompts_with_input = {
124
- '<CAPTION_TO_PHRASE_GROUNDING>': "Locate the phrases in the caption: {input}",
125
- '<REFERRING_EXPRESSION_SEGMENTATION>': 'Locate {input} in the image with mask',
126
- '<REGION_TO_SEGMENTATION>': 'What is the polygon mask of region {input}',
127
- '<OPEN_VOCABULARY_DETECTION>': 'Locate {input} in the image.',
128
- '<REGION_TO_CATEGORY>': 'What is the region {input}?',
129
- '<REGION_TO_DESCRIPTION>': 'What does the region {input} describe?',
130
- '<REGION_TO_OCR>': 'What text is in the region {input}?',
131
- }
132
-
133
- self.post_processor = Florence2PostProcesser(tokenizer=tokenizer)
134
-
135
-
136
- super().__init__(image_processor, tokenizer)
137
-
138
- def _construct_prompts(self, text):
139
- # replace the task tokens with the task prompts if task token is in the text
140
- prompts = []
141
- for _text in text:
142
- # 1. fixed task prompts without additional inputs
143
- for task_token, task_prompt in self.task_prompts_without_inputs.items():
144
- if task_token in _text:
145
- assert _text == task_token, f"Task token {task_token} should be the only token in the text."
146
- _text = task_prompt
147
- break
148
- # 2. task prompts with additional inputs
149
- for task_token, task_prompt in self.task_prompts_with_input.items():
150
- if task_token in _text:
151
- _text = task_prompt.format(input=_text.replace(task_token, ''))
152
- break
153
- prompts.append(_text)
154
- return prompts
155
-
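As an aside, the substitution rule above is easy to check in isolation. A minimal standalone sketch, with the task dicts trimmed to one entry each (toy inputs, not part of the original file):

```python
# Standalone sketch of the _construct_prompts substitution rule (trimmed dicts).
task_prompts_without_inputs = {'<CAPTION>': 'What does the image describe?'}
task_prompts_with_input = {'<CAPTION_TO_PHRASE_GROUNDING>': 'Locate the phrases in the caption: {input}'}

def construct_prompt(text: str) -> str:
    for token, prompt in task_prompts_without_inputs.items():
        if token in text:
            assert text == token, f"Task token {token} should be the only token in the text."
            return prompt
    for token, prompt in task_prompts_with_input.items():
        if token in text:
            return prompt.format(input=text.replace(token, ''))
    return text

print(construct_prompt('<CAPTION>'))                               # What does the image describe?
print(construct_prompt('<CAPTION_TO_PHRASE_GROUNDING>a red car'))  # Locate the phrases in the caption: a red car
```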
156
- def __call__(
157
- self,
158
- text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
159
- images: ImageInput = None,
160
- tokenize_newline_separately: bool = True,
161
- padding: Union[bool, str, PaddingStrategy] = False,
162
- truncation: Union[bool, str, TruncationStrategy] = None,
163
- max_length=None,
164
- return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
165
- do_resize: bool = None,
166
- do_normalize: bool = None,
167
- image_mean: Optional[Union[float, List[float]]] = None,
168
- image_std: Optional[Union[float, List[float]]] = None,
169
- data_format: Optional["ChannelDimension"] = "channels_first", # noqa: F821
170
- input_data_format: Optional[
171
- Union[str, "ChannelDimension"] # noqa: F821
172
- ] = None,
173
- resample: "PILImageResampling" = None, # noqa: F821
174
- do_convert_rgb: bool = None,
175
- do_thumbnail: bool = None,
176
- do_align_long_axis: bool = None,
177
- do_rescale: bool = None,
178
- ) -> BatchFeature:
179
- """
180
- Main method to prepare for the model one or several sequence(s) and image(s). This method forwards the `text`
181
- and `kwargs` arguments to BartTokenizerFast's [`~BartTokenizerFast.__call__`] if `text` is not `None` to encode
182
- the text. To prepare the image(s), this method forwards the `images` and `kwargs` arguments to
183
- CLIPImageProcessor's [`~CLIPImageProcessor.__call__`] if `images` is not `None`. Please refer to the docstring
184
- of the above two methods for more information.
185
-
186
- Args:
187
- text (`str`, `List[str]`, `List[List[str]]`):
188
- The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
189
- (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
190
- `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
191
- images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
192
- The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
193
- tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
194
- number of channels, H and W are image height and width.
195
- tokenize_newline_separately (`bool`, defaults to `True`):
196
- Adds a separately tokenized '\n' at the end of the prompt. (Accepted for API compatibility; unused in this implementation.)
197
- padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
198
- Select a strategy to pad the returned sequences (according to the model's padding side and padding
199
- index) among:
200
- - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
201
- sequence is provided).
202
- - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
203
- acceptable input length for the model if that argument is not provided.
204
- - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
205
- lengths).
206
- max_length (`int`, *optional*):
207
- Maximum length of the returned list and optionally padding length (see above).
208
- truncation (`bool`, *optional*):
209
- Activates truncation to cut input sequences longer than `max_length` to `max_length`.
210
- return_tensors (`str` or [`~utils.TensorType`], *optional*):
211
- If set, will return tensors of a particular framework. Acceptable values are:
212
-
213
- - `'tf'`: Return TensorFlow `tf.constant` objects.
214
- - `'pt'`: Return PyTorch `torch.Tensor` objects.
215
- - `'np'`: Return NumPy `np.ndarray` objects.
216
- - `'jax'`: Return JAX `jnp.ndarray` objects.
217
-
218
- Returns:
219
- [`BatchFeature`]: A [`BatchFeature`] with the following fields:
220
-
221
- **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
223
- - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
224
- `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
225
- `None`).
226
- - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
227
- **labels** -- Labels for training, returned only when `return_token_type_ids` is enabled.
228
- """
229
-
230
- return_token_type_ids = False
231
-
232
- if images is None:
233
- raise ValueError("`images` are expected as arguments to a `Florence2Processor` instance.")
234
- if text is None:
235
- logger.warning_once(
236
- "You are using Florence-2 without a text prompt."
237
- )
238
- text = ""
239
-
240
- if isinstance(text, list) and isinstance(images, list):
241
- if len(images) < len(text):
242
- raise ValueError(
243
- f"Received {len(images)} images for {len(text)} prompts. Each prompt should be associated with an image."
244
- )
245
- if _is_str_or_image(text):
246
- text = [text]
247
- elif isinstance(text, list) and _is_str_or_image(text[0]):
248
- pass
249
-
250
- pixel_values = self.image_processor(
251
- images,
252
- do_resize=do_resize,
253
- do_normalize=do_normalize,
254
- return_tensors=return_tensors,
255
- image_mean=image_mean,
256
- image_std=image_std,
257
- input_data_format=input_data_format,
258
- data_format=data_format,
259
- resample=resample,
260
- do_convert_rgb=do_convert_rgb,
261
- )["pixel_values"]
262
-
263
- if max_length is not None:
264
- max_length -= self.image_seq_length # max_length has to account for the image tokens
265
-
266
- text = self._construct_prompts(text)
267
-
268
- inputs = self.tokenizer(
269
- text,
270
- return_tensors=return_tensors,
271
- padding=padding,
272
- max_length=max_length,
273
- truncation=truncation,
274
- return_token_type_ids=return_token_type_ids,
275
- )
276
-
277
- return_data = {**inputs, "pixel_values": pixel_values}
278
-
279
- if return_token_type_ids:
280
- labels = inputs["input_ids"].masked_fill(inputs["token_type_ids"] == 0, -100)
281
- return_data.update({"labels": labels})
282
- return BatchFeature(data=return_data)
283
-
284
- # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->Florence2
285
- def batch_decode(self, *args, **kwargs):
286
- """
287
- This method forwards all its arguments to BartTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
288
- refer to the docstring of this method for more information.
289
- """
290
- return self.tokenizer.batch_decode(*args, **kwargs)
291
-
292
- # Copied from transformers.models.clip.processing_clip.CLIPProcessor.decode with CLIP->Florence2
293
- def decode(self, *args, **kwargs):
294
- """
295
- This method forwards all its arguments to BartTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
296
- the docstring of this method for more information.
297
- """
298
- return self.tokenizer.decode(*args, **kwargs)
299
-
300
- @property
301
- # Copied from transformers.models.clip.processing_clip.CLIPProcessor.model_input_names with CLIP->Florence2
302
- def model_input_names(self):
303
- tokenizer_input_names = self.tokenizer.model_input_names
304
- image_processor_input_names = self.image_processor.model_input_names
305
- return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
306
-
307
- def post_process_generation(self, text, task, image_size):
308
- """
309
- Post-process the output of the model to each of the task outputs.
310
-
311
- Args:
312
- text (`str`): The text to post-process.
313
- task (`str`): The task to post-process the text for.
314
- image_size (`Tuple[int, int]`): The size of the image, as (width, height).
315
- """
316
-
317
- task_answer_post_processing_type = self.tasks_answer_post_processing_type.get(task, 'pure_text')
318
- task_answer = self.post_processor(
319
- text=text,
320
- image_size=image_size,
321
- parse_tasks=task_answer_post_processing_type,
322
- )[task_answer_post_processing_type]
323
-
324
- if task_answer_post_processing_type == 'pure_text':
325
- final_answer = task_answer
326
- # remove the special tokens
327
- final_answer = final_answer.replace('<s>', '').replace('</s>', '')
328
- elif task_answer_post_processing_type in ['od', 'description_with_bboxes', 'bboxes']:
329
- od_instances = task_answer
330
- bboxes_od = [_od_instance['bbox'] for _od_instance in od_instances]
331
- labels_od = [str(_od_instance['cat_name']) for _od_instance in od_instances]
332
- final_answer = {'bboxes': bboxes_od, 'labels': labels_od}
333
- elif task_answer_post_processing_type in ['ocr']:
334
- bboxes = [_od_instance['quad_box'] for _od_instance in task_answer]
335
- labels = [str(_od_instance['text']) for _od_instance in task_answer]
336
- final_answer = {'quad_boxes': bboxes, 'labels': labels}
337
- elif task_answer_post_processing_type in ['phrase_grounding']:
338
- bboxes = []
339
- labels = []
340
- for _grounded_phrase in task_answer:
341
- for _bbox in _grounded_phrase['bbox']:
342
- bboxes.append(_bbox)
343
- labels.append(_grounded_phrase['cat_name'])
344
- final_answer = {'bboxes': bboxes, 'labels': labels}
345
- elif task_answer_post_processing_type in ['description_with_polygons', 'polygons']:
346
- labels = []
347
- polygons = []
348
- for result in task_answer:
349
- label = result['cat_name']
350
- _polygons = result['polygons']
351
- labels.append(label)
352
- polygons.append(_polygons)
353
- final_answer = {'polygons': polygons, 'labels': labels}
354
- elif task_answer_post_processing_type in ['description_with_bboxes_or_polygons']:
355
- bboxes = []
356
- bboxes_labels = []
357
- polygons = []
358
- polygons_labels = []
359
- for result in task_answer:
360
- label = result['cat_name']
361
- if 'polygons' in result:
362
- _polygons = result['polygons']
363
- polygons.append(_polygons)
364
- polygons_labels.append(label)
365
- else:
366
- _bbox = result['bbox']
367
- bboxes.append(_bbox)
368
- bboxes_labels.append(label)
369
- final_answer = {'bboxes': bboxes, 'bboxes_labels': bboxes_labels, 'polygons': polygons, 'polygons_labels': polygons_labels}
370
- else:
371
- raise ValueError('Unknown task answer post processing type: {}'.format(task_answer_post_processing_type))
372
-
373
- final_answer = {
374
- task: final_answer}
375
- return final_answer
376
-
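To make the `<OD>` branch above concrete, here is the same folding applied to two hand-written instances (toy values, not real model output):

```python
# Toy '<OD>' instances folded the same way post_process_generation does it.
od_instances = [
    {'bbox': [34.2, 16.0, 512.7, 410.5], 'cat_name': 'dog'},
    {'bbox': [120.0, 40.9, 300.4, 200.1], 'cat_name': 'ball'},
]
final_answer = {
    'bboxes': [inst['bbox'] for inst in od_instances],
    'labels': [str(inst['cat_name']) for inst in od_instances],
}
print({'<OD>': final_answer})
# {'<OD>': {'bboxes': [[34.2, 16.0, 512.7, 410.5], [120.0, 40.9, 300.4, 200.1]], 'labels': ['dog', 'ball']}}
```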
377
- class BoxQuantizer(object):
378
- def __init__(self, mode, bins):
379
- self.mode = mode
380
- self.bins = bins
381
-
382
- def quantize(self, boxes: torch.Tensor, size):
383
- bins_w, bins_h = self.bins # Quantization bins.
384
- size_w, size_h = size # Original image size.
385
- size_per_bin_w = size_w / bins_w
386
- size_per_bin_h = size_h / bins_h
387
- xmin, ymin, xmax, ymax = boxes.split(1, dim=-1) # Shape: 4 * [N, 1].
388
-
389
- if self.mode == 'floor':
390
- quantized_xmin = (
391
- xmin / size_per_bin_w).floor().clamp(0, bins_w - 1)
392
- quantized_ymin = (
393
- ymin / size_per_bin_h).floor().clamp(0, bins_h - 1)
394
- quantized_xmax = (
395
- xmax / size_per_bin_w).floor().clamp(0, bins_w - 1)
396
- quantized_ymax = (
397
- ymax / size_per_bin_h).floor().clamp(0, bins_h - 1)
398
-
399
- elif self.mode == 'round':
400
- raise NotImplementedError()
401
-
402
- else:
403
- raise ValueError('Incorrect quantization type.')
404
-
405
- quantized_boxes = torch.cat(
406
- (quantized_xmin, quantized_ymin, quantized_xmax, quantized_ymax), dim=-1
407
- ).int()
408
-
409
- return quantized_boxes
410
-
411
- def dequantize(self, boxes: torch.Tensor, size):
412
- bins_w, bins_h = self.bins # Quantization bins.
413
- size_w, size_h = size # Original image size.
414
- size_per_bin_w = size_w / bins_w
415
- size_per_bin_h = size_h / bins_h
416
- xmin, ymin, xmax, ymax = boxes.split(1, dim=-1) # Shape: 4 * [N, 1].
417
-
418
- if self.mode == 'floor':
419
- # Add 0.5 to use the center position of the bin as the coordinate.
420
- dequantized_xmin = (xmin + 0.5) * size_per_bin_w
421
- dequantized_ymin = (ymin + 0.5) * size_per_bin_h
422
- dequantized_xmax = (xmax + 0.5) * size_per_bin_w
423
- dequantized_ymax = (ymax + 0.5) * size_per_bin_h
424
-
425
- elif self.mode == 'round':
426
- raise NotImplementedError()
427
-
428
- else:
429
- raise ValueError('Incorrect quantization type.')
430
-
431
- dequantized_boxes = torch.cat(
432
- (dequantized_xmin, dequantized_ymin,
433
- dequantized_xmax, dequantized_ymax), dim=-1
434
- )
435
-
436
- return dequantized_boxes
437
-
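A quick round trip through `BoxQuantizer` as defined above, on a hypothetical 640x480 image with 1000 bins per axis (values checked by hand):

```python
import torch

q = BoxQuantizer('floor', (1000, 1000))
boxes = torch.tensor([[100.0, 200.0, 300.0, 400.0]])  # xyxy, in pixels
bins = q.quantize(boxes, size=(640, 480))             # -> tensor([[156, 416, 468, 833]])
restored = q.dequantize(bins, size=(640, 480))        # bin centers, ~ the original box
print(bins)
print(restored)  # tensor([[100.1600, 199.9200, 299.8400, 400.0800]])
```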
438
-
439
- class CoordinatesQuantizer(object):
440
- """
441
- Quantize coordinates (Nx2)
442
- """
443
-
444
- def __init__(self, mode, bins):
445
- self.mode = mode
446
- self.bins = bins
447
-
448
- def quantize(self, coordinates: torch.Tensor, size):
449
- bins_w, bins_h = self.bins # Quantization bins.
450
- size_w, size_h = size # Original image size.
451
- size_per_bin_w = size_w / bins_w
452
- size_per_bin_h = size_h / bins_h
453
- assert coordinates.shape[-1] == 2, 'coordinates should be shape (N, 2)'
454
- x, y = coordinates.split(1, dim=-1) # Shape: 2 * [N, 1].
455
-
456
- if self.mode == 'floor':
457
- quantized_x = (x / size_per_bin_w).floor().clamp(0, bins_w - 1)
458
- quantized_y = (y / size_per_bin_h).floor().clamp(0, bins_h - 1)
459
-
460
- elif self.mode == 'round':
461
- raise NotImplementedError()
462
-
463
- else:
464
- raise ValueError('Incorrect quantization type.')
465
-
466
- quantized_coordinates = torch.cat(
467
- (quantized_x, quantized_y), dim=-1
468
- ).int()
469
-
470
- return quantized_coordinates
471
-
472
- def dequantize(self, coordinates: torch.Tensor, size):
473
- bins_w, bins_h = self.bins # Quantization bins.
474
- size_w, size_h = size # Original image size.
475
- size_per_bin_w = size_w / bins_w
476
- size_per_bin_h = size_h / bins_h
477
- assert coordinates.shape[-1] == 2, 'coordinates should be shape (N, 2)'
478
- x, y = coordinates.split(1, dim=-1) # Shape: 2 * [N, 1].
479
-
480
- if self.mode == 'floor':
481
- # Add 0.5 to use the center position of the bin as the coordinate.
482
- dequantized_x = (x + 0.5) * size_per_bin_w
483
- dequantized_y = (y + 0.5) * size_per_bin_h
484
-
485
- elif self.mode == 'round':
486
- raise NotImplementedError()
487
-
488
- else:
489
- raise ValueError('Incorrect quantization type.')
490
-
491
- dequantized_coordinates = torch.cat(
492
- (dequantized_x, dequantized_y), dim=-1
493
- )
494
-
495
- return dequantized_coordinates
496
-
497
-
498
- class Florence2PostProcesser(object):
499
- r"""
500
- Florence-2 post-processor for converting text predictions into task-specific results.
501
-
502
- Args:
503
- config: A dict of configs.
504
- tokenizer: A tokenizer for decoding text to spans.
505
- sample config:
506
- UNIFIED_POST_PROCESS:
507
- # common configs
508
- NUM_BBOX_HEIGHT_BINS: 1000
509
- NUM_BBOX_WIDTH_BINS: 1000
510
- COORDINATES_HEIGHT_BINS: 1000
511
- COORDINATES_WIDTH_BINS: 1000
512
- # task specific configs, override the common configs
513
- PARSE_TASKS:
514
- - TASK_NAME: 'video_dense_caption'
515
- PATTERN: '<time_(\d+)><time_(\d+)>([a-zA-Z0-9 ]+)'
516
- SCORE_MODE: 'avg_cat_name_scores'
517
- NUM_BINS: 100
518
- - TASK_NAME: 'od'
519
- PATTERN: '<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>([a-zA-Z0-9 ]+)'
520
- SCORE_MODE: 'avg_cat_name_scores'
521
-
522
- Returns:
523
- parsed_dict (dict): A dict of parsed results.
524
- """
525
- def __init__(
526
- self,
527
- tokenizer=None
528
- ):
529
- parse_tasks = []
530
- parse_task_configs = {}
531
- config = self._create_default_config()
532
- for task in config['PARSE_TASKS']:
533
- parse_tasks.append(task['TASK_NAME'])
534
- parse_task_configs[task['TASK_NAME']] = task
535
-
536
- self.config = config
537
- self.parse_tasks = parse_tasks
538
- self.parse_tasks_configs = parse_task_configs
539
-
540
- self.tokenizer = tokenizer
541
- if self.tokenizer is not None:
542
- self.all_special_tokens = set(self.tokenizer.all_special_tokens)
543
-
544
- self.init_quantizers()
545
- self.black_list_of_phrase_grounding = self._create_black_list_of_phrase_grounding()
546
-
547
- def _create_black_list_of_phrase_grounding(self):
548
- black_list = {}
549
-
550
- if 'phrase_grounding' in self.parse_tasks and self.parse_tasks_configs['phrase_grounding']['FILTER_BY_BLACK_LIST']:
551
- black_list = set(
552
- ['it', 'I', 'me', 'mine',
553
- 'you', 'your', 'yours',
554
- 'he', 'him', 'his',
555
- 'she', 'her', 'hers',
556
- 'they', 'them', 'their', 'theirs',
557
- 'one', 'oneself',
558
- 'we', 'us', 'our', 'ours',
559
- 'you', 'your', 'yours',
560
- 'they', 'them', 'their', 'theirs',
561
- 'mine', 'yours', 'his', 'hers', 'its',
562
- 'ours', 'yours', 'theirs',
563
- 'myself', 'yourself', 'himself', 'herself', 'itself',
564
- 'ourselves', 'yourselves', 'themselves',
565
- 'this', 'that',
566
- 'these', 'those',
567
- 'who', 'whom', 'whose', 'which', 'what',
568
- 'who', 'whom', 'whose', 'which', 'that',
569
- 'all', 'another', 'any', 'anybody', 'anyone', 'anything',
570
- 'each', 'everybody', 'everyone', 'everything',
571
- 'few', 'many', 'nobody', 'none', 'one', 'several',
572
- 'some', 'somebody', 'someone', 'something',
573
- 'each other', 'one another',
574
- 'myself', 'yourself', 'himself', 'herself', 'itself',
575
- 'ourselves', 'yourselves', 'themselves',
576
- 'the image', 'image', 'images', 'the', 'a', 'an', 'a group',
577
- 'other objects', 'lots', 'a set',
578
- ]
579
- )
580
-
581
- return black_list
582
-
583
- def _create_default_config(self):
584
- config = {
585
- 'NUM_BBOX_HEIGHT_BINS': 1000,
586
- 'NUM_BBOX_WIDTH_BINS': 1000,
587
- 'BOX_QUANTIZATION_MODE': 'floor',
588
- 'COORDINATES_HEIGHT_BINS': 1000,
589
- 'COORDINATES_WIDTH_BINS': 1000,
590
- 'COORDINATES_QUANTIZATION_MODE': 'floor',
591
- 'PARSE_TASKS': [
592
- {
593
- 'TASK_NAME': 'od',
594
- 'PATTERN': r'([a-zA-Z0-9 ]+)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
595
- },
596
- {
597
- 'TASK_NAME': 'ocr',
598
- 'PATTERN': r'(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>',
599
- 'AREA_THRESHOLD': 0.00
600
- },
601
- {
602
- 'TASK_NAME': 'phrase_grounding',
603
- 'FILTER_BY_BLACK_LIST': True
604
- },
605
- {
606
- 'TASK_NAME': 'pure_text',
607
- },
608
- {
609
- 'TASK_NAME': 'description_with_bboxes',
610
- },
611
- {
612
- 'TASK_NAME': 'description_with_polygons',
613
- },
614
- {
615
- 'TASK_NAME': 'polygons',
616
- },
617
- {
618
- 'TASK_NAME': 'bboxes',
619
- },
620
- {
621
- 'TASK_NAME': 'description_with_bboxes_or_polygons',
622
- }
623
- ]
624
- }
625
-
626
- return config
627
-
628
- def init_quantizers(self):
629
- # we have box_quantizer (od, grounding) and coordinates_quantizer (ocr, referring_segmentation)
630
- num_bbox_height_bins = self.config.get('NUM_BBOX_HEIGHT_BINS', 1000)
631
- num_bbox_width_bins = self.config.get('NUM_BBOX_WIDTH_BINS', 1000)
632
- box_quantization_mode = self.config.get('BOX_QUANTIZATION_MODE', 'floor')
633
- self.box_quantizer = BoxQuantizer(
634
- box_quantization_mode,
635
- (num_bbox_width_bins, num_bbox_height_bins),
636
- )
637
-
638
- num_bbox_height_bins = self.config.get('COORDINATES_HEIGHT_BINS', self.config.get('NUM_BBOX_HEIGHT_BINS', 1000))
639
- num_bbox_width_bins = self.config.get('COORDINATES_WIDTH_BINS', self.config.get('NUM_BBOX_WIDTH_BINS', 1000))
640
- box_quantization_mode = self.config.get('COORDINATES_QUANTIZATION_MODE', self.config.get('BOX_QUANTIZATION_MODE', 'floor'))
641
- self.coordinates_quantizer = CoordinatesQuantizer(
642
- box_quantization_mode,
643
- (num_bbox_width_bins, num_bbox_height_bins),
644
- )
645
-
646
- def decode_with_spans(self, tokenizer, token_ids):
647
- filtered_tokens = tokenizer.convert_ids_to_tokens(
648
- token_ids, skip_special_tokens=False)
649
- assert len(filtered_tokens) == len(token_ids)
650
-
651
- # To avoid mixing byte-level and unicode for byte-level BPE
652
- # we need to build string separately for added tokens and byte-level tokens
653
- # cf. https://github.com/huggingface/transformers/issues/1133
654
- sub_texts = []
655
- for token in filtered_tokens:
656
- if token in self.all_special_tokens:
657
- sub_texts.append(token)
658
- else:
659
- if isinstance(tokenizer, (BartTokenizer, BartTokenizerFast)):
660
- sub_text = tokenizer.convert_tokens_to_string([token])
661
- elif isinstance(tokenizer, (T5Tokenizer, T5TokenizerFast)):
662
- # Ref: https://github.com/google/sentencepiece#whitespace-is-treated-as-a-basic-symbol
663
- # Note: Do not strip sub_text as it may have functional whitespace
664
- sub_text = token.replace('▁', ' ')
665
- else:
666
- raise ValueError(f'type {type(tokenizer)} not supported')
667
- sub_texts.append(sub_text)
668
-
669
- text = ''
670
- spans = []
671
- for sub_text in sub_texts:
672
- span = (len(text), len(text) + len(sub_text)) # [start index, end index).
673
- text += sub_text
674
- spans.append(span)
675
-
676
- # Text format:
677
- # 1. T5Tokenizer/T5TokenizerFast:
678
- # "<loc_1><loc_2><loc_3><loc_4> transplanting dog<loc_1><loc_2><loc_3><loc_4> cat</s>"
679
- # Equivalent to t5_tokenizer.decode(input_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False, spaces_between_special_tokens=False)
680
- # 2. BartTokenizer (need to double check):
681
- # "<s><loc_1><loc_2><loc_3><loc_4>transplanting dog<loc_1><loc_2><loc_3><loc_4>cat</s>"
682
- # Equivalent to bart_tokenizer.decode(input_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False, spaces_between_special_tokens=False)
683
- return text, spans
684
-
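The span bookkeeping above is simple enough to demo without a tokenizer: each sub-string gets a `[start, end)` span in the joined text. A sketch with hand-made sub-texts standing in for decoded tokens:

```python
# Hand-made sub-texts standing in for decoded tokens.
sub_texts = ['<s>', '<loc_52>', 'a dog', '</s>']
text, spans = '', []
for sub in sub_texts:
    spans.append((len(text), len(text) + len(sub)))  # [start index, end index)
    text += sub
print(text)   # <s><loc_52>a dog</s>
print(spans)  # [(0, 3), (3, 11), (11, 16), (16, 20)]
```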
685
- def parse_od_from_text_and_spans(
686
- self,
687
- text,
688
- pattern,
689
- image_size,
690
- phrase_centric=False
691
- ):
692
- parsed = list(re.finditer(pattern, text))
693
-
694
- instances = []
695
- for i in range(len(parsed)):
696
- # Prepare instance.
697
- instance = {}
698
-
699
- if phrase_centric:
700
- bbox_bins = [int(parsed[i].group(j)) for j in range(2, 6)]
701
- else:
702
- bbox_bins = [int(parsed[i].group(j)) for j in range(1, 5)]
703
- instance['bbox'] = self.box_quantizer.dequantize(
704
- boxes=torch.tensor(bbox_bins),
705
- size=image_size
706
- ).tolist()
707
-
708
- if phrase_centric:
709
- instance['cat_name'] = parsed[i].group(1).lower().strip()
710
- else:
711
- instance['cat_name'] = parsed[i].group(5).lower().strip()
712
- instances.append(instance)
713
-
714
- return instances
715
-
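The default `od` pattern above, applied to a hypothetical detection string (the loc values are quantization-bin ids, not pixels):

```python
import re

# Same shape as the 'od' PATTERN in the default config: phrase first, then four locs.
pattern = r'([a-zA-Z0-9 ]+)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
text = 'dog<loc_52><loc_86><loc_917><loc_876>cat<loc_10><loc_20><loc_110><loc_120>'
for m in re.finditer(pattern, text):
    print(m.group(1).strip(), [int(m.group(j)) for j in range(2, 6)])
# dog [52, 86, 917, 876]
# cat [10, 20, 110, 120]
```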
716
- def parse_ocr_from_text_and_spans(self,
717
- text,
718
- pattern,
719
- image_size,
720
- area_threshold=-1.0,
721
- ):
722
- bboxes = []
723
- labels = []
724
- text = text.replace('<s>', '')
725
- # ocr with regions
726
- parsed = re.findall(pattern, text)
727
- instances = []
728
- image_width, image_height = image_size
729
-
730
- for ocr_line in parsed:
731
- ocr_content = ocr_line[0]
732
- quad_box = ocr_line[1:]
733
- quad_box = [int(i) for i in quad_box]
734
- quad_box = self.coordinates_quantizer.dequantize(
735
- torch.tensor(np.array(quad_box).reshape(-1, 2)),
736
- size=image_size
737
- ).reshape(-1).tolist()
738
-
739
- if area_threshold > 0:
740
- x_coords = quad_box[0::2]
741
- y_coords = quad_box[1::2]
742
-
743
- # apply the Shoelace formula
744
- area = 0.5 * abs(sum(x_coords[i] * y_coords[(i + 1) % 4] - x_coords[(i + 1) % 4] * y_coords[i] for i in range(4)))
745
-
746
- if area < (image_width * image_height) * area_threshold:
747
- continue
748
-
749
- bboxes.append(quad_box)
750
- labels.append(ocr_content)
751
- instances.append({
752
- 'quad_box': quad_box,
753
- 'text': ocr_content,
754
- })
755
- return instances
756
-
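For reference, the Shoelace area with the wrap-around term included, as in the area filter above, checked on a 4x3 axis-aligned rectangle:

```python
# Shoelace area of a quadrilateral; the (i + 1) % 4 index closes the polygon.
x = [0.0, 4.0, 4.0, 0.0]
y = [0.0, 0.0, 3.0, 3.0]
area = 0.5 * abs(sum(x[i] * y[(i + 1) % 4] - x[(i + 1) % 4] * y[i] for i in range(4)))
print(area)  # 12.0
```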
757
- def parse_phrase_grounding_from_text_and_spans(self, text, pattern, image_size):
758
- # ignore <s> </s> and <pad>
759
- cur_span = 0
760
- if text.startswith('<s>'):
761
- cur_span += 3
762
-
763
- text = text.replace('<s>', '')
764
- text = text.replace('</s>', '')
765
- text = text.replace('<pad>', '')
766
-
767
- pattern = r"([^<]+(?:<loc_\d+>){4,})"
768
- phrases = re.findall(pattern, text)
769
-
770
- # pattern should be text pattern and od pattern
771
- pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_)'
772
- box_pattern = r'<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
773
-
774
- instances = []
775
- for pharse_text in phrases:
776
- phrase_text_strip = pharse_text.replace('<ground>', '', 1)
777
- phrase_text_strip = phrase_text_strip.replace('<obj>', '', 1)
778
-
779
- if phrase_text_strip == '':
780
- cur_span += len(pharse_text)
781
- continue
782
-
783
- # Prepare instance.
784
- instance = {}
785
-
786
- # parse phrase, get string
787
- phrase = re.search(pattern, phrase_text_strip)
788
- if phrase is None:
789
- cur_span += len(pharse_text)
790
- continue
791
-
792
- # parse bboxes by box_pattern
793
- bboxes_parsed = list(re.finditer(box_pattern, pharse_text))
794
- if len(bboxes_parsed) == 0:
795
- cur_span += len(pharse_text)
796
- continue
797
-
798
- phrase = phrase.group()
799
- # remove leading and trailing spaces
800
- phrase = phrase.strip()
801
-
802
- if phrase in self.black_list_of_phrase_grounding:
803
- cur_span += len(pharse_text)
804
- continue
805
-
806
- # a list of list
807
- bbox_bins = [[int(_bboxes_parsed.group(j)) for j in range(1, 5)] for _bboxes_parsed in bboxes_parsed]
808
- instance['bbox'] = self.box_quantizer.dequantize(
809
- boxes=torch.tensor(bbox_bins),
810
- size=image_size
811
- ).tolist()
812
-
813
- # exclude non-ascii characters
814
- phrase = phrase.encode('ascii',errors='ignore').decode('ascii')
815
- instance['cat_name'] = phrase
816
-
817
- instances.append(instance)
818
-
819
- return instances
820
-
821
- def parse_description_with_bboxes_from_text_and_spans(self, text, pattern, image_size, allow_empty_phrase=False):
822
- # temporary parse solution, split by '.'
823
- # ignore <s> </s> and <pad>
824
-
825
- text = text.replace('<s>', '')
826
- text = text.replace('</s>', '')
827
- text = text.replace('<pad>', '')
828
-
829
- if allow_empty_phrase:
830
- pattern = rf"(?:(?:<loc_\d+>){{4,}})"
831
- else:
832
- pattern = r"([^<]+(?:<loc_\d+>){4,})"
833
- phrases = re.findall(pattern, text)
834
-
835
- # pattern should be text pattern and od pattern
836
- pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_)'
837
- box_pattern = r'<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
838
-
839
- instances = []
840
- for pharse_text in phrases:
841
- phrase_text_strip = pharse_text.replace('<ground>', '', 1)
842
- phrase_text_strip = phrase_text_strip.replace('<obj>', '', 1)
843
-
844
- if phrase_text_strip == '' and not allow_empty_phrase:
845
- continue
846
-
847
- # parse phrase, get string
848
- phrase = re.search(pattern, phrase_text_strip)
849
- if phrase is None:
850
- continue
851
-
852
- phrase = phrase.group()
853
- # remove leading and trailing spaces
854
- phrase = phrase.strip()
855
-
856
- # parse bboxes by box_pattern
857
- bboxes_parsed = list(re.finditer(box_pattern, pharse_text))
858
- if len(bboxes_parsed) == 0:
859
- continue
860
-
861
- # a list of list
862
- bbox_bins = [[int(_bboxes_parsed.group(j)) for j in range(1, 5)] for _bboxes_parsed in bboxes_parsed]
863
-
864
- bboxes = self.box_quantizer.dequantize(
865
- boxes=torch.tensor(bbox_bins),
866
- size=image_size
867
- ).tolist()
868
-
869
- phrase = phrase.encode('ascii',errors='ignore').decode('ascii')
870
- for _bboxes in bboxes:
871
- # Prepare instance.
872
- instance = {}
873
- instance['bbox'] = _bboxes
874
- # exclude non-ascii characters
875
- instance['cat_name'] = phrase
876
- instances.append(instance)
877
-
878
- return instances
879
-
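The two-stage regex split above (phrase chunks first, then four-loc boxes inside each chunk), run on a toy grounding string; the phrase lookahead here is simplified to `<loc_` only:

```python
import re

text = 'a woman<loc_1><loc_2><loc_3><loc_4>a dog<loc_5><loc_6><loc_7><loc_8>'
for chunk in re.findall(r"([^<]+(?:<loc_\d+>){4,})", text):
    phrase = re.search(r'^\s*(.*?)(?=<loc_)', chunk).group().strip()
    boxes = [[int(g) for g in m.groups()]
             for m in re.finditer(r'<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>', chunk)]
    print(phrase, boxes)
# a woman [[1, 2, 3, 4]]
# a dog [[5, 6, 7, 8]]
```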
880
- def parse_description_with_polygons_from_text_and_spans(self, text, pattern, image_size,
881
- allow_empty_phrase=False,
882
- polygon_sep_token='<sep>',
883
- polygon_start_token='<poly>',
884
- polygon_end_token='</poly>',
885
- with_box_at_start=False,
886
- ):
887
-
888
- # ref_seg format: '<expression><x1><y1><x2><y2><><><sep><><><><>'
889
- # ignore <s> </s> and <pad>
890
-
891
- text = text.replace('<s>', '')
892
- text = text.replace('</s>', '')
893
- text = text.replace('<pad>', '')
894
-
895
- if allow_empty_phrase:
896
- pattern = rf"(?:(?:<loc_\d+>|{re.escape(polygon_sep_token)}|{re.escape(polygon_start_token)}|{re.escape(polygon_end_token)}){{4,}})"
897
- else:
898
- # [^<]+: This part matches one or more characters that are not the < symbol.
899
- # The ^ inside the square brackets [] is a negation, meaning it matches anything except <.
900
- #
901
- pattern = rf"([^<]+(?:<loc_\d+>|{re.escape(polygon_sep_token)}|{re.escape(polygon_start_token)}|{re.escape(polygon_end_token)}){{4,}})"
902
- phrases = re.findall(pattern, text)
903
-
904
- phrase_string_pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_|<poly>)'
905
- box_pattern = rf'((?:<loc_\d+>)+)(?:{re.escape(polygon_sep_token)}|$)'
906
-
907
- # one polygons instance is separated by polygon_start_token and polygon_end_token
908
- polygons_instance_pattern = rf'{re.escape(polygon_start_token)}(.*?){re.escape(polygon_end_token)}'
909
-
910
- instances = []
911
- for phrase_text in phrases:
912
-
913
- # exclude loc_\d+>
914
- # need to get span if want to include category score
915
- phrase_text_strip = re.sub(r'^loc_\d+>', '', phrase_text, count=1)
916
-
917
- # phrase = phrase.replace('<poly>', '')
918
- # phrase = phrase.replace('poly>', '')
919
-
920
- if phrase_text_strip == '' and not allow_empty_phrase:
921
- continue
922
-
923
-
924
- # parse phrase, get string
925
- phrase = re.search(phrase_string_pattern, phrase_text_strip)
926
- if phrase is None:
927
- continue
928
- phrase = phrase.group()
929
- # remove leading and trailing spaces
930
- phrase = phrase.strip()
931
-
932
- # parse bboxes by box_pattern
933
-
934
- # split by polygon_start_token and polygon_end_token first using polygons_instance_pattern
935
- if polygon_start_token in phrase_text and polygon_end_token in phrase_text:
936
- polygons_instances_parsed = list(re.finditer(polygons_instance_pattern, phrase_text))
937
- else:
938
- polygons_instances_parsed = [phrase_text]
939
-
940
- for _polygons_instances_parsed in polygons_instances_parsed:
941
- # Prepare instance.
942
- instance = {}
943
-
944
- # polygons_parsed= list(re.finditer(box_pattern, phrase_text))
945
- if isinstance(_polygons_instances_parsed, str):
946
- polygons_parsed= list(re.finditer(box_pattern, _polygons_instances_parsed))
947
- else:
948
- polygons_parsed= list(re.finditer(box_pattern, _polygons_instances_parsed.group(1)))
949
- if len(polygons_parsed) == 0:
950
- continue
951
-
952
- # a list of list (polygon)
953
- bbox = []
954
- polygons = []
955
- for _polygon_parsed in polygons_parsed:
956
- # group 1: the whole run of <loc_\d+> tokens before the separator
957
- _polygon = _polygon_parsed.group(1)
958
- # parse into list of int
959
- _polygon = [int(_loc_parsed.group(1)) for _loc_parsed in re.finditer(r'<loc_(\d+)>', _polygon)]
960
- if with_box_at_start and len(bbox) == 0:
961
- if len(_polygon) > 4:
962
- # the leading four locations are the bbox prediction
963
- bbox = _polygon[:4]
964
- _polygon = _polygon[4:]
965
- else:
966
- bbox = [0, 0, 0, 0]
967
- # abandon last element if is not paired
968
- if len(_polygon) % 2 == 1:
969
- _polygon = _polygon[:-1]
970
-
971
- # reshape into (n, 2)
972
- _polygon = self.coordinates_quantizer.dequantize(
973
- torch.tensor(np.array(_polygon).reshape(-1, 2)),
974
- size=image_size
975
- ).reshape(-1).tolist()
976
- # reshape back
977
- polygons.append(_polygon)
978
-
979
- instance['cat_name'] = phrase
980
- instance['polygons'] = polygons
981
- if len(bbox) != 0:
982
- instance['bbox'] = self.box_quantizer.dequantize(
983
- boxes=torch.tensor([bbox]),
984
- size=image_size
985
- ).tolist()[0]
986
-
987
- instances.append(instance)
988
-
989
- return instances
990
-
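The `<poly>`/`<sep>` structure handled above, reduced to its core: `<poly>...</poly>` bounds one instance and `<sep>` separates polygons inside it (toy string; simplified extraction):

```python
import re

text = 'cat<poly><loc_1><loc_2><loc_3><loc_4><loc_5><loc_6><sep><loc_7><loc_8><loc_9><loc_10></poly>'
inst = re.search(r'<poly>(.*?)</poly>', text).group(1)
# Same shape as box_pattern above: runs of loc tokens ended by '<sep>' or end of string.
polygons = [[int(n) for n in re.findall(r'<loc_(\d+)>', run.group(1))]
            for run in re.finditer(r'((?:<loc_\d+>)+)(?:<sep>|$)', inst)]
print(polygons)  # [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10]]
```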
991
- def __call__(
992
- self,
993
- text=None,
994
- image_size=None,
995
- parse_tasks=None,
996
- ):
997
- """
998
- Args:
999
- text: model outputs
1000
- image_size: (width, height)
1001
- parse_tasks: a list of tasks to parse, if None, parse all tasks.
1002
-
1003
- """
1004
- if parse_tasks is not None:
1005
- if isinstance(parse_tasks, str):
1006
- parse_tasks = [parse_tasks]
1007
- for _parse_task in parse_tasks:
1008
- assert _parse_task in self.parse_tasks, f'parse task {_parse_task} not supported'
1009
-
1010
- # sequence or text should be provided
1011
- assert text is not None, 'text should be provided'
1012
-
1013
- parsed_dict = {
1014
- 'text': text
1015
- }
1016
-
1017
- for task in self.parse_tasks:
1018
- if parse_tasks is not None and task not in parse_tasks:
1019
- continue
1020
-
1021
- pattern = self.parse_tasks_configs[task].get('PATTERN', None)
1022
-
1023
- if task == 'ocr':
1024
- instances = self.parse_ocr_from_text_and_spans(
1025
- text,
1026
- pattern=pattern,
1027
- image_size=image_size,
1028
- area_threshold=self.parse_tasks_configs[task].get('AREA_THRESHOLD', 0.0),
1029
- )
1030
- parsed_dict['ocr'] = instances
1031
- elif task == 'phrase_grounding':
1032
- instances = self.parse_phrase_grounding_from_text_and_spans(
1033
- text,
1034
- pattern=pattern,
1035
- image_size=image_size,
1036
- )
1037
- parsed_dict['phrase_grounding'] = instances
1038
- elif task == 'pure_text':
1039
- parsed_dict['pure_text'] = text
1040
- elif task == 'description_with_bboxes':
1041
- instances = self.parse_description_with_bboxes_from_text_and_spans(
1042
- text,
1043
- pattern=pattern,
1044
- image_size=image_size,
1045
- )
1046
- parsed_dict['description_with_bboxes'] = instances
1047
- elif task == 'description_with_polygons':
1048
- instances = self.parse_description_with_polygons_from_text_and_spans(
1049
- text,
1050
- pattern=pattern,
1051
- image_size=image_size,
1052
- )
1053
- parsed_dict['description_with_polygons'] = instances
1054
- elif task == 'polygons':
1055
- instances = self.parse_description_with_polygons_from_text_and_spans(
1056
- text,
1057
- pattern=pattern,
1058
- image_size=image_size,
1059
- allow_empty_phrase=True,
1060
- )
1061
- parsed_dict['polygons'] = instances
1062
- elif task == 'bboxes':
1063
- instances = self.parse_description_with_bboxes_from_text_and_spans(
1064
- text,
1065
- pattern=pattern,
1066
- image_size=image_size,
1067
- allow_empty_phrase=True,
1068
- )
1069
- parsed_dict['bboxes'] = instances
1070
- elif task == 'description_with_bboxes_or_polygons':
1071
- if '<poly>' in text:
1072
- # only support either polygons or bboxes, not both at the same time
1073
- instances = self.parse_description_with_polygons_from_text_and_spans(
1074
- text,
1075
- pattern=pattern,
1076
- image_size=image_size,
1077
- )
1078
- else:
1079
- instances = self.parse_description_with_bboxes_from_text_and_spans(
1080
- text,
1081
- pattern=pattern,
1082
- image_size=image_size,
1083
- )
1084
- parsed_dict['description_with_bboxes_or_polygons'] = instances
1085
- else:
1086
- raise ValueError("task {} is not supported".format(task))
1087
-
1088
- return parsed_dict
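Finally, the pipeline above can be exercised without a tokenizer, since the bbox branch is pure regex plus dequantization (the tokenizer only matters for `decode_with_spans`). A minimal sketch with a hypothetical generated string; printed values are rounded:

```python
# tokenizer=None is accepted by the constructor; only decode_with_spans needs one.
pp = Florence2PostProcesser(tokenizer=None)
out = pp(text='a dog<loc_52><loc_86><loc_917><loc_876>',
         image_size=(640, 480),
         parse_tasks='description_with_bboxes')
print(out['description_with_bboxes'])
# [{'bbox': [33.6, 41.52, 587.2, 420.72], 'cat_name': 'a dog'}]
```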
 
Florence-2-Flux-Large/special_tokens_map.json DELETED
The diff for this file is too large to render. See raw diff
 
Florence-2-Flux-Large/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
Florence-2-Flux-Large/tokenizer_config.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "model_max_length": 1024
3
- }
4
-
 
Florence-2-Flux-Large/vocab.json DELETED
The diff for this file is too large to render. See raw diff