ChuuniZ committed on Mar 24

Commit

eb88be2

verified ·

1 Parent(s): d7603bf

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Joy_caption/cgrkzexw-599808/text_model/special_tokens_map.json +23 -0
Joy_caption/cgrkzexw-599808/text_model/tokenizer_config.json +2064 -0
RMBG/RMBG-2.0/BiRefNet_config.py +11 -0
RMBG/RMBG-2.0/birefnet.py +2244 -0
RMBG/RMBG-2.0/config.json +20 -0
florence2/DocVQA/added_tokens.json +1026 -0
florence2/DocVQA/config.json +237 -0
florence2/DocVQA/generation_config.json +4 -0
florence2/DocVQA/modeling_florence2.py +0 -0
florence2/DocVQA/preprocessor_config.json +50 -0
florence2/DocVQA/processor_config.json +6 -0
florence2/DocVQA/special_tokens_map.json +0 -0
florence2/DocVQA/tokenizer.json +0 -0
florence2/DocVQA/tokenizer_config.json +0 -0
florence2/DocVQA/vocab.json +0 -0
florence2/base/LICENSE +21 -0
florence2/base/config.json +85 -0
florence2/base/configuration_florence2.py +340 -0
florence2/base/modeling_florence2.py +0 -0
florence2/base/preprocessor_config.json +39 -0
florence2/base/processing_florence2.py +1088 -0
florence2/base/tokenizer.json +0 -0
florence2/base/tokenizer_config.json +4 -0
florence2/base/vocab.json +0 -0
florence2/large-ft/LICENSE +21 -0
florence2/large-ft/config.json +85 -0
florence2/large-ft/configuration_florence2.py +340 -0
florence2/large-ft/generation_config.json +4 -0
florence2/large-ft/modeling_florence2.py +0 -0
florence2/large-ft/preprocessor_config.json +39 -0
florence2/large-ft/processing_florence2.py +1088 -0
florence2/large-ft/tokenizer.json +0 -0
florence2/large-ft/tokenizer_config.json +4 -0
florence2/large-ft/vocab.json +0 -0
loras/Hyper-FLUX.1-dev-8steps-lora.sha256 +1 -0
loras/flux/arcane-style-2.sha256 +1 -0
loras/illu/ATRex_style-12.sha256 +1 -0
loras/illu/Gloom hands illus-000040.sha256 +1 -0
loras/illu/HerrscherAGGA2025_Chibi-IL_V1.sha256 +1 -0
loras/illu/Illustrious_Fujimoto_Manga_Style.sha256 +1 -0
loras/illu/My_Wish_is_for_Love_ILXL.sha256 +1 -0
loras/illu/WindWaker_Style_IXL.safetensors.rgthree-info.json +2763 -0
loras/illu/breast-press-pov-illustriousxl-lora-nochekaiser.sha256 +1 -0
loras/illu/caressing-testicles-v2-illustriousxl-lora-nochekaiser.sha256 +1 -0
loras/illu/hand-on-own-hip-on-side-illustriousxl-lora-nochekaiser.sha256 +1 -0
loras/illu/hearthandsonbreast.sha256 +1 -0
loras/illu/missionary-asphyxiation-illustriousxl-lora-nochekaiser.sha256 +1 -0
loras/illu/pov-hands-female-orgasm-illustriousxl-lora-nochekaiser.sha256 +1 -0
loras/illu/random-nsfw-poses-v8-illustriousxl-lora-nochekaiser.sha256 +1 -0
loras/pony/R3DStyle.sha256 +1 -0

Joy_caption/cgrkzexw-599808/text_model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|eot_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|finetune_right_pad_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

Joy_caption/cgrkzexw-599808/text_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,2064 @@

+{
+  "added_tokens_decoder": {
+    "128000": {
+      "content": "<|begin_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128001": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128002": {
+      "content": "<|reserved_special_token_0|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128003": {
+      "content": "<|reserved_special_token_1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128004": {
+      "content": "<|finetune_right_pad_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128005": {
+      "content": "<|reserved_special_token_2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128006": {
+      "content": "<|start_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128007": {
+      "content": "<|end_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128008": {
+      "content": "<|eom_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128009": {
+      "content": "<|eot_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128010": {
+      "content": "<|python_tag|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128011": {
+      "content": "<|reserved_special_token_3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128012": {
+      "content": "<|reserved_special_token_4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128013": {
+      "content": "<|reserved_special_token_5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128014": {
+      "content": "<|reserved_special_token_6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128015": {
+      "content": "<|reserved_special_token_7|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128016": {
+      "content": "<|reserved_special_token_8|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128017": {
+      "content": "<|reserved_special_token_9|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128018": {
+      "content": "<|reserved_special_token_10|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128019": {
+      "content": "<|reserved_special_token_11|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128020": {
+      "content": "<|reserved_special_token_12|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128021": {
+      "content": "<|reserved_special_token_13|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128022": {
+      "content": "<|reserved_special_token_14|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128023": {
+      "content": "<|reserved_special_token_15|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128024": {
+      "content": "<|reserved_special_token_16|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128025": {
+      "content": "<|reserved_special_token_17|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128026": {
+      "content": "<|reserved_special_token_18|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128027": {
+      "content": "<|reserved_special_token_19|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128028": {
+      "content": "<|reserved_special_token_20|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128029": {
+      "content": "<|reserved_special_token_21|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128030": {
+      "content": "<|reserved_special_token_22|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128031": {
+      "content": "<|reserved_special_token_23|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128032": {
+      "content": "<|reserved_special_token_24|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128033": {
+      "content": "<|reserved_special_token_25|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128034": {
+      "content": "<|reserved_special_token_26|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128035": {
+      "content": "<|reserved_special_token_27|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128036": {
+      "content": "<|reserved_special_token_28|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128037": {
+      "content": "<|reserved_special_token_29|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128038": {
+      "content": "<|reserved_special_token_30|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128039": {
+      "content": "<|reserved_special_token_31|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128040": {
+      "content": "<|reserved_special_token_32|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128041": {
+      "content": "<|reserved_special_token_33|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128042": {
+      "content": "<|reserved_special_token_34|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128043": {
+      "content": "<|reserved_special_token_35|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128044": {
+      "content": "<|reserved_special_token_36|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128045": {
+      "content": "<|reserved_special_token_37|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128046": {
+      "content": "<|reserved_special_token_38|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128047": {
+      "content": "<|reserved_special_token_39|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128048": {
+      "content": "<|reserved_special_token_40|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128049": {
+      "content": "<|reserved_special_token_41|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128050": {
+      "content": "<|reserved_special_token_42|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128051": {
+      "content": "<|reserved_special_token_43|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128052": {
+      "content": "<|reserved_special_token_44|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128053": {
+      "content": "<|reserved_special_token_45|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128054": {
+      "content": "<|reserved_special_token_46|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128055": {
+      "content": "<|reserved_special_token_47|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128056": {
+      "content": "<|reserved_special_token_48|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128057": {
+      "content": "<|reserved_special_token_49|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128058": {
+      "content": "<|reserved_special_token_50|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128059": {
+      "content": "<|reserved_special_token_51|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128060": {
+      "content": "<|reserved_special_token_52|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128061": {
+      "content": "<|reserved_special_token_53|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128062": {
+      "content": "<|reserved_special_token_54|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128063": {
+      "content": "<|reserved_special_token_55|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128064": {
+      "content": "<|reserved_special_token_56|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128065": {
+      "content": "<|reserved_special_token_57|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128066": {
+      "content": "<|reserved_special_token_58|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128067": {
+      "content": "<|reserved_special_token_59|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128068": {
+      "content": "<|reserved_special_token_60|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128069": {
+      "content": "<|reserved_special_token_61|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128070": {
+      "content": "<|reserved_special_token_62|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128071": {
+      "content": "<|reserved_special_token_63|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128072": {
+      "content": "<|reserved_special_token_64|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128073": {
+      "content": "<|reserved_special_token_65|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128074": {
+      "content": "<|reserved_special_token_66|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128075": {
+      "content": "<|reserved_special_token_67|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128076": {
+      "content": "<|reserved_special_token_68|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128077": {
+      "content": "<|reserved_special_token_69|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128078": {
+      "content": "<|reserved_special_token_70|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128079": {
+      "content": "<|reserved_special_token_71|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128080": {
+      "content": "<|reserved_special_token_72|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128081": {
+      "content": "<|reserved_special_token_73|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128082": {
+      "content": "<|reserved_special_token_74|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128083": {
+      "content": "<|reserved_special_token_75|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128084": {
+      "content": "<|reserved_special_token_76|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128085": {
+      "content": "<|reserved_special_token_77|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128086": {
+      "content": "<|reserved_special_token_78|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128087": {
+      "content": "<|reserved_special_token_79|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128088": {
+      "content": "<|reserved_special_token_80|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128089": {
+      "content": "<|reserved_special_token_81|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128090": {
+      "content": "<|reserved_special_token_82|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128091": {
+      "content": "<|reserved_special_token_83|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128092": {
+      "content": "<|reserved_special_token_84|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128093": {
+      "content": "<|reserved_special_token_85|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128094": {
+      "content": "<|reserved_special_token_86|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128095": {
+      "content": "<|reserved_special_token_87|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128096": {
+      "content": "<|reserved_special_token_88|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128097": {
+      "content": "<|reserved_special_token_89|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128098": {
+      "content": "<|reserved_special_token_90|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128099": {
+      "content": "<|reserved_special_token_91|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128100": {
+      "content": "<|reserved_special_token_92|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128101": {
+      "content": "<|reserved_special_token_93|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128102": {
+      "content": "<|reserved_special_token_94|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128103": {
+      "content": "<|reserved_special_token_95|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128104": {
+      "content": "<|reserved_special_token_96|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128105": {
+      "content": "<|reserved_special_token_97|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128106": {
+      "content": "<|reserved_special_token_98|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128107": {
+      "content": "<|reserved_special_token_99|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128108": {
+      "content": "<|reserved_special_token_100|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128109": {
+      "content": "<|reserved_special_token_101|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128110": {
+      "content": "<|reserved_special_token_102|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128111": {
+      "content": "<|reserved_special_token_103|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128112": {
+      "content": "<|reserved_special_token_104|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128113": {
+      "content": "<|reserved_special_token_105|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128114": {
+      "content": "<|reserved_special_token_106|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128115": {
+      "content": "<|reserved_special_token_107|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128116": {
+      "content": "<|reserved_special_token_108|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128117": {
+      "content": "<|reserved_special_token_109|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128118": {
+      "content": "<|reserved_special_token_110|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128119": {
+      "content": "<|reserved_special_token_111|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128120": {
+      "content": "<|reserved_special_token_112|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128121": {
+      "content": "<|reserved_special_token_113|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128122": {
+      "content": "<|reserved_special_token_114|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128123": {
+      "content": "<|reserved_special_token_115|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128124": {
+      "content": "<|reserved_special_token_116|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128125": {
+      "content": "<|reserved_special_token_117|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128126": {
+      "content": "<|reserved_special_token_118|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128127": {
+      "content": "<|reserved_special_token_119|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128128": {
+      "content": "<|reserved_special_token_120|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128129": {
+      "content": "<|reserved_special_token_121|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128130": {
+      "content": "<|reserved_special_token_122|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128131": {
+      "content": "<|reserved_special_token_123|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128132": {
+      "content": "<|reserved_special_token_124|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128133": {
+      "content": "<|reserved_special_token_125|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128134": {
+      "content": "<|reserved_special_token_126|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128135": {
+      "content": "<|reserved_special_token_127|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128136": {
+      "content": "<|reserved_special_token_128|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128137": {
+      "content": "<|reserved_special_token_129|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128138": {
+      "content": "<|reserved_special_token_130|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128139": {
+      "content": "<|reserved_special_token_131|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128140": {
+      "content": "<|reserved_special_token_132|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128141": {
+      "content": "<|reserved_special_token_133|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128142": {
+      "content": "<|reserved_special_token_134|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128143": {
+      "content": "<|reserved_special_token_135|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128144": {
+      "content": "<|reserved_special_token_136|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128145": {
+      "content": "<|reserved_special_token_137|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128146": {
+      "content": "<|reserved_special_token_138|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128147": {
+      "content": "<|reserved_special_token_139|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128148": {
+      "content": "<|reserved_special_token_140|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128149": {
+      "content": "<|reserved_special_token_141|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128150": {
+      "content": "<|reserved_special_token_142|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128151": {
+      "content": "<|reserved_special_token_143|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128152": {
+      "content": "<|reserved_special_token_144|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128153": {
+      "content": "<|reserved_special_token_145|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128154": {
+      "content": "<|reserved_special_token_146|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128155": {
+      "content": "<|reserved_special_token_147|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128156": {
+      "content": "<|reserved_special_token_148|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128157": {
+      "content": "<|reserved_special_token_149|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128158": {
+      "content": "<|reserved_special_token_150|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128159": {
+      "content": "<|reserved_special_token_151|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128160": {
+      "content": "<|reserved_special_token_152|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128161": {
+      "content": "<|reserved_special_token_153|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128162": {
+      "content": "<|reserved_special_token_154|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128163": {
+      "content": "<|reserved_special_token_155|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128164": {
+      "content": "<|reserved_special_token_156|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128165": {
+      "content": "<|reserved_special_token_157|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128166": {
+      "content": "<|reserved_special_token_158|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128167": {
+      "content": "<|reserved_special_token_159|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128168": {
+      "content": "<|reserved_special_token_160|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128169": {
+      "content": "<|reserved_special_token_161|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128170": {
+      "content": "<|reserved_special_token_162|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128171": {
+      "content": "<|reserved_special_token_163|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128172": {
+      "content": "<|reserved_special_token_164|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128173": {
+      "content": "<|reserved_special_token_165|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128174": {
+      "content": "<|reserved_special_token_166|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128175": {
+      "content": "<|reserved_special_token_167|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128176": {
+      "content": "<|reserved_special_token_168|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128177": {
+      "content": "<|reserved_special_token_169|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128178": {
+      "content": "<|reserved_special_token_170|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128179": {
+      "content": "<|reserved_special_token_171|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128180": {
+      "content": "<|reserved_special_token_172|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128181": {
+      "content": "<|reserved_special_token_173|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128182": {
+      "content": "<|reserved_special_token_174|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128183": {
+      "content": "<|reserved_special_token_175|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128184": {
+      "content": "<|reserved_special_token_176|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128185": {
+      "content": "<|reserved_special_token_177|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128186": {
+      "content": "<|reserved_special_token_178|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128187": {
+      "content": "<|reserved_special_token_179|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128188": {
+      "content": "<|reserved_special_token_180|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128189": {
+      "content": "<|reserved_special_token_181|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128190": {
+      "content": "<|reserved_special_token_182|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128191": {
+      "content": "<|reserved_special_token_183|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128192": {
+      "content": "<|reserved_special_token_184|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128193": {
+      "content": "<|reserved_special_token_185|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128194": {
+      "content": "<|reserved_special_token_186|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128195": {
+      "content": "<|reserved_special_token_187|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128196": {
+      "content": "<|reserved_special_token_188|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128197": {
+      "content": "<|reserved_special_token_189|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128198": {
+      "content": "<|reserved_special_token_190|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128199": {
+      "content": "<|reserved_special_token_191|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128200": {
+      "content": "<|reserved_special_token_192|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128201": {
+      "content": "<|reserved_special_token_193|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128202": {
+      "content": "<|reserved_special_token_194|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128203": {
+      "content": "<|reserved_special_token_195|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128204": {
+      "content": "<|reserved_special_token_196|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128205": {
+      "content": "<|reserved_special_token_197|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128206": {
+      "content": "<|reserved_special_token_198|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128207": {
+      "content": "<|reserved_special_token_199|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128208": {
+      "content": "<|reserved_special_token_200|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128209": {
+      "content": "<|reserved_special_token_201|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128210": {
+      "content": "<|reserved_special_token_202|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128211": {
+      "content": "<|reserved_special_token_203|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128212": {
+      "content": "<|reserved_special_token_204|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128213": {
+      "content": "<|reserved_special_token_205|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128214": {
+      "content": "<|reserved_special_token_206|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128215": {
+      "content": "<|reserved_special_token_207|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128216": {
+      "content": "<|reserved_special_token_208|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128217": {
+      "content": "<|reserved_special_token_209|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128218": {
+      "content": "<|reserved_special_token_210|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128219": {
+      "content": "<|reserved_special_token_211|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128220": {
+      "content": "<|reserved_special_token_212|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128221": {
+      "content": "<|reserved_special_token_213|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128222": {
+      "content": "<|reserved_special_token_214|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128223": {
+      "content": "<|reserved_special_token_215|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128224": {
+      "content": "<|reserved_special_token_216|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128225": {
+      "content": "<|reserved_special_token_217|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128226": {
+      "content": "<|reserved_special_token_218|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128227": {
+      "content": "<|reserved_special_token_219|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128228": {
+      "content": "<|reserved_special_token_220|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128229": {
+      "content": "<|reserved_special_token_221|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128230": {
+      "content": "<|reserved_special_token_222|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128231": {
+      "content": "<|reserved_special_token_223|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128232": {
+      "content": "<|reserved_special_token_224|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128233": {
+      "content": "<|reserved_special_token_225|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128234": {
+      "content": "<|reserved_special_token_226|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128235": {
+      "content": "<|reserved_special_token_227|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128236": {
+      "content": "<|reserved_special_token_228|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128237": {
+      "content": "<|reserved_special_token_229|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128238": {
+      "content": "<|reserved_special_token_230|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128239": {
+      "content": "<|reserved_special_token_231|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128240": {
+      "content": "<|reserved_special_token_232|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128241": {
+      "content": "<|reserved_special_token_233|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128242": {
+      "content": "<|reserved_special_token_234|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128243": {
+      "content": "<|reserved_special_token_235|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128244": {
+      "content": "<|reserved_special_token_236|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128245": {
+      "content": "<|reserved_special_token_237|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128246": {
+      "content": "<|reserved_special_token_238|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128247": {
+      "content": "<|reserved_special_token_239|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128248": {
+      "content": "<|reserved_special_token_240|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128249": {
+      "content": "<|reserved_special_token_241|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128250": {
+      "content": "<|reserved_special_token_242|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128251": {
+      "content": "<|reserved_special_token_243|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128252": {
+      "content": "<|reserved_special_token_244|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128253": {
+      "content": "<|reserved_special_token_245|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128254": {
+      "content": "<|reserved_special_token_246|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128255": {
+      "content": "<|reserved_special_token_247|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|begin_of_text|>",
+  "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 July 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n    {{- \"Do not use variables.\n\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\n\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content'] %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n    {{- \"Do not use variables.\n\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\n\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n                {%- endfor %}\n            {{- \")\" }}\n        {%- else  %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- 
message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|eot_id|>",
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 131072,
+  "pad_token": "<|finetune_right_pad_id|>",
+  "padding_side": "right",
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}

RMBG/RMBG-2.0/BiRefNet_config.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from transformers import PretrainedConfig
+class BiRefNetConfig(PretrainedConfig):
+    model_type = "SegformerForSemanticSegmentation"
+    def __init__(
+        self,
+        bb_pretrained=False,
+        **kwargs
+    ):
+        self.bb_pretrained = bb_pretrained
+        super().__init__(**kwargs)

RMBG/RMBG-2.0/birefnet.py ADDED Viewed

	@@ -0,0 +1,2244 @@

+### config.py
+import os
+import math
+class Config():
+    def __init__(self) -> None:
+        # PATH settings
+        self.sys_home_dir = os.path.expanduser('~')    # Make up your file system as: SYS_HOME_DIR/codes/dis/BiRefNet, SYS_HOME_DIR/datasets/dis/xx, SYS_HOME_DIR/weights/xx
+        # TASK settings
+        self.task = ['DIS5K', 'COD', 'HRSOD', 'DIS5K+HRSOD+HRS10K', 'P3M-10k'][0]
+        self.training_set = {
+            'DIS5K': ['DIS-TR', 'DIS-TR+DIS-TE1+DIS-TE2+DIS-TE3+DIS-TE4'][0],
+            'COD': 'TR-COD10K+TR-CAMO',
+            'HRSOD': ['TR-DUTS', 'TR-HRSOD', 'TR-UHRSD', 'TR-DUTS+TR-HRSOD', 'TR-DUTS+TR-UHRSD', 'TR-HRSOD+TR-UHRSD', 'TR-DUTS+TR-HRSOD+TR-UHRSD'][5],
+            'DIS5K+HRSOD+HRS10K': 'DIS-TE1+DIS-TE2+DIS-TE3+DIS-TE4+DIS-TR+TE-HRS10K+TE-HRSOD+TE-UHRSD+TR-HRS10K+TR-HRSOD+TR-UHRSD',     # leave DIS-VD for evaluation.
+            'P3M-10k': 'TR-P3M-10k',
+        }[self.task]
+        self.prompt4loc = ['dense', 'sparse'][0]
+        # Faster-Training settings
+        self.load_all = True
+        self.compile = True     # 1. Triggers a CPU memory leak to some extent, which is an inherent problem of PyTorch.
+                                #   Machines with > 70GB of CPU memory can run the whole training on DIS5K with the default settings.
+                                # 2. A higher PyTorch version may fix it: https://github.com/pytorch/pytorch/issues/119607.
+                                # 3. But compile in PyTorch > 2.0.1 seems to bring no acceleration for training.
+        self.precisionHigh = True
+        # MODEL settings
+        self.ms_supervision = True
+        self.out_ref = self.ms_supervision and True
+        self.dec_ipt = True
+        self.dec_ipt_split = True
+        self.cxt_num = [0, 3][1]    # multi-scale skip connections from encoder
+        self.mul_scl_ipt = ['', 'add', 'cat'][2]
+        self.dec_att = ['', 'ASPP', 'ASPPDeformable'][2]
+        self.squeeze_block = ['', 'BasicDecBlk_x1', 'ResBlk_x4', 'ASPP_x3', 'ASPPDeformable_x3'][1]
+        self.dec_blk = ['BasicDecBlk', 'ResBlk', 'HierarAttDecBlk'][0]
+        # TRAINING settings
+        self.batch_size = 4
+        self.IoU_finetune_last_epochs = [
+            0,
+            {
+                'DIS5K': -50,
+                'COD': -20,
+                'HRSOD': -20,
+                'DIS5K+HRSOD+HRS10K': -20,
+                'P3M-10k': -20,
+            }[self.task]
+        ][1]    # choose 0 to skip
+        self.lr = (1e-4 if 'DIS5K' in self.task else 1e-5) * math.sqrt(self.batch_size / 4)     # DIS needs a higher lr to converge faster. The lr is scaled by sqrt(batch_size / 4).
+        self.size = 1024
+        self.num_workers = max(4, self.batch_size)          # will be decreased to min(it, batch_size) at the initialization of the data_loader
+        # Backbone settings
+        self.bb = [
+            'vgg16', 'vgg16bn', 'resnet50',         # 0, 1, 2
+            'swin_v1_t', 'swin_v1_s',               # 3, 4
+            'swin_v1_b', 'swin_v1_l',               # 5-bs9, 6-bs4
+            'pvt_v2_b0', 'pvt_v2_b1',               # 7, 8
+            'pvt_v2_b2', 'pvt_v2_b5',               # 9-bs10, 10-bs5
+        ][6]
+        self.lateral_channels_in_collection = {
+            'vgg16': [512, 256, 128, 64], 'vgg16bn': [512, 256, 128, 64], 'resnet50': [1024, 512, 256, 64],
+            'pvt_v2_b2': [512, 320, 128, 64], 'pvt_v2_b5': [512, 320, 128, 64],
+            'swin_v1_b': [1024, 512, 256, 128], 'swin_v1_l': [1536, 768, 384, 192],
+            'swin_v1_t': [768, 384, 192, 96], 'swin_v1_s': [768, 384, 192, 96],
+            'pvt_v2_b0': [256, 160, 64, 32], 'pvt_v2_b1': [512, 320, 128, 64],
+        }[self.bb]
+        if self.mul_scl_ipt == 'cat':
+            self.lateral_channels_in_collection = [channel * 2 for channel in self.lateral_channels_in_collection]
+        self.cxt = self.lateral_channels_in_collection[1:][::-1][-self.cxt_num:] if self.cxt_num else []
+        # MODEL settings - inactive
+        self.lat_blk = ['BasicLatBlk'][0]
+        self.dec_channels_inter = ['fixed', 'adap'][0]
+        self.refine = ['', 'itself', 'RefUNet', 'Refiner', 'RefinerPVTInChannels4'][0]
+        self.progressive_ref = self.refine and True
+        self.ender = self.progressive_ref and False
+        self.scale = self.progressive_ref and 2
+        self.auxiliary_classification = False       # Only for DIS5K, where class labels are saved in `dataset.py`.
+        self.refine_iteration = 1
+        self.freeze_bb = False
+        self.model = [
+            'BiRefNet',
+        ][0]
+        if self.dec_blk == 'HierarAttDecBlk':
+            self.batch_size = 2 ** [0, 1, 2, 3, 4][2]
+        # TRAINING settings - inactive
+        self.preproc_methods = ['flip', 'enhance', 'rotate', 'pepper', 'crop'][:4]
+        self.optimizer = ['Adam', 'AdamW'][1]
+        self.lr_decay_epochs = [1e5]    # Set to negative N to decay the lr in the last N-th epoch.
+        self.lr_decay_rate = 0.5
+        # Loss
+        self.lambdas_pix_last = {
+            # not 0 means opening this loss
+            # original rate -- 1 : 30 : 1.5 : 0.2, bce x 30
+            'bce': 30 * 1,          # high performance
+            'iou': 0.5 * 1,         # 0 / 255
+            'iou_patch': 0.5 * 0,   # 0 / 255, win_size = (64, 64)
+            'mse': 150 * 0,         # can smooth the saliency map
+            'triplet': 3 * 0,
+            'reg': 100 * 0,
+            'ssim': 10 * 1,          # help contours,
+            'cnt': 5 * 0,          # help contours
+            'structure': 5 * 0,    # structure loss from codes of MVANet. A little improvement on DIS-TE[1,2,3], a bit more decrease on DIS-TE4.
+        }
+        self.lambdas_cls = {
+            'ce': 5.0
+        }
+        # Adv
+        self.lambda_adv_g = 10. * 0        # turn to 0 to avoid adv training
+        self.lambda_adv_d = 3. * (self.lambda_adv_g > 0)
+        # PATH settings - inactive
+        self.data_root_dir = os.path.join(self.sys_home_dir, 'datasets/dis')
+        self.weights_root_dir = os.path.join(self.sys_home_dir, 'weights')
+        self.weights = {
+            'pvt_v2_b2': os.path.join(self.weights_root_dir, 'pvt_v2_b2.pth'),
+            'pvt_v2_b5': os.path.join(self.weights_root_dir, ['pvt_v2_b5.pth', 'pvt_v2_b5_22k.pth'][0]),
+            'swin_v1_b': os.path.join(self.weights_root_dir, ['swin_base_patch4_window12_384_22kto1k.pth', 'swin_base_patch4_window12_384_22k.pth'][0]),
+            'swin_v1_l': os.path.join(self.weights_root_dir, ['swin_large_patch4_window12_384_22kto1k.pth', 'swin_large_patch4_window12_384_22k.pth'][0]),
+            'swin_v1_t': os.path.join(self.weights_root_dir, ['swin_tiny_patch4_window7_224_22kto1k_finetune.pth'][0]),
+            'swin_v1_s': os.path.join(self.weights_root_dir, ['swin_small_patch4_window7_224_22kto1k_finetune.pth'][0]),
+            'pvt_v2_b0': os.path.join(self.weights_root_dir, ['pvt_v2_b0.pth'][0]),
+            'pvt_v2_b1': os.path.join(self.weights_root_dir, ['pvt_v2_b1.pth'][0]),
+        }
+        # Callbacks - inactive
+        self.verbose_eval = True
+        self.only_S_MAE = False
+        self.use_fp16 = False   # Bugs. It may cause nan in training.
+        self.SDPA_enabled = False    # Bugs. Slower and errors occur in multi-GPUs
+        # others
+        self.device = [0, 'cpu'][0]     # .to(0) == .to('cuda:0')
+        self.batch_size_valid = 1
+        self.rand_seed = 7
+        # run_sh_file = [f for f in os.listdir('.') if 'train.sh' == f] + [os.path.join('..', f) for f in os.listdir('..') if 'train.sh' == f]
+        # with open(run_sh_file[0], 'r') as f:
+        #     lines = f.readlines()
+        #     self.save_last = int([l.strip() for l in lines if '"{}")'.format(self.task) in l and 'val_last=' in l][0].split('val_last=')[-1].split()[0])
+        #     self.save_step = int([l.strip() for l in lines if '"{}")'.format(self.task) in l and 'step=' in l][0].split('step=')[-1].split()[0])
+        # self.val_step = [0, self.save_step][0]
+    def print_task(self) -> None:
+        # Return task for choosing settings in shell scripts.
+        print(self.task)
+### models/backbones/pvt_v2.py
+import torch
+import torch.nn as nn
+from functools import partial
+from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+from timm.models.registry import register_model
+import math
+# from config import Config
+# config = Config()
+class Mlp(nn.Module):
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.dwconv = DWConv(hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x, H, W):
+        x = self.fc1(x)
+        x = self.dwconv(x, H, W)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+class Attention(nn.Module):
+    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1):
+        super().__init__()
+        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
+        self.dim = dim
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim ** -0.5
+        self.q = nn.Linear(dim, dim, bias=qkv_bias)
+        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
+        self.attn_drop_prob = attn_drop
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        self.sr_ratio = sr_ratio
+        if sr_ratio > 1:
+            self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
+            self.norm = nn.LayerNorm(dim)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
+        if self.sr_ratio > 1:
+            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
+            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
+            x_ = self.norm(x_)
+            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        else:
+            kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        k, v = kv[0], kv[1]
+        if config.SDPA_enabled:
+            x = torch.nn.functional.scaled_dot_product_attention(
+                q, k, v,
+                attn_mask=None, dropout_p=self.attn_drop_prob, is_causal=False
+            ).transpose(1, 2).reshape(B, N, C)
+        else:
+            attn = (q @ k.transpose(-2, -1)) * self.scale
+            attn = attn.softmax(dim=-1)
+            attn = self.attn_drop(attn)
+            x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+class Block(nn.Module):
+    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        self.attn = Attention(
+            dim,
+            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio)
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x, H, W):
+        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
+        return x
+class OverlapPatchEmbed(nn.Module):
+    """ Image to Patch Embedding
+    """
+    def __init__(self, img_size=224, patch_size=7, stride=4, in_channels=3, embed_dim=768):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1]
+        self.num_patches = self.H * self.W
+        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=stride,
+                              padding=(patch_size[0] // 2, patch_size[1] // 2))
+        self.norm = nn.LayerNorm(embed_dim)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x):
+        x = self.proj(x)
+        _, _, H, W = x.shape
+        x = x.flatten(2).transpose(1, 2)
+        x = self.norm(x)
+        return x, H, W
+class PyramidVisionTransformerImpr(nn.Module):
+    def __init__(self, img_size=224, patch_size=16, in_channels=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
+                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]):
+        super().__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        # patch_embed
+        self.patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=4, in_channels=in_channels,
+                                              embed_dim=embed_dims[0])
+        self.patch_embed2 = OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_channels=embed_dims[0],
+                                              embed_dim=embed_dims[1])
+        self.patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_channels=embed_dims[1],
+                                              embed_dim=embed_dims[2])
+        self.patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_channels=embed_dims[2],
+                                              embed_dim=embed_dims[3])
+        # transformer encoder
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+        self.block1 = nn.ModuleList([Block(
+            dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[0])
+            for i in range(depths[0])])
+        self.norm1 = norm_layer(embed_dims[0])
+        cur += depths[0]
+        self.block2 = nn.ModuleList([Block(
+            dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[1])
+            for i in range(depths[1])])
+        self.norm2 = norm_layer(embed_dims[1])
+        cur += depths[1]
+        self.block3 = nn.ModuleList([Block(
+            dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[2])
+            for i in range(depths[2])])
+        self.norm3 = norm_layer(embed_dims[2])
+        cur += depths[2]
+        self.block4 = nn.ModuleList([Block(
+            dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[3])
+            for i in range(depths[3])])
+        self.norm4 = norm_layer(embed_dims[3])
+        # classification head
+        # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity()
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def init_weights(self, pretrained=None):
+        if isinstance(pretrained, str):
+            logger = 1
+            #load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger)
+    def reset_drop_path(self, drop_path_rate):
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))]
+        cur = 0
+        for i in range(self.depths[0]):
+            self.block1[i].drop_path.drop_prob = dpr[cur + i]
+        cur += self.depths[0]
+        for i in range(self.depths[1]):
+            self.block2[i].drop_path.drop_prob = dpr[cur + i]
+        cur += self.depths[1]
+        for i in range(self.depths[2]):
+            self.block3[i].drop_path.drop_prob = dpr[cur + i]
+        cur += self.depths[2]
+        for i in range(self.depths[3]):
+            self.block4[i].drop_path.drop_prob = dpr[cur + i]
+    def freeze_patch_emb(self):
+        self.patch_embed1.requires_grad = False
+    @torch.jit.ignore
+    def no_weight_decay(self):
+        return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'}  # has pos_embed may be better
+    def get_classifier(self):
+        return self.head
+    def reset_classifier(self, num_classes, global_pool=''):
+        self.num_classes = num_classes
+        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
+    def forward_features(self, x):
+        B = x.shape[0]
+        outs = []
+        # stage 1
+        x, H, W = self.patch_embed1(x)
+        for i, blk in enumerate(self.block1):
+            x = blk(x, H, W)
+        x = self.norm1(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+        # stage 2
+        x, H, W = self.patch_embed2(x)
+        for i, blk in enumerate(self.block2):
+            x = blk(x, H, W)
+        x = self.norm2(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+        # stage 3
+        x, H, W = self.patch_embed3(x)
+        for i, blk in enumerate(self.block3):
+            x = blk(x, H, W)
+        x = self.norm3(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+        # stage 4
+        x, H, W = self.patch_embed4(x)
+        for i, blk in enumerate(self.block4):
+            x = blk(x, H, W)
+        x = self.norm4(x)
+        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        outs.append(x)
+        return outs
+        # return x.mean(dim=1)
+    def forward(self, x):
+        x = self.forward_features(x)
+        # x = self.head(x)
+        return x
+class DWConv(nn.Module):
+    def __init__(self, dim=768):
+        super(DWConv, self).__init__()
+        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        x = x.transpose(1, 2).view(B, C, H, W).contiguous()
+        x = self.dwconv(x)
+        x = x.flatten(2).transpose(1, 2)
+        return x
+def _conv_filter(state_dict, patch_size=16):
+    """ convert patch embedding weight from manual patchify + linear proj to conv"""
+    out_dict = {}
+    for k, v in state_dict.items():
+        if 'patch_embed.proj.weight' in k:
+            v = v.reshape((v.shape[0], 3, patch_size, patch_size))
+        out_dict[k] = v
+    return out_dict
+## @register_model
+class pvt_v2_b0(PyramidVisionTransformerImpr):
+    def __init__(self, **kwargs):
+        super(pvt_v2_b0, self).__init__(
+            patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+## @register_model
+class pvt_v2_b1(PyramidVisionTransformerImpr):
+    def __init__(self, **kwargs):
+        super(pvt_v2_b1, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+## @register_model
+class pvt_v2_b2(PyramidVisionTransformerImpr):
+    def __init__(self, in_channels=3, **kwargs):
+        super(pvt_v2_b2, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1, in_channels=in_channels)
+## @register_model
+class pvt_v2_b3(PyramidVisionTransformerImpr):
+    def __init__(self, **kwargs):
+        super(pvt_v2_b3, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+## @register_model
+class pvt_v2_b4(PyramidVisionTransformerImpr):
+    def __init__(self, **kwargs):
+        super(pvt_v2_b4, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+## @register_model
+class pvt_v2_b5(PyramidVisionTransformerImpr):
+    def __init__(self, **kwargs):
+        super(pvt_v2_b5, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1)
+### models/backbones/swin_v1.py
+# --------------------------------------------------------
+# Swin Transformer
+# Copyright (c) 2021 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ze Liu, Yutong Lin, Yixuan Wei
+# --------------------------------------------------------
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+import numpy as np
+from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+# from config import Config
+# config = Config()
+class Mlp(nn.Module):
+    """ Multilayer perceptron."""
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+def window_partition(x, window_size):
+    """
+    Args:
+        x: (B, H, W, C)
+        window_size (int): window size
+    Returns:
+        windows: (num_windows*B, window_size, window_size, C)
+    """
+    B, H, W, C = x.shape
+    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
+    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
+    return windows
+def window_reverse(windows, window_size, H, W):
+    """
+    Args:
+        windows: (num_windows*B, window_size, window_size, C)
+        window_size (int): Window size
+        H (int): Height of image
+        W (int): Width of image
+    Returns:
+        x: (B, H, W, C)
+    """
+    B = int(windows.shape[0] / (H * W / window_size / window_size))
+    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
+    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
+    return x
+class WindowAttention(nn.Module):
+    """ Window based multi-head self attention (W-MSA) module with relative position bias.
+    It supports both of shifted and non-shifted window.
+    Args:
+        dim (int): Number of input channels.
+        window_size (tuple[int]): The height and width of the window.
+        num_heads (int): Number of attention heads.
+        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
+        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
+        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
+    """
+    def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):
+        super().__init__()
+        self.dim = dim
+        self.window_size = window_size  # Wh, Ww
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim ** -0.5
+        # define a parameter table of relative position bias
+        self.relative_position_bias_table = nn.Parameter(
+            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))  # 2*Wh-1 * 2*Ww-1, nH
+        # get pair-wise relative position index for each token inside the window
+        coords_h = torch.arange(self.window_size[0])
+        coords_w = torch.arange(self.window_size[1])
+        coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing='ij'))  # 2, Wh, Ww
+        coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
+        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # 2, Wh*Ww, Wh*Ww
+        relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # Wh*Ww, Wh*Ww, 2
+        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
+        relative_coords[:, :, 1] += self.window_size[1] - 1
+        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
+        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
+        self.register_buffer("relative_position_index", relative_position_index)
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.attn_drop_prob = attn_drop
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        trunc_normal_(self.relative_position_bias_table, std=.02)
+        self.softmax = nn.Softmax(dim=-1)
+    def forward(self, x, mask=None):
+        """ Forward function.
+        Args:
+            x: input features with shape of (num_windows*B, N, C)
+            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
+        """
+        B_, N, C = x.shape
+        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)
+        q = q * self.scale
+        if config.SDPA_enabled:
+            x = torch.nn.functional.scaled_dot_product_attention(
+                q, k, v,
+                attn_mask=None, dropout_p=self.attn_drop_prob, is_causal=False
+            ).transpose(1, 2).reshape(B_, N, C)
+        else:
+            attn = (q @ k.transpose(-2, -1))
+            relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
+                self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1)  # Wh*Ww,Wh*Ww,nH
+            relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww
+            attn = attn + relative_position_bias.unsqueeze(0)
+            if mask is not None:
+                nW = mask.shape[0]
+                attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
+                attn = attn.view(-1, self.num_heads, N, N)
+                attn = self.softmax(attn)
+            else:
+                attn = self.softmax(attn)
+            attn = self.attn_drop(attn)
+            x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+class SwinTransformerBlock(nn.Module):
+    """ Swin Transformer Block.
+    Args:
+        dim (int): Number of input channels.
+        num_heads (int): Number of attention heads.
+        window_size (int): Window size.
+        shift_size (int): Shift size for SW-MSA.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float, optional): Stochastic depth rate. Default: 0.0
+        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(self, dim, num_heads, window_size=7, shift_size=0,
+                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
+                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.dim = dim
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.shift_size = shift_size
+        self.mlp_ratio = mlp_ratio
+        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"
+        self.norm1 = norm_layer(dim)
+        self.attn = WindowAttention(
+            dim, window_size=to_2tuple(self.window_size), num_heads=num_heads,
+            qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+        self.H = None
+        self.W = None
+    def forward(self, x, mask_matrix):
+        """ Window attention and MLP, each wrapped in a residual connection.
+        The spatial resolution is taken from ``self.H`` / ``self.W``, which must be assigned before the call.
+        Args:
+            x: Input feature, tensor size (B, H*W, C).
+            mask_matrix: Attention mask for cyclic shift; only used when ``shift_size > 0``.
+        Returns:
+            Tensor of size (B, H*W, C).
+        """
+        B, L, C = x.shape
+        H, W = self.H, self.W
+        assert L == H * W, "input feature has wrong size"
+        ws, shift = self.window_size, self.shift_size
+        residual = x
+        feat = self.norm1(x).view(B, H, W, C)
+        # pad on the right/bottom so both spatial dims become multiples of the window size
+        pad_r = (ws - W % ws) % ws
+        pad_b = (ws - H % ws) % ws
+        feat = F.pad(feat, (0, 0, 0, pad_r, 0, pad_b))
+        Hp, Wp = feat.shape[1], feat.shape[2]
+        # cyclic shift (the mask is only meaningful for shifted windows)
+        shifted = shift > 0
+        if shifted:
+            feat = torch.roll(feat, shifts=(-shift, -shift), dims=(1, 2))
+        attn_mask = mask_matrix if shifted else None
+        # partition into windows and flatten each one: nW*B, window_size*window_size, C
+        windows = window_partition(feat, ws).view(-1, ws * ws, C)
+        # W-MSA/SW-MSA
+        windows = self.attn(windows, mask=attn_mask)
+        # merge windows back into a (B, Hp, Wp, C) map
+        windows = windows.view(-1, ws, ws, C)
+        feat = window_reverse(windows, ws, Hp, Wp)
+        # reverse cyclic shift
+        if shifted:
+            feat = torch.roll(feat, shifts=(shift, shift), dims=(1, 2))
+        # drop the padded rows/columns again
+        if pad_r > 0 or pad_b > 0:
+            feat = feat[:, :H, :W, :].contiguous()
+        feat = feat.view(B, H * W, C)
+        # residual around attention, then residual around the FFN
+        x = residual + self.drop_path(feat)
+        return x + self.drop_path(self.mlp(self.norm2(x)))
+class PatchMerging(nn.Module):
+    """ Patch Merging Layer
+    Concatenates every 2x2 neighbourhood along the channel axis (C -> 4*C), normalizes it and
+    projects it to 2*C with a bias-free linear layer.
+    Args:
+        dim (int): Number of input channels.
+        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
+    """
+    def __init__(self, dim, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.dim = dim
+        merged_dim = 4 * dim
+        self.reduction = nn.Linear(merged_dim, 2 * dim, bias=False)
+        self.norm = norm_layer(merged_dim)
+    def forward(self, x, H, W):
+        """ Forward function.
+        Args:
+            x: Input feature, tensor size (B, H*W, C).
+            H, W: Spatial resolution of the input feature.
+        Returns:
+            Tensor of size (B, ceil(H/2)*ceil(W/2), 2*C).
+        """
+        B, L, C = x.shape
+        assert L == H * W, "input feature has wrong size"
+        grid = x.view(B, H, W, C)
+        # an odd height/width gets one extra zero row/column so the map splits into 2x2 cells
+        odd_h, odd_w = H % 2, W % 2
+        if odd_h == 1 or odd_w == 1:
+            grid = F.pad(grid, (0, 0, 0, odd_w, 0, odd_h))
+        # channel order: (even row, even col), (odd row, even col), (even row, odd col), (odd row, odd col)
+        quads = [grid[:, row::2, col::2, :] for col in (0, 1) for row in (0, 1)]
+        merged = torch.cat(quads, -1).view(B, -1, 4 * C)  # B H/2*W/2 4*C
+        return self.reduction(self.norm(merged))
+class BasicLayer(nn.Module):
+    """ A basic Swin Transformer layer for one stage.
+    Builds `depth` SwinTransformerBlocks whose shift alternates between 0 and window_size // 2,
+    optionally followed by a downsample layer.
+    Args:
+        dim (int): Number of feature channels
+        depth (int): Depths of this stage.
+        num_heads (int): Number of attention head.
+        window_size (int): Local window size. Default: 7.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
+        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+        drop (float, optional): Dropout rate. Default: 0.0
+        attn_drop (float, optional): Attention dropout rate. Default: 0.0
+        drop_path (float | list[float] | tuple[float], optional): Stochastic depth rate. A list or tuple
+            supplies one rate per block; a scalar is shared by all blocks. Default: 0.0
+        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+    """
+    def __init__(self,
+                 dim,
+                 depth,
+                 num_heads,
+                 window_size=7,
+                 mlp_ratio=4.,
+                 qkv_bias=True,
+                 qk_scale=None,
+                 drop=0.,
+                 attn_drop=0.,
+                 drop_path=0.,
+                 norm_layer=nn.LayerNorm,
+                 downsample=None,
+                 use_checkpoint=False):
+        super().__init__()
+        self.window_size = window_size
+        self.shift_size = window_size // 2
+        self.depth = depth
+        self.use_checkpoint = use_checkpoint
+        # Tuples are accepted as well as lists: previously a tuple was handed to every block whole,
+        # where the `drop_path > 0.` comparison fails.
+        per_block_rates = isinstance(drop_path, (list, tuple))
+        # build blocks (even index: regular windows, odd index: shifted windows)
+        self.blocks = nn.ModuleList([
+            SwinTransformerBlock(
+                dim=dim,
+                num_heads=num_heads,
+                window_size=window_size,
+                shift_size=0 if (i % 2 == 0) else window_size // 2,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop,
+                attn_drop=attn_drop,
+                drop_path=drop_path[i] if per_block_rates else drop_path,
+                norm_layer=norm_layer)
+            for i in range(depth)])
+        # patch merging layer
+        if downsample is not None:
+            self.downsample = downsample(dim=dim, norm_layer=norm_layer)
+        else:
+            self.downsample = None
+    def forward(self, x, H, W):
+        """ Forward function.
+        Args:
+            x: Input feature, tensor size (B, H*W, C).
+            H, W: Spatial resolution of the input feature.
+        Returns:
+            Tuple ``(x, H, W, x_down, Wh, Ww)``: the stage output with its resolution, followed by the
+            downsampled output with its resolution. Without a downsample layer the second triple
+            repeats the first.
+        """
+        # calculate attention mask for SW-MSA on the padded (Hp, Wp) grid
+        Hp = int(np.ceil(H / self.window_size)) * self.window_size
+        Wp = int(np.ceil(W / self.window_size)) * self.window_size
+        img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device)  # 1 Hp Wp 1
+        h_slices = (slice(0, -self.window_size),
+                    slice(-self.window_size, -self.shift_size),
+                    slice(-self.shift_size, None))
+        w_slices = (slice(0, -self.window_size),
+                    slice(-self.window_size, -self.shift_size),
+                    slice(-self.shift_size, None))
+        # label each of the 3x3 regions with its own id
+        cnt = 0
+        for h in h_slices:
+            for w in w_slices:
+                img_mask[:, h, w, :] = cnt
+                cnt += 1
+        mask_windows = window_partition(img_mask, self.window_size)  # nW, window_size, window_size, 1
+        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
+        # positions whose region ids differ get -100, positions in the same region get 0
+        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+        # Keep the mask in the activation dtype (it was always float32 before).
+        # NOTE(review): presumably WindowAttention adds this mask to its attention scores, which would
+        # otherwise be promoted to float32 for a half-precision model — confirm against WindowAttention.
+        attn_mask = attn_mask.to(x.dtype)
+        for blk in self.blocks:
+            # blocks read the resolution from attributes rather than from call arguments
+            blk.H, blk.W = H, W
+            if self.use_checkpoint:
+                x = checkpoint.checkpoint(blk, x, attn_mask)
+            else:
+                x = blk(x, attn_mask)
+        if self.downsample is not None:
+            x_down = self.downsample(x, H, W)
+            Wh, Ww = (H + 1) // 2, (W + 1) // 2
+            return x, H, W, x_down, Wh, Ww
+        else:
+            return x, H, W, x, H, W
+class PatchEmbed(nn.Module):
+    """ Image to Patch Embedding
+    A strided convolution (kernel size == stride == patch size) followed by an optional normalization.
+    Args:
+        patch_size (int): Patch token size. Default: 4.
+        in_channels (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Module, optional): Normalization layer. Default: None
+    """
+    def __init__(self, patch_size=4, in_channels=3, embed_dim=96, norm_layer=None):
+        super().__init__()
+        patch_size = to_2tuple(patch_size)
+        self.patch_size = patch_size
+        self.in_channels = in_channels
+        self.embed_dim = embed_dim
+        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
+        self.norm = norm_layer(embed_dim) if norm_layer is not None else None
+    def forward(self, x):
+        """Pad the (B, C, H, W) input to a multiple of the patch size, project it, and optionally normalize."""
+        patch_h, patch_w = self.patch_size[0], self.patch_size[1]
+        _, _, H, W = x.size()
+        # pad the right edge, then the bottom edge, up to the next multiple of the patch size
+        rem_w = W % patch_w
+        if rem_w != 0:
+            x = F.pad(x, (0, patch_w - rem_w))
+        rem_h = H % patch_h
+        if rem_h != 0:
+            x = F.pad(x, (0, 0, 0, patch_h - rem_h))
+        x = self.proj(x)  # B C Wh Ww
+        if self.norm is None:
+            return x
+        # the norm is applied on the channel axis, so flatten to (B, Wh*Ww, C) and restore afterwards
+        Wh, Ww = x.size(2), x.size(3)
+        tokens = self.norm(x.flatten(2).transpose(1, 2))
+        return tokens.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww)
+class SwinTransformer(nn.Module):
+    """ Swin Transformer backbone.
+        A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -
+          https://arxiv.org/pdf/2103.14030
+    Args:
+        pretrain_img_size (int): Input image size for training the pretrained model,
+            used in absolute position embedding. Default 224.
+        patch_size (int | tuple(int)): Patch size. Default: 4.
+        in_channels (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        depths (tuple[int]): Depths of each Swin Transformer stage.
+        num_heads (tuple[int]): Number of attention head of each stage.
+        window_size (int): Window size. Default: 7.
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set.
+        drop_rate (float): Dropout rate.
+        attn_drop_rate (float): Attention dropout rate. Default: 0.
+        drop_path_rate (float): Stochastic depth rate. Default: 0.2.
+        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False.
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True.
+        out_indices (Sequence[int]): Output from which stages.
+        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
+            -1 means not freezing any parameters.
+        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
+    """
+    def __init__(self,
+                 pretrain_img_size=224,
+                 patch_size=4,
+                 in_channels=3,
+                 embed_dim=96,
+                 depths=[2, 2, 6, 2],
+                 num_heads=[3, 6, 12, 24],
+                 window_size=7,
+                 mlp_ratio=4.,
+                 qkv_bias=True,
+                 qk_scale=None,
+                 drop_rate=0.,
+                 attn_drop_rate=0.,
+                 drop_path_rate=0.2,
+                 norm_layer=nn.LayerNorm,
+                 ape=False,
+                 patch_norm=True,
+                 out_indices=(0, 1, 2, 3),
+                 frozen_stages=-1,
+                 use_checkpoint=False):
+        super().__init__()
+        self.pretrain_img_size = pretrain_img_size
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.out_indices = out_indices
+        self.frozen_stages = frozen_stages
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        # absolute position embedding, sized for the pretraining resolution
+        if self.ape:
+            pretrain_img_size = to_2tuple(pretrain_img_size)
+            patch_size = to_2tuple(patch_size)
+            patches_resolution = [pretrain_img_size[0] // patch_size[0], pretrain_img_size[1] // patch_size[1]]
+            self.absolute_pos_embed = nn.Parameter(torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1]))
+            trunc_normal_(self.absolute_pos_embed, std=.02)
+        self.pos_drop = nn.Dropout(p=drop_rate)
+        # stochastic depth: one rate per block, rising linearly from 0 to drop_path_rate
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+        # build layers; the channel count doubles per stage and every stage but the last downsamples
+        self.layers = nn.ModuleList()
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(
+                dim=int(embed_dim * 2 ** i_layer),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate,
+                attn_drop=attn_drop_rate,
+                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                norm_layer=norm_layer,
+                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,
+                use_checkpoint=use_checkpoint)
+            self.layers.append(layer)
+        num_features = [int(embed_dim * 2 ** i) for i in range(self.num_layers)]
+        self.num_features = num_features
+        # add a norm layer for each output, registered as `norm{i}`
+        for i_layer in out_indices:
+            layer = norm_layer(num_features[i_layer])
+            layer_name = f'norm{i_layer}'
+            self.add_module(layer_name, layer)
+        self._freeze_stages()
+    def _freeze_stages(self):
+        """Switch the parts selected by ``self.frozen_stages`` to eval mode and disable their gradients."""
+        if self.frozen_stages >= 0:
+            self.patch_embed.eval()
+            for param in self.patch_embed.parameters():
+                param.requires_grad = False
+        if self.frozen_stages >= 1 and self.ape:
+            self.absolute_pos_embed.requires_grad = False
+        if self.frozen_stages >= 2:
+            self.pos_drop.eval()
+            # freezes the first `frozen_stages - 1` entries of self.layers
+            for i in range(0, self.frozen_stages - 1):
+                m = self.layers[i]
+                m.eval()
+                for param in m.parameters():
+                    param.requires_grad = False
+    def forward(self, x):
+        """Forward function.
+        Returns:
+            Tuple with one (B, C_i, H_i, W_i) feature map per stage index listed in ``out_indices``.
+        """
+        x = self.patch_embed(x)
+        Wh, Ww = x.size(2), x.size(3)
+        if self.ape:
+            # interpolate the position embedding to the corresponding size
+            absolute_pos_embed = F.interpolate(self.absolute_pos_embed, size=(Wh, Ww), mode='bicubic')
+            x = (x + absolute_pos_embed)  # B C Wh Ww
+        outs = []
+        x = x.flatten(2).transpose(1, 2)  # B Wh*Ww C
+        x = self.pos_drop(x)
+        for i in range(self.num_layers):
+            layer = self.layers[i]
+            # x_out/H/W: this stage's output; x/Wh/Ww: input (and resolution) for the next stage
+            x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww)
+            if i in self.out_indices:
+                norm_layer = getattr(self, f'norm{i}')
+                x_out = norm_layer(x_out)
+                out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous()
+                outs.append(out)
+        return tuple(outs)
+    def train(self, mode=True):
+        """Convert the model into training mode while keeping the frozen stages frozen.
+        Returns:
+            self, matching ``nn.Module.train`` so that ``model.train()`` and ``model.eval()`` can be
+            chained or assigned (the override previously returned None).
+        """
+        super(SwinTransformer, self).train(mode)
+        self._freeze_stages()
+        return self
+def swin_v1_t():
+    """Return a SwinTransformer with embed_dim 96, depths (2, 2, 6, 2), heads (3, 6, 12, 24), window size 7."""
+    return SwinTransformer(
+        embed_dim=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+    )
+def swin_v1_s():
+    """Return a SwinTransformer with embed_dim 96, depths (2, 2, 18, 2), heads (3, 6, 12, 24), window size 7."""
+    return SwinTransformer(
+        embed_dim=96,
+        depths=[2, 2, 18, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+    )
+def swin_v1_b():
+    """Return a SwinTransformer with embed_dim 128, depths (2, 2, 18, 2), heads (4, 8, 16, 32), window size 12."""
+    return SwinTransformer(
+        embed_dim=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        window_size=12,
+    )
+def swin_v1_l():
+    """Return a SwinTransformer with embed_dim 192, depths (2, 2, 18, 2), heads (6, 12, 24, 48), window size 12."""
+    return SwinTransformer(
+        embed_dim=192,
+        depths=[2, 2, 18, 2],
+        num_heads=[6, 12, 24, 48],
+        window_size=12,
+    )
+### models/modules/deform_conv.py
+import torch
+import torch.nn as nn
+from torchvision.ops import deform_conv2d
+class DeformableConv2d(nn.Module):
+    """Modulated deformable convolution.
+    Two zero-initialised convolutions predict, from the input itself, the sampling offsets and the
+    modulation mask that are passed to ``torchvision.ops.deform_conv2d`` together with the weights
+    of a regular convolution.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size=3,
+                 stride=1,
+                 padding=1,
+                 bias=False):
+        super(DeformableConv2d, self).__init__()
+        assert type(kernel_size) in (tuple, int)
+        if type(kernel_size) != tuple:
+            kernel_size = (kernel_size, kernel_size)
+        self.stride = stride if type(stride) == tuple else (stride, stride)
+        self.padding = padding
+        taps = kernel_size[0] * kernel_size[1]
+        def _zero_init_conv(n_out):
+            # predictor conv that starts out producing all zeros
+            conv = nn.Conv2d(in_channels,
+                             n_out,
+                             kernel_size=kernel_size,
+                             stride=stride,
+                             padding=self.padding,
+                             bias=True)
+            nn.init.constant_(conv.weight, 0.)
+            nn.init.constant_(conv.bias, 0.)
+            return conv
+        # two offset channels and one modulation channel per kernel tap
+        self.offset_conv = _zero_init_conv(2 * taps)
+        self.modulator_conv = _zero_init_conv(1 * taps)
+        # only the weight and bias of this conv are used; the sampling is done by deform_conv2d
+        self.regular_conv = nn.Conv2d(in_channels,
+                                      out_channels=out_channels,
+                                      kernel_size=kernel_size,
+                                      stride=stride,
+                                      padding=self.padding,
+                                      bias=bias)
+    def forward(self, x):
+        """Predict offsets and mask from `x`, then apply the deformable convolution to `x`."""
+        offsets = self.offset_conv(x)
+        # sigmoid scaled to (0, 2); the zero-initialised predictor therefore starts at exactly 1
+        modulation = 2. * torch.sigmoid(self.modulator_conv(x))
+        return deform_conv2d(
+            input=x,
+            offset=offsets,
+            weight=self.regular_conv.weight,
+            bias=self.regular_conv.bias,
+            padding=self.padding,
+            mask=modulation,
+            stride=self.stride,
+        )
+### utils.py
+import torch.nn as nn
+def build_act_layer(act_layer):
+    """Instantiate the activation module named by `act_layer`.
+    Args:
+        act_layer: One of 'ReLU', 'SiLU' (both created with inplace=True) or 'GELU'.
+    Raises:
+        NotImplementedError: For any other value.
+    """
+    factories = (
+        ('ReLU', lambda: nn.ReLU(inplace=True)),
+        ('SiLU', lambda: nn.SiLU(inplace=True)),
+        ('GELU', lambda: nn.GELU()),
+    )
+    for name, make in factories:
+        if act_layer == name:
+            return make()
+    raise NotImplementedError(f'build_act_layer does not support {act_layer}')
+def build_norm_layer(dim,
+                     norm_layer,
+                     in_format='channels_last',
+                     out_format='channels_last',
+                     eps=1e-6):
+    """Wrap a BatchNorm2d ('BN') or LayerNorm ('LN') in the permutes needed for the given layouts.
+    BatchNorm2d gets a permute in front when the input is 'channels_last' and one behind when the
+    output should be 'channels_last'; LayerNorm gets them for 'channels_first' instead.
+    Args:
+        dim: Feature size handed to the normalization layer.
+        norm_layer: 'BN' or 'LN'.
+        in_format: Layout of the incoming tensor.
+        out_format: Layout requested for the result.
+        eps: Epsilon for LayerNorm (not used for 'BN').
+    Returns:
+        nn.Sequential holding the optional permutes around the normalization layer.
+    Raises:
+        NotImplementedError: For any other `norm_layer`.
+    """
+    if norm_layer == 'BN':
+        before = [to_channels_first()] if in_format == 'channels_last' else []
+        after = [to_channels_last()] if out_format == 'channels_last' else []
+        stages = before + [nn.BatchNorm2d(dim)] + after
+    elif norm_layer == 'LN':
+        before = [to_channels_last()] if in_format == 'channels_first' else []
+        after = [to_channels_first()] if out_format == 'channels_first' else []
+        stages = before + [nn.LayerNorm(dim, eps=eps)] + after
+    else:
+        raise NotImplementedError(
+            f'build_norm_layer does not support {norm_layer}')
+    return nn.Sequential(*stages)
+class to_channels_first(nn.Module):
+    """Parameter-free module that moves the last axis of a 4-D tensor to position 1."""
+    def forward(self, x):
+        axis_order = (0, 3, 1, 2)
+        return x.permute(*axis_order)
+class to_channels_last(nn.Module):
+    """Parameter-free module that moves axis 1 of a 4-D tensor to the last position."""
+    def forward(self, x):
+        axis_order = (0, 2, 3, 1)
+        return x.permute(*axis_order)
+### dataset.py
+# Class names stored as one ', '-separated string (the adjacent literals below are concatenated),
+# then split into the list `class_labels_TR_sorted`.
+# NOTE(review): entries such as 'Beatle' and 'Stationary' look like misspellings, but they are runtime
+# label strings and presumably have to match the dataset's own naming — confirm before changing them.
+_class_labels_TR_sorted = (
+    'Airplane, Ant, Antenna, Archery, Axe, BabyCarriage, Bag, BalanceBeam, Balcony, Balloon, Basket, BasketballHoop, Beatle, Bed, Bee, Bench, Bicycle, '
+    'BicycleFrame, BicycleStand, Boat, Bonsai, BoomLift, Bridge, BunkBed, Butterfly, Button, Cable, CableLift, Cage, Camcorder, Cannon, Canoe, Car, '
+    'CarParkDropArm, Carriage, Cart, Caterpillar, CeilingLamp, Centipede, Chair, Clip, Clock, Clothes, CoatHanger, Comb, ConcretePumpTruck, Crack, Crane, '
+    'Cup, DentalChair, Desk, DeskChair, Diagram, DishRack, DoorHandle, Dragonfish, Dragonfly, Drum, Earphone, Easel, ElectricIron, Excavator, Eyeglasses, '
+    'Fan, Fence, Fencing, FerrisWheel, FireExtinguisher, Fishing, Flag, FloorLamp, Forklift, GasStation, Gate, Gear, Goal, Golf, GymEquipment, Hammock, '
+    'Handcart, Handcraft, Handrail, HangGlider, Harp, Harvester, Headset, Helicopter, Helmet, Hook, HorizontalBar, Hydrovalve, IroningTable, Jewelry, Key, '
+    'KidsPlayground, Kitchenware, Kite, Knife, Ladder, LaundryRack, Lightning, Lobster, Locust, Machine, MachineGun, MagazineRack, Mantis, Medal, MemorialArchway, '
+    'Microphone, Missile, MobileHolder, Monitor, Mosquito, Motorcycle, MovingTrolley, Mower, MusicPlayer, MusicStand, ObservationTower, Octopus, OilWell, '
+    'OlympicLogo, OperatingTable, OutdoorFitnessEquipment, Parachute, Pavilion, Piano, Pipe, PlowHarrow, PoleVault, Punchbag, Rack, Racket, Rifle, Ring, Robot, '
+    'RockClimbing, Rope, Sailboat, Satellite, Scaffold, Scale, Scissor, Scooter, Sculpture, Seadragon, Seahorse, Seal, SewingMachine, Ship, Shoe, ShoppingCart, '
+    'ShoppingTrolley, Shower, Shrimp, Signboard, Skateboarding, Skeleton, Skiing, Spade, SpeedBoat, Spider, Spoon, Stair, Stand, Stationary, SteeringWheel, '
+    'Stethoscope, Stool, Stove, StreetLamp, SweetStand, Swing, Sword, TV, Table, TableChair, TableLamp, TableTennis, Tank, Tapeline, Teapot, Telescope, Tent, '
+    'TobaccoPipe, Toy, Tractor, TrafficLight, TrafficSign, Trampoline, TransmissionTower, Tree, Tricycle, TrimmerCover, Tripod, Trombone, Truck, Trumpet, Tuba, '
+    'UAV, Umbrella, UnevenBars, UtilityPole, VacuumCleaner, Violin, Wakesurfing, Watch, WaterTower, WateringPot, Well, WellLid, Wheel, Wheelchair, WindTurbine, Windmill, WineGlass, WireWhisk, Yacht'
+)
+class_labels_TR_sorted = _class_labels_TR_sorted.split(', ')
+### models/backbones/build_backbones.py
+import torch
+import torch.nn as nn
+from collections import OrderedDict
+from torchvision.models import vgg16, vgg16_bn, VGG16_Weights, VGG16_BN_Weights, resnet50, ResNet50_Weights
+# from models.pvt_v2 import pvt_v2_b0, pvt_v2_b1, pvt_v2_b2, pvt_v2_b5
+# from models.swin_v1 import swin_v1_t, swin_v1_s, swin_v1_b, swin_v1_l
+# from config import Config
+# Module-level configuration instance; load_weights() below reads `config.weights[...]` from it.
+# NOTE(review): `Config` is not imported here (the import above is commented out) — presumably it is
+# defined earlier in this combined file; confirm.
+config = Config()
+def build_backbone(bb_name, pretrained=True, params_settings=''):
+    """Build the backbone named `bb_name`.
+    Args:
+        bb_name: 'vgg16', 'vgg16bn' or 'resnet50' for the torchvision models; any other value is
+            treated as the name of a constructor to call (see the eval note below).
+        pretrained: Load pretrained weights. The torchvision models use their `DEFAULT` weights;
+            other backbones go through `load_weights`.
+        params_settings: Argument source text placed inside the constructor call of non-torchvision
+            backbones, e.g. 'in_channels=4'.
+    Returns:
+        The backbone module. For the torchvision models this is an nn.Sequential with the stages
+        'conv1' .. 'conv4'. May be None when `load_weights` fails.
+    """
+    # The weights enum goes through `weights=`. The deprecated `pretrained=` keyword only treats the
+    # enum as truthy and loads the legacy weights, which for ResNet-50 are not the DEFAULT ones.
+    if bb_name == 'vgg16':
+        bb_net = list(vgg16(weights=VGG16_Weights.DEFAULT if pretrained else None).children())[0]
+        bb = nn.Sequential(OrderedDict({'conv1': bb_net[:4], 'conv2': bb_net[4:9], 'conv3': bb_net[9:16], 'conv4': bb_net[16:23]}))
+    elif bb_name == 'vgg16bn':
+        bb_net = list(vgg16_bn(weights=VGG16_BN_Weights.DEFAULT if pretrained else None).children())[0]
+        bb = nn.Sequential(OrderedDict({'conv1': bb_net[:6], 'conv2': bb_net[6:13], 'conv3': bb_net[13:23], 'conv4': bb_net[23:33]}))
+    elif bb_name == 'resnet50':
+        bb_net = list(resnet50(weights=ResNet50_Weights.DEFAULT if pretrained else None).children())
+        # children 0-2 form 'conv1'; child 3 is skipped and children 4-6 become 'conv2' .. 'conv4'
+        bb = nn.Sequential(OrderedDict({'conv1': nn.Sequential(*bb_net[0:3]), 'conv2': bb_net[4], 'conv3': bb_net[5], 'conv4': bb_net[6]}))
+    else:
+        # NOTE(security): eval() executes `bb_name` and `params_settings` as Python source, so both
+        # must come from trusted configuration only, never from user-controlled input.
+        bb = eval('{}({})'.format(bb_name, params_settings))
+        if pretrained:
+            bb = load_weights(bb, bb_name)
+    return bb
+def load_weights(model, model_name):
+    """Load the checkpoint at `config.weights[model_name]` into `model`.
+    Only checkpoint entries whose key exists in the model are used; an entry whose size differs from
+    the model's tensor is replaced by the model's own tensor, i.e. that parameter keeps its current
+    value. If no top-level key matches and the checkpoint has exactly one top-level item, the
+    weights are looked up inside that item instead.
+    Args:
+        model: Module to load the weights into.
+        model_name: Key into `config.weights` giving the checkpoint path.
+    Returns:
+        `model` with the weights loaded, or None when no usable weights were found.
+    """
+    save_model = torch.load(config.weights[model_name], map_location='cpu')
+    model_dict = model.state_dict()
+    def _usable_entries(candidates):
+        # to ignore the weights with mismatched size when I modify the backbone itself.
+        return {k: v if v.size() == model_dict[k].size() else model_dict[k] for k, v in candidates.items() if k in model_dict.keys()}
+    state_dict = _usable_entries(save_model)
+    if not state_dict:
+        save_model_keys = list(save_model.keys())
+        sub_item = save_model_keys[0] if len(save_model_keys) == 1 else None
+        # Only descend when there is a single wrapper item holding a dict. Indexing with None used
+        # to raise KeyError before the message below could be printed.
+        if sub_item is not None and isinstance(save_model[sub_item], dict):
+            state_dict = _usable_entries(save_model[sub_item])
+        if not state_dict or not sub_item:
+            print('Weights are not successully loaded. Check the state dict of weights file.')
+            return None
+        else:
+            print('Found correct weights in the "{}" item of loaded state_dict.'.format(sub_item))
+    model_dict.update(state_dict)
+    model.load_state_dict(model_dict)
+    return model
+### models/modules/decoder_blocks.py
+import torch
+import torch.nn as nn
+# from models.aspp import ASPP, ASPPDeformable
+# from config import Config
+# config = Config()
+class BasicDecBlk(nn.Module):
+    """Decoder block: 3x3 conv -> norm -> ReLU -> optional attention module -> 3x3 conv -> norm.
+    The intermediate width and the attention module are chosen from the global `config`; the
+    `inter_channels` argument is overridden by that choice. Batch norm is only used when
+    `config.batch_size > 1`.
+    """
+    def __init__(self, in_channels=64, out_channels=64, inter_channels=64):
+        super(BasicDecBlk, self).__init__()
+        if config.dec_channels_inter == 'adap':
+            inter_channels = in_channels // 4
+        else:
+            inter_channels = 64
+        self.conv_in = nn.Conv2d(in_channels, inter_channels, 3, 1, padding=1)
+        self.relu_in = nn.ReLU(inplace=True)
+        # `dec_att` stays undefined for any other config value; forward() checks for the attribute
+        attention = config.dec_att
+        if attention == 'ASPP':
+            self.dec_att = ASPP(in_channels=inter_channels)
+        elif attention == 'ASPPDeformable':
+            self.dec_att = ASPPDeformable(in_channels=inter_channels)
+        self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, padding=1)
+        if config.batch_size > 1:
+            self.bn_in = nn.BatchNorm2d(inter_channels)
+        else:
+            self.bn_in = nn.Identity()
+        if config.batch_size > 1:
+            self.bn_out = nn.BatchNorm2d(out_channels)
+        else:
+            self.bn_out = nn.Identity()
+    def forward(self, x):
+        feat = self.relu_in(self.bn_in(self.conv_in(x)))
+        if hasattr(self, 'dec_att'):
+            feat = self.dec_att(feat)
+        return self.bn_out(self.conv_out(feat))
+class ResBlk(nn.Module):
+    """Residual decoder block: the conv/norm/ReLU/attention/conv/norm path plus a 1x1-conv shortcut.
+    `out_channels` defaults to `in_channels`. The intermediate width and the attention module are
+    chosen from the global `config`; the `inter_channels` argument is overridden by that choice.
+    """
+    def __init__(self, in_channels=64, out_channels=None, inter_channels=64):
+        super(ResBlk, self).__init__()
+        out_channels = in_channels if out_channels is None else out_channels
+        if config.dec_channels_inter == 'adap':
+            inter_channels = in_channels // 4
+        else:
+            inter_channels = 64
+        self.conv_in = nn.Conv2d(in_channels, inter_channels, 3, 1, padding=1)
+        if config.batch_size > 1:
+            self.bn_in = nn.BatchNorm2d(inter_channels)
+        else:
+            self.bn_in = nn.Identity()
+        self.relu_in = nn.ReLU(inplace=True)
+        # `dec_att` stays undefined for any other config value; forward() checks for the attribute
+        attention = config.dec_att
+        if attention == 'ASPP':
+            self.dec_att = ASPP(in_channels=inter_channels)
+        elif attention == 'ASPPDeformable':
+            self.dec_att = ASPPDeformable(in_channels=inter_channels)
+        self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, padding=1)
+        if config.batch_size > 1:
+            self.bn_out = nn.BatchNorm2d(out_channels)
+        else:
+            self.bn_out = nn.Identity()
+        # 1x1 projection so the shortcut matches `out_channels`
+        self.conv_resi = nn.Conv2d(in_channels, out_channels, 1, 1, 0)
+    def forward(self, x):
+        shortcut = self.conv_resi(x)
+        feat = self.relu_in(self.bn_in(self.conv_in(x)))
+        if hasattr(self, 'dec_att'):
+            feat = self.dec_att(feat)
+        feat = self.bn_out(self.conv_out(feat))
+        return feat + shortcut
+### models/modules/lateral_blocks.py
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from functools import partial
+# from config import Config
+# config = Config()
+class BasicLatBlk(nn.Module):
+    """Lateral block: a single 1x1 convolution mapping `in_channels` to `out_channels`.
+    Args:
+        in_channels: Channels of the incoming feature map.
+        out_channels: Channels of the result.
+        inter_channels: Accepted for signature compatibility with the other blocks; unused.
+    """
+    def __init__(self, in_channels=64, out_channels=64, inter_channels=64):
+        super(BasicLatBlk, self).__init__()
+        # The former `inter_channels` recomputation was dead code: its result was never used and it
+        # only made this block depend on the global `config`.
+        self.conv = nn.Conv2d(in_channels, out_channels, 1, 1, 0)
+    def forward(self, x):
+        return self.conv(x)
+### models/modules/aspp.py
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# from models.deform_conv import DeformableConv2d
+# from config import Config
+# config = Config()
+class _ASPPModule(nn.Module):
+    """One ASPP branch: bias-free dilated convolution -> norm -> ReLU.
+    Batch norm is replaced by an identity when `config.batch_size` is not greater than 1.
+    """
+    def __init__(self, in_channels, planes, kernel_size, padding, dilation):
+        super(_ASPPModule, self).__init__()
+        self.atrous_conv = nn.Conv2d(
+            in_channels, planes,
+            kernel_size=kernel_size, stride=1, padding=padding, dilation=dilation, bias=False)
+        if config.batch_size > 1:
+            self.bn = nn.BatchNorm2d(planes)
+        else:
+            self.bn = nn.Identity()
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, x):
+        return self.relu(self.bn(self.atrous_conv(x)))
+class ASPP(nn.Module):
+    """Atrous spatial pyramid pooling.
+    Four parallel convolution branches (one 1x1, three dilated 3x3) and a global-average-pooling
+    branch are concatenated along channels and fused by a 1x1 convolution, norm, ReLU and dropout.
+    Args:
+        in_channels: Channels of the input feature map.
+        out_channels: Channels of the result; defaults to `in_channels`.
+        output_stride: 16 or 8; selects the dilation rates of the branches.
+    Raises:
+        NotImplementedError: For any other `output_stride`.
+    """
+    def __init__(self, in_channels=64, out_channels=None, output_stride=16):
+        super(ASPP, self).__init__()
+        self.down_scale = 1
+        out_channels = in_channels if out_channels is None else out_channels
+        self.in_channelster = 256 // self.down_scale
+        width = self.in_channelster
+        if output_stride == 16:
+            dilations = [1, 6, 12, 18]
+        elif output_stride == 8:
+            dilations = [1, 12, 24, 36]
+        else:
+            raise NotImplementedError
+        rate1, rate2, rate3, rate4 = dilations
+        self.aspp1 = _ASPPModule(in_channels, width, 1, padding=0, dilation=rate1)
+        # for the 3x3 branches the padding equals the dilation
+        self.aspp2 = _ASPPModule(in_channels, width, 3, padding=rate2, dilation=rate2)
+        self.aspp3 = _ASPPModule(in_channels, width, 3, padding=rate3, dilation=rate3)
+        self.aspp4 = _ASPPModule(in_channels, width, 3, padding=rate4, dilation=rate4)
+        self.global_avg_pool = nn.Sequential(
+            nn.AdaptiveAvgPool2d((1, 1)),
+            nn.Conv2d(in_channels, width, 1, stride=1, bias=False),
+            nn.BatchNorm2d(width) if config.batch_size > 1 else nn.Identity(),
+            nn.ReLU(inplace=True))
+        # five branches are concatenated before the fusing 1x1 convolution
+        self.conv1 = nn.Conv2d(width * 5, out_channels, 1, bias=False)
+        if config.batch_size > 1:
+            self.bn1 = nn.BatchNorm2d(out_channels)
+        else:
+            self.bn1 = nn.Identity()
+        self.relu = nn.ReLU(inplace=True)
+        self.dropout = nn.Dropout(0.5)
+    def forward(self, x):
+        branches = [self.aspp1(x), self.aspp2(x), self.aspp3(x), self.aspp4(x)]
+        # the pooled 1x1 map is resized to the spatial size of the first branch
+        pooled = self.global_avg_pool(x)
+        pooled = F.interpolate(pooled, size=branches[0].size()[2:], mode='bilinear', align_corners=True)
+        fused = torch.cat((*branches, pooled), dim=1)
+        fused = self.relu(self.bn1(self.conv1(fused)))
+        return self.dropout(fused)
+##################### Deformable
+class _ASPPModuleDeformable(nn.Module):
+    """One deformable ASPP branch: bias-free DeformableConv2d -> norm -> ReLU.
+    Batch norm is replaced by an identity when `config.batch_size` is not greater than 1.
+    """
+    def __init__(self, in_channels, planes, kernel_size, padding):
+        super(_ASPPModuleDeformable, self).__init__()
+        self.atrous_conv = DeformableConv2d(
+            in_channels, planes,
+            kernel_size=kernel_size, stride=1, padding=padding, bias=False)
+        if config.batch_size > 1:
+            self.bn = nn.BatchNorm2d(planes)
+        else:
+            self.bn = nn.Identity()
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, x):
+        return self.relu(self.bn(self.atrous_conv(x)))
+class ASPPDeformable(nn.Module):
+    """ASPP variant built from deformable convolutions.
+    A 1x1 deformable branch, one deformable branch per entry of `parallel_block_sizes` and a
+    global-average-pooling branch are concatenated along channels and fused by a 1x1 convolution,
+    norm, ReLU and dropout.
+    Args:
+        in_channels: Channels of the input feature map.
+        out_channels: Channels of the result; defaults to `in_channels`.
+        parallel_block_sizes: Kernel sizes of the parallel deformable branches.
+    """
+    def __init__(self, in_channels, out_channels=None, parallel_block_sizes=[1, 3, 7]):
+        super(ASPPDeformable, self).__init__()
+        self.down_scale = 1
+        out_channels = in_channels if out_channels is None else out_channels
+        self.in_channelster = 256 // self.down_scale
+        width = self.in_channelster
+        self.aspp1 = _ASPPModuleDeformable(in_channels, width, 1, padding=0)
+        # each branch pads by half its kernel size
+        deform_branches = []
+        for conv_size in parallel_block_sizes:
+            deform_branches.append(
+                _ASPPModuleDeformable(in_channels, width, conv_size, padding=int(conv_size//2)))
+        self.aspp_deforms = nn.ModuleList(deform_branches)
+        self.global_avg_pool = nn.Sequential(
+            nn.AdaptiveAvgPool2d((1, 1)),
+            nn.Conv2d(in_channels, width, 1, stride=1, bias=False),
+            nn.BatchNorm2d(width) if config.batch_size > 1 else nn.Identity(),
+            nn.ReLU(inplace=True))
+        # the 1x1 branch and the pooled branch add two inputs to the deformable branches
+        n_branches = 2 + len(self.aspp_deforms)
+        self.conv1 = nn.Conv2d(width * n_branches, out_channels, 1, bias=False)
+        if config.batch_size > 1:
+            self.bn1 = nn.BatchNorm2d(out_channels)
+        else:
+            self.bn1 = nn.Identity()
+        self.relu = nn.ReLU(inplace=True)
+        self.dropout = nn.Dropout(0.5)
+    def forward(self, x):
+        point_branch = self.aspp1(x)
+        deform_outs = []
+        for branch in self.aspp_deforms:
+            deform_outs.append(branch(x))
+        # the pooled 1x1 map is resized to the spatial size of the 1x1 branch
+        pooled = self.global_avg_pool(x)
+        pooled = F.interpolate(pooled, size=point_branch.size()[2:], mode='bilinear', align_corners=True)
+        fused = torch.cat((point_branch, *deform_outs, pooled), dim=1)
+        fused = self.relu(self.bn1(self.conv1(fused)))
+        return self.dropout(fused)
+### models/refinement/refiner.py
+import torch
+import torch.nn as nn
+from collections import OrderedDict
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision.models import vgg16, vgg16_bn
+from torchvision.models import resnet50
+# from config import Config
+# from dataset import class_labels_TR_sorted
+# from models.build_backbone import build_backbone
+# from models.decoder_blocks import BasicDecBlk
+# from models.lateral_blocks import BasicLatBlk
+# from models.ing import *
+# from models.stem_layer import StemLayer
+class RefinerPVTInChannels4(nn.Module):
+    """Refiner whose backbone consumes the (possibly concatenated) input directly.
+
+    The backbone is built with ``params_settings='in_channels=4'``; its deepest
+    feature map passes through ``squeeze_module`` before the decoder produces
+    the predictions.
+    """
+    def __init__(self, in_channels=3+1):
+        super().__init__()
+        self.config = Config()
+        self.epoch = 1
+        # NOTE(review): `in_channels` is accepted but never read in this class;
+        # the backbone is always requested with 'in_channels=4'.
+        self.bb = build_backbone(self.config.bb, params_settings='in_channels=4')
+        # Per-backbone channel counts, listed from the deepest stage to the shallowest.
+        lateral_channels_in_collection = {
+            'vgg16': [512, 256, 128, 64],
+            'vgg16bn': [512, 256, 128, 64],
+            'resnet50': [1024, 512, 256, 64],
+            'pvt_v2_b2': [512, 320, 128, 64],
+            'pvt_v2_b5': [512, 320, 128, 64],
+            'swin_v1_b': [1024, 512, 256, 128],
+            'swin_v1_l': [1536, 768, 384, 192],
+        }
+        channels = lateral_channels_in_collection[self.config.bb]
+        deepest = channels[0]
+        self.squeeze_module = BasicDecBlk(deepest, deepest)
+        self.decoder = Decoder(channels)
+        # Backbone freezing is hard-disabled here; flip the flag to freeze `bb.` parameters.
+        freeze_backbone = False
+        if freeze_backbone:
+            for param_name, param in self.named_parameters():
+                if 'bb.' in param_name:
+                    param.requires_grad = False
+    def forward(self, x):
+        """Encode ``x`` with the backbone and decode to a list of predictions.
+
+        ``x`` may be a tensor or a list of tensors; a list is concatenated
+        along dim 1 first.
+        """
+        if isinstance(x, list):
+            x = torch.cat(x, dim=1)
+        ########## Encoder ##########
+        if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']:
+            # These backbones expose their stages as conv1..conv4; chain them manually.
+            stage_outputs = []
+            feat = x
+            for stage in (self.bb.conv1, self.bb.conv2, self.bb.conv3, self.bb.conv4):
+                feat = stage(feat)
+                stage_outputs.append(feat)
+            x1, x2, x3, x4 = stage_outputs
+        else:
+            x1, x2, x3, x4 = self.bb(x)
+        x4 = self.squeeze_module(x4)
+        ########## Decoder ##########
+        return self.decoder([x, x1, x2, x3, x4])
+class Refiner(nn.Module):
+    """Refiner that maps its input to 3 channels with a stem layer before the backbone.
+
+    Args:
+        in_channels: channel count of the (concatenated) input given to the stem layer.
+    """
+    def __init__(self, in_channels=3+1):
+        super().__init__()
+        self.config = Config()
+        self.epoch = 1
+        # The stem uses 'BN' unless the configured batch size is 1, in which case 'LN'.
+        stem_norm = 'BN' if self.config.batch_size > 1 else 'LN'
+        self.stem_layer = StemLayer(in_channels=in_channels, inter_channels=48, out_channels=3, norm_layer=stem_norm)
+        self.bb = build_backbone(self.config.bb)
+        # Per-backbone channel counts, listed from the deepest stage to the shallowest.
+        lateral_channels_in_collection = {
+            'vgg16': [512, 256, 128, 64],
+            'vgg16bn': [512, 256, 128, 64],
+            'resnet50': [1024, 512, 256, 64],
+            'pvt_v2_b2': [512, 320, 128, 64],
+            'pvt_v2_b5': [512, 320, 128, 64],
+            'swin_v1_b': [1024, 512, 256, 128],
+            'swin_v1_l': [1536, 768, 384, 192],
+        }
+        channels = lateral_channels_in_collection[self.config.bb]
+        deepest = channels[0]
+        self.squeeze_module = BasicDecBlk(deepest, deepest)
+        self.decoder = Decoder(channels)
+        # Backbone freezing is hard-disabled here; flip the flag to freeze `bb.` parameters.
+        freeze_backbone = False
+        if freeze_backbone:
+            for param_name, param in self.named_parameters():
+                if 'bb.' in param_name:
+                    param.requires_grad = False
+    def forward(self, x):
+        """Stem -> backbone -> squeeze -> decoder; returns the decoder's list of predictions.
+
+        ``x`` may be a tensor or a list of tensors; a list is concatenated
+        along dim 1 first.
+        """
+        if isinstance(x, list):
+            x = torch.cat(x, dim=1)
+        x = self.stem_layer(x)
+        ########## Encoder ##########
+        if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']:
+            # These backbones expose their stages as conv1..conv4; chain them manually.
+            stage_outputs = []
+            feat = x
+            for stage in (self.bb.conv1, self.bb.conv2, self.bb.conv3, self.bb.conv4):
+                feat = stage(feat)
+                stage_outputs.append(feat)
+            x1, x2, x3, x4 = stage_outputs
+        else:
+            x1, x2, x3, x4 = self.bb(x)
+        x4 = self.squeeze_module(x4)
+        ########## Decoder ##########
+        return self.decoder([x, x1, x2, x3, x4])
+class Decoder(nn.Module):
+    """Top-down decoder with lateral connections (refiner variant).
+
+    NOTE(review): a second class named ``Decoder`` is defined later in this
+    file. If both definitions live in one module, the later one rebinds the
+    name, so ``Decoder(channels)`` calls executed after import resolve to that
+    class rather than this one -- confirm which is intended.
+
+    Args:
+        channels: indexable of four channel counts, deepest stage first.
+    """
+    def __init__(self, channels):
+        super().__init__()
+        self.config = Config()
+        DecoderBlock = BasicDecBlk
+        LateralBlock = BasicLatBlk
+        ch_deep, ch_3, ch_2, ch_1 = channels[0], channels[1], channels[2], channels[3]
+        self.decoder_block4 = DecoderBlock(ch_deep, ch_3)
+        self.decoder_block3 = DecoderBlock(ch_3, ch_2)
+        self.decoder_block2 = DecoderBlock(ch_2, ch_1)
+        self.decoder_block1 = DecoderBlock(ch_1, ch_1//2)
+        self.lateral_block4 = LateralBlock(ch_3, ch_3)
+        self.lateral_block3 = LateralBlock(ch_2, ch_2)
+        self.lateral_block2 = LateralBlock(ch_1, ch_1)
+        if self.config.ms_supervision:
+            # 1x1 heads producing one-channel side outputs at the intermediate scales.
+            self.conv_ms_spvn_4 = nn.Conv2d(ch_3, 1, 1, 1, 0)
+            self.conv_ms_spvn_3 = nn.Conv2d(ch_2, 1, 1, 1, 0)
+            self.conv_ms_spvn_2 = nn.Conv2d(ch_1, 1, 1, 1, 0)
+        self.conv_out1 = nn.Sequential(nn.Conv2d(ch_1//2, 1, 1, 1, 0))
+    def forward(self, features):
+        """Decode ``[x, x1, x2, x3, x4]`` into a list of one-channel maps.
+
+        The final entry is computed at the spatial size of ``x``. When
+        ``config.ms_supervision`` is set, three side outputs (from the deepest
+        decoder stage to the shallowest) precede it.
+        """
+        x, x1, x2, x3, x4 = features
+
+        def resize_like(tensor, reference):
+            return F.interpolate(tensor, size=reference.shape[2:], mode='bilinear', align_corners=True)
+
+        p4 = self.decoder_block4(x4)
+        p3 = self.decoder_block3(resize_like(p4, x3) + self.lateral_block4(x3))
+        p2 = self.decoder_block2(resize_like(p3, x2) + self.lateral_block3(x2))
+        head = self.decoder_block1(resize_like(p2, x1) + self.lateral_block2(x1))
+        p1_out = self.conv_out1(resize_like(head, x))
+        outs = []
+        if self.config.ms_supervision:
+            outs.append(self.conv_ms_spvn_4(p4))
+            outs.append(self.conv_ms_spvn_3(p3))
+            outs.append(self.conv_ms_spvn_2(p2))
+        outs.append(p1_out)
+        return outs
+class RefUNet(nn.Module):
+    """Small U-Net style refinement network.
+
+    Four encoder stages (each after the first preceded by a 2x max-pool with
+    ``ceil_mode=True``), a bottleneck (``pool4`` + ``decoder_5``), and four
+    decoder stages that each concatenate an upsampled tensor with the matching
+    encoder output. A final 3x3 convolution yields a one-channel map.
+
+    Args:
+        in_channels: channel count of the (concatenated) input.
+    """
+    # Refinement
+    def __init__(self, in_channels=3+1):
+        super(RefUNet, self).__init__()
+        self.encoder_1 = nn.Sequential(
+            nn.Conv2d(in_channels, 64, 3, 1, 1),
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.encoder_2 = nn.Sequential(
+            nn.MaxPool2d(2, 2, ceil_mode=True),
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.encoder_3 = nn.Sequential(
+            nn.MaxPool2d(2, 2, ceil_mode=True),
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.encoder_4 = nn.Sequential(
+            nn.MaxPool2d(2, 2, ceil_mode=True),
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.pool4 = nn.MaxPool2d(2, 2, ceil_mode=True)
+        #####
+        self.decoder_5 = nn.Sequential(
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        #####
+        # Each decoder stage takes 128 channels: 64 upsampled + 64 from the skip connection.
+        self.decoder_4 = nn.Sequential(
+            nn.Conv2d(128, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.decoder_3 = nn.Sequential(
+            nn.Conv2d(128, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.decoder_2 = nn.Sequential(
+            nn.Conv2d(128, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.decoder_1 = nn.Sequential(
+            nn.Conv2d(128, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True)
+        )
+        self.conv_d0 = nn.Conv2d(64, 1, 3, 1, 1)
+        # Kept as an attribute for backward compatibility; forward() now resizes
+        # to the skip tensor's exact size instead of using a fixed factor of 2.
+        self.upscore2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
+    @staticmethod
+    def _upsample_like(src, ref):
+        """Bilinearly resize ``src`` to the spatial size of ``ref``."""
+        return F.interpolate(src, size=ref.shape[2:], mode='bilinear', align_corners=True)
+    def forward(self, x):
+        """Return a single-element list holding the one-channel refined map.
+
+        ``x`` may be a tensor or a list of tensors; a list is concatenated
+        along dim 1 first.
+
+        The encoder pools with ``ceil_mode=True``, so for spatial sizes that
+        are not multiples of 16 a fixed 2x upsample would overshoot the skip
+        tensor by one pixel and the concatenation would fail. Resizing to the
+        skip tensor's size avoids that and yields the same output size as a 2x
+        upsample whenever the sizes already matched.
+        """
+        if isinstance(x, list):
+            x = torch.cat(x, dim=1)
+        hx1 = self.encoder_1(x)
+        hx2 = self.encoder_2(hx1)
+        hx3 = self.encoder_3(hx2)
+        hx4 = self.encoder_4(hx3)
+        hx = self.decoder_5(self.pool4(hx4))
+        d4 = self.decoder_4(torch.cat((self._upsample_like(hx, hx4), hx4), 1))
+        d3 = self.decoder_3(torch.cat((self._upsample_like(d4, hx3), hx3), 1))
+        d2 = self.decoder_2(torch.cat((self._upsample_like(d3, hx2), hx2), 1))
+        d1 = self.decoder_1(torch.cat((self._upsample_like(d2, hx1), hx1), 1))
+        return [self.conv_d0(d1)]
+### models/stem_layer.py
+import torch.nn as nn
+# from utils import build_act_layer, build_norm_layer
+class StemLayer(nn.Module):
+    r""" Stem layer of InternImage
+
+    Two 3x3, stride-1, padding-1 convolutions, each followed by a
+    normalization layer, with one activation between the two stages.
+
+    Args:
+        in_channels (int): number of input channels
+        inter_channels (int): number of channels between the two convolutions
+        out_channels (int): number of output channels
+        act_layer (str): activation layer name passed to ``build_act_layer``
+        norm_layer (str): normalization layer name passed to ``build_norm_layer``
+    """
+    def __init__(self, in_channels=3+1, inter_channels=48, out_channels=96, act_layer='GELU', norm_layer='BN'):
+        super().__init__()
+        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
+        # Both norm layers are built with 'channels_first' for the two format arguments.
+        norm_formats = ('channels_first', 'channels_first')
+        self.conv1 = nn.Conv2d(in_channels, inter_channels, **conv_kwargs)
+        self.norm1 = build_norm_layer(inter_channels, norm_layer, *norm_formats)
+        self.act = build_act_layer(act_layer)
+        self.conv2 = nn.Conv2d(inter_channels, out_channels, **conv_kwargs)
+        self.norm2 = build_norm_layer(out_channels, norm_layer, *norm_formats)
+    def forward(self, x):
+        """conv1 -> norm1 -> act -> conv2 -> norm2 (no activation after the second stage)."""
+        hidden = self.act(self.norm1(self.conv1(x)))
+        return self.norm2(self.conv2(hidden))
+### models/birefnet.py
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from kornia.filters import laplacian
+from transformers import PreTrainedModel
+# from config import Config
+# from dataset import class_labels_TR_sorted
+# from models.build_backbone import build_backbone
+# from models.decoder_blocks import BasicDecBlk, ResBlk, HierarAttDecBlk
+# from models.lateral_blocks import BasicLatBlk
+# from models.aspp import ASPP, ASPPDeformable
+# from models.ing import *
+# from models.refiner import Refiner, RefinerPVTInChannels4, RefUNet
+# from models.stem_layer import StemLayer
+from .BiRefNet_config import BiRefNetConfig
+class BiRefNet(
+    PreTrainedModel
+):
+    """BiRefNet segmentation model wrapped as a ``PreTrainedModel``.
+
+    Pipeline: backbone encoder (optionally with a second half-resolution pass),
+    optional context concatenation into the deepest feature, optional squeeze
+    block, then the decoder. In training mode ``forward`` also returns the
+    auxiliary classification predictions.
+    """
+    config_class = BiRefNetConfig
+    def __init__(self, bb_pretrained=True, config=BiRefNetConfig()):
+        """Build backbone, optional heads and decoder.
+
+        Args:
+            bb_pretrained: kept for interface compatibility only; its value is
+                immediately replaced by ``config.bb_pretrained``.
+            config: ``BiRefNetConfig`` forwarded to ``PreTrainedModel.__init__``.
+        """
+        super(BiRefNet, self).__init__(config)
+        # The constructor argument is superseded by the value stored on the passed config.
+        bb_pretrained = config.bb_pretrained
+        # From here on ``self.config`` is a project ``Config`` instance, not the
+        # ``BiRefNetConfig`` that was passed in.
+        self.config = Config()
+        self.epoch = 1
+        self.bb = build_backbone(self.config.bb, pretrained=bb_pretrained)
+        channels = self.config.lateral_channels_in_collection
+        if self.config.auxiliary_classification:
+            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+            self.cls_head = nn.Sequential(
+                nn.Linear(channels[0], len(class_labels_TR_sorted))
+            )
+        if self.config.squeeze_block:
+            # ``squeeze_block`` has the form '<BlockName>_x<count>'.
+            # NOTE(review): both halves go through eval(); this is only safe
+            # while the value comes from the project's own Config.
+            self.squeeze_module = nn.Sequential(*[
+                eval(self.config.squeeze_block.split('_x')[0])(channels[0]+sum(self.config.cxt), channels[0])
+                for _ in range(eval(self.config.squeeze_block.split('_x')[1]))
+            ])
+        self.decoder = Decoder(channels)
+        if self.config.ender:
+            self.dec_end = nn.Sequential(
+                nn.Conv2d(1, 16, 3, 1, 1),
+                nn.Conv2d(16, 1, 3, 1, 1),
+                nn.ReLU(inplace=True),
+            )
+        # refine patch-level segmentation
+        if self.config.refine:
+            if self.config.refine == 'itself':
+                self.stem_layer = StemLayer(in_channels=3+1, inter_channels=48, out_channels=3, norm_layer='BN' if self.config.batch_size > 1 else 'LN')
+            else:
+                # NOTE(review): the refiner class is chosen by eval() on a config string.
+                self.refiner = eval('{}({})'.format(self.config.refine, 'in_channels=3+1'))
+        if self.config.freeze_bb:
+            # Freeze the backbone, but leave any backbone inside the refiner trainable.
+            for key, value in self.named_parameters():
+                if 'bb.' in key and 'refiner.' not in key:
+                    value.requires_grad = False
+    def forward_enc(self, x):
+        """Run the backbone and return ``((x1, x2, x3, x4), class_preds)``.
+
+        ``class_preds`` is ``None`` unless the model is in training mode and
+        ``config.auxiliary_classification`` is set.
+        """
+        if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']:
+            x1 = self.bb.conv1(x); x2 = self.bb.conv2(x1); x3 = self.bb.conv3(x2); x4 = self.bb.conv4(x3)
+        else:
+            x1, x2, x3, x4 = self.bb(x)
+            if self.config.mul_scl_ipt == 'cat':
+                # Second pass on a half-resolution input; features are resized
+                # back and concatenated along channels.
+                _, _, H, W = x.shape
+                x1_, x2_, x3_, x4_ = self.bb(F.interpolate(x, size=(H//2, W//2), mode='bilinear', align_corners=True))
+                x1 = torch.cat([x1, F.interpolate(x1_, size=x1.shape[2:], mode='bilinear', align_corners=True)], dim=1)
+                x2 = torch.cat([x2, F.interpolate(x2_, size=x2.shape[2:], mode='bilinear', align_corners=True)], dim=1)
+                x3 = torch.cat([x3, F.interpolate(x3_, size=x3.shape[2:], mode='bilinear', align_corners=True)], dim=1)
+                x4 = torch.cat([x4, F.interpolate(x4_, size=x4.shape[2:], mode='bilinear', align_corners=True)], dim=1)
+            elif self.config.mul_scl_ipt == 'add':
+                # Same second pass, but the resized features are summed instead.
+                _, _, H, W = x.shape
+                x1_, x2_, x3_, x4_ = self.bb(F.interpolate(x, size=(H//2, W//2), mode='bilinear', align_corners=True))
+                x1 = x1 + F.interpolate(x1_, size=x1.shape[2:], mode='bilinear', align_corners=True)
+                x2 = x2 + F.interpolate(x2_, size=x2.shape[2:], mode='bilinear', align_corners=True)
+                x3 = x3 + F.interpolate(x3_, size=x3.shape[2:], mode='bilinear', align_corners=True)
+                x4 = x4 + F.interpolate(x4_, size=x4.shape[2:], mode='bilinear', align_corners=True)
+        class_preds = self.cls_head(self.avgpool(x4).view(x4.shape[0], -1)) if self.training and self.config.auxiliary_classification else None
+        if self.config.cxt:
+            # Concatenate the last len(config.cxt) shallower features (resized
+            # to x4's spatial size) in front of x4.
+            x4 = torch.cat(
+                (
+                    *[
+                        F.interpolate(x1, size=x4.shape[2:], mode='bilinear', align_corners=True),
+                        F.interpolate(x2, size=x4.shape[2:], mode='bilinear', align_corners=True),
+                        F.interpolate(x3, size=x4.shape[2:], mode='bilinear', align_corners=True),
+                    ][-len(self.config.cxt):],
+                    x4
+                ),
+                dim=1
+            )
+        return (x1, x2, x3, x4), class_preds
+    def forward_ori(self, x):
+        """Encoder -> optional squeeze block -> decoder; returns ``(scaled_preds, class_preds)``."""
+        ########## Encoder ##########
+        (x1, x2, x3, x4), class_preds = self.forward_enc(x)
+        if self.config.squeeze_block:
+            x4 = self.squeeze_module(x4)
+        ########## Decoder ##########
+        features = [x, x1, x2, x3, x4]
+        if self.training and self.config.out_ref:
+            # Extra decoder input during training: Laplacian of the channel-mean of the input.
+            features.append(laplacian(torch.mean(x, dim=1).unsqueeze(1), kernel_size=5))
+        scaled_preds = self.decoder(features)
+        return scaled_preds, class_preds
+    def forward(self, x):
+        """Return ``[scaled_preds, [class_preds]]`` in training mode, else ``scaled_preds``."""
+        scaled_preds, class_preds = self.forward_ori(x)
+        class_preds_lst = [class_preds]
+        return [scaled_preds, class_preds_lst] if self.training else scaled_preds
+class Decoder(nn.Module):
+    """Top-down decoder of BiRefNet with optional image-patch inputs and gradient attention.
+
+    Block types are taken from ``config.dec_blk`` / ``config.lat_blk``. When
+    ``config.dec_ipt`` is set, each stage additionally receives features
+    computed from the input image (``ipt_blk*``). When ``config.out_ref`` is
+    set, each intermediate stage is modulated by a sigmoid attention map
+    (``gdt_convs_*``).
+
+    NOTE(review): the ``gdt_convs_*`` layers are only created when both
+    ``ms_supervision`` and ``out_ref`` are set, while ``forward`` uses them
+    whenever ``out_ref`` is set -- confirm the two flags are always enabled
+    together.
+    """
+    def __init__(self, channels):
+        super(Decoder, self).__init__()
+        self.config = Config()
+        # Block classes are resolved by name from the config strings.
+        DecoderBlock = eval(self.config.dec_blk)
+        LateralBlock = eval(self.config.lat_blk)
+        if self.config.dec_ipt:
+            self.split = self.config.dec_ipt_split
+            N_dec_ipt = 64
+            DBlock = SimpleConvs
+            ic = 64
+            # Index into [N_dec_ipt, channels[i]//8]; 1 selects the channels[i]//8 option.
+            ipt_cha_opt = 1
+            # With splitting, the image is cut into patches stacked on the channel
+            # axis, hence the 2**k * 3 input channels (presumably 3 = RGB -- TODO confirm).
+            self.ipt_blk5 = DBlock(2**10*3 if self.split else 3, [N_dec_ipt, channels[0]//8][ipt_cha_opt], inter_channels=ic)
+            self.ipt_blk4 = DBlock(2**8*3 if self.split else 3, [N_dec_ipt, channels[0]//8][ipt_cha_opt], inter_channels=ic)
+            self.ipt_blk3 = DBlock(2**6*3 if self.split else 3, [N_dec_ipt, channels[1]//8][ipt_cha_opt], inter_channels=ic)
+            self.ipt_blk2 = DBlock(2**4*3 if self.split else 3, [N_dec_ipt, channels[2]//8][ipt_cha_opt], inter_channels=ic)
+            self.ipt_blk1 = DBlock(2**0*3 if self.split else 3, [N_dec_ipt, channels[3]//8][ipt_cha_opt], inter_channels=ic)
+        else:
+            self.split = None
+        # Each decoder block's input width grows by the matching ipt_blk output width when dec_ipt is on.
+        self.decoder_block4 = DecoderBlock(channels[0]+([N_dec_ipt, channels[0]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[1])
+        self.decoder_block3 = DecoderBlock(channels[1]+([N_dec_ipt, channels[0]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[2])
+        self.decoder_block2 = DecoderBlock(channels[2]+([N_dec_ipt, channels[1]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[3])
+        self.decoder_block1 = DecoderBlock(channels[3]+([N_dec_ipt, channels[2]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[3]//2)
+        self.conv_out1 = nn.Sequential(nn.Conv2d(channels[3]//2+([N_dec_ipt, channels[3]//8][ipt_cha_opt] if self.config.dec_ipt else 0), 1, 1, 1, 0))
+        self.lateral_block4 = LateralBlock(channels[1], channels[1])
+        self.lateral_block3 = LateralBlock(channels[2], channels[2])
+        self.lateral_block2 = LateralBlock(channels[3], channels[3])
+        if self.config.ms_supervision:
+            # 1x1 heads producing one-channel side outputs m4, m3, m2.
+            self.conv_ms_spvn_4 = nn.Conv2d(channels[1], 1, 1, 1, 0)
+            self.conv_ms_spvn_3 = nn.Conv2d(channels[2], 1, 1, 1, 0)
+            self.conv_ms_spvn_2 = nn.Conv2d(channels[3], 1, 1, 1, 0)
+            if self.config.out_ref:
+                # Per-stage feature extractor plus two 1x1 heads: a prediction
+                # head (training only) and an attention head.
+                _N = 16
+                self.gdt_convs_4 = nn.Sequential(nn.Conv2d(channels[1], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True))
+                self.gdt_convs_3 = nn.Sequential(nn.Conv2d(channels[2], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True))
+                self.gdt_convs_2 = nn.Sequential(nn.Conv2d(channels[3], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True))
+                self.gdt_convs_pred_4 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0))
+                self.gdt_convs_pred_3 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0))
+                self.gdt_convs_pred_2 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0))
+                self.gdt_convs_attn_4 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0))
+                self.gdt_convs_attn_3 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0))
+                self.gdt_convs_attn_2 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0))
+    def get_patches_batch(self, x, p):
+        """Cut each sample of ``x`` into patches the size of ``p`` and stack them on the channel axis.
+
+        Each sample is split into columns of width ``p.shape[3]``, each column
+        into pieces of height ``p.shape[2]``; the pieces are concatenated along
+        dim 1, and the per-sample results along dim 0.
+        NOTE(review): presumably ``x``'s height/width are exact multiples of
+        ``p``'s; otherwise ``torch.split`` yields a smaller trailing piece and
+        the concatenation would fail -- confirm.
+        """
+        _size_h, _size_w = p.shape[2:]
+        patches_batch = []
+        for idx in range(x.shape[0]):
+            columns_x = torch.split(x[idx], split_size_or_sections=_size_w, dim=-1)
+            patches_x = []
+            for column_x in columns_x:
+                # The comprehension variable `p` is local to the comprehension and does not clobber the argument.
+                patches_x += [p.unsqueeze(0) for p in torch.split(column_x, split_size_or_sections=_size_h, dim=-2)]
+            patch_sample = torch.cat(patches_x, dim=1)
+            patches_batch.append(patch_sample)
+        return torch.cat(patches_batch, dim=0)
+    def forward(self, features):
+        """Decode the feature list into predictions.
+
+        ``features`` is ``[x, x1, x2, x3, x4]``, with a sixth element
+        ``gdt_gt`` appended when training with ``config.out_ref``.
+
+        Returns ``outs`` (side outputs m4, m3, m2 if ``ms_supervision`` is
+        set, followed by the final map), or
+        ``([outs_gdt_pred, outs_gdt_label], outs)`` when training with
+        ``config.out_ref``.
+        """
+        if self.training and self.config.out_ref:
+            outs_gdt_pred = []
+            outs_gdt_label = []
+            x, x1, x2, x3, x4, gdt_gt = features
+        else:
+            x, x1, x2, x3, x4 = features
+        outs = []
+        # ---- Stage 4 (deepest) ----
+        if self.config.dec_ipt:
+            patches_batch = self.get_patches_batch(x, x4) if self.split else x
+            x4 = torch.cat((x4, self.ipt_blk5(F.interpolate(patches_batch, size=x4.shape[2:], mode='bilinear', align_corners=True))), 1)
+        p4 = self.decoder_block4(x4)
+        m4 = self.conv_ms_spvn_4(p4) if self.config.ms_supervision else None
+        if self.config.out_ref:
+            p4_gdt = self.gdt_convs_4(p4)
+            if self.training:
+                # >> GT:
+                m4_dia = m4
+                gdt_label_main_4 = gdt_gt * F.interpolate(m4_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True)
+                outs_gdt_label.append(gdt_label_main_4)
+                # >> Pred:
+                gdt_pred_4 = self.gdt_convs_pred_4(p4_gdt)
+                outs_gdt_pred.append(gdt_pred_4)
+            gdt_attn_4 = self.gdt_convs_attn_4(p4_gdt).sigmoid()
+            # >> Finally:
+            p4 = p4 * gdt_attn_4
+        _p4 = F.interpolate(p4, size=x3.shape[2:], mode='bilinear', align_corners=True)
+        _p3 = _p4 + self.lateral_block4(x3)
+        # ---- Stage 3 ----
+        if self.config.dec_ipt:
+            patches_batch = self.get_patches_batch(x, _p3) if self.split else x
+            _p3 = torch.cat((_p3, self.ipt_blk4(F.interpolate(patches_batch, size=x3.shape[2:], mode='bilinear', align_corners=True))), 1)
+        p3 = self.decoder_block3(_p3)
+        m3 = self.conv_ms_spvn_3(p3) if self.config.ms_supervision else None
+        if self.config.out_ref:
+            p3_gdt = self.gdt_convs_3(p3)
+            if self.training:
+                # >> GT:
+                # m3 --dilation--> m3_dia
+                # G_3^gt * m3_dia --> G_3^m, which is the label of gradient
+                m3_dia = m3
+                gdt_label_main_3 = gdt_gt * F.interpolate(m3_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True)
+                outs_gdt_label.append(gdt_label_main_3)
+                # >> Pred:
+                # p3 --conv--BN--> F_3^G, where F_3^G predicts the \hat{G_3} with xx
+                # F_3^G --sigmoid--> A_3^G
+                gdt_pred_3 = self.gdt_convs_pred_3(p3_gdt)
+                outs_gdt_pred.append(gdt_pred_3)
+            gdt_attn_3 = self.gdt_convs_attn_3(p3_gdt).sigmoid()
+            # >> Finally:
+            # p3 = p3 * A_3^G
+            p3 = p3 * gdt_attn_3
+        _p3 = F.interpolate(p3, size=x2.shape[2:], mode='bilinear', align_corners=True)
+        _p2 = _p3 + self.lateral_block3(x2)
+        # ---- Stage 2 ----
+        if self.config.dec_ipt:
+            patches_batch = self.get_patches_batch(x, _p2) if self.split else x
+            _p2 = torch.cat((_p2, self.ipt_blk3(F.interpolate(patches_batch, size=x2.shape[2:], mode='bilinear', align_corners=True))), 1)
+        p2 = self.decoder_block2(_p2)
+        m2 = self.conv_ms_spvn_2(p2) if self.config.ms_supervision else None
+        if self.config.out_ref:
+            p2_gdt = self.gdt_convs_2(p2)
+            if self.training:
+                # >> GT:
+                m2_dia = m2
+                gdt_label_main_2 = gdt_gt * F.interpolate(m2_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True)
+                outs_gdt_label.append(gdt_label_main_2)
+                # >> Pred:
+                gdt_pred_2 = self.gdt_convs_pred_2(p2_gdt)
+                outs_gdt_pred.append(gdt_pred_2)
+            gdt_attn_2 = self.gdt_convs_attn_2(p2_gdt).sigmoid()
+            # >> Finally:
+            p2 = p2 * gdt_attn_2
+        _p2 = F.interpolate(p2, size=x1.shape[2:], mode='bilinear', align_corners=True)
+        _p1 = _p2 + self.lateral_block2(x1)
+        # ---- Stage 1 and output head (no gradient attention at this stage) ----
+        if self.config.dec_ipt:
+            patches_batch = self.get_patches_batch(x, _p1) if self.split else x
+            _p1 = torch.cat((_p1, self.ipt_blk2(F.interpolate(patches_batch, size=x1.shape[2:], mode='bilinear', align_corners=True))), 1)
+        _p1 = self.decoder_block1(_p1)
+        # Bring the last decoder feature to the input's spatial size before the 1x1 output conv.
+        _p1 = F.interpolate(_p1, size=x.shape[2:], mode='bilinear', align_corners=True)
+        if self.config.dec_ipt:
+            patches_batch = self.get_patches_batch(x, _p1) if self.split else x
+            _p1 = torch.cat((_p1, self.ipt_blk1(F.interpolate(patches_batch, size=x.shape[2:], mode='bilinear', align_corners=True))), 1)
+        p1_out = self.conv_out1(_p1)
+        if self.config.ms_supervision:
+            outs.append(m4)
+            outs.append(m3)
+            outs.append(m2)
+        outs.append(p1_out)
+        return outs if not (self.config.out_ref and self.training) else ([outs_gdt_pred, outs_gdt_label], outs)
+class SimpleConvs(nn.Module):
+    """Two stacked 3x3 convolutions with nothing in between.
+
+    Args:
+        in_channels: channels of the input tensor.
+        out_channels: channels of the output tensor.
+        inter_channels: channels between the two convolutions.
+    """
+    def __init__(
+        self, in_channels: int, out_channels: int, inter_channels=64
+    ) -> None:
+        super().__init__()
+        # Positional Conv2d arguments: kernel_size=3, stride=1, padding=1 (spatial size is preserved).
+        conv_args = (3, 1, 1)
+        self.conv1 = nn.Conv2d(in_channels, inter_channels, *conv_args)
+        self.conv_out = nn.Conv2d(inter_channels, out_channels, *conv_args)
+    def forward(self, x):
+        """Apply ``conv1`` then ``conv_out`` to ``x``."""
+        hidden = self.conv1(x)
+        return self.conv_out(hidden)

RMBG/RMBG-2.0/config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "_name_or_path": "ZhengPeng7/BiRefNet",
+  "architectures": [
+    "BiRefNet"
+  ],
+  "auto_map": {
+    "AutoConfig": "BiRefNet_config.BiRefNetConfig",
+    "AutoModelForImageSegmentation": "birefnet.BiRefNet"
+  },
+  "custom_pipelines": {
+    "image-segmentation": {
+      "pt": [
+        "AutoModelForImageSegmentation"
+      ],
+      "tf": [],
+      "type": "image"
+    }
+  },
+  "bb_pretrained": false
+}

florence2/DocVQA/added_tokens.json ADDED Viewed

	@@ -0,0 +1,1026 @@

+{
+  "</cap>": 51270,
+  "</dcap>": 51274,
+  "</grounding>": 51276,
+  "</ncap>": 51272,
+  "</ocr>": 50268,
+  "</od>": 50266,
+  "</poly>": 51287,
+  "</proposal>": 51285,
+  "</region_cap>": 51281,
+  "</region_to_desciption>": 51283,
+  "</seg>": 51278,
+  "<and>": 51288,
+  "<cap>": 51269,
+  "<dcap>": 51273,
+  "<grounding>": 51275,
+  "<loc_0>": 50269,
+  "<loc_100>": 50369,
+  "<loc_101>": 50370,
+  "<loc_102>": 50371,
+  "<loc_103>": 50372,
+  "<loc_104>": 50373,
+  "<loc_105>": 50374,
+  "<loc_106>": 50375,
+  "<loc_107>": 50376,
+  "<loc_108>": 50377,
+  "<loc_109>": 50378,
+  "<loc_10>": 50279,
+  "<loc_110>": 50379,
+  "<loc_111>": 50380,
+  "<loc_112>": 50381,
+  "<loc_113>": 50382,
+  "<loc_114>": 50383,
+  "<loc_115>": 50384,
+  "<loc_116>": 50385,
+  "<loc_117>": 50386,
+  "<loc_118>": 50387,
+  "<loc_119>": 50388,
+  "<loc_11>": 50280,
+  "<loc_120>": 50389,
+  "<loc_121>": 50390,
+  "<loc_122>": 50391,
+  "<loc_123>": 50392,
+  "<loc_124>": 50393,
+  "<loc_125>": 50394,
+  "<loc_126>": 50395,
+  "<loc_127>": 50396,
+  "<loc_128>": 50397,
+  "<loc_129>": 50398,
+  "<loc_12>": 50281,
+  "<loc_130>": 50399,
+  "<loc_131>": 50400,
+  "<loc_132>": 50401,
+  "<loc_133>": 50402,
+  "<loc_134>": 50403,
+  "<loc_135>": 50404,
+  "<loc_136>": 50405,
+  "<loc_137>": 50406,
+  "<loc_138>": 50407,
+  "<loc_139>": 50408,
+  "<loc_13>": 50282,
+  "<loc_140>": 50409,
+  "<loc_141>": 50410,
+  "<loc_142>": 50411,
+  "<loc_143>": 50412,
+  "<loc_144>": 50413,
+  "<loc_145>": 50414,
+  "<loc_146>": 50415,
+  "<loc_147>": 50416,
+  "<loc_148>": 50417,
+  "<loc_149>": 50418,
+  "<loc_14>": 50283,
+  "<loc_150>": 50419,
+  "<loc_151>": 50420,
+  "<loc_152>": 50421,
+  "<loc_153>": 50422,
+  "<loc_154>": 50423,
+  "<loc_155>": 50424,
+  "<loc_156>": 50425,
+  "<loc_157>": 50426,
+  "<loc_158>": 50427,
+  "<loc_159>": 50428,
+  "<loc_15>": 50284,
+  "<loc_160>": 50429,
+  "<loc_161>": 50430,
+  "<loc_162>": 50431,
+  "<loc_163>": 50432,
+  "<loc_164>": 50433,
+  "<loc_165>": 50434,
+  "<loc_166>": 50435,
+  "<loc_167>": 50436,
+  "<loc_168>": 50437,
+  "<loc_169>": 50438,
+  "<loc_16>": 50285,
+  "<loc_170>": 50439,
+  "<loc_171>": 50440,
+  "<loc_172>": 50441,
+  "<loc_173>": 50442,
+  "<loc_174>": 50443,
+  "<loc_175>": 50444,
+  "<loc_176>": 50445,
+  "<loc_177>": 50446,
+  "<loc_178>": 50447,
+  "<loc_179>": 50448,
+  "<loc_17>": 50286,
+  "<loc_180>": 50449,
+  "<loc_181>": 50450,
+  "<loc_182>": 50451,
+  "<loc_183>": 50452,
+  "<loc_184>": 50453,
+  "<loc_185>": 50454,
+  "<loc_186>": 50455,
+  "<loc_187>": 50456,
+  "<loc_188>": 50457,
+  "<loc_189>": 50458,
+  "<loc_18>": 50287,
+  "<loc_190>": 50459,
+  "<loc_191>": 50460,
+  "<loc_192>": 50461,
+  "<loc_193>": 50462,
+  "<loc_194>": 50463,
+  "<loc_195>": 50464,
+  "<loc_196>": 50465,
+  "<loc_197>": 50466,
+  "<loc_198>": 50467,
+  "<loc_199>": 50468,
+  "<loc_19>": 50288,
+  "<loc_1>": 50270,
+  "<loc_200>": 50469,
+  "<loc_201>": 50470,
+  "<loc_202>": 50471,
+  "<loc_203>": 50472,
+  "<loc_204>": 50473,
+  "<loc_205>": 50474,
+  "<loc_206>": 50475,
+  "<loc_207>": 50476,
+  "<loc_208>": 50477,
+  "<loc_209>": 50478,
+  "<loc_20>": 50289,
+  "<loc_210>": 50479,
+  "<loc_211>": 50480,
+  "<loc_212>": 50481,
+  "<loc_213>": 50482,
+  "<loc_214>": 50483,
+  "<loc_215>": 50484,
+  "<loc_216>": 50485,
+  "<loc_217>": 50486,
+  "<loc_218>": 50487,
+  "<loc_219>": 50488,
+  "<loc_21>": 50290,
+  "<loc_220>": 50489,
+  "<loc_221>": 50490,
+  "<loc_222>": 50491,
+  "<loc_223>": 50492,
+  "<loc_224>": 50493,
+  "<loc_225>": 50494,
+  "<loc_226>": 50495,
+  "<loc_227>": 50496,
+  "<loc_228>": 50497,
+  "<loc_229>": 50498,
+  "<loc_22>": 50291,
+  "<loc_230>": 50499,
+  "<loc_231>": 50500,
+  "<loc_232>": 50501,
+  "<loc_233>": 50502,
+  "<loc_234>": 50503,
+  "<loc_235>": 50504,
+  "<loc_236>": 50505,
+  "<loc_237>": 50506,
+  "<loc_238>": 50507,
+  "<loc_239>": 50508,
+  "<loc_23>": 50292,
+  "<loc_240>": 50509,
+  "<loc_241>": 50510,
+  "<loc_242>": 50511,
+  "<loc_243>": 50512,
+  "<loc_244>": 50513,
+  "<loc_245>": 50514,
+  "<loc_246>": 50515,
+  "<loc_247>": 50516,
+  "<loc_248>": 50517,
+  "<loc_249>": 50518,
+  "<loc_24>": 50293,
+  "<loc_250>": 50519,
+  "<loc_251>": 50520,
+  "<loc_252>": 50521,
+  "<loc_253>": 50522,
+  "<loc_254>": 50523,
+  "<loc_255>": 50524,
+  "<loc_256>": 50525,
+  "<loc_257>": 50526,
+  "<loc_258>": 50527,
+  "<loc_259>": 50528,
+  "<loc_25>": 50294,
+  "<loc_260>": 50529,
+  "<loc_261>": 50530,
+  "<loc_262>": 50531,
+  "<loc_263>": 50532,
+  "<loc_264>": 50533,
+  "<loc_265>": 50534,
+  "<loc_266>": 50535,
+  "<loc_267>": 50536,
+  "<loc_268>": 50537,
+  "<loc_269>": 50538,
+  "<loc_26>": 50295,
+  "<loc_270>": 50539,
+  "<loc_271>": 50540,
+  "<loc_272>": 50541,
+  "<loc_273>": 50542,
+  "<loc_274>": 50543,
+  "<loc_275>": 50544,
+  "<loc_276>": 50545,
+  "<loc_277>": 50546,
+  "<loc_278>": 50547,
+  "<loc_279>": 50548,
+  "<loc_27>": 50296,
+  "<loc_280>": 50549,
+  "<loc_281>": 50550,
+  "<loc_282>": 50551,
+  "<loc_283>": 50552,
+  "<loc_284>": 50553,
+  "<loc_285>": 50554,
+  "<loc_286>": 50555,
+  "<loc_287>": 50556,
+  "<loc_288>": 50557,
+  "<loc_289>": 50558,
+  "<loc_28>": 50297,
+  "<loc_290>": 50559,
+  "<loc_291>": 50560,
+  "<loc_292>": 50561,
+  "<loc_293>": 50562,
+  "<loc_294>": 50563,
+  "<loc_295>": 50564,
+  "<loc_296>": 50565,
+  "<loc_297>": 50566,
+  "<loc_298>": 50567,
+  "<loc_299>": 50568,
+  "<loc_29>": 50298,
+  "<loc_2>": 50271,
+  "<loc_300>": 50569,
+  "<loc_301>": 50570,
+  "<loc_302>": 50571,
+  "<loc_303>": 50572,
+  "<loc_304>": 50573,
+  "<loc_305>": 50574,
+  "<loc_306>": 50575,
+  "<loc_307>": 50576,
+  "<loc_308>": 50577,
+  "<loc_309>": 50578,
+  "<loc_30>": 50299,
+  "<loc_310>": 50579,
+  "<loc_311>": 50580,
+  "<loc_312>": 50581,
+  "<loc_313>": 50582,
+  "<loc_314>": 50583,
+  "<loc_315>": 50584,
+  "<loc_316>": 50585,
+  "<loc_317>": 50586,
+  "<loc_318>": 50587,
+  "<loc_319>": 50588,
+  "<loc_31>": 50300,
+  "<loc_320>": 50589,
+  "<loc_321>": 50590,
+  "<loc_322>": 50591,
+  "<loc_323>": 50592,
+  "<loc_324>": 50593,
+  "<loc_325>": 50594,
+  "<loc_326>": 50595,
+  "<loc_327>": 50596,
+  "<loc_328>": 50597,
+  "<loc_329>": 50598,
+  "<loc_32>": 50301,
+  "<loc_330>": 50599,
+  "<loc_331>": 50600,
+  "<loc_332>": 50601,
+  "<loc_333>": 50602,
+  "<loc_334>": 50603,
+  "<loc_335>": 50604,
+  "<loc_336>": 50605,
+  "<loc_337>": 50606,
+  "<loc_338>": 50607,
+  "<loc_339>": 50608,
+  "<loc_33>": 50302,
+  "<loc_340>": 50609,
+  "<loc_341>": 50610,
+  "<loc_342>": 50611,
+  "<loc_343>": 50612,
+  "<loc_344>": 50613,
+  "<loc_345>": 50614,
+  "<loc_346>": 50615,
+  "<loc_347>": 50616,
+  "<loc_348>": 50617,
+  "<loc_349>": 50618,
+  "<loc_34>": 50303,
+  "<loc_350>": 50619,
+  "<loc_351>": 50620,
+  "<loc_352>": 50621,
+  "<loc_353>": 50622,
+  "<loc_354>": 50623,
+  "<loc_355>": 50624,
+  "<loc_356>": 50625,
+  "<loc_357>": 50626,
+  "<loc_358>": 50627,
+  "<loc_359>": 50628,
+  "<loc_35>": 50304,
+  "<loc_360>": 50629,
+  "<loc_361>": 50630,
+  "<loc_362>": 50631,
+  "<loc_363>": 50632,
+  "<loc_364>": 50633,
+  "<loc_365>": 50634,
+  "<loc_366>": 50635,
+  "<loc_367>": 50636,
+  "<loc_368>": 50637,
+  "<loc_369>": 50638,
+  "<loc_36>": 50305,
+  "<loc_370>": 50639,
+  "<loc_371>": 50640,
+  "<loc_372>": 50641,
+  "<loc_373>": 50642,
+  "<loc_374>": 50643,
+  "<loc_375>": 50644,
+  "<loc_376>": 50645,
+  "<loc_377>": 50646,
+  "<loc_378>": 50647,
+  "<loc_379>": 50648,
+  "<loc_37>": 50306,
+  "<loc_380>": 50649,
+  "<loc_381>": 50650,
+  "<loc_382>": 50651,
+  "<loc_383>": 50652,
+  "<loc_384>": 50653,
+  "<loc_385>": 50654,
+  "<loc_386>": 50655,
+  "<loc_387>": 50656,
+  "<loc_388>": 50657,
+  "<loc_389>": 50658,
+  "<loc_38>": 50307,
+  "<loc_390>": 50659,
+  "<loc_391>": 50660,
+  "<loc_392>": 50661,
+  "<loc_393>": 50662,
+  "<loc_394>": 50663,
+  "<loc_395>": 50664,
+  "<loc_396>": 50665,
+  "<loc_397>": 50666,
+  "<loc_398>": 50667,
+  "<loc_399>": 50668,
+  "<loc_39>": 50308,
+  "<loc_3>": 50272,
+  "<loc_400>": 50669,
+  "<loc_401>": 50670,
+  "<loc_402>": 50671,
+  "<loc_403>": 50672,
+  "<loc_404>": 50673,
+  "<loc_405>": 50674,
+  "<loc_406>": 50675,
+  "<loc_407>": 50676,
+  "<loc_408>": 50677,
+  "<loc_409>": 50678,
+  "<loc_40>": 50309,
+  "<loc_410>": 50679,
+  "<loc_411>": 50680,
+  "<loc_412>": 50681,
+  "<loc_413>": 50682,
+  "<loc_414>": 50683,
+  "<loc_415>": 50684,
+  "<loc_416>": 50685,
+  "<loc_417>": 50686,
+  "<loc_418>": 50687,
+  "<loc_419>": 50688,
+  "<loc_41>": 50310,
+  "<loc_420>": 50689,
+  "<loc_421>": 50690,
+  "<loc_422>": 50691,
+  "<loc_423>": 50692,
+  "<loc_424>": 50693,
+  "<loc_425>": 50694,
+  "<loc_426>": 50695,
+  "<loc_427>": 50696,
+  "<loc_428>": 50697,
+  "<loc_429>": 50698,
+  "<loc_42>": 50311,
+  "<loc_430>": 50699,
+  "<loc_431>": 50700,
+  "<loc_432>": 50701,
+  "<loc_433>": 50702,
+  "<loc_434>": 50703,
+  "<loc_435>": 50704,
+  "<loc_436>": 50705,
+  "<loc_437>": 50706,
+  "<loc_438>": 50707,
+  "<loc_439>": 50708,
+  "<loc_43>": 50312,
+  "<loc_440>": 50709,
+  "<loc_441>": 50710,
+  "<loc_442>": 50711,
+  "<loc_443>": 50712,
+  "<loc_444>": 50713,
+  "<loc_445>": 50714,
+  "<loc_446>": 50715,
+  "<loc_447>": 50716,
+  "<loc_448>": 50717,
+  "<loc_449>": 50718,
+  "<loc_44>": 50313,
+  "<loc_450>": 50719,
+  "<loc_451>": 50720,
+  "<loc_452>": 50721,
+  "<loc_453>": 50722,
+  "<loc_454>": 50723,
+  "<loc_455>": 50724,
+  "<loc_456>": 50725,
+  "<loc_457>": 50726,
+  "<loc_458>": 50727,
+  "<loc_459>": 50728,
+  "<loc_45>": 50314,
+  "<loc_460>": 50729,
+  "<loc_461>": 50730,
+  "<loc_462>": 50731,
+  "<loc_463>": 50732,
+  "<loc_464>": 50733,
+  "<loc_465>": 50734,
+  "<loc_466>": 50735,
+  "<loc_467>": 50736,
+  "<loc_468>": 50737,
+  "<loc_469>": 50738,
+  "<loc_46>": 50315,
+  "<loc_470>": 50739,
+  "<loc_471>": 50740,
+  "<loc_472>": 50741,
+  "<loc_473>": 50742,
+  "<loc_474>": 50743,
+  "<loc_475>": 50744,
+  "<loc_476>": 50745,
+  "<loc_477>": 50746,
+  "<loc_478>": 50747,
+  "<loc_479>": 50748,
+  "<loc_47>": 50316,
+  "<loc_480>": 50749,
+  "<loc_481>": 50750,
+  "<loc_482>": 50751,
+  "<loc_483>": 50752,
+  "<loc_484>": 50753,
+  "<loc_485>": 50754,
+  "<loc_486>": 50755,
+  "<loc_487>": 50756,
+  "<loc_488>": 50757,
+  "<loc_489>": 50758,
+  "<loc_48>": 50317,
+  "<loc_490>": 50759,
+  "<loc_491>": 50760,
+  "<loc_492>": 50761,
+  "<loc_493>": 50762,
+  "<loc_494>": 50763,
+  "<loc_495>": 50764,
+  "<loc_496>": 50765,
+  "<loc_497>": 50766,
+  "<loc_498>": 50767,
+  "<loc_499>": 50768,
+  "<loc_49>": 50318,
+  "<loc_4>": 50273,
+  "<loc_500>": 50769,
+  "<loc_501>": 50770,
+  "<loc_502>": 50771,
+  "<loc_503>": 50772,
+  "<loc_504>": 50773,
+  "<loc_505>": 50774,
+  "<loc_506>": 50775,
+  "<loc_507>": 50776,
+  "<loc_508>": 50777,
+  "<loc_509>": 50778,
+  "<loc_50>": 50319,
+  "<loc_510>": 50779,
+  "<loc_511>": 50780,
+  "<loc_512>": 50781,
+  "<loc_513>": 50782,
+  "<loc_514>": 50783,
+  "<loc_515>": 50784,
+  "<loc_516>": 50785,
+  "<loc_517>": 50786,
+  "<loc_518>": 50787,
+  "<loc_519>": 50788,
+  "<loc_51>": 50320,
+  "<loc_520>": 50789,
+  "<loc_521>": 50790,
+  "<loc_522>": 50791,
+  "<loc_523>": 50792,
+  "<loc_524>": 50793,
+  "<loc_525>": 50794,
+  "<loc_526>": 50795,
+  "<loc_527>": 50796,
+  "<loc_528>": 50797,
+  "<loc_529>": 50798,
+  "<loc_52>": 50321,
+  "<loc_530>": 50799,
+  "<loc_531>": 50800,
+  "<loc_532>": 50801,
+  "<loc_533>": 50802,
+  "<loc_534>": 50803,
+  "<loc_535>": 50804,
+  "<loc_536>": 50805,
+  "<loc_537>": 50806,
+  "<loc_538>": 50807,
+  "<loc_539>": 50808,
+  "<loc_53>": 50322,
+  "<loc_540>": 50809,
+  "<loc_541>": 50810,
+  "<loc_542>": 50811,
+  "<loc_543>": 50812,
+  "<loc_544>": 50813,
+  "<loc_545>": 50814,
+  "<loc_546>": 50815,
+  "<loc_547>": 50816,
+  "<loc_548>": 50817,
+  "<loc_549>": 50818,
+  "<loc_54>": 50323,
+  "<loc_550>": 50819,
+  "<loc_551>": 50820,
+  "<loc_552>": 50821,
+  "<loc_553>": 50822,
+  "<loc_554>": 50823,
+  "<loc_555>": 50824,
+  "<loc_556>": 50825,
+  "<loc_557>": 50826,
+  "<loc_558>": 50827,
+  "<loc_559>": 50828,
+  "<loc_55>": 50324,
+  "<loc_560>": 50829,
+  "<loc_561>": 50830,
+  "<loc_562>": 50831,
+  "<loc_563>": 50832,
+  "<loc_564>": 50833,
+  "<loc_565>": 50834,
+  "<loc_566>": 50835,
+  "<loc_567>": 50836,
+  "<loc_568>": 50837,
+  "<loc_569>": 50838,
+  "<loc_56>": 50325,
+  "<loc_570>": 50839,
+  "<loc_571>": 50840,
+  "<loc_572>": 50841,
+  "<loc_573>": 50842,
+  "<loc_574>": 50843,
+  "<loc_575>": 50844,
+  "<loc_576>": 50845,
+  "<loc_577>": 50846,
+  "<loc_578>": 50847,
+  "<loc_579>": 50848,
+  "<loc_57>": 50326,
+  "<loc_580>": 50849,
+  "<loc_581>": 50850,
+  "<loc_582>": 50851,
+  "<loc_583>": 50852,
+  "<loc_584>": 50853,
+  "<loc_585>": 50854,
+  "<loc_586>": 50855,
+  "<loc_587>": 50856,
+  "<loc_588>": 50857,
+  "<loc_589>": 50858,
+  "<loc_58>": 50327,
+  "<loc_590>": 50859,
+  "<loc_591>": 50860,
+  "<loc_592>": 50861,
+  "<loc_593>": 50862,
+  "<loc_594>": 50863,
+  "<loc_595>": 50864,
+  "<loc_596>": 50865,
+  "<loc_597>": 50866,
+  "<loc_598>": 50867,
+  "<loc_599>": 50868,
+  "<loc_59>": 50328,
+  "<loc_5>": 50274,
+  "<loc_600>": 50869,
+  "<loc_601>": 50870,
+  "<loc_602>": 50871,
+  "<loc_603>": 50872,
+  "<loc_604>": 50873,
+  "<loc_605>": 50874,
+  "<loc_606>": 50875,
+  "<loc_607>": 50876,
+  "<loc_608>": 50877,
+  "<loc_609>": 50878,
+  "<loc_60>": 50329,
+  "<loc_610>": 50879,
+  "<loc_611>": 50880,
+  "<loc_612>": 50881,
+  "<loc_613>": 50882,
+  "<loc_614>": 50883,
+  "<loc_615>": 50884,
+  "<loc_616>": 50885,
+  "<loc_617>": 50886,
+  "<loc_618>": 50887,
+  "<loc_619>": 50888,
+  "<loc_61>": 50330,
+  "<loc_620>": 50889,
+  "<loc_621>": 50890,
+  "<loc_622>": 50891,
+  "<loc_623>": 50892,
+  "<loc_624>": 50893,
+  "<loc_625>": 50894,
+  "<loc_626>": 50895,
+  "<loc_627>": 50896,
+  "<loc_628>": 50897,
+  "<loc_629>": 50898,
+  "<loc_62>": 50331,
+  "<loc_630>": 50899,
+  "<loc_631>": 50900,
+  "<loc_632>": 50901,
+  "<loc_633>": 50902,
+  "<loc_634>": 50903,
+  "<loc_635>": 50904,
+  "<loc_636>": 50905,
+  "<loc_637>": 50906,
+  "<loc_638>": 50907,
+  "<loc_639>": 50908,
+  "<loc_63>": 50332,
+  "<loc_640>": 50909,
+  "<loc_641>": 50910,
+  "<loc_642>": 50911,
+  "<loc_643>": 50912,
+  "<loc_644>": 50913,
+  "<loc_645>": 50914,
+  "<loc_646>": 50915,
+  "<loc_647>": 50916,
+  "<loc_648>": 50917,
+  "<loc_649>": 50918,
+  "<loc_64>": 50333,
+  "<loc_650>": 50919,
+  "<loc_651>": 50920,
+  "<loc_652>": 50921,
+  "<loc_653>": 50922,
+  "<loc_654>": 50923,
+  "<loc_655>": 50924,
+  "<loc_656>": 50925,
+  "<loc_657>": 50926,
+  "<loc_658>": 50927,
+  "<loc_659>": 50928,
+  "<loc_65>": 50334,
+  "<loc_660>": 50929,
+  "<loc_661>": 50930,
+  "<loc_662>": 50931,
+  "<loc_663>": 50932,
+  "<loc_664>": 50933,
+  "<loc_665>": 50934,
+  "<loc_666>": 50935,
+  "<loc_667>": 50936,
+  "<loc_668>": 50937,
+  "<loc_669>": 50938,
+  "<loc_66>": 50335,
+  "<loc_670>": 50939,
+  "<loc_671>": 50940,
+  "<loc_672>": 50941,
+  "<loc_673>": 50942,
+  "<loc_674>": 50943,
+  "<loc_675>": 50944,
+  "<loc_676>": 50945,
+  "<loc_677>": 50946,
+  "<loc_678>": 50947,
+  "<loc_679>": 50948,
+  "<loc_67>": 50336,
+  "<loc_680>": 50949,
+  "<loc_681>": 50950,
+  "<loc_682>": 50951,
+  "<loc_683>": 50952,
+  "<loc_684>": 50953,
+  "<loc_685>": 50954,
+  "<loc_686>": 50955,
+  "<loc_687>": 50956,
+  "<loc_688>": 50957,
+  "<loc_689>": 50958,
+  "<loc_68>": 50337,
+  "<loc_690>": 50959,
+  "<loc_691>": 50960,
+  "<loc_692>": 50961,
+  "<loc_693>": 50962,
+  "<loc_694>": 50963,
+  "<loc_695>": 50964,
+  "<loc_696>": 50965,
+  "<loc_697>": 50966,
+  "<loc_698>": 50967,
+  "<loc_699>": 50968,
+  "<loc_69>": 50338,
+  "<loc_6>": 50275,
+  "<loc_700>": 50969,
+  "<loc_701>": 50970,
+  "<loc_702>": 50971,
+  "<loc_703>": 50972,
+  "<loc_704>": 50973,
+  "<loc_705>": 50974,
+  "<loc_706>": 50975,
+  "<loc_707>": 50976,
+  "<loc_708>": 50977,
+  "<loc_709>": 50978,
+  "<loc_70>": 50339,
+  "<loc_710>": 50979,
+  "<loc_711>": 50980,
+  "<loc_712>": 50981,
+  "<loc_713>": 50982,
+  "<loc_714>": 50983,
+  "<loc_715>": 50984,
+  "<loc_716>": 50985,
+  "<loc_717>": 50986,
+  "<loc_718>": 50987,
+  "<loc_719>": 50988,
+  "<loc_71>": 50340,
+  "<loc_720>": 50989,
+  "<loc_721>": 50990,
+  "<loc_722>": 50991,
+  "<loc_723>": 50992,
+  "<loc_724>": 50993,
+  "<loc_725>": 50994,
+  "<loc_726>": 50995,
+  "<loc_727>": 50996,
+  "<loc_728>": 50997,
+  "<loc_729>": 50998,
+  "<loc_72>": 50341,
+  "<loc_730>": 50999,
+  "<loc_731>": 51000,
+  "<loc_732>": 51001,
+  "<loc_733>": 51002,
+  "<loc_734>": 51003,
+  "<loc_735>": 51004,
+  "<loc_736>": 51005,
+  "<loc_737>": 51006,
+  "<loc_738>": 51007,
+  "<loc_739>": 51008,
+  "<loc_73>": 50342,
+  "<loc_740>": 51009,
+  "<loc_741>": 51010,
+  "<loc_742>": 51011,
+  "<loc_743>": 51012,
+  "<loc_744>": 51013,
+  "<loc_745>": 51014,
+  "<loc_746>": 51015,
+  "<loc_747>": 51016,
+  "<loc_748>": 51017,
+  "<loc_749>": 51018,
+  "<loc_74>": 50343,
+  "<loc_750>": 51019,
+  "<loc_751>": 51020,
+  "<loc_752>": 51021,
+  "<loc_753>": 51022,
+  "<loc_754>": 51023,
+  "<loc_755>": 51024,
+  "<loc_756>": 51025,
+  "<loc_757>": 51026,
+  "<loc_758>": 51027,
+  "<loc_759>": 51028,
+  "<loc_75>": 50344,
+  "<loc_760>": 51029,
+  "<loc_761>": 51030,
+  "<loc_762>": 51031,
+  "<loc_763>": 51032,
+  "<loc_764>": 51033,
+  "<loc_765>": 51034,
+  "<loc_766>": 51035,
+  "<loc_767>": 51036,
+  "<loc_768>": 51037,
+  "<loc_769>": 51038,
+  "<loc_76>": 50345,
+  "<loc_770>": 51039,
+  "<loc_771>": 51040,
+  "<loc_772>": 51041,
+  "<loc_773>": 51042,
+  "<loc_774>": 51043,
+  "<loc_775>": 51044,
+  "<loc_776>": 51045,
+  "<loc_777>": 51046,
+  "<loc_778>": 51047,
+  "<loc_779>": 51048,
+  "<loc_77>": 50346,
+  "<loc_780>": 51049,
+  "<loc_781>": 51050,
+  "<loc_782>": 51051,
+  "<loc_783>": 51052,
+  "<loc_784>": 51053,
+  "<loc_785>": 51054,
+  "<loc_786>": 51055,
+  "<loc_787>": 51056,
+  "<loc_788>": 51057,
+  "<loc_789>": 51058,
+  "<loc_78>": 50347,
+  "<loc_790>": 51059,
+  "<loc_791>": 51060,
+  "<loc_792>": 51061,
+  "<loc_793>": 51062,
+  "<loc_794>": 51063,
+  "<loc_795>": 51064,
+  "<loc_796>": 51065,
+  "<loc_797>": 51066,
+  "<loc_798>": 51067,
+  "<loc_799>": 51068,
+  "<loc_79>": 50348,
+  "<loc_7>": 50276,
+  "<loc_800>": 51069,
+  "<loc_801>": 51070,
+  "<loc_802>": 51071,
+  "<loc_803>": 51072,
+  "<loc_804>": 51073,
+  "<loc_805>": 51074,
+  "<loc_806>": 51075,
+  "<loc_807>": 51076,
+  "<loc_808>": 51077,
+  "<loc_809>": 51078,
+  "<loc_80>": 50349,
+  "<loc_810>": 51079,
+  "<loc_811>": 51080,
+  "<loc_812>": 51081,
+  "<loc_813>": 51082,
+  "<loc_814>": 51083,
+  "<loc_815>": 51084,
+  "<loc_816>": 51085,
+  "<loc_817>": 51086,
+  "<loc_818>": 51087,
+  "<loc_819>": 51088,
+  "<loc_81>": 50350,
+  "<loc_820>": 51089,
+  "<loc_821>": 51090,
+  "<loc_822>": 51091,
+  "<loc_823>": 51092,
+  "<loc_824>": 51093,
+  "<loc_825>": 51094,
+  "<loc_826>": 51095,
+  "<loc_827>": 51096,
+  "<loc_828>": 51097,
+  "<loc_829>": 51098,
+  "<loc_82>": 50351,
+  "<loc_830>": 51099,
+  "<loc_831>": 51100,
+  "<loc_832>": 51101,
+  "<loc_833>": 51102,
+  "<loc_834>": 51103,
+  "<loc_835>": 51104,
+  "<loc_836>": 51105,
+  "<loc_837>": 51106,
+  "<loc_838>": 51107,
+  "<loc_839>": 51108,
+  "<loc_83>": 50352,
+  "<loc_840>": 51109,
+  "<loc_841>": 51110,
+  "<loc_842>": 51111,
+  "<loc_843>": 51112,
+  "<loc_844>": 51113,
+  "<loc_845>": 51114,
+  "<loc_846>": 51115,
+  "<loc_847>": 51116,
+  "<loc_848>": 51117,
+  "<loc_849>": 51118,
+  "<loc_84>": 50353,
+  "<loc_850>": 51119,
+  "<loc_851>": 51120,
+  "<loc_852>": 51121,
+  "<loc_853>": 51122,
+  "<loc_854>": 51123,
+  "<loc_855>": 51124,
+  "<loc_856>": 51125,
+  "<loc_857>": 51126,
+  "<loc_858>": 51127,
+  "<loc_859>": 51128,
+  "<loc_85>": 50354,
+  "<loc_860>": 51129,
+  "<loc_861>": 51130,
+  "<loc_862>": 51131,
+  "<loc_863>": 51132,
+  "<loc_864>": 51133,
+  "<loc_865>": 51134,
+  "<loc_866>": 51135,
+  "<loc_867>": 51136,
+  "<loc_868>": 51137,
+  "<loc_869>": 51138,
+  "<loc_86>": 50355,
+  "<loc_870>": 51139,
+  "<loc_871>": 51140,
+  "<loc_872>": 51141,
+  "<loc_873>": 51142,
+  "<loc_874>": 51143,
+  "<loc_875>": 51144,
+  "<loc_876>": 51145,
+  "<loc_877>": 51146,
+  "<loc_878>": 51147,
+  "<loc_879>": 51148,
+  "<loc_87>": 50356,
+  "<loc_880>": 51149,
+  "<loc_881>": 51150,
+  "<loc_882>": 51151,
+  "<loc_883>": 51152,
+  "<loc_884>": 51153,
+  "<loc_885>": 51154,
+  "<loc_886>": 51155,
+  "<loc_887>": 51156,
+  "<loc_888>": 51157,
+  "<loc_889>": 51158,
+  "<loc_88>": 50357,
+  "<loc_890>": 51159,
+  "<loc_891>": 51160,
+  "<loc_892>": 51161,
+  "<loc_893>": 51162,
+  "<loc_894>": 51163,
+  "<loc_895>": 51164,
+  "<loc_896>": 51165,
+  "<loc_897>": 51166,
+  "<loc_898>": 51167,
+  "<loc_899>": 51168,
+  "<loc_89>": 50358,
+  "<loc_8>": 50277,
+  "<loc_900>": 51169,
+  "<loc_901>": 51170,
+  "<loc_902>": 51171,
+  "<loc_903>": 51172,
+  "<loc_904>": 51173,
+  "<loc_905>": 51174,
+  "<loc_906>": 51175,
+  "<loc_907>": 51176,
+  "<loc_908>": 51177,
+  "<loc_909>": 51178,
+  "<loc_90>": 50359,
+  "<loc_910>": 51179,
+  "<loc_911>": 51180,
+  "<loc_912>": 51181,
+  "<loc_913>": 51182,
+  "<loc_914>": 51183,
+  "<loc_915>": 51184,
+  "<loc_916>": 51185,
+  "<loc_917>": 51186,
+  "<loc_918>": 51187,
+  "<loc_919>": 51188,
+  "<loc_91>": 50360,
+  "<loc_920>": 51189,
+  "<loc_921>": 51190,
+  "<loc_922>": 51191,
+  "<loc_923>": 51192,
+  "<loc_924>": 51193,
+  "<loc_925>": 51194,
+  "<loc_926>": 51195,
+  "<loc_927>": 51196,
+  "<loc_928>": 51197,
+  "<loc_929>": 51198,
+  "<loc_92>": 50361,
+  "<loc_930>": 51199,
+  "<loc_931>": 51200,
+  "<loc_932>": 51201,
+  "<loc_933>": 51202,
+  "<loc_934>": 51203,
+  "<loc_935>": 51204,
+  "<loc_936>": 51205,
+  "<loc_937>": 51206,
+  "<loc_938>": 51207,
+  "<loc_939>": 51208,
+  "<loc_93>": 50362,
+  "<loc_940>": 51209,
+  "<loc_941>": 51210,
+  "<loc_942>": 51211,
+  "<loc_943>": 51212,
+  "<loc_944>": 51213,
+  "<loc_945>": 51214,
+  "<loc_946>": 51215,
+  "<loc_947>": 51216,
+  "<loc_948>": 51217,
+  "<loc_949>": 51218,
+  "<loc_94>": 50363,
+  "<loc_950>": 51219,
+  "<loc_951>": 51220,
+  "<loc_952>": 51221,
+  "<loc_953>": 51222,
+  "<loc_954>": 51223,
+  "<loc_955>": 51224,
+  "<loc_956>": 51225,
+  "<loc_957>": 51226,
+  "<loc_958>": 51227,
+  "<loc_959>": 51228,
+  "<loc_95>": 50364,
+  "<loc_960>": 51229,
+  "<loc_961>": 51230,
+  "<loc_962>": 51231,
+  "<loc_963>": 51232,
+  "<loc_964>": 51233,
+  "<loc_965>": 51234,
+  "<loc_966>": 51235,
+  "<loc_967>": 51236,
+  "<loc_968>": 51237,
+  "<loc_969>": 51238,
+  "<loc_96>": 50365,
+  "<loc_970>": 51239,
+  "<loc_971>": 51240,
+  "<loc_972>": 51241,
+  "<loc_973>": 51242,
+  "<loc_974>": 51243,
+  "<loc_975>": 51244,
+  "<loc_976>": 51245,
+  "<loc_977>": 51246,
+  "<loc_978>": 51247,
+  "<loc_979>": 51248,
+  "<loc_97>": 50366,
+  "<loc_980>": 51249,
+  "<loc_981>": 51250,
+  "<loc_982>": 51251,
+  "<loc_983>": 51252,
+  "<loc_984>": 51253,
+  "<loc_985>": 51254,
+  "<loc_986>": 51255,
+  "<loc_987>": 51256,
+  "<loc_988>": 51257,
+  "<loc_989>": 51258,
+  "<loc_98>": 50367,
+  "<loc_990>": 51259,
+  "<loc_991>": 51260,
+  "<loc_992>": 51261,
+  "<loc_993>": 51262,
+  "<loc_994>": 51263,
+  "<loc_995>": 51264,
+  "<loc_996>": 51265,
+  "<loc_997>": 51266,
+  "<loc_998>": 51267,
+  "<loc_999>": 51268,
+  "<loc_99>": 50368,
+  "<loc_9>": 50278,
+  "<ncap>": 51271,
+  "<ocr>": 50267,
+  "<od>": 50265,
+  "<poly>": 51286,
+  "<proposal>": 51284,
+  "<region_cap>": 51280,
+  "<region_to_desciption>": 51282,
+  "<seg>": 51277,
+  "<sep>": 51279
+}

florence2/DocVQA/config.json ADDED Viewed

	@@ -0,0 +1,237 @@

+{
+  "_name_or_path": "model_checkpoints/vqainstruct_no_lora/epoch_5",
+  "architectures": [
+    "Florence2ForConditionalGeneration"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_florence2.Florence2Config",
+    "AutoModelForCausalLM": "modeling_florence2.Florence2ForConditionalGeneration"
+  },
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "ignore_index": -100,
+  "is_encoder_decoder": true,
+  "model_type": "florence2",
+  "pad_token_id": 1,
+  "projection_dim": 1024,
+  "text_config": {
+    "_name_or_path": "",
+    "activation_dropout": 0.1,
+    "activation_function": "gelu",
+    "add_bias_logits": false,
+    "add_cross_attention": false,
+    "add_final_layer_norm": false,
+    "architectures": null,
+    "attention_dropout": 0.1,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classif_dropout": 0.1,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": null,
+    "d_model": 1024,
+    "decoder_attention_heads": 16,
+    "decoder_ffn_dim": 4096,
+    "decoder_layerdrop": 0.0,
+    "decoder_layers": 12,
+    "decoder_start_token_id": 2,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": true,
+    "encoder_attention_heads": 16,
+    "encoder_ffn_dim": 4096,
+    "encoder_layerdrop": 0.0,
+    "encoder_layers": 12,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": 0,
+    "forced_eos_token_id": 2,
+    "gradient_checkpointing": false,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1",
+      "2": "LABEL_2"
+    },
+    "init_std": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": true,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1,
+      "LABEL_2": 2
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 1024,
+    "min_length": 0,
+    "model_type": "florence2_language",
+    "no_repeat_ngram_size": 3,
+    "normalize_before": false,
+    "num_beam_groups": 1,
+    "num_beams": 3,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 51289
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "depths": [
+      1,
+      1,
+      9,
+      1
+    ],
+    "dim_embed": [
+      256,
+      512,
+      1024,
+      2048
+    ],
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "drop_path_rate": 0.1,
+    "early_stopping": false,
+    "enable_checkpoint": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_feature_source": [
+      "spatial_avg_pool",
+      "temporal_avg_pool"
+    ],
+    "image_pos_embed": {
+      "max_pos_embeddings": 50,
+      "type": "learned_abs_2d"
+    },
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "davit",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_groups": [
+      8,
+      16,
+      32,
+      64
+    ],
+    "num_heads": [
+      8,
+      16,
+      32,
+      64
+    ],
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_padding": [
+      3,
+      1,
+      1,
+      1
+    ],
+    "patch_prenorm": [
+      false,
+      true,
+      true,
+      true
+    ],
+    "patch_size": [
+      7,
+      3,
+      3,
+      3
+    ],
+    "patch_stride": [
+      4,
+      2,
+      2,
+      2
+    ],
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 1024,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "visual_temporal_embedding": {
+      "max_temporal_embeddings": 100,
+      "type": "COSINE"
+    },
+    "window_size": 12
+  },
+  "vocab_size": 51289
+}

florence2/DocVQA/generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "num_beams": 3,
+  "transformers_version": "4.41.2"
+}

florence2/DocVQA/modeling_florence2.py ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/DocVQA/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_center_crop",
+    "crop_size",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "do_convert_rgb",
+    "return_tensors",
+    "data_format",
+    "input_data_format"
+  ],
+  "auto_map": {
+    "AutoProcessor": "processing_florence2.Florence2Processor"
+  },
+  "crop_size": {
+    "height": 768,
+    "width": 768
+  },
+  "do_center_crop": false,
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_seq_length": 577,
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "processor_class": "Florence2Processor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 768,
+    "width": 768
+  }
+}

florence2/DocVQA/processor_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "auto_map": {
+    "AutoProcessor": "processing_florence2.Florence2Processor"
+  },
+  "processor_class": "Florence2Processor"
+}

florence2/DocVQA/special_tokens_map.json ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/DocVQA/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/DocVQA/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/DocVQA/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/base/LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+    MIT License
+    Copyright (c) Microsoft Corporation.
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE

florence2/base/config.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+  "_name_or_path": "florence2",
+  "architectures": [
+    "Florence2ForConditionalGeneration"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_florence2.Florence2Config",
+    "AutoModelForCausalLM": "modeling_florence2.Florence2ForConditionalGeneration"
+  },
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "ignore_index": -100,
+  "model_type": "florence2",
+  "pad_token_id": 1,
+  "projection_dim": 768,
+  "text_config": {
+      "vocab_size": 51289,
+      "activation_dropout": 0.1,
+      "activation_function": "gelu",
+      "add_bias_logits": false,
+      "add_final_layer_norm": false,
+      "attention_dropout": 0.1,
+      "bos_token_id": 0,
+      "classif_dropout": 0.1,
+      "classifier_dropout": 0.0,
+      "d_model": 768,
+      "decoder_attention_heads": 12,
+      "decoder_ffn_dim": 3072,
+      "decoder_layerdrop": 0.0,
+      "decoder_layers": 6,
+      "decoder_start_token_id": 2,
+      "dropout": 0.1,
+      "early_stopping": true,
+      "encoder_attention_heads": 12,
+      "encoder_ffn_dim": 3072,
+      "encoder_layerdrop": 0.0,
+      "encoder_layers": 6,
+      "eos_token_id": 2,
+      "forced_eos_token_id": 2,
+      "forced_bos_token_id": 0,
+      "gradient_checkpointing": false,
+      "init_std": 0.02,
+      "is_encoder_decoder": true,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1,
+        "LABEL_2": 2
+      },
+      "max_position_embeddings": 1024,
+      "no_repeat_ngram_size": 3,
+      "normalize_before": false,
+      "num_hidden_layers": 6,
+      "pad_token_id": 1,
+      "scale_embedding": false,
+      "num_beams": 3
+  },
+  "vision_config": {
+    "model_type": "davit",
+    "drop_path_rate": 0.1,
+    "patch_size": [7, 3, 3, 3],
+    "patch_stride": [4, 2, 2, 2],
+    "patch_padding": [3, 1, 1, 1],
+    "patch_prenorm": [false, true, true, true],
+    "enable_checkpoint": false,
+    "dim_embed": [128, 256, 512, 1024],
+    "num_heads": [4, 8, 16, 32],
+    "num_groups": [4, 8, 16, 32],
+    "depths": [1, 1, 9, 1],
+    "window_size": 12,
+    "projection_dim": 768,
+    "visual_temporal_embedding": {
+        "type": "COSINE",
+        "max_temporal_embeddings": 100
+    },
+    "image_pos_embed": {
+        "type": "learned_abs_2d",
+        "max_pos_embeddings": 50
+    },
+    "image_feature_source": ["spatial_avg_pool", "temporal_avg_pool"]
+  },
+  "vocab_size": 51289,
+  "torch_dtype": "float16",
+  "transformers_version": "4.41.0.dev0",
+  "is_encoder_decoder": true
+}

florence2/base/configuration_florence2.py ADDED Viewed

	@@ -0,0 +1,340 @@

+# coding=utf-8
+# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import warnings
+""" Florence-2 configuration"""
+from typing import Optional
+from transformers import AutoConfig
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+class Florence2VisionConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`Florence2VisionModel`]. It is used to instantiate a Florence2VisionModel
+    according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+    defaults will yield a similar configuration to that of the Florence2VisionModel architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        drop_path_rate (`float`, *optional*, defaults to 0.1):
+            The dropout rate of the drop path layer.
+        patch_size (`List[int]`, *optional*, defaults to [7, 3, 3, 3]):
+            The patch size of the image.
+        patch_stride (`List[int]`, *optional*, defaults to [4, 2, 2, 2]):
+            The patch stride of the image.
+        patch_padding (`List[int]`, *optional*, defaults to [3, 1, 1, 1]):
+            The patch padding of the image.
+        patch_prenorm (`List[bool]`, *optional*, defaults to [false, true, true, true]):
+            Whether to apply layer normalization before the patch embedding layer.
+        enable_checkpoint (`bool`, *optional*, defaults to False):
+            Whether to enable checkpointing.
+        dim_embed (`List[int]`, *optional*, defaults to [256, 512, 1024, 2048]):
+            The dimension of the embedding layer.
+        num_heads (`List[int]`, *optional*, defaults to [8, 16, 32, 64]):
+            The number of attention heads.
+        num_groups (`List[int]`, *optional*, defaults to [8, 16, 32, 64]):
+            The number of groups.
+        depths (`List[int]`, *optional*, defaults to [1, 1, 9, 1]):
+            The depth of the model.
+        window_size (`int`, *optional*, defaults to 12):
+            The window size of the model.
+        projection_dim (`int`, *optional*, defaults to 1024):
+            The dimension of the projection layer.
+        visual_temporal_embedding (`dict`, *optional*):
+            The configuration of the visual temporal embedding.
+        image_pos_embed (`dict`, *optional*):
+            The configuration of the image position embedding.
+        image_feature_source (`List[str]`, *optional*, defaults to ["spatial_avg_pool", "temporal_avg_pool"]):
+            The source of the image feature.
+    Example:
+    ```python
+    >>> from transformers import Florence2VisionConfig, Florence2VisionModel
+    >>> # Initializing a Florence2 Vision style configuration
+    >>> configuration = Florence2VisionConfig()
+    >>> # Initializing a model (with random weights)
+    >>> model = Florence2VisionModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "florence2_vision"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    def __init__(
+        self,
+        drop_path_rate=0.1,
+        patch_size=[7, 3, 3, 3],
+        patch_stride=[4, 2, 2, 2],
+        patch_padding=[3, 1, 1, 1],
+        patch_prenorm=[False, True, True, True],
+        enable_checkpoint=False,
+        dim_embed=[256, 512, 1024, 2048],
+        num_heads=[8, 16, 32, 64],
+        num_groups=[8, 16, 32, 64],
+        depths=[1, 1, 9, 1],
+        window_size=12,
+        projection_dim=1024,
+        visual_temporal_embedding=None,
+        image_pos_embed=None,
+        image_feature_source=["spatial_avg_pool", "temporal_avg_pool"],
+        **kwargs,
+    ):
+        self.drop_path_rate = drop_path_rate
+        self.patch_size = patch_size
+        self.patch_stride = patch_stride
+        self.patch_padding = patch_padding
+        self.patch_prenorm = patch_prenorm
+        self.enable_checkpoint = enable_checkpoint
+        self.dim_embed = dim_embed
+        self.num_heads = num_heads
+        self.num_groups = num_groups
+        self.depths = depths
+        self.window_size = window_size
+        self.projection_dim = projection_dim
+        self.visual_temporal_embedding = visual_temporal_embedding
+        self.image_pos_embed = image_pos_embed
+        self.image_feature_source = image_feature_source
+        super().__init__(**kwargs)
+class Florence2LanguageConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`Florence2LanguagePreTrainedModel`]. It is used to instantiate a BART
+    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+    defaults will yield a similar configuration to that of the BART
+    [facebook/bart-large](https://huggingface.co/facebook/bart-large) architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vocab_size (`int`, *optional*, defaults to 51289):
+            Vocabulary size of the Florence2Language model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`Florence2LanguageModel`].
+        d_model (`int`, *optional*, defaults to 1024):
+            Dimensionality of the layers and the pooler layer.
+        encoder_layers (`int`, *optional*, defaults to 12):
+            Number of encoder layers.
+        decoder_layers (`int`, *optional*, defaults to 12):
+            Number of decoder layers.
+        encoder_attention_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        decoder_attention_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer decoder.
+        decoder_ffn_dim (`int`, *optional*, defaults to 4096):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
+        encoder_ffn_dim (`int`, *optional*, defaults to 4096):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
+        activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"silu"` and `"gelu_new"` are supported.
+        dropout (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+        activation_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for activations inside the fully connected layer.
+        classifier_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for classifier.
+        max_position_embeddings (`int`, *optional*, defaults to 1024):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        init_std (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        encoder_layerdrop (`float`, *optional*, defaults to 0.0):
+            The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
+            for more details.
+        decoder_layerdrop (`float`, *optional*, defaults to 0.0):
+            The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
+            for more details.
+        scale_embedding (`bool`, *optional*, defaults to `False`):
+            Scale embeddings by diving by sqrt(d_model).
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models).
+        num_labels (`int`, *optional*, defaults to 3):
+            The number of labels to use in [`Florence2LanguageForSequenceClassification`].
+        forced_eos_token_id (`int`, *optional*, defaults to 2):
+            The id of the token to force as the last generated token when `max_length` is reached. Usually set to
+            `eos_token_id`.
+    Example:
+    ```python
+    >>> from transformers import Florence2LanguageConfig, Florence2LanguageModel
+    >>> # Initializing a Florence2 Language style configuration
+    >>> configuration = Florence2LanguageConfig()
+    >>> # Initializing a model (with random weights)
+    >>> model = Florence2LangaugeModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "florence2_language"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
+    def __init__(
+        self,
+        vocab_size=51289,
+        max_position_embeddings=1024,
+        encoder_layers=12,
+        encoder_ffn_dim=4096,
+        encoder_attention_heads=16,
+        decoder_layers=12,
+        decoder_ffn_dim=4096,
+        decoder_attention_heads=16,
+        encoder_layerdrop=0.0,
+        decoder_layerdrop=0.0,
+        activation_function="gelu",
+        d_model=1024,
+        dropout=0.1,
+        attention_dropout=0.0,
+        activation_dropout=0.0,
+        init_std=0.02,
+        classifier_dropout=0.0,
+        scale_embedding=False,
+        use_cache=True,
+        num_labels=3,
+        pad_token_id=1,
+        bos_token_id=0,
+        eos_token_id=2,
+        is_encoder_decoder=True,
+        decoder_start_token_id=2,
+        forced_eos_token_id=2,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.d_model = d_model
+        self.encoder_ffn_dim = encoder_ffn_dim
+        self.encoder_layers = encoder_layers
+        self.encoder_attention_heads = encoder_attention_heads
+        self.decoder_ffn_dim = decoder_ffn_dim
+        self.decoder_layers = decoder_layers
+        self.decoder_attention_heads = decoder_attention_heads
+        self.dropout = dropout
+        self.attention_dropout = attention_dropout
+        self.activation_dropout = activation_dropout
+        self.activation_function = activation_function
+        self.init_std = init_std
+        self.encoder_layerdrop = encoder_layerdrop
+        self.decoder_layerdrop = decoder_layerdrop
+        self.classifier_dropout = classifier_dropout
+        self.use_cache = use_cache
+        self.num_hidden_layers = encoder_layers
+        self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
+        super().__init__(
+            num_labels=num_labels,
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            is_encoder_decoder=is_encoder_decoder,
+            decoder_start_token_id=decoder_start_token_id,
+            forced_eos_token_id=forced_eos_token_id,
+            **kwargs,
+        )
+        # ensure backward compatibility for BART CNN models
+        if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
+            self.forced_bos_token_id = self.bos_token_id
+            warnings.warn(
+                f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. "
+                "The config can simply be saved and uploaded again to be fixed."
+            )
+class Florence2Config(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`Florence2ForConditionalGeneration`]. It is used to instantiate an
+    Florence-2 model according to the specified arguments, defining the model architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vision_config (`Florence2VisionConfig`,  *optional*):
+            Custom vision config or dict
+        text_config (`Union[AutoConfig, dict]`, *optional*):
+            The config object of the text backbone.
+        ignore_index (`int`, *optional*, defaults to -100):
+            The ignore index for the loss function.
+        vocab_size (`int`, *optional*, defaults to 51289):
+            Vocabulary size of the Florence2model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`~Florence2ForConditionalGeneration`]
+        projection_dim (`int`, *optional*, defaults to 1024):
+            Dimension of the multimodal projection space.
+    Example:
+    ```python
+    >>> from transformers import Florence2ForConditionalGeneration, Florence2Config, CLIPVisionConfig, BartConfig
+    >>> # Initializing a clip-like vision config
+    >>> vision_config = CLIPVisionConfig()
+    >>> # Initializing a Bart config
+    >>> text_config = BartConfig()
+    >>> # Initializing a Florence-2 configuration
+    >>> configuration = Florence2Config(vision_config, text_config)
+    >>> # Initializing a model from the florence-2 configuration
+    >>> model = Florence2ForConditionalGeneration(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "florence2"
+    is_composition = False
+    def __init__(
+        self,
+        vision_config=None,
+        text_config=None,
+        ignore_index=-100,
+        vocab_size=51289,
+        projection_dim=1024,
+        **kwargs,
+    ):
+        self.ignore_index = ignore_index
+        self.vocab_size = vocab_size
+        self.projection_dim = projection_dim
+        if vision_config is not None:
+            vision_config = PretrainedConfig(**vision_config)
+        self.vision_config = vision_config
+        self.vocab_size = self.vocab_size
+        self.text_config = text_config
+        if text_config is not None:
+            self.text_config = Florence2LanguageConfig(**text_config)
+        super().__init__(**kwargs)

florence2/base/modeling_florence2.py ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/base/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "auto_map": {
+    "AutoProcessor": "processing_florence2.Florence2Processor"
+   },
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "return_tensors",
+    "data_format",
+    "input_data_format",
+    "do_convert_rgb"
+  ],
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "do_center_crop": false,
+  "image_processor_type": "CLIPImageProcessor",
+  "image_seq_length": 577,
+  "image_mean": [0.485, 0.456, 0.406],
+  "image_std":  [0.229, 0.224, 0.225],
+  "processor_class": "Florence2Processor",
+  "resample": 3,
+  "size": {
+    "height": 768,
+    "width":768
+  },
+  "crop_size": {
+    "height": 768,
+    "width": 768
+  }
+}

florence2/base/processing_florence2.py ADDED Viewed

	@@ -0,0 +1,1088 @@

+# coding=utf-8
+# Copyright 2024 Microsoft and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Processor class for Florence-2.
+"""
+import re
+import logging
+from typing import List, Optional, Union
+import numpy as np
+import torch
+from transformers.feature_extraction_utils import BatchFeature
+from transformers.image_utils import ImageInput, is_valid_image
+from transformers.processing_utils import ProcessorMixin
+from transformers.tokenization_utils_base import (
+    PaddingStrategy,
+    PreTokenizedInput,
+    TextInput,
+    TruncationStrategy,
+)
+from transformers.utils import TensorType
+logger = logging.getLogger(__name__)
+# Copied from transformers.models.idefics2.processing_idefics2.is_url
+def is_url(val) -> bool:
+    return isinstance(val, str) and val.startswith("http")
+# Copied from transformers.models.idefics2.processing_idefics2.is_image_or_image_url
+def is_image_or_image_url(elem):
+    return is_url(elem) or is_valid_image(elem)
+def _is_str_or_image(elem):
+    return isinstance(elem, (str)) or is_image_or_image_url(elem)
+class Florence2Processor(ProcessorMixin):
+    r"""
+    Constructs a Florence2 processor which wraps a Florence2 image processor and a Florence2 tokenizer into a single processor.
+    [`Florence2Processor`] offers all the functionalities of [`CLIPImageProcessor`] and [`BartTokenizerFast`]. See the
+    [`~Florence2Processor.__call__`] and [`~Florence2Processor.decode`] for more information.
+    Args:
+        image_processor ([`CLIPImageProcessor`], *optional*):
+            The image processor is a required input.
+        tokenizer ([`BartTokenizerFast`], *optional*):
+            The tokenizer is a required input.
+    """
+    attributes = ["image_processor", "tokenizer"]
+    image_processor_class = "CLIPImageProcessor"
+    tokenizer_class = ("BartTokenizer", "BartTokenizerFast")
+    def __init__(
+        self,
+        image_processor=None,
+        tokenizer=None,
+    ):
+        if image_processor is None:
+            raise ValueError("You need to specify an `image_processor`.")
+        if tokenizer is None:
+            raise ValueError("You need to specify a `tokenizer`.")
+        if not hasattr(image_processor, "image_seq_length"):
+            raise ValueError("Image processor is missing an `image_seq_length` attribute.")
+        self.image_seq_length = image_processor.image_seq_length
+        tokens_to_add = {
+                'additional_special_tokens': \
+                    tokenizer.additional_special_tokens + \
+                    ['<od>', '</od>', '<ocr>', '</ocr>'] + \
+                    [f'<loc_{x}>' for x in range(1000)] + \
+                    ['<cap>', '</cap>', '<ncap>', '</ncap>','<dcap>', '</dcap>', '<grounding>', '</grounding>', '<seg>', '</seg>', '<sep>', '<region_cap>', '</region_cap>', '<region_to_desciption>', '</region_to_desciption>', '<proposal>', '</proposal>', '<poly>', '</poly>', '<and>']
+            }
+        tokenizer.add_special_tokens(tokens_to_add)
+        self.tasks_answer_post_processing_type = {
+            '<OCR>': 'pure_text',
+            '<OCR_WITH_REGION>': 'ocr',
+            '<CAPTION>': 'pure_text',
+            '<DETAILED_CAPTION>': 'pure_text',
+            '<MORE_DETAILED_CAPTION>': 'pure_text',
+            '<OD>': 'description_with_bboxes',
+            '<DENSE_REGION_CAPTION>': 'description_with_bboxes',
+            '<CAPTION_TO_PHRASE_GROUNDING>': "phrase_grounding",
+            '<REFERRING_EXPRESSION_SEGMENTATION>': 'polygons',
+            '<REGION_TO_SEGMENTATION>': 'polygons',
+            '<OPEN_VOCABULARY_DETECTION>': 'description_with_bboxes_or_polygons',
+            '<REGION_TO_CATEGORY>': 'pure_text',
+            '<REGION_TO_DESCRIPTION>': 'pure_text',
+            '<REGION_TO_OCR>': 'pure_text',
+            '<REGION_PROPOSAL>': 'bboxes'
+        }
+        self.task_prompts_without_inputs = {
+            '<OCR>': 'What is the text in the image?',
+            '<OCR_WITH_REGION>': 'What is the text in the image, with regions?',
+            '<CAPTION>': 'What does the image describe?',
+            '<DETAILED_CAPTION>': 'Describe in detail what is shown in the image.',
+            '<MORE_DETAILED_CAPTION>': 'Describe with a paragraph what is shown in the image.',
+            '<OD>': 'Locate the objects with category name in the image.',
+            '<DENSE_REGION_CAPTION>': 'Locate the objects in the image, with their descriptions.',
+            '<REGION_PROPOSAL>': 'Locate the region proposals in the image.'
+        }
+        self.task_prompts_with_input = {
+            '<CAPTION_TO_PHRASE_GROUNDING>': "Locate the phrases in the caption: {input}",
+            '<REFERRING_EXPRESSION_SEGMENTATION>': 'Locate {input} in the image with mask',
+            '<REGION_TO_SEGMENTATION>': 'What is the polygon mask of region {input}',
+            '<OPEN_VOCABULARY_DETECTION>': 'Locate {input} in the image.',
+            '<REGION_TO_CATEGORY>': 'What is the region {input}?',
+            '<REGION_TO_DESCRIPTION>': 'What does the region {input} describe?',
+            '<REGION_TO_OCR>': 'What text is in the region {input}?',
+        }
+        self.post_processor = Florence2PostProcesser(tokenizer=tokenizer)
+        super().__init__(image_processor, tokenizer)
+    def _construct_prompts(self, text):
+        # replace the task tokens with the task prompts if task token is in the text
+        prompts = []
+        for _text in text:
+            # 1. fixed task prompts without additional inputs
+            for task_token, task_prompt in self.task_prompts_without_inputs.items():
+                if task_token in _text:
+                    assert _text == task_token, f"Task token {task_token} should be the only token in the text."
+                    _text = task_prompt
+                    break
+            # 2. task prompts with additional inputs
+            for task_token, task_prompt in self.task_prompts_with_input.items():
+                if task_token in _text:
+                    _text = task_prompt.format(input=_text.replace(task_token, ''))
+                    break
+            prompts.append(_text)
+        return prompts
+    def __call__(
+        self,
+        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
+        images: ImageInput = None,
+        tokenize_newline_separately: bool = True,
+        padding: Union[bool, str, PaddingStrategy] = False,
+        truncation: Union[bool, str, TruncationStrategy] = None,
+        max_length=None,
+        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
+        do_resize: bool = None,
+        do_normalize: bool = None,
+        image_mean: Optional[Union[float, List[float]]] = None,
+        image_std: Optional[Union[float, List[float]]] = None,
+        data_format: Optional["ChannelDimension"] = "channels_first",  # noqa: F821
+        input_data_format: Optional[
+            Union[str, "ChannelDimension"]  # noqa: F821
+        ] = None,
+        resample: "PILImageResampling" = None,  # noqa: F821
+        do_convert_rgb: bool = None,
+        do_thumbnail: bool = None,
+        do_align_long_axis: bool = None,
+        do_rescale: bool = None,
+    ) -> BatchFeature:
+        """
+        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
+        and `kwargs` arguments to BartTokenizerFast's [`~BartTokenizerFast.__call__`] if `text` is not `None` to encode
+        the text. To prepare the image(s), this method forwards the `images` and `kwrags` arguments to
+        CLIPImageProcessor's [`~CLIPImageProcessor.__call__`] if `images` is not `None`. Please refer to the doctsring
+        of the above two methods for more information.
+        Args:
+            text (`str`, `List[str]`, `List[List[str]]`):
+                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
+                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
+                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
+            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
+                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
+                tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
+                number of channels, H and W are image height and width.
+            tokenize_newline_separately (`bool`, defaults to `True`):
+                Adds a separately tokenized '\n' at the end of the prompt.
+            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
+                Select a strategy to pad the returned sequences (according to the model's padding side and padding
+                index) among:
+                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+                  sequence if provided).
+                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
+                  acceptable input length for the model if that argument is not provided.
+                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
+                  lengths).
+            max_length (`int`, *optional*):
+                Maximum length of the returned list and optionally padding length (see above).
+            truncation (`bool`, *optional*):
+                Activates truncation to cut input sequences longer than `max_length` to `max_length`.
+            return_tensors (`str` or [`~utils.TensorType`], *optional*):
+                If set, will return tensors of a particular framework. Acceptable values are:
+                - `'tf'`: Return TensorFlow `tf.constant` objects.
+                - `'pt'`: Return PyTorch `torch.Tensor` objects.
+                - `'np'`: Return NumPy `np.ndarray` objects.
+                - `'jax'`: Return JAX `jnp.ndarray` objects.
+        Returns:
+            [`BatchFeature`]: A [`BatchFeature`] with the following fields:
+            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. If `suffix`
+              is provided, the `input_ids` will also contain the suffix input ids.
+            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
+              `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
+              `None`).
+            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
+            - **labels** -- Labels compatible with training if `suffix` is not None
+        """
+        return_token_type_ids = False
+        if images is None:
+            raise ValueError("`images` are expected as arguments to a `Florence2Processor` instance.")
+        if text is None:
+            logger.warning_once(
+                "You are using Florence-2 without a text prompt."
+            )
+            text = ""
+        if isinstance(text, List) and isinstance(images, List):
+            if len(images) < len(text):
+                raise ValueError(
+                    f"Received {len(images)} images for {len(text)} prompts. Each prompt should be associated with an image."
+                )
+        if _is_str_or_image(text):
+            text = [text]
+        elif isinstance(text, list) and _is_str_or_image(text[0]):
+            pass
+        pixel_values = self.image_processor(
+            images,
+            do_resize=do_resize,
+            do_normalize=do_normalize,
+            return_tensors=return_tensors,
+            image_mean=image_mean,
+            image_std=image_std,
+            input_data_format=input_data_format,
+            data_format=data_format,
+            resample=resample,
+            do_convert_rgb=do_convert_rgb,
+        )["pixel_values"]
+        if max_length is not None:
+            max_length -= self.image_seq_length  # max_length has to account for the image tokens
+        text = self._construct_prompts(text)
+        inputs = self.tokenizer(
+            text,
+            return_tensors=return_tensors,
+            padding=padding,
+            max_length=max_length,
+            truncation=truncation,
+            return_token_type_ids=return_token_type_ids,
+        )
+        return_data = {**inputs, "pixel_values": pixel_values}
+        if return_token_type_ids:
+            labels = inputs["input_ids"].masked_fill(inputs["token_type_ids"] == 0, -100)
+            return_data.update({"labels": labels})
+        return BatchFeature(data=return_data)
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->Florence2
+    def batch_decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to BartTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
+        refer to the docstring of this method for more information.
+        """
+        return self.tokenizer.batch_decode(*args, **kwargs)
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.decode with CLIP->Florence2
+    def decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to BartTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
+        the docstring of this method for more information.
+        """
+        return self.tokenizer.decode(*args, **kwargs)
+    @property
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.model_input_names with CLIP->Florence2
+    def model_input_names(self):
+        tokenizer_input_names = self.tokenizer.model_input_names
+        image_processor_input_names = self.image_processor.model_input_names
+        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
+    def post_process_generation(self, text, task, image_size):
+        """
+        Post-process the output of the model to each of the task outputs.
+        Args:
+            text (`str`): The text to post-process.
+            task (`str`): The task to post-process the text for.
+            image_size (`Tuple[int, int]`): The size of the image. height x width.
+        """
+        task_answer_post_processing_type = self.tasks_answer_post_processing_type.get(task, 'pure_text')
+        task_answer = self.post_processor(
+            text=text,
+            image_size=image_size,
+            parse_tasks=task_answer_post_processing_type,
+        )[task_answer_post_processing_type]
+        if task_answer_post_processing_type == 'pure_text':
+            final_answer = task_answer
+            # remove the special tokens
+            final_answer = final_answer.replace('<s>', '').replace('</s>', '')
+        elif task_answer_post_processing_type in ['od', 'description_with_bboxes', 'bboxes']:
+            od_instances = task_answer
+            bboxes_od = [_od_instance['bbox'] for _od_instance in od_instances]
+            labels_od = [str(_od_instance['cat_name']) for _od_instance in od_instances]
+            final_answer = {'bboxes': bboxes_od, 'labels': labels_od}
+        elif task_answer_post_processing_type in ['ocr']:
+            bboxes = [_od_instance['quad_box'] for _od_instance in task_answer]
+            labels = [str(_od_instance['text']) for _od_instance in task_answer]
+            final_answer = {'quad_boxes': bboxes, 'labels': labels}
+        elif task_answer_post_processing_type in ['phrase_grounding']:
+            bboxes = []
+            labels = []
+            for _grounded_phrase in task_answer:
+                for _bbox in _grounded_phrase['bbox']:
+                    bboxes.append(_bbox)
+                    labels.append(_grounded_phrase['cat_name'])
+            final_answer = {'bboxes': bboxes, 'labels': labels}
+        elif task_answer_post_processing_type in ['description_with_polygons', 'polygons']:
+            labels = []
+            polygons = []
+            for result in task_answer:
+                label = result['cat_name']
+                _polygons = result['polygons']
+                labels.append(label)
+                polygons.append(_polygons)
+            final_answer = {'polygons': polygons, 'labels': labels}
+        elif task_answer_post_processing_type in ['description_with_bboxes_or_polygons']:
+            bboxes = []
+            bboxes_labels = []
+            polygons = []
+            polygons_labels = []
+            for result in task_answer:
+                label = result['cat_name']
+                if 'polygons' in result:
+                    _polygons = result['polygons']
+                    polygons.append(_polygons)
+                    polygons_labels.append(label)
+                else:
+                    _bbox = result['bbox']
+                    bboxes.append(_bbox)
+                    bboxes_labels.append(label)
+            final_answer = {'bboxes': bboxes, 'bboxes_labels': bboxes_labels, 'polygons': polygons, 'polygons_labels': polygons_labels}
+        else:
+            raise ValueError('Unknown task answer post processing type: {}'.format(task_answer_post_processing_type))
+        final_answer = {
+            task: final_answer}
+        return final_answer
+class BoxQuantizer(object):
+    """
+    Convert boxes between image-space coordinates and integer bin indices.
+    Args:
+        mode: Quantization mode. Only 'floor' is implemented; 'round' raises
+            NotImplementedError and any other value raises ValueError.
+        bins: Pair (bins_w, bins_h) with the number of bins along width and height.
+    """
+    def __init__(self, mode, bins):
+        self.mode = mode
+        self.bins = bins
+    def quantize(self, boxes: torch.Tensor, size):
+        """
+        Map boxes to bin indices.
+        Args:
+            boxes: Tensor whose last dimension holds (xmin, ymin, xmax, ymax).
+            size: Pair (width, height) of the image the boxes refer to.
+        Returns:
+            Integer tensor with the same layout as `boxes`; x values are clamped
+            to [0, bins_w - 1] and y values to [0, bins_h - 1].
+        """
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)  # Shape: 4 * [N, 1].
+        if self.mode == 'floor':
+            # floor() picks the bin containing the coordinate; clamp() keeps
+            # coordinates lying outside the image inside the valid bin range.
+            quantized_xmin = (
+                xmin / size_per_bin_w).floor().clamp(0, bins_w - 1)
+            quantized_ymin = (
+                ymin / size_per_bin_h).floor().clamp(0, bins_h - 1)
+            quantized_xmax = (
+                xmax / size_per_bin_w).floor().clamp(0, bins_w - 1)
+            quantized_ymax = (
+                ymax / size_per_bin_h).floor().clamp(0, bins_h - 1)
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        quantized_boxes = torch.cat(
+            (quantized_xmin, quantized_ymin, quantized_xmax, quantized_ymax), dim=-1
+        ).int()
+        return quantized_boxes
+    def dequantize(self, boxes: torch.Tensor, size):
+        """
+        Map bin indices back to image-space coordinates.
+        Args:
+            boxes: Tensor whose last dimension holds the four bin indices
+                (xmin, ymin, xmax, ymax).
+            size: Pair (width, height) of the target image.
+        Returns:
+            Tensor with the same layout as `boxes` holding the centre of each bin.
+            No clamping is applied here.
+        """
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)  # Shape: 4 * [N, 1].
+        if self.mode == 'floor':
+            # Add 0.5 to use the center position of the bin as the coordinate.
+            dequantized_xmin = (xmin + 0.5) * size_per_bin_w
+            dequantized_ymin = (ymin + 0.5) * size_per_bin_h
+            dequantized_xmax = (xmax + 0.5) * size_per_bin_w
+            dequantized_ymax = (ymax + 0.5) * size_per_bin_h
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        dequantized_boxes = torch.cat(
+            (dequantized_xmin, dequantized_ymin,
+             dequantized_xmax, dequantized_ymax), dim=-1
+        )
+        return dequantized_boxes
+class CoordinatesQuantizer(object):
+    """
+    Quantize and dequantize point coordinates of shape (N, 2).
+    Args:
+        mode: Quantization mode. Only 'floor' is implemented; 'round' raises
+            NotImplementedError and any other value raises ValueError.
+        bins: Pair (bins_w, bins_h) with the number of bins along width and height.
+    """
+    def __init__(self, mode, bins):
+        self.mode = mode
+        self.bins = bins
+    def quantize(self, coordinates: torch.Tensor, size):
+        """
+        Map (x, y) points to bin indices.
+        Args:
+            coordinates: Tensor whose last dimension is 2, holding (x, y).
+            size: Pair (width, height) of the image the points refer to.
+        Returns:
+            Integer tensor of bin indices; x is clamped to [0, bins_w - 1]
+            and y to [0, bins_h - 1].
+        Raises:
+            AssertionError: If the last dimension of `coordinates` is not 2.
+        """
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        assert coordinates.shape[-1] == 2, 'coordinates should be shape (N, 2)'
+        x, y = coordinates.split(1, dim=-1)  # Shape: 2 * [N, 1].
+        if self.mode == 'floor':
+            quantized_x = (x / size_per_bin_w).floor().clamp(0, bins_w - 1)
+            quantized_y = (y / size_per_bin_h).floor().clamp(0, bins_h - 1)
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        quantized_coordinates = torch.cat(
+            (quantized_x, quantized_y), dim=-1
+        ).int()
+        return quantized_coordinates
+    def dequantize(self, coordinates: torch.Tensor, size):
+        """
+        Map bin indices back to image-space (x, y) points.
+        Args:
+            coordinates: Tensor whose last dimension is 2, holding (x_bin, y_bin).
+            size: Pair (width, height) of the target image.
+        Returns:
+            Tensor with the same layout holding the centre of each bin.
+        Raises:
+            AssertionError: If the last dimension of `coordinates` is not 2.
+        """
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        assert coordinates.shape[-1] == 2, 'coordinates should be shape (N, 2)'
+        x, y = coordinates.split(1, dim=-1)  # Shape: 2 * [N, 1].
+        if self.mode == 'floor':
+            # Add 0.5 to use the center position of the bin as the coordinate.
+            dequantized_x = (x + 0.5) * size_per_bin_w
+            dequantized_y = (y + 0.5) * size_per_bin_h
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        dequantized_coordinates = torch.cat(
+            (dequantized_x, dequantized_y), dim=-1
+        )
+        return dequantized_coordinates
+class Florence2PostProcesser(object):
+    """
+    Florence-2 post-processing: converts a text prediction into task-specific results.
+    Args:
+        tokenizer: Optional tokenizer used for decoding token ids to text spans.
+    The configuration is not a constructor argument; it is built internally by
+    `_create_default_config()`. A sample of the configuration layout:
+        sample config:
+            UNIFIED_POST_PROCESS:
+                # common configs
+                NUM_BBOX_HEIGHT_BINS: 1000
+                NUM_BBOX_WIDTH_BINS: 1000
+                COORDINATES_HEIGHT_BINS: 1000
+                COORDINATES_WIDTH_BINS: 1000
+                # task specific configs, override the common configs
+                PARSE_TASKS:
+                    - TASK_NAME: 'video_dense_caption'
+                      PATTERN: 'r<time_(\d+)><time_(\d+)>([a-zA-Z0-9 ]+)'
+                      SCORE_MODE: 'avg_cat_name_scores'
+                      NUM_BINS: 100
+                    - TASK_NAME: 'od'
+                      PATTERN: 'r<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>([a-zA-Z0-9 ]+)'
+                      SCORE_MODE: 'avg_cat_name_scores'
+    Returns:
+        parsed_dict (dict): A dict of parsed results (returned by calling the instance).
+    """
+    def __init__(
+        self,
+        tokenizer=None
+    ):
+        """
+        Build the default configuration, the task lookup tables and the quantizers.
+        Args:
+            tokenizer: Optional tokenizer. When given, its `all_special_tokens`
+                are cached as a set in `self.all_special_tokens`.
+        """
+        parse_tasks = []
+        parse_task_configs = {}
+        config = self._create_default_config()
+        # Index the per-task configs by task name, keeping the configured order.
+        for task in config['PARSE_TASKS']:
+            parse_tasks.append(task['TASK_NAME'])
+            parse_task_configs[task['TASK_NAME']] = task
+        self.config = config
+        self.parse_tasks = parse_tasks
+        self.parse_tasks_configs = parse_task_configs
+        self.tokenizer =  tokenizer
+        # NOTE(review): `all_special_tokens` is only defined when a tokenizer is
+        # passed, but `decode_with_spans` reads it unconditionally.
+        if self.tokenizer is not None:
+            self.all_special_tokens = set(self.tokenizer.all_special_tokens)
+        self.init_quantizers()
+        self.black_list_of_phrase_grounding = self._create_black_list_of_phrase_grounding()
+    def _create_black_list_of_phrase_grounding(self):
+        black_list = {}
+        if 'phrase_grounding' in self.parse_tasks and self.parse_tasks_configs['phrase_grounding']['FILTER_BY_BLACK_LIST']:
+            black_list =  set(
+                ['it', 'I', 'me', 'mine',
+                 'you', 'your', 'yours',
+                 'he', 'him', 'his',
+                 'she', 'her', 'hers',
+                 'they', 'them', 'their', 'theirs',
+                 'one', 'oneself',
+                 'we', 'us', 'our', 'ours',
+                 'you', 'your', 'yours',
+                 'they', 'them', 'their', 'theirs',
+                 'mine', 'yours', 'his', 'hers', 'its',
+                 'ours', 'yours', 'theirs',
+                 'myself', 'yourself', 'himself', 'herself', 'itself',
+                 'ourselves', 'yourselves', 'themselves',
+                 'this', 'that',
+                 'these', 'those',
+                 'who', 'whom', 'whose', 'which', 'what',
+                 'who', 'whom', 'whose', 'which', 'that',
+                 'all', 'another', 'any', 'anybody', 'anyone', 'anything',
+                 'each', 'everybody', 'everyone', 'everything',
+                 'few', 'many', 'nobody', 'none', 'one', 'several',
+                 'some', 'somebody', 'someone', 'something',
+                 'each other', 'one another',
+                 'myself', 'yourself', 'himself', 'herself', 'itself',
+                 'ourselves', 'yourselves', 'themselves',
+                 'the image', 'image', 'images', 'the', 'a', 'an', 'a group',
+                 'other objects', 'lots', 'a set',
+                 ]
+            )
+        return black_list
+    def _create_default_config(self):
+        config = {
+            'NUM_BBOX_HEIGHT_BINS': 1000,
+            'NUM_BBOX_WIDTH_BINS': 1000,
+            'BOX_QUANTIZATION_MODE': 'floor',
+            'COORDINATES_HEIGHT_BINS': 1000,
+            'COORDINATES_WIDTH_BINS': 1000,
+            'COORDINATES_QUANTIZATION_MODE': 'floor',
+            'PARSE_TASKS': [
+                {
+                    'TASK_NAME': 'od',
+                    'PATTERN': r'([a-zA-Z0-9 ]+)<loc_(\\d+)><loc_(\\d+)><loc_(\\d+)><loc_(\\d+)>'
+                },
+                {
+                    'TASK_NAME': 'ocr',
+                    'PATTERN':  r'(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>',
+                    'AREA_THRESHOLD': 0.00
+                },
+                {
+                    'TASK_NAME': 'phrase_grounding',
+                    'FILTER_BY_BLACK_LIST': True
+                },
+                {
+                    'TASK_NAME': 'pure_text',
+                },
+                {
+                    'TASK_NAME': 'description_with_bboxes',
+                },
+                {
+                    'TASK_NAME': 'description_with_polygons',
+                },
+                {
+                    'TASK_NAME': 'polygons',
+                },
+                {
+                    'TASK_NAME': 'bboxes',
+                },
+                {
+                    'TASK_NAME': 'description_with_bboxes_or_polygons',
+                }
+            ]
+        }
+        return config
+    def init_quantizers(self):
+        """
+        Create `self.box_quantizer` and `self.coordinates_quantizer` from `self.config`.
+        Bin counts default to 1000 and the mode to 'floor' when a key is missing.
+        The COORDINATES_* keys fall back to the corresponding box settings.
+        """
+        # we have box_quantizer (od, grounding) and coordinates_quantizer (ocr, referring_segmentation)
+        num_bbox_height_bins = self.config.get('NUM_BBOX_HEIGHT_BINS', 1000)
+        num_bbox_width_bins = self.config.get('NUM_BBOX_WIDTH_BINS', 1000)
+        box_quantization_mode = self.config.get('BOX_QUANTIZATION_MODE', 'floor')
+        self.box_quantizer = BoxQuantizer(
+            box_quantization_mode,
+            (num_bbox_width_bins, num_bbox_height_bins),
+        )
+        # The same local names are reused below, but from here on they hold the
+        # coordinate-quantizer settings.
+        num_bbox_height_bins = self.config['COORDINATES_HEIGHT_BINS'] if 'COORDINATES_HEIGHT_BINS' in self.config else self.config.get('NUM_BBOX_HEIGHT_BINS', 1000)
+        num_bbox_width_bins = self.config['COORDINATES_WIDTH_BINS'] if 'COORDINATES_WIDTH_BINS' in self.config else self.config.get('NUM_BBOX_WIDTH_BINS', 1000)
+        box_quantization_mode = self.config.get('COORDINATES_QUANTIZATION_MODE') if 'COORDINATES_QUANTIZATION_MODE' in self.config else self.config.get('BOX_QUANTIZATION_MODE', 'floor')
+        self.coordinates_quantizer = CoordinatesQuantizer(
+            box_quantization_mode,
+            (num_bbox_width_bins, num_bbox_height_bins),
+        )
+    def decode_with_spans(self, tokenizer, token_ids):
+        """
+        Decode token ids to text and record the character span of every token.
+        Args:
+            tokenizer: A Bart or T5 tokenizer (slow or fast variant).
+            token_ids: Sequence of token ids to decode.
+        Returns:
+            (text, spans): the concatenated text and, per token, a
+            (start, end) pair of character offsets into `text`, end exclusive.
+        Raises:
+            ValueError: If a non-special token is met and the tokenizer is
+                neither a Bart nor a T5 tokenizer.
+        """
+        filtered_tokens = tokenizer.convert_ids_to_tokens(
+            token_ids, skip_special_tokens=False)
+        assert len(filtered_tokens) == len(token_ids)
+        # To avoid mixing byte-level and unicode for byte-level BPE
+        # we need to build string separately for added tokens and byte-level tokens
+        # cf. https://github.com/huggingface/transformers/issues/1133
+        sub_texts = []
+        for token in filtered_tokens:
+            # NOTE(review): self.all_special_tokens is only set by __init__ when
+            # a tokenizer was passed to the constructor.
+            if token in self.all_special_tokens:
+                sub_texts.append(token)
+            else:
+                if isinstance(tokenizer, (BartTokenizer, BartTokenizerFast)):
+                    sub_text = tokenizer.convert_tokens_to_string([token])
+                elif isinstance(tokenizer, (T5Tokenizer, T5TokenizerFast)):
+                    # Ref: https://github.com/google/sentencepiece#whitespace-is-treated-as-a-basic-symbol
+                    # Note: Do not strip sub_text as it may have functional whitespace
+                    sub_text = token.replace('▁', ' ')
+                else:
+                    raise ValueError(f'type {type(tokenizer)} not supported')
+                sub_texts.append(sub_text)
+        text = ''
+        spans = []
+        # Concatenate the pieces, remembering where each one lands in the text.
+        for sub_text in sub_texts:
+            span = (len(text), len(text) + len(sub_text))  # [start index, end index).
+            text += sub_text
+            spans.append(span)
+        # Text format:
+        # 1. T5Tokenizer/T5TokenizerFast:
+        #      "<loc_1><loc_2><loc_3><loc_4> transplanting dog<loc_1><loc_2><loc_3><loc_4> cat</s>"
+        #    Equivalent to t5_tokenizer.decode(input_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False, spaces_between_special_tokens=False)
+        # 2. BartTokenizer (need to double check):
+        #      "<s><loc_1><loc_2><loc_3><loc_4>transplanting dog<loc_1><loc_2><loc_3><loc_4>cat</s>"
+        #    Equivalent to bart_tokenizer.decode(input_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False, spaces_between_special_tokens=False)
+        return text, spans
+    def parse_od_from_text_and_spans(
+        self,
+        text,
+        pattern,
+        image_size,
+        phrase_centric=False
+    ):
+        """
+        Parse object-detection instances out of `text` using `pattern`.
+        Args:
+            text: Decoded model output.
+            pattern: Regex with five groups: four location bins and one name.
+            image_size: (width, height) used to dequantize the location bins.
+            phrase_centric: If True the name is group 1 and the bins are groups
+                2-5; otherwise the bins are groups 1-4 and the name is group 5.
+        Returns:
+            list of dicts with 'bbox' (dequantized box as a list) and
+            'cat_name' (lower-cased, stripped name).
+        """
+        parsed = list(re.finditer(pattern, text))
+        instances = []
+        for i in range(len(parsed)):
+            # Prepare instance.
+            instance = {}
+            if phrase_centric:
+                bbox_bins = [int(parsed[i].group(j)) for j in range(2, 6)]
+            else:
+                bbox_bins = [int(parsed[i].group(j)) for j in range(1, 5)]
+            instance['bbox'] = self.box_quantizer.dequantize(
+                boxes=torch.tensor(bbox_bins),
+                size=image_size
+            ).tolist()
+            if phrase_centric:
+                instance['cat_name'] = parsed[i].group(1).lower().strip()
+            else:
+                instance['cat_name'] = parsed[i].group(5).lower().strip()
+            instances.append(instance)
+        return instances
+    def parse_ocr_from_text_and_spans(self,
+                                    text,
+                                     pattern,
+                                     image_size,
+                                     area_threshold=-1.0,
+        ):
+        bboxes = []
+        labels = []
+        text = text.replace('<s>', '')
+        # ocr with regions
+        parsed = re.findall(pattern, text)
+        instances = []
+        image_width, image_height = image_size
+        for ocr_line in parsed:
+            ocr_content = ocr_line[0]
+            quad_box = ocr_line[1:]
+            quad_box = [int(i) for i in quad_box]
+            quad_box = self.coordinates_quantizer.dequantize(
+                torch.tensor(np.array(quad_box).reshape(-1, 2)),
+                size=image_size
+            ).reshape(-1).tolist()
+            if area_threshold > 0:
+                x_coords = [i for i in quad_box[0::2]]
+                y_coords = [i for i in quad_box[1::2]]
+                # apply the Shoelace formula
+                area = 0.5 * abs(sum(x_coords[i] * y_coords[i + 1] - x_coords[i + 1] * y_coords[i] for i in range(4 - 1)))
+                if area < (image_width * image_height) * area_threshold:
+                    continue
+            bboxes.append(quad_box)
+            labels.append(ocr_content)
+            instances.append({
+                'quad_box': quad_box,
+                'text': ocr_content,
+            })
+        return instances
+    def parse_phrase_grounding_from_text_and_spans(self, text, pattern, image_size):
+        # ignore <s> </s> and <pad>
+        cur_span = 0
+        if text.startswith('<s>'):
+            cur_span += 3
+        text = text.replace('<s>', '')
+        text = text.replace('</s>', '')
+        text = text.replace('<pad>', '')
+        pattern = r"([^<]+(?:<loc_\d+>){4,})"
+        phrases = re.findall(pattern, text)
+        # pattern should be text pattern and od pattern
+        pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_)'
+        box_pattern = r'<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
+        instances = []
+        for pharse_text in phrases:
+            phrase_text_strip = pharse_text.replace('<ground>', '', 1)
+            phrase_text_strip = pharse_text.replace('<obj>', '', 1)
+            if phrase_text_strip == '':
+                cur_span += len(pharse_text)
+                continue
+            # Prepare instance.
+            instance = {}
+            # parse phrase, get string
+            phrase = re.search(pattern, phrase_text_strip)
+            if phrase is None:
+                cur_span += len(pharse_text)
+                continue
+            # parse bboxes by box_pattern
+            bboxes_parsed = list(re.finditer(box_pattern, pharse_text))
+            if len(bboxes_parsed) == 0:
+                cur_span += len(pharse_text)
+                continue
+            phrase = phrase.group()
+            # remove leading and trailing spaces
+            phrase = phrase.strip()
+            if phrase in self.black_list_of_phrase_grounding:
+                cur_span += len(pharse_text)
+                continue
+            # a list of list
+            bbox_bins = [[int(_bboxes_parsed.group(j)) for j in range(1, 5)] for _bboxes_parsed in bboxes_parsed]
+            instance['bbox'] = self.box_quantizer.dequantize(
+                boxes=torch.tensor(bbox_bins),
+                size=image_size
+            ).tolist()
+            # exclude non-ascii characters
+            phrase = phrase.encode('ascii',errors='ignore').decode('ascii')
+            instance['cat_name'] = phrase
+            instances.append(instance)
+        return instances
+    def parse_description_with_bboxes_from_text_and_spans(self, text, pattern, image_size, allow_empty_phrase=False):
+        # temporary parse solution, split by '.'
+        # ignore <s> </s> and <pad>
+        text = text.replace('<s>', '')
+        text = text.replace('</s>', '')
+        text = text.replace('<pad>', '')
+        if allow_empty_phrase:
+            pattern = rf"(?:(?:<loc_\d+>){{4,}})"
+        else:
+            pattern = r"([^<]+(?:<loc_\d+>){4,})"
+        phrases = re.findall(pattern, text)
+        # pattern should be text pattern and od pattern
+        pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_)'
+        box_pattern = r'<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
+        instances = []
+        for pharse_text in phrases:
+            phrase_text_strip = pharse_text.replace('<ground>', '', 1)
+            phrase_text_strip = pharse_text.replace('<obj>', '', 1)
+            if phrase_text_strip == '' and not allow_empty_phrase:
+                continue
+            # parse phrase, get string
+            phrase = re.search(pattern, phrase_text_strip)
+            if phrase is None:
+                continue
+            phrase = phrase.group()
+            # remove leading and trailing spaces
+            phrase = phrase.strip()
+            # parse bboxes by box_pattern
+            bboxes_parsed = list(re.finditer(box_pattern, pharse_text))
+            if len(bboxes_parsed) == 0:
+                continue
+            # a list of list
+            bbox_bins = [[int(_bboxes_parsed.group(j)) for j in range(1, 5)] for _bboxes_parsed in bboxes_parsed]
+            bboxes = self.box_quantizer.dequantize(
+                boxes=torch.tensor(bbox_bins),
+                size=image_size
+            ).tolist()
+            phrase = phrase.encode('ascii',errors='ignore').decode('ascii')
+            for _bboxes in bboxes:
+                # Prepare instance.
+                instance = {}
+                instance['bbox'] = _bboxes
+                # exclude non-ascii characters
+                instance['cat_name'] = phrase
+                instances.append(instance)
+        return instances
+    def parse_description_with_polygons_from_text_and_spans(self, text, pattern, image_size,
+                                                            allow_empty_phrase=False,
+                                                            polygon_sep_token='<sep>',
+                                                            polygon_start_token='<poly>',
+                                                            polygon_end_token='</poly>',
+                                                            with_box_at_start=False,
+                                                            ):
+        """
+        Parse descriptions followed by polygons.
+        Args:
+            text: Decoded model output; '<s>', '</s>' and '<pad>' are removed.
+            pattern: Overwritten inside this method, so the passed value is unused.
+            image_size: (width, height) used to dequantize the location bins.
+            allow_empty_phrase: If True, runs of location/polygon tokens with no
+                preceding text are accepted.
+            polygon_sep_token: Token separating the polygons of one instance.
+            polygon_start_token: Token opening one polygons instance.
+            polygon_end_token: Token closing one polygons instance.
+            with_box_at_start: If True, the first four values of the first
+                polygon of an instance are taken as its bounding box.
+        Returns:
+            list of dicts with 'cat_name', 'polygons' (list of flat lists of
+            dequantized x, y values) and, when a box was collected, 'bbox'.
+        """
+        # ref_seg format: '<expression><x1><y1><x2><y2><><><sep><><><><>'
+        # ignore <s> </s> and <pad>
+        text = text.replace('<s>', '')
+        text = text.replace('</s>', '')
+        text = text.replace('<pad>', '')
+        if allow_empty_phrase:
+            pattern = rf"(?:(?:<loc_\d+>|{re.escape(polygon_sep_token)}|{re.escape(polygon_start_token)}|{re.escape(polygon_end_token)}){{4,}})"
+        else:
+            # [^<]+: This part matches one or more characters that are not the < symbol.
+            # The ^ inside the square brackets [] is a negation, meaning it matches anything except <.
+            #
+            pattern = rf"([^<]+(?:<loc_\d+>|{re.escape(polygon_sep_token)}|{re.escape(polygon_start_token)}|{re.escape(polygon_end_token)}){{4,}})"
+        phrases = re.findall(pattern, text)
+        phrase_string_pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_|<poly>)'
+        box_pattern =  rf'((?:<loc_\d+>)+)(?:{re.escape(polygon_sep_token)}|$)'
+        # one polygons instance is separated by polygon_start_token and polygon_end_token
+        polygons_instance_pattern = rf'{re.escape(polygon_start_token)}(.*?){re.escape(polygon_end_token)}'
+        instances = []
+        for phrase_text in phrases:
+            # exclude loc_\d+>
+            # need to get span if want to include category score
+            phrase_text_strip = re.sub(r'^loc_\d+>', '', phrase_text, count=1)
+            # phrase = phrase.replace('<poly>', '')
+            # phrase = phrase.replace('poly>', '')
+            if phrase_text_strip == '' and not allow_empty_phrase:
+                continue
+            # parse phrase, get string
+            phrase = re.search(phrase_string_pattern, phrase_text_strip)
+            if phrase is None:
+                continue
+            phrase = phrase.group()
+            # remove leading and trailing spaces
+            phrase = phrase.strip()
+            # parse bboxes by box_pattern
+            # split by polygon_start_token and polygon_end_token first using polygons_instance_pattern
+            if polygon_start_token in phrase_text and polygon_end_token in phrase_text:
+                polygons_instances_parsed = list(re.finditer(polygons_instance_pattern, phrase_text))
+            else:
+                # No start/end tokens: treat the whole phrase text as one instance.
+                polygons_instances_parsed = [phrase_text]
+            for _polygons_instances_parsed in polygons_instances_parsed:
+                # Prepare instance.
+                instance = {}
+                # polygons_parsed= list(re.finditer(box_pattern, phrase_text))
+                # Entries are either the raw phrase string or a regex match
+                # whose group 1 is the content between the start/end tokens.
+                if isinstance(_polygons_instances_parsed, str):
+                    polygons_parsed= list(re.finditer(box_pattern, _polygons_instances_parsed))
+                else:
+                    polygons_parsed= list(re.finditer(box_pattern, _polygons_instances_parsed.group(1)))
+                if len(polygons_parsed) == 0:
+                    continue
+                # a list of list (polygon)
+                bbox = []
+                polygons = []
+                for _polygon_parsed in polygons_parsed:
+                    # group 1: the whole run of consecutive <loc_N> tokens
+                    _polygon = _polygon_parsed.group(1)
+                    # parse into list of int
+                    _polygon = [int(_loc_parsed.group(1)) for _loc_parsed in re.finditer(r'<loc_(\d+)>', _polygon)]
+                    if with_box_at_start and len(bbox) == 0:
+                        if len(_polygon) > 4:
+                            # the first four values are the box, the rest is the polygon
+                            bbox = _polygon[:4]
+                            _polygon = _polygon[4:]
+                        else:
+                            # no valid bbox prediction
+                            bbox = [0, 0, 0, 0]
+                    # abandon last element if is not paired
+                    if len(_polygon) % 2 == 1:
+                        _polygon = _polygon[:-1]
+                    # reshape into (n, 2)
+                    _polygon = self.coordinates_quantizer.dequantize(
+                        torch.tensor(np.array(_polygon).reshape(-1, 2)),
+                        size=image_size
+                    ).reshape(-1).tolist()
+                    # reshape back
+                    polygons.append(_polygon)
+                instance['cat_name'] = phrase
+                instance['polygons'] = polygons
+                if len(bbox) != 0:
+                    instance['bbox'] = self.box_quantizer.dequantize(
+                        boxes=torch.tensor([bbox]),
+                        size=image_size
+                    ).tolist()[0]
+                instances.append(instance)
+        return instances
+    def __call__(
+        self,
+        text=None,
+        image_size=None,
+        parse_tasks=None,
+    ):
+        """
+        Parse `text` for the requested tasks.
+        Args:
+            text: model outputs
+            image_size: (width, height)
+            parse_tasks: a task name or a list of tasks to parse, if None, parse all tasks.
+        Returns:
+            dict with the input under 'text' plus one entry per parsed task,
+            keyed by the task name.
+        Raises:
+            AssertionError: If `text` is None or a requested task is not configured.
+            ValueError: If a selected task has no parsing branch below.
+        NOTE(review): the default configuration lists an 'od' task, but no branch
+        here handles it, so it reaches the final `raise`. With the default
+        configuration this means `parse_tasks=None` (and `parse_tasks='od'`)
+        raises ValueError. Confirm whether an 'od' branch is intended.
+        """
+        if parse_tasks is not None:
+            # Accept a single task name as well as a list of names.
+            if isinstance(parse_tasks, str):
+                parse_tasks = [parse_tasks]
+            for _parse_task in parse_tasks:
+                assert _parse_task in self.parse_tasks, f'parse task {_parse_task} not supported'
+        # sequence or text should be provided
+        assert text is not None, 'text should be provided'
+        parsed_dict = {
+            'text': text
+        }
+        for task in self.parse_tasks:
+            # Skip configured tasks that were not requested.
+            if parse_tasks is not None and task not in parse_tasks:
+                continue
+            pattern = self.parse_tasks_configs[task].get('PATTERN', None)
+            if task == 'ocr':
+                instances = self.parse_ocr_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                    area_threshold=self.parse_tasks_configs[task].get('AREA_THRESHOLD', 0.0),
+                )
+                parsed_dict['ocr'] = instances
+            elif task == 'phrase_grounding':
+                instances = self.parse_phrase_grounding_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                )
+                parsed_dict['phrase_grounding'] = instances
+            elif task == 'pure_text':
+                parsed_dict['pure_text'] = text
+            elif task == 'description_with_bboxes':
+                instances = self.parse_description_with_bboxes_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                )
+                parsed_dict['description_with_bboxes'] = instances
+            elif task == 'description_with_polygons':
+                instances = self.parse_description_with_polygons_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                )
+                parsed_dict['description_with_polygons'] = instances
+            elif task == 'polygons':
+                instances = self.parse_description_with_polygons_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                    allow_empty_phrase=True,
+                )
+                parsed_dict['polygons'] = instances
+            elif task == 'bboxes':
+                instances = self.parse_description_with_bboxes_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                    allow_empty_phrase=True,
+                )
+                parsed_dict['bboxes'] = instances
+            elif task == 'description_with_bboxes_or_polygons':
+                if '<poly>' in text:
+                    # only support either polygons or bboxes, not both at the same time
+                    instances = self.parse_description_with_polygons_from_text_and_spans(
+                        text,
+                        pattern=pattern,
+                        image_size=image_size,
+                    )
+                else:
+                    instances = self.parse_description_with_bboxes_from_text_and_spans(
+                        text,
+                        pattern=pattern,
+                        image_size=image_size,
+                    )
+                parsed_dict['description_with_bboxes_or_polygons'] = instances
+            else:
+                raise ValueError("task {} is not supported".format(task))
+        return parsed_dict

florence2/base/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/base/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "model_max_length": 1024
+}

florence2/base/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/large-ft/LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+    MIT License
+    Copyright (c) Microsoft Corporation.
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE

florence2/large-ft/config.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+  "_name_or_path": "florence2",
+  "architectures": [
+    "Florence2ForConditionalGeneration"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_florence2.Florence2Config",
+    "AutoModelForCausalLM": "modeling_florence2.Florence2ForConditionalGeneration"
+  },
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "ignore_index": -100,
+  "model_type": "florence2",
+  "pad_token_id": 1,
+  "projection_dim": 1024,
+  "text_config": {
+      "vocab_size": 51289,
+      "activation_dropout": 0.1,
+      "activation_function": "gelu",
+      "add_bias_logits": false,
+      "add_final_layer_norm": false,
+      "attention_dropout": 0.1,
+      "bos_token_id": 0,
+      "classif_dropout": 0.1,
+      "classifier_dropout": 0.0,
+      "d_model": 1024,
+      "decoder_attention_heads": 16,
+      "decoder_ffn_dim": 4096,
+      "decoder_layerdrop": 0.0,
+      "decoder_layers": 12,
+      "decoder_start_token_id": 2,
+      "dropout": 0.1,
+      "early_stopping": true,
+      "encoder_attention_heads": 16,
+      "encoder_ffn_dim": 4096,
+      "encoder_layerdrop": 0.0,
+      "encoder_layers": 12,
+      "eos_token_id": 2,
+      "forced_eos_token_id": 2,
+      "forced_bos_token_id": 0,
+      "gradient_checkpointing": false,
+      "init_std": 0.02,
+      "is_encoder_decoder": true,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1,
+        "LABEL_2": 2
+      },
+      "max_position_embeddings": 1024,
+      "no_repeat_ngram_size": 3,
+      "normalize_before": false,
+      "num_hidden_layers": 12,
+      "pad_token_id": 1,
+      "scale_embedding": false,
+      "num_beams": 3
+  },
+  "vision_config": {
+    "model_type": "davit",
+    "drop_path_rate": 0.1,
+    "patch_size": [7, 3, 3, 3],
+    "patch_stride": [4, 2, 2, 2],
+    "patch_padding": [3, 1, 1, 1],
+    "patch_prenorm": [false, true, true, true],
+    "enable_checkpoint": false,
+    "dim_embed": [256, 512, 1024, 2048],
+    "num_heads": [8, 16, 32, 64],
+    "num_groups": [8, 16, 32, 64],
+    "depths": [1, 1, 9, 1],
+    "window_size": 12,
+    "projection_dim": 1024,
+    "visual_temporal_embedding": {
+        "type": "COSINE",
+        "max_temporal_embeddings": 100
+    },
+    "image_pos_embed": {
+        "type": "learned_abs_2d",
+        "max_pos_embeddings": 50
+    },
+    "image_feature_source": ["spatial_avg_pool", "temporal_avg_pool"]
+  },
+  "vocab_size": 51289,
+  "torch_dtype": "float16",
+  "transformers_version": "4.41.0.dev0",
+  "is_encoder_decoder": true
+}

florence2/large-ft/configuration_florence2.py ADDED Viewed

	@@ -0,0 +1,340 @@

+# coding=utf-8
+# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import warnings
+""" Florence-2 configuration"""
+from typing import Optional
+from transformers import AutoConfig
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+class Florence2VisionConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`Florence2VisionModel`]. It is used to instantiate a Florence2VisionModel
+    according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+    defaults will yield a similar configuration to that of the Florence2VisionModel architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        drop_path_rate (`float`, *optional*, defaults to 0.1):
+            The dropout rate of the drop path layer.
+        patch_size (`List[int]`, *optional*, defaults to [7, 3, 3, 3]):
+            The patch size of the image.
+        patch_stride (`List[int]`, *optional*, defaults to [4, 2, 2, 2]):
+            The patch stride of the image.
+        patch_padding (`List[int]`, *optional*, defaults to [3, 1, 1, 1]):
+            The patch padding of the image.
+        patch_prenorm (`List[bool]`, *optional*, defaults to [false, true, true, true]):
+            Whether to apply layer normalization before the patch embedding layer.
+        enable_checkpoint (`bool`, *optional*, defaults to False):
+            Whether to enable checkpointing.
+        dim_embed (`List[int]`, *optional*, defaults to [256, 512, 1024, 2048]):
+            The dimension of the embedding layer.
+        num_heads (`List[int]`, *optional*, defaults to [8, 16, 32, 64]):
+            The number of attention heads.
+        num_groups (`List[int]`, *optional*, defaults to [8, 16, 32, 64]):
+            The number of groups.
+        depths (`List[int]`, *optional*, defaults to [1, 1, 9, 1]):
+            The depth of the model.
+        window_size (`int`, *optional*, defaults to 12):
+            The window size of the model.
+        projection_dim (`int`, *optional*, defaults to 1024):
+            The dimension of the projection layer.
+        visual_temporal_embedding (`dict`, *optional*):
+            The configuration of the visual temporal embedding.
+        image_pos_embed (`dict`, *optional*):
+            The configuration of the image position embedding.
+        image_feature_source (`List[str]`, *optional*, defaults to ["spatial_avg_pool", "temporal_avg_pool"]):
+            The source of the image feature.
+    Example:
+    ```python
+    >>> from transformers import Florence2VisionConfig, Florence2VisionModel
+    >>> # Initializing a Florence2 Vision style configuration
+    >>> configuration = Florence2VisionConfig()
+    >>> # Initializing a model (with random weights)
+    >>> model = Florence2VisionModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "florence2_vision"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    def __init__(
+        self,
+        drop_path_rate=0.1,
+        patch_size=[7, 3, 3, 3],
+        patch_stride=[4, 2, 2, 2],
+        patch_padding=[3, 1, 1, 1],
+        patch_prenorm=[False, True, True, True],
+        enable_checkpoint=False,
+        dim_embed=[256, 512, 1024, 2048],
+        num_heads=[8, 16, 32, 64],
+        num_groups=[8, 16, 32, 64],
+        depths=[1, 1, 9, 1],
+        window_size=12,
+        projection_dim=1024,
+        visual_temporal_embedding=None,
+        image_pos_embed=None,
+        image_feature_source=["spatial_avg_pool", "temporal_avg_pool"],
+        **kwargs,
+    ):
+        self.drop_path_rate = drop_path_rate
+        self.patch_size = patch_size
+        self.patch_stride = patch_stride
+        self.patch_padding = patch_padding
+        self.patch_prenorm = patch_prenorm
+        self.enable_checkpoint = enable_checkpoint
+        self.dim_embed = dim_embed
+        self.num_heads = num_heads
+        self.num_groups = num_groups
+        self.depths = depths
+        self.window_size = window_size
+        self.projection_dim = projection_dim
+        self.visual_temporal_embedding = visual_temporal_embedding
+        self.image_pos_embed = image_pos_embed
+        self.image_feature_source = image_feature_source
+        super().__init__(**kwargs)
+class Florence2LanguageConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`Florence2LanguagePreTrainedModel`]. It is used to instantiate a BART
+    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+    defaults will yield a similar configuration to that of the BART
+    [facebook/bart-large](https://huggingface.co/facebook/bart-large) architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vocab_size (`int`, *optional*, defaults to 51289):
+            Vocabulary size of the Florence2Language model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`Florence2LanguageModel`].
+        max_position_embeddings (`int`, *optional*, defaults to 1024):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        encoder_layers (`int`, *optional*, defaults to 12):
+            Number of encoder layers. Also stored as `num_hidden_layers`.
+        encoder_ffn_dim (`int`, *optional*, defaults to 4096):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in the encoder.
+        encoder_attention_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        decoder_layers (`int`, *optional*, defaults to 12):
+            Number of decoder layers.
+        decoder_ffn_dim (`int`, *optional*, defaults to 4096):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in the decoder.
+        decoder_attention_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer decoder.
+        encoder_layerdrop (`float`, *optional*, defaults to 0.0):
+            The LayerDrop probability for the encoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
+            for more details.
+        decoder_layerdrop (`float`, *optional*, defaults to 0.0):
+            The LayerDrop probability for the decoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
+            for more details.
+        activation_function (`str` or `function`, *optional*, defaults to `"gelu"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"silu"` and `"gelu_new"` are supported.
+        d_model (`int`, *optional*, defaults to 1024):
+            Dimensionality of the layers and the pooler layer.
+        dropout (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+        activation_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for activations inside the fully connected layer.
+        init_std (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        classifier_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for classifier.
+        scale_embedding (`bool`, *optional*, defaults to `False`):
+            Whether to scale embeddings; when `True` the scale factor is sqrt(d_model).
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models).
+        num_labels (`int`, *optional*, defaults to 3):
+            The number of labels to use in [`Florence2LanguageForSequenceClassification`].
+        pad_token_id (`int`, *optional*, defaults to 1):
+            Id of the padding token; forwarded to [`PretrainedConfig`].
+        bos_token_id (`int`, *optional*, defaults to 0):
+            Id of the beginning-of-sequence token; forwarded to [`PretrainedConfig`].
+        eos_token_id (`int`, *optional*, defaults to 2):
+            Id of the end-of-sequence token; forwarded to [`PretrainedConfig`].
+        is_encoder_decoder (`bool`, *optional*, defaults to `True`):
+            Forwarded to [`PretrainedConfig`].
+        decoder_start_token_id (`int`, *optional*, defaults to 2):
+            Forwarded to [`PretrainedConfig`].
+        forced_eos_token_id (`int`, *optional*, defaults to 2):
+            The id of the token to force as the last generated token when `max_length` is reached. Usually set to
+            `eos_token_id`.
+    Example:
+    ```python
+    >>> from transformers import Florence2LanguageConfig, Florence2LanguageModel
+    >>> # Initializing a Florence2 Language style configuration
+    >>> configuration = Florence2LanguageConfig()
+    >>> # Initializing a model (with random weights)
+    >>> model = Florence2LanguageModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "florence2_language"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    # Generic attribute names mapped onto the BART-style names this config actually stores.
+    attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
+    def __init__(
+        self,
+        vocab_size=51289,
+        max_position_embeddings=1024,
+        encoder_layers=12,
+        encoder_ffn_dim=4096,
+        encoder_attention_heads=16,
+        decoder_layers=12,
+        decoder_ffn_dim=4096,
+        decoder_attention_heads=16,
+        encoder_layerdrop=0.0,
+        decoder_layerdrop=0.0,
+        activation_function="gelu",
+        d_model=1024,
+        dropout=0.1,
+        attention_dropout=0.0,
+        activation_dropout=0.0,
+        init_std=0.02,
+        classifier_dropout=0.0,
+        scale_embedding=False,
+        use_cache=True,
+        num_labels=3,
+        pad_token_id=1,
+        bos_token_id=0,
+        eos_token_id=2,
+        is_encoder_decoder=True,
+        decoder_start_token_id=2,
+        forced_eos_token_id=2,
+        **kwargs,
+    ):
+        # Model-specific hyper-parameters are stored on the instance before delegating to PretrainedConfig.
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.d_model = d_model
+        self.encoder_ffn_dim = encoder_ffn_dim
+        self.encoder_layers = encoder_layers
+        self.encoder_attention_heads = encoder_attention_heads
+        self.decoder_ffn_dim = decoder_ffn_dim
+        self.decoder_layers = decoder_layers
+        self.decoder_attention_heads = decoder_attention_heads
+        self.dropout = dropout
+        self.attention_dropout = attention_dropout
+        self.activation_dropout = activation_dropout
+        self.activation_function = activation_function
+        self.init_std = init_std
+        self.encoder_layerdrop = encoder_layerdrop
+        self.decoder_layerdrop = decoder_layerdrop
+        self.classifier_dropout = classifier_dropout
+        self.use_cache = use_cache
+        # `num_hidden_layers` mirrors the encoder depth.
+        self.num_hidden_layers = encoder_layers
+        self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
+        # Token ids and generic flags are handled by the base class, together with any extra kwargs.
+        super().__init__(
+            num_labels=num_labels,
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            is_encoder_decoder=is_encoder_decoder,
+            decoder_start_token_id=decoder_start_token_id,
+            forced_eos_token_id=forced_eos_token_id,
+            **kwargs,
+        )
+        # ensure backward compatibility for BART CNN models
+        # Older configs carry the legacy `force_bos_token_to_be_generated` flag instead of `forced_bos_token_id`;
+        # translate it and warn so the config gets re-saved in the newer form.
+        if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
+            self.forced_bos_token_id = self.bos_token_id
+            warnings.warn(
+                f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. "
+                "The config can simply be saved and uploaded again to be fixed."
+            )
+class Florence2Config(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`Florence2ForConditionalGeneration`]. It is used to instantiate an
+    Florence-2 model according to the specified arguments, defining the model architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vision_config (`Florence2VisionConfig`,  *optional*):
+            Custom vision config or dict
+        text_config (`Union[AutoConfig, dict]`, *optional*):
+            The config object of the text backbone.
+        ignore_index (`int`, *optional*, defaults to -100):
+            The ignore index for the loss function.
+        vocab_size (`int`, *optional*, defaults to 51289):
+            Vocabulary size of the Florence2model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`~Florence2ForConditionalGeneration`]
+        projection_dim (`int`, *optional*, defaults to 1024):
+            Dimension of the multimodal projection space.
+    Example:
+    ```python
+    >>> from transformers import Florence2ForConditionalGeneration, Florence2Config, CLIPVisionConfig, BartConfig
+    >>> # Initializing a clip-like vision config
+    >>> vision_config = CLIPVisionConfig()
+    >>> # Initializing a Bart config
+    >>> text_config = BartConfig()
+    >>> # Initializing a Florence-2 configuration
+    >>> configuration = Florence2Config(vision_config, text_config)
+    >>> # Initializing a model from the florence-2 configuration
+    >>> model = Florence2ForConditionalGeneration(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "florence2"
+    is_composition = False
+    def __init__(
+        self,
+        vision_config=None,
+        text_config=None,
+        ignore_index=-100,
+        vocab_size=51289,
+        projection_dim=1024,
+        **kwargs,
+    ):
+        self.ignore_index = ignore_index
+        self.vocab_size = vocab_size
+        self.projection_dim = projection_dim
+        if vision_config is not None:
+            vision_config = PretrainedConfig(**vision_config)
+        self.vision_config = vision_config
+        self.vocab_size = self.vocab_size
+        self.text_config = text_config
+        if text_config is not None:
+            self.text_config = Florence2LanguageConfig(**text_config)
+        super().__init__(**kwargs)

florence2/large-ft/generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "num_beams": 3,
+    "early_stopping": false
+}

florence2/large-ft/modeling_florence2.py ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/large-ft/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "auto_map": {
+    "AutoProcessor": "processing_florence2.Florence2Processor"
+   },
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "return_tensors",
+    "data_format",
+    "input_data_format",
+    "do_convert_rgb"
+  ],
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "do_center_crop": false,
+  "image_processor_type": "CLIPImageProcessor",
+  "image_seq_length": 577,
+  "image_mean": [0.485, 0.456, 0.406],
+  "image_std":  [0.229, 0.224, 0.225],
+  "processor_class": "Florence2Processor",
+  "resample": 3,
+  "size": {
+    "height": 768,
+    "width":768
+  },
+  "crop_size": {
+    "height": 768,
+    "width": 768
+  }
+}

florence2/large-ft/processing_florence2.py ADDED Viewed

	@@ -0,0 +1,1088 @@

+# coding=utf-8
+# Copyright 2024 Microsoft and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Processor class for Florence-2.
+"""
+import re
+import logging
+from typing import List, Optional, Union
+import numpy as np
+import torch
+from transformers.feature_extraction_utils import BatchFeature
+from transformers.image_utils import ImageInput, is_valid_image
+from transformers.processing_utils import ProcessorMixin
+from transformers.tokenization_utils_base import (
+    PaddingStrategy,
+    PreTokenizedInput,
+    TextInput,
+    TruncationStrategy,
+)
+from transformers.utils import TensorType
+logger = logging.getLogger(__name__)
+# Copied from transformers.models.idefics2.processing_idefics2.is_url
+def is_url(val) -> bool:
+    """Return `True` when `val` is a string starting with "http" (a prefix check only, no URL validation)."""
+    return isinstance(val, str) and val.startswith("http")
+# Copied from transformers.models.idefics2.processing_idefics2.is_image_or_image_url
+def is_image_or_image_url(elem):
+    """Return a truthy value when `elem` passes `is_url` or transformers' `is_valid_image`."""
+    return is_url(elem) or is_valid_image(elem)
+def _is_str_or_image(elem):
+    """Return a truthy value when `elem` is a string, or passes `is_image_or_image_url`."""
+    return isinstance(elem, (str)) or is_image_or_image_url(elem)
+class Florence2Processor(ProcessorMixin):
+    r"""
+    Constructs a Florence2 processor which wraps a Florence2 image processor and a Florence2 tokenizer into a single processor.
+    [`Florence2Processor`] offers all the functionalities of [`CLIPImageProcessor`] and [`BartTokenizerFast`]. See the
+    [`~Florence2Processor.__call__`] and [`~Florence2Processor.decode`] for more information.
+    Args:
+        image_processor ([`CLIPImageProcessor`], *optional*):
+            The image processor is a required input.
+        tokenizer ([`BartTokenizerFast`], *optional*):
+            The tokenizer is a required input.
+    """
+    attributes = ["image_processor", "tokenizer"]
+    image_processor_class = "CLIPImageProcessor"
+    tokenizer_class = ("BartTokenizer", "BartTokenizerFast")
+    def __init__(
+        self,
+        image_processor=None,
+        tokenizer=None,
+    ):
+        """
+        Validate both components, register the Florence-2 special tokens on the tokenizer and build the task
+        lookup tables used by `_construct_prompts` and `post_process_generation`.
+        Args:
+            image_processor: Image processor; must expose an `image_seq_length` attribute.
+            tokenizer: Tokenizer; it is modified in place by `add_special_tokens`.
+        Raises:
+            ValueError: If `image_processor` or `tokenizer` is `None`, or if the image processor has no
+                `image_seq_length` attribute.
+        """
+        if image_processor is None:
+            raise ValueError("You need to specify an `image_processor`.")
+        if tokenizer is None:
+            raise ValueError("You need to specify a `tokenizer`.")
+        if not hasattr(image_processor, "image_seq_length"):
+            raise ValueError("Image processor is missing an `image_seq_length` attribute.")
+        self.image_seq_length = image_processor.image_seq_length
+        # Extend the tokenizer's existing special tokens with the task tags, 1000 location tokens
+        # (<loc_0> ... <loc_999>) and the remaining Florence-2 tags. The token strings, including their
+        # spelling, are runtime data and must not be altered.
+        # NOTE(review): the list order presumably determines the ids assigned to new tokens - do not reorder.
+        tokens_to_add = {
+                'additional_special_tokens': \
+                    tokenizer.additional_special_tokens + \
+                    ['<od>', '</od>', '<ocr>', '</ocr>'] + \
+                    [f'<loc_{x}>' for x in range(1000)] + \
+                    ['<cap>', '</cap>', '<ncap>', '</ncap>','<dcap>', '</dcap>', '<grounding>', '</grounding>', '<seg>', '</seg>', '<sep>', '<region_cap>', '</region_cap>', '<region_to_desciption>', '</region_to_desciption>', '<proposal>', '</proposal>', '<poly>', '</poly>', '<and>']
+            }
+        tokenizer.add_special_tokens(tokens_to_add)
+        # Task token -> name of the parser used by `post_process_generation` (which falls back to 'pure_text'
+        # for unknown tasks).
+        self.tasks_answer_post_processing_type = {
+            '<OCR>': 'pure_text',
+            '<OCR_WITH_REGION>': 'ocr',
+            '<CAPTION>': 'pure_text',
+            '<DETAILED_CAPTION>': 'pure_text',
+            '<MORE_DETAILED_CAPTION>': 'pure_text',
+            '<OD>': 'description_with_bboxes',
+            '<DENSE_REGION_CAPTION>': 'description_with_bboxes',
+            '<CAPTION_TO_PHRASE_GROUNDING>': "phrase_grounding",
+            '<REFERRING_EXPRESSION_SEGMENTATION>': 'polygons',
+            '<REGION_TO_SEGMENTATION>': 'polygons',
+            '<OPEN_VOCABULARY_DETECTION>': 'description_with_bboxes_or_polygons',
+            '<REGION_TO_CATEGORY>': 'pure_text',
+            '<REGION_TO_DESCRIPTION>': 'pure_text',
+            '<REGION_TO_OCR>': 'pure_text',
+            '<REGION_PROPOSAL>': 'bboxes'
+        }
+        # Task token -> fixed natural-language prompt; these tasks take no extra user input.
+        self.task_prompts_without_inputs = {
+            '<OCR>': 'What is the text in the image?',
+            '<OCR_WITH_REGION>': 'What is the text in the image, with regions?',
+            '<CAPTION>': 'What does the image describe?',
+            '<DETAILED_CAPTION>': 'Describe in detail what is shown in the image.',
+            '<MORE_DETAILED_CAPTION>': 'Describe with a paragraph what is shown in the image.',
+            '<OD>': 'Locate the objects with category name in the image.',
+            '<DENSE_REGION_CAPTION>': 'Locate the objects in the image, with their descriptions.',
+            '<REGION_PROPOSAL>': 'Locate the region proposals in the image.'
+        }
+        # Task token -> prompt template; `{input}` is filled with the text that accompanies the task token.
+        self.task_prompts_with_input = {
+            '<CAPTION_TO_PHRASE_GROUNDING>': "Locate the phrases in the caption: {input}",
+            '<REFERRING_EXPRESSION_SEGMENTATION>': 'Locate {input} in the image with mask',
+            '<REGION_TO_SEGMENTATION>': 'What is the polygon mask of region {input}',
+            '<OPEN_VOCABULARY_DETECTION>': 'Locate {input} in the image.',
+            '<REGION_TO_CATEGORY>': 'What is the region {input}?',
+            '<REGION_TO_DESCRIPTION>': 'What does the region {input} describe?',
+            '<REGION_TO_OCR>': 'What text is in the region {input}?',
+        }
+        # `Florence2PostProcesser` is not visible in this part of the file; presumably defined further down.
+        self.post_processor = Florence2PostProcesser(tokenizer=tokenizer)
+        super().__init__(image_processor, tokenizer)
+    def _construct_prompts(self, text):
+        """
+        Expand task tokens in each prompt into their natural-language prompts.
+        Args:
+            text: Iterable of prompt strings.
+        Returns:
+            `list`: One prompt per input element. Elements containing no known task token are returned unchanged.
+        Raises:
+            AssertionError: If an input-free task token (see `task_prompts_without_inputs`) appears together with
+                any other text.
+        """
+        # replace the task tokens with the task prompts if task token is in the text
+        prompts = []
+        for _text in text:
+            # 1. fixed task prompts without additional inputs
+            for task_token, task_prompt in self.task_prompts_without_inputs.items():
+                if task_token in _text:
+                    assert _text == task_token, f"Task token {task_token} should be the only token in the text."
+                    _text = task_prompt
+                    break
+            # 2. task prompts with additional inputs
+            # Whatever remains after removing the task token is substituted for `{input}` in the template.
+            for task_token, task_prompt in self.task_prompts_with_input.items():
+                if task_token in _text:
+                    _text = task_prompt.format(input=_text.replace(task_token, ''))
+                    break
+            prompts.append(_text)
+        return prompts
+    def __call__(
+        self,
+        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
+        images: ImageInput = None,
+        tokenize_newline_separately: bool = True,
+        padding: Union[bool, str, PaddingStrategy] = False,
+        truncation: Union[bool, str, TruncationStrategy] = None,
+        max_length=None,
+        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
+        do_resize: bool = None,
+        do_normalize: bool = None,
+        image_mean: Optional[Union[float, List[float]]] = None,
+        image_std: Optional[Union[float, List[float]]] = None,
+        data_format: Optional["ChannelDimension"] = "channels_first",  # noqa: F821
+        input_data_format: Optional[
+            Union[str, "ChannelDimension"]  # noqa: F821
+        ] = None,
+        resample: "PILImageResampling" = None,  # noqa: F821
+        do_convert_rgb: bool = None,
+        do_thumbnail: bool = None,
+        do_align_long_axis: bool = None,
+        do_rescale: bool = None,
+    ) -> BatchFeature:
+        """
+        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
+        and `kwargs` arguments to BartTokenizerFast's [`~BartTokenizerFast.__call__`] if `text` is not `None` to encode
+        the text. To prepare the image(s), this method forwards the `images` and `kwrags` arguments to
+        CLIPImageProcessor's [`~CLIPImageProcessor.__call__`] if `images` is not `None`. Please refer to the doctsring
+        of the above two methods for more information.
+        Args:
+            text (`str`, `List[str]`, `List[List[str]]`):
+                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
+                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
+                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
+            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
+                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
+                tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
+                number of channels, H and W are image height and width.
+            tokenize_newline_separately (`bool`, defaults to `True`):
+                Adds a separately tokenized '\n' at the end of the prompt.
+            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
+                Select a strategy to pad the returned sequences (according to the model's padding side and padding
+                index) among:
+                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+                  sequence if provided).
+                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
+                  acceptable input length for the model if that argument is not provided.
+                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
+                  lengths).
+            max_length (`int`, *optional*):
+                Maximum length of the returned list and optionally padding length (see above).
+            truncation (`bool`, *optional*):
+                Activates truncation to cut input sequences longer than `max_length` to `max_length`.
+            return_tensors (`str` or [`~utils.TensorType`], *optional*):
+                If set, will return tensors of a particular framework. Acceptable values are:
+                - `'tf'`: Return TensorFlow `tf.constant` objects.
+                - `'pt'`: Return PyTorch `torch.Tensor` objects.
+                - `'np'`: Return NumPy `np.ndarray` objects.
+                - `'jax'`: Return JAX `jnp.ndarray` objects.
+        Returns:
+            [`BatchFeature`]: A [`BatchFeature`] with the following fields:
+            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. If `suffix`
+              is provided, the `input_ids` will also contain the suffix input ids.
+            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
+              `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
+              `None`).
+            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
+            - **labels** -- Labels compatible with training if `suffix` is not None
+        """
+        return_token_type_ids = False
+        if images is None:
+            raise ValueError("`images` are expected as arguments to a `Florence2Processor` instance.")
+        if text is None:
+            logger.warning_once(
+                "You are using Florence-2 without a text prompt."
+            )
+            text = ""
+        if isinstance(text, List) and isinstance(images, List):
+            if len(images) < len(text):
+                raise ValueError(
+                    f"Received {len(images)} images for {len(text)} prompts. Each prompt should be associated with an image."
+                )
+        if _is_str_or_image(text):
+            text = [text]
+        elif isinstance(text, list) and _is_str_or_image(text[0]):
+            pass
+        pixel_values = self.image_processor(
+            images,
+            do_resize=do_resize,
+            do_normalize=do_normalize,
+            return_tensors=return_tensors,
+            image_mean=image_mean,
+            image_std=image_std,
+            input_data_format=input_data_format,
+            data_format=data_format,
+            resample=resample,
+            do_convert_rgb=do_convert_rgb,
+        )["pixel_values"]
+        if max_length is not None:
+            max_length -= self.image_seq_length  # max_length has to account for the image tokens
+        text = self._construct_prompts(text)
+        inputs = self.tokenizer(
+            text,
+            return_tensors=return_tensors,
+            padding=padding,
+            max_length=max_length,
+            truncation=truncation,
+            return_token_type_ids=return_token_type_ids,
+        )
+        return_data = {**inputs, "pixel_values": pixel_values}
+        if return_token_type_ids:
+            labels = inputs["input_ids"].masked_fill(inputs["token_type_ids"] == 0, -100)
+            return_data.update({"labels": labels})
+        return BatchFeature(data=return_data)
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->Florence2
+    def batch_decode(self, *args, **kwargs):
+        """
+        Thin wrapper: forwards all positional and keyword arguments to `self.tokenizer.batch_decode` and returns
+        its result. See BartTokenizerFast's [`~PreTrainedTokenizer.batch_decode`] for the accepted arguments.
+        """
+        return self.tokenizer.batch_decode(*args, **kwargs)
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.decode with CLIP->Florence2
+    def decode(self, *args, **kwargs):
+        """
+        Thin wrapper: forwards all positional and keyword arguments to `self.tokenizer.decode` and returns its
+        result. See BartTokenizerFast's [`~PreTrainedTokenizer.decode`] for the accepted arguments.
+        """
+        return self.tokenizer.decode(*args, **kwargs)
+    @property
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.model_input_names with CLIP->Florence2
+    def model_input_names(self):
+        tokenizer_input_names = self.tokenizer.model_input_names
+        image_processor_input_names = self.image_processor.model_input_names
+        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
+    def post_process_generation(self, text, task, image_size):
+        """
+        Post-process the output of the model to each of the task outputs.
+        Args:
+            text (`str`): The text to post-process.
+            task (`str`): The task to post-process the text for.
+            image_size (`Tuple[int, int]`): The size of the image. height x width.
+        """
+        task_answer_post_processing_type = self.tasks_answer_post_processing_type.get(task, 'pure_text')
+        task_answer = self.post_processor(
+            text=text,
+            image_size=image_size,
+            parse_tasks=task_answer_post_processing_type,
+        )[task_answer_post_processing_type]
+        if task_answer_post_processing_type == 'pure_text':
+            final_answer = task_answer
+            # remove the special tokens
+            final_answer = final_answer.replace('<s>', '').replace('</s>', '')
+        elif task_answer_post_processing_type in ['od', 'description_with_bboxes', 'bboxes']:
+            od_instances = task_answer
+            bboxes_od = [_od_instance['bbox'] for _od_instance in od_instances]
+            labels_od = [str(_od_instance['cat_name']) for _od_instance in od_instances]
+            final_answer = {'bboxes': bboxes_od, 'labels': labels_od}
+        elif task_answer_post_processing_type in ['ocr']:
+            bboxes = [_od_instance['quad_box'] for _od_instance in task_answer]
+            labels = [str(_od_instance['text']) for _od_instance in task_answer]
+            final_answer = {'quad_boxes': bboxes, 'labels': labels}
+        elif task_answer_post_processing_type in ['phrase_grounding']:
+            bboxes = []
+            labels = []
+            for _grounded_phrase in task_answer:
+                for _bbox in _grounded_phrase['bbox']:
+                    bboxes.append(_bbox)
+                    labels.append(_grounded_phrase['cat_name'])
+            final_answer = {'bboxes': bboxes, 'labels': labels}
+        elif task_answer_post_processing_type in ['description_with_polygons', 'polygons']:
+            labels = []
+            polygons = []
+            for result in task_answer:
+                label = result['cat_name']
+                _polygons = result['polygons']
+                labels.append(label)
+                polygons.append(_polygons)
+            final_answer = {'polygons': polygons, 'labels': labels}
+        elif task_answer_post_processing_type in ['description_with_bboxes_or_polygons']:
+            bboxes = []
+            bboxes_labels = []
+            polygons = []
+            polygons_labels = []
+            for result in task_answer:
+                label = result['cat_name']
+                if 'polygons' in result:
+                    _polygons = result['polygons']
+                    polygons.append(_polygons)
+                    polygons_labels.append(label)
+                else:
+                    _bbox = result['bbox']
+                    bboxes.append(_bbox)
+                    bboxes_labels.append(label)
+            final_answer = {'bboxes': bboxes, 'bboxes_labels': bboxes_labels, 'polygons': polygons, 'polygons_labels': polygons_labels}
+        else:
+            raise ValueError('Unknown task answer post processing type: {}'.format(task_answer_post_processing_type))
+        final_answer = {
+            task: final_answer}
+        return final_answer
+class BoxQuantizer(object):
+    def __init__(self, mode, bins):
+        self.mode = mode
+        self.bins = bins
+    def quantize(self, boxes: torch.Tensor, size):
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)  # Shape: 4 * [N, 1].
+        if self.mode == 'floor':
+            quantized_xmin = (
+                xmin / size_per_bin_w).floor().clamp(0, bins_w - 1)
+            quantized_ymin = (
+                ymin / size_per_bin_h).floor().clamp(0, bins_h - 1)
+            quantized_xmax = (
+                xmax / size_per_bin_w).floor().clamp(0, bins_w - 1)
+            quantized_ymax = (
+                ymax / size_per_bin_h).floor().clamp(0, bins_h - 1)
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        quantized_boxes = torch.cat(
+            (quantized_xmin, quantized_ymin, quantized_xmax, quantized_ymax), dim=-1
+        ).int()
+        return quantized_boxes
+    def dequantize(self, boxes: torch.Tensor, size):
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)  # Shape: 4 * [N, 1].
+        if self.mode == 'floor':
+            # Add 0.5 to use the center position of the bin as the coordinate.
+            dequantized_xmin = (xmin + 0.5) * size_per_bin_w
+            dequantized_ymin = (ymin + 0.5) * size_per_bin_h
+            dequantized_xmax = (xmax + 0.5) * size_per_bin_w
+            dequantized_ymax = (ymax + 0.5) * size_per_bin_h
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        dequantized_boxes = torch.cat(
+            (dequantized_xmin, dequantized_ymin,
+             dequantized_xmax, dequantized_ymax), dim=-1
+        )
+        return dequantized_boxes
+class CoordinatesQuantizer(object):
+    """
+    Quantize coornidates (Nx2)
+    """
+    def __init__(self, mode, bins):
+        self.mode = mode
+        self.bins = bins
+    def quantize(self, coordinates: torch.Tensor, size):
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        assert coordinates.shape[-1] == 2, 'coordinates should be shape (N, 2)'
+        x, y = coordinates.split(1, dim=-1)  # Shape: 4 * [N, 1].
+        if self.mode == 'floor':
+            quantized_x = (x / size_per_bin_w).floor().clamp(0, bins_w - 1)
+            quantized_y = (y / size_per_bin_h).floor().clamp(0, bins_h - 1)
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        quantized_coordinates = torch.cat(
+            (quantized_x, quantized_y), dim=-1
+        ).int()
+        return quantized_coordinates
+    def dequantize(self, coordinates: torch.Tensor, size):
+        bins_w, bins_h = self.bins  # Quantization bins.
+        size_w, size_h = size       # Original image size.
+        size_per_bin_w = size_w / bins_w
+        size_per_bin_h = size_h / bins_h
+        assert coordinates.shape[-1] == 2, 'coordinates should be shape (N, 2)'
+        x, y = coordinates.split(1, dim=-1)  # Shape: 4 * [N, 1].
+        if self.mode == 'floor':
+            # Add 0.5 to use the center position of the bin as the coordinate.
+            dequantized_x = (x + 0.5) * size_per_bin_w
+            dequantized_y = (y + 0.5) * size_per_bin_h
+        elif self.mode == 'round':
+            raise NotImplementedError()
+        else:
+            raise ValueError('Incorrect quantization type.')
+        dequantized_coordinates = torch.cat(
+            (dequantized_x, dequantized_y), dim=-1
+        )
+        return dequantized_coordinates
+class Florence2PostProcesser(object):
+    """
+    Florence-2 post-processor that converts text predictions into task-specific results.
+    Args:
+        tokenizer: Optional tokenizer. When given, its `all_special_tokens` are cached as a set
+            for use by `decode_with_spans`.
+    The configuration is not a constructor argument: `__init__` builds it with
+    `_create_default_config()` and indexes its 'PARSE_TASKS' entries by 'TASK_NAME'.
+        sample config:
+            UNIFIED_POST_PROCESS:
+                # common configs
+                NUM_BBOX_HEIGHT_BINS: 1000
+                NUM_BBOX_WIDTH_BINS: 1000
+                COORDINATES_HEIGHT_BINS: 1000
+                COORDINATES_WIDTH_BINS: 1000
+                # task specific configs, override the common configs
+                PARSE_TASKS:
+                    - TASK_NAME: 'video_dense_caption'
+                      PATTERN: 'r<time_(\d+)><time_(\d+)>([a-zA-Z0-9 ]+)'
+                      SCORE_MODE: 'avg_cat_name_scores'
+                      NUM_BINS: 100
+                    - TASK_NAME: 'od'
+                      PATTERN: 'r<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>([a-zA-Z0-9 ]+)'
+                      SCORE_MODE: 'avg_cat_name_scores'
+    Returns (when the instance is called):
+        parsed_dict (dict): A dict of parsed results.
+    """
+    def __init__(
+        self,
+        tokenizer=None
+    ):
+        parse_tasks = []
+        parse_task_configs = {}
+        config = self._create_default_config()
+        for task in config['PARSE_TASKS']:
+            parse_tasks.append(task['TASK_NAME'])
+            parse_task_configs[task['TASK_NAME']] = task
+        self.config = config
+        self.parse_tasks = parse_tasks
+        self.parse_tasks_configs = parse_task_configs
+        self.tokenizer =  tokenizer
+        if self.tokenizer is not None:
+            self.all_special_tokens = set(self.tokenizer.all_special_tokens)
+        self.init_quantizers()
+        self.black_list_of_phrase_grounding = self._create_black_list_of_phrase_grounding()
+    def _create_black_list_of_phrase_grounding(self):
+        black_list = {}
+        if 'phrase_grounding' in self.parse_tasks and self.parse_tasks_configs['phrase_grounding']['FILTER_BY_BLACK_LIST']:
+            black_list =  set(
+                ['it', 'I', 'me', 'mine',
+                 'you', 'your', 'yours',
+                 'he', 'him', 'his',
+                 'she', 'her', 'hers',
+                 'they', 'them', 'their', 'theirs',
+                 'one', 'oneself',
+                 'we', 'us', 'our', 'ours',
+                 'you', 'your', 'yours',
+                 'they', 'them', 'their', 'theirs',
+                 'mine', 'yours', 'his', 'hers', 'its',
+                 'ours', 'yours', 'theirs',
+                 'myself', 'yourself', 'himself', 'herself', 'itself',
+                 'ourselves', 'yourselves', 'themselves',
+                 'this', 'that',
+                 'these', 'those',
+                 'who', 'whom', 'whose', 'which', 'what',
+                 'who', 'whom', 'whose', 'which', 'that',
+                 'all', 'another', 'any', 'anybody', 'anyone', 'anything',
+                 'each', 'everybody', 'everyone', 'everything',
+                 'few', 'many', 'nobody', 'none', 'one', 'several',
+                 'some', 'somebody', 'someone', 'something',
+                 'each other', 'one another',
+                 'myself', 'yourself', 'himself', 'herself', 'itself',
+                 'ourselves', 'yourselves', 'themselves',
+                 'the image', 'image', 'images', 'the', 'a', 'an', 'a group',
+                 'other objects', 'lots', 'a set',
+                 ]
+            )
+        return black_list
+    def _create_default_config(self):
+        config = {
+            'NUM_BBOX_HEIGHT_BINS': 1000,
+            'NUM_BBOX_WIDTH_BINS': 1000,
+            'BOX_QUANTIZATION_MODE': 'floor',
+            'COORDINATES_HEIGHT_BINS': 1000,
+            'COORDINATES_WIDTH_BINS': 1000,
+            'COORDINATES_QUANTIZATION_MODE': 'floor',
+            'PARSE_TASKS': [
+                {
+                    'TASK_NAME': 'od',
+                    'PATTERN': r'([a-zA-Z0-9 ]+)<loc_(\\d+)><loc_(\\d+)><loc_(\\d+)><loc_(\\d+)>'
+                },
+                {
+                    'TASK_NAME': 'ocr',
+                    'PATTERN':  r'(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>',
+                    'AREA_THRESHOLD': 0.00
+                },
+                {
+                    'TASK_NAME': 'phrase_grounding',
+                    'FILTER_BY_BLACK_LIST': True
+                },
+                {
+                    'TASK_NAME': 'pure_text',
+                },
+                {
+                    'TASK_NAME': 'description_with_bboxes',
+                },
+                {
+                    'TASK_NAME': 'description_with_polygons',
+                },
+                {
+                    'TASK_NAME': 'polygons',
+                },
+                {
+                    'TASK_NAME': 'bboxes',
+                },
+                {
+                    'TASK_NAME': 'description_with_bboxes_or_polygons',
+                }
+            ]
+        }
+        return config
+    def init_quantizers(self):
+        # we have box_quantizer (od, grounding) and coordinates_quantizer (ocr, referring_segmentation)
+        num_bbox_height_bins = self.config.get('NUM_BBOX_HEIGHT_BINS', 1000)
+        num_bbox_width_bins = self.config.get('NUM_BBOX_WIDTH_BINS', 1000)
+        box_quantization_mode = self.config.get('BOX_QUANTIZATION_MODE', 'floor')
+        self.box_quantizer = BoxQuantizer(
+            box_quantization_mode,
+            (num_bbox_width_bins, num_bbox_height_bins),
+        )
+        num_bbox_height_bins = self.config['COORDINATES_HEIGHT_BINS'] if 'COORDINATES_HEIGHT_BINS' in self.config else self.config.get('NUM_BBOX_HEIGHT_BINS', 1000)
+        num_bbox_width_bins = self.config['COORDINATES_WIDTH_BINS'] if 'COORDINATES_WIDTH_BINS' in self.config else self.config.get('NUM_BBOX_WIDTH_BINS', 1000)
+        box_quantization_mode = self.config.get('COORDINATES_QUANTIZATION_MODE') if 'COORDINATES_QUANTIZATION_MODE' in self.config else self.config.get('BOX_QUANTIZATION_MODE', 'floor')
+        self.coordinates_quantizer = CoordinatesQuantizer(
+            box_quantization_mode,
+            (num_bbox_width_bins, num_bbox_height_bins),
+        )
+    def decode_with_spans(self, tokenizer, token_ids):
+        filtered_tokens = tokenizer.convert_ids_to_tokens(
+            token_ids, skip_special_tokens=False)
+        assert len(filtered_tokens) == len(token_ids)
+        # To avoid mixing byte-level and unicode for byte-level BPT
+        # we need to build string separately for added tokens and byte-level tokens
+        # cf. https://github.com/huggingface/transformers/issues/1133
+        sub_texts = []
+        for token in filtered_tokens:
+            if token in self.all_special_tokens:
+                sub_texts.append(token)
+            else:
+                if isinstance(tokenizer, (BartTokenizer, BartTokenizerFast)):
+                    sub_text = tokenizer.convert_tokens_to_string([token])
+                elif isinstance(tokenizer, (T5Tokenizer, T5TokenizerFast)):
+                    # Ref: https://github.com/google/sentencepiece#whitespace-is-treated-as-a-basic-symbol
+                    # Note: Do not strip sub_text as it may have functional whitespace
+                    sub_text = token.replace('▁', ' ')
+                else:
+                    raise ValueError(f'type {type(tokenizer)} not supported')
+                sub_texts.append(sub_text)
+        text = ''
+        spans = []
+        for sub_text in sub_texts:
+            span = (len(text), len(text) + len(sub_text))  # [start index, end index).
+            text += sub_text
+            spans.append(span)
+        # Text format:
+        # 1. T5Tokenizer/T5TokenizerFast:
+        #      "<loc_1><loc_2><loc_3><loc_4> transplanting dog<loc_1><loc_2><loc_3><loc_4> cat</s>"
+        #    Equivalent to t5_tokenizer.decode(input_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False, spaces_between_special_tokens=False)
+        # 2. BartTokenizer (need to double check):
+        #      "<s><loc_1><loc_2><loc_3><loc_4>transplanting dog<loc_1><loc_2><loc_3><loc_4>cat</s>"
+        #    Equivalent to bart_tokenizer.decode(input_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False, spaces_between_special_tokens=False)
+        return text, spans
+    def parse_od_from_text_and_spans(
+        self,
+        text,
+        pattern,
+        image_size,
+        phrase_centric=False
+    ):
+        parsed = list(re.finditer(pattern, text))
+        instances = []
+        for i in range(len(parsed)):
+            # Prepare instance.
+            instance = {}
+            if phrase_centric:
+                bbox_bins = [int(parsed[i].group(j)) for j in range(2, 6)]
+            else:
+                bbox_bins = [int(parsed[i].group(j)) for j in range(1, 5)]
+            instance['bbox'] = self.box_quantizer.dequantize(
+                boxes=torch.tensor(bbox_bins),
+                size=image_size
+            ).tolist()
+            if phrase_centric:
+                instance['cat_name'] = parsed[i].group(1).lower().strip()
+            else:
+                instance['cat_name'] = parsed[i].group(5).lower().strip()
+            instances.append(instance)
+        return instances
+    def parse_ocr_from_text_and_spans(self,
+                                    text,
+                                     pattern,
+                                     image_size,
+                                     area_threshold=-1.0,
+        ):
+        bboxes = []
+        labels = []
+        text = text.replace('<s>', '')
+        # ocr with regions
+        parsed = re.findall(pattern, text)
+        instances = []
+        image_width, image_height = image_size
+        for ocr_line in parsed:
+            ocr_content = ocr_line[0]
+            quad_box = ocr_line[1:]
+            quad_box = [int(i) for i in quad_box]
+            quad_box = self.coordinates_quantizer.dequantize(
+                torch.tensor(np.array(quad_box).reshape(-1, 2)),
+                size=image_size
+            ).reshape(-1).tolist()
+            if area_threshold > 0:
+                x_coords = [i for i in quad_box[0::2]]
+                y_coords = [i for i in quad_box[1::2]]
+                # apply the Shoelace formula
+                area = 0.5 * abs(sum(x_coords[i] * y_coords[i + 1] - x_coords[i + 1] * y_coords[i] for i in range(4 - 1)))
+                if area < (image_width * image_height) * area_threshold:
+                    continue
+            bboxes.append(quad_box)
+            labels.append(ocr_content)
+            instances.append({
+                'quad_box': quad_box,
+                'text': ocr_content,
+            })
+        return instances
+    def parse_phrase_grounding_from_text_and_spans(self, text, pattern, image_size):
+        # ignore <s> </s> and <pad>
+        cur_span = 0
+        if text.startswith('<s>'):
+            cur_span += 3
+        text = text.replace('<s>', '')
+        text = text.replace('</s>', '')
+        text = text.replace('<pad>', '')
+        pattern = r"([^<]+(?:<loc_\d+>){4,})"
+        phrases = re.findall(pattern, text)
+        # pattern should be text pattern and od pattern
+        pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_)'
+        box_pattern = r'<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
+        instances = []
+        for pharse_text in phrases:
+            phrase_text_strip = pharse_text.replace('<ground>', '', 1)
+            phrase_text_strip = pharse_text.replace('<obj>', '', 1)
+            if phrase_text_strip == '':
+                cur_span += len(pharse_text)
+                continue
+            # Prepare instance.
+            instance = {}
+            # parse phrase, get string
+            phrase = re.search(pattern, phrase_text_strip)
+            if phrase is None:
+                cur_span += len(pharse_text)
+                continue
+            # parse bboxes by box_pattern
+            bboxes_parsed = list(re.finditer(box_pattern, pharse_text))
+            if len(bboxes_parsed) == 0:
+                cur_span += len(pharse_text)
+                continue
+            phrase = phrase.group()
+            # remove leading and trailing spaces
+            phrase = phrase.strip()
+            if phrase in self.black_list_of_phrase_grounding:
+                cur_span += len(pharse_text)
+                continue
+            # a list of list
+            bbox_bins = [[int(_bboxes_parsed.group(j)) for j in range(1, 5)] for _bboxes_parsed in bboxes_parsed]
+            instance['bbox'] = self.box_quantizer.dequantize(
+                boxes=torch.tensor(bbox_bins),
+                size=image_size
+            ).tolist()
+            # exclude non-ascii characters
+            phrase = phrase.encode('ascii',errors='ignore').decode('ascii')
+            instance['cat_name'] = phrase
+            instances.append(instance)
+        return instances
+    def parse_description_with_bboxes_from_text_and_spans(self, text, pattern, image_size, allow_empty_phrase=False):
+        # temporary parse solution, split by '.'
+        # ignore <s> </s> and <pad>
+        text = text.replace('<s>', '')
+        text = text.replace('</s>', '')
+        text = text.replace('<pad>', '')
+        if allow_empty_phrase:
+            pattern = rf"(?:(?:<loc_\d+>){{4,}})"
+        else:
+            pattern = r"([^<]+(?:<loc_\d+>){4,})"
+        phrases = re.findall(pattern, text)
+        # pattern should be text pattern and od pattern
+        pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_)'
+        box_pattern = r'<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>'
+        instances = []
+        for pharse_text in phrases:
+            phrase_text_strip = pharse_text.replace('<ground>', '', 1)
+            phrase_text_strip = pharse_text.replace('<obj>', '', 1)
+            if phrase_text_strip == '' and not allow_empty_phrase:
+                continue
+            # parse phrase, get string
+            phrase = re.search(pattern, phrase_text_strip)
+            if phrase is None:
+                continue
+            phrase = phrase.group()
+            # remove leading and trailing spaces
+            phrase = phrase.strip()
+            # parse bboxes by box_pattern
+            bboxes_parsed = list(re.finditer(box_pattern, pharse_text))
+            if len(bboxes_parsed) == 0:
+                continue
+            # a list of list
+            bbox_bins = [[int(_bboxes_parsed.group(j)) for j in range(1, 5)] for _bboxes_parsed in bboxes_parsed]
+            bboxes = self.box_quantizer.dequantize(
+                boxes=torch.tensor(bbox_bins),
+                size=image_size
+            ).tolist()
+            phrase = phrase.encode('ascii',errors='ignore').decode('ascii')
+            for _bboxes in bboxes:
+                # Prepare instance.
+                instance = {}
+                instance['bbox'] = _bboxes
+                # exclude non-ascii characters
+                instance['cat_name'] = phrase
+                instances.append(instance)
+        return instances
+    def parse_description_with_polygons_from_text_and_spans(self, text, pattern, image_size,
+                                                            allow_empty_phrase=False,
+                                                            polygon_sep_token='<sep>',
+                                                            polygon_start_token='<poly>',
+                                                            polygon_end_token='</poly>',
+                                                            with_box_at_start=False,
+                                                            ):
+        """
+        Extract (description, polygons) instances from `text`.
+        Args:
+            text: The decoded model output; `<s>`, `</s>` and `<pad>` are removed first.
+            pattern: Not used as passed in; it is overwritten by a regex built from the
+                polygon tokens below.
+            image_size: Image size forwarded to the quantizers' `dequantize` methods.
+            allow_empty_phrase: When True, token runs without preceding text are parsed too.
+            polygon_sep_token: Token separating the polygons of one instance.
+            polygon_start_token: Token opening one polygon instance.
+            polygon_end_token: Token closing one polygon instance.
+            with_box_at_start: When True, the first four values of the first polygon of an
+                instance are interpreted as a bounding box.
+        Returns:
+            list: One dict per polygon instance with 'cat_name', 'polygons' (a list of flat
+            [x1, y1, x2, y2, ...] lists) and, when a box was collected, 'bbox'.
+        """
+        # ref_seg format: '<expression><x1><y1><x2><y2><><><sep><><><><>'
+        # ignore <s> </s> and <pad>
+        text = text.replace('<s>', '')
+        text = text.replace('</s>', '')
+        text = text.replace('<pad>', '')
+        if allow_empty_phrase:
+            # A run of at least four location/separator/start/end tokens, with no text required.
+            pattern = rf"(?:(?:<loc_\d+>|{re.escape(polygon_sep_token)}|{re.escape(polygon_start_token)}|{re.escape(polygon_end_token)}){{4,}})"
+        else:
+            # [^<]+: This part matches one or more characters that are not the < symbol.
+            # The ^ inside the square brackets [] is a negation, meaning it matches anything except <.
+            # The text must then be followed by at least four location/separator/start/end tokens.
+            pattern = rf"([^<]+(?:<loc_\d+>|{re.escape(polygon_sep_token)}|{re.escape(polygon_start_token)}|{re.escape(polygon_end_token)}){{4,}})"
+        phrases = re.findall(pattern, text)
+        # The phrase is whatever precedes the first structural tag inside a chunk.
+        phrase_string_pattern = r'^\s*(.*?)(?=<od>|</od>|<box>|</box>|<bbox>|</bbox>|<loc_|<poly>)'
+        # One polygon: a run of location tokens terminated by the separator token or end of string.
+        box_pattern =  rf'((?:<loc_\d+>)+)(?:{re.escape(polygon_sep_token)}|$)'
+        # one polygons instance is separated by polygon_start_token and polygon_end_token
+        polygons_instance_pattern = rf'{re.escape(polygon_start_token)}(.*?){re.escape(polygon_end_token)}'
+        instances = []
+        for phrase_text in phrases:
+            # exclude loc_\d+>
+            # need to get span if want to include category score
+            # NOTE(review): this only removes a leading "loc_N>" fragment that lacks its '<' - confirm when that occurs.
+            phrase_text_strip = re.sub(r'^loc_\d+>', '', phrase_text, count=1)
+            # phrase = phrase.replace('<poly>', '')
+            # phrase = phrase.replace('poly>', '')
+            if phrase_text_strip == '' and not allow_empty_phrase:
+                continue
+            # parse phrase, get string
+            phrase = re.search(phrase_string_pattern, phrase_text_strip)
+            if phrase is None:
+                continue
+            phrase = phrase.group()
+            # remove leading and trailing spaces
+            phrase = phrase.strip()
+            # parse bboxes by box_pattern
+            # split by polygon_start_token and polygon_end_token first using polygons_instance_pattern
+            if polygon_start_token in phrase_text and polygon_end_token in phrase_text:
+                polygons_instances_parsed = list(re.finditer(polygons_instance_pattern, phrase_text))
+            else:
+                # No start/end markers: treat the whole chunk as a single instance (a plain string).
+                polygons_instances_parsed = [phrase_text]
+            for _polygons_instances_parsed in polygons_instances_parsed:
+                # Prepare instance.
+                instance = {}
+                # polygons_parsed= list(re.finditer(box_pattern, phrase_text))
+                # The entry is a str for the unmarked case and a regex match for the marked case.
+                if isinstance(_polygons_instances_parsed, str):
+                    polygons_parsed= list(re.finditer(box_pattern, _polygons_instances_parsed))
+                else:
+                    polygons_parsed= list(re.finditer(box_pattern, _polygons_instances_parsed.group(1)))
+                if len(polygons_parsed) == 0:
+                    continue
+                # a list of list (polygon)
+                bbox = []
+                polygons = []
+                for _polygon_parsed in polygons_parsed:
+                    # group 1: the whole run of <loc_N> tokens for this polygon
+                    _polygon = _polygon_parsed.group(1)
+                    # parse into list of int
+                    _polygon = [int(_loc_parsed.group(1)) for _loc_parsed in re.finditer(r'<loc_(\d+)>', _polygon)]
+                    # Only the first polygon of an instance can contribute the leading box.
+                    if with_box_at_start and len(bbox) == 0:
+                        if len(_polygon) > 4:
+                            # the first four values are the box, the rest is the polygon
+                            bbox = _polygon[:4]
+                            _polygon = _polygon[4:]
+                        else:
+                            # no valid bbox prediction: fall back to an all-zero box
+                            bbox = [0, 0, 0, 0]
+                    # abandon last element if is not paired
+                    if len(_polygon) % 2 == 1:
+                        _polygon = _polygon[:-1]
+                    # reshape into (n, 2)
+                    _polygon = self.coordinates_quantizer.dequantize(
+                        torch.tensor(np.array(_polygon).reshape(-1, 2)),
+                        size=image_size
+                    ).reshape(-1).tolist()
+                    # reshape back
+                    polygons.append(_polygon)
+                instance['cat_name'] = phrase
+                instance['polygons'] = polygons
+                if len(bbox) != 0:
+                    # The box is dequantized with the box quantizer, not the coordinates quantizer.
+                    instance['bbox'] = self.box_quantizer.dequantize(
+                        boxes=torch.tensor([bbox]),
+                        size=image_size
+                    ).tolist()[0]
+                instances.append(instance)
+        return instances
+    def __call__(
+        self,
+        text=None,
+        image_size=None,
+        parse_tasks=None,
+    ):
+        """
+        Args:
+            text: model outputs
+            image_size: (width, height)
+            parse_tasks: a list of tasks to parse, if None, parse all tasks.
+        """
+        if parse_tasks is not None:
+            if isinstance(parse_tasks, str):
+                parse_tasks = [parse_tasks]
+            for _parse_task in parse_tasks:
+                assert _parse_task in self.parse_tasks, f'parse task {_parse_task} not supported'
+        # sequence or text should be provided
+        assert text is not None, 'text should be provided'
+        parsed_dict = {
+            'text': text
+        }
+        for task in self.parse_tasks:
+            if parse_tasks is not None and task not in parse_tasks:
+                continue
+            pattern = self.parse_tasks_configs[task].get('PATTERN', None)
+            if task == 'ocr':
+                instances = self.parse_ocr_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                    area_threshold=self.parse_tasks_configs[task].get('AREA_THRESHOLD', 0.0),
+                )
+                parsed_dict['ocr'] = instances
+            elif task == 'phrase_grounding':
+                instances = self.parse_phrase_grounding_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                )
+                parsed_dict['phrase_grounding'] = instances
+            elif task == 'pure_text':
+                parsed_dict['pure_text'] = text
+            elif task == 'description_with_bboxes':
+                instances = self.parse_description_with_bboxes_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                )
+                parsed_dict['description_with_bboxes'] = instances
+            elif task == 'description_with_polygons':
+                instances = self.parse_description_with_polygons_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                )
+                parsed_dict['description_with_polygons'] = instances
+            elif task == 'polygons':
+                instances = self.parse_description_with_polygons_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                    allow_empty_phrase=True,
+                )
+                parsed_dict['polygons'] = instances
+            elif task == 'bboxes':
+                instances = self.parse_description_with_bboxes_from_text_and_spans(
+                    text,
+                    pattern=pattern,
+                    image_size=image_size,
+                    allow_empty_phrase=True,
+                )
+                parsed_dict['bboxes'] = instances
+            elif task == 'description_with_bboxes_or_polygons':
+                if '<poly>' in text:
+                    # only support either polygons or bboxes, not both at the same time
+                    instances = self.parse_description_with_polygons_from_text_and_spans(
+                        text,
+                        pattern=pattern,
+                        image_size=image_size,
+                    )
+                else:
+                    instances = self.parse_description_with_bboxes_from_text_and_spans(
+                        text,
+                        pattern=pattern,
+                        image_size=image_size,
+                    )
+                parsed_dict['description_with_bboxes_or_polygons'] = instances
+            else:
+                raise ValueError("task {} is not supported".format(task))
+        return parsed_dict

florence2/large-ft/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

florence2/large-ft/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "model_max_length": 1024
+}

florence2/large-ft/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

loras/Hyper-FLUX.1-dev-8steps-lora.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ e0ab0fdf569cd01a382f19bd87681f628879dea7ad51fe5a3799b6c18c7b2d03

loras/flux/arcane-style-2.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 5bd48e61bd50b3f3295df044fe316f8b761020042c4629979aa0538752a8bfab

loras/illu/ATRex_style-12.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ a991f67fd19e8c055bdaf24f399db6c0d0975bc8dd83d86627f601ef0bc6b63f

loras/illu/Gloom hands illus-000040.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ c1d67232deffa974138331cbc25dfd7edde33daf13a5250024d1293886eecc42

loras/illu/HerrscherAGGA2025_Chibi-IL_V1.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 093a1cd059a79f3ba275991eb747aa20d43899575b753ffea2f00f25c9f61a32

loras/illu/Illustrious_Fujimoto_Manga_Style.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 867292e5857652cf5aefce90889ca0ac7266d248bcdc00dd71a6379e6955cd9e

loras/illu/My_Wish_is_for_Love_ILXL.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ ce50536919b72bd683207ae75bf7381fdaa778304a768fa51ab5cabdb9a5e146

loras/illu/WindWaker_Style_IXL.safetensors.rgthree-info.json ADDED Viewed

	@@ -0,0 +1,2763 @@

+{
+  "file": "illu/WindWaker_Style_IXL.safetensors",
+  "path": "/workspace/ComfyUI/models/loras/illu/WindWaker_Style_IXL.safetensors",
+  "images": [
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/b9a71a85-c130-466c-9512-3c9e4b94daf2/width=832/51277231.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51277231",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 98679726,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, blue eyes, blonde hair,  hat, weapon, pointy ears, sword, chibi, 3d, instrument, shield   , smile, looking at viewer,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "Euler a",
+      "cfg": 7,
+      "model": null,
+      "resources": []
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/7d335619-6459-4f74-b014-90a41f971889/width=832/51163882.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51163882",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 252630410834070,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, pink hair, short twintails, white dress, cowboy shot, 3d,    ,  <lora:WindWaker_Style_IXL:1.0>,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "DPM++ 2M Karras",
+      "cfg": 7,
+      "model": null,
+      "resources": [
+        {
+          "name": "WindWaker_Style_IXL",
+          "type": "lora",
+          "weight": 1
+        }
+      ]
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dec826b5-fd01-406b-b881-b9fe5a6abbde/width=832/51163886.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51163886",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 272033102095518,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, long hair, smile, blue eyes, gloves, jewelry, one eye closed, green hair, pointy ears, necklace, makeup, tiara, ;)  ,  <lora:WindWaker_Style_IXL:1.0>,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "DPM++ 2M Karras",
+      "cfg": 7,
+      "model": null,
+      "resources": [
+        {
+          "name": "WindWaker_Style_IXL",
+          "type": "lora",
+          "weight": 1
+        }
+      ]
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/9ada0404-1402-482d-96aa-183da60661aa/width=832/51163887.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51163887",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 991930792475181,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, blue eyes, blonde hair, 1boy, hat, weapon, male focus, pointy ears, sword, dark skin, chibi, instrument, shield   ,  <lora:WindWaker_Style_IXL:1.0>,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "DPM++ 2M Karras",
+      "cfg": 7,
+      "model": null,
+      "resources": [
+        {
+          "name": "WindWaker_Style_IXL",
+          "type": "lora",
+          "weight": 1
+        }
+      ]
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/50c31f3d-3c04-4bb7-8609-34956da40d61/width=832/51170537.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51170537",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 31337,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, chibi, 3d, \nzzMajora, spikes, yellow sclera, solo, 1boy, horns, glowing eyes,\nlooking at viewer, ,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "Euler a",
+      "cfg": 7,
+      "model": null,
+      "resources": []
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/833e0c0d-8b3a-4393-8c44-b80cea0e6975/width=832/51169122.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51169122",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 1729355716,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, blue eyes, blonde hair,  hat, weapon,  pointy ears, sword, chibi,  shield   ,  dynamic pose, sword slash, motion lines,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "DPM++ 2M Karras",
+      "cfg": 7,
+      "model": null,
+      "resources": []
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/06154f27-96c8-487d-96f8-c627866842ad/width=832/51169120.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51169120",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 952291488,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, 3d, chibi,\n Princess Peach, pink dress, blonde hair, blue eyes, long hair, crown, gem, gloves, puffy sleeves, short sleeves, white gloves, solo, smiling, looking at viewer, cowboy shot,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "DPM++ 2M Karras",
+      "cfg": 7,
+      "model": null,
+      "resources": []
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/0fe7b1a9-914f-47aa-906b-83e572f767a3/width=832/51163876.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51163876",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 31337,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, long hair, looking at viewer, smile, open mouth, blue eyes, dress, jewelry, red hair, pointy ears, white dress, bracelet, neckerchief, empty eyes, yellow neckerchief, triforce,    ,  <lora:WindWaker_Style_IXL:1.0>,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "DPM++ 2M Karras",
+      "cfg": 7,
+      "model": null,
+      "resources": [
+        {
+          "name": "WindWaker_Style_IXL",
+          "type": "lora",
+          "weight": 1
+        }
+      ]
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/3147ea21-bc7a-4a84-9d77-93f62c34e274/width=832/51169254.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51169254",
+      "width": 832,
+      "height": 1216,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 438227750,
+      "positive": "masterpiece, best quality, solo,   1girl, solo, chibi, 3d, \nzzHilda, red eyes, purple hair, long hair, pointy ears,  tiara, white gloves, dress, elbow gloves, jewelry, makeup, earrings, purple tabard, triforce, shoulder armor, tiara, \nlooking at viewer, smile,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "Euler a",
+      "cfg": 5,
+      "model": null,
+      "resources": []
+    },
+    {
+      "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/bb4cbd77-e6dc-4525-96b9-23114ce381bc/width=832/51163885.jpeg",
+      "civitaiUrl": "https://civitai.com/images/51163885",
+      "width": 832,
+      "height": 1248,
+      "type": "image",
+      "nsfwLevel": 1,
+      "seed": 1332683,
+      "positive": "masterpiece, best quality, BREAK,  1girl, solo, blonde hair, wavy hair, angel, angel wings, halo, smile, sitting, forest, white dress,smile, looking at viewer,  <lora:WindWaker_Style_IXL:1.0>,",
+      "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+      "steps": 30,
+      "sampler": "DPM++ 2M Karras",
+      "cfg": 7,
+      "model": null,
+      "resources": [
+        {
+          "name": "WindWaker_Style_IXL",
+          "type": "lora",
+          "weight": 1
+        }
+      ]
+    }
+  ],
+  "raw": {
+    "metadata": {
+      "ss_output_name": "WindWaker_Style_IXL",
+      "ss_base_model_version": "sdxl_base_v1-0",
+      "ss_clip_skip": "1",
+      "ss_learning_rate": "0.0001",
+      "ss_network_dim": "32",
+      "ss_network_dropout": "None",
+      "ss_multires_noise_iterations": "None",
+      "ss_loss_type": "l2",
+      "modelspec.prediction_type": "epsilon",
+      "ss_datasets": "[{\"is_dreambooth\": true, \"batch_size_per_device\": 1, \"num_train_images\": 462, \"num_reg_images\": 0, \"resolution\": [768, 768], \"enable_bucket\": true, \"min_bucket_reso\": 256, \"max_bucket_reso\": 4096, \"tag_frequency\": {\"WindWaker_Style_IXL\": {\"multiple girls\": 4, \"blonde hair\": 68, \"brown hair\": 14, \"1boy\": 80, \"dress\": 21, \"3girls\": 2, \"bird\": 7, \"penguin\": 1, \"looking at viewer\": 24, \"smile\": 52, \"blue eyes\": 49, \"twintails\": 9, \"pointy ears\": 98, \"black eyes\": 12, \"parody\": 5, \"standing\": 44, \"pokemon (creature)\": 4, \"fire\": 2, \"molten rock\": 2, \"solo\": 109, \"no humans\": 21, \"crown\": 2, \"pillar\": 3, \"throne\": 1, \"stained glass\": 1, \"hat\": 49, \"closed eyes\": 7, \"upper body\": 11, \"male focus\": 65, \"instrument\": 5, \"green headwear\": 4, \"playing instrument\": 2, \"violin\": 1, \"1girl\": 64, \"full body\": 17, \"black skin\": 2, \"cape\": 8, \"from side\": 2, \"night\": 8, \"holding\": 29, \"mask\": 1, \"blue background\": 1, \"music\": 1, \"holding instrument\": 1, \"outdoors\": 38, \"sky\": 39, \"cloud\": 36, \"water\": 15, \"glowing\": 8, \"ocean\": 20, \"night sky\": 3, \"scenery\": 12, \"horizon\": 2, \"multiple boys\": 9, \"2boys\": 7, \"from behind\": 8, \"star (sky)\": 3, \"starry sky\": 2, \"barrel\": 1, \"day\": 33, \"blue sky\": 17, \"red eyes\": 28, \"weapon\": 58, \"horns\": 3, \"belt\": 18, \"sword\": 46, \"indoors\": 10, \"holding weapon\": 19, \"armor\": 3, \"holding sword\": 16, \"helmet\": 1, \"shoulder armor\": 1, \"glowing eyes\": 3, \"walking\": 2, \"pauldrons\": 1, \"shield\": 38, \"full armor\": 1, \"tree\": 20, \"windmill\": 1, \"watercraft\": 8, \"ship\": 2, \"boat\": 4, \"open mouth\": 19, \"shirt\": 13, \"teeth\": 10, \"sleeveless\": 8, \"arm up\": 1, \"blush stickers\": 2, \"grass\": 14, \"short hair\": 9, \"one eye closed\": 4, \"green hair\": 4, \"neckerchief\": 10, \"crossed arms\": 1, \"red neckerchief\": 1, \"tunic\": 13, \"holding 
shield\": 3, \"vest\": 2, \"master sword\": 6, \"desert\": 1, \"fantasy\": 1, \"building\": 6, \"window\": 1, \"shadow\": 8, \"sunlight\": 1, \"plant\": 5, \"stairs\": 3, \"shade\": 1, \"beach\": 2, \"landscape\": 1, \"sign\": 3, \"flag\": 1, \"ruins\": 1, \"dark skin\": 30, \"door\": 1, \"carpet\": 1, \":3\": 2, \"own hands together\": 1, \"beak\": 2, \"chain\": 2, \"triforce\": 12, \"left-handed\": 8, \"child\": 1, \"male child\": 1, \"battle\": 1, \"long hair\": 27, \"gloves\": 15, \"lying\": 4, \"parted lips\": 1, \"floating hair\": 1, \"on side\": 3, \"sleeping\": 4, \"tiara\": 3, \"pink dress\": 1, \"closed mouth\": 12, \"on back\": 1, \"white hair\": 1, \"facial hair\": 1, \"beard\": 1, \"serious\": 1, \"yellow eyes\": 7, \"red hair\": 30, \"colored skin\": 4, \"fangs\": 2, \"thick eyebrows\": 1, \"green skin\": 1, \"tusks\": 1, \"elbow gloves\": 1, \"jewelry\": 13, \"necklace\": 1, \"makeup\": 1, \";)\": 1, \"long sleeves\": 6, \"green eyes\": 5, \"braid\": 4, \"gun\": 1, \"aiming\": 2, \"flower\": 2, \"bracelet\": 6, \"dark-skinned female\": 9, \"blue dress\": 1, \"sandals\": 5, \"weapon on back\": 2, \"earrings\": 5, \"dark-skinned male\": 4, \"fairy\": 2, \"freckles\": 1, \"purple eyes\": 1, \"boots\": 11, \"chibi\": 14, \"cosplay\": 4, \"headband\": 1, \"furry\": 2, \"1other\": 4, \"gameplay mechanics\": 5, \"yellow fur\": 1, \"stadium\": 1, \"lucario\": 1, \"animal ears\": 4, \"furry male\": 1, \"short sleeves\": 7, \"scarf\": 4, \"expressionless\": 1, \"looking up\": 1, \"red shirt\": 3, \"meme\": 1, \"sailor moon redraw challenge (meme)\": 1, \"bandana\": 1, \"palm tree\": 3, \"fake screenshot\": 2, \"turban\": 2, \"health bar\": 2, \"knife\": 1, \"dagger\": 1, \"slit pupils\": 3, \"androgynous\": 2, \"colored sclera\": 2, \"blue scarf\": 1, \"animal hat\": 3, \"yellow sclera\": 2, \"cat hat\": 3, \"cat\": 1, \"cliff\": 1, \"cat ears\": 1, \"witch hat\": 1, \":<\": 1, \":d\": 5, \"white dress\": 2, \"empty eyes\": 3, \"yellow neckerchief\": 3, 
\"tabard\": 1, \"skirt\": 3, \"shovel\": 1, \"profile\": 3, \"cow\": 2, \"simple background\": 2, \"grey background\": 1, \"black background\": 1, \"brown footwear\": 3, \"staff\": 1, \"sunglasses\": 2, \"hawaiian shirt\": 1, \"polearm\": 2, \"spear\": 1, \"ponytail\": 1, \"pants\": 5, \"pirate\": 1, \"ahoge\": 4, \"blue shirt\": 4, \"t-shirt\": 6, \"waving\": 2, \"pikachu\": 1, \"sheath\": 3, \"facing away\": 1, \"flying\": 1, \"chicken\": 1, \"thighhighs\": 1, \"bow\": 1, \":t\": 1, \"food\": 1, \"fruit\": 1, \"bow (weapon)\": 1, \"arrow (projectile)\": 1, \"holding arrow\": 1, \"wand\": 1, \"hammer\": 1, \"rabbit ears\": 2, \"hair over one eye\": 4, \"grin\": 6, \"sharp teeth\": 5, \"red footwear\": 8, \"forehead jewel\": 1, \"blue hair\": 1, \"bodysuit\": 1, \"outstretched arm\": 1, \"purple hair\": 2, \"fusion\": 1, \"among us\": 1, \"jacket\": 1, \"spiked hair\": 1, \"pink flower\": 1, \"red gloves\": 2, \"white footwear\": 1, \"skull\": 13, \"evil smile\": 3, \"dark persona\": 1, \"grey footwear\": 1, \"grey skin\": 1, \"blood\": 1, \"arrow (symbol)\": 2, \"pantyhose\": 4, \"english text\": 1, \"torch\": 1, \"cave\": 1, \"dual wielding\": 1, \"blue skin\": 2, \"energy sword\": 1, \"fang\": 3, \"black dress\": 3, \"wristband\": 3, \"tentacle hair\": 3, \"octarian\": 3, \"octoling\": 3, \"octoling girl\": 3, \"twin braids\": 3, \"forehead\": 2, \"bare shoulders\": 2, \"black footwear\": 2, \"sleeveless dress\": 2, \"bangs\": 3, \"female child\": 2, \"surcoat\": 1, \"animal\": 1, \"monster\": 1, \"creature\": 1, \"black gloves\": 1, \"green background\": 1, \"yordle\": 1, \"rain\": 1, \"dark\": 1, \"heads-up display\": 1, \"diluc (genshin impact)\": 1, \"2girls\": 2, \"hair bun\": 1, \"double bun\": 1, \"slime (creature)\": 1, \"grey hair\": 1, \"broom\": 2, \"broom riding\": 1, \"black hair\": 5, \"hairband\": 5, \"red dress\": 5, \"big hair\": 4, \"holding broom\": 1, \"orange hairband\": 4, \"shoes\": 1, \"trident\": 1, \"orange neckerchief\": 1, \"book\": 
1, \"fence\": 1, \"wall\": 1, \"stone wall\": 1, \"camera\": 1, \"holding camera\": 1, \"video camera\": 1}}, \"bucket_info\": {\"buckets\": {\"0\": {\"resolution\": [512, 1088], \"count\": 9}, \"1\": {\"resolution\": [576, 960], \"count\": 54}, \"2\": {\"resolution\": [576, 1024], \"count\": 18}, \"3\": {\"resolution\": [640, 896], \"count\": 60}, \"4\": {\"resolution\": [704, 832], \"count\": 30}, \"5\": {\"resolution\": [768, 768], \"count\": 78}, \"6\": {\"resolution\": [832, 704], \"count\": 48}, \"7\": {\"resolution\": [896, 640], \"count\": 54}, \"8\": {\"resolution\": [960, 576], \"count\": 24}, \"9\": {\"resolution\": [1024, 576], \"count\": 78}, \"10\": {\"resolution\": [1088, 512], \"count\": 9}}, \"mean_img_ar_error\": 0.031399245684279216}, \"subsets\": [{\"img_count\": 154, \"num_repeats\": 3, \"color_aug\": false, \"flip_aug\": false, \"random_crop\": false, \"shuffle_caption\": true, \"keep_tokens\": 1, \"keep_tokens_separator\": \"\", \"secondary_separator\": null, \"enable_wildcard\": false, \"caption_prefix\": null, \"caption_suffix\": null, \"image_dir\": \"WindWaker_Style_IXL\", \"class_tokens\": null, \"is_reg\": false}]}]",
+      "modelspec.date": "2025-01-12T04:43:45",
+      "ss_seed": "42",
+      "ss_network_module": "networks.lora",
+      "modelspec.sai_model_spec": "1.0.0",
+      "ss_mixed_precision": "fp16",
+      "sshs_model_hash": "cb3949362934ce652a2d242c2c4ebf26f720f1ec05b8b4a030b268548065080c",
+      "modelspec.title": "WindWaker_Style_IXL",
+      "ss_lowram": "False",
+      "ss_training_comment": "Lora created by https://civitai.com/user/CitronLegacy",
+      "ss_cache_latents": "True",
+      "ss_debiased_estimation": "False",
+      "ss_steps": "4620",
+      "ss_full_fp16": "False",
+      "ss_multires_noise_discount": "0.3",
+      "ss_min_snr_gamma": "None",
+      "modelspec.architecture": "stable-diffusion-xl-v1-base/lora",
+      "ss_caption_dropout_rate": "0.0",
+      "ss_optimizer": "bitsandbytes.optim.adamw.AdamW8bit(weight_decay=0.1,betas=[0.9, 0.99])",
+      "ss_training_started_at": "1736647818.3101928",
+      "ss_session_id": "148266488",
+      "ss_network_alpha": "32",
+      "ss_tag_frequency": {
+        "WindWaker_Style_IXL": {
+          "multiple girls": 4,
+          "blonde hair": 68,
+          "brown hair": 14,
+          "1boy": 80,
+          "dress": 21,
+          "3girls": 2,
+          "bird": 7,
+          "penguin": 1,
+          "looking at viewer": 24,
+          "smile": 52,
+          "blue eyes": 49,
+          "twintails": 9,
+          "pointy ears": 98,
+          "black eyes": 12,
+          "parody": 5,
+          "standing": 44,
+          "pokemon (creature)": 4,
+          "fire": 2,
+          "molten rock": 2,
+          "solo": 109,
+          "no humans": 21,
+          "crown": 2,
+          "pillar": 3,
+          "throne": 1,
+          "stained glass": 1,
+          "hat": 49,
+          "closed eyes": 7,
+          "upper body": 11,
+          "male focus": 65,
+          "instrument": 5,
+          "green headwear": 4,
+          "playing instrument": 2,
+          "violin": 1,
+          "1girl": 64,
+          "full body": 17,
+          "black skin": 2,
+          "cape": 8,
+          "from side": 2,
+          "night": 8,
+          "holding": 29,
+          "mask": 1,
+          "blue background": 1,
+          "music": 1,
+          "holding instrument": 1,
+          "outdoors": 38,
+          "sky": 39,
+          "cloud": 36,
+          "water": 15,
+          "glowing": 8,
+          "ocean": 20,
+          "night sky": 3,
+          "scenery": 12,
+          "horizon": 2,
+          "multiple boys": 9,
+          "2boys": 7,
+          "from behind": 8,
+          "star (sky)": 3,
+          "starry sky": 2,
+          "barrel": 1,
+          "day": 33,
+          "blue sky": 17,
+          "red eyes": 28,
+          "weapon": 58,
+          "horns": 3,
+          "belt": 18,
+          "sword": 46,
+          "indoors": 10,
+          "holding weapon": 19,
+          "armor": 3,
+          "holding sword": 16,
+          "helmet": 1,
+          "shoulder armor": 1,
+          "glowing eyes": 3,
+          "walking": 2,
+          "pauldrons": 1,
+          "shield": 38,
+          "full armor": 1,
+          "tree": 20,
+          "windmill": 1,
+          "watercraft": 8,
+          "ship": 2,
+          "boat": 4,
+          "open mouth": 19,
+          "shirt": 13,
+          "teeth": 10,
+          "sleeveless": 8,
+          "arm up": 1,
+          "blush stickers": 2,
+          "grass": 14,
+          "short hair": 9,
+          "one eye closed": 4,
+          "green hair": 4,
+          "neckerchief": 10,
+          "crossed arms": 1,
+          "red neckerchief": 1,
+          "tunic": 13,
+          "holding shield": 3,
+          "vest": 2,
+          "master sword": 6,
+          "desert": 1,
+          "fantasy": 1,
+          "building": 6,
+          "window": 1,
+          "shadow": 8,
+          "sunlight": 1,
+          "plant": 5,
+          "stairs": 3,
+          "shade": 1,
+          "beach": 2,
+          "landscape": 1,
+          "sign": 3,
+          "flag": 1,
+          "ruins": 1,
+          "dark skin": 30,
+          "door": 1,
+          "carpet": 1,
+          ":3": 2,
+          "own hands together": 1,
+          "beak": 2,
+          "chain": 2,
+          "triforce": 12,
+          "left-handed": 8,
+          "child": 1,
+          "male child": 1,
+          "battle": 1,
+          "long hair": 27,
+          "gloves": 15,
+          "lying": 4,
+          "parted lips": 1,
+          "floating hair": 1,
+          "on side": 3,
+          "sleeping": 4,
+          "tiara": 3,
+          "pink dress": 1,
+          "closed mouth": 12,
+          "on back": 1,
+          "white hair": 1,
+          "facial hair": 1,
+          "beard": 1,
+          "serious": 1,
+          "yellow eyes": 7,
+          "red hair": 30,
+          "colored skin": 4,
+          "fangs": 2,
+          "thick eyebrows": 1,
+          "green skin": 1,
+          "tusks": 1,
+          "elbow gloves": 1,
+          "jewelry": 13,
+          "necklace": 1,
+          "makeup": 1,
+          ";)": 1,
+          "long sleeves": 6,
+          "green eyes": 5,
+          "braid": 4,
+          "gun": 1,
+          "aiming": 2,
+          "flower": 2,
+          "bracelet": 6,
+          "dark-skinned female": 9,
+          "blue dress": 1,
+          "sandals": 5,
+          "weapon on back": 2,
+          "earrings": 5,
+          "dark-skinned male": 4,
+          "fairy": 2,
+          "freckles": 1,
+          "purple eyes": 1,
+          "boots": 11,
+          "chibi": 14,
+          "cosplay": 4,
+          "headband": 1,
+          "furry": 2,
+          "1other": 4,
+          "gameplay mechanics": 5,
+          "yellow fur": 1,
+          "stadium": 1,
+          "lucario": 1,
+          "animal ears": 4,
+          "furry male": 1,
+          "short sleeves": 7,
+          "scarf": 4,
+          "expressionless": 1,
+          "looking up": 1,
+          "red shirt": 3,
+          "meme": 1,
+          "sailor moon redraw challenge (meme)": 1,
+          "bandana": 1,
+          "palm tree": 3,
+          "fake screenshot": 2,
+          "turban": 2,
+          "health bar": 2,
+          "knife": 1,
+          "dagger": 1,
+          "slit pupils": 3,
+          "androgynous": 2,
+          "colored sclera": 2,
+          "blue scarf": 1,
+          "animal hat": 3,
+          "yellow sclera": 2,
+          "cat hat": 3,
+          "cat": 1,
+          "cliff": 1,
+          "cat ears": 1,
+          "witch hat": 1,
+          ":<": 1,
+          ":d": 5,
+          "white dress": 2,
+          "empty eyes": 3,
+          "yellow neckerchief": 3,
+          "tabard": 1,
+          "skirt": 3,
+          "shovel": 1,
+          "profile": 3,
+          "cow": 2,
+          "simple background": 2,
+          "grey background": 1,
+          "black background": 1,
+          "brown footwear": 3,
+          "staff": 1,
+          "sunglasses": 2,
+          "hawaiian shirt": 1,
+          "polearm": 2,
+          "spear": 1,
+          "ponytail": 1,
+          "pants": 5,
+          "pirate": 1,
+          "ahoge": 4,
+          "blue shirt": 4,
+          "t-shirt": 6,
+          "waving": 2,
+          "pikachu": 1,
+          "sheath": 3,
+          "facing away": 1,
+          "flying": 1,
+          "chicken": 1,
+          "thighhighs": 1,
+          "bow": 1,
+          ":t": 1,
+          "food": 1,
+          "fruit": 1,
+          "bow (weapon)": 1,
+          "arrow (projectile)": 1,
+          "holding arrow": 1,
+          "wand": 1,
+          "hammer": 1,
+          "rabbit ears": 2,
+          "hair over one eye": 4,
+          "grin": 6,
+          "sharp teeth": 5,
+          "red footwear": 8,
+          "forehead jewel": 1,
+          "blue hair": 1,
+          "bodysuit": 1,
+          "outstretched arm": 1,
+          "purple hair": 2,
+          "fusion": 1,
+          "among us": 1,
+          "jacket": 1,
+          "spiked hair": 1,
+          "pink flower": 1,
+          "red gloves": 2,
+          "white footwear": 1,
+          "skull": 13,
+          "evil smile": 3,
+          "dark persona": 1,
+          "grey footwear": 1,
+          "grey skin": 1,
+          "blood": 1,
+          "arrow (symbol)": 2,
+          "pantyhose": 4,
+          "english text": 1,
+          "torch": 1,
+          "cave": 1,
+          "dual wielding": 1,
+          "blue skin": 2,
+          "energy sword": 1,
+          "fang": 3,
+          "black dress": 3,
+          "wristband": 3,
+          "tentacle hair": 3,
+          "octarian": 3,
+          "octoling": 3,
+          "octoling girl": 3,
+          "twin braids": 3,
+          "forehead": 2,
+          "bare shoulders": 2,
+          "black footwear": 2,
+          "sleeveless dress": 2,
+          "bangs": 3,
+          "female child": 2,
+          "surcoat": 1,
+          "animal": 1,
+          "monster": 1,
+          "creature": 1,
+          "black gloves": 1,
+          "green background": 1,
+          "yordle": 1,
+          "rain": 1,
+          "dark": 1,
+          "heads-up display": 1,
+          "diluc (genshin impact)": 1,
+          "2girls": 2,
+          "hair bun": 1,
+          "double bun": 1,
+          "slime (creature)": 1,
+          "grey hair": 1,
+          "broom": 2,
+          "broom riding": 1,
+          "black hair": 5,
+          "hairband": 5,
+          "red dress": 5,
+          "big hair": 4,
+          "holding broom": 1,
+          "orange hairband": 4,
+          "shoes": 1,
+          "trident": 1,
+          "orange neckerchief": 1,
+          "book": 1,
+          "fence": 1,
+          "wall": 1,
+          "stone wall": 1,
+          "camera": 1,
+          "holding camera": 1,
+          "video camera": 1
+        }
+      },
+      "modelspec.encoder_layer": "1",
+      "sshs_legacy_hash": "bf29079a",
+      "ss_epoch": "10",
+      "ss_sd_model_name": "OnomaAIResearch/Illustrious-xl-early-release-v0",
+      "ss_num_train_images": "462",
+      "ss_num_epochs": "10",
+      "ss_caption_tag_dropout_rate": "0.0",
+      "modelspec.implementation": "https://github.com/Stability-AI/generative-models",
+      "ss_max_train_steps": "4620",
+      "ss_adaptive_noise_scale": "None",
+      "ss_huber_schedule": "snr",
+      "ss_lr_scheduler": "cosine_with_restarts",
+      "modelspec.resolution": "1024x1024",
+      "ss_zero_terminal_snr": "False",
+      "ss_ip_noise_gamma": "None",
+      "ss_caption_dropout_every_n_epochs": "0",
+      "ss_sd_scripts_commit_hash": "(unknown)",
+      "ss_text_encoder_lr": "0.0001",
+      "ss_gradient_checkpointing": "False",
+      "ss_lr_warmup_steps": "78",
+      "ss_max_token_length": "75",
+      "ss_dataset_dirs": {
+        "WindWaker_Style_IXL": {
+          "n_repeats": 3,
+          "img_count": 154
+        }
+      },
+      "ss_max_grad_norm": "1",
+      "ss_face_crop_aug_range": "None",
+      "ss_gradient_accumulation_steps": "1",
+      "ss_num_reg_images": "0",
+      "ss_v2": "False",
+      "ss_num_batches_per_epoch": "462",
+      "ss_noise_offset": "0.03",
+      "ss_prior_loss_weight": "1",
+      "ss_scale_weight_norms": "None",
+      "ss_noise_offset_random_strength": "False",
+      "ss_ip_noise_gamma_random_strength": "False",
+      "ss_huber_c": "0.1",
+      "ss_training_finished_at": "1736657025.676514",
+      "ss_unet_lr": "0.0001",
+      "_sha256": "6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711"
+    },
+    "civitai": {
+      "id": 1273708,
+      "modelId": 208265,
+      "name": "Illustrious",
+      "createdAt": "2025-01-12T14:32:52.185Z",
+      "updatedAt": "2025-03-22T22:32:54.409Z",
+      "status": "Published",
+      "publishedAt": "2025-01-12T15:25:14.123Z",
+      "trainedWords": [
+        "3D, ",
+        "Chibi, "
+      ],
+      "trainingStatus": null,
+      "trainingDetails": null,
+      "baseModel": "Illustrious",
+      "baseModelType": null,
+      "earlyAccessEndsAt": null,
+      "earlyAccessConfig": null,
+      "description": "<p>3d, </p><p>&lt;lora:WindWaker_Style_IXL:1.0&gt;,</p>",
+      "uploadType": "Created",
+      "usageControl": "Download",
+      "air": "urn:air:sdxl:lora:civitai:208265@1273708",
+      "stats": {
+        "downloadCount": 503,
+        "ratingCount": 0,
+        "rating": 0,
+        "thumbsUpCount": 104
+      },
+      "model": {
+        "name": "Style of the Winds (The Legend Of Zelda: Wind Waker) [Illustrious & NoobAI & SD1.5]",
+        "type": "LORA",
+        "nsfw": false,
+        "poi": false
+      },
+      "files": [
+        {
+          "id": 1178599,
+          "sizeKB": 223108.06640625,
+          "name": "WindWaker_Style_IXL.safetensors",
+          "type": "Model",
+          "pickleScanResult": "Success",
+          "pickleScanMessage": "No Pickle imports",
+          "virusScanResult": "Success",
+          "virusScanMessage": null,
+          "scannedAt": "2025-01-12T14:41:33.135Z",
+          "metadata": {
+            "format": "SafeTensor",
+            "size": null,
+            "fp": null
+          },
+          "hashes": {
+            "AutoV1": "87417B67",
+            "AutoV2": "6E0655E27E",
+            "SHA256": "6E0655E27E5635E91D6F9EDBC74198AB13A8BB1C4C95FD2ED0E361D422199711",
+            "CRC32": "7B77E249",
+            "BLAKE3": "AC1EABA893D66C855957B5D1FF036126D872662BFE00BD5D37508FABF8953EC9",
+            "AutoV3": "CB3949362934"
+          },
+          "primary": true,
+          "downloadUrl": "https://civitai.com/api/download/models/1273708"
+        }
+      ],
+      "images": [
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/b9a71a85-c130-466c-9512-3c9e4b94daf2/width=832/51277231.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UOJj;f9w~VWm4.M{9cWV9HIq9bs,E1~Bw|t6",
+          "type": "image",
+          "metadata": {
+            "hash": "UOJj;f9w~VWm4.M{9cWV9HIq9bs,E1~Bw|t6",
+            "size": 147226,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 98679726,
+            "extra": {
+              "remixOfId": 51163887
+            },
+            "steps": 30,
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, blue eyes, blonde hair,  hat, weapon, pointy ears, sword, chibi, 3d, instrument, shield   , smile, looking at viewer,",
+            "sampler": "Euler a",
+            "cfgScale": 7,
+            "clipSkip": 2,
+            "resources": [],
+            "Created Date": "2025-01-13T0256:00.7778409Z",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "civitaiResources": [
+              {
+                "type": "checkpoint",
+                "modelVersionId": 1183765,
+                "modelVersionName": "v8.0"
+              },
+              {
+                "type": "lora",
+                "weight": 1,
+                "modelVersionId": 1273708,
+                "modelVersionName": "Illustrious"
+              }
+            ]
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": true,
+          "remixOfId": 51163887
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/7d335619-6459-4f74-b014-90a41f971889/width=832/51163882.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UFJQco01B#-V~D01.RDlrY%0%esX59aLRPM|",
+          "type": "image",
+          "metadata": {
+            "hash": "UFJQco01B#-V~D01.RDlrY%0%esX59aLRPM|",
+            "size": 1259841,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 252630410834070,
+            "steps": 30,
+            "hashes": {
+              "model": "63e5c28bf8",
+              "LORA:WindWaker_Style_IXL": "6e0655e27e"
+            },
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, pink hair, short twintails, white dress, cowboy shot, 3d,    ,  <lora:WindWaker_Style_IXL:1.0>,",
+            "sampler": "DPM++ 2M Karras",
+            "cfgScale": 7,
+            "resources": [
+              {
+                "name": "WindWaker_Style_IXL",
+                "type": "lora",
+                "weight": 1
+              }
+            ],
+            "Model hash": "63e5c28bf8",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "waiNSFWIllustrious_v80 Version": "ComfyUI"
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": false,
+          "remixOfId": null
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dec826b5-fd01-406b-b881-b9fe5a6abbde/width=832/51163886.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UdKA[g_J.6tktjM|sptQX7%1wNWEV[t6fSxt",
+          "type": "image",
+          "metadata": {
+            "hash": "UdKA[g_J.6tktjM|sptQX7%1wNWEV[t6fSxt",
+            "size": 1120952,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 272033102095518,
+            "steps": 30,
+            "hashes": {
+              "model": "63e5c28bf8",
+              "LORA:WindWaker_Style_IXL": "6e0655e27e"
+            },
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, long hair, smile, blue eyes, gloves, jewelry, one eye closed, green hair, pointy ears, necklace, makeup, tiara, ;)  ,  <lora:WindWaker_Style_IXL:1.0>,",
+            "sampler": "DPM++ 2M Karras",
+            "cfgScale": 7,
+            "resources": [
+              {
+                "name": "WindWaker_Style_IXL",
+                "type": "lora",
+                "weight": 1
+              }
+            ],
+            "Model hash": "63e5c28bf8",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "waiNSFWIllustrious_v80 Version": "ComfyUI"
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": false,
+          "remixOfId": null
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/9ada0404-1402-482d-96aa-183da60661aa/width=832/51163887.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UNLWkPR._M-n00s.9coz9ZD+0LxWs*?G-:IV",
+          "type": "image",
+          "metadata": {
+            "hash": "UNLWkPR._M-n00s.9coz9ZD+0LxWs*?G-:IV",
+            "size": 1110897,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 991930792475181,
+            "steps": 30,
+            "hashes": {
+              "model": "63e5c28bf8",
+              "LORA:WindWaker_Style_IXL": "6e0655e27e"
+            },
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, blue eyes, blonde hair, 1boy, hat, weapon, male focus, pointy ears, sword, dark skin, chibi, instrument, shield   ,  <lora:WindWaker_Style_IXL:1.0>,",
+            "sampler": "DPM++ 2M Karras",
+            "cfgScale": 7,
+            "resources": [
+              {
+                "name": "WindWaker_Style_IXL",
+                "type": "lora",
+                "weight": 1
+              }
+            ],
+            "Model hash": "63e5c28bf8",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "waiNSFWIllustrious_v80 Version": "ComfyUI"
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": false,
+          "remixOfId": null
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/50c31f3d-3c04-4bb7-8609-34956da40d61/width=832/51170537.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "U27^6;Iq03=_jwxYI.EQ04so~SEg0yWF%3xV",
+          "type": "image",
+          "metadata": {
+            "hash": "U27^6;Iq03=_jwxYI.EQ04so~SEg0yWF%3xV",
+            "size": 139579,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 31337,
+            "steps": 30,
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, chibi, 3d, \nzzMajora, spikes, yellow sclera, solo, 1boy, horns, glowing eyes,\nlooking at viewer, ,",
+            "sampler": "Euler a",
+            "cfgScale": 7,
+            "clipSkip": 2,
+            "resources": [],
+            "Created Date": "2025-01-12T1544:13.9453863Z",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "civitaiResources": [
+              {
+                "type": "checkpoint",
+                "modelVersionId": 1183765,
+                "modelVersionName": "v8.0"
+              },
+              {
+                "type": "lora",
+                "weight": 0.8,
+                "modelVersionId": 1193395,
+                "modelVersionName": "Illustrious"
+              },
+              {
+                "type": "lora",
+                "weight": 1,
+                "modelVersionId": 1273708,
+                "modelVersionName": "Illustrious"
+              }
+            ]
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": true,
+          "remixOfId": null
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/833e0c0d-8b3a-4393-8c44-b80cea0e6975/width=832/51169122.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UUMjBQxc%hM_~osEOZWA%fD*R,tQ%MV?nOS4",
+          "type": "image",
+          "metadata": {
+            "hash": "UUMjBQxc%hM_~osEOZWA%fD*R,tQ%MV?nOS4",
+            "size": 198671,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 1729355716,
+            "extra": {
+              "remixOfId": 51163887
+            },
+            "steps": 30,
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, blue eyes, blonde hair,  hat, weapon,  pointy ears, sword, chibi,  shield   ,  dynamic pose, sword slash, motion lines,",
+            "sampler": "DPM++ 2M Karras",
+            "cfgScale": 7,
+            "clipSkip": 2,
+            "resources": [],
+            "Created Date": "2025-01-12T1529:44.8655317Z",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "civitaiResources": [
+              {
+                "type": "checkpoint",
+                "modelVersionId": 1183765,
+                "modelVersionName": "v8.0"
+              },
+              {
+                "type": "lora",
+                "weight": 1,
+                "modelVersionId": 1273708,
+                "modelVersionName": "Illustrious"
+              }
+            ]
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": true,
+          "remixOfId": 51163887
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/06154f27-96c8-487d-96f8-c627866842ad/width=832/51169120.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UGLf8XtR00~UCVxbnXtOIDWTjKRn9hxZx@R-",
+          "type": "image",
+          "metadata": {
+            "hash": "UGLf8XtR00~UCVxbnXtOIDWTjKRn9hxZx@R-",
+            "size": 166337,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 952291488,
+            "extra": {
+              "remixOfId": 51163887
+            },
+            "steps": 30,
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, 3d, chibi,\n Princess Peach, pink dress, blonde hair, blue eyes, long hair, crown, gem, gloves, puffy sleeves, short sleeves, white gloves, solo, smiling, looking at viewer, cowboy shot,",
+            "sampler": "DPM++ 2M Karras",
+            "cfgScale": 7,
+            "clipSkip": 2,
+            "resources": [],
+            "Created Date": "2025-01-12T1533:26.5487359Z",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "civitaiResources": [
+              {
+                "type": "checkpoint",
+                "modelVersionId": 1183765,
+                "modelVersionName": "v8.0"
+              },
+              {
+                "type": "lora",
+                "weight": 1,
+                "modelVersionId": 1273708,
+                "modelVersionName": "Illustrious"
+              }
+            ]
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": true,
+          "remixOfId": 51163887
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/0fe7b1a9-914f-47aa-906b-83e572f767a3/width=832/51163876.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UKK,T#}?02OF00RQY5S2E+N^fMWWWFM|s.oc",
+          "type": "image",
+          "metadata": {
+            "hash": "UKK,T#}?02OF00RQY5S2E+N^fMWWWFM|s.oc",
+            "size": 852319,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 31337,
+            "steps": 30,
+            "hashes": {
+              "model": "63e5c28bf8",
+              "LORA:WindWaker_Style_IXL": "6e0655e27e"
+            },
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, long hair, looking at viewer, smile, open mouth, blue eyes, dress, jewelry, red hair, pointy ears, white dress, bracelet, neckerchief, empty eyes, yellow neckerchief, triforce,    ,  <lora:WindWaker_Style_IXL:1.0>,",
+            "sampler": "DPM++ 2M Karras",
+            "cfgScale": 7,
+            "resources": [
+              {
+                "name": "WindWaker_Style_IXL",
+                "type": "lora",
+                "weight": 1
+              }
+            ],
+            "Model hash": "63e5c28bf8",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "waiNSFWIllustrious_v80 Version": "ComfyUI"
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": false,
+          "remixOfId": null
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/3147ea21-bc7a-4a84-9d77-93f62c34e274/width=832/51169254.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1216,
+          "hash": "UBEoAB0L0J~p00?HA1D%b|E1-NbwJB%2?Fsl",
+          "type": "image",
+          "metadata": {
+            "hash": "UBEoAB0L0J~p00?HA1D%b|E1-NbwJB%2?Fsl",
+            "size": 146909,
+            "width": 832,
+            "height": 1216
+          },
+          "meta": {
+            "Size": "832x1216",
+            "seed": 438227750,
+            "extra": {
+              "remixOfId": 51163887
+            },
+            "steps": 30,
+            "prompt": "masterpiece, best quality, solo,   1girl, solo, chibi, 3d, \nzzHilda, red eyes, purple hair, long hair, pointy ears,  tiara, white gloves, dress, elbow gloves, jewelry, makeup, earrings, purple tabard, triforce, shoulder armor, tiara, \nlooking at viewer, smile,",
+            "sampler": "Euler a",
+            "cfgScale": 5,
+            "clipSkip": 2,
+            "resources": [],
+            "Created Date": "2025-01-12T1534:34.0959271Z",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "civitaiResources": [
+              {
+                "type": "checkpoint",
+                "modelVersionId": 1183765,
+                "modelVersionName": "v8.0"
+              },
+              {
+                "type": "lora",
+                "weight": 0.7,
+                "modelVersionId": 1221721,
+                "modelVersionName": "v1.0"
+              },
+              {
+                "type": "lora",
+                "weight": 1,
+                "modelVersionId": 1273708,
+                "modelVersionName": "Illustrious"
+              }
+            ]
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": true,
+          "remixOfId": 51163887
+        },
+        {
+          "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/bb4cbd77-e6dc-4525-96b9-23114ce381bc/width=832/51163885.jpeg",
+          "nsfwLevel": 1,
+          "width": 832,
+          "height": 1248,
+          "hash": "U8HC77?900Iw1bIqm_tQu6Rp*{tK0?xV}NtQ",
+          "type": "image",
+          "metadata": {
+            "hash": "U8HC77?900Iw1bIqm_tQu6Rp*{tK0?xV}NtQ",
+            "size": 1154943,
+            "width": 832,
+            "height": 1248
+          },
+          "meta": {
+            "Size": "832x1248",
+            "seed": 1332683,
+            "steps": 30,
+            "hashes": {
+              "model": "04ba0dfcc1",
+              "LORA:WindWaker_Style_IXL": "6e0655e27e"
+            },
+            "prompt": "masterpiece, best quality, BREAK,  1girl, solo, blonde hair, wavy hair, angel, angel wings, halo, smile, sitting, forest, white dress,smile, looking at viewer,  <lora:WindWaker_Style_IXL:1.0>,",
+            "sampler": "DPM++ 2M Karras",
+            "cfgScale": 7,
+            "resources": [
+              {
+                "name": "WindWaker_Style_IXL",
+                "type": "lora",
+                "weight": 1
+              }
+            ],
+            "Model hash": "04ba0dfcc1",
+            "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4),  multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,",
+            "waiNSFWIllustrious_v70 Version": "ComfyUI"
+          },
+          "availability": "Public",
+          "hasMeta": true,
+          "hasPositivePrompt": true,
+          "onSite": false,
+          "remixOfId": null
+        }
+      ],
+      "downloadUrl": "https://civitai.com/api/download/models/1273708",
+      "_sha256": "6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711",
+      "_civitai_api": "https://civitai.com/api/v1/model-versions/by-hash/6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711"
+    }
+  },
+  "baseModelFile": "OnomaAIResearch/Illustrious-xl-early-release-v0",
+  "trainedWords": [
+    {
+      "word": "3D",
+      "civitai": true
+    },
+    {
+      "word": "Chibi",
+      "civitai": true
+    },
+    {
+      "word": "solo",
+      "count": 109,
+      "metadata": true
+    },
+    {
+      "word": "pointy ears",
+      "count": 98,
+      "metadata": true
+    },
+    {
+      "word": "1boy",
+      "count": 80,
+      "metadata": true
+    },
+    {
+      "word": "blonde hair",
+      "count": 68,
+      "metadata": true
+    },
+    {
+      "word": "male focus",
+      "count": 65,
+      "metadata": true
+    },
+    {
+      "word": "1girl",
+      "count": 64,
+      "metadata": true
+    },
+    {
+      "word": "weapon",
+      "count": 58,
+      "metadata": true
+    },
+    {
+      "word": "smile",
+      "count": 52,
+      "metadata": true
+    },
+    {
+      "word": "blue eyes",
+      "count": 49,
+      "metadata": true
+    },
+    {
+      "word": "hat",
+      "count": 49,
+      "metadata": true
+    },
+    {
+      "word": "sword",
+      "count": 46,
+      "metadata": true
+    },
+    {
+      "word": "standing",
+      "count": 44,
+      "metadata": true
+    },
+    {
+      "word": "sky",
+      "count": 39,
+      "metadata": true
+    },
+    {
+      "word": "outdoors",
+      "count": 38,
+      "metadata": true
+    },
+    {
+      "word": "shield",
+      "count": 38,
+      "metadata": true
+    },
+    {
+      "word": "cloud",
+      "count": 36,
+      "metadata": true
+    },
+    {
+      "word": "day",
+      "count": 33,
+      "metadata": true
+    },
+    {
+      "word": "dark skin",
+      "count": 30,
+      "metadata": true
+    },
+    {
+      "word": "red hair",
+      "count": 30,
+      "metadata": true
+    },
+    {
+      "word": "holding",
+      "count": 29,
+      "metadata": true
+    },
+    {
+      "word": "red eyes",
+      "count": 28,
+      "metadata": true
+    },
+    {
+      "word": "long hair",
+      "count": 27,
+      "metadata": true
+    },
+    {
+      "word": "looking at viewer",
+      "count": 24,
+      "metadata": true
+    },
+    {
+      "word": "dress",
+      "count": 21,
+      "metadata": true
+    },
+    {
+      "word": "no humans",
+      "count": 21,
+      "metadata": true
+    },
+    {
+      "word": "ocean",
+      "count": 20,
+      "metadata": true
+    },
+    {
+      "word": "tree",
+      "count": 20,
+      "metadata": true
+    },
+    {
+      "word": "holding weapon",
+      "count": 19,
+      "metadata": true
+    },
+    {
+      "word": "open mouth",
+      "count": 19,
+      "metadata": true
+    },
+    {
+      "word": "belt",
+      "count": 18,
+      "metadata": true
+    },
+    {
+      "word": "full body",
+      "count": 17,
+      "metadata": true
+    },
+    {
+      "word": "blue sky",
+      "count": 17,
+      "metadata": true
+    },
+    {
+      "word": "holding sword",
+      "count": 16,
+      "metadata": true
+    },
+    {
+      "word": "water",
+      "count": 15,
+      "metadata": true
+    },
+    {
+      "word": "gloves",
+      "count": 15,
+      "metadata": true
+    },
+    {
+      "word": "brown hair",
+      "count": 14,
+      "metadata": true
+    },
+    {
+      "word": "grass",
+      "count": 14,
+      "metadata": true
+    },
+    {
+      "word": "chibi",
+      "count": 14,
+      "metadata": true
+    },
+    {
+      "word": "shirt",
+      "count": 13,
+      "metadata": true
+    },
+    {
+      "word": "tunic",
+      "count": 13,
+      "metadata": true
+    },
+    {
+      "word": "jewelry",
+      "count": 13,
+      "metadata": true
+    },
+    {
+      "word": "skull",
+      "count": 13,
+      "metadata": true
+    },
+    {
+      "word": "black eyes",
+      "count": 12,
+      "metadata": true
+    },
+    {
+      "word": "scenery",
+      "count": 12,
+      "metadata": true
+    },
+    {
+      "word": "triforce",
+      "count": 12,
+      "metadata": true
+    },
+    {
+      "word": "closed mouth",
+      "count": 12,
+      "metadata": true
+    },
+    {
+      "word": "upper body",
+      "count": 11,
+      "metadata": true
+    },
+    {
+      "word": "boots",
+      "count": 11,
+      "metadata": true
+    },
+    {
+      "word": "indoors",
+      "count": 10,
+      "metadata": true
+    },
+    {
+      "word": "teeth",
+      "count": 10,
+      "metadata": true
+    },
+    {
+      "word": "neckerchief",
+      "count": 10,
+      "metadata": true
+    },
+    {
+      "word": "twintails",
+      "count": 9,
+      "metadata": true
+    },
+    {
+      "word": "multiple boys",
+      "count": 9,
+      "metadata": true
+    },
+    {
+      "word": "short hair",
+      "count": 9,
+      "metadata": true
+    },
+    {
+      "word": "dark-skinned female",
+      "count": 9,
+      "metadata": true
+    },
+    {
+      "word": "cape",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "night",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "glowing",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "from behind",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "watercraft",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "sleeveless",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "shadow",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "left-handed",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "red footwear",
+      "count": 8,
+      "metadata": true
+    },
+    {
+      "word": "bird",
+      "count": 7,
+      "metadata": true
+    },
+    {
+      "word": "closed eyes",
+      "count": 7,
+      "metadata": true
+    },
+    {
+      "word": "2boys",
+      "count": 7,
+      "metadata": true
+    },
+    {
+      "word": "yellow eyes",
+      "count": 7,
+      "metadata": true
+    },
+    {
+      "word": "short sleeves",
+      "count": 7,
+      "metadata": true
+    },
+    {
+      "word": "master sword",
+      "count": 6,
+      "metadata": true
+    },
+    {
+      "word": "building",
+      "count": 6,
+      "metadata": true
+    },
+    {
+      "word": "long sleeves",
+      "count": 6,
+      "metadata": true
+    },
+    {
+      "word": "bracelet",
+      "count": 6,
+      "metadata": true
+    },
+    {
+      "word": "t-shirt",
+      "count": 6,
+      "metadata": true
+    },
+    {
+      "word": "grin",
+      "count": 6,
+      "metadata": true
+    },
+    {
+      "word": "parody",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "instrument",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "plant",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "green eyes",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "sandals",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "earrings",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "gameplay mechanics",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": ":d",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "pants",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "sharp teeth",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "black hair",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "hairband",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "red dress",
+      "count": 5,
+      "metadata": true
+    },
+    {
+      "word": "multiple girls",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "pokemon (creature)",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "green headwear",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "boat",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "one eye closed",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "green hair",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "lying",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "sleeping",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "colored skin",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "braid",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "dark-skinned male",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "cosplay",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "1other",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "animal ears",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "scarf",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "ahoge",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "blue shirt",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "hair over one eye",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "pantyhose",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "big hair",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "orange hairband",
+      "count": 4,
+      "metadata": true
+    },
+    {
+      "word": "pillar",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "night sky",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "star (sky)",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "horns",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "armor",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "glowing eyes",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "holding shield",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "stairs",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "sign",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "on side",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "tiara",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "red shirt",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "palm tree",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "slit pupils",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "animal hat",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "cat hat",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "empty eyes",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "yellow neckerchief",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "skirt",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "profile",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "brown footwear",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "sheath",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "evil smile",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "fang",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "black dress",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "wristband",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "tentacle hair",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "octarian",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "octoling",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "octoling girl",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "twin braids",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "bangs",
+      "count": 3,
+      "metadata": true
+    },
+    {
+      "word": "3girls",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "fire",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "molten rock",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "crown",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "playing instrument",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "black skin",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "from side",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "horizon",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "starry sky",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "walking",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "ship",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "blush stickers",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "vest",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "beach",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": ":3",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "beak",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "chain",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "fangs",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "aiming",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "flower",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "weapon on back",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "fairy",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "furry",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "fake screenshot",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "turban",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "health bar",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "androgynous",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "colored sclera",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "yellow sclera",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "white dress",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "cow",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "simple background",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "sunglasses",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "polearm",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "waving",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "rabbit ears",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "purple hair",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "red gloves",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "arrow (symbol)",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "blue skin",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "forehead",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "bare shoulders",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "black footwear",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "sleeveless dress",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "female child",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "2girls",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "broom",
+      "count": 2,
+      "metadata": true
+    },
+    {
+      "word": "penguin",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "throne",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "stained glass",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "violin",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "mask",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "blue background",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "music",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "holding instrument",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "barrel",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "helmet",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "shoulder armor",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "pauldrons",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "full armor",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "windmill",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "arm up",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "crossed arms",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "red neckerchief",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "desert",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "fantasy",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "window",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "sunlight",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "shade",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "landscape",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "flag",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "ruins",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "door",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "carpet",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "own hands together",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "child",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "male child",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "battle",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "parted lips",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "floating hair",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "pink dress",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "on back",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "white hair",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "facial hair",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "beard",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "serious",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "thick eyebrows",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "green skin",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "tusks",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "elbow gloves",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "necklace",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "makeup",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": ";)",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "gun",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "blue dress",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "freckles",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "purple eyes",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "headband",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "yellow fur",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "stadium",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "lucario",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "furry male",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "expressionless",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "looking up",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "meme",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "sailor moon redraw challenge (meme)",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "bandana",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "knife",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "dagger",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "blue scarf",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "cat",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "cliff",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "cat ears",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "witch hat",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": ":<",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "tabard",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "shovel",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "grey background",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "black background",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "staff",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "hawaiian shirt",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "spear",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "ponytail",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "pirate",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "pikachu",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "facing away",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "flying",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "chicken",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "thighhighs",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "bow",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": ":t",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "food",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "fruit",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "bow (weapon)",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "arrow (projectile)",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "holding arrow",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "wand",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "hammer",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "forehead jewel",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "blue hair",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "bodysuit",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "outstretched arm",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "fusion",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "among us",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "jacket",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "spiked hair",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "pink flower",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "white footwear",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "dark persona",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "grey footwear",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "grey skin",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "blood",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "english text",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "torch",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "cave",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "dual wielding",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "energy sword",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "surcoat",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "animal",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "monster",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "creature",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "black gloves",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "green background",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "yordle",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "rain",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "dark",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "heads-up display",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "diluc (genshin impact)",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "hair bun",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "double bun",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "slime (creature)",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "grey hair",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "broom riding",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "holding broom",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "shoes",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "trident",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "orange neckerchief",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "book",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "fence",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "wall",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "stone wall",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "camera",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "holding camera",
+      "count": 1,
+      "metadata": true
+    },
+    {
+      "word": "video camera",
+      "count": 1,
+      "metadata": true
+    }
+  ],
+  "sha256": "6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711",
+  "name": "Style of the Winds (The Legend Of Zelda: Wind Waker) [Illustrious & NoobAI & SD1.5] - Illustrious",
+  "type": "LORA",
+  "baseModel": "Illustrious",
+  "links": [
+    "https://civitai.com/models/208265?modelVersionId=1273708",
+    "https://civitai.com/api/v1/model-versions/by-hash/6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711"
+  ]
+}

loras/illu/breast-press-pov-illustriousxl-lora-nochekaiser.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 160543ffc7ef32c78fd65443a0a57d15d18af60b3cb27784a45e2bfdaa32e605

loras/illu/caressing-testicles-v2-illustriousxl-lora-nochekaiser.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ ac3db17dbbbfceeaceec10fabb1155139542ae0cc6a36e97c728ea1b0268d941

loras/illu/hand-on-own-hip-on-side-illustriousxl-lora-nochekaiser.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 906f5e048807a39e146446ce2233753b3903050d91f1e8af77e4cb446ed61642

loras/illu/hearthandsonbreast.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 894f528bc94b7ea1e7c6466a9d634e81d05e9c680c6d9fc40aa72a1a62278584

loras/illu/missionary-asphyxiation-illustriousxl-lora-nochekaiser.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ ea4cf413b18ce427e185f2f63ee69a2dafdb57519c818a85eb3aca8653ee9b43

loras/illu/pov-hands-female-orgasm-illustriousxl-lora-nochekaiser.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 7f549e859d87c1a98b8616e948803c1540597f10ea5bbd82c86328a01c56e466

loras/illu/random-nsfw-poses-v8-illustriousxl-lora-nochekaiser.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ f039cc6e3c52beb304749f5cee05cdc37bb191c1b601f757fc4f0923fd3b32ed

loras/pony/R3DStyle.sha256 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 91ded154761569f71ff3dcc28f0715a55d88388dbae2b2325eb9883d4851c2b0