Instructions to use microsoft/Florence-2-large-ft with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use microsoft/Florence-2-large-ft with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="microsoft/Florence-2-large-ft", trust_remote_code=True)

# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large-ft", trust_remote_code=True)
model = AutoModelForMultimodalLM.from_pretrained("microsoft/Florence-2-large-ft", trust_remote_code=True)
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use microsoft/Florence-2-large-ft with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "microsoft/Florence-2-large-ft"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "microsoft/Florence-2-large-ft",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/microsoft/Florence-2-large-ft
- SGLang
How to use microsoft/Florence-2-large-ft with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "microsoft/Florence-2-large-ft" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "microsoft/Florence-2-large-ft",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "microsoft/Florence-2-large-ft" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "microsoft/Florence-2-large-ft",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use microsoft/Florence-2-large-ft with Docker Model Runner:
docker model run hf.co/microsoft/Florence-2-large-ft
fix: compatibility patches for newer transformers (4.50+ / 5.x)
#44
by TheeRealDude - opened
- configuration_florence2.py +1 -1
- modeling_florence2.py +9 -16
- processing_florence2.py +1 -1
configuration_florence2.py
CHANGED
|
@@ -262,7 +262,7 @@ class Florence2LanguageConfig(PretrainedConfig):
|
|
| 262 |
)
|
| 263 |
|
| 264 |
# ensure backward compatibility for BART CNN models
|
| 265 |
-
if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
|
| 266 |
self.forced_bos_token_id = self.bos_token_id
|
| 267 |
warnings.warn(
|
| 268 |
f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. "
|
|
|
|
| 262 |
)
|
| 263 |
|
| 264 |
# ensure backward compatibility for BART CNN models
|
| 265 |
+
if getattr(self, "forced_bos_token_id", None) is None and kwargs.get("force_bos_token_to_be_generated", False):
|
| 266 |
self.forced_bos_token_id = self.bos_token_id
|
| 267 |
warnings.warn(
|
| 268 |
f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. "
|
modeling_florence2.py
CHANGED
|
@@ -554,7 +554,13 @@ class DaViT(nn.Module):
|
|
| 554 |
assert self.num_stages == len(self.num_heads) == len(self.num_groups)
|
| 555 |
|
| 556 |
num_stages = len(embed_dims)
|
| 557 |
-
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)*2)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
|
| 559 |
depth_offset = 0
|
| 560 |
convs = []
|
|
@@ -2331,21 +2337,8 @@ class Florence2PreTrainedModel(PreTrainedModel):
|
|
| 2331 |
supports_gradient_checkpointing = True
|
| 2332 |
_skip_keys_device_placement = "past_key_values"
|
| 2333 |
|
| 2334 |
-
@property
|
| 2335 |
-
def _supports_flash_attn_2(self):
|
| 2336 |
-
"""
|
| 2337 |
-
Retrieve language_model's attribute to check whether the model supports
|
| 2338 |
-
Flash Attention 2 or not.
|
| 2339 |
-
"""
|
| 2340 |
-
return self.language_model._supports_flash_attn_2
|
| 2341 |
-
|
| 2342 |
-
@property
|
| 2343 |
-
def _supports_sdpa(self):
|
| 2344 |
-
"""
|
| 2345 |
-
Retrieve language_model's attribute to check whether the model supports
|
| 2346 |
-
SDPA or not.
|
| 2347 |
-
"""
|
| 2348 |
-
return self.language_model._supports_sdpa
|
| 2349 |
|
| 2350 |
|
| 2351 |
FLORENCE2_INPUTS_DOCSTRING = r"""
|
|
|
|
| 554 |
assert self.num_stages == len(self.num_heads) == len(self.num_groups)
|
| 555 |
|
| 556 |
num_stages = len(embed_dims)
|
| 557 |
+
_n_dpr = sum(depths) * 2
|
| 558 |
+
if _n_dpr == 0:
|
| 559 |
+
dpr = []
|
| 560 |
+
elif _n_dpr == 1:
|
| 561 |
+
dpr = [0.0]
|
| 562 |
+
else:
|
| 563 |
+
dpr = [drop_path_rate * i / (_n_dpr - 1) for i in range(_n_dpr)]
|
| 564 |
|
| 565 |
depth_offset = 0
|
| 566 |
convs = []
|
|
|
|
| 2337 |
supports_gradient_checkpointing = True
|
| 2338 |
_skip_keys_device_placement = "past_key_values"
|
| 2339 |
|
| 2340 |
+
_supports_flash_attn_2 = False
|
| 2341 |
+
_supports_sdpa = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2342 |
|
| 2343 |
|
| 2344 |
FLORENCE2_INPUTS_DOCSTRING = r"""
|
processing_florence2.py
CHANGED
|
@@ -84,7 +84,7 @@ class Florence2Processor(ProcessorMixin):
|
|
| 84 |
|
| 85 |
tokens_to_add = {
|
| 86 |
'additional_special_tokens': \
|
| 87 |
-
tokenizer.additional_special_tokens + \
|
| 88 |
['<od>', '</od>', '<ocr>', '</ocr>'] + \
|
| 89 |
[f'<loc_{x}>' for x in range(1000)] + \
|
| 90 |
['<cap>', '</cap>', '<ncap>', '</ncap>','<dcap>', '</dcap>', '<grounding>', '</grounding>', '<seg>', '</seg>', '<sep>', '<region_cap>', '</region_cap>', '<region_to_desciption>', '</region_to_desciption>', '<proposal>', '</proposal>', '<poly>', '</poly>', '<and>']
|
|
|
|
| 84 |
|
| 85 |
tokens_to_add = {
|
| 86 |
'additional_special_tokens': \
|
| 87 |
+
list(getattr(tokenizer, 'additional_special_tokens', []) or []) + \
|
| 88 |
['<od>', '</od>', '<ocr>', '</ocr>'] + \
|
| 89 |
[f'<loc_{x}>' for x in range(1000)] + \
|
| 90 |
['<cap>', '</cap>', '<ncap>', '</ncap>','<dcap>', '</dcap>', '<grounding>', '</grounding>', '<seg>', '</seg>', '<sep>', '<region_cap>', '</region_cap>', '<region_to_desciption>', '</region_to_desciption>', '<proposal>', '</proposal>', '<poly>', '</poly>', '<and>']
|