Image-Text-to-Text
MLX
Safetensors
English
falcon_ocr
ocr
vision-language
falcon
apple-silicon
custom_code
Eval Results
Instructions to use mlx-community/Falcon-OCR-bf16 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use mlx-community/Falcon-OCR-bf16 with MLX:
```python
# Make sure mlx-vlm is installed
# pip install --upgrade mlx-vlm

from mlx_vlm import load, generate
from mlx_vlm.prompt_utils import apply_chat_template
from mlx_vlm.utils import load_config

# Load the model
model, processor = load("mlx-community/Falcon-OCR-bf16")
config = load_config("mlx-community/Falcon-OCR-bf16")

# Prepare input
image = ["http://images.cocodataset.org/val2017/000000039769.jpg"]
prompt = "Describe this image."

# Apply chat template
formatted_prompt = apply_chat_template(
    processor, config, prompt, num_images=1
)

# Generate output
output = generate(model, processor, formatted_prompt, image)
print(output)
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- LM Studio
| { | |
| "backend": "tokenizers", | |
| "caption_token": "<|CAPTION|>", | |
| "category_sep_token": "<|category_sep|>", | |
| "clean_up_tokenization_spaces": true, | |
| "coord_token": "<|coord|>", | |
| "det_token": "<|DET|>", | |
| "detailed_caption_token": "<|DETAILED_CAPTION|>", | |
| "diffusion_query_token": "<|diffusion_query|>", | |
| "end_of_diffusion_query_token": "<|end_of_diffusion_query|>", | |
| "end_of_image_token": "<|end_of_image|>", | |
| "end_of_query_token": "<|end_of_query|>", | |
| "end_of_turn_token": "<|end_of_turn|>", | |
| "end_of_video_token": "<|end_of_video|>", | |
| "eos_token": "<|end_of_text|>", | |
| "frame_sep_token": "<|frame_sep|>", | |
| "image_cls_token": "<|image_cls|>", | |
| "image_mask_token": "<|image_mask_token|>", | |
| "image_reg_1_token": "<|image_reg_1|>", | |
| "image_reg_2_token": "<|image_reg_2|>", | |
| "image_reg_3_token": "<|image_reg_3|>", | |
| "image_reg_4_token": "<|image_reg_4|>", | |
| "image_reg_5_token": "<|image_reg_5|>", | |
| "image_reg_6_token": "<|image_reg_6|>", | |
| "image_reg_7_token": "<|image_reg_7|>", | |
| "image_reg_8_token": "<|image_reg_8|>", | |
| "image_row_sep_token": "<|image_row_sep|>", | |
| "image_token": "<|image|>", | |
| "is_local": true, | |
| "layout_detection_token": "<|LAYOUT_DETECTION|>", | |
| "model_input_names": [ | |
| "input_ids", | |
| "attention_mask" | |
| ], | |
| "model_max_length": 1000000000000000019884624838656, | |
| "model_specific_special_tokens": { | |
| "caption_token": "<|CAPTION|>", | |
| "category_sep_token": "<|category_sep|>", | |
| "coord_token": "<|coord|>", | |
| "det_token": "<|DET|>", | |
| "detailed_caption_token": "<|DETAILED_CAPTION|>", | |
| "diffusion_query_token": "<|diffusion_query|>", | |
| "end_of_diffusion_query_token": "<|end_of_diffusion_query|>", | |
| "end_of_image_token": "<|end_of_image|>", | |
| "end_of_query_token": "<|end_of_query|>", | |
| "end_of_turn_token": "<|end_of_turn|>", | |
| "end_of_video_token": "<|end_of_video|>", | |
| "frame_sep_token": "<|frame_sep|>", | |
| "image_cls_token": "<|image_cls|>", | |
| "image_mask_token": "<|image_mask_token|>", | |
| "image_reg_1_token": "<|image_reg_1|>", | |
| "image_reg_2_token": "<|image_reg_2|>", | |
| "image_reg_3_token": "<|image_reg_3|>", | |
| "image_reg_4_token": "<|image_reg_4|>", | |
| "image_reg_5_token": "<|image_reg_5|>", | |
| "image_reg_6_token": "<|image_reg_6|>", | |
| "image_reg_7_token": "<|image_reg_7|>", | |
| "image_reg_8_token": "<|image_reg_8|>", | |
| "image_row_sep_token": "<|image_row_sep|>", | |
| "image_token": "<|image|>", | |
| "layout_detection_token": "<|LAYOUT_DETECTION|>", | |
| "object_token": "<|object|>", | |
| "ocr_doc_parser_token": "<|OCR_DOC_PARSER|>", | |
| "ocr_grounding_token": "<|OCR_GROUNDING|>", | |
| "ocr_plain_token": "<|OCR_PLAIN|>", | |
| "perceive_token": "<|perceive|>", | |
| "point_ref_seg_token": "<|POINT_REF_SEG|>", | |
| "pointing_token": "<|POINTING|>", | |
| "qa_token": "<|QA|>", | |
| "ref_seg_token": "<|REF_SEG|>", | |
| "seg_token": "<|seg|>", | |
| "size_token": "<|size|>", | |
| "start_of_diffusion_query_token": "<|start_of_diffusion_query|>", | |
| "start_of_image_token": "<|start_of_image|>", | |
| "start_of_query_token": "<|start_of_query|>", | |
| "start_of_turn_token": "<|start_of_turn|>", | |
| "start_of_video_token": "<|start_of_video|>", | |
| "table_data_end_token": "</td>", | |
| "table_data_start_token": "<td>", | |
| "table_header_end_token": "</th>", | |
| "table_header_start_token": "<th>", | |
| "table_row_end_token": "</tr>", | |
| "table_row_start_token": "<tr>", | |
| "task_sep_token": "<|task_sep|>" | |
| }, | |
| "object_token": "<|object|>", | |
| "ocr_doc_parser_token": "<|OCR_DOC_PARSER|>", | |
| "ocr_grounding_token": "<|OCR_GROUNDING|>", | |
| "ocr_plain_token": "<|OCR_PLAIN|>", | |
| "perceive_token": "<|perceive|>", | |
| "point_ref_seg_token": "<|POINT_REF_SEG|>", | |
| "pointing_token": "<|POINTING|>", | |
| "qa_token": "<|QA|>", | |
| "ref_seg_token": "<|REF_SEG|>", | |
| "seg_token": "<|seg|>", | |
| "size_token": "<|size|>", | |
| "start_of_diffusion_query_token": "<|start_of_diffusion_query|>", | |
| "start_of_image_token": "<|start_of_image|>", | |
| "start_of_query_token": "<|start_of_query|>", | |
| "start_of_turn_token": "<|start_of_turn|>", | |
| "start_of_video_token": "<|start_of_video|>", | |
| "table_data_end_token": "</td>", | |
| "table_data_start_token": "<td>", | |
| "table_header_end_token": "</th>", | |
| "table_header_start_token": "<th>", | |
| "table_row_end_token": "</tr>", | |
| "table_row_start_token": "<tr>", | |
| "task_sep_token": "<|task_sep|>", | |
| "tokenizer_class": "TokenizersBackend" | |
| } | |