Image-Text-to-Text
MLX
Safetensors
English
falcon_ocr
ocr
vision-language
falcon
apple-silicon
custom_code
Eval Results
Instructions to use mlx-community/Falcon-OCR-bf16 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use mlx-community/Falcon-OCR-bf16 with MLX:
```python
# Make sure mlx-vlm is installed
# pip install --upgrade mlx-vlm

from mlx_vlm import load, generate
from mlx_vlm.prompt_utils import apply_chat_template
from mlx_vlm.utils import load_config

# Load the model
model, processor = load("mlx-community/Falcon-OCR-bf16")
config = load_config("mlx-community/Falcon-OCR-bf16")

# Prepare input
image = ["http://images.cocodataset.org/val2017/000000039769.jpg"]
prompt = "Describe this image."

# Apply chat template
formatted_prompt = apply_chat_template(
    processor, config, prompt, num_images=1
)

# Generate output
output = generate(model, processor, formatted_prompt, image)
print(output)
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- LM Studio
File size: 4,352 Bytes
28ccccd
{
"backend": "tokenizers",
"caption_token": "<|CAPTION|>",
"category_sep_token": "<|category_sep|>",
"clean_up_tokenization_spaces": true,
"coord_token": "<|coord|>",
"det_token": "<|DET|>",
"detailed_caption_token": "<|DETAILED_CAPTION|>",
"diffusion_query_token": "<|diffusion_query|>",
"end_of_diffusion_query_token": "<|end_of_diffusion_query|>",
"end_of_image_token": "<|end_of_image|>",
"end_of_query_token": "<|end_of_query|>",
"end_of_turn_token": "<|end_of_turn|>",
"end_of_video_token": "<|end_of_video|>",
"eos_token": "<|end_of_text|>",
"frame_sep_token": "<|frame_sep|>",
"image_cls_token": "<|image_cls|>",
"image_mask_token": "<|image_mask_token|>",
"image_reg_1_token": "<|image_reg_1|>",
"image_reg_2_token": "<|image_reg_2|>",
"image_reg_3_token": "<|image_reg_3|>",
"image_reg_4_token": "<|image_reg_4|>",
"image_reg_5_token": "<|image_reg_5|>",
"image_reg_6_token": "<|image_reg_6|>",
"image_reg_7_token": "<|image_reg_7|>",
"image_reg_8_token": "<|image_reg_8|>",
"image_row_sep_token": "<|image_row_sep|>",
"image_token": "<|image|>",
"is_local": true,
"layout_detection_token": "<|LAYOUT_DETECTION|>",
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 1000000000000000019884624838656,
"model_specific_special_tokens": {
"caption_token": "<|CAPTION|>",
"category_sep_token": "<|category_sep|>",
"coord_token": "<|coord|>",
"det_token": "<|DET|>",
"detailed_caption_token": "<|DETAILED_CAPTION|>",
"diffusion_query_token": "<|diffusion_query|>",
"end_of_diffusion_query_token": "<|end_of_diffusion_query|>",
"end_of_image_token": "<|end_of_image|>",
"end_of_query_token": "<|end_of_query|>",
"end_of_turn_token": "<|end_of_turn|>",
"end_of_video_token": "<|end_of_video|>",
"frame_sep_token": "<|frame_sep|>",
"image_cls_token": "<|image_cls|>",
"image_mask_token": "<|image_mask_token|>",
"image_reg_1_token": "<|image_reg_1|>",
"image_reg_2_token": "<|image_reg_2|>",
"image_reg_3_token": "<|image_reg_3|>",
"image_reg_4_token": "<|image_reg_4|>",
"image_reg_5_token": "<|image_reg_5|>",
"image_reg_6_token": "<|image_reg_6|>",
"image_reg_7_token": "<|image_reg_7|>",
"image_reg_8_token": "<|image_reg_8|>",
"image_row_sep_token": "<|image_row_sep|>",
"image_token": "<|image|>",
"layout_detection_token": "<|LAYOUT_DETECTION|>",
"object_token": "<|object|>",
"ocr_doc_parser_token": "<|OCR_DOC_PARSER|>",
"ocr_grounding_token": "<|OCR_GROUNDING|>",
"ocr_plain_token": "<|OCR_PLAIN|>",
"perceive_token": "<|perceive|>",
"point_ref_seg_token": "<|POINT_REF_SEG|>",
"pointing_token": "<|POINTING|>",
"qa_token": "<|QA|>",
"ref_seg_token": "<|REF_SEG|>",
"seg_token": "<|seg|>",
"size_token": "<|size|>",
"start_of_diffusion_query_token": "<|start_of_diffusion_query|>",
"start_of_image_token": "<|start_of_image|>",
"start_of_query_token": "<|start_of_query|>",
"start_of_turn_token": "<|start_of_turn|>",
"start_of_video_token": "<|start_of_video|>",
"table_data_end_token": "</td>",
"table_data_start_token": "<td>",
"table_header_end_token": "</th>",
"table_header_start_token": "<th>",
"table_row_end_token": "</tr>",
"table_row_start_token": "<tr>",
"task_sep_token": "<|task_sep|>"
},
"object_token": "<|object|>",
"ocr_doc_parser_token": "<|OCR_DOC_PARSER|>",
"ocr_grounding_token": "<|OCR_GROUNDING|>",
"ocr_plain_token": "<|OCR_PLAIN|>",
"perceive_token": "<|perceive|>",
"point_ref_seg_token": "<|POINT_REF_SEG|>",
"pointing_token": "<|POINTING|>",
"qa_token": "<|QA|>",
"ref_seg_token": "<|REF_SEG|>",
"seg_token": "<|seg|>",
"size_token": "<|size|>",
"start_of_diffusion_query_token": "<|start_of_diffusion_query|>",
"start_of_image_token": "<|start_of_image|>",
"start_of_query_token": "<|start_of_query|>",
"start_of_turn_token": "<|start_of_turn|>",
"start_of_video_token": "<|start_of_video|>",
"table_data_end_token": "</td>",
"table_data_start_token": "<td>",
"table_header_end_token": "</th>",
"table_header_start_token": "<th>",
"table_row_end_token": "</tr>",
"table_row_start_token": "<tr>",
"task_sep_token": "<|task_sep|>",
"tokenizer_class": "TokenizersBackend"
}