| | |
| | |
| |
|
| | import gradio as gr |
| | from PIL import Image |
| | import torch |
| | from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig |
| |
|
| | |
| | |
| | |
| | bnb_config = BitsAndBytesConfig( |
| | load_in_4bit=True, |
| | bnb_4bit_quant_type="nf4", |
| | bnb_4bit_use_double_quant=True, |
| | bnb_4bit_compute_dtype=torch.float16, |
| | ) |
| |
|
| | |
| | model_name = "NAMAA-Space/Qari-OCR-0.2.2.1-VL-2B-Instruct" |
| |
|
| | model = Qwen2VLForConditionalGeneration.from_pretrained( |
| | model_name, |
| | quantization_config=bnb_config, |
| | device_map="auto", |
| | trust_remote_code=True |
| | ) |
| | processor = AutoProcessor.from_pretrained( |
| | model_name, |
| | trust_remote_code=True |
| | ) |
| |
|
| | max_tokens = 2000 |
| | prompt = ( |
| | "Below is the image of one page of a document, as well as some raw textual content " |
| | "that was previously extracted for it. Just return the plain text representation of " |
| | "this document as if you were reading it naturally. Do not hallucinate." |
| | ) |
| |
|
| | |
| | |
| | |
| | def ocr_from_image(img: Image.Image): |
| | img = img.convert("RGB") |
| | |
| | formatted = processor.chat_template_format(prompt) |
| |
|
| | inputs = processor( |
| | images=img, |
| | text=[formatted], |
| | return_tensors="pt", |
| | padding=True |
| | ).to(model.device) |
| |
|
| | outputs = model.generate( |
| | **inputs, |
| | max_new_tokens=max_tokens, |
| | do_sample=False |
| | ) |
| | result = processor.batch_decode( |
| | outputs, |
| | skip_special_tokens=True, |
| | clean_up_tokenization_spaces=False |
| | )[0] |
| | return result |
| |
|
| | |
| | |
| | |
| | demo = gr.Interface( |
| | fn=ocr_from_image, |
| | inputs=gr.Image(type="pil", label="Upload Arabic Document Image"), |
| | outputs=gr.Textbox(label="Extracted Text"), |
| | title="Qari-OCR Arabic v0.2 Online", |
| | description="ุฑูุน ุตูุฑุฉ ุตูุญุฉ ุนุฑุจูุฉ ูุงุณุชุฎุฑุงุฌ ุงููุต ุจุฏููุฉ ุนุงููุฉ ุจุงุณุชุฎุฏุงู
Qari-OCR" |
| | ) |
| |
|
| | if __name__ == "__main__": |
| | demo.launch(server_name="0.0.0.0", server_port=7860) |
| |
|