File size: 2,158 Bytes
56d2fb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os

import gradio as gr
from PIL import Image
from transformers import pipeline

# Hugging Face checkpoint used for OCR (the printed-text TrOCR base model)
_OCR_MODEL_ID = "microsoft/trocr-base-printed"

# Build the image-to-text pipeline once at import time; the handler below reuses it
ocr_pipeline = pipeline(task="image-to-text", model=_OCR_MODEL_ID)

def extract_text_from_image(image):
    """Run the OCR pipeline on an uploaded image and return the recognised text.

    Args:
        image: The value handed over by the image input, or ``None`` when
            nothing was uploaded. It is passed straight to
            ``Image.fromarray``, so it is expected to be array-like.

    Returns:
        str: The ``generated_text`` of the first pipeline result, a prompt
        asking for an upload when ``image`` is ``None``, or an
        ``"Error during text extraction: ..."`` message if anything raised.
    """
    # Guard clause: nothing to process without an upload
    if image is None:
        return "No image provided. Please upload an image file."

    try:
        # Wrap the array in a PIL image and feed it to the TrOCR pipeline
        predictions = ocr_pipeline(Image.fromarray(image))

        # The pipeline yields a list of dicts; the text lives in the first entry
        first_prediction = predictions[0]
        return first_prediction['generated_text']
    except Exception as err:
        # Surface the failure in the output box instead of raising
        return f"Error during text extraction: {err}"

# Gradio interface
# Layout: image upload and trigger button in the left column, read-only
# result box in the right column, followed by optional examples and model notes.
with gr.Blocks(title="Image Text Extractor") as demo:
    gr.Markdown("# 📷 Image Text Extractor")
    gr.Markdown("Extract text from images using Microsoft's TrOCR model")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                type="numpy",
                label="Upload Image"
            )
            extract_btn = gr.Button("Extract Text", variant="primary")

        with gr.Column():
            text_output = gr.Textbox(
                lines=10,
                label="Extracted Text",
                interactive=False
            )

    # Run OCR on the uploaded image when the button is clicked
    extract_btn.click(
        extract_text_from_image,
        inputs=image_input,
        outputs=text_output
    )

    # Only offer sample images that actually exist next to the app.
    # Gradio reads example files while building the component, so a missing
    # file would otherwise abort startup with FileNotFoundError.
    example_files = ["example1.jpg", "example2.png"]
    available_examples = [[path] for path in example_files if os.path.isfile(path)]
    if available_examples:
        gr.Examples(
            examples=available_examples,
            inputs=[image_input],
        )

    gr.Markdown("### About This Model")
    gr.Markdown("- **Model**: [microsoft/trocr-base-printed](https://huggingface.co/microsoft/trocr-base-printed)")
    gr.Markdown("- **Task**: Optical Character Recognition (OCR)")
    gr.Markdown("- **Architecture**: Transformer-based OCR (TrOCR)")
    gr.Markdown("- **Capabilities**: Specialized for printed text extraction")
    gr.Markdown("- **Note**: First processing may take 15-25 seconds (model loading)")
    gr.Markdown("- **Supported Formats**: JPG, PNG, JPEG")

# Start the Gradio app only when this file is run as a script, not when imported
if __name__ == "__main__":
    demo.launch()