Spaces:

Josebert
/

JR_SmartOCR

Runtime error

App Files Files Community

JR_SmartOCR / app.py

Josebert

Update app.py

0e82584 verified 9 months ago

raw

history blame contribute delete

2.56 kB

	import gradio as gr
	import torch
	from PIL import Image
	from transformers import TrOCRProcessor, VisionEncoderDecoderModel
	import logging

	# Configure logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Initialize TrOCR model and processor
	try:
	processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
	model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
	if torch.cuda.is_available():
	model.to('cuda')
	except Exception as e:
	logger.error(f"Error loading model: {e}")
	raise

	def process_image(image):
	"""Process image and extract text using TrOCR"""
	try:
	# Convert to RGB if needed
	if image.mode != 'RGB':
	image = image.convert('RGB')

	# Prepare image for model
	pixel_values = processor(image, return_tensors="pt").pixel_values
	if torch.cuda.is_available():
	pixel_values = pixel_values.to('cuda')

	# Generate text
	generated_ids = model.generate(pixel_values, max_length=128)
	generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	return generated_text.strip()
	except Exception as e:
	logger.error(f"Error processing image: {e}")
	return f"Error processing image: {str(e)}"

	def analyze_image(input_image):
	"""Main function to handle image analysis"""
	if input_image is None:
	return "Please upload an image."

	try:
	# Open and process image
	image = Image.open(input_image)

	# Extract text
	extracted_text = process_image(image)

	# Format response
	response = f"""📝 Extracted Text:
	{'-' * 40}
	{extracted_text}
	{'-' * 40}

	📊 Statistics:
	• Characters: {len(extracted_text)}
	• Words: {len(extracted_text.split())}
	"""
	return response
	except Exception as e:
	logger.error(f"Error in analysis: {e}")
	return f"Error analyzing image: {str(e)}"

	# Create Gradio interface
	demo = gr.Interface(
	fn=analyze_image,
	inputs=gr.Image(type="filepath", label="Upload Image"),
	outputs=gr.Textbox(label="Extracted Text", lines=10),
	title="📷 Smart OCR Text Extractor",
	description="""
	Extract text from images using Microsoft's TrOCR model.
	Supports handwritten and printed text.
	""",
	theme=gr.themes.Soft(),
	examples=[
	["example1.jpg"],
	["example2.png"]
	]
	)

	if __name__ == "__main__":
	demo.launch()