# LightOnOCR-1B-Demo / app_space.py
# Author: DocUA
# Commit a25a813: Unified project structure: app_space.py for ZeroGPU, root README metadata
#!/usr/bin/env python3
"""
Gradio web interface for LightOnOCR-1B specialized for Hugging Face Spaces.
"""
import os
import sys
import gradio as gr
from pathlib import Path
from PIL import Image
import pypdfium2 as pdfium
import spaces
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent))
from backends.pytorch_backend import PyTorchBackend
# Global backend singleton; created and loaded by load_backend() at import
# time and reused across requests (module-level state persists between
# Gradio calls within one worker process).
BACKEND = None
def load_backend():
    """Create and initialize the global PyTorch backend, once.

    Safe to call repeatedly: the backend is constructed and its model
    loaded only on the first invocation; later calls just return the
    existing instance.

    Returns:
        The module-level PyTorchBackend singleton.
    """
    global BACKEND
    if BACKEND is not None:
        return BACKEND
    print("Loading PyTorch backend...")
    BACKEND = PyTorchBackend()
    # Load the model eagerly at startup. For ZeroGPU the standard pattern
    # is: initialize the model globally, then use it inside a @spaces.GPU
    # decorated function (ZeroGPU swaps it in/out of GPU memory).
    BACKEND.load_model()
    print(f"Backend loaded: {BACKEND.get_backend_info()}")
    return BACKEND
# Initialize globally for ZeroGPU: pay the model-load cost once at import
# time so each request only runs inference.
load_backend()
def render_pdf_page(page, scale=2.0):
    """Rasterize a single pypdfium2 page into a PIL image.

    Args:
        page: A pypdfium2 page object.
        scale: Zoom factor for rendering (higher = larger image).

    Returns:
        The rendered page as a PIL image.
    """
    bitmap = page.render(scale=scale, rev_byteorder=True)
    return bitmap.to_pil()
def process_pdf(pdf_path, num_pages=1, scale=2.0):
    """Render the first pages of a PDF to PIL images.

    Args:
        pdf_path: Path (string) to the PDF file.
        num_pages: Requested number of pages; coerced to int (Gradio
            sliders can deliver floats) and capped at 10.
        scale: Render scale forwarded to render_pdf_page.

    Returns:
        Tuple (images, total_pages): the rendered PIL images and the
        total page count of the document.
    """
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        total_pages = len(pdf)
        # Hard cap at 10 pages to bound GPU time per request.
        pages_to_process = min(int(num_pages), total_pages, 10)
        images = [render_pdf_page(pdf[i], scale=scale) for i in range(pages_to_process)]
    finally:
        # Fix: close the document even when rendering raises; previously
        # the handle leaked on any per-page exception.
        pdf.close()
    return images, total_pages
@spaces.GPU(duration=120)  # generous budget: OCR over full pages is slow
def run_inference(image, max_tokens):
    """Run OCR on one PIL image inside the ZeroGPU execution context.

    Uses temperature=0.0 (greedy decoding) for deterministic output.

    Args:
        image: PIL image to transcribe.
        max_tokens: Generation cap forwarded to the backend.

    Returns:
        The extracted text produced by the backend.
    """
    # Backend is normally loaded at import time; this is a safety net in
    # case the module-level initialization was skipped.
    backend = BACKEND if BACKEND is not None else load_backend()
    return backend.process_image(image, temperature=0.0, max_tokens=max_tokens)
def process_input(file_input, scale, max_tokens, num_pages):
    """Generator: OCR an uploaded image or PDF, streaming progress.

    Args:
        file_input: Filepath string (from gr.File with type="filepath")
            or a file-like object exposing a .name attribute.
        scale: PDF render scale from the UI slider.
        max_tokens: Generation cap forwarded to the backend.
        num_pages: Number of PDF pages to process. May arrive as a float
            from the Gradio slider; coerced to int before use (fix:
            a float here would crash range() inside process_pdf).

    Yields:
        5-tuples (status, markdown_text, raw_text, page_info, preview_image)
        matching the outputs wired to submit_btn.click.
    """
    if file_input is None:
        yield "Idle", "Please upload an image or PDF first.", "", "", None
        return
    images_to_process = []
    page_info = ""
    display_image = None
    file_path = Path(file_input) if isinstance(file_input, str) else Path(file_input.name)
    if not file_path.exists():
        yield "Error", f"File not accessible: {file_path}", "", "", None
        return
    # Load images: either render PDF pages or open a single image file.
    if file_path.suffix.lower() == '.pdf':
        try:
            images_to_process, total_pages = process_pdf(str(file_path), int(num_pages), scale)
            if len(images_to_process) == 0:
                yield "Error", "Could not extract pages from PDF.", "", "", None
                return
            display_image = images_to_process[0]
            page_info = f"Processing {len(images_to_process)} of {total_pages} pages"
        except Exception as e:
            yield "Error", f"Error processing PDF: {str(e)}", "", "", None
            return
    else:
        try:
            img = Image.open(file_path)
            # Fix: force pixel data into memory now. PIL opens lazily, so
            # without this a corrupt file would raise later, outside this
            # guarded branch; loading also lets the file handle close.
            img.load()
            images_to_process = [img]
            display_image = img
            page_info = "Processing image"
        except Exception as e:
            yield "Error", f"Error opening image: {str(e)}", "", "", None
            return
    # Run OCR page by page, streaming partial results to the UI.
    try:
        yield "Processing...", "Processing images...", "", page_info, display_image
        all_texts = []
        for i, img in enumerate(images_to_process):
            try:
                print(f"Processing page {i+1}/{len(images_to_process)}...")
                # Run inference on GPU (temperature hardcoded to 0.0)
                text = run_inference(img, max_tokens=max_tokens)
                all_texts.append(text.strip())
                # Update progress after each completed page
                full_text = "\n\n---\n\n".join(all_texts)
                yield "Processing...", full_text, full_text, page_info, display_image
            except Exception as e:
                # Best-effort: record the per-page failure inline and
                # continue with the remaining pages.
                error_msg = f"Error on page {i+1}: {str(e)}"
                print(f"ERROR: {error_msg}")
                all_texts.append(f"[{error_msg}]")
                continue
        # Final result
        final_text = "\n\n---\n\n".join(all_texts)
        yield "Complete", final_text, final_text, page_info, display_image
    except Exception as e:
        error_msg = f"Error during processing: {str(e)}"
        yield "Error", error_msg, "", page_info, display_image
# Create Gradio interface: left column = upload/preview/settings,
# right column = status + extracted text (rendered markdown and raw).
with gr.Blocks(title="📖 LightOnOCR-1B Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 📖 LightOnOCR-1B - OCR Demo
        Upload an image or PDF to extract text. Running on ZeroGPU with PyTorch.
        """
    )
    with gr.Row():
        # Left column: file upload, page preview, and tuning controls.
        with gr.Column(scale=1):
            file_input = gr.File(
                label="🖼️ Upload Image or PDF",
                file_types=[".pdf", ".png", ".jpg", ".jpeg"],
                # "filepath" means process_input receives a path string.
                type="filepath"
            )
            # Shows the first processed page as a preview.
            rendered_image = gr.Image(
                label="📄 Preview",
                type="pil",
                height=300,
                interactive=False
            )
            with gr.Accordion("⚙️ Settings", open=True):
                # PDF rasterization scale, forwarded to process_pdf.
                scale_slider = gr.Slider(
                    minimum=1.0,
                    maximum=3.0,
                    value=2.0,
                    step=0.5,
                    label="PDF Scale",
                    info="Higher = better quality, slower"
                )
                # Generation cap, forwarded to run_inference.
                max_tokens_slider = gr.Slider(
                    minimum=256,
                    maximum=2048,
                    value=1024,
                    step=256,
                    label="Max Tokens",
                    info="Lower = faster, may cut off long text"
                )
                # Page count; process_pdf additionally caps this at 10.
                num_pages = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=1,
                    step=1,
                    label="PDF Pages",
                    info="Number of pages to process (max 10)"
                )
            page_info = gr.Textbox(
                label="Processing Info",
                value="",
                interactive=False
            )
            submit_btn = gr.Button("🚀 Extract Text", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear", variant="secondary")
        # Right column: status line plus the two output views.
        with gr.Column(scale=2):
            status_display = gr.Textbox(
                label="Status",
                value="Idle",
                interactive=False
            )
            with gr.Tabs():
                with gr.Tab("📄 Rendered"):
                    output_text = gr.Markdown(
                        value="*Extracted text will appear here...*",
                        height=600
                    )
                with gr.Tab("📝 Raw Text"):
                    raw_output = gr.Textbox(
                        placeholder="Raw text will appear here...",
                        lines=25,
                        show_copy_button=True
                    )
    # Event handlers
    # process_input is a generator, so these outputs update incrementally
    # as each page finishes.
    submit_btn.click(
        fn=process_input,
        inputs=[file_input, scale_slider, max_tokens_slider, num_pages],
        outputs=[status_display, output_text, raw_output, page_info, rendered_image]
    )
    # Reset all six wired components to their initial values.
    clear_btn.click(
        fn=lambda: ("Idle", None, "*Extracted text will appear here...*", "", "", None),
        outputs=[status_display, file_input, output_text, raw_output, page_info, rendered_image]
    )
# Standard script entry point: launch the Gradio server.
if __name__ == "__main__":
    demo.launch()