Spaces:
Running on Zero
| #!/usr/bin/env python3 | |
| """ | |
| Gradio web interface for LightOnOCR-1B specialized for Hugging Face Spaces. | |
| """ | |
| import os | |
| import sys | |
| import gradio as gr | |
| from pathlib import Path | |
| from PIL import Image | |
| import pypdfium2 as pdfium | |
| import spaces | |
# Add project root to path so the local `backends` package resolves
# regardless of the working directory the Space is launched from.
sys.path.insert(0, str(Path(__file__).parent))

from backends.pytorch_backend import PyTorchBackend

# Global backend singleton, created once by load_backend() below.
# NOTE(review): module-level state is the standard ZeroGPU pattern — the
# model is initialized at import time and swapped onto the GPU per call.
BACKEND = None
def load_backend():
    """Create, load, and return the global PyTorch backend (idempotent).

    Repeated calls return the already-initialized singleton. The model is
    loaded eagerly here because on ZeroGPU the recommended pattern is to
    initialize the model at module scope and only run inference inside a
    @spaces.GPU-decorated function.
    """
    global BACKEND
    if BACKEND is not None:
        return BACKEND
    print("Loading PyTorch backend...")
    backend = PyTorchBackend()
    # Eager load (rather than lazy, inside process_image): ZeroGPU swaps
    # the globally-held weights in and out of VRAM around GPU calls.
    backend.load_model()
    BACKEND = backend
    print(f"Backend loaded: {BACKEND.get_backend_info()}")
    return BACKEND
# Initialize the backend at import time — the standard ZeroGPU pattern:
# model lives at module scope, GPU work happens inside decorated calls.
load_backend()
def render_pdf_page(page, scale=2.0):
    """Rasterize one pypdfium2 page and return it as a PIL Image.

    Args:
        page: A pypdfium2 page object.
        scale: Render scale factor (higher = larger, sharper bitmap).
    """
    # rev_byteorder=True yields RGB byte order as expected by PIL.
    bitmap = page.render(scale=scale, rev_byteorder=True)
    return bitmap.to_pil()
def process_pdf(pdf_path, num_pages=1, scale=2.0):
    """Render up to 10 pages of a PDF to PIL images.

    Args:
        pdf_path: Filesystem path of the PDF.
        num_pages: Requested page count (capped at the document length
            and at a hard limit of 10).
        scale: Render scale forwarded to render_pdf_page.

    Returns:
        (images, total_pages): the rendered PIL images and the total
        page count of the whole document.
    """
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        total_pages = len(pdf)
        pages_to_process = min(num_pages, total_pages, 10)  # hard cap: 10 pages
        images = [render_pdf_page(pdf[i], scale=scale) for i in range(pages_to_process)]
    finally:
        # Bug fix: close the document even when rendering raises, so the
        # underlying file handle is never leaked.
        pdf.close()
    return images, total_pages
# ZeroGPU only attaches a GPU inside functions decorated with @spaces.GPU.
# Bug fix: the decorator was missing, so inference would never get a GPU on
# Spaces. Increase duration beyond the default so long OCR runs aren't cut off.
@spaces.GPU(duration=120)
def run_inference(image, max_tokens):
    """Run OCR on a single PIL image (greedy decoding, temperature 0.0).

    Lazily loads the backend if the module-level init did not run.
    Returns the raw text produced by the backend.
    """
    global BACKEND
    if BACKEND is None:
        load_backend()
    return BACKEND.process_image(image, temperature=0.0, max_tokens=max_tokens)
def process_input(file_input, scale, max_tokens, num_pages):
    """Run OCR over an uploaded image or PDF, streaming progress to Gradio.

    Generator yielding 5-tuples of
    (status, rendered_markdown, raw_text, page_info, preview_image)
    matching the five output components wired up in the UI, so partial
    results appear after every processed page.
    """
    if file_input is None:
        yield "Idle", "Please upload an image or PDF first.", "", "", None
        return
    images_to_process = []
    page_info = ""
    display_image = None
    # gr.File may deliver a plain path string or a tempfile-like object.
    file_path = Path(file_input) if isinstance(file_input, str) else Path(file_input.name)
    if not file_path.exists():
        yield "Error", f"File not accessible: {file_path}", "", "", None
        return
    # --- Load the page images ------------------------------------------
    if file_path.suffix.lower() == '.pdf':
        try:
            images_to_process, total_pages = process_pdf(str(file_path), num_pages, scale)
            if len(images_to_process) == 0:
                yield "Error", "Could not extract pages from PDF.", "", "", None
                return
            display_image = images_to_process[0]
            page_info = f"Processing {len(images_to_process)} of {total_pages} pages"
        except Exception as e:
            yield "Error", f"Error processing PDF: {str(e)}", "", "", None
            return
    else:
        try:
            img = Image.open(file_path)
            # Bug fix: Image.open is lazy — force the decode here so a
            # corrupt file fails inside this guarded block instead of
            # mid-OCR, and the file handle is released promptly.
            img.load()
            images_to_process = [img]
            display_image = img
            page_info = "Processing image"
        except Exception as e:
            yield "Error", f"Error opening image: {str(e)}", "", "", None
            return
    # --- Run OCR page by page ------------------------------------------
    try:
        yield "Processing...", "Processing images...", "", page_info, display_image
        all_texts = []
        for i, img in enumerate(images_to_process):
            try:
                print(f"Processing page {i+1}/{len(images_to_process)}...")
                # Greedy decoding: temperature is hardcoded to 0.0 in run_inference.
                text = run_inference(img, max_tokens=max_tokens)
                all_texts.append(text.strip())
                # Stream the accumulated text so the UI updates per page.
                full_text = "\n\n---\n\n".join(all_texts)
                yield "Processing...", full_text, full_text, page_info, display_image
            except Exception as e:
                # Best-effort: record the failing page inline and keep going
                # rather than aborting the whole batch.
                error_msg = f"Error on page {i+1}: {str(e)}"
                print(f"ERROR: {error_msg}")
                all_texts.append(f"[{error_msg}]")
                continue
        # Final result after all pages (including any inline page errors).
        final_text = "\n\n---\n\n".join(all_texts)
        yield "Complete", final_text, final_text, page_info, display_image
    except Exception as e:
        error_msg = f"Error during processing: {str(e)}"
        yield "Error", error_msg, "", page_info, display_image
# ---------------------------------------------------------------------------
# Gradio UI: left column = upload + preview + settings, right column =
# status and the extracted text (rendered markdown + raw tabs).
# NOTE(review): the emoji labels below appear mojibake-encoded ("π" etc.) —
# presumably UTF-8 read as Latin-1 somewhere upstream; verify in the repo.
# ---------------------------------------------------------------------------
with gr.Blocks(title="π LightOnOCR-1B Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # π LightOnOCR-1B - OCR Demo
        Upload an image or PDF to extract text. Running on ZeroGPU with PyTorch.
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath": handlers receive a filesystem path, not bytes.
            file_input = gr.File(
                label="πΌοΈ Upload Image or PDF",
                file_types=[".pdf", ".png", ".jpg", ".jpeg"],
                type="filepath"
            )
            # Preview of the uploaded image / first PDF page (set by handler).
            rendered_image = gr.Image(
                label="π Preview",
                type="pil",
                height=300,
                interactive=False
            )
            with gr.Accordion("βοΈ Settings", open=True):
                # PDF rasterization scale (forwarded to process_pdf).
                scale_slider = gr.Slider(
                    minimum=1.0,
                    maximum=3.0,
                    value=2.0,
                    step=0.5,
                    label="PDF Scale",
                    info="Higher = better quality, slower"
                )
                # Generation cap per page (forwarded to run_inference).
                max_tokens_slider = gr.Slider(
                    minimum=256,
                    maximum=2048,
                    value=1024,
                    step=256,
                    label="Max Tokens",
                    info="Lower = faster, may cut off long text"
                )
                # Page count; process_pdf additionally hard-caps at 10.
                num_pages = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=1,
                    step=1,
                    label="PDF Pages",
                    info="Number of pages to process (max 10)"
                )
            page_info = gr.Textbox(
                label="Processing Info",
                value="",
                interactive=False
            )
            submit_btn = gr.Button("π Extract Text", variant="primary", size="lg")
            clear_btn = gr.Button("ποΈ Clear", variant="secondary")
        with gr.Column(scale=2):
            status_display = gr.Textbox(
                label="Status",
                value="Idle",
                interactive=False
            )
            with gr.Tabs():
                # Markdown-rendered view of the OCR output.
                with gr.Tab("π Rendered"):
                    output_text = gr.Markdown(
                        value="*Extracted text will appear here...*",
                        height=600
                    )
                # Plain-text view with a copy button.
                with gr.Tab("π Raw Text"):
                    raw_output = gr.Textbox(
                        placeholder="Raw text will appear here...",
                        lines=25,
                        show_copy_button=True
                    )
    # Event handlers: process_input is a generator, so its yields stream
    # incremental updates into the five output components below.
    submit_btn.click(
        fn=process_input,
        inputs=[file_input, scale_slider, max_tokens_slider, num_pages],
        outputs=[status_display, output_text, raw_output, page_info, rendered_image]
    )
    # Reset: the 6-tuple matches the six outputs in order
    # (status, file, rendered markdown, raw text, page info, preview).
    clear_btn.click(
        fn=lambda: ("Idle", None, "*Extracted text will appear here...*", "", "", None),
        outputs=[status_display, file_input, output_text, raw_output, page_info, rendered_image]
    )

if __name__ == "__main__":
    demo.launch()