Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,9 @@
|
|
| 1 |
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import base64
|
| 3 |
import os
|
| 4 |
import re
|
|
@@ -107,21 +112,19 @@ class ModelManager:
|
|
| 107 |
|
| 108 |
# Load new model
|
| 109 |
print(f"Loading model: {model_name} ({model_id})...")
|
| 110 |
-
hf_token = os.environ.get("HF_TOKEN")
|
| 111 |
model = (
|
| 112 |
LightOnOcrForConditionalGeneration.from_pretrained(
|
| 113 |
model_id,
|
| 114 |
attn_implementation=attn_implementation,
|
| 115 |
torch_dtype=dtype,
|
| 116 |
trust_remote_code=True,
|
| 117 |
-
token=hf_token,
|
| 118 |
)
|
| 119 |
.to(device)
|
| 120 |
.eval()
|
| 121 |
)
|
| 122 |
|
| 123 |
processor = LightOnOcrProcessor.from_pretrained(
|
| 124 |
-
model_id, trust_remote_code=True
|
| 125 |
)
|
| 126 |
|
| 127 |
# Add to cache
|
|
@@ -496,9 +499,41 @@ def get_model_info_text(model_name):
|
|
| 496 |
# Create Gradio interface
|
| 497 |
with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
|
| 498 |
gr.Markdown(f"""
|
| 499 |
-
# LightOnOCR-2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
|
| 501 |
-
**How to use:**
|
| 502 |
1. Select a model (OCR models for text extraction, Bbox models for region detection)
|
| 503 |
2. Upload an image or PDF
|
| 504 |
3. For PDFs: select which page to extract
|
|
@@ -610,4 +645,4 @@ with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
|
|
| 610 |
|
| 611 |
|
| 612 |
if __name__ == "__main__":
|
| 613 |
-
demo.launch(theme=gr.themes.Soft())
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
+
import warnings
|
| 3 |
+
|
| 4 |
+
# Suppress FutureWarning from spaces library about torch.distributed.reduce_op
|
| 5 |
+
warnings.filterwarnings("ignore", category=FutureWarning, module="spaces")
|
| 6 |
+
|
| 7 |
import base64
|
| 8 |
import os
|
| 9 |
import re
|
|
|
|
| 112 |
|
| 113 |
# Load new model
|
| 114 |
print(f"Loading model: {model_name} ({model_id})...")
|
|
|
|
| 115 |
model = (
|
| 116 |
LightOnOcrForConditionalGeneration.from_pretrained(
|
| 117 |
model_id,
|
| 118 |
attn_implementation=attn_implementation,
|
| 119 |
torch_dtype=dtype,
|
| 120 |
trust_remote_code=True,
|
|
|
|
| 121 |
)
|
| 122 |
.to(device)
|
| 123 |
.eval()
|
| 124 |
)
|
| 125 |
|
| 126 |
processor = LightOnOcrProcessor.from_pretrained(
|
| 127 |
+
model_id, trust_remote_code=True
|
| 128 |
)
|
| 129 |
|
| 130 |
# Add to cache
|
|
|
|
| 499 |
# Create Gradio interface
|
| 500 |
with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
|
| 501 |
gr.Markdown(f"""
|
| 502 |
+
# LightOnOCR-2
|
| 503 |
+
|
| 504 |
+
**Efficient end-to-end 1B-parameter vision-language model for OCR**
|
| 505 |
+
|
| 506 |
+
Convert documents (PDFs, scans, images) into clean, naturally ordered text without relying on brittle pipelines. LightOnOCR-2 achieves state-of-the-art performance on OlmOCR-Bench while being ~9× smaller and significantly faster than competing approaches.
|
| 507 |
+
|
| 508 |
+
### Highlights
|
| 509 |
+
|
| 510 |
+
| | |
|
| 511 |
+
|---|---|
|
| 512 |
+
| ⚡ **Speed** | 3.3× faster than Chandra, 1.7× faster than OlmOCR, 5× faster than dots.ocr |
|
| 513 |
+
| 💸 **Efficiency** | 5.71 pages/s on H100 (~493k pages/day) for **<$0.01 per 1,000 pages** |
|
| 514 |
+
| 🧠 **End-to-End** | Fully differentiable, no external OCR pipeline |
|
| 515 |
+
| 🧾 **Versatile** | Tables, receipts, forms, multi-column layouts, math notation |
|
| 516 |
+
| 📍 **Bbox variants** | Predict bounding boxes for embedded images |
|
| 517 |
+
|
| 518 |
+
### Resources
|
| 519 |
+
|
| 520 |
+
[Paper](https://huggingface.co/papers/lightonocr-2) | [Blog Post](https://huggingface.co/blog/lightonai/lightonocr-2) | [Demo](https://huggingface.co/spaces/lightonai/LightOnOCR-2-1B-Demo) | [Dataset](https://huggingface.co/datasets/lightonai/LightOnOCR-mix-0126) | [Finetuning Notebook](https://colab.research.google.com/drive/1WjbsFJZ4vOAAlKtcCauFLn_evo5UBRNa?usp=sharing)
|
| 521 |
+
|
| 522 |
+
### Model Variants
|
| 523 |
+
|
| 524 |
+
| Variant | Description |
|
| 525 |
+
|---------|-------------|
|
| 526 |
+
| **[LightOnOCR-2-1B](https://huggingface.co/lightonai/LightOnOCR-2-1B)** | Best OCR model (recommended) |
|
| 527 |
+
| **[LightOnOCR-2-1B-base](https://huggingface.co/lightonai/LightOnOCR-2-1B-base)** | Base model, ideal for fine-tuning |
|
| 528 |
+
| **[LightOnOCR-2-1B-bbox](https://huggingface.co/lightonai/LightOnOCR-2-1B-bbox)** | Best model with image bounding boxes |
|
| 529 |
+
| **[LightOnOCR-2-1B-bbox-base](https://huggingface.co/lightonai/LightOnOCR-2-1B-bbox-base)** | Base bbox model, ideal for fine-tuning |
|
| 530 |
+
| **[LightOnOCR-2-1B-ocr-soup](https://huggingface.co/lightonai/LightOnOCR-2-1B-ocr-soup)** | Merged variant for extra robustness |
|
| 531 |
+
| **[LightOnOCR-2-1B-bbox-soup](https://huggingface.co/lightonai/LightOnOCR-2-1B-bbox-soup)** | Merged variant: OCR + bbox combined |
|
| 532 |
+
|
| 533 |
+
---
|
| 534 |
+
|
| 535 |
+
### How to use
|
| 536 |
|
|
|
|
| 537 |
1. Select a model (OCR models for text extraction, Bbox models for region detection)
|
| 538 |
2. Upload an image or PDF
|
| 539 |
3. For PDFs: select which page to extract
|
|
|
|
| 645 |
|
| 646 |
|
| 647 |
if __name__ == "__main__":
|
| 648 |
+
demo.launch(theme=gr.themes.Soft(), ssr_mode=False)
|