Bapt120 committed on
Commit
3f41a2c
·
verified ·
1 Parent(s): 1c833a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -6
app.py CHANGED
@@ -1,4 +1,9 @@
1
  #!/usr/bin/env python3
 
 
 
 
 
2
  import base64
3
  import os
4
  import re
@@ -107,21 +112,19 @@ class ModelManager:
107
 
108
  # Load new model
109
  print(f"Loading model: {model_name} ({model_id})...")
110
- hf_token = os.environ.get("HF_TOKEN")
111
  model = (
112
  LightOnOcrForConditionalGeneration.from_pretrained(
113
  model_id,
114
  attn_implementation=attn_implementation,
115
  torch_dtype=dtype,
116
  trust_remote_code=True,
117
- token=hf_token,
118
  )
119
  .to(device)
120
  .eval()
121
  )
122
 
123
  processor = LightOnOcrProcessor.from_pretrained(
124
- model_id, trust_remote_code=True, token=hf_token
125
  )
126
 
127
  # Add to cache
@@ -496,9 +499,41 @@ def get_model_info_text(model_name):
496
  # Create Gradio interface
497
  with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
498
  gr.Markdown(f"""
499
- # LightOnOCR-2 Multi-Model OCR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
 
501
- **How to use:**
502
  1. Select a model (OCR models for text extraction, Bbox models for region detection)
503
  2. Upload an image or PDF
504
  3. For PDFs: select which page to extract
@@ -610,4 +645,4 @@ with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
610
 
611
 
612
  if __name__ == "__main__":
613
- demo.launch(theme=gr.themes.Soft())
 
1
  #!/usr/bin/env python3
2
+ import warnings
3
+
4
+ # Suppress FutureWarning from spaces library about torch.distributed.reduce_op
5
+ warnings.filterwarnings("ignore", category=FutureWarning, module="spaces")
6
+
7
  import base64
8
  import os
9
  import re
 
112
 
113
  # Load new model
114
  print(f"Loading model: {model_name} ({model_id})...")
 
115
  model = (
116
  LightOnOcrForConditionalGeneration.from_pretrained(
117
  model_id,
118
  attn_implementation=attn_implementation,
119
  torch_dtype=dtype,
120
  trust_remote_code=True,
 
121
  )
122
  .to(device)
123
  .eval()
124
  )
125
 
126
  processor = LightOnOcrProcessor.from_pretrained(
127
+ model_id, trust_remote_code=True
128
  )
129
 
130
  # Add to cache
 
499
  # Create Gradio interface
500
  with gr.Blocks(title="LightOnOCR-2 Multi-Model OCR") as demo:
501
  gr.Markdown(f"""
502
+ # LightOnOCR-2
503
+
504
+ **Efficient end-to-end 1B-parameter vision-language model for OCR**
505
+
506
+ Convert documents (PDFs, scans, images) into clean, naturally ordered text without relying on brittle pipelines. LightOnOCR-2 achieves state-of-the-art performance on OlmOCR-Bench while being ~9× smaller and significantly faster than competing approaches.
507
+
508
+ ### Highlights
509
+
510
+ | | |
511
+ |---|---|
512
+ | ⚡ **Speed** | 3.3× faster than Chandra, 1.7× faster than OlmOCR, 5× faster than dots.ocr |
513
+ | 💸 **Efficiency** | 5.71 pages/s on H100 (~493k pages/day) for **<$0.01 per 1,000 pages** |
514
+ | 🧠 **End-to-End** | Fully differentiable, no external OCR pipeline |
515
+ | 🧾 **Versatile** | Tables, receipts, forms, multi-column layouts, math notation |
516
+ | 📍 **Bbox variants** | Predict bounding boxes for embedded images |
517
+
518
+ ### Resources
519
+
520
+ [Paper](https://huggingface.co/papers/lightonocr-2) | [Blog Post](https://huggingface.co/blog/lightonai/lightonocr-2) | [Demo](https://huggingface.co/spaces/lightonai/LightOnOCR-2-1B-Demo) | [Dataset](https://huggingface.co/datasets/lightonai/LightOnOCR-mix-0126) | [Finetuning Notebook](https://colab.research.google.com/drive/1WjbsFJZ4vOAAlKtcCauFLn_evo5UBRNa?usp=sharing)
521
+
522
+ ### Model Variants
523
+
524
+ | Variant | Description |
525
+ |---------|-------------|
526
+ | **[LightOnOCR-2-1B](https://huggingface.co/lightonai/LightOnOCR-2-1B)** | Best OCR model (recommended) |
527
+ | **[LightOnOCR-2-1B-base](https://huggingface.co/lightonai/LightOnOCR-2-1B-base)** | Base model, ideal for fine-tuning |
528
+ | **[LightOnOCR-2-1B-bbox](https://huggingface.co/lightonai/LightOnOCR-2-1B-bbox)** | Best model with image bounding boxes |
529
+ | **[LightOnOCR-2-1B-bbox-base](https://huggingface.co/lightonai/LightOnOCR-2-1B-bbox-base)** | Base bbox model, ideal for fine-tuning |
530
+ | **[LightOnOCR-2-1B-ocr-soup](https://huggingface.co/lightonai/LightOnOCR-2-1B-ocr-soup)** | Merged variant for extra robustness |
531
+ | **[LightOnOCR-2-1B-bbox-soup](https://huggingface.co/lightonai/LightOnOCR-2-1B-bbox-soup)** | Merged variant: OCR + bbox combined |
532
+
533
+ ---
534
+
535
+ ### How to use
536
 
 
537
  1. Select a model (OCR models for text extraction, Bbox models for region detection)
538
  2. Upload an image or PDF
539
  3. For PDFs: select which page to extract
 
645
 
646
 
647
  if __name__ == "__main__":
648
+ demo.launch(theme=gr.themes.Soft(), ssr_mode=False)