Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,11 +2,12 @@ import gradio as gr
|
|
| 2 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 3 |
from PIL import Image
|
| 4 |
import re
|
|
|
|
| 5 |
|
| 6 |
# ===============================
|
| 7 |
-
# Charger le modèle
|
| 8 |
# ===============================
|
| 9 |
-
model_name = "microsoft/trocr-base-[… remainder of this removed line is missing from the diff capture]
|
| 10 |
processor = TrOCRProcessor.from_pretrained(model_name)
|
| 11 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
| 12 |
|
|
@@ -14,7 +15,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 14 |
model.to(device)
|
| 15 |
|
| 16 |
# ===============================
|
| 17 |
-
# Fonction
|
| 18 |
# ===============================
|
| 19 |
def extract_description(image_pil):
|
| 20 |
# OCR avec TrOCR
|
|
@@ -29,9 +30,9 @@ def extract_description(image_pil):
|
|
| 29 |
desc_col = []
|
| 30 |
header_found = False
|
| 31 |
headers = []
|
| 32 |
-
|
| 33 |
if lines:
|
| 34 |
-
first_line = lines[0]
|
| 35 |
# Split en colonnes par tabulation ou espaces multiples
|
| 36 |
headers = re.split(r"\t+|\s{2,}", first_line)
|
| 37 |
try:
|
|
@@ -61,7 +62,7 @@ demo = gr.Interface(
|
|
| 61 |
gr.Textbox(label="📋 Colonne Description"),
|
| 62 |
gr.Textbox(label="🛠 OCR complet pour debug")
|
| 63 |
],
|
| 64 |
-
title="Extraction de la colonne Description (TrOCR [… remainder of this removed line is missing from the diff capture]
|
| 65 |
description="Détection automatique de la colonne Description dans les factures avec TrOCR"
|
| 66 |
)
|
| 67 |
|
|
|
|
| 2 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 3 |
from PIL import Image
|
| 4 |
import re
|
| 5 |
+
import torch
|
| 6 |
|
| 7 |
# ===============================
|
| 8 |
+
# Charger le modèle TrOCR public
|
| 9 |
# ===============================
|
| 10 |
+
model_name = "microsoft/trocr-base-handwritten" # modèle public
|
| 11 |
processor = TrOCRProcessor.from_pretrained(model_name)
|
| 12 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
| 13 |
|
|
|
|
| 15 |
model.to(device)
|
| 16 |
|
| 17 |
# ===============================
|
| 18 |
+
# Fonction extraction colonne Description
|
| 19 |
# ===============================
|
| 20 |
def extract_description(image_pil):
|
| 21 |
# OCR avec TrOCR
|
|
|
|
| 30 |
desc_col = []
|
| 31 |
header_found = False
|
| 32 |
headers = []
|
| 33 |
+
|
| 34 |
if lines:
|
| 35 |
+
first_line = lines[0]
|
| 36 |
# Split en colonnes par tabulation ou espaces multiples
|
| 37 |
headers = re.split(r"\t+|\s{2,}", first_line)
|
| 38 |
try:
|
|
|
|
| 62 |
gr.Textbox(label="📋 Colonne Description"),
|
| 63 |
gr.Textbox(label="🛠 OCR complet pour debug")
|
| 64 |
],
|
| 65 |
+
title="Extraction de la colonne Description (TrOCR public)",
|
| 66 |
description="Détection automatique de la colonne Description dans les factures avec TrOCR"
|
| 67 |
)
|
| 68 |
|