# Gradio app: run EasyOCR on an uploaded image, extract the text of the
# second column, and offer the result as CSV and TXT downloads.
import os
import cv2
import easyocr
import pandas as pd
import gradio as gr
from PIL import Image

# Initialize the EasyOCR reader once at import time (English + French,
# CPU only) so that every OCR call shares the same instance.
reader = easyocr.Reader(['en', 'fr'], gpu=False)


def extract_second_column(image):
    """
    Run OCR on an image and naively extract the text of its second column.

    Every detected text box is located by the x coordinate of its first
    corner (``bbox[0]``, the top-left corner in EasyOCR's output). Boxes
    whose x lies strictly to the right of the median x are treated as the
    second column. This is a heuristic: it assumes roughly two columns with
    a similar number of boxes, and a box sitting exactly on the median is
    not selected.

    Args:
        image: RGB image as a NumPy array, or None when no image was given.

    Returns:
        list[str]: Stripped, non-empty texts of the right-hand boxes, ordered
        top to bottom (then left to right). Empty when ``image`` is None or
        when OCR detects nothing.
    """
    # The UI passes None when the button is clicked without an image.
    if image is None:
        return []

    img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    results = reader.readtext(img)

    # No detection at all: there is no median to compute.
    if not results:
        return []

    # Estimate the column boundary from the left-edge x of every box.
    xs = sorted(bbox[0][0] for bbox, _text, _conf in results)
    median_x = xs[len(xs) // 2]

    # Keep only the boxes to the right of the boundary (2nd column).
    right_boxes = [
        (bbox, text)
        for bbox, text, _conf in results
        if bbox[0][0] > median_x
    ]

    # Restore reading order: sort by vertical position first, so the rows
    # come out top to bottom instead of in left-edge order.
    right_boxes.sort(key=lambda item: (item[0][0][1], item[0][0][0]))

    return [text.strip() for _bbox, text in right_boxes if text.strip()]


def process_image(image):
    """
    Extract the second column of an image and export it as CSV and TXT.

    Args:
        image: Image forwarded unchanged to ``extract_second_column``.

    Returns:
        tuple: ``(df, csv_path, txt_path)`` where ``df`` is a one-column
        DataFrame named "Colonne 2 (Texte)" and the two paths point at the
        files written under ``/tmp/results``.
    """
    texts = extract_second_column(image)

    df = pd.DataFrame({"Colonne 2 (Texte)": texts})

    os.makedirs("/tmp/results", exist_ok=True)

    csv_path = "/tmp/results/colonne_2.csv"
    txt_path = "/tmp/results/colonne_2.txt"

    df.to_csv(csv_path, index=False, encoding="utf-8")

    # The TXT export is plain text, one entry per line. Writing it through
    # DataFrame.to_csv would wrap any entry containing a comma or a quote
    # in CSV quotes, which does not belong in a .txt file.
    with open(txt_path, "w", encoding="utf-8") as txt_out:
        txt_out.writelines(f"{line}\n" for line in texts)

    return df, csv_path, txt_path


# Build the Gradio UI. Components are declared in the order they should
# appear on the page: title, image upload, action button, then the outputs.
with gr.Blocks(title="Extraction OCR – Colonne 2") as demo:
    gr.Markdown("## 📄 Extraction OCR – Colonne 2 (EasyOCR)")

    # Input: type="numpy" asks Gradio to hand the image to the callback
    # as a NumPy array.
    image_input = gr.Image(type="numpy", label="Télécharger une image")
    btn = gr.Button("Extraire la colonne 2")

    # Outputs: a table preview plus the two downloadable files.
    df_output = gr.Dataframe(label="Résultat")
    csv_file = gr.File(label="Télécharger CSV")
    txt_file = gr.File(label="Télécharger TXT")

    # Clicking the button runs process_image on the uploaded image; its
    # three return values fill the three output components in order.
    btn.click(
        process_image,
        inputs=image_input,
        outputs=[df_output, csv_file, txt_file]
    )

# Start the app; this runs whenever the module is executed or imported.
demo.launch()