import os
import tempfile

import cv2
import easyocr
import gradio as gr
import pandas as pd
from PIL import Image

# EasyOCR reader for English + French, CPU only. Built once at import time so
# every request reuses the same reader instead of re-creating it.
reader = easyocr.Reader(['en', 'fr'], gpu=False)


def extract_second_column(image):
    """Run OCR on *image* and return the texts of the second (right) column.

    The column split is a naive heuristic: every detection whose top-left
    corner lies strictly to the right of the median top-left x coordinate is
    considered part of the second column.

    Args:
        image: RGB image array as delivered by ``gr.Image(type="numpy")``,
            or ``None`` when no image has been provided.

    Returns:
        The stripped, non-empty texts of the second column, ordered from top
        to bottom (ties broken left to right). Empty when *image* is ``None``
        or when OCR detects no text.
    """
    if image is None:
        return []

    # The image arrives as RGB; convert to OpenCV's BGR channel order.
    img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    results = reader.readtext(img)
    if not results:
        # Nothing detected: there is no median to compute.
        return []

    # Each result is (bbox, text, confidence); bbox[0] is the top-left corner
    # given as (x, y).
    xs = sorted(bbox[0][0] for bbox, _text, _conf in results)
    median_x = xs[len(xs) // 2]

    # Keep only detections right of the median, then restore reading order
    # (top to bottom) so the rows come out in the order they appear.
    second_column = [
        (bbox[0][1], bbox[0][0], text.strip())
        for bbox, text, _conf in results
        if bbox[0][0] > median_x and text.strip()
    ]
    second_column.sort(key=lambda item: (item[0], item[1]))
    return [text for _y, _x, text in second_column]


def process_image(image):
    """Extract the second column of *image* and export it as CSV and TXT.

    Args:
        image: RGB image array from the Gradio image input (may be ``None``).

    Returns:
        A ``(dataframe, csv_path, txt_path)`` tuple: the extracted texts as a
        one-column DataFrame, plus the paths of the CSV and plain-text files
        written for download.
    """
    texts = extract_second_column(image)
    df = pd.DataFrame({"Colonne 2 (Texte)": texts})

    # One fresh directory per request so concurrent users never overwrite
    # each other's files. It lives under the system temp dir, which also
    # works on platforms without /tmp. The directories are not cleaned up
    # here; they are left to the OS temp-dir housekeeping.
    out_dir = tempfile.mkdtemp(prefix="colonne_2_")
    csv_path = os.path.join(out_dir, "colonne_2.csv")
    txt_path = os.path.join(out_dir, "colonne_2.txt")

    df.to_csv(csv_path, index=False, encoding="utf-8")
    # Plain text, one entry per line: going through to_csv would wrap any
    # text containing commas or quotes in CSV quoting.
    with open(txt_path, "w", encoding="utf-8") as fh:
        fh.writelines(f"{line}\n" for line in texts)

    return df, csv_path, txt_path


with gr.Blocks(title="Extraction OCR – Colonne 2") as demo:
    gr.Markdown("## 📄 Extraction OCR – Colonne 2 (EasyOCR)")

    image_input = gr.Image(type="numpy", label="Télécharger une image")
    btn = gr.Button("Extraire la colonne 2")

    df_output = gr.Dataframe(label="Résultat")
    csv_file = gr.File(label="Télécharger CSV")
    txt_file = gr.File(label="Télécharger TXT")

    btn.click(
        process_image,
        inputs=image_input,
        outputs=[df_output, csv_file, txt_file],
    )

# Only start the server when run as a script, so importing this module
# (e.g. for tests or reuse of the functions) does not block on launch().
if __name__ == "__main__":
    demo.launch()