Spaces:
Runtime error
Runtime error
| import os | |
| import cv2 | |
| import easyocr | |
| import pandas as pd | |
| import gradio as gr | |
| from PIL import Image | |
# Shared EasyOCR reader, created once at import time and reused by every
# request (English + French recognition, CPU only).
reader = easyocr.Reader(lang_list=["en", "fr"], gpu=False)
def extract_second_column(image):
    """Run OCR on an image and return the text of its second column.

    The column split is a naive heuristic: every detection whose left edge
    lies strictly to the right of the (upper) median left-edge position is
    treated as belonging to the second column.

    Args:
        image: RGB image as a NumPy array, or ``None`` when no image was
            supplied.

    Returns:
        A list of non-empty, stripped strings in top-to-bottom reading
        order. The list is empty when there is no image or no text is
        detected.
    """
    # Nothing was uploaded: there is nothing to OCR.
    if image is None:
        return []

    # Swap the channel order RGB -> BGR before handing the image to the reader.
    img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    results = reader.readtext(img)

    # Without detections there is no median to compute (indexing an empty
    # list would raise IndexError).
    if not results:
        return []

    # Each result is (bbox, text, confidence); bbox[0] is the first corner
    # of the box as (x, y) -- the top-left corner per the EasyOCR docs.
    left_edges = sorted(bbox[0][0] for bbox, _text, _conf in results)
    median_x = left_edges[len(left_edges) // 2]

    second_column = []
    for bbox, text, _conf in results:
        x, y = bbox[0][0], bbox[0][1]
        cleaned = text.strip()
        if x > median_x and cleaned:
            second_column.append((y, x, cleaned))

    # Emit the column top-to-bottom (ties broken left-to-right) so the rows
    # come out in reading order rather than ordered by horizontal jitter.
    second_column.sort(key=lambda item: (item[0], item[1]))
    return [cleaned for _y, _x, cleaned in second_column]
def process_image(image):
    """Extract the second column of an image and export it as CSV and TXT.

    Args:
        image: The image forwarded unchanged to ``extract_second_column``.

    Returns:
        A tuple ``(df, csv_path, txt_path)``: the one-column DataFrame of
        extracted strings, the path of the CSV export (with header row),
        and the path of the plain-text export (one entry per line).
    """
    texts = extract_second_column(image)
    df = pd.DataFrame({"Colonne 2 (Texte)": texts})

    os.makedirs("/tmp/results", exist_ok=True)
    csv_path = "/tmp/results/colonne_2.csv"
    txt_path = "/tmp/results/colonne_2.txt"

    df.to_csv(csv_path, index=False, encoding="utf-8")

    # Write the TXT export as raw lines. Going through the CSV writer would
    # wrap any entry containing a comma or a quote in CSV quoting, which is
    # wrong for a plain text file.
    with open(txt_path, "w", encoding="utf-8") as handle:
        handle.writelines(f"{line}\n" for line in texts)

    return df, csv_path, txt_path
# User interface: an image upload, a trigger button, a results table, and
# one download slot per export file.
with gr.Blocks(title="Extraction OCR – Colonne 2") as demo:
    gr.Markdown("## 📄 Extraction OCR – Colonne 2 (EasyOCR)")

    # Input widgets.
    image_input = gr.Image(type="numpy", label="Télécharger une image")
    btn = gr.Button("Extraire la colonne 2")

    # Output widgets, in the same order as process_image's return values.
    df_output = gr.Dataframe(label="Résultat")
    csv_file = gr.File(label="Télécharger CSV")
    txt_file = gr.File(label="Télécharger TXT")

    # Clicking the button runs the OCR pipeline on the uploaded image.
    btn.click(
        fn=process_image,
        inputs=[image_input],
        outputs=[df_output, csv_file, txt_file],
    )

# Start the Gradio server.
demo.launch()