# kebson's picture
# Update app.py
# a6c6224 verified
import os
import cv2
import easyocr
import pandas as pd
import gradio as gr
from PIL import Image
# Module-level EasyOCR reader, created once when the file is loaded
# (English + French models, gpu=False).
reader = easyocr.Reader(
    ['en', 'fr'],
    gpu=False,
)
def extract_second_column(image):
    """Run OCR on *image* and return the texts located in the right-hand column.

    Naive column split: the x coordinate of each detection's first
    bounding-box corner is compared with the median of those coordinates,
    and only detections strictly to the right of the median are kept.

    Args:
        image: RGB image array (it is converted with ``COLOR_RGB2BGR``
            before OCR), or ``None`` when no image was supplied.

    Returns:
        list[str]: Stripped, non-empty texts of the kept detections, ordered
        top to bottom, then left to right. Empty when ``image`` is ``None``
        or when OCR detects nothing.
    """
    # No image supplied (presumably what the UI sends when nothing was
    # uploaded): report "nothing found" instead of crashing in cvtColor.
    if image is None:
        return []

    bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    detections = reader.readtext(bgr)
    # Without this guard the median lookup below raises IndexError.
    if not detections:
        return []

    # bbox[0] is the first corner of the detection box; [0] is its x
    # coordinate and [1] its y coordinate.
    left_edges = sorted(bbox[0][0] for bbox, _text, _conf in detections)
    median_x = left_edges[len(left_edges) // 2]

    kept = [
        (bbox[0][1], bbox[0][0], text.strip())
        for bbox, text, _conf in detections
        if bbox[0][0] > median_x and text.strip()
    ]
    # Sort by (y, x) so the rows of the column come out in reading order;
    # sorting by x alone would interleave rows arbitrarily.
    kept.sort(key=lambda item: item[:2])
    return [text for _y, _x, text in kept]
def process_image(image):
    """Extract the second column from *image* and export it as CSV and TXT.

    Args:
        image: Image forwarded unchanged to ``extract_second_column``.

    Returns:
        tuple: ``(df, csv_path, txt_path)`` where ``df`` is a one-column
        DataFrame named "Colonne 2 (Texte)" and the two paths point to the
        files written under ``/tmp/results``.
    """
    texts = extract_second_column(image)
    df = pd.DataFrame({"Colonne 2 (Texte)": texts})

    os.makedirs("/tmp/results", exist_ok=True)
    csv_path = "/tmp/results/colonne_2.csv"
    txt_path = "/tmp/results/colonne_2.txt"

    df.to_csv(csv_path, index=False, encoding="utf-8")
    # Plain-text export: one entry per line, written verbatim. Routing this
    # through to_csv would wrap any entry containing a comma or a double
    # quote (e.g. the decimal "12,50") in CSV quoting.
    with open(txt_path, "w", encoding="utf-8") as handle:
        handle.writelines(f"{text}\n" for text in texts)

    return df, csv_path, txt_path
# Gradio interface: one image input, a button, and three outputs
# (result table, CSV file, TXT file).
with gr.Blocks(title="Extraction OCR – Colonne 2") as demo:
    gr.Markdown("## 📄 Extraction OCR – Colonne 2 (EasyOCR)")

    image_input = gr.Image(type="numpy", label="Télécharger une image")
    btn = gr.Button("Extraire la colonne 2")

    df_output = gr.Dataframe(label="Résultat")
    csv_file = gr.File(label="Télécharger CSV")
    txt_file = gr.File(label="Télécharger TXT")

    # The outputs are listed in the same order as the values returned by
    # process_image: (df, csv_path, txt_path).
    btn.click(
        fn=process_image,
        inputs=[image_input],
        outputs=[df_output, csv_file, txt_file],
    )

demo.launch()