Spaces:
Runtime error
Runtime error
File size: 1,828 Bytes
a3d2b53 1d03c47 a6c6224 8931404 3a9f6ca a6c6224 6022908 a6c6224 00c654c 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 3a9f6ca a6c6224 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 35f4e3c 1d03c47 35f4e3c 7a22053 a6c6224 7a22053 a6c6224 7a22053 a6c6224 7a22053 35f4e3c 7a22053 35f4e3c 1d03c47 d8d4939 e981ea9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import os
import cv2
import easyocr
import pandas as pd
import gradio as gr
from PIL import Image
# Initialise the EasyOCR reader once at import time (English + French if
# needed) so the same instance is reused by every OCR call in this module.
# gpu=False asks EasyOCR not to use a GPU.
reader = easyocr.Reader(['en', 'fr'], gpu=False)
def extract_second_column(image):
    """
    Run OCR on an image and naively extract the text of the second column.

    A detection is considered part of the second column when the x
    coordinate of its first bounding-box corner lies strictly to the right
    of the (upper) median of that coordinate over all detections.

    Args:
        image: Image as a NumPy array in RGB channel order, or ``None``
            when no image was supplied.

    Returns:
        A list of stripped, non-empty text fragments from the second
        column, ordered top to bottom (then left to right). The list is
        empty when there is no image or when OCR detects nothing.
    """
    # The UI hands over None when the user clicks without uploading.
    if image is None:
        return []

    # Swap RGB -> BGR before OCR (presumably the reader expects OpenCV's
    # channel layout for array input -- matches the original behaviour).
    img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    results = reader.readtext(img)

    # Without detections there is no median to compute.
    if not results:
        return []

    # Each result is unpacked as (bbox, text, confidence); bbox[0] is the
    # first corner of the box as (x, y).
    xs = sorted(bbox[0][0] for bbox, _text, _conf in results)
    median_x = xs[len(xs) // 2]

    # Keep (y, x, text) so the column can be emitted in reading order
    # instead of the x-sorted order used only for estimating the split.
    column_2 = []
    for bbox, text, _conf in results:
        x, y = bbox[0][0], bbox[0][1]
        cleaned = text.strip()
        if x > median_x and cleaned:
            column_2.append((y, x, cleaned))

    column_2.sort(key=lambda item: (item[0], item[1]))
    return [cleaned for _y, _x, cleaned in column_2]
def process_image(image):
    """
    Extract the second column of an image and export it as CSV and TXT.

    Args:
        image: Image forwarded unchanged to ``extract_second_column``.

    Returns:
        A tuple ``(df, csv_path, txt_path)`` where ``df`` is a DataFrame
        with the single column ``"Colonne 2 (Texte)"``, ``csv_path`` is the
        path of the CSV export (with header) and ``txt_path`` is the path
        of a plain-text export with one fragment per line.
    """
    # Local import so this function does not depend on a file-level import
    # that the rest of the module does not need.
    import tempfile

    texts = extract_second_column(image)
    df = pd.DataFrame({"Colonne 2 (Texte)": texts})

    # Write into a fresh directory per call: with fixed file names two
    # overlapping requests would overwrite each other's results.
    os.makedirs("/tmp/results", exist_ok=True)
    out_dir = tempfile.mkdtemp(prefix="ocr_", dir="/tmp/results")
    csv_path = os.path.join(out_dir, "colonne_2.csv")
    txt_path = os.path.join(out_dir, "colonne_2.txt")

    df.to_csv(csv_path, index=False, encoding="utf-8")

    # Plain text export: written directly rather than through to_csv so
    # fragments containing commas or quotes are not CSV-quoted.
    with open(txt_path, "w", encoding="utf-8") as handle:
        handle.writelines(f"{text}\n" for text in texts)

    return df, csv_path, txt_path
# Gradio UI: an image upload, a trigger button, and three outputs (result
# table plus the CSV and TXT downloads) fed by process_image.
with gr.Blocks(title="Extraction OCR – Colonne 2") as demo:
    gr.Markdown("## 📄 Extraction OCR – Colonne 2 (EasyOCR)")

    # Input side: the image is delivered to the callback as a NumPy array.
    image_input = gr.Image(type="numpy", label="Télécharger une image")
    btn = gr.Button("Extraire la colonne 2")

    # Output side, in the order process_image returns its values.
    df_output = gr.Dataframe(label="Résultat")
    csv_file = gr.File(label="Télécharger CSV")
    txt_file = gr.File(label="Télécharger TXT")

    btn.click(
        fn=process_image,
        inputs=image_input,
        outputs=[df_output, csv_file, txt_file],
    )

# Start the web app.
demo.launch()
|