Spaces:
Sleeping
Sleeping
Commit ·
98e4ea6
1
Parent(s): 5f046ba
With OCR working ver 1
Browse files
- app.py +36 -6
- packages.txt +1 -0
- requirements.txt +3 -54
app.py
CHANGED
|
@@ -1,10 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import sys
|
| 3 |
|
| 4 |
-
def
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
| 10 |
-
demo.launch()
|
|
|
|
|
|
| 1 |
+
from pdf2image import convert_from_path
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import pytesseract
|
| 4 |
import gradio as gr
|
|
|
|
| 5 |
|
| 6 |
+
def tesseract_ocr(filepath: str):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        filepath: Path of the image file to read. May be ``None`` or empty
            when the caller has no image to process (e.g. the Gradio image
            input was submitted without an upload).

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image,
        or an empty string when no file path was supplied.
    """
    # Without this guard, a submit with no uploaded image reaches
    # Image.open(None) and fails with an opaque error in the UI.
    if not filepath:
        return ""

    # The context manager closes the image file as soon as OCR is done.
    with Image.open(filepath) as img:
        return pytesseract.image_to_string(img)
|
| 15 |
+
|
| 16 |
+
# Static copy shown at the top of the page.
title = "Invoicer IA"
description = "extract data from invoice"
article = (
    "This is a simple OCR application that extracts text from images using Tesseract OCR. "
    "You can upload an image of an invoice, and the application will return the extracted text."
)
# NOTE(review): `examples` is defined here but nothing in this script uses it.
examples = [["example_invoice.png"]]


with gr.Blocks(title=title) as demo:
    # Header: centered page title followed by the two descriptive paragraphs.
    heading_html = f'<h1 style="text-align: center; margin-bottom: 1rem;">{title}</h1>'
    gr.Markdown(heading_html)
    gr.Markdown(description)
    gr.Markdown(article)

    with gr.Row():
        # First column: the uploaded image and the OCR result beneath it.
        with gr.Column():
            image_input = gr.Image(
                type="filepath",
                label="Upload Invoice Image",
            )
            output_text = gr.Textbox(
                label="Extracted Text",
                lines=10,
                placeholder="Extracted text will appear here...",
            )

        # Second column: the button that triggers OCR.
        with gr.Column():
            submit_button = gr.Button("Submit")

    # Event wiring must happen inside the Blocks context.
    submit_button.click(
        fn=tesseract_ocr,
        inputs=image_input,
        outputs=output_text,
    )


if __name__ == "__main__":
    demo.launch()
|
| 40 |
+
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
tesseract-ocr-all
|
requirements.txt
CHANGED
|
@@ -1,54 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
certifi==2025.1.31
|
| 5 |
-
charset-normalizer==3.4.1
|
| 6 |
-
click==8.1.8
|
| 7 |
-
colorama==0.4.6
|
| 8 |
-
fastapi==0.115.12
|
| 9 |
-
ffmpy==0.5.0
|
| 10 |
-
filelock==3.18.0
|
| 11 |
-
fsspec==2025.3.2
|
| 12 |
-
gradio==5.23.3
|
| 13 |
-
gradio_client==1.8.0
|
| 14 |
-
groovy==0.1.2
|
| 15 |
-
h11==0.14.0
|
| 16 |
-
httpcore==1.0.7
|
| 17 |
-
httpx==0.28.1
|
| 18 |
-
huggingface-hub==0.30.1
|
| 19 |
-
idna==3.10
|
| 20 |
-
Jinja2==3.1.6
|
| 21 |
-
markdown-it-py==3.0.0
|
| 22 |
-
MarkupSafe==3.0.2
|
| 23 |
-
mdurl==0.1.2
|
| 24 |
-
numpy==2.2.4
|
| 25 |
-
orjson==3.10.16
|
| 26 |
-
packaging==24.2
|
| 27 |
-
pandas==2.2.3
|
| 28 |
-
pillow==11.1.0
|
| 29 |
-
pydantic==2.11.2
|
| 30 |
-
pydantic_core==2.33.1
|
| 31 |
-
pydub==0.25.1
|
| 32 |
-
Pygments==2.19.1
|
| 33 |
-
python-dateutil==2.9.0.post0
|
| 34 |
-
python-multipart==0.0.20
|
| 35 |
-
pytz==2025.2
|
| 36 |
-
PyYAML==6.0.2
|
| 37 |
-
requests==2.32.3
|
| 38 |
-
rich==14.0.0
|
| 39 |
-
ruff==0.11.4
|
| 40 |
-
safehttpx==0.1.6
|
| 41 |
-
semantic-version==2.10.0
|
| 42 |
-
shellingham==1.5.4
|
| 43 |
-
six==1.17.0
|
| 44 |
-
sniffio==1.3.1
|
| 45 |
-
starlette==0.46.1
|
| 46 |
-
tomlkit==0.13.2
|
| 47 |
-
tqdm==4.67.1
|
| 48 |
-
typer==0.15.2
|
| 49 |
-
typing-inspection==0.4.0
|
| 50 |
-
typing_extensions==4.13.1
|
| 51 |
-
tzdata==2025.2
|
| 52 |
-
urllib3==2.3.0
|
| 53 |
-
uvicorn==0.34.0
|
| 54 |
-
websockets==15.0.1
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
pytesseract
|
| 3 |
+
pdf2image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|