Spaces:
Build error
Build error
Commit
·
5568e6f
1
Parent(s):
a30c743
Fixed bug in ocr function
Browse files
Fixed bugs in the OCR function
app.py
CHANGED
|
@@ -231,14 +231,14 @@ def pdf_ocr(file, model_t, question):
|
|
| 231 |
# Perform OCR on the PDF if the extracted text is empty
|
| 232 |
if not text:
|
| 233 |
# Convert PDF pages to images
|
| 234 |
-
images =
|
| 235 |
for i, img in enumerate(images):
|
| 236 |
text += pytesseract.image_to_string(img, lang='ita')
|
| 237 |
|
| 238 |
# Clear the image list to free up memory
|
| 239 |
del images
|
| 240 |
|
| 241 |
-
ks = ('mq', 'metri quadri', 'm2')
|
| 242 |
quest = "Quanti metri quadri misura la superficie?"
|
| 243 |
totalK = ['totale', 'complessivo', 'complessiva']
|
| 244 |
|
|
@@ -296,6 +296,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 296 |
extract_button.click(fn = ocr_interface,
|
| 297 |
inputs=[pdf_input, model_input, question_input], outputs=[values_output, total_output, text_output])
|
| 298 |
|
| 299 |
-
gr.Examples(['Example1.pdf', 'Example2.pdf'], inputs = pdf_input)
|
| 300 |
|
| 301 |
demo.launch()
|
|
|
|
| 231 |
# Perform OCR on the PDF if the extracted text is empty
|
| 232 |
if not text:
|
| 233 |
# Convert PDF pages to images
|
| 234 |
+
images = convert_from_bytes(content)
|
| 235 |
for i, img in enumerate(images):
|
| 236 |
text += pytesseract.image_to_string(img, lang='ita')
|
| 237 |
|
| 238 |
# Clear the image list to free up memory
|
| 239 |
del images
|
| 240 |
|
| 241 |
+
# Spellings of the square-metre unit. Every entry needs its own comma:
# adjacent string literals are implicitly concatenated, which would
# silently merge 'Mq' and 'metri quadri' into one bogus entry.
ks = ('mq', 'MQ', 'Mq', 'metri quadri', 'm2')
|
| 242 |
quest = "Quanti metri quadri misura la superficie?"
|
| 243 |
totalK = ['totale', 'complessivo', 'complessiva']
|
| 244 |
|
|
|
|
| 296 |
extract_button.click(fn = ocr_interface,
|
| 297 |
inputs=[pdf_input, model_input, question_input], outputs=[values_output, total_output, text_output])
|
| 298 |
|
| 299 |
+
gr.Examples(['Example1(scanned).pdf', 'Example2.pdf', 'Example3Large.pdf'], inputs = pdf_input)
|
| 300 |
|
| 301 |
demo.launch()
|