Spaces:
Sleeping
Sleeping
| import os | |
| # os.system('chmod 777 /tmp') | |
| # os.system('apt-get update -y') | |
| # os.system('apt-get install tesseract-ocr -y') | |
| # os.system('pip install -q pytesseract') | |
| from base64 import b64decode, b64encode | |
| from io import BytesIO | |
| import tesserocr | |
| from fastapi import FastAPI, File, Form | |
| from PIL import Image | |
| from transformers import pipeline | |
| #import streamlit as st | |
| # pytesseract.pytesseract.tesseract_cmd = r’./Tesseract-OCR/tesseract.exe’ | |
# Language names reported by the local Tesseract install, read once at import
# time. The pipe is opened in a `with` block so it is closed deterministically
# rather than whenever the garbage collector gets to it.
with os.popen('tesseract --list-langs') as _tesseract_langs:
    # [1:-1] drops the first line of the output (presumably a header line --
    # confirm against the installed tesseract) and the last split element
    # (the empty string left after a trailing newline).
    choices = _tesseract_langs.read().split('\n')[1:-1]

# Text passed to FastAPI as the API description.
description = """
Upload Receipt and get
"""

# docs_url="/" moves the interactive API docs from the default path to the
# site root.
app = FastAPI(
    title="ReceiptOCR",
    docs_url="/",
    description=description,
)

# Document question-answering pipeline, built once at import so every call
# shares the same loaded model.
pipe = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
def read_root():
    """Ask three fixed questions about a sample invoice image.

    Each question is passed, together with the hard-coded image URL, to the
    module-level ``pipe``; the ``'answer'`` field of the first item of each
    result is collected into a dict.

    Returns:
        dict: keys ``'total amount'``, ``'toal vat'`` and ``'date'``, each
        mapped to the corresponding answer.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how (or whether) it is registered on ``app``.
    """
    image = 'https://templates.invoicehome.com/invoice-template-us-neat-750px.png'

    # Response key -> question. Insertion order is the order in which the
    # pipeline is queried and the order of keys in the returned dict.
    # NOTE(review): 'toal vat' looks like a typo for 'total vat'; it is kept
    # as-is because clients may already depend on this key.
    prompts = {
        'total amount': "What is the Total amount?",
        'toal vat': "What is Total VAT amount?",
        'date': "What is the Date?",
    }

    # Run every query before touching any result, so all three pipeline calls
    # happen even if a later lookup fails.
    outputs = [pipe(image, question) for question in prompts.values()]

    # Assumes each pipeline result is a non-empty sequence whose first item
    # has an 'answer' entry -- TODO confirm for the installed transformers
    # version.
    return {key: result[0]['answer'] for key, result in zip(prompts, outputs)}