Spaces:
Running
Running
| from cord_inference import prediction as cord_prediction | |
| from sroie_inference import prediction as sroie_prediction | |
| import gradio as gr | |
| import json | |
def prediction(image):
    """Parse a receipt image with the SROIE model, then with the CORD model.

    The SROIE fine-tune (for now Theivaprakasham/layoutlmv3-finetuned-sroie)
    runs first on ``image`` and returns its parsed fields together with an
    image in which the corresponding boxes are blurred. The CORD fine-tune is
    then run on a copy of that blurred image.

    Args:
        image: The receipt image to parse. It is passed straight to
            ``sroie_prediction``; presumably a PIL image -- confirm against
            that helper.

    Returns:
        A 5-tuple ``(sroie_fields, image_blurred, cord_fields, image_final,
        merged)``. ``merged`` is ``cord_fields`` itself when the SROIE result
        is empty; otherwise it is a new dict holding the SROIE entries
        followed by the CORD entries (on a key collision the CORD value wins).
    """
    # Stage 1: SROIE fields plus the image with those boxes blurred out.
    sroie_fields, image_blurred = sroie_prediction(image)

    # Stage 2: CORD runs on a copy so the blurred image returned to the
    # caller stays independent of whatever the CORD stage does to its input.
    cord_fields, image_final = cord_prediction(image_blurred.copy())

    # Combine both parsings as dictionaries rather than by splicing
    # serialized JSON text: text splicing truncates nested objects, trips on
    # braces inside string values, and yields invalid JSON when the CORD
    # result is empty.
    # NOTE(review): assumes both parsings are dict-like mappings -- confirm
    # against sroie_prediction / cord_prediction.
    if not sroie_fields:
        merged = cord_fields
    else:
        merged = {**sroie_fields, **cord_fields}

    return sroie_fields, image_blurred, cord_fields, image_final, merged
# Static texts and assets shown on the demo page.
title = "Interactive demo: LayoutLMv3 for receipts"

# Kept as adjacent literals purely for line length; the resulting string is
# one single description, including the embedded newline.
description = (
    "Demo for Microsoft's LayoutLMv3, a Transformer for state-of-the-art "
    "document image understanding tasks. This particular space uses two "
    "instances of the model, one fine-tuned on CORD and the other SROIE.\n"
    " It firsts uses the fine-tune on SROIE to extract date, company and "
    "address, then the fine-tune on CORD for the other info. To use it, "
    "simply upload an image or use the example image below. Results will "
    "show up in a few seconds."
)

# One inner list per example row; each row provides the single image input.
examples = [
    ["image.jpg"],
    ["image.PNG"],
]

# Force a fixed height on the input and output image panels.
css = ".output_image, .input_image {height: 600px !important}"
# Gradio interface: takes one image as input and returns a JSON holding the
# information extracted from it. For now the intermediate steps (per-model
# parsing and images) are displayed as well.

# The input component is created before the outputs, matching the order in
# which they are instantiated when written inline in the Interface call.
receipt_input = gr.Image(type="pil")

# Order matters: it must line up with the tuple returned by `prediction`.
result_views = [
    gr.JSON(label="sroie parsing"),
    gr.Image(type="pil", label="blurred image"),
    gr.JSON(label="cord parsing"),
    gr.Image(type="pil", label="annotated image"),
    gr.JSON(label="final output"),
]

iface = gr.Interface(
    fn=prediction,
    inputs=receipt_input,
    outputs=result_views,
    title=title,
    description=description,
    examples=examples,
    css=css,
)

# Start the web server for the demo.
iface.launch()