import functools
import logging

import gradio as gr
import numpy as np
from PIL import Image
from transformers import pipeline

logger = logging.getLogger(__name__)


@functools.lru_cache(maxsize=1)
def _get_segmenter():
    """Build the default image-segmentation pipeline once and reuse it.

    Constructing the pipeline loads a model, so it is created lazily on the
    first request and cached for all later ones.
    """
    # NOTE(review): the cached pipeline is shared by concurrent requests
    # (the interface allows up to 20) -- confirm this is acceptable for the
    # model that "image-segmentation" resolves to.
    return pipeline("image-segmentation")


def _binarize_mask(mask):
    """Return *mask* as an integer array holding 1 where it is > 0, else 0."""
    return (np.asarray(mask) > 0).astype(int)


def process_image(image):
    """Segment *image* and yield the values for the three output components.

    This is a generator that yields exactly once.

    Args:
        image: The uploaded picture (the input component is configured with
            ``type='pil'``), or ``None`` when nothing was provided.

    Yields:
        A three-element list ``[text, scores, annotated]``:

        * ``text`` -- the kept segment labels joined with ``','``;
        * ``scores`` -- a dict mapping label to the score reported for it
          (a repeated label keeps the score of its last segment);
        * ``annotated`` -- ``(image, [(mask, label), ...])`` where each mask
          is a 0/1 integer array.

        When *image* is ``None`` the list is ``[None, None, None]``.
    """
    if image is None:
        yield [None, None, None]
        return

    names = []
    confidences = {}
    sections = []
    for segment in _get_segmenter()(image):
        name = segment['label']
        # Labels starting with 'LABEL_' are skipped -- presumably these are
        # auto-generated placeholders for classes without a real name.
        if name.startswith('LABEL_'):
            continue

        score = segment['score']
        logger.debug("segment label=%s score=%s", name, score)

        names.append(name)
        # NOTE(review): the transformers docs describe ``score`` as optional;
        # None is left out so the Label output only ever receives numbers.
        if score is not None:
            confidences[name] = score
        sections.append((_binarize_mask(segment['mask']), name))

    yield [','.join(names), confidences, (image, sections)]


app = gr.Interface(
    title='Image To Text',
    fn=process_image,
    inputs=gr.Image(type='pil'),
    outputs=[
        gr.Textbox(label='text'),
        gr.Label(label='scores'),
        gr.AnnotatedImage(label='segmentation'),
    ],
    allow_flagging='never',
    concurrency_limit=20,
    examples=[['examples/sample1.jpg'], ['examples/sample2.jpg']],
)

# Only start the web server when executed as a script, so the module can be
# imported (e.g. by tooling or tests) without launching it.
if __name__ == '__main__':
    app.launch()