import gradio as gr
from transformers import pipeline
from PIL import Image
import numpy as np

# Lazily-created segmentation pipeline shared by every request. Building it
# loads the model, so it is done once instead of on each call.
_SEGMENTER = None


def _get_segmenter():
	"""Return the shared image-segmentation pipeline, creating it on first use."""
	global _SEGMENTER
	if _SEGMENTER is None:
		# Concurrent first requests may each build a pipeline before the
		# global is set; the last one wins and later calls reuse it.
		_SEGMENTER = pipeline("image-segmentation")
	return _SEGMENTER


def process_image(image):
	"""Segment *image* and stream the results to the UI.

	Args:
		image: The input picture (the interface passes a PIL image), or
			``None`` when nothing was supplied.

	Yields:
		A three-item list ``[text, scores, annotated]`` after each kept segment:
		``text`` is the comma-joined labels seen so far, ``scores`` maps each
		label to its score (``None`` while no segment has reported a score),
		and ``annotated`` is ``(image, [(mask, label), ...])`` with binary
		0/1 masks. For a ``None`` image a single ``[None, None, None]`` is
		yielded. If no segment is kept, one result with empty text and no
		annotations is yielded so the outputs are still refreshed.
	"""
	if image is None:
		yield [None, None, None]
		return

	scores = _get_segmenter()(image)

	text = []
	label = {}
	sections = []
	for s in scores:
		# Skip segments whose label is only a generic 'LABEL_<n>' placeholder.
		if s['label'].startswith('LABEL_'):
			continue
		print(s)
		text.append(s['label'])
		# The pipeline's score is optional (None for models that do not
		# estimate a confidence); leave such entries out of the score map.
		if s['score'] is not None:
			label[s['label']] = s['score']
		# Binarise the mask in one vectorised step: any non-zero pixel -> 1.
		mask = (np.array(s['mask']) > 0).astype(int)
		sections.append((mask, s['label']))

		yield [','.join(text), label or None, (image, sections)]

	if not sections:
		# Nothing was kept: still emit one result so the UI shows the image.
		yield ['', None, (image, [])]

# Input widget: the uploaded picture is handed to process_image as a PIL image.
image_input = gr.Image(type='pil')

# Output widgets, in the same order as the items process_image yields.
result_outputs = [
	gr.Textbox(label='text'),
	gr.Label(label='scores'),
	gr.AnnotatedImage(label='segmentation'),
]

# Assemble the web UI. No description is set and example caching is left at
# the library default.
app = gr.Interface(
	fn=process_image,
	inputs=image_input,
	outputs=result_outputs,
	title='Image To Text',
	examples=[['examples/sample1.jpg'], ['examples/sample2.jpg']],
	allow_flagging='never',
	concurrency_limit=20,
)
app.launch()