Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -90,61 +90,63 @@ def draw_ocr_bboxes(image, prediction):
|
|
| 90 |
return image
|
| 91 |
|
| 92 |
def process_image(image, task_prompt, text_input=None):
    """Dispatch ``task_prompt`` to its inference / visualisation path.

    ``image`` is first passed through ``Image.fromarray``. What comes back
    depends on the prompt family:

    * caption / OCR / region-to-text prompts: the ``run_example`` output;
    * box-style prompts: whatever ``plot_bbox`` returns;
    * segmentation and ``<OCR_WITH_REGION>`` prompts: the value returned by
      the drawing helper, which is handed a deep copy of the image;
    * any other prompt: ``None``.

    ``text_input`` is forwarded to ``run_example`` only for the prompts
    that take a third argument below; the rest call it with two arguments.
    """
    pil_image = Image.fromarray(image)

    # Tuples (not sets) so membership is decided by ``==`` only, exactly
    # like a chain of equality comparisons.
    plain_text_prompts = (
        '<CAPTION>',
        '<DETAILED_CAPTION>',
        '<MORE_DETAILED_CAPTION>',
        '<OCR>',
    )
    text_with_input_prompts = (
        '<REGION_TO_CATEGORY>',
        '<REGION_TO_DESCRIPTION>',
    )
    box_prompts = (
        '<OD>',
        '<DENSE_REGION_CAPTION>',
        '<REGION_PROPOSAL>',
    )
    polygon_prompts = (
        '<REFERRING_EXPRESSION_SEGMENTATION>',
        '<REGION_TO_SEGMENTATION>',
    )

    # Text-only tasks: hand back the run_example output untouched.
    if task_prompt in plain_text_prompts:
        return run_example(task_prompt, pil_image)

    if task_prompt in text_with_input_prompts:
        return run_example(task_prompt, pil_image, text_input)

    # Box tasks: the prediction is keyed by the prompt itself.
    if task_prompt in box_prompts:
        prediction = run_example(task_prompt, pil_image)
        return plot_bbox(pil_image, prediction[task_prompt])

    if task_prompt == '<CAPTION_TO_PHRASE_GROUNDING>':
        prediction = run_example(task_prompt, pil_image, text_input)
        return plot_bbox(pil_image, prediction['<CAPTION_TO_PHRASE_GROUNDING>'])

    # Segmentation tasks draw on a deep copy so the converted input image
    # object is not the one handed to the drawing helper.
    if task_prompt in polygon_prompts:
        prediction = run_example(task_prompt, pil_image, text_input)
        canvas = copy.deepcopy(pil_image)
        return draw_polygons(canvas, prediction[task_prompt], fill_mask=True)

    if task_prompt == '<OPEN_VOCABULARY_DETECTION>':
        prediction = run_example(task_prompt, pil_image, text_input)
        as_od = convert_to_od_format(prediction['<OPEN_VOCABULARY_DETECTION>'])
        return plot_bbox(pil_image, as_od)

    if task_prompt == '<OCR_WITH_REGION>':
        prediction = run_example(task_prompt, pil_image)
        canvas = copy.deepcopy(pil_image)
        return draw_ocr_bboxes(canvas, prediction['<OCR_WITH_REGION>'])

    # Unrecognised prompt: nothing to run.
    return None
|
|
|
|
|
|
|
| 148 |
|
| 149 |
css = """
|
| 150 |
#output {
|
|
|
|
| 90 |
return image
|
| 91 |
|
| 92 |
def process_image(image, task_prompt, text_input=None):
    """Run the task named by ``task_prompt`` and return ``(text, visual)``.

    At most one slot of the returned pair carries the task output:

    * text-producing prompts return ``(run_example output, None)``;
    * drawing prompts return ``("", <plot_bbox / drawing helper result>)``;
    * any other prompt returns ``("", None)``.

    ``text_input`` is forwarded to ``run_example`` only for the prompts
    that pass a third argument below; the rest call it with two arguments.
    """
    pil_image = Image.fromarray(image)  # NumPy array -> PIL image

    # Tuples (not sets) so membership is decided by ``==`` only, exactly
    # like a chain of equality comparisons.
    caption_like = (
        '<CAPTION>',
        '<DETAILED_CAPTION>',
        '<MORE_DETAILED_CAPTION>',
        '<OCR>',
    )
    region_text = (
        '<REGION_TO_CATEGORY>',
        '<REGION_TO_DESCRIPTION>',
    )
    box_tasks = (
        '<OD>',
        '<DENSE_REGION_CAPTION>',
        '<REGION_PROPOSAL>',
    )
    mask_tasks = (
        '<REFERRING_EXPRESSION_SEGMENTATION>',
        '<REGION_TO_SEGMENTATION>',
    )

    # Defaults double as the answer for an unknown prompt.
    text_out, visual_out = "", None

    if task_prompt in caption_like:
        text_out = run_example(task_prompt, pil_image)
    elif task_prompt in region_text:
        text_out = run_example(task_prompt, pil_image, text_input)
    elif task_prompt in box_tasks:
        # The prediction is keyed by the prompt itself.
        prediction = run_example(task_prompt, pil_image)
        visual_out = plot_bbox(pil_image, prediction[task_prompt])
    elif task_prompt == '<CAPTION_TO_PHRASE_GROUNDING>':
        prediction = run_example(task_prompt, pil_image, text_input)
        visual_out = plot_bbox(pil_image, prediction['<CAPTION_TO_PHRASE_GROUNDING>'])
    elif task_prompt in mask_tasks:
        prediction = run_example(task_prompt, pil_image, text_input)
        # Draw on a deep copy rather than on the converted input image.
        canvas = copy.deepcopy(pil_image)
        visual_out = draw_polygons(canvas, prediction[task_prompt], fill_mask=True)
    elif task_prompt == '<OPEN_VOCABULARY_DETECTION>':
        prediction = run_example(task_prompt, pil_image, text_input)
        as_od = convert_to_od_format(prediction['<OPEN_VOCABULARY_DETECTION>'])
        visual_out = plot_bbox(pil_image, as_od)
    elif task_prompt == '<OCR_WITH_REGION>':
        prediction = run_example(task_prompt, pil_image)
        canvas = copy.deepcopy(pil_image)
        visual_out = draw_ocr_bboxes(canvas, prediction['<OCR_WITH_REGION>'])

    return text_out, visual_out
|
| 150 |
|
| 151 |
css = """
|
| 152 |
#output {
|